chiark / gitweb /
journald: don't enforce monotonicity of realtime clocks when copying entries
[elogind.git] / src / journal / journal-file.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mman.h>
23 #include <errno.h>
24 #include <sys/uio.h>
25 #include <unistd.h>
26 #include <sys/statvfs.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29
30 #include "journal-def.h"
31 #include "journal-file.h"
32 #include "lookup3.h"
33 #include "compress.h"
34
35 #define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*16ULL) /* byte size of the item area of a new data hash table */
36 #define DEFAULT_FIELD_HASH_TABLE_SIZE (2047ULL*16ULL) /* byte size of the item area of a new field hash table */
37
38 #define DEFAULT_WINDOW_SIZE (8ULL*1024ULL*1024ULL) /* 8 MiB; smaller mapping requests are widened to this size */
39
40 #define COMPRESSION_SIZE_THRESHOLD (512ULL) /* payloads shorter than this many bytes are never compressed */
41
42 /* This is the minimum journal file size */
43 #define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL)                  /* 64 KiB */
44
45 /* These are the lower and upper bounds used when the max_use value
46  * is deduced from the file system size */
47 #define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL)           /* 1 MiB */
48 #define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)   /* 4 GiB */
49
50 /* This is the upper bound used when max_size is deduced from max_use */
51 #define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL)        /* 128 MiB */
52
53 /* This is the upper bound used when the keep_free value is deduced
54  * from the file system size */
55 #define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
56
57 /* This is the keep_free value used when the file system size cannot
58  * be determined */
59 #define DEFAULT_KEEP_FREE (1024ULL*1024ULL)                    /* 1 MiB */
60
61 static const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' }; /* 8 magic bytes stored in, and checked against, the file header */
62
63 #define ALIGN64(x) (((x) + 7ULL) & ~7ULL) /* rounds x up to the next multiple of 8 */
64
65 void journal_file_close(JournalFile *f) {
66         int t;
67
68         assert(f);
69
70         if (f->header) {
71                 if (f->writable)
72                         f->header->state = STATE_OFFLINE;
73
74                 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
75         }
76
77         for (t = 0; t < _WINDOW_MAX; t++)
78                 if (f->windows[t].ptr)
79                         munmap(f->windows[t].ptr, f->windows[t].size);
80
81         if (f->fd >= 0)
82                 close_nointr_nofail(f->fd);
83
84         free(f->path);
85
86 #ifdef HAVE_XZ
87         free(f->compress_buffer);
88 #endif
89
90         free(f);
91 }
92
93 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
94         Header h;
95         ssize_t k;
96         int r;
97
98         assert(f);
99
100         zero(h);
101         memcpy(h.signature, signature, 8);
102         h.header_size = htole64(ALIGN64(sizeof(h)));
103
104         r = sd_id128_randomize(&h.file_id);
105         if (r < 0)
106                 return r;
107
108         if (template) {
109                 h.seqnum_id = template->header->seqnum_id;
110                 h.seqnum = template->header->seqnum;
111         } else
112                 h.seqnum_id = h.file_id;
113
114         k = pwrite(f->fd, &h, sizeof(h), 0);
115         if (k < 0)
116                 return -errno;
117
118         if (k != sizeof(h))
119                 return -EIO;
120
121         return 0;
122 }
123
124 static int journal_file_refresh_header(JournalFile *f) {
125         int r;
126         sd_id128_t boot_id;
127
128         assert(f);
129
130         r = sd_id128_get_machine(&f->header->machine_id);
131         if (r < 0)
132                 return r;
133
134         r = sd_id128_get_boot(&boot_id);
135         if (r < 0)
136                 return r;
137
138         if (sd_id128_equal(boot_id, f->header->boot_id))
139                 f->tail_entry_monotonic_valid = true;
140
141         f->header->boot_id = boot_id;
142
143         f->header->state = STATE_ONLINE;
144
145         __sync_synchronize();
146
147         return 0;
148 }
149
150 static int journal_file_verify_header(JournalFile *f) {
151         assert(f);
152
153         if (memcmp(f->header, signature, 8))
154                 return -EBADMSG;
155
156 #ifdef HAVE_XZ
157         if ((le64toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
158                 return -EPROTONOSUPPORT;
159 #else
160         if (f->header->incompatible_flags != 0)
161                 return -EPROTONOSUPPORT;
162 #endif
163
164         if (f->header->header_size != htole64(ALIGN64(sizeof(*(f->header)))))
165                 return -EBADMSG;
166
167         if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
168                 return -ENODATA;
169
170         if (f->writable) {
171                 uint8_t state;
172                 sd_id128_t machine_id;
173                 int r;
174
175                 r = sd_id128_get_machine(&machine_id);
176                 if (r < 0)
177                         return r;
178
179                 if (!sd_id128_equal(machine_id, f->header->machine_id))
180                         return -EHOSTDOWN;
181
182                 state = f->header->state;
183
184                 if (state == STATE_ONLINE)
185                         log_debug("Journal file %s is already online. Assuming unclean closing. Ignoring.", f->path);
186                         /* FIXME: immediately rotate */
187                 else if (state == STATE_ARCHIVED)
188                         return -ESHUTDOWN;
189                 else if (state != STATE_OFFLINE)
190                         log_debug("Journal file %s has unknown state %u. Ignoring.", f->path, state);
191         }
192
193         return 0;
194 }
195
196 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
197         uint64_t old_size, new_size;
198         int r;
199
200         assert(f);
201
202         /* We assume that this file is not sparse, and we know that
203          * for sure, since we always call posix_fallocate()
204          * ourselves */
205
206         old_size =
207                 le64toh(f->header->header_size) +
208                 le64toh(f->header->arena_size);
209
210         new_size = PAGE_ALIGN(offset + size);
211         if (new_size < le64toh(f->header->header_size))
212                 new_size = le64toh(f->header->header_size);
213
214         if (new_size <= old_size)
215                 return 0;
216
217         if (f->metrics.max_size > 0 &&
218             new_size > f->metrics.max_size)
219                 return -E2BIG;
220
221         if (new_size > f->metrics.min_size &&
222             f->metrics.keep_free > 0) {
223                 struct statvfs svfs;
224
225                 if (fstatvfs(f->fd, &svfs) >= 0) {
226                         uint64_t available;
227
228                         available = svfs.f_bfree * svfs.f_bsize;
229
230                         if (available >= f->metrics.keep_free)
231                                 available -= f->metrics.keep_free;
232                         else
233                                 available = 0;
234
235                         if (new_size - old_size > available)
236                                 return -E2BIG;
237                 }
238         }
239
240         /* Note that the glibc fallocate() fallback is very
241            inefficient, hence we try to minimize the allocation area
242            as we can. */
243         r = posix_fallocate(f->fd, old_size, new_size - old_size);
244         if (r != 0)
245                 return -r;
246
247         if (fstat(f->fd, &f->last_stat) < 0)
248                 return -errno;
249
250         f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
251
252         return 0;
253 }
254
255 static int journal_file_map(
256                 JournalFile *f,
257                 uint64_t offset,
258                 uint64_t size,
259                 void **_window,
260                 uint64_t *_woffset,
261                 uint64_t *_wsize,
262                 void **ret) {
263
264         uint64_t woffset, wsize;
265         void *window;
266
267         assert(f);
268         assert(size > 0);
269         assert(ret);
270
271         woffset = offset & ~((uint64_t) page_size() - 1ULL);
272         wsize = size + (offset - woffset);
273         wsize = PAGE_ALIGN(wsize);
274
275         /* Avoid SIGBUS on invalid accesses */
276         if (woffset + wsize > (uint64_t) PAGE_ALIGN(f->last_stat.st_size))
277                 return -EADDRNOTAVAIL;
278
279         window = mmap(NULL, wsize, f->prot, MAP_SHARED, f->fd, woffset);
280         if (window == MAP_FAILED)
281                 return -errno;
282
283         if (_window)
284                 *_window = window;
285
286         if (_woffset)
287                 *_woffset = woffset;
288
289         if (_wsize)
290                 *_wsize = wsize;
291
292         *ret = (uint8_t*) window + (offset - woffset);
293
294         return 0;
295 }
296
297 static int journal_file_move_to(JournalFile *f, int wt, uint64_t offset, uint64_t size, void **ret) {
298         void *p = NULL;
299         uint64_t delta;
300         int r;
301         Window *w;
302
303         assert(f);
304         assert(ret);
305         assert(wt >= 0);
306         assert(wt < _WINDOW_MAX);
307
308         if (offset + size > (uint64_t) f->last_stat.st_size) {
309                 /* Hmm, out of range? Let's refresh the fstat() data
310                  * first, before we trust that check. */
311
312                 if (fstat(f->fd, &f->last_stat) < 0 ||
313                     offset + size > (uint64_t) f->last_stat.st_size)
314                         return -EADDRNOTAVAIL;
315         }
316
317         w = f->windows + wt;
318
319         if (_likely_(w->ptr &&
320                      w->offset <= offset &&
321                      w->offset + w->size >= offset + size)) {
322
323                 *ret = (uint8_t*) w->ptr + (offset - w->offset);
324                 return 0;
325         }
326
327         if (w->ptr) {
328                 if (munmap(w->ptr, w->size) < 0)
329                         return -errno;
330
331                 w->ptr = NULL;
332                 w->size = w->offset = 0;
333         }
334
335         if (size < DEFAULT_WINDOW_SIZE) {
336                 /* If the default window size is larger then what was
337                  * asked for extend the mapping a bit in the hope to
338                  * minimize needed remappings later on. We add half
339                  * the window space before and half behind the
340                  * requested mapping */
341
342                 delta = (DEFAULT_WINDOW_SIZE - size) / 2;
343
344                 if (delta > offset)
345                         delta = offset;
346
347                 offset -= delta;
348                 size = DEFAULT_WINDOW_SIZE;
349         } else
350                 delta = 0;
351
352         if (offset + size > (uint64_t) f->last_stat.st_size)
353                 size = (uint64_t) f->last_stat.st_size - offset;
354
355         if (size <= 0)
356                 return -EADDRNOTAVAIL;
357
358         r = journal_file_map(f,
359                              offset, size,
360                              &w->ptr, &w->offset, &w->size,
361                              &p);
362
363         if (r < 0)
364                 return r;
365
366         *ret = (uint8_t*) p + delta;
367         return 0;
368 }
369
370 static bool verify_hash(Object *o) {
371         uint64_t h1, h2;
372
373         assert(o);
374
375         if (o->object.type == OBJECT_DATA && !(o->object.flags & OBJECT_COMPRESSED)) {
376                 h1 = le64toh(o->data.hash);
377                 h2 = hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
378         } else if (o->object.type == OBJECT_FIELD) {
379                 h1 = le64toh(o->field.hash);
380                 h2 = hash64(o->field.payload, le64toh(o->object.size) - offsetof(Object, field.payload));
381         } else
382                 return true;
383
384         return h1 == h2;
385 }
386
387 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
388         int r;
389         void *t;
390         Object *o;
391         uint64_t s;
392
393         assert(f);
394         assert(ret);
395         assert(type < _OBJECT_TYPE_MAX);
396
397         r = journal_file_move_to(f, type >= 0 ? type : WINDOW_UNKNOWN, offset, sizeof(ObjectHeader), &t);
398         if (r < 0)
399                 return r;
400
401         o = (Object*) t;
402         s = le64toh(o->object.size);
403
404         if (s < sizeof(ObjectHeader))
405                 return -EBADMSG;
406
407         if (type >= 0 && o->object.type != type)
408                 return -EBADMSG;
409
410         if (s > sizeof(ObjectHeader)) {
411                 r = journal_file_move_to(f, o->object.type, offset, s, &t);
412                 if (r < 0)
413                         return r;
414
415                 o = (Object*) t;
416         }
417
418         if (!verify_hash(o))
419                 return -EBADMSG;
420
421         *ret = o;
422         return 0;
423 }
424
425 static uint64_t journal_file_seqnum(JournalFile *f, uint64_t *seqnum) {
426         uint64_t r;
427
428         assert(f);
429
430         r = le64toh(f->header->seqnum) + 1;
431
432         if (seqnum) {
433                 /* If an external seqnum counter was passed, we update
434                  * both the local and the external one, and set it to
435                  * the maximum of both */
436
437                 if (*seqnum + 1 > r)
438                         r = *seqnum + 1;
439
440                 *seqnum = r;
441         }
442
443         f->header->seqnum = htole64(r);
444
445         if (f->header->first_seqnum == 0)
446                 f->header->first_seqnum = htole64(r);
447
448         return r;
449 }
450
451 static int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
452         int r;
453         uint64_t p;
454         Object *tail, *o;
455         void *t;
456
457         assert(f);
458         assert(size >= sizeof(ObjectHeader));
459         assert(offset);
460         assert(ret);
461
462         p = le64toh(f->header->tail_object_offset);
463         if (p == 0)
464                 p = le64toh(f->header->header_size);
465         else {
466                 r = journal_file_move_to_object(f, -1, p, &tail);
467                 if (r < 0)
468                         return r;
469
470                 p += ALIGN64(le64toh(tail->object.size));
471         }
472
473         r = journal_file_allocate(f, p, size);
474         if (r < 0)
475                 return r;
476
477         r = journal_file_move_to(f, type, p, size, &t);
478         if (r < 0)
479                 return r;
480
481         o = (Object*) t;
482
483         zero(o->object);
484         o->object.type = type;
485         o->object.size = htole64(size);
486
487         f->header->tail_object_offset = htole64(p);
488         f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
489
490         *ret = o;
491         *offset = p;
492
493         return 0;
494 }
495
496 static int journal_file_setup_data_hash_table(JournalFile *f) {
497         uint64_t s, p;
498         Object *o;
499         int r;
500
501         assert(f);
502
503         s = DEFAULT_DATA_HASH_TABLE_SIZE;
504         r = journal_file_append_object(f,
505                                        OBJECT_DATA_HASH_TABLE,
506                                        offsetof(Object, hash_table.items) + s,
507                                        &o, &p);
508         if (r < 0)
509                 return r;
510
511         memset(o->hash_table.items, 0, s);
512
513         f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
514         f->header->data_hash_table_size = htole64(s);
515
516         return 0;
517 }
518
519 static int journal_file_setup_field_hash_table(JournalFile *f) {
520         uint64_t s, p;
521         Object *o;
522         int r;
523
524         assert(f);
525
526         s = DEFAULT_FIELD_HASH_TABLE_SIZE;
527         r = journal_file_append_object(f,
528                                        OBJECT_FIELD_HASH_TABLE,
529                                        offsetof(Object, hash_table.items) + s,
530                                        &o, &p);
531         if (r < 0)
532                 return r;
533
534         memset(o->hash_table.items, 0, s);
535
536         f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
537         f->header->field_hash_table_size = htole64(s);
538
539         return 0;
540 }
541
542 static int journal_file_map_data_hash_table(JournalFile *f) {
543         uint64_t s, p;
544         void *t;
545         int r;
546
547         assert(f);
548
549         p = le64toh(f->header->data_hash_table_offset);
550         s = le64toh(f->header->data_hash_table_size);
551
552         r = journal_file_move_to(f,
553                                  WINDOW_DATA_HASH_TABLE,
554                                  p, s,
555                                  &t);
556         if (r < 0)
557                 return r;
558
559         f->data_hash_table = t;
560         return 0;
561 }
562
563 static int journal_file_map_field_hash_table(JournalFile *f) {
564         uint64_t s, p;
565         void *t;
566         int r;
567
568         assert(f);
569
570         p = le64toh(f->header->field_hash_table_offset);
571         s = le64toh(f->header->field_hash_table_size);
572
573         r = journal_file_move_to(f,
574                                  WINDOW_FIELD_HASH_TABLE,
575                                  p, s,
576                                  &t);
577         if (r < 0)
578                 return r;
579
580         f->field_hash_table = t;
581         return 0;
582 }
583
584 static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
585         uint64_t p, h;
586         int r;
587
588         assert(f);
589         assert(o);
590         assert(offset > 0);
591         assert(o->object.type == OBJECT_DATA);
592
593         /* This might alter the window we are looking at */
594
595         o->data.next_hash_offset = o->data.next_field_offset = 0;
596         o->data.entry_offset = o->data.entry_array_offset = 0;
597         o->data.n_entries = 0;
598
599         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
600         p = le64toh(f->data_hash_table[h].tail_hash_offset);
601         if (p == 0) {
602                 /* Only entry in the hash table is easy */
603                 f->data_hash_table[h].head_hash_offset = htole64(offset);
604         } else {
605                 /* Move back to the previous data object, to patch in
606                  * pointer */
607
608                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
609                 if (r < 0)
610                         return r;
611
612                 o->data.next_hash_offset = htole64(offset);
613         }
614
615         f->data_hash_table[h].tail_hash_offset = htole64(offset);
616
617         return 0;
618 }
619
620 int journal_file_find_data_object_with_hash(
621                 JournalFile *f,
622                 const void *data, uint64_t size, uint64_t hash,
623                 Object **ret, uint64_t *offset) {
624
625         uint64_t p, osize, h;
626         int r;
627
628         assert(f);
629         assert(data || size == 0);
630
631         osize = offsetof(Object, data.payload) + size;
632
633         if (f->header->data_hash_table_size == 0)
634                 return -EBADMSG;
635
636         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
637         p = le64toh(f->data_hash_table[h].head_hash_offset);
638
639         while (p > 0) {
640                 Object *o;
641
642                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
643                 if (r < 0)
644                         return r;
645
646                 if (le64toh(o->data.hash) != hash)
647                         goto next;
648
649                 if (o->object.flags & OBJECT_COMPRESSED) {
650 #ifdef HAVE_XZ
651                         uint64_t l, rsize;
652
653                         l = le64toh(o->object.size);
654                         if (l <= offsetof(Object, data.payload))
655                                 return -EBADMSG;
656
657                         l -= offsetof(Object, data.payload);
658
659                         if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
660                                 return -EBADMSG;
661
662                         if (rsize == size &&
663                             memcmp(f->compress_buffer, data, size) == 0) {
664
665                                 if (ret)
666                                         *ret = o;
667
668                                 if (offset)
669                                         *offset = p;
670
671                                 return 1;
672                         }
673 #else
674                         return -EPROTONOSUPPORT;
675 #endif
676
677                 } else if (le64toh(o->object.size) == osize &&
678                            memcmp(o->data.payload, data, size) == 0) {
679
680                         if (ret)
681                                 *ret = o;
682
683                         if (offset)
684                                 *offset = p;
685
686                         return 1;
687                 }
688
689         next:
690                 p = le64toh(o->data.next_hash_offset);
691         }
692
693         return 0;
694 }
695
696 int journal_file_find_data_object(
697                 JournalFile *f,
698                 const void *data, uint64_t size,
699                 Object **ret, uint64_t *offset) {
700
701         uint64_t hash;
702
703         assert(f);
704         assert(data || size == 0);
705
706         hash = hash64(data, size);
707
708         return journal_file_find_data_object_with_hash(f,
709                                                        data, size, hash,
710                                                        ret, offset);
711 }
712
713 static int journal_file_append_data(
714                 JournalFile *f,
715                 const void *data, uint64_t size,
716                 Object **ret, uint64_t *offset) {
717
718         uint64_t hash, p;
719         uint64_t osize;
720         Object *o;
721         int r;
722         bool compressed = false;
723
724         assert(f);
725         assert(data || size == 0);
726
727         hash = hash64(data, size);
728
729         r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
730         if (r < 0)
731                 return r;
732         else if (r > 0) {
733
734                 if (ret)
735                         *ret = o;
736
737                 if (offset)
738                         *offset = p;
739
740                 return 0;
741         }
742
743         osize = offsetof(Object, data.payload) + size;
744         r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
745         if (r < 0)
746                 return r;
747
748         o->data.hash = htole64(hash);
749
750 #ifdef HAVE_XZ
751         if (f->compress &&
752             size >= COMPRESSION_SIZE_THRESHOLD) {
753                 uint64_t rsize;
754
755                 compressed = compress_blob(data, size, o->data.payload, &rsize);
756
757                 if (compressed) {
758                         o->object.size = htole64(offsetof(Object, data.payload) + rsize);
759                         o->object.flags |= OBJECT_COMPRESSED;
760
761                         f->header->incompatible_flags = htole32(le32toh(f->header->incompatible_flags) | HEADER_INCOMPATIBLE_COMPRESSED);
762
763                         log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
764                 }
765         }
766 #endif
767
768         if (!compressed)
769                 memcpy(o->data.payload, data, size);
770
771         r = journal_file_link_data(f, o, p, hash);
772         if (r < 0)
773                 return r;
774
775         /* The linking might have altered the window, so let's
776          * refresh our pointer */
777         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
778         if (r < 0)
779                 return r;
780
781         if (ret)
782                 *ret = o;
783
784         if (offset)
785                 *offset = p;
786
787         return 0;
788 }
789
790 uint64_t journal_file_entry_n_items(Object *o) {
791         assert(o);
792         assert(o->object.type == OBJECT_ENTRY);
793
794         return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
795 }
796
797 static uint64_t journal_file_entry_array_n_items(Object *o) {
798         assert(o);
799         assert(o->object.type == OBJECT_ENTRY_ARRAY);
800
801         return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
802 }
803
804 static int link_entry_into_array(JournalFile *f,
805                                  le64_t *first,
806                                  le64_t *idx,
807                                  uint64_t p) {
808         int r;
809         uint64_t n = 0, ap = 0, q, i, a, hidx;
810         Object *o;
811
812         assert(f);
813         assert(first);
814         assert(idx);
815         assert(p > 0);
816
817         a = le64toh(*first);
818         i = hidx = le64toh(*idx);
819         while (a > 0) {
820
821                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
822                 if (r < 0)
823                         return r;
824
825                 n = journal_file_entry_array_n_items(o);
826                 if (i < n) {
827                         o->entry_array.items[i] = htole64(p);
828                         *idx = htole64(hidx + 1);
829                         return 0;
830                 }
831
832                 i -= n;
833                 ap = a;
834                 a = le64toh(o->entry_array.next_entry_array_offset);
835         }
836
837         if (hidx > n)
838                 n = (hidx+1) * 2;
839         else
840                 n = n * 2;
841
842         if (n < 4)
843                 n = 4;
844
845         r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
846                                        offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
847                                        &o, &q);
848         if (r < 0)
849                 return r;
850
851         o->entry_array.items[i] = htole64(p);
852
853         if (ap == 0)
854                 *first = htole64(q);
855         else {
856                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
857                 if (r < 0)
858                         return r;
859
860                 o->entry_array.next_entry_array_offset = htole64(q);
861         }
862
863         *idx = htole64(hidx + 1);
864
865         return 0;
866 }
867
868 static int link_entry_into_array_plus_one(JournalFile *f,
869                                           le64_t *extra,
870                                           le64_t *first,
871                                           le64_t *idx,
872                                           uint64_t p) {
873
874         int r;
875
876         assert(f);
877         assert(extra);
878         assert(first);
879         assert(idx);
880         assert(p > 0);
881
882         if (*idx == 0)
883                 *extra = htole64(p);
884         else {
885                 le64_t i;
886
887                 i = htole64(le64toh(*idx) - 1);
888                 r = link_entry_into_array(f, first, &i, p);
889                 if (r < 0)
890                         return r;
891         }
892
893         *idx = htole64(le64toh(*idx) + 1);
894         return 0;
895 }
896
897 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
898         uint64_t p;
899         int r;
900         assert(f);
901         assert(o);
902         assert(offset > 0);
903
904         p = le64toh(o->entry.items[i].object_offset);
905         if (p == 0)
906                 return -EINVAL;
907
908         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
909         if (r < 0)
910                 return r;
911
912         return link_entry_into_array_plus_one(f,
913                                               &o->data.entry_offset,
914                                               &o->data.entry_array_offset,
915                                               &o->data.n_entries,
916                                               offset);
917 }
918
919 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
920         uint64_t n, i;
921         int r;
922
923         assert(f);
924         assert(o);
925         assert(offset > 0);
926         assert(o->object.type == OBJECT_ENTRY);
927
928         __sync_synchronize();
929
930         /* Link up the entry itself */
931         r = link_entry_into_array(f,
932                                   &f->header->entry_array_offset,
933                                   &f->header->n_entries,
934                                   offset);
935         if (r < 0)
936                 return r;
937
938         /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
939
940         if (f->header->head_entry_realtime == 0)
941                 f->header->head_entry_realtime = o->entry.realtime;
942
943         f->header->tail_entry_realtime = o->entry.realtime;
944         f->header->tail_entry_monotonic = o->entry.monotonic;
945
946         f->tail_entry_monotonic_valid = true;
947
948         /* Link up the items */
949         n = journal_file_entry_n_items(o);
950         for (i = 0; i < n; i++) {
951                 r = journal_file_link_entry_item(f, o, offset, i);
952                 if (r < 0)
953                         return r;
954         }
955
956         return 0;
957 }
958
959 static int journal_file_append_entry_internal(
960                 JournalFile *f,
961                 const dual_timestamp *ts,
962                 uint64_t xor_hash,
963                 const EntryItem items[], unsigned n_items,
964                 uint64_t *seqnum,
965                 Object **ret, uint64_t *offset) {
966         uint64_t np;
967         uint64_t osize;
968         Object *o;
969         int r;
970
971         assert(f);
972         assert(items || n_items == 0);
973         assert(ts);
974
975         osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
976
977         r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
978         if (r < 0)
979                 return r;
980
981         o->entry.seqnum = htole64(journal_file_seqnum(f, seqnum));
982         memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
983         o->entry.realtime = htole64(ts->realtime);
984         o->entry.monotonic = htole64(ts->monotonic);
985         o->entry.xor_hash = htole64(xor_hash);
986         o->entry.boot_id = f->header->boot_id;
987
988         r = journal_file_link_entry(f, o, np);
989         if (r < 0)
990                 return r;
991
992         if (ret)
993                 *ret = o;
994
995         if (offset)
996                 *offset = np;
997
998         return 0;
999 }
1000
1001 void journal_file_post_change(JournalFile *f) {
1002         assert(f);
1003
1004         /* inotify() does not receive IN_MODIFY events from file
1005          * accesses done via mmap(). After each access we hence
1006          * trigger IN_MODIFY by truncating the journal file to its
1007          * current size which triggers IN_MODIFY. */
1008
1009         __sync_synchronize();
1010
1011         if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1012                 log_error("Failed to to truncate file to its own size: %m");
1013 }
1014
1015 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1016         unsigned i;
1017         EntryItem *items;
1018         int r;
1019         uint64_t xor_hash = 0;
1020         struct dual_timestamp _ts;
1021
1022         assert(f);
1023         assert(iovec || n_iovec == 0);
1024
1025         if (!f->writable)
1026                 return -EPERM;
1027
1028         if (!ts) {
1029                 dual_timestamp_get(&_ts);
1030                 ts = &_ts;
1031         }
1032
1033         if (f->tail_entry_monotonic_valid &&
1034             ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1035                 return -EINVAL;
1036
1037         items = alloca(sizeof(EntryItem) * n_iovec);
1038
1039         for (i = 0; i < n_iovec; i++) {
1040                 uint64_t p;
1041                 Object *o;
1042
1043                 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1044                 if (r < 0)
1045                         return r;
1046
1047                 xor_hash ^= le64toh(o->data.hash);
1048                 items[i].object_offset = htole64(p);
1049                 items[i].hash = o->data.hash;
1050         }
1051
1052         r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1053
1054         journal_file_post_change(f);
1055
1056         return r;
1057 }
1058
1059 static int generic_array_get(JournalFile *f,
1060                              uint64_t first,
1061                              uint64_t i,
1062                              Object **ret, uint64_t *offset) {
1063
1064         Object *o;
1065         uint64_t p = 0, a;
1066         int r;
1067
1068         assert(f);
1069
1070         a = first;
1071         while (a > 0) {
1072                 uint64_t n;
1073
1074                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1075                 if (r < 0)
1076                         return r;
1077
1078                 n = journal_file_entry_array_n_items(o);
1079                 if (i < n) {
1080                         p = le64toh(o->entry_array.items[i]);
1081                         break;
1082                 }
1083
1084                 i -= n;
1085                 a = le64toh(o->entry_array.next_entry_array_offset);
1086         }
1087
1088         if (a <= 0 || p <= 0)
1089                 return 0;
1090
1091         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1092         if (r < 0)
1093                 return r;
1094
1095         if (ret)
1096                 *ret = o;
1097
1098         if (offset)
1099                 *offset = p;
1100
1101         return 1;
1102 }
1103
1104 static int generic_array_get_plus_one(JournalFile *f,
1105                                       uint64_t extra,
1106                                       uint64_t first,
1107                                       uint64_t i,
1108                                       Object **ret, uint64_t *offset) {
1109
1110         Object *o;
1111
1112         assert(f);
1113
1114         if (i == 0) {
1115                 int r;
1116
1117                 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1118                 if (r < 0)
1119                         return r;
1120
1121                 if (ret)
1122                         *ret = o;
1123
1124                 if (offset)
1125                         *offset = extra;
1126
1127                 return 1;
1128         }
1129
1130         return generic_array_get(f, first, i-1, ret, offset);
1131 }
1132
/* Results of the test_object() callbacks used by the bisection helpers */
enum {
        TEST_FOUND = 0, /* the tested item matches the needle */
        TEST_LEFT  = 1, /* the tested item is smaller than the needle */
        TEST_RIGHT = 2  /* the tested item is larger than the needle */
};
1138
1139 static int generic_array_bisect(JournalFile *f,
1140                                 uint64_t first,
1141                                 uint64_t n,
1142                                 uint64_t needle,
1143                                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1144                                 direction_t direction,
1145                                 Object **ret,
1146                                 uint64_t *offset,
1147                                 uint64_t *idx) {
1148
1149         uint64_t a, p, t = 0, i = 0, last_p = 0;
1150         bool subtract_one = false;
1151         Object *o, *array = NULL;
1152         int r;
1153
1154         assert(f);
1155         assert(test_object);
1156
1157         a = first;
1158         while (a > 0) {
1159                 uint64_t left, right, k, lp;
1160
1161                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1162                 if (r < 0)
1163                         return r;
1164
1165                 k = journal_file_entry_array_n_items(array);
1166                 right = MIN(k, n);
1167                 if (right <= 0)
1168                         return 0;
1169
1170                 i = right - 1;
1171                 lp = p = le64toh(array->entry_array.items[i]);
1172                 if (p <= 0)
1173                         return -EBADMSG;
1174
1175                 r = test_object(f, p, needle);
1176                 if (r < 0)
1177                         return r;
1178
1179                 if (r == TEST_FOUND)
1180                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1181
1182                 if (r == TEST_RIGHT) {
1183                         left = 0;
1184                         right -= 1;
1185                         for (;;) {
1186                                 if (left == right) {
1187                                         if (direction == DIRECTION_UP)
1188                                                 subtract_one = true;
1189
1190                                         i = left;
1191                                         goto found;
1192                                 }
1193
1194                                 assert(left < right);
1195
1196                                 i = (left + right) / 2;
1197                                 p = le64toh(array->entry_array.items[i]);
1198                                 if (p <= 0)
1199                                         return -EBADMSG;
1200
1201                                 r = test_object(f, p, needle);
1202                                 if (r < 0)
1203                                         return r;
1204
1205                                 if (r == TEST_FOUND)
1206                                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1207
1208                                 if (r == TEST_RIGHT)
1209                                         right = i;
1210                                 else
1211                                         left = i + 1;
1212                         }
1213                 }
1214
1215                 if (k > n) {
1216                         if (direction == DIRECTION_UP) {
1217                                 i = n;
1218                                 subtract_one = true;
1219                                 goto found;
1220                         }
1221
1222                         return 0;
1223                 }
1224
1225                 last_p = lp;
1226
1227                 n -= k;
1228                 t += k;
1229                 a = le64toh(array->entry_array.next_entry_array_offset);
1230         }
1231
1232         return 0;
1233
1234 found:
1235         if (subtract_one && t == 0 && i == 0)
1236                 return 0;
1237
1238         if (subtract_one && i == 0)
1239                 p = last_p;
1240         else if (subtract_one)
1241                 p = le64toh(array->entry_array.items[i-1]);
1242         else
1243                 p = le64toh(array->entry_array.items[i]);
1244
1245         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1246         if (r < 0)
1247                 return r;
1248
1249         if (ret)
1250                 *ret = o;
1251
1252         if (offset)
1253                 *offset = p;
1254
1255         if (idx)
1256                 *idx = t + i + (subtract_one ? -1 : 0);
1257
1258         return 1;
1259 }
1260
1261 static int generic_array_bisect_plus_one(JournalFile *f,
1262                                          uint64_t extra,
1263                                          uint64_t first,
1264                                          uint64_t n,
1265                                          uint64_t needle,
1266                                          int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1267                                          direction_t direction,
1268                                          Object **ret,
1269                                          uint64_t *offset,
1270                                          uint64_t *idx) {
1271
1272         int r;
1273         bool step_back = false;
1274         Object *o;
1275
1276         assert(f);
1277         assert(test_object);
1278
1279         if (n <= 0)
1280                 return 0;
1281
1282         /* This bisects the array in object 'first', but first checks
1283          * an extra  */
1284         r = test_object(f, extra, needle);
1285         if (r < 0)
1286                 return r;
1287
1288         if (r == TEST_FOUND)
1289                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1290
1291         /* if we are looking with DIRECTION_UP then we need to first
1292            see if in the actual array there is a matching entry, and
1293            return the last one of that. But if there isn't any we need
1294            to return this one. Hence remember this, and return it
1295            below. */
1296         if (r == TEST_LEFT)
1297                 step_back = direction == DIRECTION_UP;
1298
1299         if (r == TEST_RIGHT) {
1300                 if (direction == DIRECTION_DOWN)
1301                         goto found;
1302                 else
1303                         return 0;
1304         }
1305
1306         r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1307
1308         if (r == 0 && step_back)
1309                 goto found;
1310
1311         if (r > 0 && idx)
1312                 (*idx) ++;
1313
1314         return r;
1315
1316 found:
1317         r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1318         if (r < 0)
1319                 return r;
1320
1321         if (ret)
1322                 *ret = o;
1323
1324         if (offset)
1325                 *offset = extra;
1326
1327         if (idx)
1328                 *idx = 0;
1329
1330         return 1;
1331 }
1332
1333 static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1334         assert(f);
1335         assert(p > 0);
1336
1337         if (p == needle)
1338                 return TEST_FOUND;
1339         else if (p < needle)
1340                 return TEST_LEFT;
1341         else
1342                 return TEST_RIGHT;
1343 }
1344
1345 int journal_file_move_to_entry_by_offset(
1346                 JournalFile *f,
1347                 uint64_t p,
1348                 direction_t direction,
1349                 Object **ret,
1350                 uint64_t *offset) {
1351
1352         return generic_array_bisect(f,
1353                                     le64toh(f->header->entry_array_offset),
1354                                     le64toh(f->header->n_entries),
1355                                     p,
1356                                     test_object_offset,
1357                                     direction,
1358                                     ret, offset, NULL);
1359 }
1360
1361
1362 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1363         Object *o;
1364         int r;
1365
1366         assert(f);
1367         assert(p > 0);
1368
1369         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1370         if (r < 0)
1371                 return r;
1372
1373         if (le64toh(o->entry.seqnum) == needle)
1374                 return TEST_FOUND;
1375         else if (le64toh(o->entry.seqnum) < needle)
1376                 return TEST_LEFT;
1377         else
1378                 return TEST_RIGHT;
1379 }
1380
1381 int journal_file_move_to_entry_by_seqnum(
1382                 JournalFile *f,
1383                 uint64_t seqnum,
1384                 direction_t direction,
1385                 Object **ret,
1386                 uint64_t *offset) {
1387
1388         return generic_array_bisect(f,
1389                                     le64toh(f->header->entry_array_offset),
1390                                     le64toh(f->header->n_entries),
1391                                     seqnum,
1392                                     test_object_seqnum,
1393                                     direction,
1394                                     ret, offset, NULL);
1395 }
1396
1397 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1398         Object *o;
1399         int r;
1400
1401         assert(f);
1402         assert(p > 0);
1403
1404         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1405         if (r < 0)
1406                 return r;
1407
1408         if (le64toh(o->entry.realtime) == needle)
1409                 return TEST_FOUND;
1410         else if (le64toh(o->entry.realtime) < needle)
1411                 return TEST_LEFT;
1412         else
1413                 return TEST_RIGHT;
1414 }
1415
1416 int journal_file_move_to_entry_by_realtime(
1417                 JournalFile *f,
1418                 uint64_t realtime,
1419                 direction_t direction,
1420                 Object **ret,
1421                 uint64_t *offset) {
1422
1423         return generic_array_bisect(f,
1424                                     le64toh(f->header->entry_array_offset),
1425                                     le64toh(f->header->n_entries),
1426                                     realtime,
1427                                     test_object_realtime,
1428                                     direction,
1429                                     ret, offset, NULL);
1430 }
1431
1432 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1433         Object *o;
1434         int r;
1435
1436         assert(f);
1437         assert(p > 0);
1438
1439         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1440         if (r < 0)
1441                 return r;
1442
1443         if (le64toh(o->entry.monotonic) == needle)
1444                 return TEST_FOUND;
1445         else if (le64toh(o->entry.monotonic) < needle)
1446                 return TEST_LEFT;
1447         else
1448                 return TEST_RIGHT;
1449 }
1450
1451 int journal_file_move_to_entry_by_monotonic(
1452                 JournalFile *f,
1453                 sd_id128_t boot_id,
1454                 uint64_t monotonic,
1455                 direction_t direction,
1456                 Object **ret,
1457                 uint64_t *offset) {
1458
1459         char t[9+32+1] = "_BOOT_ID=";
1460         Object *o;
1461         int r;
1462
1463         assert(f);
1464
1465         sd_id128_to_string(boot_id, t + 9);
1466         r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1467         if (r < 0)
1468                 return r;
1469         if (r == 0)
1470                 return -ENOENT;
1471
1472         return generic_array_bisect_plus_one(f,
1473                                              le64toh(o->data.entry_offset),
1474                                              le64toh(o->data.entry_array_offset),
1475                                              le64toh(o->data.n_entries),
1476                                              monotonic,
1477                                              test_object_monotonic,
1478                                              direction,
1479                                              ret, offset, NULL);
1480 }
1481
1482 int journal_file_next_entry(
1483                 JournalFile *f,
1484                 Object *o, uint64_t p,
1485                 direction_t direction,
1486                 Object **ret, uint64_t *offset) {
1487
1488         uint64_t i, n;
1489         int r;
1490
1491         assert(f);
1492         assert(p > 0 || !o);
1493
1494         n = le64toh(f->header->n_entries);
1495         if (n <= 0)
1496                 return 0;
1497
1498         if (!o)
1499                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1500         else {
1501                 if (o->object.type != OBJECT_ENTRY)
1502                         return -EINVAL;
1503
1504                 r = generic_array_bisect(f,
1505                                          le64toh(f->header->entry_array_offset),
1506                                          le64toh(f->header->n_entries),
1507                                          p,
1508                                          test_object_offset,
1509                                          DIRECTION_DOWN,
1510                                          NULL, NULL,
1511                                          &i);
1512                 if (r <= 0)
1513                         return r;
1514
1515                 if (direction == DIRECTION_DOWN) {
1516                         if (i >= n - 1)
1517                                 return 0;
1518
1519                         i++;
1520                 } else {
1521                         if (i <= 0)
1522                                 return 0;
1523
1524                         i--;
1525                 }
1526         }
1527
1528         /* And jump to it */
1529         return generic_array_get(f,
1530                                  le64toh(f->header->entry_array_offset),
1531                                  i,
1532                                  ret, offset);
1533 }
1534
1535 int journal_file_skip_entry(
1536                 JournalFile *f,
1537                 Object *o, uint64_t p,
1538                 int64_t skip,
1539                 Object **ret, uint64_t *offset) {
1540
1541         uint64_t i, n;
1542         int r;
1543
1544         assert(f);
1545         assert(o);
1546         assert(p > 0);
1547
1548         if (o->object.type != OBJECT_ENTRY)
1549                 return -EINVAL;
1550
1551         r = generic_array_bisect(f,
1552                                  le64toh(f->header->entry_array_offset),
1553                                  le64toh(f->header->n_entries),
1554                                  p,
1555                                  test_object_offset,
1556                                  DIRECTION_DOWN,
1557                                  NULL, NULL,
1558                                  &i);
1559         if (r <= 0)
1560                 return r;
1561
1562         /* Calculate new index */
1563         if (skip < 0) {
1564                 if ((uint64_t) -skip >= i)
1565                         i = 0;
1566                 else
1567                         i = i - (uint64_t) -skip;
1568         } else
1569                 i  += (uint64_t) skip;
1570
1571         n = le64toh(f->header->n_entries);
1572         if (n <= 0)
1573                 return -EBADMSG;
1574
1575         if (i >= n)
1576                 i = n-1;
1577
1578         return generic_array_get(f,
1579                                  le64toh(f->header->entry_array_offset),
1580                                  i,
1581                                  ret, offset);
1582 }
1583
1584 int journal_file_next_entry_for_data(
1585                 JournalFile *f,
1586                 Object *o, uint64_t p,
1587                 uint64_t data_offset,
1588                 direction_t direction,
1589                 Object **ret, uint64_t *offset) {
1590
1591         uint64_t n, i;
1592         int r;
1593         Object *d;
1594
1595         assert(f);
1596         assert(p > 0 || !o);
1597
1598         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1599         if (r < 0)
1600                 return r;
1601
1602         n = le64toh(d->data.n_entries);
1603         if (n <= 0)
1604                 return n;
1605
1606         if (!o)
1607                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1608         else {
1609                 if (o->object.type != OBJECT_ENTRY)
1610                         return -EINVAL;
1611
1612                 r = generic_array_bisect_plus_one(f,
1613                                                   le64toh(d->data.entry_offset),
1614                                                   le64toh(d->data.entry_array_offset),
1615                                                   le64toh(d->data.n_entries),
1616                                                   p,
1617                                                   test_object_offset,
1618                                                   DIRECTION_DOWN,
1619                                                   NULL, NULL,
1620                                                   &i);
1621
1622                 if (r <= 0)
1623                         return r;
1624
1625                 if (direction == DIRECTION_DOWN) {
1626                         if (i >= n - 1)
1627                                 return 0;
1628
1629                         i++;
1630                 } else {
1631                         if (i <= 0)
1632                                 return 0;
1633
1634                         i--;
1635                 }
1636
1637         }
1638
1639         return generic_array_get_plus_one(f,
1640                                           le64toh(d->data.entry_offset),
1641                                           le64toh(d->data.entry_array_offset),
1642                                           i,
1643                                           ret, offset);
1644 }
1645
1646 int journal_file_move_to_entry_by_offset_for_data(
1647                 JournalFile *f,
1648                 uint64_t data_offset,
1649                 uint64_t p,
1650                 direction_t direction,
1651                 Object **ret, uint64_t *offset) {
1652
1653         int r;
1654         Object *d;
1655
1656         assert(f);
1657
1658         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1659         if (r < 0)
1660                 return r;
1661
1662         return generic_array_bisect_plus_one(f,
1663                                              le64toh(d->data.entry_offset),
1664                                              le64toh(d->data.entry_array_offset),
1665                                              le64toh(d->data.n_entries),
1666                                              p,
1667                                              test_object_offset,
1668                                              direction,
1669                                              ret, offset, NULL);
1670 }
1671
1672 int journal_file_move_to_entry_by_monotonic_for_data(
1673                 JournalFile *f,
1674                 uint64_t data_offset,
1675                 sd_id128_t boot_id,
1676                 uint64_t monotonic,
1677                 direction_t direction,
1678                 Object **ret, uint64_t *offset) {
1679
1680         char t[9+32+1] = "_BOOT_ID=";
1681         Object *o, *d;
1682         int r;
1683         uint64_t b, z;
1684
1685         assert(f);
1686
1687         /* First, seek by time */
1688         sd_id128_to_string(boot_id, t + 9);
1689         r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1690         if (r < 0)
1691                 return r;
1692         if (r == 0)
1693                 return -ENOENT;
1694
1695         r = generic_array_bisect_plus_one(f,
1696                                           le64toh(o->data.entry_offset),
1697                                           le64toh(o->data.entry_array_offset),
1698                                           le64toh(o->data.n_entries),
1699                                           monotonic,
1700                                           test_object_monotonic,
1701                                           direction,
1702                                           NULL, &z, NULL);
1703         if (r <= 0)
1704                 return r;
1705
1706         /* And now, continue seeking until we find an entry that
1707          * exists in both bisection arrays */
1708
1709         for (;;) {
1710                 Object *qo;
1711                 uint64_t p, q;
1712
1713                 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1714                 if (r < 0)
1715                         return r;
1716
1717                 r = generic_array_bisect_plus_one(f,
1718                                                   le64toh(d->data.entry_offset),
1719                                                   le64toh(d->data.entry_array_offset),
1720                                                   le64toh(d->data.n_entries),
1721                                                   z,
1722                                                   test_object_offset,
1723                                                   direction,
1724                                                   NULL, &p, NULL);
1725                 if (r <= 0)
1726                         return r;
1727
1728                 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1729                 if (r < 0)
1730                         return r;
1731
1732                 r = generic_array_bisect_plus_one(f,
1733                                                   le64toh(o->data.entry_offset),
1734                                                   le64toh(o->data.entry_array_offset),
1735                                                   le64toh(o->data.n_entries),
1736                                                   p,
1737                                                   test_object_offset,
1738                                                   direction,
1739                                                   &qo, &q, NULL);
1740
1741                 if (r <= 0)
1742                         return r;
1743
1744                 if (p == q) {
1745                         if (ret)
1746                                 *ret = qo;
1747                         if (offset)
1748                                 *offset = q;
1749
1750                         return 1;
1751                 }
1752
1753                 z = q;
1754         }
1755
1756         return 0;
1757 }
1758
1759 int journal_file_move_to_entry_by_seqnum_for_data(
1760                 JournalFile *f,
1761                 uint64_t data_offset,
1762                 uint64_t seqnum,
1763                 direction_t direction,
1764                 Object **ret, uint64_t *offset) {
1765
1766         Object *d;
1767         int r;
1768
1769         assert(f);
1770
1771         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1772         if (r < 0)
1773                 return r;
1774
1775         return generic_array_bisect_plus_one(f,
1776                                              le64toh(d->data.entry_offset),
1777                                              le64toh(d->data.entry_array_offset),
1778                                              le64toh(d->data.n_entries),
1779                                              seqnum,
1780                                              test_object_seqnum,
1781                                              direction,
1782                                              ret, offset, NULL);
1783 }
1784
1785 int journal_file_move_to_entry_by_realtime_for_data(
1786                 JournalFile *f,
1787                 uint64_t data_offset,
1788                 uint64_t realtime,
1789                 direction_t direction,
1790                 Object **ret, uint64_t *offset) {
1791
1792         Object *d;
1793         int r;
1794
1795         assert(f);
1796
1797         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1798         if (r < 0)
1799                 return r;
1800
1801         return generic_array_bisect_plus_one(f,
1802                                              le64toh(d->data.entry_offset),
1803                                              le64toh(d->data.entry_array_offset),
1804                                              le64toh(d->data.n_entries),
1805                                              realtime,
1806                                              test_object_realtime,
1807                                              direction,
1808                                              ret, offset, NULL);
1809 }
1810
1811 void journal_file_dump(JournalFile *f) {
1812         char a[33], b[33], c[33];
1813         Object *o;
1814         int r;
1815         uint64_t p;
1816
1817         assert(f);
1818
1819         printf("File Path: %s\n"
1820                "File ID: %s\n"
1821                "Machine ID: %s\n"
1822                "Boot ID: %s\n"
1823                "Arena size: %llu\n"
1824                "Objects: %lu\n"
1825                "Entries: %lu\n",
1826                f->path,
1827                sd_id128_to_string(f->header->file_id, a),
1828                sd_id128_to_string(f->header->machine_id, b),
1829                sd_id128_to_string(f->header->boot_id, c),
1830                (unsigned long long) le64toh(f->header->arena_size),
1831                (unsigned long) le64toh(f->header->n_objects),
1832                (unsigned long) le64toh(f->header->n_entries));
1833
1834         p = le64toh(f->header->header_size);
1835         while (p != 0) {
1836                 r = journal_file_move_to_object(f, -1, p, &o);
1837                 if (r < 0)
1838                         goto fail;
1839
1840                 switch (o->object.type) {
1841
1842                 case OBJECT_UNUSED:
1843                         printf("Type: OBJECT_UNUSED\n");
1844                         break;
1845
1846                 case OBJECT_DATA:
1847                         printf("Type: OBJECT_DATA\n");
1848                         break;
1849
1850                 case OBJECT_ENTRY:
1851                         printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
1852                                (unsigned long long) le64toh(o->entry.seqnum),
1853                                (unsigned long long) le64toh(o->entry.monotonic),
1854                                (unsigned long long) le64toh(o->entry.realtime));
1855                         break;
1856
1857                 case OBJECT_FIELD_HASH_TABLE:
1858                         printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1859                         break;
1860
1861                 case OBJECT_DATA_HASH_TABLE:
1862                         printf("Type: OBJECT_DATA_HASH_TABLE\n");
1863                         break;
1864
1865                 case OBJECT_ENTRY_ARRAY:
1866                         printf("Type: OBJECT_ENTRY_ARRAY\n");
1867                         break;
1868
1869                 case OBJECT_SIGNATURE:
1870                         printf("Type: OBJECT_SIGNATURE\n");
1871                         break;
1872                 }
1873
1874                 if (o->object.flags & OBJECT_COMPRESSED)
1875                         printf("Flags: COMPRESSED\n");
1876
1877                 if (p == le64toh(f->header->tail_object_offset))
1878                         p = 0;
1879                 else
1880                         p = p + ALIGN64(le64toh(o->object.size));
1881         }
1882
1883         return;
1884 fail:
1885         log_error("File corrupt");
1886 }
1887
1888 int journal_file_open(
1889                 const char *fname,
1890                 int flags,
1891                 mode_t mode,
1892                 JournalFile *template,
1893                 JournalFile **ret) {
1894
1895         JournalFile *f;
1896         int r;
1897         bool newly_created = false;
1898
1899         assert(fname);
1900
1901         if ((flags & O_ACCMODE) != O_RDONLY &&
1902             (flags & O_ACCMODE) != O_RDWR)
1903                 return -EINVAL;
1904
1905         if (!endswith(fname, ".journal"))
1906                 return -EINVAL;
1907
1908         f = new0(JournalFile, 1);
1909         if (!f)
1910                 return -ENOMEM;
1911
1912         f->fd = -1;
1913         f->flags = flags;
1914         f->mode = mode;
1915         f->writable = (flags & O_ACCMODE) != O_RDONLY;
1916         f->prot = prot_from_flags(flags);
1917
1918         if (template) {
1919                 f->metrics = template->metrics;
1920                 f->compress = template->compress;
1921         }
1922
1923         f->path = strdup(fname);
1924         if (!f->path) {
1925                 r = -ENOMEM;
1926                 goto fail;
1927         }
1928
1929         f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
1930         if (f->fd < 0) {
1931                 r = -errno;
1932                 goto fail;
1933         }
1934
1935         if (fstat(f->fd, &f->last_stat) < 0) {
1936                 r = -errno;
1937                 goto fail;
1938         }
1939
1940         if (f->last_stat.st_size == 0 && f->writable) {
1941                 newly_created = true;
1942
1943                 r = journal_file_init_header(f, template);
1944                 if (r < 0)
1945                         goto fail;
1946
1947                 if (fstat(f->fd, &f->last_stat) < 0) {
1948                         r = -errno;
1949                         goto fail;
1950                 }
1951         }
1952
1953         if (f->last_stat.st_size < (off_t) sizeof(Header)) {
1954                 r = -EIO;
1955                 goto fail;
1956         }
1957
1958         f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
1959         if (f->header == MAP_FAILED) {
1960                 f->header = NULL;
1961                 r = -errno;
1962                 goto fail;
1963         }
1964
1965         if (!newly_created) {
1966                 r = journal_file_verify_header(f);
1967                 if (r < 0)
1968                         goto fail;
1969         }
1970
1971         if (f->writable) {
1972                 r = journal_file_refresh_header(f);
1973                 if (r < 0)
1974                         goto fail;
1975         }
1976
1977         if (newly_created) {
1978
1979                 r = journal_file_setup_field_hash_table(f);
1980                 if (r < 0)
1981                         goto fail;
1982
1983                 r = journal_file_setup_data_hash_table(f);
1984                 if (r < 0)
1985                         goto fail;
1986         }
1987
1988         r = journal_file_map_field_hash_table(f);
1989         if (r < 0)
1990                 goto fail;
1991
1992         r = journal_file_map_data_hash_table(f);
1993         if (r < 0)
1994                 goto fail;
1995
1996         if (ret)
1997                 *ret = f;
1998
1999         return 0;
2000
2001 fail:
2002         journal_file_close(f);
2003
2004         return r;
2005 }
2006
2007 int journal_file_rotate(JournalFile **f) {
2008         char *p;
2009         size_t l;
2010         JournalFile *old_file, *new_file = NULL;
2011         int r;
2012
2013         assert(f);
2014         assert(*f);
2015
2016         old_file = *f;
2017
2018         if (!old_file->writable)
2019                 return -EINVAL;
2020
2021         if (!endswith(old_file->path, ".journal"))
2022                 return -EINVAL;
2023
2024         l = strlen(old_file->path);
2025
2026         p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
2027         if (!p)
2028                 return -ENOMEM;
2029
2030         memcpy(p, old_file->path, l - 8);
2031         p[l-8] = '@';
2032         sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2033         snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2034                  "-%016llx-%016llx.journal",
2035                  (unsigned long long) le64toh((*f)->header->seqnum),
2036                  (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2037
2038         r = rename(old_file->path, p);
2039         free(p);
2040
2041         if (r < 0)
2042                 return -errno;
2043
2044         old_file->header->state = STATE_ARCHIVED;
2045
2046         r = journal_file_open(old_file->path, old_file->flags, old_file->mode, old_file, &new_file);
2047         journal_file_close(old_file);
2048
2049         *f = new_file;
2050         return r;
2051 }
2052
2053 int journal_file_open_reliably(
2054                 const char *fname,
2055                 int flags,
2056                 mode_t mode,
2057                 JournalFile *template,
2058                 JournalFile **ret) {
2059
2060         int r;
2061         size_t l;
2062         char *p;
2063
2064         r = journal_file_open(fname, flags, mode, template, ret);
2065         if (r != -EBADMSG && /* corrupted */
2066             r != -ENODATA && /* truncated */
2067             r != -EHOSTDOWN && /* other machine */
2068             r != -EPROTONOSUPPORT) /* incompatible feature */
2069                 return r;
2070
2071         if ((flags & O_ACCMODE) == O_RDONLY)
2072                 return r;
2073
2074         if (!(flags & O_CREAT))
2075                 return r;
2076
2077         /* The file is corrupted. Rotate it away and try it again (but only once) */
2078
2079         l = strlen(fname);
2080         if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2081                      (int) (l-8), fname,
2082                      (unsigned long long) now(CLOCK_REALTIME),
2083                      random_ull()) < 0)
2084                 return -ENOMEM;
2085
2086         r = rename(fname, p);
2087         free(p);
2088         if (r < 0)
2089                 return -errno;
2090
2091         log_warning("File %s corrupted, renaming and replacing.", fname);
2092
2093         return journal_file_open(fname, flags, mode, template, ret);
2094 }
2095
2096 struct vacuum_info {
2097         off_t usage;
2098         char *filename;
2099
2100         uint64_t realtime;
2101         sd_id128_t seqnum_id;
2102         uint64_t seqnum;
2103
2104         bool have_seqnum;
2105 };
2106
2107 static int vacuum_compare(const void *_a, const void *_b) {
2108         const struct vacuum_info *a, *b;
2109
2110         a = _a;
2111         b = _b;
2112
2113         if (a->have_seqnum && b->have_seqnum &&
2114             sd_id128_equal(a->seqnum_id, b->seqnum_id)) {
2115                 if (a->seqnum < b->seqnum)
2116                         return -1;
2117                 else if (a->seqnum > b->seqnum)
2118                         return 1;
2119                 else
2120                         return 0;
2121         }
2122
2123         if (a->realtime < b->realtime)
2124                 return -1;
2125         else if (a->realtime > b->realtime)
2126                 return 1;
2127         else if (a->have_seqnum && b->have_seqnum)
2128                 return memcmp(&a->seqnum_id, &b->seqnum_id, 16);
2129         else
2130                 return strcmp(a->filename, b->filename);
2131 }
2132
2133 int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free) {
2134         DIR *d;
2135         int r = 0;
2136         struct vacuum_info *list = NULL;
2137         unsigned n_list = 0, n_allocated = 0, i;
2138         uint64_t sum = 0;
2139
2140         assert(directory);
2141
2142         if (max_use <= 0)
2143                 return 0;
2144
2145         d = opendir(directory);
2146         if (!d)
2147                 return -errno;
2148
2149         for (;;) {
2150                 int k;
2151                 struct dirent buf, *de;
2152                 size_t q;
2153                 struct stat st;
2154                 char *p;
2155                 unsigned long long seqnum = 0, realtime;
2156                 sd_id128_t seqnum_id;
2157                 bool have_seqnum;
2158
2159                 k = readdir_r(d, &buf, &de);
2160                 if (k != 0) {
2161                         r = -k;
2162                         goto finish;
2163                 }
2164
2165                 if (!de)
2166                         break;
2167
2168                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
2169                         continue;
2170
2171                 if (!S_ISREG(st.st_mode))
2172                         continue;
2173
2174                 q = strlen(de->d_name);
2175
2176                 if (endswith(de->d_name, ".journal")) {
2177
2178                         /* Vacuum archived files */
2179
2180                         if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8)
2181                                 continue;
2182
2183                         if (de->d_name[q-8-16-1] != '-' ||
2184                             de->d_name[q-8-16-1-16-1] != '-' ||
2185                             de->d_name[q-8-16-1-16-1-32-1] != '@')
2186                                 continue;
2187
2188                         p = strdup(de->d_name);
2189                         if (!p) {
2190                                 r = -ENOMEM;
2191                                 goto finish;
2192                         }
2193
2194                         de->d_name[q-8-16-1-16-1] = 0;
2195                         if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) {
2196                                 free(p);
2197                                 continue;
2198                         }
2199
2200                         if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) {
2201                                 free(p);
2202                                 continue;
2203                         }
2204
2205                         have_seqnum = true;
2206
2207                 } else if (endswith(de->d_name, ".journal~")) {
2208                         unsigned long long tmp;
2209
2210                         /* Vacuum corrupted files */
2211
2212                         if (q < 1 + 16 + 1 + 16 + 8 + 1)
2213                                 continue;
2214
2215                         if (de->d_name[q-1-8-16-1] != '-' ||
2216                             de->d_name[q-1-8-16-1-16-1] != '@')
2217                                 continue;
2218
2219                         p = strdup(de->d_name);
2220                         if (!p) {
2221                                 r = -ENOMEM;
2222                                 goto finish;
2223                         }
2224
2225                         if (sscanf(de->d_name + q-1-8-16-1-16, "%16llx-%16llx.journal~", &realtime, &tmp) != 2) {
2226                                 free(p);
2227                                 continue;
2228                         }
2229
2230                         have_seqnum = false;
2231                 } else
2232                         continue;
2233
2234                 if (n_list >= n_allocated) {
2235                         struct vacuum_info *j;
2236
2237                         n_allocated = MAX(n_allocated * 2U, 8U);
2238                         j = realloc(list, n_allocated * sizeof(struct vacuum_info));
2239                         if (!j) {
2240                                 free(p);
2241                                 r = -ENOMEM;
2242                                 goto finish;
2243                         }
2244
2245                         list = j;
2246                 }
2247
2248                 list[n_list].filename = p;
2249                 list[n_list].usage = 512UL * (uint64_t) st.st_blocks;
2250                 list[n_list].seqnum = seqnum;
2251                 list[n_list].realtime = realtime;
2252                 list[n_list].seqnum_id = seqnum_id;
2253                 list[n_list].have_seqnum = have_seqnum;
2254
2255                 sum += list[n_list].usage;
2256
2257                 n_list ++;
2258         }
2259
2260         qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
2261
2262         for(i = 0; i < n_list; i++) {
2263                 struct statvfs ss;
2264
2265                 if (fstatvfs(dirfd(d), &ss) < 0) {
2266                         r = -errno;
2267                         goto finish;
2268                 }
2269
2270                 if (sum <= max_use &&
2271                     (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free)
2272                         break;
2273
2274                 if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) {
2275                         log_info("Deleted archived journal %s/%s.", directory, list[i].filename);
2276                         sum -= list[i].usage;
2277                 } else if (errno != ENOENT)
2278                         log_warning("Failed to delete %s/%s: %m", directory, list[i].filename);
2279         }
2280
2281 finish:
2282         for (i = 0; i < n_list; i++)
2283                 free(list[i].filename);
2284
2285         free(list);
2286
2287         if (d)
2288                 closedir(d);
2289
2290         return r;
2291 }
2292
2293 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2294         uint64_t i, n;
2295         uint64_t q, xor_hash = 0;
2296         int r;
2297         EntryItem *items;
2298         dual_timestamp ts;
2299
2300         assert(from);
2301         assert(to);
2302         assert(o);
2303         assert(p);
2304
2305         if (!to->writable)
2306                 return -EPERM;
2307
2308         ts.monotonic = le64toh(o->entry.monotonic);
2309         ts.realtime = le64toh(o->entry.realtime);
2310
2311         if (to->tail_entry_monotonic_valid &&
2312             ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2313                 return -EINVAL;
2314
2315         n = journal_file_entry_n_items(o);
2316         items = alloca(sizeof(EntryItem) * n);
2317
2318         for (i = 0; i < n; i++) {
2319                 uint64_t l, h;
2320                 le64_t le_hash;
2321                 size_t t;
2322                 void *data;
2323                 Object *u;
2324
2325                 q = le64toh(o->entry.items[i].object_offset);
2326                 le_hash = o->entry.items[i].hash;
2327
2328                 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2329                 if (r < 0)
2330                         return r;
2331
2332                 if (le_hash != o->data.hash)
2333                         return -EBADMSG;
2334
2335                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2336                 t = (size_t) l;
2337
2338                 /* We hit the limit on 32bit machines */
2339                 if ((uint64_t) t != l)
2340                         return -E2BIG;
2341
2342                 if (o->object.flags & OBJECT_COMPRESSED) {
2343 #ifdef HAVE_XZ
2344                         uint64_t rsize;
2345
2346                         if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2347                                 return -EBADMSG;
2348
2349                         data = from->compress_buffer;
2350                         l = rsize;
2351 #else
2352                         return -EPROTONOSUPPORT;
2353 #endif
2354                 } else
2355                         data = o->data.payload;
2356
2357                 r = journal_file_append_data(to, data, l, &u, &h);
2358                 if (r < 0)
2359                         return r;
2360
2361                 xor_hash ^= le64toh(u->data.hash);
2362                 items[i].object_offset = htole64(h);
2363                 items[i].hash = u->data.hash;
2364
2365                 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2366                 if (r < 0)
2367                         return r;
2368         }
2369
2370         return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2371 }
2372
2373 void journal_default_metrics(JournalMetrics *m, int fd) {
2374         uint64_t fs_size = 0;
2375         struct statvfs ss;
2376         char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
2377
2378         assert(m);
2379         assert(fd >= 0);
2380
2381         if (fstatvfs(fd, &ss) >= 0)
2382                 fs_size = ss.f_frsize * ss.f_blocks;
2383
2384         if (m->max_use == (uint64_t) -1) {
2385
2386                 if (fs_size > 0) {
2387                         m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2388
2389                         if (m->max_use > DEFAULT_MAX_USE_UPPER)
2390                                 m->max_use = DEFAULT_MAX_USE_UPPER;
2391
2392                         if (m->max_use < DEFAULT_MAX_USE_LOWER)
2393                                 m->max_use = DEFAULT_MAX_USE_LOWER;
2394                 } else
2395                         m->max_use = DEFAULT_MAX_USE_LOWER;
2396         } else {
2397                 m->max_use = PAGE_ALIGN(m->max_use);
2398
2399                 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2400                         m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2401         }
2402
2403         if (m->max_size == (uint64_t) -1) {
2404                 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2405
2406                 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2407                         m->max_size = DEFAULT_MAX_SIZE_UPPER;
2408         } else
2409                 m->max_size = PAGE_ALIGN(m->max_size);
2410
2411         if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2412                 m->max_size = JOURNAL_FILE_SIZE_MIN;
2413
2414         if (m->max_size*2 > m->max_use)
2415                 m->max_use = m->max_size*2;
2416
2417         if (m->min_size == (uint64_t) -1)
2418                 m->min_size = JOURNAL_FILE_SIZE_MIN;
2419         else {
2420                 m->min_size = PAGE_ALIGN(m->min_size);
2421
2422                 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2423                         m->min_size = JOURNAL_FILE_SIZE_MIN;
2424
2425                 if (m->min_size > m->max_size)
2426                         m->max_size = m->min_size;
2427         }
2428
2429         if (m->keep_free == (uint64_t) -1) {
2430
2431                 if (fs_size > 0) {
2432                         m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2433
2434                         if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2435                                 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2436
2437                 } else
2438                         m->keep_free = DEFAULT_KEEP_FREE;
2439         }
2440
2441         log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2442                  format_bytes(a, sizeof(a), m->max_use),
2443                  format_bytes(b, sizeof(b), m->max_size),
2444                  format_bytes(c, sizeof(c), m->min_size),
2445                  format_bytes(d, sizeof(d), m->keep_free));
2446 }
2447
2448 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2449         Object *o;
2450         int r;
2451
2452         assert(f);
2453         assert(from || to);
2454
2455         if (from) {
2456                 r = journal_file_next_entry(f, NULL, 0, DIRECTION_DOWN, &o, NULL);
2457                 if (r <= 0)
2458                         return r;
2459
2460                 *from = le64toh(o->entry.realtime);
2461         }
2462
2463         if (to) {
2464                 r = journal_file_next_entry(f, NULL, 0, DIRECTION_UP, &o, NULL);
2465                 if (r <= 0)
2466                         return r;
2467
2468                 *to = le64toh(o->entry.realtime);
2469         }
2470
2471         return 1;
2472 }
2473
2474 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2475         char t[9+32+1] = "_BOOT_ID=";
2476         Object *o;
2477         uint64_t p;
2478         int r;
2479
2480         assert(f);
2481         assert(from || to);
2482
2483         sd_id128_to_string(boot_id, t + 9);
2484
2485         r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2486         if (r <= 0)
2487                 return r;
2488
2489         if (le64toh(o->data.n_entries) <= 0)
2490                 return 0;
2491
2492         if (from) {
2493                 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2494                 if (r < 0)
2495                         return r;
2496
2497                 *from = le64toh(o->entry.monotonic);
2498         }
2499
2500         if (to) {
2501                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2502                 if (r < 0)
2503                         return r;
2504
2505                 r = generic_array_get_plus_one(f,
2506                                                le64toh(o->data.entry_offset),
2507                                                le64toh(o->data.entry_array_offset),
2508                                                le64toh(o->data.n_entries)-1,
2509                                                &o, NULL);
2510                 if (r <= 0)
2511                         return r;
2512
2513                 *to = le64toh(o->entry.monotonic);
2514         }
2515
2516         return 1;
2517 }