chiark / gitweb /
d06dbc2f753397efd6408a0d451fd6ab914cc948
[elogind.git] / src / journal / journal-file.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mman.h>
23 #include <errno.h>
24 #include <sys/uio.h>
25 #include <unistd.h>
26 #include <sys/statvfs.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/xattr.h>
30
31 #include "journal-def.h"
32 #include "journal-file.h"
33 #include "journal-authenticate.h"
34 #include "lookup3.h"
35 #include "compress.h"
36 #include "fsprg.h"
37
/* Sizes in bytes of the two hash tables. The data hash table is never
 * created smaller than the first value; the field hash table always
 * uses the second, fixed value. */
38 #define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
39 #define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
40
/* Data payloads of at least this many bytes are candidates for compression */
41 #define COMPRESSION_SIZE_THRESHOLD (512ULL)
42
43 /* This is the minimum journal file size */
44 #define JOURNAL_FILE_SIZE_MIN (4ULL*1024ULL*1024ULL)           /* 4 MiB */
45
46 /* These are the lower and upper bounds if we deduce the max_use value
47  * from the file system size */
48 #define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL)           /* 1 MiB */
49 #define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)   /* 4 GiB */
50
51 /* This is the upper bound if we deduce max_size from max_use */
52 #define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL)        /* 128 MiB */
53
54 /* This is the upper bound if we deduce the keep_free value from the
55  * file system size */
56 #define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
57
58 /* This is the keep_free value when we can't determine the file system
59  * size */
60 #define DEFAULT_KEEP_FREE (1024ULL*1024ULL)                    /* 1 MiB */
61
62 /* n_data was the first entry we added after the initial file format design,
 * hence a valid header must extend at least up to that field */
63 #define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
64
65 /* How many entries to keep in the entry array chain cache at max */
66 #define CHAIN_CACHE_MAX 20
67
68 /* How much to increase the journal file size at once each time we allocate
 * something new; allocations are rounded up to a multiple of this. */
69 #define FILE_SIZE_INCREASE (8ULL*1024ULL*1024ULL)              /* 8 MiB */
70
71 static int journal_file_set_online(JournalFile *f) {
72         assert(f);
73
74         if (!f->writable)
75                 return -EPERM;
76
77         if (!(f->fd >= 0 && f->header))
78                 return -EINVAL;
79
80         switch(f->header->state) {
81                 case STATE_ONLINE:
82                         return 0;
83
84                 case STATE_OFFLINE:
85                         f->header->state = STATE_ONLINE;
86                         fsync(f->fd);
87                         return 0;
88
89                 default:
90                         return -EINVAL;
91         }
92 }
93
94 int journal_file_set_offline(JournalFile *f) {
95         assert(f);
96
97         if (!f->writable)
98                 return -EPERM;
99
100         if (!(f->fd >= 0 && f->header))
101                 return -EINVAL;
102
103         if (f->header->state != STATE_ONLINE)
104                 return 0;
105
106         fsync(f->fd);
107
108         f->header->state = STATE_OFFLINE;
109
110         fsync(f->fd);
111
112         return 0;
113 }
114
115 void journal_file_close(JournalFile *f) {
116         assert(f);
117
118 #ifdef HAVE_GCRYPT
119         /* Write the final tag */
120         if (f->seal && f->writable)
121                 journal_file_append_tag(f);
122 #endif
123
124         /* Sync everything to disk, before we mark the file offline */
125         if (f->mmap && f->fd >= 0)
126                 mmap_cache_close_fd(f->mmap, f->fd);
127
128         journal_file_set_offline(f);
129
130         if (f->header)
131                 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
132
133         safe_close(f->fd);
134         free(f->path);
135
136         if (f->mmap)
137                 mmap_cache_unref(f->mmap);
138
139         ordered_hashmap_free_free(f->chain_cache);
140
141 #if defined(HAVE_XZ) || defined(HAVE_LZ4)
142         free(f->compress_buffer);
143 #endif
144
145 #ifdef HAVE_GCRYPT
146         if (f->fss_file)
147                 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
148         else if (f->fsprg_state)
149                 free(f->fsprg_state);
150
151         free(f->fsprg_seed);
152
153         if (f->hmac)
154                 gcry_md_close(f->hmac);
155 #endif
156
157         free(f);
158 }
159
160 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
161         Header h = {};
162         ssize_t k;
163         int r;
164
165         assert(f);
166
167         memcpy(h.signature, HEADER_SIGNATURE, 8);
168         h.header_size = htole64(ALIGN64(sizeof(h)));
169
170         h.incompatible_flags |= htole32(
171                 f->compress_xz * HEADER_INCOMPATIBLE_COMPRESSED_XZ |
172                 f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4);
173
174         h.compatible_flags = htole32(
175                 f->seal * HEADER_COMPATIBLE_SEALED);
176
177         r = sd_id128_randomize(&h.file_id);
178         if (r < 0)
179                 return r;
180
181         if (template) {
182                 h.seqnum_id = template->header->seqnum_id;
183                 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
184         } else
185                 h.seqnum_id = h.file_id;
186
187         k = pwrite(f->fd, &h, sizeof(h), 0);
188         if (k < 0)
189                 return -errno;
190
191         if (k != sizeof(h))
192                 return -EIO;
193
194         return 0;
195 }
196
197 static int journal_file_refresh_header(JournalFile *f) {
198         int r;
199         sd_id128_t boot_id;
200
201         assert(f);
202
203         r = sd_id128_get_machine(&f->header->machine_id);
204         if (r < 0)
205                 return r;
206
207         r = sd_id128_get_boot(&boot_id);
208         if (r < 0)
209                 return r;
210
211         if (sd_id128_equal(boot_id, f->header->boot_id))
212                 f->tail_entry_monotonic_valid = true;
213
214         f->header->boot_id = boot_id;
215
216         journal_file_set_online(f);
217
218         /* Sync the online state to disk */
219         fsync(f->fd);
220
221         return 0;
222 }
223
224 static int journal_file_verify_header(JournalFile *f) {
225         uint32_t flags;
226
227         assert(f);
228
229         if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
230                 return -EBADMSG;
231
232         /* In both read and write mode we refuse to open files with
233          * incompatible flags we don't know */
234         flags = le32toh(f->header->incompatible_flags);
235         if (flags & ~HEADER_INCOMPATIBLE_SUPPORTED) {
236                 if (flags & ~HEADER_INCOMPATIBLE_ANY)
237                         log_debug("Journal file %s has unknown incompatible flags %"PRIx32,
238                                   f->path, flags & ~HEADER_INCOMPATIBLE_ANY);
239                 flags = (flags & HEADER_INCOMPATIBLE_ANY) & ~HEADER_INCOMPATIBLE_SUPPORTED;
240                 if (flags)
241                         log_debug("Journal file %s uses incompatible flags %"PRIx32
242                                   " disabled at compilation time.", f->path, flags);
243                 return -EPROTONOSUPPORT;
244         }
245
246         /* When open for writing we refuse to open files with
247          * compatible flags, too */
248         flags = le32toh(f->header->compatible_flags);
249         if (f->writable && (flags & ~HEADER_COMPATIBLE_SUPPORTED)) {
250                 if (flags & ~HEADER_COMPATIBLE_ANY)
251                         log_debug("Journal file %s has unknown compatible flags %"PRIx32,
252                                   f->path, flags & ~HEADER_COMPATIBLE_ANY);
253                 flags = (flags & HEADER_COMPATIBLE_ANY) & ~HEADER_COMPATIBLE_SUPPORTED;
254                 if (flags)
255                         log_debug("Journal file %s uses compatible flags %"PRIx32
256                                   " disabled at compilation time.", f->path, flags);
257                 return -EPROTONOSUPPORT;
258         }
259
260         if (f->header->state >= _STATE_MAX)
261                 return -EBADMSG;
262
263         /* The first addition was n_data, so check that we are at least this large */
264         if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
265                 return -EBADMSG;
266
267         if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
268                 return -EBADMSG;
269
270         if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
271                 return -ENODATA;
272
273         if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
274                 return -ENODATA;
275
276         if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
277             !VALID64(le64toh(f->header->field_hash_table_offset)) ||
278             !VALID64(le64toh(f->header->tail_object_offset)) ||
279             !VALID64(le64toh(f->header->entry_array_offset)))
280                 return -ENODATA;
281
282         if (f->writable) {
283                 uint8_t state;
284                 sd_id128_t machine_id;
285                 int r;
286
287                 r = sd_id128_get_machine(&machine_id);
288                 if (r < 0)
289                         return r;
290
291                 if (!sd_id128_equal(machine_id, f->header->machine_id))
292                         return -EHOSTDOWN;
293
294                 state = f->header->state;
295
296                 if (state == STATE_ONLINE) {
297                         log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
298                         return -EBUSY;
299                 } else if (state == STATE_ARCHIVED)
300                         return -ESHUTDOWN;
301                 else if (state != STATE_OFFLINE) {
302                         log_debug("Journal file %s has unknown state %u.", f->path, state);
303                         return -EBUSY;
304                 }
305         }
306
307         f->compress_xz = JOURNAL_HEADER_COMPRESSED_XZ(f->header);
308         f->compress_lz4 = JOURNAL_HEADER_COMPRESSED_LZ4(f->header);
309
310         f->seal = JOURNAL_HEADER_SEALED(f->header);
311
312         return 0;
313 }
314
315 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
316         uint64_t old_size, new_size;
317         int r;
318
319         assert(f);
320
321         /* We assume that this file is not sparse, and we know that
322          * for sure, since we always call posix_fallocate()
323          * ourselves */
324
325         old_size =
326                 le64toh(f->header->header_size) +
327                 le64toh(f->header->arena_size);
328
329         new_size = PAGE_ALIGN(offset + size);
330         if (new_size < le64toh(f->header->header_size))
331                 new_size = le64toh(f->header->header_size);
332
333         if (new_size <= old_size)
334                 return 0;
335
336         if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
337                 return -E2BIG;
338
339         if (new_size > f->metrics.min_size && f->metrics.keep_free > 0) {
340                 struct statvfs svfs;
341
342                 if (fstatvfs(f->fd, &svfs) >= 0) {
343                         uint64_t available;
344
345                         available = svfs.f_bfree * svfs.f_bsize;
346
347                         if (available >= f->metrics.keep_free)
348                                 available -= f->metrics.keep_free;
349                         else
350                                 available = 0;
351
352                         if (new_size - old_size > available)
353                                 return -E2BIG;
354                 }
355         }
356
357         /* Increase by larger blocks at once */
358         new_size = ((new_size+FILE_SIZE_INCREASE-1) / FILE_SIZE_INCREASE) * FILE_SIZE_INCREASE;
359         if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
360                 new_size = f->metrics.max_size;
361
362         /* Note that the glibc fallocate() fallback is very
363            inefficient, hence we try to minimize the allocation area
364            as we can. */
365         r = posix_fallocate(f->fd, old_size, new_size - old_size);
366         if (r != 0)
367                 return -r;
368
369         if (fstat(f->fd, &f->last_stat) < 0)
370                 return -errno;
371
372         f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
373
374         return 0;
375 }
376
377 static int journal_file_move_to(JournalFile *f, int context, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
378         assert(f);
379         assert(ret);
380
381         if (size <= 0)
382                 return -EINVAL;
383
384         /* Avoid SIGBUS on invalid accesses */
385         if (offset + size > (uint64_t) f->last_stat.st_size) {
386                 /* Hmm, out of range? Let's refresh the fstat() data
387                  * first, before we trust that check. */
388
389                 if (fstat(f->fd, &f->last_stat) < 0 ||
390                     offset + size > (uint64_t) f->last_stat.st_size)
391                         return -EADDRNOTAVAIL;
392         }
393
394         return mmap_cache_get(f->mmap, f->fd, f->prot, context, keep_always, offset, size, &f->last_stat, ret, NULL);
395 }
396
397 static uint64_t minimum_header_size(Object *o) {
398
399         static const uint64_t table[] = {
400                 [OBJECT_DATA] = sizeof(DataObject),
401                 [OBJECT_FIELD] = sizeof(FieldObject),
402                 [OBJECT_ENTRY] = sizeof(EntryObject),
403                 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
404                 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
405                 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
406                 [OBJECT_TAG] = sizeof(TagObject),
407         };
408
409         if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
410                 return sizeof(ObjectHeader);
411
412         return table[o->object.type];
413 }
414
415 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
416         int r;
417         void *t;
418         Object *o;
419         uint64_t s;
420
421         assert(f);
422         assert(ret);
423
424         /* Objects may only be located at multiple of 64 bit */
425         if (!VALID64(offset))
426                 return -EFAULT;
427
428         r = journal_file_move_to(f, type_to_context(type), false, offset, sizeof(ObjectHeader), &t);
429         if (r < 0)
430                 return r;
431
432         o = (Object*) t;
433         s = le64toh(o->object.size);
434
435         if (s < sizeof(ObjectHeader))
436                 return -EBADMSG;
437
438         if (o->object.type <= OBJECT_UNUSED)
439                 return -EBADMSG;
440
441         if (s < minimum_header_size(o))
442                 return -EBADMSG;
443
444         if (type > 0 && o->object.type != type)
445                 return -EBADMSG;
446
447         if (s > sizeof(ObjectHeader)) {
448                 r = journal_file_move_to(f, o->object.type, false, offset, s, &t);
449                 if (r < 0)
450                         return r;
451
452                 o = (Object*) t;
453         }
454
455         *ret = o;
456         return 0;
457 }
458
459 static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
460         uint64_t r;
461
462         assert(f);
463
464         r = le64toh(f->header->tail_entry_seqnum) + 1;
465
466         if (seqnum) {
467                 /* If an external seqnum counter was passed, we update
468                  * both the local and the external one, and set it to
469                  * the maximum of both */
470
471                 if (*seqnum + 1 > r)
472                         r = *seqnum + 1;
473
474                 *seqnum = r;
475         }
476
477         f->header->tail_entry_seqnum = htole64(r);
478
479         if (f->header->head_entry_seqnum == 0)
480                 f->header->head_entry_seqnum = htole64(r);
481
482         return r;
483 }
484
485 int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
486         int r;
487         uint64_t p;
488         Object *tail, *o;
489         void *t;
490
491         assert(f);
492         assert(type > 0 && type < _OBJECT_TYPE_MAX);
493         assert(size >= sizeof(ObjectHeader));
494         assert(offset);
495         assert(ret);
496
497         r = journal_file_set_online(f);
498         if (r < 0)
499                 return r;
500
501         p = le64toh(f->header->tail_object_offset);
502         if (p == 0)
503                 p = le64toh(f->header->header_size);
504         else {
505                 r = journal_file_move_to_object(f, -1, p, &tail);
506                 if (r < 0)
507                         return r;
508
509                 p += ALIGN64(le64toh(tail->object.size));
510         }
511
512         r = journal_file_allocate(f, p, size);
513         if (r < 0)
514                 return r;
515
516         r = journal_file_move_to(f, type, false, p, size, &t);
517         if (r < 0)
518                 return r;
519
520         o = (Object*) t;
521
522         zero(o->object);
523         o->object.type = type;
524         o->object.size = htole64(size);
525
526         f->header->tail_object_offset = htole64(p);
527         f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
528
529         *ret = o;
530         *offset = p;
531
532         return 0;
533 }
534
535 static int journal_file_setup_data_hash_table(JournalFile *f) {
536         uint64_t s, p;
537         Object *o;
538         int r;
539
540         assert(f);
541
542         /* We estimate that we need 1 hash table entry per 768 of
543            journal file and we want to make sure we never get beyond
544            75% fill level. Calculate the hash table size for the
545            maximum file size based on these metrics. */
546
547         s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
548         if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
549                 s = DEFAULT_DATA_HASH_TABLE_SIZE;
550
551         log_debug("Reserving %"PRIu64" entries in hash table.", s / sizeof(HashItem));
552
553         r = journal_file_append_object(f,
554                                        OBJECT_DATA_HASH_TABLE,
555                                        offsetof(Object, hash_table.items) + s,
556                                        &o, &p);
557         if (r < 0)
558                 return r;
559
560         memzero(o->hash_table.items, s);
561
562         f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
563         f->header->data_hash_table_size = htole64(s);
564
565         return 0;
566 }
567
568 static int journal_file_setup_field_hash_table(JournalFile *f) {
569         uint64_t s, p;
570         Object *o;
571         int r;
572
573         assert(f);
574
575         /* We use a fixed size hash table for the fields as this
576          * number should grow very slowly only */
577
578         s = DEFAULT_FIELD_HASH_TABLE_SIZE;
579         r = journal_file_append_object(f,
580                                        OBJECT_FIELD_HASH_TABLE,
581                                        offsetof(Object, hash_table.items) + s,
582                                        &o, &p);
583         if (r < 0)
584                 return r;
585
586         memzero(o->hash_table.items, s);
587
588         f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
589         f->header->field_hash_table_size = htole64(s);
590
591         return 0;
592 }
593
594 static int journal_file_map_data_hash_table(JournalFile *f) {
595         uint64_t s, p;
596         void *t;
597         int r;
598
599         assert(f);
600
601         p = le64toh(f->header->data_hash_table_offset);
602         s = le64toh(f->header->data_hash_table_size);
603
604         r = journal_file_move_to(f,
605                                  OBJECT_DATA_HASH_TABLE,
606                                  true,
607                                  p, s,
608                                  &t);
609         if (r < 0)
610                 return r;
611
612         f->data_hash_table = t;
613         return 0;
614 }
615
616 static int journal_file_map_field_hash_table(JournalFile *f) {
617         uint64_t s, p;
618         void *t;
619         int r;
620
621         assert(f);
622
623         p = le64toh(f->header->field_hash_table_offset);
624         s = le64toh(f->header->field_hash_table_size);
625
626         r = journal_file_move_to(f,
627                                  OBJECT_FIELD_HASH_TABLE,
628                                  true,
629                                  p, s,
630                                  &t);
631         if (r < 0)
632                 return r;
633
634         f->field_hash_table = t;
635         return 0;
636 }
637
638 static int journal_file_link_field(
639                 JournalFile *f,
640                 Object *o,
641                 uint64_t offset,
642                 uint64_t hash) {
643
644         uint64_t p, h;
645         int r;
646
647         assert(f);
648         assert(o);
649         assert(offset > 0);
650
651         if (o->object.type != OBJECT_FIELD)
652                 return -EINVAL;
653
654         /* This might alter the window we are looking at */
655
656         o->field.next_hash_offset = o->field.head_data_offset = 0;
657
658         h = hash % (le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
659         p = le64toh(f->field_hash_table[h].tail_hash_offset);
660         if (p == 0)
661                 f->field_hash_table[h].head_hash_offset = htole64(offset);
662         else {
663                 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
664                 if (r < 0)
665                         return r;
666
667                 o->field.next_hash_offset = htole64(offset);
668         }
669
670         f->field_hash_table[h].tail_hash_offset = htole64(offset);
671
672         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
673                 f->header->n_fields = htole64(le64toh(f->header->n_fields) + 1);
674
675         return 0;
676 }
677
678 static int journal_file_link_data(
679                 JournalFile *f,
680                 Object *o,
681                 uint64_t offset,
682                 uint64_t hash) {
683
684         uint64_t p, h;
685         int r;
686
687         assert(f);
688         assert(o);
689         assert(offset > 0);
690
691         if (o->object.type != OBJECT_DATA)
692                 return -EINVAL;
693
694         /* This might alter the window we are looking at */
695
696         o->data.next_hash_offset = o->data.next_field_offset = 0;
697         o->data.entry_offset = o->data.entry_array_offset = 0;
698         o->data.n_entries = 0;
699
700         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
701         p = le64toh(f->data_hash_table[h].tail_hash_offset);
702         if (p == 0)
703                 /* Only entry in the hash table is easy */
704                 f->data_hash_table[h].head_hash_offset = htole64(offset);
705         else {
706                 /* Move back to the previous data object, to patch in
707                  * pointer */
708
709                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
710                 if (r < 0)
711                         return r;
712
713                 o->data.next_hash_offset = htole64(offset);
714         }
715
716         f->data_hash_table[h].tail_hash_offset = htole64(offset);
717
718         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
719                 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
720
721         return 0;
722 }
723
724 int journal_file_find_field_object_with_hash(
725                 JournalFile *f,
726                 const void *field, uint64_t size, uint64_t hash,
727                 Object **ret, uint64_t *offset) {
728
729         uint64_t p, osize, h;
730         int r;
731
732         assert(f);
733         assert(field && size > 0);
734
735         osize = offsetof(Object, field.payload) + size;
736
737         if (f->header->field_hash_table_size == 0)
738                 return -EBADMSG;
739
740         h = hash % (le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
741         p = le64toh(f->field_hash_table[h].head_hash_offset);
742
743         while (p > 0) {
744                 Object *o;
745
746                 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
747                 if (r < 0)
748                         return r;
749
750                 if (le64toh(o->field.hash) == hash &&
751                     le64toh(o->object.size) == osize &&
752                     memcmp(o->field.payload, field, size) == 0) {
753
754                         if (ret)
755                                 *ret = o;
756                         if (offset)
757                                 *offset = p;
758
759                         return 1;
760                 }
761
762                 p = le64toh(o->field.next_hash_offset);
763         }
764
765         return 0;
766 }
767
768 int journal_file_find_field_object(
769                 JournalFile *f,
770                 const void *field, uint64_t size,
771                 Object **ret, uint64_t *offset) {
772
773         uint64_t hash;
774
775         assert(f);
776         assert(field && size > 0);
777
778         hash = hash64(field, size);
779
780         return journal_file_find_field_object_with_hash(f,
781                                                         field, size, hash,
782                                                         ret, offset);
783 }
784
785 int journal_file_find_data_object_with_hash(
786                 JournalFile *f,
787                 const void *data, uint64_t size, uint64_t hash,
788                 Object **ret, uint64_t *offset) {
789
790         uint64_t p, osize, h;
791         int r;
792
793         assert(f);
794         assert(data || size == 0);
795
796         osize = offsetof(Object, data.payload) + size;
797
798         if (f->header->data_hash_table_size == 0)
799                 return -EBADMSG;
800
801         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
802         p = le64toh(f->data_hash_table[h].head_hash_offset);
803
804         while (p > 0) {
805                 Object *o;
806
807                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
808                 if (r < 0)
809                         return r;
810
811                 if (le64toh(o->data.hash) != hash)
812                         goto next;
813
814                 if (o->object.flags & OBJECT_COMPRESSION_MASK) {
815 #if defined(HAVE_XZ) || defined(HAVE_LZ4)
816                         uint64_t l;
817                         size_t rsize;
818
819                         l = le64toh(o->object.size);
820                         if (l <= offsetof(Object, data.payload))
821                                 return -EBADMSG;
822
823                         l -= offsetof(Object, data.payload);
824
825                         r = decompress_blob(o->object.flags & OBJECT_COMPRESSION_MASK,
826                                             o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize, 0);
827                         if (r < 0)
828                                 return r;
829
830                         if (rsize == size &&
831                             memcmp(f->compress_buffer, data, size) == 0) {
832
833                                 if (ret)
834                                         *ret = o;
835
836                                 if (offset)
837                                         *offset = p;
838
839                                 return 1;
840                         }
841 #else
842                         return -EPROTONOSUPPORT;
843 #endif
844                 } else if (le64toh(o->object.size) == osize &&
845                            memcmp(o->data.payload, data, size) == 0) {
846
847                         if (ret)
848                                 *ret = o;
849
850                         if (offset)
851                                 *offset = p;
852
853                         return 1;
854                 }
855
856         next:
857                 p = le64toh(o->data.next_hash_offset);
858         }
859
860         return 0;
861 }
862
863 int journal_file_find_data_object(
864                 JournalFile *f,
865                 const void *data, uint64_t size,
866                 Object **ret, uint64_t *offset) {
867
868         uint64_t hash;
869
870         assert(f);
871         assert(data || size == 0);
872
873         hash = hash64(data, size);
874
875         return journal_file_find_data_object_with_hash(f,
876                                                        data, size, hash,
877                                                        ret, offset);
878 }
879
880 static int journal_file_append_field(
881                 JournalFile *f,
882                 const void *field, uint64_t size,
883                 Object **ret, uint64_t *offset) {
884
885         uint64_t hash, p;
886         uint64_t osize;
887         Object *o;
888         int r;
889
890         assert(f);
891         assert(field && size > 0);
892
893         hash = hash64(field, size);
894
895         r = journal_file_find_field_object_with_hash(f, field, size, hash, &o, &p);
896         if (r < 0)
897                 return r;
898         else if (r > 0) {
899
900                 if (ret)
901                         *ret = o;
902
903                 if (offset)
904                         *offset = p;
905
906                 return 0;
907         }
908
909         osize = offsetof(Object, field.payload) + size;
910         r = journal_file_append_object(f, OBJECT_FIELD, osize, &o, &p);
911         if (r < 0)
912                 return r;
913
914         o->field.hash = htole64(hash);
915         memcpy(o->field.payload, field, size);
916
917         r = journal_file_link_field(f, o, p, hash);
918         if (r < 0)
919                 return r;
920
921         /* The linking might have altered the window, so let's
922          * refresh our pointer */
923         r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
924         if (r < 0)
925                 return r;
926
927 #ifdef HAVE_GCRYPT
928         r = journal_file_hmac_put_object(f, OBJECT_FIELD, o, p);
929         if (r < 0)
930                 return r;
931 #endif
932
933         if (ret)
934                 *ret = o;
935
936         if (offset)
937                 *offset = p;
938
939         return 0;
940 }
941
942 static int journal_file_append_data(
943                 JournalFile *f,
944                 const void *data, uint64_t size,
945                 Object **ret, uint64_t *offset) {
946
947         uint64_t hash, p;
948         uint64_t osize;
949         Object *o;
950         int r, compression = 0;
951         const void *eq;
952
953         assert(f);
954         assert(data || size == 0);
955
956         hash = hash64(data, size);
957
958         r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
959         if (r < 0)
960                 return r;
961         else if (r > 0) {
962
963                 if (ret)
964                         *ret = o;
965
966                 if (offset)
967                         *offset = p;
968
969                 return 0;
970         }
971
972         osize = offsetof(Object, data.payload) + size;
973         r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
974         if (r < 0)
975                 return r;
976
977         o->data.hash = htole64(hash);
978
979 #if defined(HAVE_XZ) || defined(HAVE_LZ4)
980         if (f->compress_xz &&
981             size >= COMPRESSION_SIZE_THRESHOLD) {
982                 size_t rsize;
983
984                 compression = compress_blob(data, size, o->data.payload, &rsize);
985
986                 if (compression) {
987                         o->object.size = htole64(offsetof(Object, data.payload) + rsize);
988                         o->object.flags |= compression;
989
990                         log_debug("Compressed data object %"PRIu64" -> %zu using %s",
991                                   size, rsize, object_compressed_to_string(compression));
992                 }
993         }
994 #endif
995
996         if (!compression && size > 0)
997                 memcpy(o->data.payload, data, size);
998
999         r = journal_file_link_data(f, o, p, hash);
1000         if (r < 0)
1001                 return r;
1002
1003         /* The linking might have altered the window, so let's
1004          * refresh our pointer */
1005         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1006         if (r < 0)
1007                 return r;
1008
1009         if (!data)
1010                 eq = NULL;
1011         else
1012                 eq = memchr(data, '=', size);
1013         if (eq && eq > data) {
1014                 Object *fo = NULL;
1015                 uint64_t fp;
1016
1017                 /* Create field object ... */
1018                 r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, &fp);
1019                 if (r < 0)
1020                         return r;
1021
1022                 /* ... and link it in. */
1023                 o->data.next_field_offset = fo->field.head_data_offset;
1024                 fo->field.head_data_offset = le64toh(p);
1025         }
1026
1027 #ifdef HAVE_GCRYPT
1028         r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
1029         if (r < 0)
1030                 return r;
1031 #endif
1032
1033         if (ret)
1034                 *ret = o;
1035
1036         if (offset)
1037                 *offset = p;
1038
1039         return 0;
1040 }
1041
1042 uint64_t journal_file_entry_n_items(Object *o) {
1043         assert(o);
1044
1045         if (o->object.type != OBJECT_ENTRY)
1046                 return 0;
1047
1048         return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
1049 }
1050
1051 uint64_t journal_file_entry_array_n_items(Object *o) {
1052         assert(o);
1053
1054         if (o->object.type != OBJECT_ENTRY_ARRAY)
1055                 return 0;
1056
1057         return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
1058 }
1059
1060 uint64_t journal_file_hash_table_n_items(Object *o) {
1061         assert(o);
1062
1063         if (o->object.type != OBJECT_DATA_HASH_TABLE &&
1064             o->object.type != OBJECT_FIELD_HASH_TABLE)
1065                 return 0;
1066
1067         return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
1068 }
1069
1070 static int link_entry_into_array(JournalFile *f,
1071                                  le64_t *first,
1072                                  le64_t *idx,
1073                                  uint64_t p) {
1074         int r;
1075         uint64_t n = 0, ap = 0, q, i, a, hidx;
1076         Object *o;
1077
1078         assert(f);
1079         assert(first);
1080         assert(idx);
1081         assert(p > 0);
1082
1083         a = le64toh(*first);
1084         i = hidx = le64toh(*idx);
1085         while (a > 0) {
1086
1087                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1088                 if (r < 0)
1089                         return r;
1090
1091                 n = journal_file_entry_array_n_items(o);
1092                 if (i < n) {
1093                         o->entry_array.items[i] = htole64(p);
1094                         *idx = htole64(hidx + 1);
1095                         return 0;
1096                 }
1097
1098                 i -= n;
1099                 ap = a;
1100                 a = le64toh(o->entry_array.next_entry_array_offset);
1101         }
1102
1103         if (hidx > n)
1104                 n = (hidx+1) * 2;
1105         else
1106                 n = n * 2;
1107
1108         if (n < 4)
1109                 n = 4;
1110
1111         r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
1112                                        offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
1113                                        &o, &q);
1114         if (r < 0)
1115                 return r;
1116
1117 #ifdef HAVE_GCRYPT
1118         r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
1119         if (r < 0)
1120                 return r;
1121 #endif
1122
1123         o->entry_array.items[i] = htole64(p);
1124
1125         if (ap == 0)
1126                 *first = htole64(q);
1127         else {
1128                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
1129                 if (r < 0)
1130                         return r;
1131
1132                 o->entry_array.next_entry_array_offset = htole64(q);
1133         }
1134
1135         if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1136                 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
1137
1138         *idx = htole64(hidx + 1);
1139
1140         return 0;
1141 }
1142
1143 static int link_entry_into_array_plus_one(JournalFile *f,
1144                                           le64_t *extra,
1145                                           le64_t *first,
1146                                           le64_t *idx,
1147                                           uint64_t p) {
1148
1149         int r;
1150
1151         assert(f);
1152         assert(extra);
1153         assert(first);
1154         assert(idx);
1155         assert(p > 0);
1156
1157         if (*idx == 0)
1158                 *extra = htole64(p);
1159         else {
1160                 le64_t i;
1161
1162                 i = htole64(le64toh(*idx) - 1);
1163                 r = link_entry_into_array(f, first, &i, p);
1164                 if (r < 0)
1165                         return r;
1166         }
1167
1168         *idx = htole64(le64toh(*idx) + 1);
1169         return 0;
1170 }
1171
1172 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
1173         uint64_t p;
1174         int r;
1175         assert(f);
1176         assert(o);
1177         assert(offset > 0);
1178
1179         p = le64toh(o->entry.items[i].object_offset);
1180         if (p == 0)
1181                 return -EINVAL;
1182
1183         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1184         if (r < 0)
1185                 return r;
1186
1187         return link_entry_into_array_plus_one(f,
1188                                               &o->data.entry_offset,
1189                                               &o->data.entry_array_offset,
1190                                               &o->data.n_entries,
1191                                               offset);
1192 }
1193
1194 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
1195         uint64_t n, i;
1196         int r;
1197
1198         assert(f);
1199         assert(o);
1200         assert(offset > 0);
1201
1202         if (o->object.type != OBJECT_ENTRY)
1203                 return -EINVAL;
1204
1205         __sync_synchronize();
1206
1207         /* Link up the entry itself */
1208         r = link_entry_into_array(f,
1209                                   &f->header->entry_array_offset,
1210                                   &f->header->n_entries,
1211                                   offset);
1212         if (r < 0)
1213                 return r;
1214
1215         /* log_debug("=> %s seqnr=%"PRIu64" n_entries=%"PRIu64, f->path, o->entry.seqnum, f->header->n_entries); */
1216
1217         if (f->header->head_entry_realtime == 0)
1218                 f->header->head_entry_realtime = o->entry.realtime;
1219
1220         f->header->tail_entry_realtime = o->entry.realtime;
1221         f->header->tail_entry_monotonic = o->entry.monotonic;
1222
1223         f->tail_entry_monotonic_valid = true;
1224
1225         /* Link up the items */
1226         n = journal_file_entry_n_items(o);
1227         for (i = 0; i < n; i++) {
1228                 r = journal_file_link_entry_item(f, o, offset, i);
1229                 if (r < 0)
1230                         return r;
1231         }
1232
1233         return 0;
1234 }
1235
1236 static int journal_file_append_entry_internal(
1237                 JournalFile *f,
1238                 const dual_timestamp *ts,
1239                 uint64_t xor_hash,
1240                 const EntryItem items[], unsigned n_items,
1241                 uint64_t *seqnum,
1242                 Object **ret, uint64_t *offset) {
1243         uint64_t np;
1244         uint64_t osize;
1245         Object *o;
1246         int r;
1247
1248         assert(f);
1249         assert(items || n_items == 0);
1250         assert(ts);
1251
1252         osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1253
1254         r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
1255         if (r < 0)
1256                 return r;
1257
1258         o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
1259         memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
1260         o->entry.realtime = htole64(ts->realtime);
1261         o->entry.monotonic = htole64(ts->monotonic);
1262         o->entry.xor_hash = htole64(xor_hash);
1263         o->entry.boot_id = f->header->boot_id;
1264
1265 #ifdef HAVE_GCRYPT
1266         r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np);
1267         if (r < 0)
1268                 return r;
1269 #endif
1270
1271         r = journal_file_link_entry(f, o, np);
1272         if (r < 0)
1273                 return r;
1274
1275         if (ret)
1276                 *ret = o;
1277
1278         if (offset)
1279                 *offset = np;
1280
1281         return 0;
1282 }
1283
1284 void journal_file_post_change(JournalFile *f) {
1285         assert(f);
1286
1287         /* inotify() does not receive IN_MODIFY events from file
1288          * accesses done via mmap(). After each access we hence
1289          * trigger IN_MODIFY by truncating the journal file to its
1290          * current size which triggers IN_MODIFY. */
1291
1292         __sync_synchronize();
1293
1294         if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1295                 log_error("Failed to truncate file to its own size: %m");
1296 }
1297
1298 static int entry_item_cmp(const void *_a, const void *_b) {
1299         const EntryItem *a = _a, *b = _b;
1300
1301         if (le64toh(a->object_offset) < le64toh(b->object_offset))
1302                 return -1;
1303         if (le64toh(a->object_offset) > le64toh(b->object_offset))
1304                 return 1;
1305         return 0;
1306 }
1307
1308 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1309         unsigned i;
1310         EntryItem *items;
1311         int r;
1312         uint64_t xor_hash = 0;
1313         struct dual_timestamp _ts;
1314
1315         assert(f);
1316         assert(iovec || n_iovec == 0);
1317
1318         if (!ts) {
1319                 dual_timestamp_get(&_ts);
1320                 ts = &_ts;
1321         }
1322
1323         if (f->tail_entry_monotonic_valid &&
1324             ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1325                 return -EINVAL;
1326
1327 #ifdef HAVE_GCRYPT
1328         r = journal_file_maybe_append_tag(f, ts->realtime);
1329         if (r < 0)
1330                 return r;
1331 #endif
1332
1333         /* alloca() can't take 0, hence let's allocate at least one */
1334         items = alloca(sizeof(EntryItem) * MAX(1u, n_iovec));
1335
1336         for (i = 0; i < n_iovec; i++) {
1337                 uint64_t p;
1338                 Object *o;
1339
1340                 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1341                 if (r < 0)
1342                         return r;
1343
1344                 xor_hash ^= le64toh(o->data.hash);
1345                 items[i].object_offset = htole64(p);
1346                 items[i].hash = o->data.hash;
1347         }
1348
1349         /* Order by the position on disk, in order to improve seek
1350          * times for rotating media. */
1351         qsort_safe(items, n_iovec, sizeof(EntryItem), entry_item_cmp);
1352
1353         r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1354
1355         journal_file_post_change(f);
1356
1357         return r;
1358 }
1359
/* One cached position within a chain of entry arrays, keyed by the
 * offset of the chain's first array. */
typedef struct ChainCacheItem {
        /* the array at the beginning of the chain */
        uint64_t first;

        /* the cached array */
        uint64_t array;

        /* the first item in the cached array */
        uint64_t begin;

        /* the total number of items in all arrays before this one in the chain */
        uint64_t total;

        /* the last index we looked at, to optimize locality when bisecting */
        uint64_t last_index;
} ChainCacheItem;
1367
1368 static void chain_cache_put(
1369                 OrderedHashmap *h,
1370                 ChainCacheItem *ci,
1371                 uint64_t first,
1372                 uint64_t array,
1373                 uint64_t begin,
1374                 uint64_t total,
1375                 uint64_t last_index) {
1376
1377         if (!ci) {
1378                 /* If the chain item to cache for this chain is the
1379                  * first one it's not worth caching anything */
1380                 if (array == first)
1381                         return;
1382
1383                 if (ordered_hashmap_size(h) >= CHAIN_CACHE_MAX)
1384                         ci = ordered_hashmap_steal_first(h);
1385                 else {
1386                         ci = new(ChainCacheItem, 1);
1387                         if (!ci)
1388                                 return;
1389                 }
1390
1391                 ci->first = first;
1392
1393                 if (ordered_hashmap_put(h, &ci->first, ci) < 0) {
1394                         free(ci);
1395                         return;
1396                 }
1397         } else
1398                 assert(ci->first == first);
1399
1400         ci->array = array;
1401         ci->begin = begin;
1402         ci->total = total;
1403         ci->last_index = last_index;
1404 }
1405
1406 static int generic_array_get(
1407                 JournalFile *f,
1408                 uint64_t first,
1409                 uint64_t i,
1410                 Object **ret, uint64_t *offset) {
1411
1412         Object *o;
1413         uint64_t p = 0, a, t = 0;
1414         int r;
1415         ChainCacheItem *ci;
1416
1417         assert(f);
1418
1419         a = first;
1420
1421         /* Try the chain cache first */
1422         ci = ordered_hashmap_get(f->chain_cache, &first);
1423         if (ci && i > ci->total) {
1424                 a = ci->array;
1425                 i -= ci->total;
1426                 t = ci->total;
1427         }
1428
1429         while (a > 0) {
1430                 uint64_t k;
1431
1432                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1433                 if (r < 0)
1434                         return r;
1435
1436                 k = journal_file_entry_array_n_items(o);
1437                 if (i < k) {
1438                         p = le64toh(o->entry_array.items[i]);
1439                         goto found;
1440                 }
1441
1442                 i -= k;
1443                 t += k;
1444                 a = le64toh(o->entry_array.next_entry_array_offset);
1445         }
1446
1447         return 0;
1448
1449 found:
1450         /* Let's cache this item for the next invocation */
1451         chain_cache_put(f->chain_cache, ci, first, a, le64toh(o->entry_array.items[0]), t, i);
1452
1453         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1454         if (r < 0)
1455                 return r;
1456
1457         if (ret)
1458                 *ret = o;
1459
1460         if (offset)
1461                 *offset = p;
1462
1463         return 1;
1464 }
1465
1466 static int generic_array_get_plus_one(
1467                 JournalFile *f,
1468                 uint64_t extra,
1469                 uint64_t first,
1470                 uint64_t i,
1471                 Object **ret, uint64_t *offset) {
1472
1473         Object *o;
1474
1475         assert(f);
1476
1477         if (i == 0) {
1478                 int r;
1479
1480                 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1481                 if (r < 0)
1482                         return r;
1483
1484                 if (ret)
1485                         *ret = o;
1486
1487                 if (offset)
1488                         *offset = extra;
1489
1490                 return 1;
1491         }
1492
1493         return generic_array_get(f, first, i-1, ret, offset);
1494 }
1495
/* Results of the test_object() callbacks used when bisecting */
enum {
        TEST_FOUND = 0, /* the tested object matches the needle */
        TEST_LEFT  = 1, /* the tested object is smaller than the needle */
        TEST_RIGHT = 2  /* the tested object is larger than the needle */
};
1501
1502 static int generic_array_bisect(
1503                 JournalFile *f,
1504                 uint64_t first,
1505                 uint64_t n,
1506                 uint64_t needle,
1507                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1508                 direction_t direction,
1509                 Object **ret,
1510                 uint64_t *offset,
1511                 uint64_t *idx) {
1512
1513         uint64_t a, p, t = 0, i = 0, last_p = 0, last_index = (uint64_t) -1;
1514         bool subtract_one = false;
1515         Object *o, *array = NULL;
1516         int r;
1517         ChainCacheItem *ci;
1518
1519         assert(f);
1520         assert(test_object);
1521
1522         /* Start with the first array in the chain */
1523         a = first;
1524
1525         ci = ordered_hashmap_get(f->chain_cache, &first);
1526         if (ci && n > ci->total) {
1527                 /* Ah, we have iterated this bisection array chain
1528                  * previously! Let's see if we can skip ahead in the
1529                  * chain, as far as the last time. But we can't jump
1530                  * backwards in the chain, so let's check that
1531                  * first. */
1532
1533                 r = test_object(f, ci->begin, needle);
1534                 if (r < 0)
1535                         return r;
1536
1537                 if (r == TEST_LEFT) {
1538                         /* OK, what we are looking for is right of the
1539                          * begin of this EntryArray, so let's jump
1540                          * straight to previously cached array in the
1541                          * chain */
1542
1543                         a = ci->array;
1544                         n -= ci->total;
1545                         t = ci->total;
1546                         last_index = ci->last_index;
1547                 }
1548         }
1549
1550         while (a > 0) {
1551                 uint64_t left, right, k, lp;
1552
1553                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1554                 if (r < 0)
1555                         return r;
1556
1557                 k = journal_file_entry_array_n_items(array);
1558                 right = MIN(k, n);
1559                 if (right <= 0)
1560                         return 0;
1561
1562                 i = right - 1;
1563                 lp = p = le64toh(array->entry_array.items[i]);
1564                 if (p <= 0)
1565                         return -EBADMSG;
1566
1567                 r = test_object(f, p, needle);
1568                 if (r < 0)
1569                         return r;
1570
1571                 if (r == TEST_FOUND)
1572                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1573
1574                 if (r == TEST_RIGHT) {
1575                         left = 0;
1576                         right -= 1;
1577
1578                         if (last_index != (uint64_t) -1) {
1579                                 assert(last_index <= right);
1580
1581                                 /* If we cached the last index we
1582                                  * looked at, let's try to not to jump
1583                                  * too wildly around and see if we can
1584                                  * limit the range to look at early to
1585                                  * the immediate neighbors of the last
1586                                  * index we looked at. */
1587
1588                                 if (last_index > 0) {
1589                                         uint64_t x = last_index - 1;
1590
1591                                         p = le64toh(array->entry_array.items[x]);
1592                                         if (p <= 0)
1593                                                 return -EBADMSG;
1594
1595                                         r = test_object(f, p, needle);
1596                                         if (r < 0)
1597                                                 return r;
1598
1599                                         if (r == TEST_FOUND)
1600                                                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1601
1602                                         if (r == TEST_RIGHT)
1603                                                 right = x;
1604                                         else
1605                                                 left = x + 1;
1606                                 }
1607
1608                                 if (last_index < right) {
1609                                         uint64_t y = last_index + 1;
1610
1611                                         p = le64toh(array->entry_array.items[y]);
1612                                         if (p <= 0)
1613                                                 return -EBADMSG;
1614
1615                                         r = test_object(f, p, needle);
1616                                         if (r < 0)
1617                                                 return r;
1618
1619                                         if (r == TEST_FOUND)
1620                                                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1621
1622                                         if (r == TEST_RIGHT)
1623                                                 right = y;
1624                                         else
1625                                                 left = y + 1;
1626                                 }
1627                         }
1628
1629                         for (;;) {
1630                                 if (left == right) {
1631                                         if (direction == DIRECTION_UP)
1632                                                 subtract_one = true;
1633
1634                                         i = left;
1635                                         goto found;
1636                                 }
1637
1638                                 assert(left < right);
1639                                 i = (left + right) / 2;
1640
1641                                 p = le64toh(array->entry_array.items[i]);
1642                                 if (p <= 0)
1643                                         return -EBADMSG;
1644
1645                                 r = test_object(f, p, needle);
1646                                 if (r < 0)
1647                                         return r;
1648
1649                                 if (r == TEST_FOUND)
1650                                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1651
1652                                 if (r == TEST_RIGHT)
1653                                         right = i;
1654                                 else
1655                                         left = i + 1;
1656                         }
1657                 }
1658
1659                 if (k > n) {
1660                         if (direction == DIRECTION_UP) {
1661                                 i = n;
1662                                 subtract_one = true;
1663                                 goto found;
1664                         }
1665
1666                         return 0;
1667                 }
1668
1669                 last_p = lp;
1670
1671                 n -= k;
1672                 t += k;
1673                 last_index = (uint64_t) -1;
1674                 a = le64toh(array->entry_array.next_entry_array_offset);
1675         }
1676
1677         return 0;
1678
1679 found:
1680         if (subtract_one && t == 0 && i == 0)
1681                 return 0;
1682
1683         /* Let's cache this item for the next invocation */
1684         chain_cache_put(f->chain_cache, ci, first, a, le64toh(array->entry_array.items[0]), t, subtract_one ? (i > 0 ? i-1 : (uint64_t) -1) : i);
1685
1686         if (subtract_one && i == 0)
1687                 p = last_p;
1688         else if (subtract_one)
1689                 p = le64toh(array->entry_array.items[i-1]);
1690         else
1691                 p = le64toh(array->entry_array.items[i]);
1692
1693         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1694         if (r < 0)
1695                 return r;
1696
1697         if (ret)
1698                 *ret = o;
1699
1700         if (offset)
1701                 *offset = p;
1702
1703         if (idx)
1704                 *idx = t + i + (subtract_one ? -1 : 0);
1705
1706         return 1;
1707 }
1708
1709
1710 static int generic_array_bisect_plus_one(
1711                 JournalFile *f,
1712                 uint64_t extra,
1713                 uint64_t first,
1714                 uint64_t n,
1715                 uint64_t needle,
1716                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1717                 direction_t direction,
1718                 Object **ret,
1719                 uint64_t *offset,
1720                 uint64_t *idx) {
1721
1722         int r;
1723         bool step_back = false;
1724         Object *o;
1725
1726         assert(f);
1727         assert(test_object);
1728
1729         if (n <= 0)
1730                 return 0;
1731
1732         /* This bisects the array in object 'first', but first checks
1733          * an extra  */
1734         r = test_object(f, extra, needle);
1735         if (r < 0)
1736                 return r;
1737
1738         if (r == TEST_FOUND)
1739                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1740
1741         /* if we are looking with DIRECTION_UP then we need to first
1742            see if in the actual array there is a matching entry, and
1743            return the last one of that. But if there isn't any we need
1744            to return this one. Hence remember this, and return it
1745            below. */
1746         if (r == TEST_LEFT)
1747                 step_back = direction == DIRECTION_UP;
1748
1749         if (r == TEST_RIGHT) {
1750                 if (direction == DIRECTION_DOWN)
1751                         goto found;
1752                 else
1753                         return 0;
1754         }
1755
1756         r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1757
1758         if (r == 0 && step_back)
1759                 goto found;
1760
1761         if (r > 0 && idx)
1762                 (*idx) ++;
1763
1764         return r;
1765
1766 found:
1767         r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1768         if (r < 0)
1769                 return r;
1770
1771         if (ret)
1772                 *ret = o;
1773
1774         if (offset)
1775                 *offset = extra;
1776
1777         if (idx)
1778                 *idx = 0;
1779
1780         return 1;
1781 }
1782
1783 _pure_ static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1784         assert(f);
1785         assert(p > 0);
1786
1787         if (p == needle)
1788                 return TEST_FOUND;
1789         else if (p < needle)
1790                 return TEST_LEFT;
1791         else
1792                 return TEST_RIGHT;
1793 }
1794
1795 int journal_file_move_to_entry_by_offset(
1796                 JournalFile *f,
1797                 uint64_t p,
1798                 direction_t direction,
1799                 Object **ret,
1800                 uint64_t *offset) {
1801
1802         return generic_array_bisect(f,
1803                                     le64toh(f->header->entry_array_offset),
1804                                     le64toh(f->header->n_entries),
1805                                     p,
1806                                     test_object_offset,
1807                                     direction,
1808                                     ret, offset, NULL);
1809 }
1810
1811
1812 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1813         Object *o;
1814         int r;
1815
1816         assert(f);
1817         assert(p > 0);
1818
1819         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1820         if (r < 0)
1821                 return r;
1822
1823         if (le64toh(o->entry.seqnum) == needle)
1824                 return TEST_FOUND;
1825         else if (le64toh(o->entry.seqnum) < needle)
1826                 return TEST_LEFT;
1827         else
1828                 return TEST_RIGHT;
1829 }
1830
1831 int journal_file_move_to_entry_by_seqnum(
1832                 JournalFile *f,
1833                 uint64_t seqnum,
1834                 direction_t direction,
1835                 Object **ret,
1836                 uint64_t *offset) {
1837
1838         return generic_array_bisect(f,
1839                                     le64toh(f->header->entry_array_offset),
1840                                     le64toh(f->header->n_entries),
1841                                     seqnum,
1842                                     test_object_seqnum,
1843                                     direction,
1844                                     ret, offset, NULL);
1845 }
1846
1847 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1848         Object *o;
1849         int r;
1850
1851         assert(f);
1852         assert(p > 0);
1853
1854         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1855         if (r < 0)
1856                 return r;
1857
1858         if (le64toh(o->entry.realtime) == needle)
1859                 return TEST_FOUND;
1860         else if (le64toh(o->entry.realtime) < needle)
1861                 return TEST_LEFT;
1862         else
1863                 return TEST_RIGHT;
1864 }
1865
1866 int journal_file_move_to_entry_by_realtime(
1867                 JournalFile *f,
1868                 uint64_t realtime,
1869                 direction_t direction,
1870                 Object **ret,
1871                 uint64_t *offset) {
1872
1873         return generic_array_bisect(f,
1874                                     le64toh(f->header->entry_array_offset),
1875                                     le64toh(f->header->n_entries),
1876                                     realtime,
1877                                     test_object_realtime,
1878                                     direction,
1879                                     ret, offset, NULL);
1880 }
1881
1882 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1883         Object *o;
1884         int r;
1885
1886         assert(f);
1887         assert(p > 0);
1888
1889         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1890         if (r < 0)
1891                 return r;
1892
1893         if (le64toh(o->entry.monotonic) == needle)
1894                 return TEST_FOUND;
1895         else if (le64toh(o->entry.monotonic) < needle)
1896                 return TEST_LEFT;
1897         else
1898                 return TEST_RIGHT;
1899 }
1900
1901 static inline int find_data_object_by_boot_id(
1902                 JournalFile *f,
1903                 sd_id128_t boot_id,
1904                 Object **o,
1905                 uint64_t *b) {
1906         char t[sizeof("_BOOT_ID=")-1 + 32 + 1] = "_BOOT_ID=";
1907
1908         sd_id128_to_string(boot_id, t + 9);
1909         return journal_file_find_data_object(f, t, sizeof(t) - 1, o, b);
1910 }
1911
1912 int journal_file_move_to_entry_by_monotonic(
1913                 JournalFile *f,
1914                 sd_id128_t boot_id,
1915                 uint64_t monotonic,
1916                 direction_t direction,
1917                 Object **ret,
1918                 uint64_t *offset) {
1919
1920         Object *o;
1921         int r;
1922
1923         assert(f);
1924
1925         r = find_data_object_by_boot_id(f, boot_id, &o, NULL);
1926         if (r < 0)
1927                 return r;
1928         if (r == 0)
1929                 return -ENOENT;
1930
1931         return generic_array_bisect_plus_one(f,
1932                                              le64toh(o->data.entry_offset),
1933                                              le64toh(o->data.entry_array_offset),
1934                                              le64toh(o->data.n_entries),
1935                                              monotonic,
1936                                              test_object_monotonic,
1937                                              direction,
1938                                              ret, offset, NULL);
1939 }
1940
1941 int journal_file_next_entry(
1942                 JournalFile *f,
1943                 Object *o, uint64_t p,
1944                 direction_t direction,
1945                 Object **ret, uint64_t *offset) {
1946
1947         uint64_t i, n, ofs;
1948         int r;
1949
1950         assert(f);
1951         assert(p > 0 || !o);
1952
1953         n = le64toh(f->header->n_entries);
1954         if (n <= 0)
1955                 return 0;
1956
1957         if (!o)
1958                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1959         else {
1960                 if (o->object.type != OBJECT_ENTRY)
1961                         return -EINVAL;
1962
1963                 r = generic_array_bisect(f,
1964                                          le64toh(f->header->entry_array_offset),
1965                                          le64toh(f->header->n_entries),
1966                                          p,
1967                                          test_object_offset,
1968                                          DIRECTION_DOWN,
1969                                          NULL, NULL,
1970                                          &i);
1971                 if (r <= 0)
1972                         return r;
1973
1974                 if (direction == DIRECTION_DOWN) {
1975                         if (i >= n - 1)
1976                                 return 0;
1977
1978                         i++;
1979                 } else {
1980                         if (i <= 0)
1981                                 return 0;
1982
1983                         i--;
1984                 }
1985         }
1986
1987         /* And jump to it */
1988         r = generic_array_get(f,
1989                               le64toh(f->header->entry_array_offset),
1990                               i,
1991                               ret, &ofs);
1992         if (r <= 0)
1993                 return r;
1994
1995         if (p > 0 &&
1996             (direction == DIRECTION_DOWN ? ofs <= p : ofs >= p)) {
1997                 log_debug("%s: entry array corrupted at entry %"PRIu64,
1998                           f->path, i);
1999                 return -EBADMSG;
2000         }
2001
2002         if (offset)
2003                 *offset = ofs;
2004
2005         return 1;
2006 }
2007
2008 int journal_file_skip_entry(
2009                 JournalFile *f,
2010                 Object *o, uint64_t p,
2011                 int64_t skip,
2012                 Object **ret, uint64_t *offset) {
2013
2014         uint64_t i, n;
2015         int r;
2016
2017         assert(f);
2018         assert(o);
2019         assert(p > 0);
2020
2021         if (o->object.type != OBJECT_ENTRY)
2022                 return -EINVAL;
2023
2024         r = generic_array_bisect(f,
2025                                  le64toh(f->header->entry_array_offset),
2026                                  le64toh(f->header->n_entries),
2027                                  p,
2028                                  test_object_offset,
2029                                  DIRECTION_DOWN,
2030                                  NULL, NULL,
2031                                  &i);
2032         if (r <= 0)
2033                 return r;
2034
2035         /* Calculate new index */
2036         if (skip < 0) {
2037                 if ((uint64_t) -skip >= i)
2038                         i = 0;
2039                 else
2040                         i = i - (uint64_t) -skip;
2041         } else
2042                 i  += (uint64_t) skip;
2043
2044         n = le64toh(f->header->n_entries);
2045         if (n <= 0)
2046                 return -EBADMSG;
2047
2048         if (i >= n)
2049                 i = n-1;
2050
2051         return generic_array_get(f,
2052                                  le64toh(f->header->entry_array_offset),
2053                                  i,
2054                                  ret, offset);
2055 }
2056
2057 int journal_file_next_entry_for_data(
2058                 JournalFile *f,
2059                 Object *o, uint64_t p,
2060                 uint64_t data_offset,
2061                 direction_t direction,
2062                 Object **ret, uint64_t *offset) {
2063
2064         uint64_t n, i;
2065         int r;
2066         Object *d;
2067
2068         assert(f);
2069         assert(p > 0 || !o);
2070
2071         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2072         if (r < 0)
2073                 return r;
2074
2075         n = le64toh(d->data.n_entries);
2076         if (n <= 0)
2077                 return n;
2078
2079         if (!o)
2080                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
2081         else {
2082                 if (o->object.type != OBJECT_ENTRY)
2083                         return -EINVAL;
2084
2085                 r = generic_array_bisect_plus_one(f,
2086                                                   le64toh(d->data.entry_offset),
2087                                                   le64toh(d->data.entry_array_offset),
2088                                                   le64toh(d->data.n_entries),
2089                                                   p,
2090                                                   test_object_offset,
2091                                                   DIRECTION_DOWN,
2092                                                   NULL, NULL,
2093                                                   &i);
2094
2095                 if (r <= 0)
2096                         return r;
2097
2098                 if (direction == DIRECTION_DOWN) {
2099                         if (i >= n - 1)
2100                                 return 0;
2101
2102                         i++;
2103                 } else {
2104                         if (i <= 0)
2105                                 return 0;
2106
2107                         i--;
2108                 }
2109
2110         }
2111
2112         return generic_array_get_plus_one(f,
2113                                           le64toh(d->data.entry_offset),
2114                                           le64toh(d->data.entry_array_offset),
2115                                           i,
2116                                           ret, offset);
2117 }
2118
2119 int journal_file_move_to_entry_by_offset_for_data(
2120                 JournalFile *f,
2121                 uint64_t data_offset,
2122                 uint64_t p,
2123                 direction_t direction,
2124                 Object **ret, uint64_t *offset) {
2125
2126         int r;
2127         Object *d;
2128
2129         assert(f);
2130
2131         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2132         if (r < 0)
2133                 return r;
2134
2135         return generic_array_bisect_plus_one(f,
2136                                              le64toh(d->data.entry_offset),
2137                                              le64toh(d->data.entry_array_offset),
2138                                              le64toh(d->data.n_entries),
2139                                              p,
2140                                              test_object_offset,
2141                                              direction,
2142                                              ret, offset, NULL);
2143 }
2144
2145 int journal_file_move_to_entry_by_monotonic_for_data(
2146                 JournalFile *f,
2147                 uint64_t data_offset,
2148                 sd_id128_t boot_id,
2149                 uint64_t monotonic,
2150                 direction_t direction,
2151                 Object **ret, uint64_t *offset) {
2152
2153         Object *o, *d;
2154         int r;
2155         uint64_t b, z;
2156
2157         assert(f);
2158
2159         /* First, seek by time */
2160         r = find_data_object_by_boot_id(f, boot_id, &o, &b);
2161         if (r < 0)
2162                 return r;
2163         if (r == 0)
2164                 return -ENOENT;
2165
2166         r = generic_array_bisect_plus_one(f,
2167                                           le64toh(o->data.entry_offset),
2168                                           le64toh(o->data.entry_array_offset),
2169                                           le64toh(o->data.n_entries),
2170                                           monotonic,
2171                                           test_object_monotonic,
2172                                           direction,
2173                                           NULL, &z, NULL);
2174         if (r <= 0)
2175                 return r;
2176
2177         /* And now, continue seeking until we find an entry that
2178          * exists in both bisection arrays */
2179
2180         for (;;) {
2181                 Object *qo;
2182                 uint64_t p, q;
2183
2184                 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2185                 if (r < 0)
2186                         return r;
2187
2188                 r = generic_array_bisect_plus_one(f,
2189                                                   le64toh(d->data.entry_offset),
2190                                                   le64toh(d->data.entry_array_offset),
2191                                                   le64toh(d->data.n_entries),
2192                                                   z,
2193                                                   test_object_offset,
2194                                                   direction,
2195                                                   NULL, &p, NULL);
2196                 if (r <= 0)
2197                         return r;
2198
2199                 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
2200                 if (r < 0)
2201                         return r;
2202
2203                 r = generic_array_bisect_plus_one(f,
2204                                                   le64toh(o->data.entry_offset),
2205                                                   le64toh(o->data.entry_array_offset),
2206                                                   le64toh(o->data.n_entries),
2207                                                   p,
2208                                                   test_object_offset,
2209                                                   direction,
2210                                                   &qo, &q, NULL);
2211
2212                 if (r <= 0)
2213                         return r;
2214
2215                 if (p == q) {
2216                         if (ret)
2217                                 *ret = qo;
2218                         if (offset)
2219                                 *offset = q;
2220
2221                         return 1;
2222                 }
2223
2224                 z = q;
2225         }
2226 }
2227
2228 int journal_file_move_to_entry_by_seqnum_for_data(
2229                 JournalFile *f,
2230                 uint64_t data_offset,
2231                 uint64_t seqnum,
2232                 direction_t direction,
2233                 Object **ret, uint64_t *offset) {
2234
2235         Object *d;
2236         int r;
2237
2238         assert(f);
2239
2240         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2241         if (r < 0)
2242                 return r;
2243
2244         return generic_array_bisect_plus_one(f,
2245                                              le64toh(d->data.entry_offset),
2246                                              le64toh(d->data.entry_array_offset),
2247                                              le64toh(d->data.n_entries),
2248                                              seqnum,
2249                                              test_object_seqnum,
2250                                              direction,
2251                                              ret, offset, NULL);
2252 }
2253
2254 int journal_file_move_to_entry_by_realtime_for_data(
2255                 JournalFile *f,
2256                 uint64_t data_offset,
2257                 uint64_t realtime,
2258                 direction_t direction,
2259                 Object **ret, uint64_t *offset) {
2260
2261         Object *d;
2262         int r;
2263
2264         assert(f);
2265
2266         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2267         if (r < 0)
2268                 return r;
2269
2270         return generic_array_bisect_plus_one(f,
2271                                              le64toh(d->data.entry_offset),
2272                                              le64toh(d->data.entry_array_offset),
2273                                              le64toh(d->data.n_entries),
2274                                              realtime,
2275                                              test_object_realtime,
2276                                              direction,
2277                                              ret, offset, NULL);
2278 }
2279
2280 void journal_file_dump(JournalFile *f) {
2281         Object *o;
2282         int r;
2283         uint64_t p;
2284
2285         assert(f);
2286
2287         journal_file_print_header(f);
2288
2289         p = le64toh(f->header->header_size);
2290         while (p != 0) {
2291                 r = journal_file_move_to_object(f, -1, p, &o);
2292                 if (r < 0)
2293                         goto fail;
2294
2295                 switch (o->object.type) {
2296
2297                 case OBJECT_UNUSED:
2298                         printf("Type: OBJECT_UNUSED\n");
2299                         break;
2300
2301                 case OBJECT_DATA:
2302                         printf("Type: OBJECT_DATA\n");
2303                         break;
2304
2305                 case OBJECT_FIELD:
2306                         printf("Type: OBJECT_FIELD\n");
2307                         break;
2308
2309                 case OBJECT_ENTRY:
2310                         printf("Type: OBJECT_ENTRY seqnum=%"PRIu64" monotonic=%"PRIu64" realtime=%"PRIu64"\n",
2311                                le64toh(o->entry.seqnum),
2312                                le64toh(o->entry.monotonic),
2313                                le64toh(o->entry.realtime));
2314                         break;
2315
2316                 case OBJECT_FIELD_HASH_TABLE:
2317                         printf("Type: OBJECT_FIELD_HASH_TABLE\n");
2318                         break;
2319
2320                 case OBJECT_DATA_HASH_TABLE:
2321                         printf("Type: OBJECT_DATA_HASH_TABLE\n");
2322                         break;
2323
2324                 case OBJECT_ENTRY_ARRAY:
2325                         printf("Type: OBJECT_ENTRY_ARRAY\n");
2326                         break;
2327
2328                 case OBJECT_TAG:
2329                         printf("Type: OBJECT_TAG seqnum=%"PRIu64" epoch=%"PRIu64"\n",
2330                                le64toh(o->tag.seqnum),
2331                                le64toh(o->tag.epoch));
2332                         break;
2333
2334                 default:
2335                         printf("Type: unknown (%u)\n", o->object.type);
2336                         break;
2337                 }
2338
2339                 if (o->object.flags & OBJECT_COMPRESSION_MASK)
2340                         printf("Flags: %s\n",
2341                                object_compressed_to_string(o->object.flags & OBJECT_COMPRESSION_MASK));
2342
2343                 if (p == le64toh(f->header->tail_object_offset))
2344                         p = 0;
2345                 else
2346                         p = p + ALIGN64(le64toh(o->object.size));
2347         }
2348
2349         return;
2350 fail:
2351         log_error("File corrupt");
2352 }
2353
2354 static const char* format_timestamp_safe(char *buf, size_t l, usec_t t) {
2355         const char *x;
2356
2357         x = format_timestamp(buf, l, t);
2358         if (x)
2359                 return x;
2360         return " --- ";
2361 }
2362
2363 void journal_file_print_header(JournalFile *f) {
2364         char a[33], b[33], c[33], d[33];
2365         char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX], z[FORMAT_TIMESTAMP_MAX];
2366         struct stat st;
2367         char bytes[FORMAT_BYTES_MAX];
2368
2369         assert(f);
2370
2371         printf("File Path: %s\n"
2372                "File ID: %s\n"
2373                "Machine ID: %s\n"
2374                "Boot ID: %s\n"
2375                "Sequential Number ID: %s\n"
2376                "State: %s\n"
2377                "Compatible Flags:%s%s\n"
2378                "Incompatible Flags:%s%s%s\n"
2379                "Header size: %"PRIu64"\n"
2380                "Arena size: %"PRIu64"\n"
2381                "Data Hash Table Size: %"PRIu64"\n"
2382                "Field Hash Table Size: %"PRIu64"\n"
2383                "Rotate Suggested: %s\n"
2384                "Head Sequential Number: %"PRIu64"\n"
2385                "Tail Sequential Number: %"PRIu64"\n"
2386                "Head Realtime Timestamp: %s\n"
2387                "Tail Realtime Timestamp: %s\n"
2388                "Tail Monotonic Timestamp: %s\n"
2389                "Objects: %"PRIu64"\n"
2390                "Entry Objects: %"PRIu64"\n",
2391                f->path,
2392                sd_id128_to_string(f->header->file_id, a),
2393                sd_id128_to_string(f->header->machine_id, b),
2394                sd_id128_to_string(f->header->boot_id, c),
2395                sd_id128_to_string(f->header->seqnum_id, d),
2396                f->header->state == STATE_OFFLINE ? "OFFLINE" :
2397                f->header->state == STATE_ONLINE ? "ONLINE" :
2398                f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
2399                JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
2400                (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_ANY) ? " ???" : "",
2401                JOURNAL_HEADER_COMPRESSED_XZ(f->header) ? " COMPRESSED-XZ" : "",
2402                JOURNAL_HEADER_COMPRESSED_LZ4(f->header) ? " COMPRESSED-LZ4" : "",
2403                (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_ANY) ? " ???" : "",
2404                le64toh(f->header->header_size),
2405                le64toh(f->header->arena_size),
2406                le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
2407                le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
2408                yes_no(journal_file_rotate_suggested(f, 0)),
2409                le64toh(f->header->head_entry_seqnum),
2410                le64toh(f->header->tail_entry_seqnum),
2411                format_timestamp_safe(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
2412                format_timestamp_safe(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
2413                format_timespan(z, sizeof(z), le64toh(f->header->tail_entry_monotonic), USEC_PER_MSEC),
2414                le64toh(f->header->n_objects),
2415                le64toh(f->header->n_entries));
2416
2417         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2418                 printf("Data Objects: %"PRIu64"\n"
2419                        "Data Hash Table Fill: %.1f%%\n",
2420                        le64toh(f->header->n_data),
2421                        100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
2422
2423         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2424                 printf("Field Objects: %"PRIu64"\n"
2425                        "Field Hash Table Fill: %.1f%%\n",
2426                        le64toh(f->header->n_fields),
2427                        100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
2428
2429         if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
2430                 printf("Tag Objects: %"PRIu64"\n",
2431                        le64toh(f->header->n_tags));
2432         if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
2433                 printf("Entry Array Objects: %"PRIu64"\n",
2434                        le64toh(f->header->n_entry_arrays));
2435
2436         if (fstat(f->fd, &st) >= 0)
2437                 printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (off_t) st.st_blocks * 512ULL));
2438 }
2439
2440 int journal_file_open(
2441                 const char *fname,
2442                 int flags,
2443                 mode_t mode,
2444                 bool compress,
2445                 bool seal,
2446                 JournalMetrics *metrics,
2447                 MMapCache *mmap_cache,
2448                 JournalFile *template,
2449                 JournalFile **ret) {
2450
2451         JournalFile *f;
2452         int r;
2453         bool newly_created = false;
2454
2455         assert(fname);
2456         assert(ret);
2457
2458         if ((flags & O_ACCMODE) != O_RDONLY &&
2459             (flags & O_ACCMODE) != O_RDWR)
2460                 return -EINVAL;
2461
2462         if (!endswith(fname, ".journal") &&
2463             !endswith(fname, ".journal~"))
2464                 return -EINVAL;
2465
2466         f = new0(JournalFile, 1);
2467         if (!f)
2468                 return -ENOMEM;
2469
2470         f->fd = -1;
2471         f->mode = mode;
2472
2473         f->flags = flags;
2474         f->prot = prot_from_flags(flags);
2475         f->writable = (flags & O_ACCMODE) != O_RDONLY;
2476 #if defined(HAVE_LZ4)
2477         f->compress_lz4 = compress;
2478 #elif defined(HAVE_XZ)
2479         f->compress_xz = compress;
2480 #endif
2481 #ifdef HAVE_GCRYPT
2482         f->seal = seal;
2483 #endif
2484
2485         if (mmap_cache)
2486                 f->mmap = mmap_cache_ref(mmap_cache);
2487         else {
2488                 f->mmap = mmap_cache_new();
2489                 if (!f->mmap) {
2490                         r = -ENOMEM;
2491                         goto fail;
2492                 }
2493         }
2494
2495         f->path = strdup(fname);
2496         if (!f->path) {
2497                 r = -ENOMEM;
2498                 goto fail;
2499         }
2500
2501         f->chain_cache = ordered_hashmap_new(&uint64_hash_ops);
2502         if (!f->chain_cache) {
2503                 r = -ENOMEM;
2504                 goto fail;
2505         }
2506
2507         f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2508         if (f->fd < 0) {
2509                 r = -errno;
2510                 goto fail;
2511         }
2512
2513         if (fstat(f->fd, &f->last_stat) < 0) {
2514                 r = -errno;
2515                 goto fail;
2516         }
2517
2518         if (f->last_stat.st_size == 0 && f->writable) {
2519                 uint64_t crtime;
2520
2521                 /* Let's attach the creation time to the journal file,
2522                  * so that the vacuuming code knows the age of this
2523                  * file even if the file might end up corrupted one
2524                  * day... Ideally we'd just use the creation time many
2525                  * file systems maintain for each file, but there is
2526                  * currently no usable API to query this, hence let's
2527                  * emulate this via extended attributes. If extended
2528                  * attributes are not supported we'll just skip this,
2529                  * and rely solely on mtime/atime/ctime of the file.*/
2530
2531                 crtime = htole64((uint64_t) now(CLOCK_REALTIME));
2532                 fsetxattr(f->fd, "user.crtime_usec", &crtime, sizeof(crtime), XATTR_CREATE);
2533
2534 #ifdef HAVE_GCRYPT
2535                 /* Try to load the FSPRG state, and if we can't, then
2536                  * just don't do sealing */
2537                 if (f->seal) {
2538                         r = journal_file_fss_load(f);
2539                         if (r < 0)
2540                                 f->seal = false;
2541                 }
2542 #endif
2543
2544                 r = journal_file_init_header(f, template);
2545                 if (r < 0)
2546                         goto fail;
2547
2548                 if (fstat(f->fd, &f->last_stat) < 0) {
2549                         r = -errno;
2550                         goto fail;
2551                 }
2552
2553                 newly_created = true;
2554         }
2555
2556         if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2557                 r = -EIO;
2558                 goto fail;
2559         }
2560
2561         f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2562         if (f->header == MAP_FAILED) {
2563                 f->header = NULL;
2564                 r = -errno;
2565                 goto fail;
2566         }
2567
2568         if (!newly_created) {
2569                 r = journal_file_verify_header(f);
2570                 if (r < 0)
2571                         goto fail;
2572         }
2573
2574 #ifdef HAVE_GCRYPT
2575         if (!newly_created && f->writable) {
2576                 r = journal_file_fss_load(f);
2577                 if (r < 0)
2578                         goto fail;
2579         }
2580 #endif
2581
2582         if (f->writable) {
2583                 if (metrics) {
2584                         journal_default_metrics(metrics, f->fd);
2585                         f->metrics = *metrics;
2586                 } else if (template)
2587                         f->metrics = template->metrics;
2588
2589                 r = journal_file_refresh_header(f);
2590                 if (r < 0)
2591                         goto fail;
2592         }
2593
2594 #ifdef HAVE_GCRYPT
2595         r = journal_file_hmac_setup(f);
2596         if (r < 0)
2597                 goto fail;
2598 #endif
2599
2600         if (newly_created) {
2601                 r = journal_file_setup_field_hash_table(f);
2602                 if (r < 0)
2603                         goto fail;
2604
2605                 r = journal_file_setup_data_hash_table(f);
2606                 if (r < 0)
2607                         goto fail;
2608
2609 #ifdef HAVE_GCRYPT
2610                 r = journal_file_append_first_tag(f);
2611                 if (r < 0)
2612                         goto fail;
2613 #endif
2614         }
2615
2616         r = journal_file_map_field_hash_table(f);
2617         if (r < 0)
2618                 goto fail;
2619
2620         r = journal_file_map_data_hash_table(f);
2621         if (r < 0)
2622                 goto fail;
2623
2624         *ret = f;
2625         return 0;
2626
2627 fail:
2628         journal_file_close(f);
2629
2630         return r;
2631 }
2632
2633 int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
2634         _cleanup_free_ char *p = NULL;
2635         size_t l;
2636         JournalFile *old_file, *new_file = NULL;
2637         int r;
2638
2639         assert(f);
2640         assert(*f);
2641
2642         old_file = *f;
2643
2644         if (!old_file->writable)
2645                 return -EINVAL;
2646
2647         if (!endswith(old_file->path, ".journal"))
2648                 return -EINVAL;
2649
2650         l = strlen(old_file->path);
2651         r = asprintf(&p, "%.*s@" SD_ID128_FORMAT_STR "-%016"PRIx64"-%016"PRIx64".journal",
2652                      (int) l - 8, old_file->path,
2653                      SD_ID128_FORMAT_VAL(old_file->header->seqnum_id),
2654                      le64toh((*f)->header->head_entry_seqnum),
2655                      le64toh((*f)->header->head_entry_realtime));
2656         if (r < 0)
2657                 return -ENOMEM;
2658
2659         r = rename(old_file->path, p);
2660         if (r < 0)
2661                 return -errno;
2662
2663         old_file->header->state = STATE_ARCHIVED;
2664
2665         r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
2666         journal_file_close(old_file);
2667
2668         *f = new_file;
2669         return r;
2670 }
2671
2672 int journal_file_open_reliably(
2673                 const char *fname,
2674                 int flags,
2675                 mode_t mode,
2676                 bool compress,
2677                 bool seal,
2678                 JournalMetrics *metrics,
2679                 MMapCache *mmap_cache,
2680                 JournalFile *template,
2681                 JournalFile **ret) {
2682
2683         int r;
2684         size_t l;
2685         _cleanup_free_ char *p = NULL;
2686
2687         r = journal_file_open(fname, flags, mode, compress, seal,
2688                               metrics, mmap_cache, template, ret);
2689         if (r != -EBADMSG && /* corrupted */
2690             r != -ENODATA && /* truncated */
2691             r != -EHOSTDOWN && /* other machine */
2692             r != -EPROTONOSUPPORT && /* incompatible feature */
2693             r != -EBUSY && /* unclean shutdown */
2694             r != -ESHUTDOWN /* already archived */)
2695                 return r;
2696
2697         if ((flags & O_ACCMODE) == O_RDONLY)
2698                 return r;
2699
2700         if (!(flags & O_CREAT))
2701                 return r;
2702
2703         if (!endswith(fname, ".journal"))
2704                 return r;
2705
2706         /* The file is corrupted. Rotate it away and try it again (but only once) */
2707
2708         l = strlen(fname);
2709         if (asprintf(&p, "%.*s@%016llx-%016" PRIx64 ".journal~",
2710                      (int) l - 8, fname,
2711                      (unsigned long long) now(CLOCK_REALTIME),
2712                      random_u64()) < 0)
2713                 return -ENOMEM;
2714
2715         r = rename(fname, p);
2716         if (r < 0)
2717                 return -errno;
2718
2719         log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
2720
2721         return journal_file_open(fname, flags, mode, compress, seal,
2722                                  metrics, mmap_cache, template, ret);
2723 }
2724
2725 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2726         uint64_t i, n;
2727         uint64_t q, xor_hash = 0;
2728         int r;
2729         EntryItem *items;
2730         dual_timestamp ts;
2731
2732         assert(from);
2733         assert(to);
2734         assert(o);
2735         assert(p);
2736
2737         if (!to->writable)
2738                 return -EPERM;
2739
2740         ts.monotonic = le64toh(o->entry.monotonic);
2741         ts.realtime = le64toh(o->entry.realtime);
2742
2743         n = journal_file_entry_n_items(o);
2744         /* alloca() can't take 0, hence let's allocate at least one */
2745         items = alloca(sizeof(EntryItem) * MAX(1u, n));
2746
2747         for (i = 0; i < n; i++) {
2748                 uint64_t l, h;
2749                 le64_t le_hash;
2750                 size_t t;
2751                 void *data;
2752                 Object *u;
2753
2754                 q = le64toh(o->entry.items[i].object_offset);
2755                 le_hash = o->entry.items[i].hash;
2756
2757                 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2758                 if (r < 0)
2759                         return r;
2760
2761                 if (le_hash != o->data.hash)
2762                         return -EBADMSG;
2763
2764                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2765                 t = (size_t) l;
2766
2767                 /* We hit the limit on 32bit machines */
2768                 if ((uint64_t) t != l)
2769                         return -E2BIG;
2770
2771                 if (o->object.flags & OBJECT_COMPRESSION_MASK) {
2772 #if defined(HAVE_XZ) || defined(HAVE_LZ4)
2773                         size_t rsize;
2774
2775                         r = decompress_blob(o->object.flags & OBJECT_COMPRESSION_MASK,
2776                                             o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize, 0);
2777                         if (r < 0)
2778                                 return r;
2779
2780                         data = from->compress_buffer;
2781                         l = rsize;
2782 #else
2783                         return -EPROTONOSUPPORT;
2784 #endif
2785                 } else
2786                         data = o->data.payload;
2787
2788                 r = journal_file_append_data(to, data, l, &u, &h);
2789                 if (r < 0)
2790                         return r;
2791
2792                 xor_hash ^= le64toh(u->data.hash);
2793                 items[i].object_offset = htole64(h);
2794                 items[i].hash = u->data.hash;
2795
2796                 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2797                 if (r < 0)
2798                         return r;
2799         }
2800
2801         return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2802 }
2803
2804 void journal_default_metrics(JournalMetrics *m, int fd) {
2805         uint64_t fs_size = 0;
2806         struct statvfs ss;
2807         char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
2808
2809         assert(m);
2810         assert(fd >= 0);
2811
2812         if (fstatvfs(fd, &ss) >= 0)
2813                 fs_size = ss.f_frsize * ss.f_blocks;
2814
2815         if (m->max_use == (uint64_t) -1) {
2816
2817                 if (fs_size > 0) {
2818                         m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2819
2820                         if (m->max_use > DEFAULT_MAX_USE_UPPER)
2821                                 m->max_use = DEFAULT_MAX_USE_UPPER;
2822
2823                         if (m->max_use < DEFAULT_MAX_USE_LOWER)
2824                                 m->max_use = DEFAULT_MAX_USE_LOWER;
2825                 } else
2826                         m->max_use = DEFAULT_MAX_USE_LOWER;
2827         } else {
2828                 m->max_use = PAGE_ALIGN(m->max_use);
2829
2830                 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2831                         m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2832         }
2833
2834         if (m->max_size == (uint64_t) -1) {
2835                 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2836
2837                 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2838                         m->max_size = DEFAULT_MAX_SIZE_UPPER;
2839         } else
2840                 m->max_size = PAGE_ALIGN(m->max_size);
2841
2842         if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2843                 m->max_size = JOURNAL_FILE_SIZE_MIN;
2844
2845         if (m->max_size*2 > m->max_use)
2846                 m->max_use = m->max_size*2;
2847
2848         if (m->min_size == (uint64_t) -1)
2849                 m->min_size = JOURNAL_FILE_SIZE_MIN;
2850         else {
2851                 m->min_size = PAGE_ALIGN(m->min_size);
2852
2853                 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2854                         m->min_size = JOURNAL_FILE_SIZE_MIN;
2855
2856                 if (m->min_size > m->max_size)
2857                         m->max_size = m->min_size;
2858         }
2859
2860         if (m->keep_free == (uint64_t) -1) {
2861
2862                 if (fs_size > 0) {
2863                         m->keep_free = PAGE_ALIGN(fs_size * 3 / 20); /* 15% of file system size */
2864
2865                         if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2866                                 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2867
2868                 } else
2869                         m->keep_free = DEFAULT_KEEP_FREE;
2870         }
2871
2872         log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2873                   format_bytes(a, sizeof(a), m->max_use),
2874                   format_bytes(b, sizeof(b), m->max_size),
2875                   format_bytes(c, sizeof(c), m->min_size),
2876                   format_bytes(d, sizeof(d), m->keep_free));
2877 }
2878
2879 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2880         assert(f);
2881         assert(from || to);
2882
2883         if (from) {
2884                 if (f->header->head_entry_realtime == 0)
2885                         return -ENOENT;
2886
2887                 *from = le64toh(f->header->head_entry_realtime);
2888         }
2889
2890         if (to) {
2891                 if (f->header->tail_entry_realtime == 0)
2892                         return -ENOENT;
2893
2894                 *to = le64toh(f->header->tail_entry_realtime);
2895         }
2896
2897         return 1;
2898 }
2899
2900 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2901         Object *o;
2902         uint64_t p;
2903         int r;
2904
2905         assert(f);
2906         assert(from || to);
2907
2908         r = find_data_object_by_boot_id(f, boot_id, &o, &p);
2909         if (r <= 0)
2910                 return r;
2911
2912         if (le64toh(o->data.n_entries) <= 0)
2913                 return 0;
2914
2915         if (from) {
2916                 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2917                 if (r < 0)
2918                         return r;
2919
2920                 *from = le64toh(o->entry.monotonic);
2921         }
2922
2923         if (to) {
2924                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2925                 if (r < 0)
2926                         return r;
2927
2928                 r = generic_array_get_plus_one(f,
2929                                                le64toh(o->data.entry_offset),
2930                                                le64toh(o->data.entry_array_offset),
2931                                                le64toh(o->data.n_entries)-1,
2932                                                &o, NULL);
2933                 if (r <= 0)
2934                         return r;
2935
2936                 *to = le64toh(o->entry.monotonic);
2937         }
2938
2939         return 1;
2940 }
2941
2942 bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec) {
2943         assert(f);
2944
2945         /* If we gained new header fields we gained new features,
2946          * hence suggest a rotation */
2947         if (le64toh(f->header->header_size) < sizeof(Header)) {
2948                 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
2949                 return true;
2950         }
2951
2952         /* Let's check if the hash tables grew over a certain fill
2953          * level (75%, borrowing this value from Java's hash table
2954          * implementation), and if so suggest a rotation. To calculate
2955          * the fill level we need the n_data field, which only exists
2956          * in newer versions. */
2957
2958         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2959                 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2960                         log_debug("Data hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items, %llu file size, %"PRIu64" bytes per hash table item), suggesting rotation.",
2961                                   f->path,
2962                                   100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2963                                   le64toh(f->header->n_data),
2964                                   le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
2965                                   (unsigned long long) f->last_stat.st_size,
2966                                   f->last_stat.st_size / le64toh(f->header->n_data));
2967                         return true;
2968                 }
2969
2970         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2971                 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2972                         log_debug("Field hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items), suggesting rotation.",
2973                                   f->path,
2974                                   100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2975                                   le64toh(f->header->n_fields),
2976                                   le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
2977                         return true;
2978                 }
2979
2980         /* Are the data objects properly indexed by field objects? */
2981         if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
2982             JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
2983             le64toh(f->header->n_data) > 0 &&
2984             le64toh(f->header->n_fields) == 0)
2985                 return true;
2986
2987         if (max_file_usec > 0) {
2988                 usec_t t, h;
2989
2990                 h = le64toh(f->header->head_entry_realtime);
2991                 t = now(CLOCK_REALTIME);
2992
2993                 if (h > 0 && t > h + max_file_usec)
2994                         return true;
2995         }
2996
2997         return false;
2998 }