chiark / gitweb /
fix #ifdef
[elogind.git] / src / journal / journal-file.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mman.h>
23 #include <errno.h>
24 #include <sys/uio.h>
25 #include <unistd.h>
26 #include <sys/statvfs.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/xattr.h>
30
31 #include "journal-def.h"
32 #include "journal-file.h"
33 #include "journal-authenticate.h"
34 #include "lookup3.h"
35 #include "compress.h"
36 #include "fsprg.h"
37
/* Default sizes, in bytes, of the data and field hash tables: a fixed
 * number of HashItem slots each. The data table may be sized larger,
 * based on the configured maximum file size. */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))

/* Data payloads of at least this many bytes are candidates for compression */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* This is the minimum journal file size */
#define JOURNAL_FILE_SIZE_MIN (4ULL*1024ULL*1024ULL)           /* 4 MiB */

/* These are the lower and upper bounds if we deduce the max_use value
 * from the file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL)           /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)   /* 4 GiB */

/* This is the upper bound if we deduce max_size from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL)        /* 128 MiB */

/* This is the upper bound if we deduce the keep_free value from the
 * file system size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */

/* This is the keep_free value when we can't determine the system
 * size */
#define DEFAULT_KEEP_FREE (1024ULL*1024ULL)                    /* 1 MiB */

/* n_data was the first entry we added after the initial file format design,
 * hence every valid header is at least large enough to reach up to it */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))

/* How many entries to keep in the entry array chain cache at max */
#define CHAIN_CACHE_MAX 20

/* How much to increase the journal file size at once each time we allocate something new. */
#define FILE_SIZE_INCREASE (8ULL*1024ULL*1024ULL)              /* 8 MiB */
70
71 static int journal_file_set_online(JournalFile *f) {
72         assert(f);
73
74         if (!f->writable)
75                 return -EPERM;
76
77         if (!(f->fd >= 0 && f->header))
78                 return -EINVAL;
79
80         switch(f->header->state) {
81                 case STATE_ONLINE:
82                         return 0;
83
84                 case STATE_OFFLINE:
85                         f->header->state = STATE_ONLINE;
86                         fsync(f->fd);
87                         return 0;
88
89                 default:
90                         return -EINVAL;
91         }
92 }
93
94 int journal_file_set_offline(JournalFile *f) {
95         assert(f);
96
97         if (!f->writable)
98                 return -EPERM;
99
100         if (!(f->fd >= 0 && f->header))
101                 return -EINVAL;
102
103         if (f->header->state != STATE_ONLINE)
104                 return 0;
105
106         fsync(f->fd);
107
108         f->header->state = STATE_OFFLINE;
109
110         fsync(f->fd);
111
112         return 0;
113 }
114
115 void journal_file_close(JournalFile *f) {
116         assert(f);
117
118 #ifdef HAVE_GCRYPT
119         /* Write the final tag */
120         if (f->seal && f->writable)
121                 journal_file_append_tag(f);
122 #endif
123
124         /* Sync everything to disk, before we mark the file offline */
125         if (f->mmap && f->fd >= 0)
126                 mmap_cache_close_fd(f->mmap, f->fd);
127
128         journal_file_set_offline(f);
129
130         if (f->header)
131                 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
132
133         safe_close(f->fd);
134         free(f->path);
135
136         if (f->mmap)
137                 mmap_cache_unref(f->mmap);
138
139         hashmap_free_free(f->chain_cache);
140
141 #if defined(HAVE_XZ) || defined(HAVE_LZ4)
142         free(f->compress_buffer);
143 #endif
144
145 #ifdef HAVE_GCRYPT
146         if (f->fss_file)
147                 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
148         else if (f->fsprg_state)
149                 free(f->fsprg_state);
150
151         free(f->fsprg_seed);
152
153         if (f->hmac)
154                 gcry_md_close(f->hmac);
155 #endif
156
157         free(f);
158 }
159
160 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
161         Header h = {};
162         ssize_t k;
163         int r;
164
165         assert(f);
166
167         memcpy(h.signature, HEADER_SIGNATURE, 8);
168         h.header_size = htole64(ALIGN64(sizeof(h)));
169
170         h.incompatible_flags |= htole32(
171                 f->compress_xz * HEADER_INCOMPATIBLE_COMPRESSED_XZ |
172                 f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4);
173
174         h.compatible_flags = htole32(
175                 f->seal * HEADER_COMPATIBLE_SEALED);
176
177         r = sd_id128_randomize(&h.file_id);
178         if (r < 0)
179                 return r;
180
181         if (template) {
182                 h.seqnum_id = template->header->seqnum_id;
183                 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
184         } else
185                 h.seqnum_id = h.file_id;
186
187         k = pwrite(f->fd, &h, sizeof(h), 0);
188         if (k < 0)
189                 return -errno;
190
191         if (k != sizeof(h))
192                 return -EIO;
193
194         return 0;
195 }
196
197 static int journal_file_refresh_header(JournalFile *f) {
198         int r;
199         sd_id128_t boot_id;
200
201         assert(f);
202
203         r = sd_id128_get_machine(&f->header->machine_id);
204         if (r < 0)
205                 return r;
206
207         r = sd_id128_get_boot(&boot_id);
208         if (r < 0)
209                 return r;
210
211         if (sd_id128_equal(boot_id, f->header->boot_id))
212                 f->tail_entry_monotonic_valid = true;
213
214         f->header->boot_id = boot_id;
215
216         journal_file_set_online(f);
217
218         /* Sync the online state to disk */
219         fsync(f->fd);
220
221         return 0;
222 }
223
224 static int journal_file_verify_header(JournalFile *f) {
225         uint32_t flags;
226
227         assert(f);
228
229         if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
230                 return -EBADMSG;
231
232         /* In both read and write mode we refuse to open files with
233          * incompatible flags we don't know */
234         flags = le32toh(f->header->incompatible_flags);
235         if (flags & ~HEADER_INCOMPATIBLE_SUPPORTED) {
236                 if (flags & ~HEADER_INCOMPATIBLE_ANY)
237                         log_debug("Journal file %s has unknown incompatible flags %"PRIx32,
238                                   f->path, flags & ~HEADER_INCOMPATIBLE_ANY);
239                 flags = (flags & HEADER_INCOMPATIBLE_ANY) & ~HEADER_INCOMPATIBLE_SUPPORTED;
240                 if (flags)
241                         log_debug("Journal file %s uses incompatible flags %"PRIx32
242                                   " disabled at compilation time.", f->path, flags);
243                 return -EPROTONOSUPPORT;
244         }
245
246         /* When open for writing we refuse to open files with
247          * compatible flags, too */
248         flags = le32toh(f->header->compatible_flags);
249         if (f->writable && (flags & ~HEADER_COMPATIBLE_SUPPORTED)) {
250                 if (flags & ~HEADER_COMPATIBLE_ANY)
251                         log_debug("Journal file %s has unknown compatible flags %"PRIx32,
252                                   f->path, flags & ~HEADER_COMPATIBLE_ANY);
253                 flags = (flags & HEADER_COMPATIBLE_ANY) & ~HEADER_COMPATIBLE_SUPPORTED;
254                 if (flags)
255                         log_debug("Journal file %s uses compatible flags %"PRIx32
256                                   " disabled at compilation time.", f->path, flags);
257                 return -EPROTONOSUPPORT;
258         }
259
260         if (f->header->state >= _STATE_MAX)
261                 return -EBADMSG;
262
263         /* The first addition was n_data, so check that we are at least this large */
264         if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
265                 return -EBADMSG;
266
267         if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
268                 return -EBADMSG;
269
270         if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
271                 return -ENODATA;
272
273         if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
274                 return -ENODATA;
275
276         if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
277             !VALID64(le64toh(f->header->field_hash_table_offset)) ||
278             !VALID64(le64toh(f->header->tail_object_offset)) ||
279             !VALID64(le64toh(f->header->entry_array_offset)))
280                 return -ENODATA;
281
282         if (le64toh(f->header->data_hash_table_offset) < le64toh(f->header->header_size) ||
283             le64toh(f->header->field_hash_table_offset) < le64toh(f->header->header_size) ||
284             le64toh(f->header->tail_object_offset) < le64toh(f->header->header_size) ||
285             le64toh(f->header->entry_array_offset) < le64toh(f->header->header_size))
286                 return -ENODATA;
287
288         if (f->writable) {
289                 uint8_t state;
290                 sd_id128_t machine_id;
291                 int r;
292
293                 r = sd_id128_get_machine(&machine_id);
294                 if (r < 0)
295                         return r;
296
297                 if (!sd_id128_equal(machine_id, f->header->machine_id))
298                         return -EHOSTDOWN;
299
300                 state = f->header->state;
301
302                 if (state == STATE_ONLINE) {
303                         log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
304                         return -EBUSY;
305                 } else if (state == STATE_ARCHIVED)
306                         return -ESHUTDOWN;
307                 else if (state != STATE_OFFLINE) {
308                         log_debug("Journal file %s has unknown state %u.", f->path, state);
309                         return -EBUSY;
310                 }
311         }
312
313         f->compress_xz = JOURNAL_HEADER_COMPRESSED_XZ(f->header);
314         f->compress_lz4 = JOURNAL_HEADER_COMPRESSED_LZ4(f->header);
315
316         f->seal = JOURNAL_HEADER_SEALED(f->header);
317
318         return 0;
319 }
320
321 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
322         uint64_t old_size, new_size;
323         int r;
324
325         assert(f);
326
327         /* We assume that this file is not sparse, and we know that
328          * for sure, since we always call posix_fallocate()
329          * ourselves */
330
331         old_size =
332                 le64toh(f->header->header_size) +
333                 le64toh(f->header->arena_size);
334
335         new_size = PAGE_ALIGN(offset + size);
336         if (new_size < le64toh(f->header->header_size))
337                 new_size = le64toh(f->header->header_size);
338
339         if (new_size <= old_size)
340                 return 0;
341
342         if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
343                 return -E2BIG;
344
345         if (new_size > f->metrics.min_size && f->metrics.keep_free > 0) {
346                 struct statvfs svfs;
347
348                 if (fstatvfs(f->fd, &svfs) >= 0) {
349                         uint64_t available;
350
351                         available = svfs.f_bfree * svfs.f_bsize;
352
353                         if (available >= f->metrics.keep_free)
354                                 available -= f->metrics.keep_free;
355                         else
356                                 available = 0;
357
358                         if (new_size - old_size > available)
359                                 return -E2BIG;
360                 }
361         }
362
363         /* Increase by larger blocks at once */
364         new_size = ((new_size+FILE_SIZE_INCREASE-1) / FILE_SIZE_INCREASE) * FILE_SIZE_INCREASE;
365         if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
366                 new_size = f->metrics.max_size;
367
368         /* Note that the glibc fallocate() fallback is very
369            inefficient, hence we try to minimize the allocation area
370            as we can. */
371         r = posix_fallocate(f->fd, old_size, new_size - old_size);
372         if (r != 0)
373                 return -r;
374
375         if (fstat(f->fd, &f->last_stat) < 0)
376                 return -errno;
377
378         f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
379
380         return 0;
381 }
382
383 static int journal_file_move_to(JournalFile *f, int context, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
384         assert(f);
385         assert(ret);
386
387         if (size <= 0)
388                 return -EINVAL;
389
390         /* Avoid SIGBUS on invalid accesses */
391         if (offset + size > (uint64_t) f->last_stat.st_size) {
392                 /* Hmm, out of range? Let's refresh the fstat() data
393                  * first, before we trust that check. */
394
395                 if (fstat(f->fd, &f->last_stat) < 0 ||
396                     offset + size > (uint64_t) f->last_stat.st_size)
397                         return -EADDRNOTAVAIL;
398         }
399
400         return mmap_cache_get(f->mmap, f->fd, f->prot, context, keep_always, offset, size, &f->last_stat, ret);
401 }
402
403 static uint64_t minimum_header_size(Object *o) {
404
405         static const uint64_t table[] = {
406                 [OBJECT_DATA] = sizeof(DataObject),
407                 [OBJECT_FIELD] = sizeof(FieldObject),
408                 [OBJECT_ENTRY] = sizeof(EntryObject),
409                 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
410                 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
411                 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
412                 [OBJECT_TAG] = sizeof(TagObject),
413         };
414
415         if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
416                 return sizeof(ObjectHeader);
417
418         return table[o->object.type];
419 }
420
421 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
422         int r;
423         void *t;
424         Object *o;
425         uint64_t s;
426
427         assert(f);
428         assert(ret);
429
430         /* Objects may only be located at multiple of 64 bit */
431         if (!VALID64(offset))
432                 return -EFAULT;
433
434
435         r = journal_file_move_to(f, type_to_context(type), false, offset, sizeof(ObjectHeader), &t);
436         if (r < 0)
437                 return r;
438
439         o = (Object*) t;
440         s = le64toh(o->object.size);
441
442         if (s < sizeof(ObjectHeader))
443                 return -EBADMSG;
444
445         if (o->object.type <= OBJECT_UNUSED)
446                 return -EBADMSG;
447
448         if (s < minimum_header_size(o))
449                 return -EBADMSG;
450
451         if (type > 0 && o->object.type != type)
452                 return -EBADMSG;
453
454         if (s > sizeof(ObjectHeader)) {
455                 r = journal_file_move_to(f, o->object.type, false, offset, s, &t);
456                 if (r < 0)
457                         return r;
458
459                 o = (Object*) t;
460         }
461
462         *ret = o;
463         return 0;
464 }
465
466 static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
467         uint64_t r;
468
469         assert(f);
470
471         r = le64toh(f->header->tail_entry_seqnum) + 1;
472
473         if (seqnum) {
474                 /* If an external seqnum counter was passed, we update
475                  * both the local and the external one, and set it to
476                  * the maximum of both */
477
478                 if (*seqnum + 1 > r)
479                         r = *seqnum + 1;
480
481                 *seqnum = r;
482         }
483
484         f->header->tail_entry_seqnum = htole64(r);
485
486         if (f->header->head_entry_seqnum == 0)
487                 f->header->head_entry_seqnum = htole64(r);
488
489         return r;
490 }
491
492 int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
493         int r;
494         uint64_t p;
495         Object *tail, *o;
496         void *t;
497
498         assert(f);
499         assert(type > 0 && type < _OBJECT_TYPE_MAX);
500         assert(size >= sizeof(ObjectHeader));
501         assert(offset);
502         assert(ret);
503
504         r = journal_file_set_online(f);
505         if (r < 0)
506                 return r;
507
508         p = le64toh(f->header->tail_object_offset);
509         if (p == 0)
510                 p = le64toh(f->header->header_size);
511         else {
512                 r = journal_file_move_to_object(f, -1, p, &tail);
513                 if (r < 0)
514                         return r;
515
516                 p += ALIGN64(le64toh(tail->object.size));
517         }
518
519         r = journal_file_allocate(f, p, size);
520         if (r < 0)
521                 return r;
522
523         r = journal_file_move_to(f, type, false, p, size, &t);
524         if (r < 0)
525                 return r;
526
527         o = (Object*) t;
528
529         zero(o->object);
530         o->object.type = type;
531         o->object.size = htole64(size);
532
533         f->header->tail_object_offset = htole64(p);
534         f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
535
536         *ret = o;
537         *offset = p;
538
539         return 0;
540 }
541
542 static int journal_file_setup_data_hash_table(JournalFile *f) {
543         uint64_t s, p;
544         Object *o;
545         int r;
546
547         assert(f);
548
549         /* We estimate that we need 1 hash table entry per 768 of
550            journal file and we want to make sure we never get beyond
551            75% fill level. Calculate the hash table size for the
552            maximum file size based on these metrics. */
553
554         s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
555         if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
556                 s = DEFAULT_DATA_HASH_TABLE_SIZE;
557
558         log_debug("Reserving %"PRIu64" entries in hash table.", s / sizeof(HashItem));
559
560         r = journal_file_append_object(f,
561                                        OBJECT_DATA_HASH_TABLE,
562                                        offsetof(Object, hash_table.items) + s,
563                                        &o, &p);
564         if (r < 0)
565                 return r;
566
567         memzero(o->hash_table.items, s);
568
569         f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
570         f->header->data_hash_table_size = htole64(s);
571
572         return 0;
573 }
574
575 static int journal_file_setup_field_hash_table(JournalFile *f) {
576         uint64_t s, p;
577         Object *o;
578         int r;
579
580         assert(f);
581
582         /* We use a fixed size hash table for the fields as this
583          * number should grow very slowly only */
584
585         s = DEFAULT_FIELD_HASH_TABLE_SIZE;
586         r = journal_file_append_object(f,
587                                        OBJECT_FIELD_HASH_TABLE,
588                                        offsetof(Object, hash_table.items) + s,
589                                        &o, &p);
590         if (r < 0)
591                 return r;
592
593         memzero(o->hash_table.items, s);
594
595         f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
596         f->header->field_hash_table_size = htole64(s);
597
598         return 0;
599 }
600
601 static int journal_file_map_data_hash_table(JournalFile *f) {
602         uint64_t s, p;
603         void *t;
604         int r;
605
606         assert(f);
607
608         p = le64toh(f->header->data_hash_table_offset);
609         s = le64toh(f->header->data_hash_table_size);
610
611         r = journal_file_move_to(f,
612                                  OBJECT_DATA_HASH_TABLE,
613                                  true,
614                                  p, s,
615                                  &t);
616         if (r < 0)
617                 return r;
618
619         f->data_hash_table = t;
620         return 0;
621 }
622
623 static int journal_file_map_field_hash_table(JournalFile *f) {
624         uint64_t s, p;
625         void *t;
626         int r;
627
628         assert(f);
629
630         p = le64toh(f->header->field_hash_table_offset);
631         s = le64toh(f->header->field_hash_table_size);
632
633         r = journal_file_move_to(f,
634                                  OBJECT_FIELD_HASH_TABLE,
635                                  true,
636                                  p, s,
637                                  &t);
638         if (r < 0)
639                 return r;
640
641         f->field_hash_table = t;
642         return 0;
643 }
644
645 static int journal_file_link_field(
646                 JournalFile *f,
647                 Object *o,
648                 uint64_t offset,
649                 uint64_t hash) {
650
651         uint64_t p, h;
652         int r;
653
654         assert(f);
655         assert(o);
656         assert(offset > 0);
657
658         if (o->object.type != OBJECT_FIELD)
659                 return -EINVAL;
660
661         /* This might alter the window we are looking at */
662
663         o->field.next_hash_offset = o->field.head_data_offset = 0;
664
665         h = hash % (le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
666         p = le64toh(f->field_hash_table[h].tail_hash_offset);
667         if (p == 0)
668                 f->field_hash_table[h].head_hash_offset = htole64(offset);
669         else {
670                 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
671                 if (r < 0)
672                         return r;
673
674                 o->field.next_hash_offset = htole64(offset);
675         }
676
677         f->field_hash_table[h].tail_hash_offset = htole64(offset);
678
679         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
680                 f->header->n_fields = htole64(le64toh(f->header->n_fields) + 1);
681
682         return 0;
683 }
684
685 static int journal_file_link_data(
686                 JournalFile *f,
687                 Object *o,
688                 uint64_t offset,
689                 uint64_t hash) {
690
691         uint64_t p, h;
692         int r;
693
694         assert(f);
695         assert(o);
696         assert(offset > 0);
697
698         if (o->object.type != OBJECT_DATA)
699                 return -EINVAL;
700
701         /* This might alter the window we are looking at */
702
703         o->data.next_hash_offset = o->data.next_field_offset = 0;
704         o->data.entry_offset = o->data.entry_array_offset = 0;
705         o->data.n_entries = 0;
706
707         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
708         p = le64toh(f->data_hash_table[h].tail_hash_offset);
709         if (p == 0)
710                 /* Only entry in the hash table is easy */
711                 f->data_hash_table[h].head_hash_offset = htole64(offset);
712         else {
713                 /* Move back to the previous data object, to patch in
714                  * pointer */
715
716                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
717                 if (r < 0)
718                         return r;
719
720                 o->data.next_hash_offset = htole64(offset);
721         }
722
723         f->data_hash_table[h].tail_hash_offset = htole64(offset);
724
725         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
726                 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
727
728         return 0;
729 }
730
731 int journal_file_find_field_object_with_hash(
732                 JournalFile *f,
733                 const void *field, uint64_t size, uint64_t hash,
734                 Object **ret, uint64_t *offset) {
735
736         uint64_t p, osize, h;
737         int r;
738
739         assert(f);
740         assert(field && size > 0);
741
742         osize = offsetof(Object, field.payload) + size;
743
744         if (f->header->field_hash_table_size == 0)
745                 return -EBADMSG;
746
747         h = hash % (le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
748         p = le64toh(f->field_hash_table[h].head_hash_offset);
749
750         while (p > 0) {
751                 Object *o;
752
753                 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
754                 if (r < 0)
755                         return r;
756
757                 if (le64toh(o->field.hash) == hash &&
758                     le64toh(o->object.size) == osize &&
759                     memcmp(o->field.payload, field, size) == 0) {
760
761                         if (ret)
762                                 *ret = o;
763                         if (offset)
764                                 *offset = p;
765
766                         return 1;
767                 }
768
769                 p = le64toh(o->field.next_hash_offset);
770         }
771
772         return 0;
773 }
774
775 int journal_file_find_field_object(
776                 JournalFile *f,
777                 const void *field, uint64_t size,
778                 Object **ret, uint64_t *offset) {
779
780         uint64_t hash;
781
782         assert(f);
783         assert(field && size > 0);
784
785         hash = hash64(field, size);
786
787         return journal_file_find_field_object_with_hash(f,
788                                                         field, size, hash,
789                                                         ret, offset);
790 }
791
792 int journal_file_find_data_object_with_hash(
793                 JournalFile *f,
794                 const void *data, uint64_t size, uint64_t hash,
795                 Object **ret, uint64_t *offset) {
796
797         uint64_t p, osize, h;
798         int r;
799
800         assert(f);
801         assert(data || size == 0);
802
803         osize = offsetof(Object, data.payload) + size;
804
805         if (f->header->data_hash_table_size == 0)
806                 return -EBADMSG;
807
808         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
809         p = le64toh(f->data_hash_table[h].head_hash_offset);
810
811         while (p > 0) {
812                 Object *o;
813
814                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
815                 if (r < 0)
816                         return r;
817
818                 if (le64toh(o->data.hash) != hash)
819                         goto next;
820
821                 if (o->object.flags & OBJECT_COMPRESSION_MASK) {
822                         uint64_t l, rsize;
823
824                         l = le64toh(o->object.size);
825                         if (l <= offsetof(Object, data.payload))
826                                 return -EBADMSG;
827
828                         l -= offsetof(Object, data.payload);
829
830                         r = decompress_blob(o->object.flags & OBJECT_COMPRESSION_MASK,
831                                             o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize, 0);
832                         if (r < 0)
833                                 return r;
834
835                         if (rsize == size &&
836                             memcmp(f->compress_buffer, data, size) == 0) {
837
838                                 if (ret)
839                                         *ret = o;
840
841                                 if (offset)
842                                         *offset = p;
843
844                                 return 1;
845                         }
846
847                 } else if (le64toh(o->object.size) == osize &&
848                            memcmp(o->data.payload, data, size) == 0) {
849
850                         if (ret)
851                                 *ret = o;
852
853                         if (offset)
854                                 *offset = p;
855
856                         return 1;
857                 }
858
859         next:
860                 p = le64toh(o->data.next_hash_offset);
861         }
862
863         return 0;
864 }
865
866 int journal_file_find_data_object(
867                 JournalFile *f,
868                 const void *data, uint64_t size,
869                 Object **ret, uint64_t *offset) {
870
871         uint64_t hash;
872
873         assert(f);
874         assert(data || size == 0);
875
876         hash = hash64(data, size);
877
878         return journal_file_find_data_object_with_hash(f,
879                                                        data, size, hash,
880                                                        ret, offset);
881 }
882
883 static int journal_file_append_field(
884                 JournalFile *f,
885                 const void *field, uint64_t size,
886                 Object **ret, uint64_t *offset) {
887
888         uint64_t hash, p;
889         uint64_t osize;
890         Object *o;
891         int r;
892
893         assert(f);
894         assert(field && size > 0);
895
896         hash = hash64(field, size);
897
898         r = journal_file_find_field_object_with_hash(f, field, size, hash, &o, &p);
899         if (r < 0)
900                 return r;
901         else if (r > 0) {
902
903                 if (ret)
904                         *ret = o;
905
906                 if (offset)
907                         *offset = p;
908
909                 return 0;
910         }
911
912         osize = offsetof(Object, field.payload) + size;
913         r = journal_file_append_object(f, OBJECT_FIELD, osize, &o, &p);
914         if (r < 0)
915                 return r;
916
917         o->field.hash = htole64(hash);
918         memcpy(o->field.payload, field, size);
919
920         r = journal_file_link_field(f, o, p, hash);
921         if (r < 0)
922                 return r;
923
924         /* The linking might have altered the window, so let's
925          * refresh our pointer */
926         r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
927         if (r < 0)
928                 return r;
929
930 #ifdef HAVE_GCRYPT
931         r = journal_file_hmac_put_object(f, OBJECT_FIELD, o, p);
932         if (r < 0)
933                 return r;
934 #endif
935
936         if (ret)
937                 *ret = o;
938
939         if (offset)
940                 *offset = p;
941
942         return 0;
943 }
944
945 static int journal_file_append_data(
946                 JournalFile *f,
947                 const void *data, uint64_t size,
948                 Object **ret, uint64_t *offset) {
949
950         uint64_t hash, p;
951         uint64_t osize;
952         Object *o;
953         int r, compression = 0;
954         const void *eq;
955
956         assert(f);
957         assert(data || size == 0);
958
959         hash = hash64(data, size);
960
961         r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
962         if (r < 0)
963                 return r;
964         else if (r > 0) {
965
966                 if (ret)
967                         *ret = o;
968
969                 if (offset)
970                         *offset = p;
971
972                 return 0;
973         }
974
975         osize = offsetof(Object, data.payload) + size;
976         r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
977         if (r < 0)
978                 return r;
979
980         o->data.hash = htole64(hash);
981
982 #if defined(HAVE_XZ) || defined(HAVE_LZ4)
983         if (f->compress_xz &&
984             size >= COMPRESSION_SIZE_THRESHOLD) {
985                 uint64_t rsize;
986
987                 compression = compress_blob(data, size, o->data.payload, &rsize);
988
989                 if (compression) {
990                         o->object.size = htole64(offsetof(Object, data.payload) + rsize);
991                         o->object.flags |= compression;
992
993                         log_debug("Compressed data object %"PRIu64" -> %"PRIu64" using %s",
994                                   size, rsize, object_compressed_to_string(compression));
995                 }
996         }
997 #endif
998
999         if (!compression && size > 0)
1000                 memcpy(o->data.payload, data, size);
1001
1002         r = journal_file_link_data(f, o, p, hash);
1003         if (r < 0)
1004                 return r;
1005
1006         /* The linking might have altered the window, so let's
1007          * refresh our pointer */
1008         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1009         if (r < 0)
1010                 return r;
1011
1012         if (!data)
1013                 eq = NULL;
1014         else
1015                 eq = memchr(data, '=', size);
1016         if (eq && eq > data) {
1017                 Object *fo = NULL;
1018                 uint64_t fp;
1019
1020                 /* Create field object ... */
1021                 r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, &fp);
1022                 if (r < 0)
1023                         return r;
1024
1025                 /* ... and link it in. */
1026                 o->data.next_field_offset = fo->field.head_data_offset;
1027                 fo->field.head_data_offset = le64toh(p);
1028         }
1029
1030 #ifdef HAVE_GCRYPT
1031         r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
1032         if (r < 0)
1033                 return r;
1034 #endif
1035
1036         if (ret)
1037                 *ret = o;
1038
1039         if (offset)
1040                 *offset = p;
1041
1042         return 0;
1043 }
1044
1045 uint64_t journal_file_entry_n_items(Object *o) {
1046         assert(o);
1047
1048         if (o->object.type != OBJECT_ENTRY)
1049                 return 0;
1050
1051         return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
1052 }
1053
1054 uint64_t journal_file_entry_array_n_items(Object *o) {
1055         assert(o);
1056
1057         if (o->object.type != OBJECT_ENTRY_ARRAY)
1058                 return 0;
1059
1060         return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
1061 }
1062
1063 uint64_t journal_file_hash_table_n_items(Object *o) {
1064         assert(o);
1065
1066         if (o->object.type != OBJECT_DATA_HASH_TABLE &&
1067             o->object.type != OBJECT_FIELD_HASH_TABLE)
1068                 return 0;
1069
1070         return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
1071 }
1072
1073 static int link_entry_into_array(JournalFile *f,
1074                                  le64_t *first,
1075                                  le64_t *idx,
1076                                  uint64_t p) {
1077         int r;
1078         uint64_t n = 0, ap = 0, q, i, a, hidx;
1079         Object *o;
1080
1081         assert(f);
1082         assert(first);
1083         assert(idx);
1084         assert(p > 0);
1085
1086         a = le64toh(*first);
1087         i = hidx = le64toh(*idx);
1088         while (a > 0) {
1089
1090                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1091                 if (r < 0)
1092                         return r;
1093
1094                 n = journal_file_entry_array_n_items(o);
1095                 if (i < n) {
1096                         o->entry_array.items[i] = htole64(p);
1097                         *idx = htole64(hidx + 1);
1098                         return 0;
1099                 }
1100
1101                 i -= n;
1102                 ap = a;
1103                 a = le64toh(o->entry_array.next_entry_array_offset);
1104         }
1105
1106         if (hidx > n)
1107                 n = (hidx+1) * 2;
1108         else
1109                 n = n * 2;
1110
1111         if (n < 4)
1112                 n = 4;
1113
1114         r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
1115                                        offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
1116                                        &o, &q);
1117         if (r < 0)
1118                 return r;
1119
1120 #ifdef HAVE_GCRYPT
1121         r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
1122         if (r < 0)
1123                 return r;
1124 #endif
1125
1126         o->entry_array.items[i] = htole64(p);
1127
1128         if (ap == 0)
1129                 *first = htole64(q);
1130         else {
1131                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
1132                 if (r < 0)
1133                         return r;
1134
1135                 o->entry_array.next_entry_array_offset = htole64(q);
1136         }
1137
1138         if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1139                 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
1140
1141         *idx = htole64(hidx + 1);
1142
1143         return 0;
1144 }
1145
1146 static int link_entry_into_array_plus_one(JournalFile *f,
1147                                           le64_t *extra,
1148                                           le64_t *first,
1149                                           le64_t *idx,
1150                                           uint64_t p) {
1151
1152         int r;
1153
1154         assert(f);
1155         assert(extra);
1156         assert(first);
1157         assert(idx);
1158         assert(p > 0);
1159
1160         if (*idx == 0)
1161                 *extra = htole64(p);
1162         else {
1163                 le64_t i;
1164
1165                 i = htole64(le64toh(*idx) - 1);
1166                 r = link_entry_into_array(f, first, &i, p);
1167                 if (r < 0)
1168                         return r;
1169         }
1170
1171         *idx = htole64(le64toh(*idx) + 1);
1172         return 0;
1173 }
1174
1175 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
1176         uint64_t p;
1177         int r;
1178         assert(f);
1179         assert(o);
1180         assert(offset > 0);
1181
1182         p = le64toh(o->entry.items[i].object_offset);
1183         if (p == 0)
1184                 return -EINVAL;
1185
1186         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1187         if (r < 0)
1188                 return r;
1189
1190         return link_entry_into_array_plus_one(f,
1191                                               &o->data.entry_offset,
1192                                               &o->data.entry_array_offset,
1193                                               &o->data.n_entries,
1194                                               offset);
1195 }
1196
1197 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
1198         uint64_t n, i;
1199         int r;
1200
1201         assert(f);
1202         assert(o);
1203         assert(offset > 0);
1204
1205         if (o->object.type != OBJECT_ENTRY)
1206                 return -EINVAL;
1207
1208         __sync_synchronize();
1209
1210         /* Link up the entry itself */
1211         r = link_entry_into_array(f,
1212                                   &f->header->entry_array_offset,
1213                                   &f->header->n_entries,
1214                                   offset);
1215         if (r < 0)
1216                 return r;
1217
1218         /* log_debug("=> %s seqnr=%"PRIu64" n_entries=%"PRIu64, f->path, o->entry.seqnum, f->header->n_entries); */
1219
1220         if (f->header->head_entry_realtime == 0)
1221                 f->header->head_entry_realtime = o->entry.realtime;
1222
1223         f->header->tail_entry_realtime = o->entry.realtime;
1224         f->header->tail_entry_monotonic = o->entry.monotonic;
1225
1226         f->tail_entry_monotonic_valid = true;
1227
1228         /* Link up the items */
1229         n = journal_file_entry_n_items(o);
1230         for (i = 0; i < n; i++) {
1231                 r = journal_file_link_entry_item(f, o, offset, i);
1232                 if (r < 0)
1233                         return r;
1234         }
1235
1236         return 0;
1237 }
1238
1239 static int journal_file_append_entry_internal(
1240                 JournalFile *f,
1241                 const dual_timestamp *ts,
1242                 uint64_t xor_hash,
1243                 const EntryItem items[], unsigned n_items,
1244                 uint64_t *seqnum,
1245                 Object **ret, uint64_t *offset) {
1246         uint64_t np;
1247         uint64_t osize;
1248         Object *o;
1249         int r;
1250
1251         assert(f);
1252         assert(items || n_items == 0);
1253         assert(ts);
1254
1255         osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1256
1257         r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
1258         if (r < 0)
1259                 return r;
1260
1261         o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
1262         memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
1263         o->entry.realtime = htole64(ts->realtime);
1264         o->entry.monotonic = htole64(ts->monotonic);
1265         o->entry.xor_hash = htole64(xor_hash);
1266         o->entry.boot_id = f->header->boot_id;
1267
1268 #ifdef HAVE_GCRYPT
1269         r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np);
1270         if (r < 0)
1271                 return r;
1272 #endif
1273
1274         r = journal_file_link_entry(f, o, np);
1275         if (r < 0)
1276                 return r;
1277
1278         if (ret)
1279                 *ret = o;
1280
1281         if (offset)
1282                 *offset = np;
1283
1284         return 0;
1285 }
1286
1287 void journal_file_post_change(JournalFile *f) {
1288         assert(f);
1289
1290         /* inotify() does not receive IN_MODIFY events from file
1291          * accesses done via mmap(). After each access we hence
1292          * trigger IN_MODIFY by truncating the journal file to its
1293          * current size which triggers IN_MODIFY. */
1294
1295         __sync_synchronize();
1296
1297         if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1298                 log_error("Failed to truncate file to its own size: %m");
1299 }
1300
1301 static int entry_item_cmp(const void *_a, const void *_b) {
1302         const EntryItem *a = _a, *b = _b;
1303
1304         if (le64toh(a->object_offset) < le64toh(b->object_offset))
1305                 return -1;
1306         if (le64toh(a->object_offset) > le64toh(b->object_offset))
1307                 return 1;
1308         return 0;
1309 }
1310
1311 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1312         unsigned i;
1313         EntryItem *items;
1314         int r;
1315         uint64_t xor_hash = 0;
1316         struct dual_timestamp _ts;
1317
1318         assert(f);
1319         assert(iovec || n_iovec == 0);
1320
1321         if (!ts) {
1322                 dual_timestamp_get(&_ts);
1323                 ts = &_ts;
1324         }
1325
1326         if (f->tail_entry_monotonic_valid &&
1327             ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1328                 return -EINVAL;
1329
1330 #ifdef HAVE_GCRYPT
1331         r = journal_file_maybe_append_tag(f, ts->realtime);
1332         if (r < 0)
1333                 return r;
1334 #endif
1335
1336         /* alloca() can't take 0, hence let's allocate at least one */
1337         items = alloca(sizeof(EntryItem) * MAX(1u, n_iovec));
1338
1339         for (i = 0; i < n_iovec; i++) {
1340                 uint64_t p;
1341                 Object *o;
1342
1343                 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1344                 if (r < 0)
1345                         return r;
1346
1347                 xor_hash ^= le64toh(o->data.hash);
1348                 items[i].object_offset = htole64(p);
1349                 items[i].hash = o->data.hash;
1350         }
1351
1352         /* Order by the position on disk, in order to improve seek
1353          * times for rotating media. */
1354         qsort_safe(items, n_iovec, sizeof(EntryItem), entry_item_cmp);
1355
1356         r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1357
1358         journal_file_post_change(f);
1359
1360         return r;
1361 }
1362
/* One cached position within a chain of entry arrays, keyed by the
 * offset of the chain's first array. */
typedef struct ChainCacheItem {
        /* the array at the beginning of the chain */
        uint64_t first;

        /* the cached array */
        uint64_t array;

        /* the first item in the cached array */
        uint64_t begin;

        /* the total number of items in all arrays before this one in the chain */
        uint64_t total;

        /* the last index we looked at, to optimize locality when bisecting */
        uint64_t last_index;
} ChainCacheItem;
1370
1371 static void chain_cache_put(
1372                 Hashmap *h,
1373                 ChainCacheItem *ci,
1374                 uint64_t first,
1375                 uint64_t array,
1376                 uint64_t begin,
1377                 uint64_t total,
1378                 uint64_t last_index) {
1379
1380         if (!ci) {
1381                 /* If the chain item to cache for this chain is the
1382                  * first one it's not worth caching anything */
1383                 if (array == first)
1384                         return;
1385
1386                 if (hashmap_size(h) >= CHAIN_CACHE_MAX)
1387                         ci = hashmap_steal_first(h);
1388                 else {
1389                         ci = new(ChainCacheItem, 1);
1390                         if (!ci)
1391                                 return;
1392                 }
1393
1394                 ci->first = first;
1395
1396                 if (hashmap_put(h, &ci->first, ci) < 0) {
1397                         free(ci);
1398                         return;
1399                 }
1400         } else
1401                 assert(ci->first == first);
1402
1403         ci->array = array;
1404         ci->begin = begin;
1405         ci->total = total;
1406         ci->last_index = last_index;
1407 }
1408
1409 static int generic_array_get(
1410                 JournalFile *f,
1411                 uint64_t first,
1412                 uint64_t i,
1413                 Object **ret, uint64_t *offset) {
1414
1415         Object *o;
1416         uint64_t p = 0, a, t = 0;
1417         int r;
1418         ChainCacheItem *ci;
1419
1420         assert(f);
1421
1422         a = first;
1423
1424         /* Try the chain cache first */
1425         ci = hashmap_get(f->chain_cache, &first);
1426         if (ci && i > ci->total) {
1427                 a = ci->array;
1428                 i -= ci->total;
1429                 t = ci->total;
1430         }
1431
1432         while (a > 0) {
1433                 uint64_t k;
1434
1435                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1436                 if (r < 0)
1437                         return r;
1438
1439                 k = journal_file_entry_array_n_items(o);
1440                 if (i < k) {
1441                         p = le64toh(o->entry_array.items[i]);
1442                         goto found;
1443                 }
1444
1445                 i -= k;
1446                 t += k;
1447                 a = le64toh(o->entry_array.next_entry_array_offset);
1448         }
1449
1450         return 0;
1451
1452 found:
1453         /* Let's cache this item for the next invocation */
1454         chain_cache_put(f->chain_cache, ci, first, a, le64toh(o->entry_array.items[0]), t, i);
1455
1456         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1457         if (r < 0)
1458                 return r;
1459
1460         if (ret)
1461                 *ret = o;
1462
1463         if (offset)
1464                 *offset = p;
1465
1466         return 1;
1467 }
1468
1469 static int generic_array_get_plus_one(
1470                 JournalFile *f,
1471                 uint64_t extra,
1472                 uint64_t first,
1473                 uint64_t i,
1474                 Object **ret, uint64_t *offset) {
1475
1476         Object *o;
1477
1478         assert(f);
1479
1480         if (i == 0) {
1481                 int r;
1482
1483                 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1484                 if (r < 0)
1485                         return r;
1486
1487                 if (ret)
1488                         *ret = o;
1489
1490                 if (offset)
1491                         *offset = extra;
1492
1493                 return 1;
1494         }
1495
1496         return generic_array_get(f, first, i-1, ret, offset);
1497 }
1498
/* Verdicts of the test_object callbacks used when bisecting */
enum {
        TEST_FOUND = 0, /* the tested object matches the needle */
        TEST_LEFT  = 1, /* the tested object sorts before the needle */
        TEST_RIGHT = 2  /* the tested object sorts after the needle */
};
1504
1505 static int generic_array_bisect(
1506                 JournalFile *f,
1507                 uint64_t first,
1508                 uint64_t n,
1509                 uint64_t needle,
1510                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1511                 direction_t direction,
1512                 Object **ret,
1513                 uint64_t *offset,
1514                 uint64_t *idx) {
1515
1516         uint64_t a, p, t = 0, i = 0, last_p = 0, last_index = (uint64_t) -1;
1517         bool subtract_one = false;
1518         Object *o, *array = NULL;
1519         int r;
1520         ChainCacheItem *ci;
1521
1522         assert(f);
1523         assert(test_object);
1524
1525         /* Start with the first array in the chain */
1526         a = first;
1527
1528         ci = hashmap_get(f->chain_cache, &first);
1529         if (ci && n > ci->total) {
1530                 /* Ah, we have iterated this bisection array chain
1531                  * previously! Let's see if we can skip ahead in the
1532                  * chain, as far as the last time. But we can't jump
1533                  * backwards in the chain, so let's check that
1534                  * first. */
1535
1536                 r = test_object(f, ci->begin, needle);
1537                 if (r < 0)
1538                         return r;
1539
1540                 if (r == TEST_LEFT) {
1541                         /* OK, what we are looking for is right of the
1542                          * begin of this EntryArray, so let's jump
1543                          * straight to previously cached array in the
1544                          * chain */
1545
1546                         a = ci->array;
1547                         n -= ci->total;
1548                         t = ci->total;
1549                         last_index = ci->last_index;
1550                 }
1551         }
1552
1553         while (a > 0) {
1554                 uint64_t left, right, k, lp;
1555
1556                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1557                 if (r < 0)
1558                         return r;
1559
1560                 k = journal_file_entry_array_n_items(array);
1561                 right = MIN(k, n);
1562                 if (right <= 0)
1563                         return 0;
1564
1565                 i = right - 1;
1566                 lp = p = le64toh(array->entry_array.items[i]);
1567                 if (p <= 0)
1568                         return -EBADMSG;
1569
1570                 r = test_object(f, p, needle);
1571                 if (r < 0)
1572                         return r;
1573
1574                 if (r == TEST_FOUND)
1575                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1576
1577                 if (r == TEST_RIGHT) {
1578                         left = 0;
1579                         right -= 1;
1580
1581                         if (last_index != (uint64_t) -1) {
1582                                 assert(last_index <= right);
1583
1584                                 /* If we cached the last index we
1585                                  * looked at, let's try to not to jump
1586                                  * too wildly around and see if we can
1587                                  * limit the range to look at early to
1588                                  * the immediate neighbors of the last
1589                                  * index we looked at. */
1590
1591                                 if (last_index > 0) {
1592                                         uint64_t x = last_index - 1;
1593
1594                                         p = le64toh(array->entry_array.items[x]);
1595                                         if (p <= 0)
1596                                                 return -EBADMSG;
1597
1598                                         r = test_object(f, p, needle);
1599                                         if (r < 0)
1600                                                 return r;
1601
1602                                         if (r == TEST_FOUND)
1603                                                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1604
1605                                         if (r == TEST_RIGHT)
1606                                                 right = x;
1607                                         else
1608                                                 left = x + 1;
1609                                 }
1610
1611                                 if (last_index < right) {
1612                                         uint64_t y = last_index + 1;
1613
1614                                         p = le64toh(array->entry_array.items[y]);
1615                                         if (p <= 0)
1616                                                 return -EBADMSG;
1617
1618                                         r = test_object(f, p, needle);
1619                                         if (r < 0)
1620                                                 return r;
1621
1622                                         if (r == TEST_FOUND)
1623                                                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1624
1625                                         if (r == TEST_RIGHT)
1626                                                 right = y;
1627                                         else
1628                                                 left = y + 1;
1629                                 }
1630                         }
1631
1632                         for (;;) {
1633                                 if (left == right) {
1634                                         if (direction == DIRECTION_UP)
1635                                                 subtract_one = true;
1636
1637                                         i = left;
1638                                         goto found;
1639                                 }
1640
1641                                 assert(left < right);
1642                                 i = (left + right) / 2;
1643
1644                                 p = le64toh(array->entry_array.items[i]);
1645                                 if (p <= 0)
1646                                         return -EBADMSG;
1647
1648                                 r = test_object(f, p, needle);
1649                                 if (r < 0)
1650                                         return r;
1651
1652                                 if (r == TEST_FOUND)
1653                                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1654
1655                                 if (r == TEST_RIGHT)
1656                                         right = i;
1657                                 else
1658                                         left = i + 1;
1659                         }
1660                 }
1661
1662                 if (k > n) {
1663                         if (direction == DIRECTION_UP) {
1664                                 i = n;
1665                                 subtract_one = true;
1666                                 goto found;
1667                         }
1668
1669                         return 0;
1670                 }
1671
1672                 last_p = lp;
1673
1674                 n -= k;
1675                 t += k;
1676                 last_index = (uint64_t) -1;
1677                 a = le64toh(array->entry_array.next_entry_array_offset);
1678         }
1679
1680         return 0;
1681
1682 found:
1683         if (subtract_one && t == 0 && i == 0)
1684                 return 0;
1685
1686         /* Let's cache this item for the next invocation */
1687         chain_cache_put(f->chain_cache, ci, first, a, le64toh(array->entry_array.items[0]), t, subtract_one ? (i > 0 ? i-1 : (uint64_t) -1) : i);
1688
1689         if (subtract_one && i == 0)
1690                 p = last_p;
1691         else if (subtract_one)
1692                 p = le64toh(array->entry_array.items[i-1]);
1693         else
1694                 p = le64toh(array->entry_array.items[i]);
1695
1696         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1697         if (r < 0)
1698                 return r;
1699
1700         if (ret)
1701                 *ret = o;
1702
1703         if (offset)
1704                 *offset = p;
1705
1706         if (idx)
1707                 *idx = t + i + (subtract_one ? -1 : 0);
1708
1709         return 1;
1710 }
1711
1712
1713 static int generic_array_bisect_plus_one(
1714                 JournalFile *f,
1715                 uint64_t extra,
1716                 uint64_t first,
1717                 uint64_t n,
1718                 uint64_t needle,
1719                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1720                 direction_t direction,
1721                 Object **ret,
1722                 uint64_t *offset,
1723                 uint64_t *idx) {
1724
1725         int r;
1726         bool step_back = false;
1727         Object *o;
1728
1729         assert(f);
1730         assert(test_object);
1731
1732         if (n <= 0)
1733                 return 0;
1734
1735         /* This bisects the array in object 'first', but first checks
1736          * an extra  */
1737         r = test_object(f, extra, needle);
1738         if (r < 0)
1739                 return r;
1740
1741         if (r == TEST_FOUND)
1742                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1743
1744         /* if we are looking with DIRECTION_UP then we need to first
1745            see if in the actual array there is a matching entry, and
1746            return the last one of that. But if there isn't any we need
1747            to return this one. Hence remember this, and return it
1748            below. */
1749         if (r == TEST_LEFT)
1750                 step_back = direction == DIRECTION_UP;
1751
1752         if (r == TEST_RIGHT) {
1753                 if (direction == DIRECTION_DOWN)
1754                         goto found;
1755                 else
1756                         return 0;
1757         }
1758
1759         r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1760
1761         if (r == 0 && step_back)
1762                 goto found;
1763
1764         if (r > 0 && idx)
1765                 (*idx) ++;
1766
1767         return r;
1768
1769 found:
1770         r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1771         if (r < 0)
1772                 return r;
1773
1774         if (ret)
1775                 *ret = o;
1776
1777         if (offset)
1778                 *offset = extra;
1779
1780         if (idx)
1781                 *idx = 0;
1782
1783         return 1;
1784 }
1785
1786 _pure_ static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1787         assert(f);
1788         assert(p > 0);
1789
1790         if (p == needle)
1791                 return TEST_FOUND;
1792         else if (p < needle)
1793                 return TEST_LEFT;
1794         else
1795                 return TEST_RIGHT;
1796 }
1797
1798 int journal_file_move_to_entry_by_offset(
1799                 JournalFile *f,
1800                 uint64_t p,
1801                 direction_t direction,
1802                 Object **ret,
1803                 uint64_t *offset) {
1804
1805         return generic_array_bisect(f,
1806                                     le64toh(f->header->entry_array_offset),
1807                                     le64toh(f->header->n_entries),
1808                                     p,
1809                                     test_object_offset,
1810                                     direction,
1811                                     ret, offset, NULL);
1812 }
1813
1814
1815 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1816         Object *o;
1817         int r;
1818
1819         assert(f);
1820         assert(p > 0);
1821
1822         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1823         if (r < 0)
1824                 return r;
1825
1826         if (le64toh(o->entry.seqnum) == needle)
1827                 return TEST_FOUND;
1828         else if (le64toh(o->entry.seqnum) < needle)
1829                 return TEST_LEFT;
1830         else
1831                 return TEST_RIGHT;
1832 }
1833
1834 int journal_file_move_to_entry_by_seqnum(
1835                 JournalFile *f,
1836                 uint64_t seqnum,
1837                 direction_t direction,
1838                 Object **ret,
1839                 uint64_t *offset) {
1840
1841         return generic_array_bisect(f,
1842                                     le64toh(f->header->entry_array_offset),
1843                                     le64toh(f->header->n_entries),
1844                                     seqnum,
1845                                     test_object_seqnum,
1846                                     direction,
1847                                     ret, offset, NULL);
1848 }
1849
1850 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1851         Object *o;
1852         int r;
1853
1854         assert(f);
1855         assert(p > 0);
1856
1857         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1858         if (r < 0)
1859                 return r;
1860
1861         if (le64toh(o->entry.realtime) == needle)
1862                 return TEST_FOUND;
1863         else if (le64toh(o->entry.realtime) < needle)
1864                 return TEST_LEFT;
1865         else
1866                 return TEST_RIGHT;
1867 }
1868
1869 int journal_file_move_to_entry_by_realtime(
1870                 JournalFile *f,
1871                 uint64_t realtime,
1872                 direction_t direction,
1873                 Object **ret,
1874                 uint64_t *offset) {
1875
1876         return generic_array_bisect(f,
1877                                     le64toh(f->header->entry_array_offset),
1878                                     le64toh(f->header->n_entries),
1879                                     realtime,
1880                                     test_object_realtime,
1881                                     direction,
1882                                     ret, offset, NULL);
1883 }
1884
1885 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1886         Object *o;
1887         int r;
1888
1889         assert(f);
1890         assert(p > 0);
1891
1892         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1893         if (r < 0)
1894                 return r;
1895
1896         if (le64toh(o->entry.monotonic) == needle)
1897                 return TEST_FOUND;
1898         else if (le64toh(o->entry.monotonic) < needle)
1899                 return TEST_LEFT;
1900         else
1901                 return TEST_RIGHT;
1902 }
1903
1904 static inline int find_data_object_by_boot_id(
1905                 JournalFile *f,
1906                 sd_id128_t boot_id,
1907                 Object **o,
1908                 uint64_t *b) {
1909         char t[sizeof("_BOOT_ID=")-1 + 32 + 1] = "_BOOT_ID=";
1910
1911         sd_id128_to_string(boot_id, t + 9);
1912         return journal_file_find_data_object(f, t, sizeof(t) - 1, o, b);
1913 }
1914
1915 int journal_file_move_to_entry_by_monotonic(
1916                 JournalFile *f,
1917                 sd_id128_t boot_id,
1918                 uint64_t monotonic,
1919                 direction_t direction,
1920                 Object **ret,
1921                 uint64_t *offset) {
1922
1923         Object *o;
1924         int r;
1925
1926         assert(f);
1927
1928         r = find_data_object_by_boot_id(f, boot_id, &o, NULL);
1929         if (r < 0)
1930                 return r;
1931         if (r == 0)
1932                 return -ENOENT;
1933
1934         return generic_array_bisect_plus_one(f,
1935                                              le64toh(o->data.entry_offset),
1936                                              le64toh(o->data.entry_array_offset),
1937                                              le64toh(o->data.n_entries),
1938                                              monotonic,
1939                                              test_object_monotonic,
1940                                              direction,
1941                                              ret, offset, NULL);
1942 }
1943
1944 int journal_file_next_entry(
1945                 JournalFile *f,
1946                 Object *o, uint64_t p,
1947                 direction_t direction,
1948                 Object **ret, uint64_t *offset) {
1949
1950         uint64_t i, n, ofs;
1951         int r;
1952
1953         assert(f);
1954         assert(p > 0 || !o);
1955
1956         n = le64toh(f->header->n_entries);
1957         if (n <= 0)
1958                 return 0;
1959
1960         if (!o)
1961                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1962         else {
1963                 if (o->object.type != OBJECT_ENTRY)
1964                         return -EINVAL;
1965
1966                 r = generic_array_bisect(f,
1967                                          le64toh(f->header->entry_array_offset),
1968                                          le64toh(f->header->n_entries),
1969                                          p,
1970                                          test_object_offset,
1971                                          DIRECTION_DOWN,
1972                                          NULL, NULL,
1973                                          &i);
1974                 if (r <= 0)
1975                         return r;
1976
1977                 if (direction == DIRECTION_DOWN) {
1978                         if (i >= n - 1)
1979                                 return 0;
1980
1981                         i++;
1982                 } else {
1983                         if (i <= 0)
1984                                 return 0;
1985
1986                         i--;
1987                 }
1988         }
1989
1990         /* And jump to it */
1991         r = generic_array_get(f,
1992                               le64toh(f->header->entry_array_offset),
1993                               i,
1994                               ret, &ofs);
1995         if (r <= 0)
1996                 return r;
1997
1998         if (p > 0 &&
1999             (direction == DIRECTION_DOWN ? ofs <= p : ofs >= p)) {
2000                 log_debug("%s: entry array corrupted at entry %"PRIu64,
2001                           f->path, i);
2002                 return -EBADMSG;
2003         }
2004
2005         if (offset)
2006                 *offset = ofs;
2007
2008         return 1;
2009 }
2010
2011 int journal_file_skip_entry(
2012                 JournalFile *f,
2013                 Object *o, uint64_t p,
2014                 int64_t skip,
2015                 Object **ret, uint64_t *offset) {
2016
2017         uint64_t i, n;
2018         int r;
2019
2020         assert(f);
2021         assert(o);
2022         assert(p > 0);
2023
2024         if (o->object.type != OBJECT_ENTRY)
2025                 return -EINVAL;
2026
2027         r = generic_array_bisect(f,
2028                                  le64toh(f->header->entry_array_offset),
2029                                  le64toh(f->header->n_entries),
2030                                  p,
2031                                  test_object_offset,
2032                                  DIRECTION_DOWN,
2033                                  NULL, NULL,
2034                                  &i);
2035         if (r <= 0)
2036                 return r;
2037
2038         /* Calculate new index */
2039         if (skip < 0) {
2040                 if ((uint64_t) -skip >= i)
2041                         i = 0;
2042                 else
2043                         i = i - (uint64_t) -skip;
2044         } else
2045                 i  += (uint64_t) skip;
2046
2047         n = le64toh(f->header->n_entries);
2048         if (n <= 0)
2049                 return -EBADMSG;
2050
2051         if (i >= n)
2052                 i = n-1;
2053
2054         return generic_array_get(f,
2055                                  le64toh(f->header->entry_array_offset),
2056                                  i,
2057                                  ret, offset);
2058 }
2059
2060 int journal_file_next_entry_for_data(
2061                 JournalFile *f,
2062                 Object *o, uint64_t p,
2063                 uint64_t data_offset,
2064                 direction_t direction,
2065                 Object **ret, uint64_t *offset) {
2066
2067         uint64_t n, i;
2068         int r;
2069         Object *d;
2070
2071         assert(f);
2072         assert(p > 0 || !o);
2073
2074         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2075         if (r < 0)
2076                 return r;
2077
2078         n = le64toh(d->data.n_entries);
2079         if (n <= 0)
2080                 return n;
2081
2082         if (!o)
2083                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
2084         else {
2085                 if (o->object.type != OBJECT_ENTRY)
2086                         return -EINVAL;
2087
2088                 r = generic_array_bisect_plus_one(f,
2089                                                   le64toh(d->data.entry_offset),
2090                                                   le64toh(d->data.entry_array_offset),
2091                                                   le64toh(d->data.n_entries),
2092                                                   p,
2093                                                   test_object_offset,
2094                                                   DIRECTION_DOWN,
2095                                                   NULL, NULL,
2096                                                   &i);
2097
2098                 if (r <= 0)
2099                         return r;
2100
2101                 if (direction == DIRECTION_DOWN) {
2102                         if (i >= n - 1)
2103                                 return 0;
2104
2105                         i++;
2106                 } else {
2107                         if (i <= 0)
2108                                 return 0;
2109
2110                         i--;
2111                 }
2112
2113         }
2114
2115         return generic_array_get_plus_one(f,
2116                                           le64toh(d->data.entry_offset),
2117                                           le64toh(d->data.entry_array_offset),
2118                                           i,
2119                                           ret, offset);
2120 }
2121
2122 int journal_file_move_to_entry_by_offset_for_data(
2123                 JournalFile *f,
2124                 uint64_t data_offset,
2125                 uint64_t p,
2126                 direction_t direction,
2127                 Object **ret, uint64_t *offset) {
2128
2129         int r;
2130         Object *d;
2131
2132         assert(f);
2133
2134         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2135         if (r < 0)
2136                 return r;
2137
2138         return generic_array_bisect_plus_one(f,
2139                                              le64toh(d->data.entry_offset),
2140                                              le64toh(d->data.entry_array_offset),
2141                                              le64toh(d->data.n_entries),
2142                                              p,
2143                                              test_object_offset,
2144                                              direction,
2145                                              ret, offset, NULL);
2146 }
2147
2148 int journal_file_move_to_entry_by_monotonic_for_data(
2149                 JournalFile *f,
2150                 uint64_t data_offset,
2151                 sd_id128_t boot_id,
2152                 uint64_t monotonic,
2153                 direction_t direction,
2154                 Object **ret, uint64_t *offset) {
2155
2156         Object *o, *d;
2157         int r;
2158         uint64_t b, z;
2159
2160         assert(f);
2161
2162         /* First, seek by time */
2163         r = find_data_object_by_boot_id(f, boot_id, &o, &b);
2164         if (r < 0)
2165                 return r;
2166         if (r == 0)
2167                 return -ENOENT;
2168
2169         r = generic_array_bisect_plus_one(f,
2170                                           le64toh(o->data.entry_offset),
2171                                           le64toh(o->data.entry_array_offset),
2172                                           le64toh(o->data.n_entries),
2173                                           monotonic,
2174                                           test_object_monotonic,
2175                                           direction,
2176                                           NULL, &z, NULL);
2177         if (r <= 0)
2178                 return r;
2179
2180         /* And now, continue seeking until we find an entry that
2181          * exists in both bisection arrays */
2182
2183         for (;;) {
2184                 Object *qo;
2185                 uint64_t p, q;
2186
2187                 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2188                 if (r < 0)
2189                         return r;
2190
2191                 r = generic_array_bisect_plus_one(f,
2192                                                   le64toh(d->data.entry_offset),
2193                                                   le64toh(d->data.entry_array_offset),
2194                                                   le64toh(d->data.n_entries),
2195                                                   z,
2196                                                   test_object_offset,
2197                                                   direction,
2198                                                   NULL, &p, NULL);
2199                 if (r <= 0)
2200                         return r;
2201
2202                 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
2203                 if (r < 0)
2204                         return r;
2205
2206                 r = generic_array_bisect_plus_one(f,
2207                                                   le64toh(o->data.entry_offset),
2208                                                   le64toh(o->data.entry_array_offset),
2209                                                   le64toh(o->data.n_entries),
2210                                                   p,
2211                                                   test_object_offset,
2212                                                   direction,
2213                                                   &qo, &q, NULL);
2214
2215                 if (r <= 0)
2216                         return r;
2217
2218                 if (p == q) {
2219                         if (ret)
2220                                 *ret = qo;
2221                         if (offset)
2222                                 *offset = q;
2223
2224                         return 1;
2225                 }
2226
2227                 z = q;
2228         }
2229 }
2230
2231 int journal_file_move_to_entry_by_seqnum_for_data(
2232                 JournalFile *f,
2233                 uint64_t data_offset,
2234                 uint64_t seqnum,
2235                 direction_t direction,
2236                 Object **ret, uint64_t *offset) {
2237
2238         Object *d;
2239         int r;
2240
2241         assert(f);
2242
2243         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2244         if (r < 0)
2245                 return r;
2246
2247         return generic_array_bisect_plus_one(f,
2248                                              le64toh(d->data.entry_offset),
2249                                              le64toh(d->data.entry_array_offset),
2250                                              le64toh(d->data.n_entries),
2251                                              seqnum,
2252                                              test_object_seqnum,
2253                                              direction,
2254                                              ret, offset, NULL);
2255 }
2256
2257 int journal_file_move_to_entry_by_realtime_for_data(
2258                 JournalFile *f,
2259                 uint64_t data_offset,
2260                 uint64_t realtime,
2261                 direction_t direction,
2262                 Object **ret, uint64_t *offset) {
2263
2264         Object *d;
2265         int r;
2266
2267         assert(f);
2268
2269         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2270         if (r < 0)
2271                 return r;
2272
2273         return generic_array_bisect_plus_one(f,
2274                                              le64toh(d->data.entry_offset),
2275                                              le64toh(d->data.entry_array_offset),
2276                                              le64toh(d->data.n_entries),
2277                                              realtime,
2278                                              test_object_realtime,
2279                                              direction,
2280                                              ret, offset, NULL);
2281 }
2282
2283 void journal_file_dump(JournalFile *f) {
2284         Object *o;
2285         int r;
2286         uint64_t p;
2287
2288         assert(f);
2289
2290         journal_file_print_header(f);
2291
2292         p = le64toh(f->header->header_size);
2293         while (p != 0) {
2294                 r = journal_file_move_to_object(f, -1, p, &o);
2295                 if (r < 0)
2296                         goto fail;
2297
2298                 switch (o->object.type) {
2299
2300                 case OBJECT_UNUSED:
2301                         printf("Type: OBJECT_UNUSED\n");
2302                         break;
2303
2304                 case OBJECT_DATA:
2305                         printf("Type: OBJECT_DATA\n");
2306                         break;
2307
2308                 case OBJECT_FIELD:
2309                         printf("Type: OBJECT_FIELD\n");
2310                         break;
2311
2312                 case OBJECT_ENTRY:
2313                         printf("Type: OBJECT_ENTRY seqnum=%"PRIu64" monotonic=%"PRIu64" realtime=%"PRIu64"\n",
2314                                le64toh(o->entry.seqnum),
2315                                le64toh(o->entry.monotonic),
2316                                le64toh(o->entry.realtime));
2317                         break;
2318
2319                 case OBJECT_FIELD_HASH_TABLE:
2320                         printf("Type: OBJECT_FIELD_HASH_TABLE\n");
2321                         break;
2322
2323                 case OBJECT_DATA_HASH_TABLE:
2324                         printf("Type: OBJECT_DATA_HASH_TABLE\n");
2325                         break;
2326
2327                 case OBJECT_ENTRY_ARRAY:
2328                         printf("Type: OBJECT_ENTRY_ARRAY\n");
2329                         break;
2330
2331                 case OBJECT_TAG:
2332                         printf("Type: OBJECT_TAG seqnum=%"PRIu64" epoch=%"PRIu64"\n",
2333                                le64toh(o->tag.seqnum),
2334                                le64toh(o->tag.epoch));
2335                         break;
2336
2337                 default:
2338                         printf("Type: unknown (%u)\n", o->object.type);
2339                         break;
2340                 }
2341
2342                 if (o->object.flags & OBJECT_COMPRESSION_MASK)
2343                         printf("Flags: %s\n",
2344                                object_compressed_to_string(o->object.flags & OBJECT_COMPRESSION_MASK));
2345
2346                 if (p == le64toh(f->header->tail_object_offset))
2347                         p = 0;
2348                 else
2349                         p = p + ALIGN64(le64toh(o->object.size));
2350         }
2351
2352         return;
2353 fail:
2354         log_error("File corrupt");
2355 }
2356
2357 static const char* format_timestamp_safe(char *buf, size_t l, usec_t t) {
2358         const char *x;
2359
2360         x = format_timestamp(buf, l, t);
2361         if (x)
2362                 return x;
2363         return " --- ";
2364 }
2365
2366 void journal_file_print_header(JournalFile *f) {
2367         char a[33], b[33], c[33], d[33];
2368         char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX], z[FORMAT_TIMESTAMP_MAX];
2369         struct stat st;
2370         char bytes[FORMAT_BYTES_MAX];
2371
2372         assert(f);
2373
2374         printf("File Path: %s\n"
2375                "File ID: %s\n"
2376                "Machine ID: %s\n"
2377                "Boot ID: %s\n"
2378                "Sequential Number ID: %s\n"
2379                "State: %s\n"
2380                "Compatible Flags:%s%s\n"
2381                "Incompatible Flags:%s%s%s\n"
2382                "Header size: %"PRIu64"\n"
2383                "Arena size: %"PRIu64"\n"
2384                "Data Hash Table Size: %"PRIu64"\n"
2385                "Field Hash Table Size: %"PRIu64"\n"
2386                "Rotate Suggested: %s\n"
2387                "Head Sequential Number: %"PRIu64"\n"
2388                "Tail Sequential Number: %"PRIu64"\n"
2389                "Head Realtime Timestamp: %s\n"
2390                "Tail Realtime Timestamp: %s\n"
2391                "Tail Monotonic Timestamp: %s\n"
2392                "Objects: %"PRIu64"\n"
2393                "Entry Objects: %"PRIu64"\n",
2394                f->path,
2395                sd_id128_to_string(f->header->file_id, a),
2396                sd_id128_to_string(f->header->machine_id, b),
2397                sd_id128_to_string(f->header->boot_id, c),
2398                sd_id128_to_string(f->header->seqnum_id, d),
2399                f->header->state == STATE_OFFLINE ? "OFFLINE" :
2400                f->header->state == STATE_ONLINE ? "ONLINE" :
2401                f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
2402                JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
2403                (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_ANY) ? " ???" : "",
2404                JOURNAL_HEADER_COMPRESSED_XZ(f->header) ? " COMPRESSED-XZ" : "",
2405                JOURNAL_HEADER_COMPRESSED_LZ4(f->header) ? " COMPRESSED-LZ4" : "",
2406                (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_ANY) ? " ???" : "",
2407                le64toh(f->header->header_size),
2408                le64toh(f->header->arena_size),
2409                le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
2410                le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
2411                yes_no(journal_file_rotate_suggested(f, 0)),
2412                le64toh(f->header->head_entry_seqnum),
2413                le64toh(f->header->tail_entry_seqnum),
2414                format_timestamp_safe(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
2415                format_timestamp_safe(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
2416                format_timespan(z, sizeof(z), le64toh(f->header->tail_entry_monotonic), USEC_PER_MSEC),
2417                le64toh(f->header->n_objects),
2418                le64toh(f->header->n_entries));
2419
2420         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2421                 printf("Data Objects: %"PRIu64"\n"
2422                        "Data Hash Table Fill: %.1f%%\n",
2423                        le64toh(f->header->n_data),
2424                        100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
2425
2426         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2427                 printf("Field Objects: %"PRIu64"\n"
2428                        "Field Hash Table Fill: %.1f%%\n",
2429                        le64toh(f->header->n_fields),
2430                        100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
2431
2432         if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
2433                 printf("Tag Objects: %"PRIu64"\n",
2434                        le64toh(f->header->n_tags));
2435         if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
2436                 printf("Entry Array Objects: %"PRIu64"\n",
2437                        le64toh(f->header->n_entry_arrays));
2438
2439         if (fstat(f->fd, &st) >= 0)
2440                 printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (off_t) st.st_blocks * 512ULL));
2441 }
2442
2443 int journal_file_open(
2444                 const char *fname,
2445                 int flags,
2446                 mode_t mode,
2447                 bool compress,
2448                 bool seal,
2449                 JournalMetrics *metrics,
2450                 MMapCache *mmap_cache,
2451                 JournalFile *template,
2452                 JournalFile **ret) {
2453
2454         JournalFile *f;
2455         int r;
2456         bool newly_created = false;
2457
2458         assert(fname);
2459         assert(ret);
2460
2461         if ((flags & O_ACCMODE) != O_RDONLY &&
2462             (flags & O_ACCMODE) != O_RDWR)
2463                 return -EINVAL;
2464
2465         if (!endswith(fname, ".journal") &&
2466             !endswith(fname, ".journal~"))
2467                 return -EINVAL;
2468
2469         f = new0(JournalFile, 1);
2470         if (!f)
2471                 return -ENOMEM;
2472
2473         f->fd = -1;
2474         f->mode = mode;
2475
2476         f->flags = flags;
2477         f->prot = prot_from_flags(flags);
2478         f->writable = (flags & O_ACCMODE) != O_RDONLY;
2479 #if defined(HAVE_LZ4)
2480         f->compress_lz4 = compress;
2481 #elif defined(HAVE_XZ)
2482         f->compress_xz = compress;
2483 #endif
2484 #ifdef HAVE_GCRYPT
2485         f->seal = seal;
2486 #endif
2487
2488         if (mmap_cache)
2489                 f->mmap = mmap_cache_ref(mmap_cache);
2490         else {
2491                 f->mmap = mmap_cache_new();
2492                 if (!f->mmap) {
2493                         r = -ENOMEM;
2494                         goto fail;
2495                 }
2496         }
2497
2498         f->path = strdup(fname);
2499         if (!f->path) {
2500                 r = -ENOMEM;
2501                 goto fail;
2502         }
2503
2504         f->chain_cache = hashmap_new(uint64_hash_func, uint64_compare_func);
2505         if (!f->chain_cache) {
2506                 r = -ENOMEM;
2507                 goto fail;
2508         }
2509
2510         f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2511         if (f->fd < 0) {
2512                 r = -errno;
2513                 goto fail;
2514         }
2515
2516         if (fstat(f->fd, &f->last_stat) < 0) {
2517                 r = -errno;
2518                 goto fail;
2519         }
2520
2521         if (f->last_stat.st_size == 0 && f->writable) {
2522                 uint64_t crtime;
2523
2524                 /* Let's attach the creation time to the journal file,
2525                  * so that the vacuuming code knows the age of this
2526                  * file even if the file might end up corrupted one
2527                  * day... Ideally we'd just use the creation time many
2528                  * file systems maintain for each file, but there is
2529                  * currently no usable API to query this, hence let's
2530                  * emulate this via extended attributes. If extended
2531                  * attributes are not supported we'll just skip this,
2532                  * and rely solely on mtime/atime/ctime of the file.*/
2533
2534                 crtime = htole64((uint64_t) now(CLOCK_REALTIME));
2535                 fsetxattr(f->fd, "user.crtime_usec", &crtime, sizeof(crtime), XATTR_CREATE);
2536
2537 #ifdef HAVE_GCRYPT
2538                 /* Try to load the FSPRG state, and if we can't, then
2539                  * just don't do sealing */
2540                 if (f->seal) {
2541                         r = journal_file_fss_load(f);
2542                         if (r < 0)
2543                                 f->seal = false;
2544                 }
2545 #endif
2546
2547                 r = journal_file_init_header(f, template);
2548                 if (r < 0)
2549                         goto fail;
2550
2551                 if (fstat(f->fd, &f->last_stat) < 0) {
2552                         r = -errno;
2553                         goto fail;
2554                 }
2555
2556                 newly_created = true;
2557         }
2558
2559         if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2560                 r = -EIO;
2561                 goto fail;
2562         }
2563
2564         f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2565         if (f->header == MAP_FAILED) {
2566                 f->header = NULL;
2567                 r = -errno;
2568                 goto fail;
2569         }
2570
2571         if (!newly_created) {
2572                 r = journal_file_verify_header(f);
2573                 if (r < 0)
2574                         goto fail;
2575         }
2576
2577 #ifdef HAVE_GCRYPT
2578         if (!newly_created && f->writable) {
2579                 r = journal_file_fss_load(f);
2580                 if (r < 0)
2581                         goto fail;
2582         }
2583 #endif
2584
2585         if (f->writable) {
2586                 if (metrics) {
2587                         journal_default_metrics(metrics, f->fd);
2588                         f->metrics = *metrics;
2589                 } else if (template)
2590                         f->metrics = template->metrics;
2591
2592                 r = journal_file_refresh_header(f);
2593                 if (r < 0)
2594                         goto fail;
2595         }
2596
2597 #ifdef HAVE_GCRYPT
2598         r = journal_file_hmac_setup(f);
2599         if (r < 0)
2600                 goto fail;
2601 #endif
2602
2603         if (newly_created) {
2604                 r = journal_file_setup_field_hash_table(f);
2605                 if (r < 0)
2606                         goto fail;
2607
2608                 r = journal_file_setup_data_hash_table(f);
2609                 if (r < 0)
2610                         goto fail;
2611
2612 #ifdef HAVE_GCRYPT
2613                 r = journal_file_append_first_tag(f);
2614                 if (r < 0)
2615                         goto fail;
2616 #endif
2617         }
2618
2619         r = journal_file_map_field_hash_table(f);
2620         if (r < 0)
2621                 goto fail;
2622
2623         r = journal_file_map_data_hash_table(f);
2624         if (r < 0)
2625                 goto fail;
2626
2627         *ret = f;
2628         return 0;
2629
2630 fail:
2631         journal_file_close(f);
2632
2633         return r;
2634 }
2635
2636 int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
2637         _cleanup_free_ char *p = NULL;
2638         size_t l;
2639         JournalFile *old_file, *new_file = NULL;
2640         int r;
2641
2642         assert(f);
2643         assert(*f);
2644
2645         old_file = *f;
2646
2647         if (!old_file->writable)
2648                 return -EINVAL;
2649
2650         if (!endswith(old_file->path, ".journal"))
2651                 return -EINVAL;
2652
2653         l = strlen(old_file->path);
2654         r = asprintf(&p, "%.*s@" SD_ID128_FORMAT_STR "-%016"PRIx64"-%016"PRIx64".journal",
2655                      (int) l - 8, old_file->path,
2656                      SD_ID128_FORMAT_VAL(old_file->header->seqnum_id),
2657                      le64toh((*f)->header->head_entry_seqnum),
2658                      le64toh((*f)->header->head_entry_realtime));
2659         if (r < 0)
2660                 return -ENOMEM;
2661
2662         r = rename(old_file->path, p);
2663         if (r < 0)
2664                 return -errno;
2665
2666         old_file->header->state = STATE_ARCHIVED;
2667
2668         r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
2669         journal_file_close(old_file);
2670
2671         *f = new_file;
2672         return r;
2673 }
2674
2675 int journal_file_open_reliably(
2676                 const char *fname,
2677                 int flags,
2678                 mode_t mode,
2679                 bool compress,
2680                 bool seal,
2681                 JournalMetrics *metrics,
2682                 MMapCache *mmap_cache,
2683                 JournalFile *template,
2684                 JournalFile **ret) {
2685
2686         int r;
2687         size_t l;
2688         _cleanup_free_ char *p = NULL;
2689
2690         r = journal_file_open(fname, flags, mode, compress, seal,
2691                               metrics, mmap_cache, template, ret);
2692         if (r != -EBADMSG && /* corrupted */
2693             r != -ENODATA && /* truncated */
2694             r != -EHOSTDOWN && /* other machine */
2695             r != -EPROTONOSUPPORT && /* incompatible feature */
2696             r != -EBUSY && /* unclean shutdown */
2697             r != -ESHUTDOWN /* already archived */)
2698                 return r;
2699
2700         if ((flags & O_ACCMODE) == O_RDONLY)
2701                 return r;
2702
2703         if (!(flags & O_CREAT))
2704                 return r;
2705
2706         if (!endswith(fname, ".journal"))
2707                 return r;
2708
2709         /* The file is corrupted. Rotate it away and try it again (but only once) */
2710
2711         l = strlen(fname);
2712         if (asprintf(&p, "%.*s@%016llx-%016" PRIx64 ".journal~",
2713                      (int) l - 8, fname,
2714                      (unsigned long long) now(CLOCK_REALTIME),
2715                      random_u64()) < 0)
2716                 return -ENOMEM;
2717
2718         r = rename(fname, p);
2719         if (r < 0)
2720                 return -errno;
2721
2722         log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
2723
2724         return journal_file_open(fname, flags, mode, compress, seal,
2725                                  metrics, mmap_cache, template, ret);
2726 }
2727
2728 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2729         uint64_t i, n;
2730         uint64_t q, xor_hash = 0;
2731         int r;
2732         EntryItem *items;
2733         dual_timestamp ts;
2734
2735         assert(from);
2736         assert(to);
2737         assert(o);
2738         assert(p);
2739
2740         if (!to->writable)
2741                 return -EPERM;
2742
2743         ts.monotonic = le64toh(o->entry.monotonic);
2744         ts.realtime = le64toh(o->entry.realtime);
2745
2746         n = journal_file_entry_n_items(o);
2747         /* alloca() can't take 0, hence let's allocate at least one */
2748         items = alloca(sizeof(EntryItem) * MAX(1u, n));
2749
2750         for (i = 0; i < n; i++) {
2751                 uint64_t l, h;
2752                 le64_t le_hash;
2753                 size_t t;
2754                 void *data;
2755                 Object *u;
2756
2757                 q = le64toh(o->entry.items[i].object_offset);
2758                 le_hash = o->entry.items[i].hash;
2759
2760                 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2761                 if (r < 0)
2762                         return r;
2763
2764                 if (le_hash != o->data.hash)
2765                         return -EBADMSG;
2766
2767                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2768                 t = (size_t) l;
2769
2770                 /* We hit the limit on 32bit machines */
2771                 if ((uint64_t) t != l)
2772                         return -E2BIG;
2773
2774                 if (o->object.flags & OBJECT_COMPRESSION_MASK) {
2775                         uint64_t rsize;
2776
2777                         r = decompress_blob(o->object.flags & OBJECT_COMPRESSION_MASK,
2778                                             o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize, 0);
2779                         if (r < 0)
2780                                 return r;
2781
2782                         data = from->compress_buffer;
2783                         l = rsize;
2784                 } else
2785                         data = o->data.payload;
2786
2787                 r = journal_file_append_data(to, data, l, &u, &h);
2788                 if (r < 0)
2789                         return r;
2790
2791                 xor_hash ^= le64toh(u->data.hash);
2792                 items[i].object_offset = htole64(h);
2793                 items[i].hash = u->data.hash;
2794
2795                 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2796                 if (r < 0)
2797                         return r;
2798         }
2799
2800         return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2801 }
2802
2803 void journal_default_metrics(JournalMetrics *m, int fd) {
2804         uint64_t fs_size = 0;
2805         struct statvfs ss;
2806         char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
2807
2808         assert(m);
2809         assert(fd >= 0);
2810
2811         if (fstatvfs(fd, &ss) >= 0)
2812                 fs_size = ss.f_frsize * ss.f_blocks;
2813
2814         if (m->max_use == (uint64_t) -1) {
2815
2816                 if (fs_size > 0) {
2817                         m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2818
2819                         if (m->max_use > DEFAULT_MAX_USE_UPPER)
2820                                 m->max_use = DEFAULT_MAX_USE_UPPER;
2821
2822                         if (m->max_use < DEFAULT_MAX_USE_LOWER)
2823                                 m->max_use = DEFAULT_MAX_USE_LOWER;
2824                 } else
2825                         m->max_use = DEFAULT_MAX_USE_LOWER;
2826         } else {
2827                 m->max_use = PAGE_ALIGN(m->max_use);
2828
2829                 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2830                         m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2831         }
2832
2833         if (m->max_size == (uint64_t) -1) {
2834                 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2835
2836                 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2837                         m->max_size = DEFAULT_MAX_SIZE_UPPER;
2838         } else
2839                 m->max_size = PAGE_ALIGN(m->max_size);
2840
2841         if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2842                 m->max_size = JOURNAL_FILE_SIZE_MIN;
2843
2844         if (m->max_size*2 > m->max_use)
2845                 m->max_use = m->max_size*2;
2846
2847         if (m->min_size == (uint64_t) -1)
2848                 m->min_size = JOURNAL_FILE_SIZE_MIN;
2849         else {
2850                 m->min_size = PAGE_ALIGN(m->min_size);
2851
2852                 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2853                         m->min_size = JOURNAL_FILE_SIZE_MIN;
2854
2855                 if (m->min_size > m->max_size)
2856                         m->max_size = m->min_size;
2857         }
2858
2859         if (m->keep_free == (uint64_t) -1) {
2860
2861                 if (fs_size > 0) {
2862                         m->keep_free = PAGE_ALIGN(fs_size * 3 / 20); /* 15% of file system size */
2863
2864                         if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2865                                 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2866
2867                 } else
2868                         m->keep_free = DEFAULT_KEEP_FREE;
2869         }
2870
2871         log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2872                   format_bytes(a, sizeof(a), m->max_use),
2873                   format_bytes(b, sizeof(b), m->max_size),
2874                   format_bytes(c, sizeof(c), m->min_size),
2875                   format_bytes(d, sizeof(d), m->keep_free));
2876 }
2877
2878 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2879         assert(f);
2880         assert(from || to);
2881
2882         if (from) {
2883                 if (f->header->head_entry_realtime == 0)
2884                         return -ENOENT;
2885
2886                 *from = le64toh(f->header->head_entry_realtime);
2887         }
2888
2889         if (to) {
2890                 if (f->header->tail_entry_realtime == 0)
2891                         return -ENOENT;
2892
2893                 *to = le64toh(f->header->tail_entry_realtime);
2894         }
2895
2896         return 1;
2897 }
2898
2899 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2900         Object *o;
2901         uint64_t p;
2902         int r;
2903
2904         assert(f);
2905         assert(from || to);
2906
2907         r = find_data_object_by_boot_id(f, boot_id, &o, &p);
2908         if (r <= 0)
2909                 return r;
2910
2911         if (le64toh(o->data.n_entries) <= 0)
2912                 return 0;
2913
2914         if (from) {
2915                 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2916                 if (r < 0)
2917                         return r;
2918
2919                 *from = le64toh(o->entry.monotonic);
2920         }
2921
2922         if (to) {
2923                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2924                 if (r < 0)
2925                         return r;
2926
2927                 r = generic_array_get_plus_one(f,
2928                                                le64toh(o->data.entry_offset),
2929                                                le64toh(o->data.entry_array_offset),
2930                                                le64toh(o->data.n_entries)-1,
2931                                                &o, NULL);
2932                 if (r <= 0)
2933                         return r;
2934
2935                 *to = le64toh(o->entry.monotonic);
2936         }
2937
2938         return 1;
2939 }
2940
2941 bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec) {
2942         assert(f);
2943
2944         /* If we gained new header fields we gained new features,
2945          * hence suggest a rotation */
2946         if (le64toh(f->header->header_size) < sizeof(Header)) {
2947                 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
2948                 return true;
2949         }
2950
2951         /* Let's check if the hash tables grew over a certain fill
2952          * level (75%, borrowing this value from Java's hash table
2953          * implementation), and if so suggest a rotation. To calculate
2954          * the fill level we need the n_data field, which only exists
2955          * in newer versions. */
2956
2957         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2958                 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2959                         log_debug("Data hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items, %llu file size, %"PRIu64" bytes per hash table item), suggesting rotation.",
2960                                   f->path,
2961                                   100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2962                                   le64toh(f->header->n_data),
2963                                   le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
2964                                   (unsigned long long) f->last_stat.st_size,
2965                                   f->last_stat.st_size / le64toh(f->header->n_data));
2966                         return true;
2967                 }
2968
2969         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2970                 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2971                         log_debug("Field hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items), suggesting rotation.",
2972                                   f->path,
2973                                   100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2974                                   le64toh(f->header->n_fields),
2975                                   le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
2976                         return true;
2977                 }
2978
2979         /* Are the data objects properly indexed by field objects? */
2980         if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
2981             JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
2982             le64toh(f->header->n_data) > 0 &&
2983             le64toh(f->header->n_fields) == 0)
2984                 return true;
2985
2986         if (max_file_usec > 0) {
2987                 usec_t t, h;
2988
2989                 h = le64toh(f->header->head_entry_realtime);
2990                 t = now(CLOCK_REALTIME);
2991
2992                 if (h > 0 && t > h + max_file_usec)
2993                         return true;
2994         }
2995
2996         return false;
2997 }