chiark / gitweb /
journal: introduce entry array chain cache
[elogind.git] / src / journal / sd-journal.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <stddef.h>
25 #include <unistd.h>
26 #include <sys/inotify.h>
27 #include <sys/poll.h>
28
29 #include "sd-journal.h"
30 #include "journal-def.h"
31 #include "journal-file.h"
32 #include "hashmap.h"
33 #include "list.h"
34 #include "path-util.h"
35 #include "lookup3.h"
36 #include "compress.h"
37 #include "journal-internal.h"
38
39 #define JOURNAL_FILES_MAX 1024
40
41 static void detach_location(sd_journal *j) {
42         Iterator i;
43         JournalFile *f;
44
45         assert(j);
46
47         j->current_file = NULL;
48         j->current_field = 0;
49
50         HASHMAP_FOREACH(f, j->files, i)
51                 f->current_offset = 0;
52 }
53
54 static void reset_location(sd_journal *j) {
55         assert(j);
56
57         detach_location(j);
58         zero(j->current_location);
59 }
60
61 static void init_location(Location *l, LocationType type, JournalFile *f, Object *o) {
62         assert(l);
63         assert(type == LOCATION_DISCRETE || type == LOCATION_SEEK);
64         assert(f);
65         assert(o->object.type == OBJECT_ENTRY);
66
67         l->type = type;
68         l->seqnum = le64toh(o->entry.seqnum);
69         l->seqnum_id = f->header->seqnum_id;
70         l->realtime = le64toh(o->entry.realtime);
71         l->monotonic = le64toh(o->entry.monotonic);
72         l->boot_id = o->entry.boot_id;
73         l->xor_hash = le64toh(o->entry.xor_hash);
74
75         l->seqnum_set = l->realtime_set = l->monotonic_set = l->xor_hash_set = true;
76 }
77
78 static void set_location(sd_journal *j, LocationType type, JournalFile *f, Object *o, uint64_t offset) {
79         assert(j);
80         assert(type == LOCATION_DISCRETE || type == LOCATION_SEEK);
81         assert(f);
82         assert(o);
83
84         init_location(&j->current_location, type, f, o);
85
86         j->current_file = f;
87         j->current_field = 0;
88
89         f->current_offset = offset;
90 }
91
/* Check whether the size bytes at data form an acceptable match of the
 * shape FIELD=value. The field name must be non-empty, must not begin
 * with two underscores, and may only consist of 'A'-'Z', '0'-'9' and
 * '_'. Anything is allowed after the first '='. Returns non-zero if
 * valid, zero otherwise. */
static int match_is_valid(const void *data, size_t size) {
        const char *start, *end, *cur;

        assert(data);

        if (size < 2)
                return false;

        start = data;

        /* A leading double underscore is rejected */
        if (memcmp(start, "__", 2) == 0)
                return false;

        end = start + size;
        cur = start;

        while (cur < end) {
                char c = *cur;

                /* First '=' ends the field name; it must not be empty */
                if (c == '=')
                        return cur > start;

                if (!(c == '_' ||
                      (c >= 'A' && c <= 'Z') ||
                      (c >= '0' && c <= '9')))
                        return false;

                cur++;
        }

        /* No '=' at all */
        return false;
}
123
/* Compare the field-name parts (everything up to and including the
 * first '=') of two match blobs of s and t bytes respectively.
 * Returns false at the first differing byte; returns true when both
 * reach '=' at the same position, and also when the shorter blob ends
 * before any difference or '=' was seen. */
static bool same_field(const void *_a, size_t s, const void *_b, size_t t) {
        const uint8_t *lhs = _a, *rhs = _b;
        size_t common = s < t ? s : t;
        size_t k;

        for (k = 0; k < common; k++) {
                uint8_t c = lhs[k];

                if (c != rhs[k])
                        return false;

                if (c == '=')
                        return true;
        }

        return true;
}
139
140 static Match *match_new(Match *p, MatchType t) {
141         Match *m;
142
143         m = new0(Match, 1);
144         if (!m)
145                 return NULL;
146
147         m->type = t;
148
149         if (p) {
150                 m->parent = p;
151                 LIST_PREPEND(Match, matches, p->matches, m);
152         }
153
154         return m;
155 }
156
157 static void match_free(Match *m) {
158         assert(m);
159
160         while (m->matches)
161                 match_free(m->matches);
162
163         if (m->parent)
164                 LIST_REMOVE(Match, matches, m->parent->matches, m);
165
166         free(m->data);
167         free(m);
168 }
169
170 static void match_free_if_empty(Match *m) {
171         assert(m);
172
173         if (m->matches)
174                 return;
175
176         match_free(m);
177 }
178
179 _public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size) {
180         Match *l2, *l3, *add_here = NULL, *m;
181         le64_t le_hash;
182
183         if (!j)
184                 return -EINVAL;
185
186         if (!data)
187                 return -EINVAL;
188
189         if (size == 0)
190                 size = strlen(data);
191
192         if (!match_is_valid(data, size))
193                 return -EINVAL;
194
195         /* level 0: OR term
196          * level 1: AND terms
197          * level 2: OR terms
198          * level 3: concrete matches */
199
200         if (!j->level0) {
201                 j->level0 = match_new(NULL, MATCH_OR_TERM);
202                 if (!j->level0)
203                         return -ENOMEM;
204         }
205
206         if (!j->level1) {
207                 j->level1 = match_new(j->level0, MATCH_AND_TERM);
208                 if (!j->level1)
209                         return -ENOMEM;
210         }
211
212         assert(j->level0->type == MATCH_OR_TERM);
213         assert(j->level1->type == MATCH_AND_TERM);
214
215         le_hash = htole64(hash64(data, size));
216
217         LIST_FOREACH(matches, l2, j->level1->matches) {
218                 assert(l2->type == MATCH_OR_TERM);
219
220                 LIST_FOREACH(matches, l3, l2->matches) {
221                         assert(l3->type == MATCH_DISCRETE);
222
223                         /* Exactly the same match already? Then ignore
224                          * this addition */
225                         if (l3->le_hash == le_hash &&
226                             l3->size == size &&
227                             memcmp(l3->data, data, size) == 0)
228                                 return 0;
229
230                         /* Same field? Then let's add this to this OR term */
231                         if (same_field(data, size, l3->data, l3->size)) {
232                                 add_here = l2;
233                                 break;
234                         }
235                 }
236
237                 if (add_here)
238                         break;
239         }
240
241         if (!add_here) {
242                 add_here = match_new(j->level1, MATCH_OR_TERM);
243                 if (!add_here)
244                         goto fail;
245         }
246
247         m = match_new(add_here, MATCH_DISCRETE);
248         if (!m)
249                 goto fail;
250
251         m->le_hash = le_hash;
252         m->size = size;
253         m->data = memdup(data, size);
254         if (!m->data)
255                 goto fail;
256
257         detach_location(j);
258
259         return 0;
260
261 fail:
262         if (add_here)
263                 match_free_if_empty(add_here);
264
265         if (j->level1)
266                 match_free_if_empty(j->level1);
267
268         if (j->level0)
269                 match_free_if_empty(j->level0);
270
271         return -ENOMEM;
272 }
273
274 _public_ int sd_journal_add_disjunction(sd_journal *j) {
275         Match *m;
276
277         assert(j);
278
279         if (!j->level0)
280                 return 0;
281
282         if (!j->level1)
283                 return 0;
284
285         if (!j->level1->matches)
286                 return 0;
287
288         m = match_new(j->level0, MATCH_AND_TERM);
289         if (!m)
290                 return -ENOMEM;
291
292         j->level1 = m;
293         return 0;
294 }
295
296 static char *match_make_string(Match *m) {
297         char *p, *r;
298         Match *i;
299         bool enclose = false;
300
301         if (!m)
302                 return strdup("");
303
304         if (m->type == MATCH_DISCRETE)
305                 return strndup(m->data, m->size);
306
307         p = NULL;
308         LIST_FOREACH(matches, i, m->matches) {
309                 char *t, *k;
310
311                 t = match_make_string(i);
312                 if (!t) {
313                         free(p);
314                         return NULL;
315                 }
316
317                 if (p) {
318                         k = strjoin(p, m->type == MATCH_OR_TERM ? " OR " : " AND ", t, NULL);
319                         free(p);
320                         free(t);
321
322                         if (!k)
323                                 return NULL;
324
325                         p = k;
326
327                         enclose = true;
328                 } else {
329                         free(p);
330                         p = t;
331                 }
332         }
333
334         if (enclose) {
335                 r = strjoin("(", p, ")", NULL);
336                 free(p);
337                 return r;
338         }
339
340         return p;
341 }
342
343 char *journal_make_match_string(sd_journal *j) {
344         assert(j);
345
346         return match_make_string(j->level0);
347 }
348
349 _public_ void sd_journal_flush_matches(sd_journal *j) {
350
351         if (!j)
352                 return;
353
354         if (j->level0)
355                 match_free(j->level0);
356
357         j->level0 = j->level1 = NULL;
358
359         detach_location(j);
360 }
361
362 static int compare_entry_order(JournalFile *af, Object *_ao,
363                          JournalFile *bf, uint64_t bp) {
364
365         uint64_t a, b;
366         Object *ao, *bo;
367         int r;
368
369         assert(af);
370         assert(bf);
371         assert(_ao);
372
373         /* The mmap cache might invalidate the object from the first
374          * file if we look at the one from the second file. Hence
375          * temporarily copy the header of the first one, and look at
376          * that only. */
377         ao = alloca(offsetof(EntryObject, items));
378         memcpy(ao, _ao, offsetof(EntryObject, items));
379
380         r = journal_file_move_to_object(bf, OBJECT_ENTRY, bp, &bo);
381         if (r < 0)
382                 return strcmp(af->path, bf->path);
383
384         /* We operate on two different files here, hence we can access
385          * two objects at the same time, which we normally can't.
386          *
387          * If contents and timestamps match, these entries are
388          * identical, even if the seqnum does not match */
389
390         if (sd_id128_equal(ao->entry.boot_id, bo->entry.boot_id) &&
391             ao->entry.monotonic == bo->entry.monotonic &&
392             ao->entry.realtime == bo->entry.realtime &&
393             ao->entry.xor_hash == bo->entry.xor_hash)
394                 return 0;
395
396         if (sd_id128_equal(af->header->seqnum_id, bf->header->seqnum_id)) {
397
398                 /* If this is from the same seqnum source, compare
399                  * seqnums */
400                 a = le64toh(ao->entry.seqnum);
401                 b = le64toh(bo->entry.seqnum);
402
403                 if (a < b)
404                         return -1;
405                 if (a > b)
406                         return 1;
407
408                 /* Wow! This is weird, different data but the same
409                  * seqnums? Something is borked, but let's make the
410                  * best of it and compare by time. */
411         }
412
413         if (sd_id128_equal(ao->entry.boot_id, bo->entry.boot_id)) {
414
415                 /* If the boot id matches compare monotonic time */
416                 a = le64toh(ao->entry.monotonic);
417                 b = le64toh(bo->entry.monotonic);
418
419                 if (a < b)
420                         return -1;
421                 if (a > b)
422                         return 1;
423         }
424
425         /* Otherwise compare UTC time */
426         a = le64toh(ao->entry.realtime);
427         b = le64toh(bo->entry.realtime);
428
429         if (a < b)
430                 return -1;
431         if (a > b)
432                 return 1;
433
434         /* Finally, compare by contents */
435         a = le64toh(ao->entry.xor_hash);
436         b = le64toh(bo->entry.xor_hash);
437
438         if (a < b)
439                 return -1;
440         if (a > b)
441                 return 1;
442
443         return 0;
444 }
445
446 static int compare_with_location(JournalFile *af, Object *ao, Location *l) {
447         uint64_t a;
448
449         assert(af);
450         assert(ao);
451         assert(l);
452         assert(l->type == LOCATION_DISCRETE || l->type == LOCATION_SEEK);
453
454         if (l->monotonic_set &&
455             sd_id128_equal(ao->entry.boot_id, l->boot_id) &&
456             l->realtime_set &&
457             le64toh(ao->entry.realtime) == l->realtime &&
458             l->xor_hash_set &&
459             le64toh(ao->entry.xor_hash) == l->xor_hash)
460                 return 0;
461
462         if (l->seqnum_set &&
463             sd_id128_equal(af->header->seqnum_id, l->seqnum_id)) {
464
465                 a = le64toh(ao->entry.seqnum);
466
467                 if (a < l->seqnum)
468                         return -1;
469                 if (a > l->seqnum)
470                         return 1;
471         }
472
473         if (l->monotonic_set &&
474             sd_id128_equal(ao->entry.boot_id, l->boot_id)) {
475
476                 a = le64toh(ao->entry.monotonic);
477
478                 if (a < l->monotonic)
479                         return -1;
480                 if (a > l->monotonic)
481                         return 1;
482         }
483
484         if (l->realtime_set) {
485
486                 a = le64toh(ao->entry.realtime);
487
488                 if (a < l->realtime)
489                         return -1;
490                 if (a > l->realtime)
491                         return 1;
492         }
493
494         if (l->xor_hash_set) {
495                 a = le64toh(ao->entry.xor_hash);
496
497                 if (a < l->xor_hash)
498                         return -1;
499                 if (a > l->xor_hash)
500                         return 1;
501         }
502
503         return 0;
504 }
505
506 static int next_for_match(
507                 sd_journal *j,
508                 Match *m,
509                 JournalFile *f,
510                 uint64_t after_offset,
511                 direction_t direction,
512                 Object **ret,
513                 uint64_t *offset) {
514
515         int r;
516         uint64_t np = 0;
517         Object *n;
518
519         assert(j);
520         assert(m);
521         assert(f);
522
523         if (m->type == MATCH_DISCRETE) {
524                 uint64_t dp;
525
526                 r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
527                 if (r <= 0)
528                         return r;
529
530                 return journal_file_move_to_entry_by_offset_for_data(f, dp, after_offset, direction, ret, offset);
531
532         } else if (m->type == MATCH_OR_TERM) {
533                 Match *i;
534
535                 /* Find the earliest match beyond after_offset */
536
537                 LIST_FOREACH(matches, i, m->matches) {
538                         uint64_t cp;
539
540                         r = next_for_match(j, i, f, after_offset, direction, NULL, &cp);
541                         if (r < 0)
542                                 return r;
543                         else if (r > 0) {
544                                 if (np == 0 || (direction == DIRECTION_DOWN ? np > cp : np < cp))
545                                         np = cp;
546                         }
547                 }
548
549         } else if (m->type == MATCH_AND_TERM) {
550                 Match *i;
551                 bool continue_looking;
552
553                 /* Always jump to the next matching entry and repeat
554                  * this until we fine and offset that matches for all
555                  * matches. */
556
557                 if (!m->matches)
558                         return 0;
559
560                 np = 0;
561                 do {
562                         continue_looking = false;
563
564                         LIST_FOREACH(matches, i, m->matches) {
565                                 uint64_t cp, limit;
566
567                                 if (np == 0)
568                                         limit = after_offset;
569                                 else if (direction == DIRECTION_DOWN)
570                                         limit = MAX(np, after_offset);
571                                 else
572                                         limit = MIN(np, after_offset);
573
574                                 r = next_for_match(j, i, f, limit, direction, NULL, &cp);
575                                 if (r <= 0)
576                                         return r;
577
578                                 if ((direction == DIRECTION_DOWN ? cp >= after_offset : cp <= after_offset) &&
579                                     (np == 0 || (direction == DIRECTION_DOWN ? cp > np : np < cp))) {
580                                         np = cp;
581                                         continue_looking = true;
582                                 }
583                         }
584
585                 } while (continue_looking);
586         }
587
588         if (np == 0)
589                 return 0;
590
591         r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
592         if (r < 0)
593                 return r;
594
595         if (ret)
596                 *ret = n;
597         if (offset)
598                 *offset = np;
599
600         return 1;
601 }
602
603 static int find_location_for_match(
604                 sd_journal *j,
605                 Match *m,
606                 JournalFile *f,
607                 direction_t direction,
608                 Object **ret,
609                 uint64_t *offset) {
610
611         int r;
612
613         assert(j);
614         assert(m);
615         assert(f);
616
617         if (m->type == MATCH_DISCRETE) {
618                 uint64_t dp;
619
620                 r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
621                 if (r <= 0)
622                         return r;
623
624                 /* FIXME: missing: find by monotonic */
625
626                 if (j->current_location.type == LOCATION_HEAD)
627                         return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_DOWN, ret, offset);
628                 if (j->current_location.type == LOCATION_TAIL)
629                         return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_UP, ret, offset);
630                 if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
631                         return journal_file_move_to_entry_by_seqnum_for_data(f, dp, j->current_location.seqnum, direction, ret, offset);
632                 if (j->current_location.monotonic_set) {
633                         r = journal_file_move_to_entry_by_monotonic_for_data(f, dp, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
634                         if (r != -ENOENT)
635                                 return r;
636                 }
637                 if (j->current_location.realtime_set)
638                         return journal_file_move_to_entry_by_realtime_for_data(f, dp, j->current_location.realtime, direction, ret, offset);
639
640                 return journal_file_next_entry_for_data(f, NULL, 0, dp, direction, ret, offset);
641
642         } else if (m->type == MATCH_OR_TERM) {
643                 uint64_t np = 0;
644                 Object *n;
645                 Match *i;
646
647                 /* Find the earliest match */
648
649                 LIST_FOREACH(matches, i, m->matches) {
650                         uint64_t cp;
651
652                         r = find_location_for_match(j, i, f, direction, NULL, &cp);
653                         if (r < 0)
654                                 return r;
655                         else if (r > 0) {
656                                 if (np == 0 || (direction == DIRECTION_DOWN ? np > cp : np < cp))
657                                         np = cp;
658                         }
659                 }
660
661                 if (np == 0)
662                         return 0;
663
664                 r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
665                 if (r < 0)
666                         return r;
667
668                 if (ret)
669                         *ret = n;
670                 if (offset)
671                         *offset = np;
672
673                 return 1;
674
675         } else {
676                 Match *i;
677                 uint64_t np = 0;
678
679                 assert(m->type == MATCH_AND_TERM);
680
681                 /* First jump to the last match, and then find the
682                  * next one where all matches match */
683
684                 if (!m->matches)
685                         return 0;
686
687                 LIST_FOREACH(matches, i, m->matches) {
688                         uint64_t cp;
689
690                         r = find_location_for_match(j, i, f, direction, NULL, &cp);
691                         if (r <= 0)
692                                 return r;
693
694                         if (np == 0 || (direction == DIRECTION_DOWN ? np < cp : np > cp))
695                                 np = cp;
696                 }
697
698                 return next_for_match(j, m, f, np, direction, ret, offset);
699         }
700 }
701
702 static int find_location_with_matches(
703                 sd_journal *j,
704                 JournalFile *f,
705                 direction_t direction,
706                 Object **ret,
707                 uint64_t *offset) {
708
709         int r;
710
711         assert(j);
712         assert(f);
713         assert(ret);
714         assert(offset);
715
716         if (!j->level0) {
717                 /* No matches is simple */
718
719                 if (j->current_location.type == LOCATION_HEAD)
720                         return journal_file_next_entry(f, NULL, 0, DIRECTION_DOWN, ret, offset);
721                 if (j->current_location.type == LOCATION_TAIL)
722                         return journal_file_next_entry(f, NULL, 0, DIRECTION_UP, ret, offset);
723                 if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
724                         return journal_file_move_to_entry_by_seqnum(f, j->current_location.seqnum, direction, ret, offset);
725                 if (j->current_location.monotonic_set) {
726                         r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
727                         if (r != -ENOENT)
728                                 return r;
729                 }
730                 if (j->current_location.realtime_set)
731                         return journal_file_move_to_entry_by_realtime(f, j->current_location.realtime, direction, ret, offset);
732
733                 return journal_file_next_entry(f, NULL, 0, direction, ret, offset);
734         } else
735                 return find_location_for_match(j, j->level0, f, direction, ret, offset);
736 }
737
738 static int next_with_matches(
739                 sd_journal *j,
740                 JournalFile *f,
741                 direction_t direction,
742                 Object **ret,
743                 uint64_t *offset) {
744
745         Object *c;
746         uint64_t cp;
747
748         assert(j);
749         assert(f);
750         assert(ret);
751         assert(offset);
752
753         c = *ret;
754         cp = *offset;
755
756         /* No matches is easy. We simple advance the file
757          * pointer by one. */
758         if (!j->level0)
759                 return journal_file_next_entry(f, c, cp, direction, ret, offset);
760
761         /* If we have a match then we look for the next matching entry
762          * with an offset at least one step larger */
763         return next_for_match(j, j->level0, f, direction == DIRECTION_DOWN ? cp+1 : cp-1, direction, ret, offset);
764 }
765
766 static int next_beyond_location(sd_journal *j, JournalFile *f, direction_t direction, Object **ret, uint64_t *offset) {
767         Object *c;
768         uint64_t cp;
769         int r;
770
771         assert(j);
772         assert(f);
773
774         if (f->current_offset > 0) {
775                 cp = f->current_offset;
776
777                 r = journal_file_move_to_object(f, OBJECT_ENTRY, cp, &c);
778                 if (r < 0)
779                         return r;
780
781                 r = next_with_matches(j, f, direction, &c, &cp);
782                 if (r <= 0)
783                         return r;
784         } else {
785                 r = find_location_with_matches(j, f, direction, &c, &cp);
786                 if (r <= 0)
787                         return r;
788         }
789
790         /* OK, we found the spot, now let's advance until to an entry
791          * that is actually different from what we were previously
792          * looking at. This is necessary to handle entries which exist
793          * in two (or more) journal files, and which shall all be
794          * suppressed but one. */
795
796         for (;;) {
797                 bool found;
798
799                 if (j->current_location.type == LOCATION_DISCRETE) {
800                         int k;
801
802                         k = compare_with_location(f, c, &j->current_location);
803                         if (direction == DIRECTION_DOWN)
804                                 found = k > 0;
805                         else
806                                 found = k < 0;
807                 } else
808                         found = true;
809
810                 if (found) {
811                         if (ret)
812                                 *ret = c;
813                         if (offset)
814                                 *offset = cp;
815                         return 1;
816                 }
817
818                 r = next_with_matches(j, f, direction, &c, &cp);
819                 if (r <= 0)
820                         return r;
821         }
822 }
823
824 static int real_journal_next(sd_journal *j, direction_t direction) {
825         JournalFile *f, *new_file = NULL;
826         uint64_t new_offset = 0;
827         Object *o;
828         uint64_t p;
829         Iterator i;
830         int r;
831
832         if (!j)
833                 return -EINVAL;
834
835         HASHMAP_FOREACH(f, j->files, i) {
836                 bool found;
837
838                 r = next_beyond_location(j, f, direction, &o, &p);
839                 if (r < 0) {
840                         log_debug("Can't iterate through %s, ignoring: %s", f->path, strerror(-r));
841                         continue;
842                 } else if (r == 0)
843                         continue;
844
845                 if (!new_file)
846                         found = true;
847                 else {
848                         int k;
849
850                         k = compare_entry_order(f, o, new_file, new_offset);
851
852                         if (direction == DIRECTION_DOWN)
853                                 found = k < 0;
854                         else
855                                 found = k > 0;
856                 }
857
858                 if (found) {
859                         new_file = f;
860                         new_offset = p;
861                 }
862         }
863
864         if (!new_file)
865                 return 0;
866
867         r = journal_file_move_to_object(new_file, OBJECT_ENTRY, new_offset, &o);
868         if (r < 0)
869                 return r;
870
871         set_location(j, LOCATION_DISCRETE, new_file, o, new_offset);
872
873         return 1;
874 }
875
876 _public_ int sd_journal_next(sd_journal *j) {
877         return real_journal_next(j, DIRECTION_DOWN);
878 }
879
880 _public_ int sd_journal_previous(sd_journal *j) {
881         return real_journal_next(j, DIRECTION_UP);
882 }
883
884 static int real_journal_next_skip(sd_journal *j, direction_t direction, uint64_t skip) {
885         int c = 0, r;
886
887         if (!j)
888                 return -EINVAL;
889
890         if (skip == 0) {
891                 /* If this is not a discrete skip, then at least
892                  * resolve the current location */
893                 if (j->current_location.type != LOCATION_DISCRETE)
894                         return real_journal_next(j, direction);
895
896                 return 0;
897         }
898
899         do {
900                 r = real_journal_next(j, direction);
901                 if (r < 0)
902                         return r;
903
904                 if (r == 0)
905                         return c;
906
907                 skip--;
908                 c++;
909         } while (skip > 0);
910
911         return c;
912 }
913
914 _public_ int sd_journal_next_skip(sd_journal *j, uint64_t skip) {
915         return real_journal_next_skip(j, DIRECTION_DOWN, skip);
916 }
917
918 _public_ int sd_journal_previous_skip(sd_journal *j, uint64_t skip) {
919         return real_journal_next_skip(j, DIRECTION_UP, skip);
920 }
921
922 _public_ int sd_journal_get_cursor(sd_journal *j, char **cursor) {
923         Object *o;
924         int r;
925         char bid[33], sid[33];
926
927         if (!j)
928                 return -EINVAL;
929         if (!cursor)
930                 return -EINVAL;
931
932         if (!j->current_file || j->current_file->current_offset <= 0)
933                 return -EADDRNOTAVAIL;
934
935         r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
936         if (r < 0)
937                 return r;
938
939         sd_id128_to_string(j->current_file->header->seqnum_id, sid);
940         sd_id128_to_string(o->entry.boot_id, bid);
941
942         if (asprintf(cursor,
943                      "s=%s;i=%llx;b=%s;m=%llx;t=%llx;x=%llx",
944                      sid, (unsigned long long) le64toh(o->entry.seqnum),
945                      bid, (unsigned long long) le64toh(o->entry.monotonic),
946                      (unsigned long long) le64toh(o->entry.realtime),
947                      (unsigned long long) le64toh(o->entry.xor_hash)) < 0)
948                 return -ENOMEM;
949
950         return 1;
951 }
952
953 _public_ int sd_journal_seek_cursor(sd_journal *j, const char *cursor) {
954         char *w, *state;
955         size_t l;
956         unsigned long long seqnum, monotonic, realtime, xor_hash;
957         bool
958                 seqnum_id_set = false,
959                 seqnum_set = false,
960                 boot_id_set = false,
961                 monotonic_set = false,
962                 realtime_set = false,
963                 xor_hash_set = false;
964         sd_id128_t seqnum_id, boot_id;
965
966         if (!j)
967                 return -EINVAL;
968         if (isempty(cursor))
969                 return -EINVAL;
970
971         FOREACH_WORD_SEPARATOR(w, l, cursor, ";", state) {
972                 char *item;
973                 int k = 0;
974
975                 if (l < 2 || w[1] != '=')
976                         return -EINVAL;
977
978                 item = strndup(w, l);
979                 if (!item)
980                         return -ENOMEM;
981
982                 switch (w[0]) {
983
984                 case 's':
985                         seqnum_id_set = true;
986                         k = sd_id128_from_string(item+2, &seqnum_id);
987                         break;
988
989                 case 'i':
990                         seqnum_set = true;
991                         if (sscanf(item+2, "%llx", &seqnum) != 1)
992                                 k = -EINVAL;
993                         break;
994
995                 case 'b':
996                         boot_id_set = true;
997                         k = sd_id128_from_string(item+2, &boot_id);
998                         break;
999
1000                 case 'm':
1001                         monotonic_set = true;
1002                         if (sscanf(item+2, "%llx", &monotonic) != 1)
1003                                 k = -EINVAL;
1004                         break;
1005
1006                 case 't':
1007                         realtime_set = true;
1008                         if (sscanf(item+2, "%llx", &realtime) != 1)
1009                                 k = -EINVAL;
1010                         break;
1011
1012                 case 'x':
1013                         xor_hash_set = true;
1014                         if (sscanf(item+2, "%llx", &xor_hash) != 1)
1015                                 k = -EINVAL;
1016                         break;
1017                 }
1018
1019                 free(item);
1020
1021                 if (k < 0)
1022                         return k;
1023         }
1024
1025         if ((!seqnum_set || !seqnum_id_set) &&
1026             (!monotonic_set || !boot_id_set) &&
1027             !realtime_set)
1028                 return -EINVAL;
1029
1030         reset_location(j);
1031
1032         j->current_location.type = LOCATION_SEEK;
1033
1034         if (realtime_set) {
1035                 j->current_location.realtime = (uint64_t) realtime;
1036                 j->current_location.realtime_set = true;
1037         }
1038
1039         if (seqnum_set && seqnum_id_set) {
1040                 j->current_location.seqnum = (uint64_t) seqnum;
1041                 j->current_location.seqnum_id = seqnum_id;
1042                 j->current_location.seqnum_set = true;
1043         }
1044
1045         if (monotonic_set && boot_id_set) {
1046                 j->current_location.monotonic = (uint64_t) monotonic;
1047                 j->current_location.boot_id = boot_id;
1048                 j->current_location.monotonic_set = true;
1049         }
1050
1051         if (xor_hash_set) {
1052                 j->current_location.xor_hash = (uint64_t) xor_hash;
1053                 j->current_location.xor_hash_set = true;
1054         }
1055
1056         return 0;
1057 }
1058
1059 _public_ int sd_journal_test_cursor(sd_journal *j, const char *cursor) {
1060         int r;
1061         char *w, *state;
1062         size_t l;
1063         Object *o;
1064
1065         if (!j)
1066                 return -EINVAL;
1067         if (isempty(cursor))
1068                 return -EINVAL;
1069
1070         if (!j->current_file || j->current_file->current_offset <= 0)
1071                 return -EADDRNOTAVAIL;
1072
1073         r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
1074         if (r < 0)
1075                 return r;
1076
1077         FOREACH_WORD_SEPARATOR(w, l, cursor, ";", state) {
1078                 _cleanup_free_ char *item = NULL;
1079                 sd_id128_t id;
1080                 unsigned long long ll;
1081                 int k = 0;
1082
1083                 if (l < 2 || w[1] != '=')
1084                         return -EINVAL;
1085
1086                 item = strndup(w, l);
1087                 if (!item)
1088                         return -ENOMEM;
1089
1090                 switch (w[0]) {
1091
1092                 case 's':
1093                         k = sd_id128_from_string(item+2, &id);
1094                         if (k < 0)
1095                                 return k;
1096                         if (!sd_id128_equal(id, j->current_file->header->seqnum_id))
1097                                 return 0;
1098                         break;
1099
1100                 case 'i':
1101                         if (sscanf(item+2, "%llx", &ll) != 1)
1102                                 return -EINVAL;
1103                         if (ll != le64toh(o->entry.seqnum))
1104                                 return 0;
1105                         break;
1106
1107                 case 'b':
1108                         k = sd_id128_from_string(item+2, &id);
1109                         if (k < 0)
1110                                 return k;
1111                         if (!sd_id128_equal(id, o->entry.boot_id))
1112                                 return 0;
1113                         break;
1114
1115                 case 'm':
1116                         if (sscanf(item+2, "%llx", &ll) != 1)
1117                                 return -EINVAL;
1118                         if (ll != le64toh(o->entry.monotonic))
1119                                 return 0;
1120                         break;
1121
1122                 case 't':
1123                         if (sscanf(item+2, "%llx", &ll) != 1)
1124                                 return -EINVAL;
1125                         if (ll != le64toh(o->entry.realtime))
1126                                 return 0;
1127                         break;
1128
1129                 case 'x':
1130                         if (sscanf(item+2, "%llx", &ll) != 1)
1131                                 return -EINVAL;
1132                         if (ll != le64toh(o->entry.xor_hash))
1133                                 return 0;
1134                         break;
1135                 }
1136         }
1137
1138         return 1;
1139 }
1140
1141
1142 _public_ int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t usec) {
1143         if (!j)
1144                 return -EINVAL;
1145
1146         reset_location(j);
1147         j->current_location.type = LOCATION_SEEK;
1148         j->current_location.boot_id = boot_id;
1149         j->current_location.monotonic = usec;
1150         j->current_location.monotonic_set = true;
1151
1152         return 0;
1153 }
1154
1155 _public_ int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec) {
1156         if (!j)
1157                 return -EINVAL;
1158
1159         reset_location(j);
1160         j->current_location.type = LOCATION_SEEK;
1161         j->current_location.realtime = usec;
1162         j->current_location.realtime_set = true;
1163
1164         return 0;
1165 }
1166
1167 _public_ int sd_journal_seek_head(sd_journal *j) {
1168         if (!j)
1169                 return -EINVAL;
1170
1171         reset_location(j);
1172         j->current_location.type = LOCATION_HEAD;
1173
1174         return 0;
1175 }
1176
1177 _public_ int sd_journal_seek_tail(sd_journal *j) {
1178         if (!j)
1179                 return -EINVAL;
1180
1181         reset_location(j);
1182         j->current_location.type = LOCATION_TAIL;
1183
1184         return 0;
1185 }
1186
1187 static int add_file(sd_journal *j, const char *prefix, const char *filename) {
1188         char *path;
1189         int r;
1190         JournalFile *f;
1191
1192         assert(j);
1193         assert(prefix);
1194         assert(filename);
1195
1196         if ((j->flags & SD_JOURNAL_SYSTEM_ONLY) &&
1197             !(streq(filename, "system.journal") ||
1198               streq(filename, "system.journal~") ||
1199               (startswith(filename, "system@") &&
1200                (endswith(filename, ".journal") || endswith(filename, ".journal~")))))
1201                 return 0;
1202
1203         path = strjoin(prefix, "/", filename, NULL);
1204         if (!path)
1205                 return -ENOMEM;
1206
1207         if (hashmap_get(j->files, path)) {
1208                 free(path);
1209                 return 0;
1210         }
1211
1212         if (hashmap_size(j->files) >= JOURNAL_FILES_MAX) {
1213                 log_debug("Too many open journal files, not adding %s, ignoring.", path);
1214                 free(path);
1215                 return 0;
1216         }
1217
1218         r = journal_file_open(path, O_RDONLY, 0, false, false, NULL, j->mmap, NULL, &f);
1219         free(path);
1220
1221         if (r < 0) {
1222                 if (errno == ENOENT)
1223                         return 0;
1224
1225                 return r;
1226         }
1227
1228         /* journal_file_dump(f); */
1229
1230         r = hashmap_put(j->files, f->path, f);
1231         if (r < 0) {
1232                 journal_file_close(f);
1233                 return r;
1234         }
1235
1236         j->current_invalidate_counter ++;
1237
1238         log_debug("File %s got added.", f->path);
1239
1240         return 0;
1241 }
1242
1243 static int remove_file(sd_journal *j, const char *prefix, const char *filename) {
1244         char *path;
1245         JournalFile *f;
1246
1247         assert(j);
1248         assert(prefix);
1249         assert(filename);
1250
1251         path = strjoin(prefix, "/", filename, NULL);
1252         if (!path)
1253                 return -ENOMEM;
1254
1255         f = hashmap_get(j->files, path);
1256         free(path);
1257         if (!f)
1258                 return 0;
1259
1260         hashmap_remove(j->files, f->path);
1261
1262         log_debug("File %s got removed.", f->path);
1263
1264         if (j->current_file == f) {
1265                 j->current_file = NULL;
1266                 j->current_field = 0;
1267         }
1268
1269         if (j->unique_file == f) {
1270                 j->unique_file = NULL;
1271                 j->unique_offset = 0;
1272         }
1273
1274         journal_file_close(f);
1275
1276         j->current_invalidate_counter ++;
1277
1278         return 0;
1279 }
1280
1281 static int add_directory(sd_journal *j, const char *prefix, const char *dirname) {
1282         char *path;
1283         int r;
1284         DIR *d;
1285         sd_id128_t id, mid;
1286         Directory *m;
1287
1288         assert(j);
1289         assert(prefix);
1290         assert(dirname);
1291
1292         if ((j->flags & SD_JOURNAL_LOCAL_ONLY) &&
1293             (sd_id128_from_string(dirname, &id) < 0 ||
1294              sd_id128_get_machine(&mid) < 0 ||
1295              !sd_id128_equal(id, mid)))
1296             return 0;
1297
1298         path = strjoin(prefix, "/", dirname, NULL);
1299         if (!path)
1300                 return -ENOMEM;
1301
1302         d = opendir(path);
1303         if (!d) {
1304                 log_debug("Failed to open %s: %m", path);
1305                 free(path);
1306
1307                 if (errno == ENOENT)
1308                         return 0;
1309                 return -errno;
1310         }
1311
1312         m = hashmap_get(j->directories_by_path, path);
1313         if (!m) {
1314                 m = new0(Directory, 1);
1315                 if (!m) {
1316                         closedir(d);
1317                         free(path);
1318                         return -ENOMEM;
1319                 }
1320
1321                 m->is_root = false;
1322                 m->path = path;
1323
1324                 if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
1325                         closedir(d);
1326                         free(m->path);
1327                         free(m);
1328                         return -ENOMEM;
1329                 }
1330
1331                 j->current_invalidate_counter ++;
1332
1333                 log_debug("Directory %s got added.", m->path);
1334
1335         } else if (m->is_root) {
1336                 free (path);
1337                 closedir(d);
1338                 return 0;
1339         }  else
1340                 free(path);
1341
1342         if (m->wd <= 0 && j->inotify_fd >= 0) {
1343
1344                 m->wd = inotify_add_watch(j->inotify_fd, m->path,
1345                                           IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1346                                           IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM|
1347                                           IN_ONLYDIR);
1348
1349                 if (m->wd > 0 && hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m) < 0)
1350                         inotify_rm_watch(j->inotify_fd, m->wd);
1351         }
1352
1353         for (;;) {
1354                 struct dirent *de;
1355                 union dirent_storage buf;
1356
1357                 r = readdir_r(d, &buf.de, &de);
1358                 if (r != 0 || !de)
1359                         break;
1360
1361                 if (dirent_is_file_with_suffix(de, ".journal") ||
1362                     dirent_is_file_with_suffix(de, ".journal~")) {
1363                         r = add_file(j, m->path, de->d_name);
1364                         if (r < 0)
1365                                 log_debug("Failed to add file %s/%s: %s", m->path, de->d_name, strerror(-r));
1366                 }
1367         }
1368
1369         closedir(d);
1370
1371         return 0;
1372 }
1373
1374 static int add_root_directory(sd_journal *j, const char *p) {
1375         DIR *d;
1376         Directory *m;
1377         int r;
1378
1379         assert(j);
1380         assert(p);
1381
1382         if ((j->flags & SD_JOURNAL_RUNTIME_ONLY) &&
1383             !path_startswith(p, "/run"))
1384                 return -EINVAL;
1385
1386         d = opendir(p);
1387         if (!d)
1388                 return -errno;
1389
1390         m = hashmap_get(j->directories_by_path, p);
1391         if (!m) {
1392                 m = new0(Directory, 1);
1393                 if (!m) {
1394                         closedir(d);
1395                         return -ENOMEM;
1396                 }
1397
1398                 m->is_root = true;
1399                 m->path = strdup(p);
1400                 if (!m->path) {
1401                         closedir(d);
1402                         free(m);
1403                         return -ENOMEM;
1404                 }
1405
1406                 if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
1407                         closedir(d);
1408                         free(m->path);
1409                         free(m);
1410                         return -ENOMEM;
1411                 }
1412
1413                 j->current_invalidate_counter ++;
1414
1415                 log_debug("Root directory %s got added.", m->path);
1416
1417         } else if (!m->is_root) {
1418                 closedir(d);
1419                 return 0;
1420         }
1421
1422         if (m->wd <= 0 && j->inotify_fd >= 0) {
1423
1424                 m->wd = inotify_add_watch(j->inotify_fd, m->path,
1425                                           IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1426                                           IN_ONLYDIR);
1427
1428                 if (m->wd > 0 && hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m) < 0)
1429                         inotify_rm_watch(j->inotify_fd, m->wd);
1430         }
1431
1432         for (;;) {
1433                 struct dirent *de;
1434                 union dirent_storage buf;
1435                 sd_id128_t id;
1436
1437                 r = readdir_r(d, &buf.de, &de);
1438                 if (r != 0 || !de)
1439                         break;
1440
1441                 if (dirent_is_file_with_suffix(de, ".journal") ||
1442                     dirent_is_file_with_suffix(de, ".journal~")) {
1443                         r = add_file(j, m->path, de->d_name);
1444                         if (r < 0)
1445                                 log_debug("Failed to add file %s/%s: %s", m->path, de->d_name, strerror(-r));
1446
1447                 } else if ((de->d_type == DT_DIR || de->d_type == DT_LNK || de->d_type == DT_UNKNOWN) &&
1448                            sd_id128_from_string(de->d_name, &id) >= 0) {
1449
1450                         r = add_directory(j, m->path, de->d_name);
1451                         if (r < 0)
1452                                 log_debug("Failed to add directory %s/%s: %s", m->path, de->d_name, strerror(-r));
1453                 }
1454         }
1455
1456         closedir(d);
1457
1458         return 0;
1459 }
1460
1461 static int remove_directory(sd_journal *j, Directory *d) {
1462         assert(j);
1463
1464         if (d->wd > 0) {
1465                 hashmap_remove(j->directories_by_wd, INT_TO_PTR(d->wd));
1466
1467                 if (j->inotify_fd >= 0)
1468                         inotify_rm_watch(j->inotify_fd, d->wd);
1469         }
1470
1471         hashmap_remove(j->directories_by_path, d->path);
1472
1473         if (d->is_root)
1474                 log_debug("Root directory %s got removed.", d->path);
1475         else
1476                 log_debug("Directory %s got removed.", d->path);
1477
1478         free(d->path);
1479         free(d);
1480
1481         return 0;
1482 }
1483
1484 static int add_search_paths(sd_journal *j) {
1485
1486         const char search_paths[] =
1487                 "/run/log/journal\0"
1488                 "/var/log/journal\0";
1489         const char *p;
1490
1491         assert(j);
1492
1493         /* We ignore most errors here, since the idea is to only open
1494          * what's actually accessible, and ignore the rest. */
1495
1496         NULSTR_FOREACH(p, search_paths)
1497                 add_root_directory(j, p);
1498
1499         return 0;
1500 }
1501
1502 static int allocate_inotify(sd_journal *j) {
1503         assert(j);
1504
1505         if (j->inotify_fd < 0) {
1506                 j->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1507                 if (j->inotify_fd < 0)
1508                         return -errno;
1509         }
1510
1511         if (!j->directories_by_wd) {
1512                 j->directories_by_wd = hashmap_new(trivial_hash_func, trivial_compare_func);
1513                 if (!j->directories_by_wd)
1514                         return -ENOMEM;
1515         }
1516
1517         return 0;
1518 }
1519
1520 static sd_journal *journal_new(int flags, const char *path) {
1521         sd_journal *j;
1522
1523         j = new0(sd_journal, 1);
1524         if (!j)
1525                 return NULL;
1526
1527         j->inotify_fd = -1;
1528         j->flags = flags;
1529
1530         if (path) {
1531                 j->path = strdup(path);
1532                 if (!j->path) {
1533                         free(j);
1534                         return NULL;
1535                 }
1536         }
1537
1538         j->files = hashmap_new(string_hash_func, string_compare_func);
1539         if (!j->files) {
1540                 free(j->path);
1541                 free(j);
1542                 return NULL;
1543         }
1544
1545         j->directories_by_path = hashmap_new(string_hash_func, string_compare_func);
1546         if (!j->directories_by_path) {
1547                 hashmap_free(j->files);
1548                 free(j->path);
1549                 free(j);
1550                 return NULL;
1551         }
1552
1553         j->mmap = mmap_cache_new();
1554         if (!j->mmap) {
1555                 hashmap_free(j->files);
1556                 hashmap_free(j->directories_by_path);
1557                 free(j->path);
1558                 free(j);
1559                 return NULL;
1560         }
1561
1562         return j;
1563 }
1564
1565 _public_ int sd_journal_open(sd_journal **ret, int flags) {
1566         sd_journal *j;
1567         int r;
1568
1569         if (!ret)
1570                 return -EINVAL;
1571
1572         if (flags & ~(SD_JOURNAL_LOCAL_ONLY|
1573                       SD_JOURNAL_RUNTIME_ONLY|
1574                       SD_JOURNAL_SYSTEM_ONLY))
1575                 return -EINVAL;
1576
1577         j = journal_new(flags, NULL);
1578         if (!j)
1579                 return -ENOMEM;
1580
1581         r = add_search_paths(j);
1582         if (r < 0)
1583                 goto fail;
1584
1585         *ret = j;
1586         return 0;
1587
1588 fail:
1589         sd_journal_close(j);
1590
1591         return r;
1592 }
1593
1594 _public_ int sd_journal_open_directory(sd_journal **ret, const char *path, int flags) {
1595         sd_journal *j;
1596         int r;
1597
1598         if (!ret)
1599                 return -EINVAL;
1600
1601         if (!path || !path_is_absolute(path))
1602                 return -EINVAL;
1603
1604         if (flags != 0)
1605                 return -EINVAL;
1606
1607         j = journal_new(flags, path);
1608         if (!j)
1609                 return -ENOMEM;
1610
1611         r = add_root_directory(j, path);
1612         if (r < 0)
1613                 goto fail;
1614
1615         *ret = j;
1616         return 0;
1617
1618 fail:
1619         sd_journal_close(j);
1620
1621         return r;
1622 }
1623
1624 _public_ void sd_journal_close(sd_journal *j) {
1625         Directory *d;
1626         JournalFile *f;
1627
1628         if (!j)
1629                 return;
1630
1631         while ((f = hashmap_steal_first(j->files)))
1632                 journal_file_close(f);
1633
1634         hashmap_free(j->files);
1635
1636         while ((d = hashmap_first(j->directories_by_path)))
1637                 remove_directory(j, d);
1638
1639         while ((d = hashmap_first(j->directories_by_wd)))
1640                 remove_directory(j, d);
1641
1642         hashmap_free(j->directories_by_path);
1643         hashmap_free(j->directories_by_wd);
1644
1645         if (j->inotify_fd >= 0)
1646                 close_nointr_nofail(j->inotify_fd);
1647
1648         sd_journal_flush_matches(j);
1649
1650         if (j->mmap)
1651                 mmap_cache_unref(j->mmap);
1652
1653         free(j->path);
1654         free(j->unique_field);
1655         free(j);
1656 }
1657
1658 _public_ int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret) {
1659         Object *o;
1660         JournalFile *f;
1661         int r;
1662
1663         if (!j)
1664                 return -EINVAL;
1665         if (!ret)
1666                 return -EINVAL;
1667
1668         f = j->current_file;
1669         if (!f)
1670                 return -EADDRNOTAVAIL;
1671
1672         if (f->current_offset <= 0)
1673                 return -EADDRNOTAVAIL;
1674
1675         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1676         if (r < 0)
1677                 return r;
1678
1679         *ret = le64toh(o->entry.realtime);
1680         return 0;
1681 }
1682
1683 _public_ int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id128_t *ret_boot_id) {
1684         Object *o;
1685         JournalFile *f;
1686         int r;
1687         sd_id128_t id;
1688
1689         if (!j)
1690                 return -EINVAL;
1691
1692         f = j->current_file;
1693         if (!f)
1694                 return -EADDRNOTAVAIL;
1695
1696         if (f->current_offset <= 0)
1697                 return -EADDRNOTAVAIL;
1698
1699         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1700         if (r < 0)
1701                 return r;
1702
1703         if (ret_boot_id)
1704                 *ret_boot_id = o->entry.boot_id;
1705         else {
1706                 r = sd_id128_get_boot(&id);
1707                 if (r < 0)
1708                         return r;
1709
1710                 if (!sd_id128_equal(id, o->entry.boot_id))
1711                         return -ESTALE;
1712         }
1713
1714         if (ret)
1715                 *ret = le64toh(o->entry.monotonic);
1716
1717         return 0;
1718 }
1719
/* Returns true if field is acceptable as a field name: non-empty, not
 * starting with "__", and made up only of '_', 'A'-'Z' and '0'-'9'. */
static bool field_is_valid(const char *field) {
        const char *c;

        assert(field);

        if (isempty(field) || startswith(field, "__"))
                return false;

        for (c = field; *c != '\0'; c++) {
                bool allowed =
                        *c == '_' ||
                        (*c >= 'A' && *c <= 'Z') ||
                        (*c >= '0' && *c <= '9');

                if (!allowed)
                        return false;
        }

        return true;
}
1747
1748 _public_ int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *size) {
1749         JournalFile *f;
1750         uint64_t i, n;
1751         size_t field_length;
1752         int r;
1753         Object *o;
1754
1755         if (!j)
1756                 return -EINVAL;
1757         if (!field)
1758                 return -EINVAL;
1759         if (!data)
1760                 return -EINVAL;
1761         if (!size)
1762                 return -EINVAL;
1763
1764         if (!field_is_valid(field))
1765                 return -EINVAL;
1766
1767         f = j->current_file;
1768         if (!f)
1769                 return -EADDRNOTAVAIL;
1770
1771         if (f->current_offset <= 0)
1772                 return -EADDRNOTAVAIL;
1773
1774         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1775         if (r < 0)
1776                 return r;
1777
1778         field_length = strlen(field);
1779
1780         n = journal_file_entry_n_items(o);
1781         for (i = 0; i < n; i++) {
1782                 uint64_t p, l;
1783                 le64_t le_hash;
1784                 size_t t;
1785
1786                 p = le64toh(o->entry.items[i].object_offset);
1787                 le_hash = o->entry.items[i].hash;
1788                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1789                 if (r < 0)
1790                         return r;
1791
1792                 if (le_hash != o->data.hash)
1793                         return -EBADMSG;
1794
1795                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
1796
1797                 if (o->object.flags & OBJECT_COMPRESSED) {
1798
1799 #ifdef HAVE_XZ
1800                         if (uncompress_startswith(o->data.payload, l,
1801                                                   &f->compress_buffer, &f->compress_buffer_size,
1802                                                   field, field_length, '=')) {
1803
1804                                 uint64_t rsize;
1805
1806                                 if (!uncompress_blob(o->data.payload, l,
1807                                                      &f->compress_buffer, &f->compress_buffer_size, &rsize))
1808                                         return -EBADMSG;
1809
1810                                 *data = f->compress_buffer;
1811                                 *size = (size_t) rsize;
1812
1813                                 return 0;
1814                         }
1815 #else
1816                         return -EPROTONOSUPPORT;
1817 #endif
1818
1819                 } else if (l >= field_length+1 &&
1820                            memcmp(o->data.payload, field, field_length) == 0 &&
1821                            o->data.payload[field_length] == '=') {
1822
1823                         t = (size_t) l;
1824
1825                         if ((uint64_t) t != l)
1826                                 return -E2BIG;
1827
1828                         *data = o->data.payload;
1829                         *size = t;
1830
1831                         return 0;
1832                 }
1833
1834                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1835                 if (r < 0)
1836                         return r;
1837         }
1838
1839         return -ENOENT;
1840 }
1841
1842 static int return_data(JournalFile *f, Object *o, const void **data, size_t *size) {
1843         size_t t;
1844         uint64_t l;
1845
1846         l = le64toh(o->object.size) - offsetof(Object, data.payload);
1847         t = (size_t) l;
1848
1849         /* We can't read objects larger than 4G on a 32bit machine */
1850         if ((uint64_t) t != l)
1851                 return -E2BIG;
1852
1853         if (o->object.flags & OBJECT_COMPRESSED) {
1854 #ifdef HAVE_XZ
1855                 uint64_t rsize;
1856
1857                 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
1858                         return -EBADMSG;
1859
1860                 *data = f->compress_buffer;
1861                 *size = (size_t) rsize;
1862 #else
1863                 return -EPROTONOSUPPORT;
1864 #endif
1865         } else {
1866                 *data = o->data.payload;
1867                 *size = t;
1868         }
1869
1870         return 0;
1871 }
1872
1873 _public_ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) {
1874         JournalFile *f;
1875         uint64_t p, n;
1876         le64_t le_hash;
1877         int r;
1878         Object *o;
1879
1880         if (!j)
1881                 return -EINVAL;
1882         if (!data)
1883                 return -EINVAL;
1884         if (!size)
1885                 return -EINVAL;
1886
1887         f = j->current_file;
1888         if (!f)
1889                 return -EADDRNOTAVAIL;
1890
1891         if (f->current_offset <= 0)
1892                 return -EADDRNOTAVAIL;
1893
1894         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1895         if (r < 0)
1896                 return r;
1897
1898         n = journal_file_entry_n_items(o);
1899         if (j->current_field >= n)
1900                 return 0;
1901
1902         p = le64toh(o->entry.items[j->current_field].object_offset);
1903         le_hash = o->entry.items[j->current_field].hash;
1904         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1905         if (r < 0)
1906                 return r;
1907
1908         if (le_hash != o->data.hash)
1909                 return -EBADMSG;
1910
1911         r = return_data(f, o, data, size);
1912         if (r < 0)
1913                 return r;
1914
1915         j->current_field ++;
1916
1917         return 1;
1918 }
1919
1920 _public_ void sd_journal_restart_data(sd_journal *j) {
1921         if (!j)
1922                 return;
1923
1924         j->current_field = 0;
1925 }
1926
1927 _public_ int sd_journal_get_fd(sd_journal *j) {
1928         int r;
1929
1930         if (!j)
1931                 return -EINVAL;
1932
1933         if (j->inotify_fd >= 0)
1934                 return j->inotify_fd;
1935
1936         r = allocate_inotify(j);
1937         if (r < 0)
1938                 return r;
1939
1940         /* Iterate through all dirs again, to add them to the
1941          * inotify */
1942         if (j->path)
1943                 r = add_root_directory(j, j->path);
1944         else
1945                 r = add_search_paths(j);
1946         if (r < 0)
1947                 return r;
1948
1949         return j->inotify_fd;
1950 }
1951
1952 static void process_inotify_event(sd_journal *j, struct inotify_event *e) {
1953         Directory *d;
1954         int r;
1955
1956         assert(j);
1957         assert(e);
1958
1959         /* Is this a subdirectory we watch? */
1960         d = hashmap_get(j->directories_by_wd, INT_TO_PTR(e->wd));
1961         if (d) {
1962                 sd_id128_t id;
1963
1964                 if (!(e->mask & IN_ISDIR) && e->len > 0 &&
1965                     (endswith(e->name, ".journal") ||
1966                      endswith(e->name, ".journal~"))) {
1967
1968                         /* Event for a journal file */
1969
1970                         if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) {
1971                                 r = add_file(j, d->path, e->name);
1972                                 if (r < 0)
1973                                         log_debug("Failed to add file %s/%s: %s", d->path, e->name, strerror(-r));
1974
1975                         } else if (e->mask & (IN_DELETE|IN_MOVED_FROM|IN_UNMOUNT)) {
1976
1977                                 r = remove_file(j, d->path, e->name);
1978                                 if (r < 0)
1979                                         log_debug("Failed to remove file %s/%s: %s", d->path, e->name, strerror(-r));
1980                         }
1981
1982                 } else if (!d->is_root && e->len == 0) {
1983
1984                         /* Event for a subdirectory */
1985
1986                         if (e->mask & (IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT)) {
1987                                 r = remove_directory(j, d);
1988                                 if (r < 0)
1989                                         log_debug("Failed to remove directory %s: %s", d->path, strerror(-r));
1990                         }
1991
1992
1993                 } else if (d->is_root && (e->mask & IN_ISDIR) && e->len > 0 && sd_id128_from_string(e->name, &id) >= 0) {
1994
1995                         /* Event for root directory */
1996
1997                         if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) {
1998                                 r = add_directory(j, d->path, e->name);
1999                                 if (r < 0)
2000                                         log_debug("Failed to add directory %s/%s: %s", d->path, e->name, strerror(-r));
2001                         }
2002                 }
2003
2004                 return;
2005         }
2006
2007         if (e->mask & IN_IGNORED)
2008                 return;
2009
2010         log_warning("Unknown inotify event.");
2011 }
2012
2013 static int determine_change(sd_journal *j) {
2014         bool b;
2015
2016         assert(j);
2017
2018         b = j->current_invalidate_counter != j->last_invalidate_counter;
2019         j->last_invalidate_counter = j->current_invalidate_counter;
2020
2021         return b ? SD_JOURNAL_INVALIDATE : SD_JOURNAL_APPEND;
2022 }
2023
2024 _public_ int sd_journal_process(sd_journal *j) {
2025         uint8_t buffer[sizeof(struct inotify_event) + FILENAME_MAX] _alignas_(struct inotify_event);
2026         bool got_something = false;
2027
2028         if (!j)
2029                 return -EINVAL;
2030
2031         for (;;) {
2032                 struct inotify_event *e;
2033                 ssize_t l;
2034
2035                 l = read(j->inotify_fd, buffer, sizeof(buffer));
2036                 if (l < 0) {
2037                         if (errno == EAGAIN || errno == EINTR)
2038                                 return got_something ? determine_change(j) : SD_JOURNAL_NOP;
2039
2040                         return -errno;
2041                 }
2042
2043                 got_something = true;
2044
2045                 e = (struct inotify_event*) buffer;
2046                 while (l > 0) {
2047                         size_t step;
2048
2049                         process_inotify_event(j, e);
2050
2051                         step = sizeof(struct inotify_event) + e->len;
2052                         assert(step <= (size_t) l);
2053
2054                         e = (struct inotify_event*) ((uint8_t*) e + step);
2055                         l -= step;
2056                 }
2057         }
2058
2059         return determine_change(j);
2060 }
2061
2062 _public_ int sd_journal_wait(sd_journal *j, uint64_t timeout_usec) {
2063         int r;
2064
2065         assert(j);
2066
2067         if (j->inotify_fd < 0) {
2068
2069                 /* This is the first invocation, hence create the
2070                  * inotify watch */
2071                 r = sd_journal_get_fd(j);
2072                 if (r < 0)
2073                         return r;
2074
2075                 /* The journal might have changed since the context
2076                  * object was created and we weren't watching before,
2077                  * hence don't wait for anything, and return
2078                  * immediately. */
2079                 return determine_change(j);
2080         }
2081
2082         do {
2083                 r = fd_wait_for_event(j->inotify_fd, POLLIN, timeout_usec);
2084         } while (r == -EINTR);
2085
2086         if (r < 0)
2087                 return r;
2088
2089         return sd_journal_process(j);
2090 }
2091
2092 _public_ int sd_journal_get_cutoff_realtime_usec(sd_journal *j, uint64_t *from, uint64_t *to) {
2093         Iterator i;
2094         JournalFile *f;
2095         bool first = true;
2096         int r;
2097
2098         if (!j)
2099                 return -EINVAL;
2100         if (!from && !to)
2101                 return -EINVAL;
2102
2103         HASHMAP_FOREACH(f, j->files, i) {
2104                 usec_t fr, t;
2105
2106                 r = journal_file_get_cutoff_realtime_usec(f, &fr, &t);
2107                 if (r == -ENOENT)
2108                         continue;
2109                 if (r < 0)
2110                         return r;
2111                 if (r == 0)
2112                         continue;
2113
2114                 if (first) {
2115                         if (from)
2116                                 *from = fr;
2117                         if (to)
2118                                 *to = t;
2119                         first = false;
2120                 } else {
2121                         if (from)
2122                                 *from = MIN(fr, *from);
2123                         if (to)
2124                                 *to = MAX(t, *to);
2125                 }
2126         }
2127
2128         return first ? 0 : 1;
2129 }
2130
2131 _public_ int sd_journal_get_cutoff_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t *from, uint64_t *to) {
2132         Iterator i;
2133         JournalFile *f;
2134         bool first = true;
2135         int r;
2136
2137         if (!j)
2138                 return -EINVAL;
2139         if (!from && !to)
2140                 return -EINVAL;
2141
2142         HASHMAP_FOREACH(f, j->files, i) {
2143                 usec_t fr, t;
2144
2145                 r = journal_file_get_cutoff_monotonic_usec(f, boot_id, &fr, &t);
2146                 if (r == -ENOENT)
2147                         continue;
2148                 if (r < 0)
2149                         return r;
2150                 if (r == 0)
2151                         continue;
2152
2153                 if (first) {
2154                         if (from)
2155                                 *from = fr;
2156                         if (to)
2157                                 *to = t;
2158                         first = false;
2159                 } else {
2160                         if (from)
2161                                 *from = MIN(fr, *from);
2162                         if (to)
2163                                 *to = MAX(t, *to);
2164                 }
2165         }
2166
2167         return first ? 0 : 1;
2168 }
2169
2170 void journal_print_header(sd_journal *j) {
2171         Iterator i;
2172         JournalFile *f;
2173         bool newline = false;
2174
2175         assert(j);
2176
2177         HASHMAP_FOREACH(f, j->files, i) {
2178                 if (newline)
2179                         putchar('\n');
2180                 else
2181                         newline = true;
2182
2183                 journal_file_print_header(f);
2184         }
2185 }
2186
2187 _public_ int sd_journal_get_usage(sd_journal *j, uint64_t *bytes) {
2188         Iterator i;
2189         JournalFile *f;
2190         uint64_t sum = 0;
2191
2192         if (!j)
2193                 return -EINVAL;
2194         if (!bytes)
2195                 return -EINVAL;
2196
2197         HASHMAP_FOREACH(f, j->files, i) {
2198                 struct stat st;
2199
2200                 if (fstat(f->fd, &st) < 0)
2201                         return -errno;
2202
2203                 sum += (uint64_t) st.st_blocks * 512ULL;
2204         }
2205
2206         *bytes = sum;
2207         return 0;
2208 }
2209
2210 _public_ int sd_journal_query_unique(sd_journal *j, const char *field) {
2211         char *f;
2212
2213         if (!j)
2214                 return -EINVAL;
2215         if (isempty(field))
2216                 return -EINVAL;
2217         if (!field_is_valid(field))
2218                 return -EINVAL;
2219
2220         f = strdup(field);
2221         if (!f)
2222                 return -ENOMEM;
2223
2224         free(j->unique_field);
2225         j->unique_field = f;
2226         j->unique_file = NULL;
2227         j->unique_offset = 0;
2228
2229         return 0;
2230 }
2231
2232 _public_ int sd_journal_enumerate_unique(sd_journal *j, const void **data, size_t *l) {
2233         Object *o;
2234         size_t k;
2235         int r;
2236
2237         if (!j)
2238                 return -EINVAL;
2239         if (!data)
2240                 return -EINVAL;
2241         if (!l)
2242                 return -EINVAL;
2243         if (!j->unique_field)
2244                 return -EINVAL;
2245
2246         k = strlen(j->unique_field);
2247
2248         if (!j->unique_file) {
2249                 j->unique_file = hashmap_first(j->files);
2250                 if (!j->unique_file)
2251                         return 0;
2252                 j->unique_offset = 0;
2253         }
2254
2255         for (;;) {
2256                 JournalFile *of;
2257                 Iterator i;
2258                 const void *odata;
2259                 size_t ol;
2260                 bool found;
2261
2262                 /* Proceed to next data object in the field's linked list */
2263                 if (j->unique_offset == 0) {
2264                         r = journal_file_find_field_object(j->unique_file, j->unique_field, k, &o, NULL);
2265                         if (r < 0)
2266                                 return r;
2267
2268                         j->unique_offset = r > 0 ? le64toh(o->field.head_data_offset) : 0;
2269                 } else {
2270                         r = journal_file_move_to_object(j->unique_file, OBJECT_DATA, j->unique_offset, &o);
2271                         if (r < 0)
2272                                 return r;
2273
2274                         j->unique_offset = le64toh(o->data.next_field_offset);
2275                 }
2276
2277                 /* We reached the end of the list? Then start again, with the next file */
2278                 if (j->unique_offset == 0) {
2279                         JournalFile *n;
2280
2281                         n = hashmap_next(j->files, j->unique_file->path);
2282                         if (!n)
2283                                 return 0;
2284
2285                         j->unique_file = n;
2286                         continue;
2287                 }
2288
2289                 /* We do not use the type context here, but 0 instead,
2290                  * so that we can look at this data object at the same
2291                  * time as one on another file */
2292                 r = journal_file_move_to_object(j->unique_file, 0, j->unique_offset, &o);
2293                 if (r < 0)
2294                         return r;
2295
2296                 /* Let's do the type check by hand, since we used 0 context above. */
2297                 if (o->object.type != OBJECT_DATA)
2298                         return -EBADMSG;
2299
2300                 r = return_data(j->unique_file, o, &odata, &ol);
2301                 if (r < 0)
2302                         return r;
2303
2304                 /* OK, now let's see if we already returned this data
2305                  * object by checking if it exists in the earlier
2306                  * traversed files. */
2307                 found = false;
2308                 HASHMAP_FOREACH(of, j->files, i) {
2309                         Object *oo;
2310                         uint64_t op;
2311
2312                         if (of == j->unique_file)
2313                                 break;
2314
2315                         /* Skip this file it didn't have any fields
2316                          * indexed */
2317                         if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) &&
2318                             le64toh(of->header->n_fields) <= 0)
2319                                 continue;
2320
2321                         r = journal_file_find_data_object_with_hash(of, odata, ol, le64toh(o->data.hash), &oo, &op);
2322                         if (r < 0)
2323                                 return r;
2324
2325                         if (r > 0)
2326                                 found = true;
2327                 }
2328
2329                 if (found)
2330                         continue;
2331
2332                 r = return_data(j->unique_file, o, data, l);
2333                 if (r < 0)
2334                         return r;
2335
2336                 return 1;
2337         }
2338 }
2339
2340 _public_ void sd_journal_restart_unique(sd_journal *j) {
2341         if (!j)
2342                 return;
2343
2344         j->unique_file = NULL;
2345         j->unique_offset = 0;
2346 }