chiark / gitweb /
journalctl: be smarter about journal error checks
[elogind.git] / src / journal / sd-journal.c
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2011 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
21
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <stddef.h>
25 #include <unistd.h>
26 #include <sys/inotify.h>
27 #include <sys/poll.h>
28 #include <sys/vfs.h>
29 #include <linux/magic.h>
30
31 #include "sd-journal.h"
32 #include "journal-def.h"
33 #include "journal-file.h"
34 #include "hashmap.h"
35 #include "list.h"
36 #include "path-util.h"
37 #include "lookup3.h"
38 #include "compress.h"
39 #include "journal-internal.h"
40 #include "missing.h"
41 #include "catalog.h"
42 #include "replace-var.h"
43
44 #define JOURNAL_FILES_MAX 1024
45
46 #define JOURNAL_FILES_RECHECK_USEC (2 * USEC_PER_SEC)
47
48 #define REPLACE_VAR_MAX 256
49
50 #define DEFAULT_DATA_THRESHOLD (64*1024)
51
52 /* We return an error here only if we didn't manage to
53    memorize the real error. */
54 static int set_put_error(Set* errors, int r) {
55         if (r >= 0)
56                 return r;
57
58         return set_put(errors, INT_TO_PTR(r));
59 }
60
61 static void detach_location(sd_journal *j) {
62         Iterator i;
63         JournalFile *f;
64
65         assert(j);
66
67         j->current_file = NULL;
68         j->current_field = 0;
69
70         HASHMAP_FOREACH(f, j->files, i)
71                 f->current_offset = 0;
72 }
73
74 static void reset_location(sd_journal *j) {
75         assert(j);
76
77         detach_location(j);
78         zero(j->current_location);
79 }
80
81 static void init_location(Location *l, LocationType type, JournalFile *f, Object *o) {
82         assert(l);
83         assert(type == LOCATION_DISCRETE || type == LOCATION_SEEK);
84         assert(f);
85         assert(o->object.type == OBJECT_ENTRY);
86
87         l->type = type;
88         l->seqnum = le64toh(o->entry.seqnum);
89         l->seqnum_id = f->header->seqnum_id;
90         l->realtime = le64toh(o->entry.realtime);
91         l->monotonic = le64toh(o->entry.monotonic);
92         l->boot_id = o->entry.boot_id;
93         l->xor_hash = le64toh(o->entry.xor_hash);
94
95         l->seqnum_set = l->realtime_set = l->monotonic_set = l->xor_hash_set = true;
96 }
97
98 static void set_location(sd_journal *j, LocationType type, JournalFile *f, Object *o, uint64_t offset) {
99         assert(j);
100         assert(type == LOCATION_DISCRETE || type == LOCATION_SEEK);
101         assert(f);
102         assert(o);
103
104         init_location(&j->current_location, type, f, o);
105
106         j->current_file = f;
107         j->current_field = 0;
108
109         f->current_offset = offset;
110 }
111
/* Checks whether the "size" bytes at "data" form a valid match
 * expression of the shape FIELD=value.
 *
 * Rules enforced:
 *   - at least two bytes long
 *   - must not begin with "__"
 *   - the field name (everything before the first '=') is non-empty and
 *     consists only of 'A'-'Z', '0'-'9' and '_'
 *   - an '=' must be present
 *
 * Returns non-zero if valid, zero otherwise. */
static int match_is_valid(const void *data, size_t size) {
        const char *b, *p;

        assert(data);

        if (size < 2)
                return false;

        b = data;

        /* "data" is length-delimited and not necessarily NUL-terminated,
         * hence compare the two leading bytes directly instead of using
         * a C string helper. size >= 2 is guaranteed at this point. */
        if (b[0] == '_' && b[1] == '_')
                return false;

        for (p = b; p < b + size; p++) {

                /* End of the field name: valid only if the name is
                 * not empty */
                if (*p == '=')
                        return p > b;

                if (*p == '_')
                        continue;

                if (*p >= 'A' && *p <= 'Z')
                        continue;

                if (*p >= '0' && *p <= '9')
                        continue;

                return false;
        }

        /* No '=' found at all */
        return false;
}
143
/* Tells whether two FIELD=value blobs refer to the same field.
 *
 * Both buffers are compared byte by byte over their common length. A
 * differing byte means "different field"; reaching an '=' that is
 * equal in both means "same field". If the common length is exhausted
 * without a difference the buffers are considered the same field too. */
static bool same_field(const void *_a, size_t s, const void *_b, size_t t) {
        const uint8_t *lhs = _a, *rhs = _b;
        const size_t common = s < t ? s : t;
        size_t k = 0;

        while (k < common) {
                const uint8_t c = lhs[k];

                if (c != rhs[k])
                        return false;

                if (c == '=')
                        return true;

                k++;
        }

        return true;
}
159
160 static Match *match_new(Match *p, MatchType t) {
161         Match *m;
162
163         m = new0(Match, 1);
164         if (!m)
165                 return NULL;
166
167         m->type = t;
168
169         if (p) {
170                 m->parent = p;
171                 LIST_PREPEND(Match, matches, p->matches, m);
172         }
173
174         return m;
175 }
176
177 static void match_free(Match *m) {
178         assert(m);
179
180         while (m->matches)
181                 match_free(m->matches);
182
183         if (m->parent)
184                 LIST_REMOVE(Match, matches, m->parent->matches, m);
185
186         free(m->data);
187         free(m);
188 }
189
190 static void match_free_if_empty(Match *m) {
191         assert(m);
192
193         if (m->matches)
194                 return;
195
196         match_free(m);
197 }
198
199 _public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size) {
200         Match *l2, *l3, *add_here = NULL, *m;
201         le64_t le_hash;
202
203         if (!j)
204                 return -EINVAL;
205
206         if (!data)
207                 return -EINVAL;
208
209         if (size == 0)
210                 size = strlen(data);
211
212         if (!match_is_valid(data, size))
213                 return -EINVAL;
214
215         /* level 0: OR term
216          * level 1: AND terms
217          * level 2: OR terms
218          * level 3: concrete matches */
219
220         if (!j->level0) {
221                 j->level0 = match_new(NULL, MATCH_OR_TERM);
222                 if (!j->level0)
223                         return -ENOMEM;
224         }
225
226         if (!j->level1) {
227                 j->level1 = match_new(j->level0, MATCH_AND_TERM);
228                 if (!j->level1)
229                         return -ENOMEM;
230         }
231
232         assert(j->level0->type == MATCH_OR_TERM);
233         assert(j->level1->type == MATCH_AND_TERM);
234
235         le_hash = htole64(hash64(data, size));
236
237         LIST_FOREACH(matches, l2, j->level1->matches) {
238                 assert(l2->type == MATCH_OR_TERM);
239
240                 LIST_FOREACH(matches, l3, l2->matches) {
241                         assert(l3->type == MATCH_DISCRETE);
242
243                         /* Exactly the same match already? Then ignore
244                          * this addition */
245                         if (l3->le_hash == le_hash &&
246                             l3->size == size &&
247                             memcmp(l3->data, data, size) == 0)
248                                 return 0;
249
250                         /* Same field? Then let's add this to this OR term */
251                         if (same_field(data, size, l3->data, l3->size)) {
252                                 add_here = l2;
253                                 break;
254                         }
255                 }
256
257                 if (add_here)
258                         break;
259         }
260
261         if (!add_here) {
262                 add_here = match_new(j->level1, MATCH_OR_TERM);
263                 if (!add_here)
264                         goto fail;
265         }
266
267         m = match_new(add_here, MATCH_DISCRETE);
268         if (!m)
269                 goto fail;
270
271         m->le_hash = le_hash;
272         m->size = size;
273         m->data = memdup(data, size);
274         if (!m->data)
275                 goto fail;
276
277         detach_location(j);
278
279         return 0;
280
281 fail:
282         if (add_here)
283                 match_free_if_empty(add_here);
284
285         if (j->level1)
286                 match_free_if_empty(j->level1);
287
288         if (j->level0)
289                 match_free_if_empty(j->level0);
290
291         return -ENOMEM;
292 }
293
294 _public_ int sd_journal_add_disjunction(sd_journal *j) {
295         Match *m;
296
297         assert(j);
298
299         if (!j->level0)
300                 return 0;
301
302         if (!j->level1)
303                 return 0;
304
305         if (!j->level1->matches)
306                 return 0;
307
308         m = match_new(j->level0, MATCH_AND_TERM);
309         if (!m)
310                 return -ENOMEM;
311
312         j->level1 = m;
313         return 0;
314 }
315
316 static char *match_make_string(Match *m) {
317         char *p, *r;
318         Match *i;
319         bool enclose = false;
320
321         if (!m)
322                 return strdup("");
323
324         if (m->type == MATCH_DISCRETE)
325                 return strndup(m->data, m->size);
326
327         p = NULL;
328         LIST_FOREACH(matches, i, m->matches) {
329                 char *t, *k;
330
331                 t = match_make_string(i);
332                 if (!t) {
333                         free(p);
334                         return NULL;
335                 }
336
337                 if (p) {
338                         k = strjoin(p, m->type == MATCH_OR_TERM ? " OR " : " AND ", t, NULL);
339                         free(p);
340                         free(t);
341
342                         if (!k)
343                                 return NULL;
344
345                         p = k;
346
347                         enclose = true;
348                 } else {
349                         free(p);
350                         p = t;
351                 }
352         }
353
354         if (enclose) {
355                 r = strjoin("(", p, ")", NULL);
356                 free(p);
357                 return r;
358         }
359
360         return p;
361 }
362
363 char *journal_make_match_string(sd_journal *j) {
364         assert(j);
365
366         return match_make_string(j->level0);
367 }
368
369 _public_ void sd_journal_flush_matches(sd_journal *j) {
370
371         if (!j)
372                 return;
373
374         if (j->level0)
375                 match_free(j->level0);
376
377         j->level0 = j->level1 = NULL;
378
379         detach_location(j);
380 }
381
382 static int compare_entry_order(JournalFile *af, Object *_ao,
383                          JournalFile *bf, uint64_t bp) {
384
385         uint64_t a, b;
386         Object *ao, *bo;
387         int r;
388
389         assert(af);
390         assert(bf);
391         assert(_ao);
392
393         /* The mmap cache might invalidate the object from the first
394          * file if we look at the one from the second file. Hence
395          * temporarily copy the header of the first one, and look at
396          * that only. */
397         ao = alloca(offsetof(EntryObject, items));
398         memcpy(ao, _ao, offsetof(EntryObject, items));
399
400         r = journal_file_move_to_object(bf, OBJECT_ENTRY, bp, &bo);
401         if (r < 0)
402                 return strcmp(af->path, bf->path);
403
404         /* We operate on two different files here, hence we can access
405          * two objects at the same time, which we normally can't.
406          *
407          * If contents and timestamps match, these entries are
408          * identical, even if the seqnum does not match */
409
410         if (sd_id128_equal(ao->entry.boot_id, bo->entry.boot_id) &&
411             ao->entry.monotonic == bo->entry.monotonic &&
412             ao->entry.realtime == bo->entry.realtime &&
413             ao->entry.xor_hash == bo->entry.xor_hash)
414                 return 0;
415
416         if (sd_id128_equal(af->header->seqnum_id, bf->header->seqnum_id)) {
417
418                 /* If this is from the same seqnum source, compare
419                  * seqnums */
420                 a = le64toh(ao->entry.seqnum);
421                 b = le64toh(bo->entry.seqnum);
422
423                 if (a < b)
424                         return -1;
425                 if (a > b)
426                         return 1;
427
428                 /* Wow! This is weird, different data but the same
429                  * seqnums? Something is borked, but let's make the
430                  * best of it and compare by time. */
431         }
432
433         if (sd_id128_equal(ao->entry.boot_id, bo->entry.boot_id)) {
434
435                 /* If the boot id matches compare monotonic time */
436                 a = le64toh(ao->entry.monotonic);
437                 b = le64toh(bo->entry.monotonic);
438
439                 if (a < b)
440                         return -1;
441                 if (a > b)
442                         return 1;
443         }
444
445         /* Otherwise compare UTC time */
446         a = le64toh(ao->entry.realtime);
447         b = le64toh(bo->entry.realtime);
448
449         if (a < b)
450                 return -1;
451         if (a > b)
452                 return 1;
453
454         /* Finally, compare by contents */
455         a = le64toh(ao->entry.xor_hash);
456         b = le64toh(bo->entry.xor_hash);
457
458         if (a < b)
459                 return -1;
460         if (a > b)
461                 return 1;
462
463         return 0;
464 }
465
466 static int compare_with_location(JournalFile *af, Object *ao, Location *l) {
467         uint64_t a;
468
469         assert(af);
470         assert(ao);
471         assert(l);
472         assert(l->type == LOCATION_DISCRETE || l->type == LOCATION_SEEK);
473
474         if (l->monotonic_set &&
475             sd_id128_equal(ao->entry.boot_id, l->boot_id) &&
476             l->realtime_set &&
477             le64toh(ao->entry.realtime) == l->realtime &&
478             l->xor_hash_set &&
479             le64toh(ao->entry.xor_hash) == l->xor_hash)
480                 return 0;
481
482         if (l->seqnum_set &&
483             sd_id128_equal(af->header->seqnum_id, l->seqnum_id)) {
484
485                 a = le64toh(ao->entry.seqnum);
486
487                 if (a < l->seqnum)
488                         return -1;
489                 if (a > l->seqnum)
490                         return 1;
491         }
492
493         if (l->monotonic_set &&
494             sd_id128_equal(ao->entry.boot_id, l->boot_id)) {
495
496                 a = le64toh(ao->entry.monotonic);
497
498                 if (a < l->monotonic)
499                         return -1;
500                 if (a > l->monotonic)
501                         return 1;
502         }
503
504         if (l->realtime_set) {
505
506                 a = le64toh(ao->entry.realtime);
507
508                 if (a < l->realtime)
509                         return -1;
510                 if (a > l->realtime)
511                         return 1;
512         }
513
514         if (l->xor_hash_set) {
515                 a = le64toh(ao->entry.xor_hash);
516
517                 if (a < l->xor_hash)
518                         return -1;
519                 if (a > l->xor_hash)
520                         return 1;
521         }
522
523         return 0;
524 }
525
526 static int next_for_match(
527                 sd_journal *j,
528                 Match *m,
529                 JournalFile *f,
530                 uint64_t after_offset,
531                 direction_t direction,
532                 Object **ret,
533                 uint64_t *offset) {
534
535         int r;
536         uint64_t np = 0;
537         Object *n;
538
539         assert(j);
540         assert(m);
541         assert(f);
542
543         if (m->type == MATCH_DISCRETE) {
544                 uint64_t dp;
545
546                 r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
547                 if (r <= 0)
548                         return r;
549
550                 return journal_file_move_to_entry_by_offset_for_data(f, dp, after_offset, direction, ret, offset);
551
552         } else if (m->type == MATCH_OR_TERM) {
553                 Match *i;
554
555                 /* Find the earliest match beyond after_offset */
556
557                 LIST_FOREACH(matches, i, m->matches) {
558                         uint64_t cp;
559
560                         r = next_for_match(j, i, f, after_offset, direction, NULL, &cp);
561                         if (r < 0)
562                                 return r;
563                         else if (r > 0) {
564                                 if (np == 0 || (direction == DIRECTION_DOWN ? np > cp : np < cp))
565                                         np = cp;
566                         }
567                 }
568
569         } else if (m->type == MATCH_AND_TERM) {
570                 Match *i;
571                 bool continue_looking;
572
573                 /* Always jump to the next matching entry and repeat
574                  * this until we fine and offset that matches for all
575                  * matches. */
576
577                 if (!m->matches)
578                         return 0;
579
580                 np = 0;
581                 do {
582                         continue_looking = false;
583
584                         LIST_FOREACH(matches, i, m->matches) {
585                                 uint64_t cp, limit;
586
587                                 if (np == 0)
588                                         limit = after_offset;
589                                 else if (direction == DIRECTION_DOWN)
590                                         limit = MAX(np, after_offset);
591                                 else
592                                         limit = MIN(np, after_offset);
593
594                                 r = next_for_match(j, i, f, limit, direction, NULL, &cp);
595                                 if (r <= 0)
596                                         return r;
597
598                                 if ((direction == DIRECTION_DOWN ? cp >= after_offset : cp <= after_offset) &&
599                                     (np == 0 || (direction == DIRECTION_DOWN ? cp > np : np < cp))) {
600                                         np = cp;
601                                         continue_looking = true;
602                                 }
603                         }
604
605                 } while (continue_looking);
606         }
607
608         if (np == 0)
609                 return 0;
610
611         r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
612         if (r < 0)
613                 return r;
614
615         if (ret)
616                 *ret = n;
617         if (offset)
618                 *offset = np;
619
620         return 1;
621 }
622
623 static int find_location_for_match(
624                 sd_journal *j,
625                 Match *m,
626                 JournalFile *f,
627                 direction_t direction,
628                 Object **ret,
629                 uint64_t *offset) {
630
631         int r;
632
633         assert(j);
634         assert(m);
635         assert(f);
636
637         if (m->type == MATCH_DISCRETE) {
638                 uint64_t dp;
639
640                 r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
641                 if (r <= 0)
642                         return r;
643
644                 /* FIXME: missing: find by monotonic */
645
646                 if (j->current_location.type == LOCATION_HEAD)
647                         return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_DOWN, ret, offset);
648                 if (j->current_location.type == LOCATION_TAIL)
649                         return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_UP, ret, offset);
650                 if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
651                         return journal_file_move_to_entry_by_seqnum_for_data(f, dp, j->current_location.seqnum, direction, ret, offset);
652                 if (j->current_location.monotonic_set) {
653                         r = journal_file_move_to_entry_by_monotonic_for_data(f, dp, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
654                         if (r != -ENOENT)
655                                 return r;
656                 }
657                 if (j->current_location.realtime_set)
658                         return journal_file_move_to_entry_by_realtime_for_data(f, dp, j->current_location.realtime, direction, ret, offset);
659
660                 return journal_file_next_entry_for_data(f, NULL, 0, dp, direction, ret, offset);
661
662         } else if (m->type == MATCH_OR_TERM) {
663                 uint64_t np = 0;
664                 Object *n;
665                 Match *i;
666
667                 /* Find the earliest match */
668
669                 LIST_FOREACH(matches, i, m->matches) {
670                         uint64_t cp;
671
672                         r = find_location_for_match(j, i, f, direction, NULL, &cp);
673                         if (r < 0)
674                                 return r;
675                         else if (r > 0) {
676                                 if (np == 0 || (direction == DIRECTION_DOWN ? np > cp : np < cp))
677                                         np = cp;
678                         }
679                 }
680
681                 if (np == 0)
682                         return 0;
683
684                 r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
685                 if (r < 0)
686                         return r;
687
688                 if (ret)
689                         *ret = n;
690                 if (offset)
691                         *offset = np;
692
693                 return 1;
694
695         } else {
696                 Match *i;
697                 uint64_t np = 0;
698
699                 assert(m->type == MATCH_AND_TERM);
700
701                 /* First jump to the last match, and then find the
702                  * next one where all matches match */
703
704                 if (!m->matches)
705                         return 0;
706
707                 LIST_FOREACH(matches, i, m->matches) {
708                         uint64_t cp;
709
710                         r = find_location_for_match(j, i, f, direction, NULL, &cp);
711                         if (r <= 0)
712                                 return r;
713
714                         if (np == 0 || (direction == DIRECTION_DOWN ? np < cp : np > cp))
715                                 np = cp;
716                 }
717
718                 return next_for_match(j, m, f, np, direction, ret, offset);
719         }
720 }
721
722 static int find_location_with_matches(
723                 sd_journal *j,
724                 JournalFile *f,
725                 direction_t direction,
726                 Object **ret,
727                 uint64_t *offset) {
728
729         int r;
730
731         assert(j);
732         assert(f);
733         assert(ret);
734         assert(offset);
735
736         if (!j->level0) {
737                 /* No matches is simple */
738
739                 if (j->current_location.type == LOCATION_HEAD)
740                         return journal_file_next_entry(f, NULL, 0, DIRECTION_DOWN, ret, offset);
741                 if (j->current_location.type == LOCATION_TAIL)
742                         return journal_file_next_entry(f, NULL, 0, DIRECTION_UP, ret, offset);
743                 if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
744                         return journal_file_move_to_entry_by_seqnum(f, j->current_location.seqnum, direction, ret, offset);
745                 if (j->current_location.monotonic_set) {
746                         r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
747                         if (r != -ENOENT)
748                                 return r;
749                 }
750                 if (j->current_location.realtime_set)
751                         return journal_file_move_to_entry_by_realtime(f, j->current_location.realtime, direction, ret, offset);
752
753                 return journal_file_next_entry(f, NULL, 0, direction, ret, offset);
754         } else
755                 return find_location_for_match(j, j->level0, f, direction, ret, offset);
756 }
757
758 static int next_with_matches(
759                 sd_journal *j,
760                 JournalFile *f,
761                 direction_t direction,
762                 Object **ret,
763                 uint64_t *offset) {
764
765         Object *c;
766         uint64_t cp;
767
768         assert(j);
769         assert(f);
770         assert(ret);
771         assert(offset);
772
773         c = *ret;
774         cp = *offset;
775
776         /* No matches is easy. We simple advance the file
777          * pointer by one. */
778         if (!j->level0)
779                 return journal_file_next_entry(f, c, cp, direction, ret, offset);
780
781         /* If we have a match then we look for the next matching entry
782          * with an offset at least one step larger */
783         return next_for_match(j, j->level0, f, direction == DIRECTION_DOWN ? cp+1 : cp-1, direction, ret, offset);
784 }
785
786 static int next_beyond_location(sd_journal *j, JournalFile *f, direction_t direction, Object **ret, uint64_t *offset) {
787         Object *c;
788         uint64_t cp;
789         int r;
790
791         assert(j);
792         assert(f);
793
794         if (f->current_offset > 0) {
795                 cp = f->current_offset;
796
797                 r = journal_file_move_to_object(f, OBJECT_ENTRY, cp, &c);
798                 if (r < 0)
799                         return r;
800
801                 r = next_with_matches(j, f, direction, &c, &cp);
802                 if (r <= 0)
803                         return r;
804         } else {
805                 r = find_location_with_matches(j, f, direction, &c, &cp);
806                 if (r <= 0)
807                         return r;
808         }
809
810         /* OK, we found the spot, now let's advance until to an entry
811          * that is actually different from what we were previously
812          * looking at. This is necessary to handle entries which exist
813          * in two (or more) journal files, and which shall all be
814          * suppressed but one. */
815
816         for (;;) {
817                 bool found;
818
819                 if (j->current_location.type == LOCATION_DISCRETE) {
820                         int k;
821
822                         k = compare_with_location(f, c, &j->current_location);
823                         if (direction == DIRECTION_DOWN)
824                                 found = k > 0;
825                         else
826                                 found = k < 0;
827                 } else
828                         found = true;
829
830                 if (found) {
831                         if (ret)
832                                 *ret = c;
833                         if (offset)
834                                 *offset = cp;
835                         return 1;
836                 }
837
838                 r = next_with_matches(j, f, direction, &c, &cp);
839                 if (r <= 0)
840                         return r;
841         }
842 }
843
844 static int real_journal_next(sd_journal *j, direction_t direction) {
845         JournalFile *f, *new_file = NULL;
846         uint64_t new_offset = 0;
847         Object *o;
848         uint64_t p;
849         Iterator i;
850         int r;
851
852         if (!j)
853                 return -EINVAL;
854
855         HASHMAP_FOREACH(f, j->files, i) {
856                 bool found;
857
858                 r = next_beyond_location(j, f, direction, &o, &p);
859                 if (r < 0) {
860                         log_debug("Can't iterate through %s, ignoring: %s", f->path, strerror(-r));
861                         continue;
862                 } else if (r == 0)
863                         continue;
864
865                 if (!new_file)
866                         found = true;
867                 else {
868                         int k;
869
870                         k = compare_entry_order(f, o, new_file, new_offset);
871
872                         if (direction == DIRECTION_DOWN)
873                                 found = k < 0;
874                         else
875                                 found = k > 0;
876                 }
877
878                 if (found) {
879                         new_file = f;
880                         new_offset = p;
881                 }
882         }
883
884         if (!new_file)
885                 return 0;
886
887         r = journal_file_move_to_object(new_file, OBJECT_ENTRY, new_offset, &o);
888         if (r < 0)
889                 return r;
890
891         set_location(j, LOCATION_DISCRETE, new_file, o, new_offset);
892
893         return 1;
894 }
895
896 _public_ int sd_journal_next(sd_journal *j) {
897         return real_journal_next(j, DIRECTION_DOWN);
898 }
899
900 _public_ int sd_journal_previous(sd_journal *j) {
901         return real_journal_next(j, DIRECTION_UP);
902 }
903
904 static int real_journal_next_skip(sd_journal *j, direction_t direction, uint64_t skip) {
905         int c = 0, r;
906
907         if (!j)
908                 return -EINVAL;
909
910         if (skip == 0) {
911                 /* If this is not a discrete skip, then at least
912                  * resolve the current location */
913                 if (j->current_location.type != LOCATION_DISCRETE)
914                         return real_journal_next(j, direction);
915
916                 return 0;
917         }
918
919         do {
920                 r = real_journal_next(j, direction);
921                 if (r < 0)
922                         return r;
923
924                 if (r == 0)
925                         return c;
926
927                 skip--;
928                 c++;
929         } while (skip > 0);
930
931         return c;
932 }
933
934 _public_ int sd_journal_next_skip(sd_journal *j, uint64_t skip) {
935         return real_journal_next_skip(j, DIRECTION_DOWN, skip);
936 }
937
938 _public_ int sd_journal_previous_skip(sd_journal *j, uint64_t skip) {
939         return real_journal_next_skip(j, DIRECTION_UP, skip);
940 }
941
942 _public_ int sd_journal_get_cursor(sd_journal *j, char **cursor) {
943         Object *o;
944         int r;
945         char bid[33], sid[33];
946
947         if (!j)
948                 return -EINVAL;
949         if (!cursor)
950                 return -EINVAL;
951
952         if (!j->current_file || j->current_file->current_offset <= 0)
953                 return -EADDRNOTAVAIL;
954
955         r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
956         if (r < 0)
957                 return r;
958
959         sd_id128_to_string(j->current_file->header->seqnum_id, sid);
960         sd_id128_to_string(o->entry.boot_id, bid);
961
962         if (asprintf(cursor,
963                      "s=%s;i=%llx;b=%s;m=%llx;t=%llx;x=%llx",
964                      sid, (unsigned long long) le64toh(o->entry.seqnum),
965                      bid, (unsigned long long) le64toh(o->entry.monotonic),
966                      (unsigned long long) le64toh(o->entry.realtime),
967                      (unsigned long long) le64toh(o->entry.xor_hash)) < 0)
968                 return -ENOMEM;
969
970         return 0;
971 }
972
973 _public_ int sd_journal_seek_cursor(sd_journal *j, const char *cursor) {
974         char *w, *state;
975         size_t l;
976         unsigned long long seqnum, monotonic, realtime, xor_hash;
977         bool
978                 seqnum_id_set = false,
979                 seqnum_set = false,
980                 boot_id_set = false,
981                 monotonic_set = false,
982                 realtime_set = false,
983                 xor_hash_set = false;
984         sd_id128_t seqnum_id, boot_id;
985
986         if (!j)
987                 return -EINVAL;
988         if (isempty(cursor))
989                 return -EINVAL;
990
991         FOREACH_WORD_SEPARATOR(w, l, cursor, ";", state) {
992                 char *item;
993                 int k = 0;
994
995                 if (l < 2 || w[1] != '=')
996                         return -EINVAL;
997
998                 item = strndup(w, l);
999                 if (!item)
1000                         return -ENOMEM;
1001
1002                 switch (w[0]) {
1003
1004                 case 's':
1005                         seqnum_id_set = true;
1006                         k = sd_id128_from_string(item+2, &seqnum_id);
1007                         break;
1008
1009                 case 'i':
1010                         seqnum_set = true;
1011                         if (sscanf(item+2, "%llx", &seqnum) != 1)
1012                                 k = -EINVAL;
1013                         break;
1014
1015                 case 'b':
1016                         boot_id_set = true;
1017                         k = sd_id128_from_string(item+2, &boot_id);
1018                         break;
1019
1020                 case 'm':
1021                         monotonic_set = true;
1022                         if (sscanf(item+2, "%llx", &monotonic) != 1)
1023                                 k = -EINVAL;
1024                         break;
1025
1026                 case 't':
1027                         realtime_set = true;
1028                         if (sscanf(item+2, "%llx", &realtime) != 1)
1029                                 k = -EINVAL;
1030                         break;
1031
1032                 case 'x':
1033                         xor_hash_set = true;
1034                         if (sscanf(item+2, "%llx", &xor_hash) != 1)
1035                                 k = -EINVAL;
1036                         break;
1037                 }
1038
1039                 free(item);
1040
1041                 if (k < 0)
1042                         return k;
1043         }
1044
1045         if ((!seqnum_set || !seqnum_id_set) &&
1046             (!monotonic_set || !boot_id_set) &&
1047             !realtime_set)
1048                 return -EINVAL;
1049
1050         reset_location(j);
1051
1052         j->current_location.type = LOCATION_SEEK;
1053
1054         if (realtime_set) {
1055                 j->current_location.realtime = (uint64_t) realtime;
1056                 j->current_location.realtime_set = true;
1057         }
1058
1059         if (seqnum_set && seqnum_id_set) {
1060                 j->current_location.seqnum = (uint64_t) seqnum;
1061                 j->current_location.seqnum_id = seqnum_id;
1062                 j->current_location.seqnum_set = true;
1063         }
1064
1065         if (monotonic_set && boot_id_set) {
1066                 j->current_location.monotonic = (uint64_t) monotonic;
1067                 j->current_location.boot_id = boot_id;
1068                 j->current_location.monotonic_set = true;
1069         }
1070
1071         if (xor_hash_set) {
1072                 j->current_location.xor_hash = (uint64_t) xor_hash;
1073                 j->current_location.xor_hash_set = true;
1074         }
1075
1076         return 0;
1077 }
1078
1079 _public_ int sd_journal_test_cursor(sd_journal *j, const char *cursor) {
1080         int r;
1081         char *w, *state;
1082         size_t l;
1083         Object *o;
1084
1085         if (!j)
1086                 return -EINVAL;
1087         if (isempty(cursor))
1088                 return -EINVAL;
1089
1090         if (!j->current_file || j->current_file->current_offset <= 0)
1091                 return -EADDRNOTAVAIL;
1092
1093         r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
1094         if (r < 0)
1095                 return r;
1096
1097         FOREACH_WORD_SEPARATOR(w, l, cursor, ";", state) {
1098                 _cleanup_free_ char *item = NULL;
1099                 sd_id128_t id;
1100                 unsigned long long ll;
1101                 int k = 0;
1102
1103                 if (l < 2 || w[1] != '=')
1104                         return -EINVAL;
1105
1106                 item = strndup(w, l);
1107                 if (!item)
1108                         return -ENOMEM;
1109
1110                 switch (w[0]) {
1111
1112                 case 's':
1113                         k = sd_id128_from_string(item+2, &id);
1114                         if (k < 0)
1115                                 return k;
1116                         if (!sd_id128_equal(id, j->current_file->header->seqnum_id))
1117                                 return 0;
1118                         break;
1119
1120                 case 'i':
1121                         if (sscanf(item+2, "%llx", &ll) != 1)
1122                                 return -EINVAL;
1123                         if (ll != le64toh(o->entry.seqnum))
1124                                 return 0;
1125                         break;
1126
1127                 case 'b':
1128                         k = sd_id128_from_string(item+2, &id);
1129                         if (k < 0)
1130                                 return k;
1131                         if (!sd_id128_equal(id, o->entry.boot_id))
1132                                 return 0;
1133                         break;
1134
1135                 case 'm':
1136                         if (sscanf(item+2, "%llx", &ll) != 1)
1137                                 return -EINVAL;
1138                         if (ll != le64toh(o->entry.monotonic))
1139                                 return 0;
1140                         break;
1141
1142                 case 't':
1143                         if (sscanf(item+2, "%llx", &ll) != 1)
1144                                 return -EINVAL;
1145                         if (ll != le64toh(o->entry.realtime))
1146                                 return 0;
1147                         break;
1148
1149                 case 'x':
1150                         if (sscanf(item+2, "%llx", &ll) != 1)
1151                                 return -EINVAL;
1152                         if (ll != le64toh(o->entry.xor_hash))
1153                                 return 0;
1154                         break;
1155                 }
1156         }
1157
1158         return 1;
1159 }
1160
1161
1162 _public_ int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t usec) {
1163         if (!j)
1164                 return -EINVAL;
1165
1166         reset_location(j);
1167         j->current_location.type = LOCATION_SEEK;
1168         j->current_location.boot_id = boot_id;
1169         j->current_location.monotonic = usec;
1170         j->current_location.monotonic_set = true;
1171
1172         return 0;
1173 }
1174
1175 _public_ int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec) {
1176         if (!j)
1177                 return -EINVAL;
1178
1179         reset_location(j);
1180         j->current_location.type = LOCATION_SEEK;
1181         j->current_location.realtime = usec;
1182         j->current_location.realtime_set = true;
1183
1184         return 0;
1185 }
1186
1187 _public_ int sd_journal_seek_head(sd_journal *j) {
1188         if (!j)
1189                 return -EINVAL;
1190
1191         reset_location(j);
1192         j->current_location.type = LOCATION_HEAD;
1193
1194         return 0;
1195 }
1196
1197 _public_ int sd_journal_seek_tail(sd_journal *j) {
1198         if (!j)
1199                 return -EINVAL;
1200
1201         reset_location(j);
1202         j->current_location.type = LOCATION_TAIL;
1203
1204         return 0;
1205 }
1206
1207 static void check_network(sd_journal *j, int fd) {
1208         struct statfs sfs;
1209
1210         assert(j);
1211
1212         if (j->on_network)
1213                 return;
1214
1215         if (fstatfs(fd, &sfs) < 0)
1216                 return;
1217
1218         j->on_network =
1219                 (long)sfs.f_type == (long)CIFS_MAGIC_NUMBER ||
1220                 sfs.f_type == CODA_SUPER_MAGIC ||
1221                 sfs.f_type == NCP_SUPER_MAGIC ||
1222                 sfs.f_type == NFS_SUPER_MAGIC ||
1223                 sfs.f_type == SMB_SUPER_MAGIC;
1224 }
1225
1226 static int add_file(sd_journal *j, const char *prefix, const char *filename) {
1227         char _cleanup_free_ *path = NULL;
1228         int r;
1229         JournalFile *f;
1230
1231         assert(j);
1232         assert(prefix);
1233         assert(filename);
1234
1235         if ((j->flags & SD_JOURNAL_SYSTEM_ONLY) &&
1236             !(streq(filename, "system.journal") ||
1237               streq(filename, "system.journal~") ||
1238               (startswith(filename, "system@") &&
1239                (endswith(filename, ".journal") || endswith(filename, ".journal~")))))
1240                 return 0;
1241
1242         path = strjoin(prefix, "/", filename, NULL);
1243         if (!path)
1244                 return -ENOMEM;
1245
1246         if (hashmap_get(j->files, path))
1247                 return 0;
1248
1249         if (hashmap_size(j->files) >= JOURNAL_FILES_MAX) {
1250                 log_debug("Too many open journal files, not adding %s, ignoring.", path);
1251                 return set_put_error(j->errors, -ETOOMANYREFS);
1252         }
1253
1254         r = journal_file_open(path, O_RDONLY, 0, false, false, NULL, j->mmap, NULL, &f);
1255         if (r < 0) {
1256                 if (errno == ENOENT)
1257                         return 0;
1258
1259                 return r;
1260         }
1261
1262         /* journal_file_dump(f); */
1263
1264         r = hashmap_put(j->files, f->path, f);
1265         if (r < 0) {
1266                 journal_file_close(f);
1267                 return r;
1268         }
1269
1270         log_debug("File %s got added.", f->path);
1271
1272         check_network(j, f->fd);
1273
1274         j->current_invalidate_counter ++;
1275
1276         return 0;
1277 }
1278
1279 static int remove_file(sd_journal *j, const char *prefix, const char *filename) {
1280         char *path;
1281         JournalFile *f;
1282
1283         assert(j);
1284         assert(prefix);
1285         assert(filename);
1286
1287         path = strjoin(prefix, "/", filename, NULL);
1288         if (!path)
1289                 return -ENOMEM;
1290
1291         f = hashmap_get(j->files, path);
1292         free(path);
1293         if (!f)
1294                 return 0;
1295
1296         hashmap_remove(j->files, f->path);
1297
1298         log_debug("File %s got removed.", f->path);
1299
1300         if (j->current_file == f) {
1301                 j->current_file = NULL;
1302                 j->current_field = 0;
1303         }
1304
1305         if (j->unique_file == f) {
1306                 j->unique_file = NULL;
1307                 j->unique_offset = 0;
1308         }
1309
1310         journal_file_close(f);
1311
1312         j->current_invalidate_counter ++;
1313
1314         return 0;
1315 }
1316
1317 static int add_directory(sd_journal *j, const char *prefix, const char *dirname) {
1318         char _cleanup_free_ *path = NULL;
1319         int r;
1320         DIR _cleanup_closedir_ *d = NULL;
1321         sd_id128_t id, mid;
1322         Directory *m;
1323
1324         assert(j);
1325         assert(prefix);
1326         assert(dirname);
1327
1328         log_debug("Considering %s/%s.", prefix, dirname);
1329
1330         if ((j->flags & SD_JOURNAL_LOCAL_ONLY) &&
1331             (sd_id128_from_string(dirname, &id) < 0 ||
1332              sd_id128_get_machine(&mid) < 0 ||
1333              !(sd_id128_equal(id, mid) || path_startswith(prefix, "/run"))))
1334             return 0;
1335
1336         path = strjoin(prefix, "/", dirname, NULL);
1337         if (!path)
1338                 return -ENOMEM;
1339
1340         d = opendir(path);
1341         if (!d) {
1342                 log_debug("Failed to open %s: %m", path);
1343                 if (errno == ENOENT)
1344                         return 0;
1345                 return -errno;
1346         }
1347
1348         m = hashmap_get(j->directories_by_path, path);
1349         if (!m) {
1350                 m = new0(Directory, 1);
1351                 if (!m)
1352                         return -ENOMEM;
1353
1354                 m->is_root = false;
1355                 m->path = path;
1356
1357                 if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
1358                         free(m);
1359                         return -ENOMEM;
1360                 }
1361
1362                 path = NULL; /* avoid freeing in cleanup */
1363                 j->current_invalidate_counter ++;
1364
1365                 log_debug("Directory %s got added.", m->path);
1366
1367         } else if (m->is_root)
1368                 return 0;
1369
1370         if (m->wd <= 0 && j->inotify_fd >= 0) {
1371
1372                 m->wd = inotify_add_watch(j->inotify_fd, m->path,
1373                                           IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1374                                           IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM|
1375                                           IN_ONLYDIR);
1376
1377                 if (m->wd > 0 && hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m) < 0)
1378                         inotify_rm_watch(j->inotify_fd, m->wd);
1379         }
1380
1381         for (;;) {
1382                 struct dirent *de;
1383                 union dirent_storage buf;
1384
1385                 r = readdir_r(d, &buf.de, &de);
1386                 if (r != 0 || !de)
1387                         break;
1388
1389                 if (dirent_is_file_with_suffix(de, ".journal") ||
1390                     dirent_is_file_with_suffix(de, ".journal~")) {
1391                         r = add_file(j, m->path, de->d_name);
1392                         if (r < 0) {
1393                                 log_debug("Failed to add file %s/%s: %s",
1394                                           m->path, de->d_name, strerror(-r));
1395                                 r = set_put_error(j->errors, r);
1396                                 if (r < 0)
1397                                         return r;
1398                         }
1399                 }
1400         }
1401
1402         check_network(j, dirfd(d));
1403
1404         return 0;
1405 }
1406
1407 static int add_root_directory(sd_journal *j, const char *p) {
1408         DIR _cleanup_closedir_ *d = NULL;
1409         Directory *m;
1410         int r;
1411
1412         assert(j);
1413         assert(p);
1414
1415         if ((j->flags & SD_JOURNAL_RUNTIME_ONLY) &&
1416             !path_startswith(p, "/run"))
1417                 return -EINVAL;
1418
1419         d = opendir(p);
1420         if (!d)
1421                 return -errno;
1422
1423         m = hashmap_get(j->directories_by_path, p);
1424         if (!m) {
1425                 m = new0(Directory, 1);
1426                 if (!m)
1427                         return -ENOMEM;
1428
1429                 m->is_root = true;
1430                 m->path = strdup(p);
1431                 if (!m->path) {
1432                         free(m);
1433                         return -ENOMEM;
1434                 }
1435
1436                 if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
1437                         free(m->path);
1438                         free(m);
1439                         return -ENOMEM;
1440                 }
1441
1442                 j->current_invalidate_counter ++;
1443
1444                 log_debug("Root directory %s got added.", m->path);
1445
1446         } else if (!m->is_root)
1447                 return 0;
1448
1449         if (m->wd <= 0 && j->inotify_fd >= 0) {
1450
1451                 m->wd = inotify_add_watch(j->inotify_fd, m->path,
1452                                           IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1453                                           IN_ONLYDIR);
1454
1455                 if (m->wd > 0 && hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m) < 0)
1456                         inotify_rm_watch(j->inotify_fd, m->wd);
1457         }
1458
1459         for (;;) {
1460                 struct dirent *de;
1461                 union dirent_storage buf;
1462                 sd_id128_t id;
1463
1464                 r = readdir_r(d, &buf.de, &de);
1465                 if (r != 0 || !de)
1466                         break;
1467
1468                 if (dirent_is_file_with_suffix(de, ".journal") ||
1469                     dirent_is_file_with_suffix(de, ".journal~")) {
1470                         r = add_file(j, m->path, de->d_name);
1471                         if (r < 0) {
1472                                 log_debug("Failed to add file %s/%s: %s",
1473                                           m->path, de->d_name, strerror(-r));
1474                                 r = set_put_error(j->errors, r);
1475                                 if (r < 0)
1476                                         return r;
1477                         }
1478                 } else if ((de->d_type == DT_DIR || de->d_type == DT_LNK || de->d_type == DT_UNKNOWN) &&
1479                            sd_id128_from_string(de->d_name, &id) >= 0) {
1480
1481                         r = add_directory(j, m->path, de->d_name);
1482                         if (r < 0)
1483                                 log_debug("Failed to add directory %s/%s: %s", m->path, de->d_name, strerror(-r));
1484                 }
1485         }
1486
1487         check_network(j, dirfd(d));
1488
1489         return 0;
1490 }
1491
1492 static int remove_directory(sd_journal *j, Directory *d) {
1493         assert(j);
1494
1495         if (d->wd > 0) {
1496                 hashmap_remove(j->directories_by_wd, INT_TO_PTR(d->wd));
1497
1498                 if (j->inotify_fd >= 0)
1499                         inotify_rm_watch(j->inotify_fd, d->wd);
1500         }
1501
1502         hashmap_remove(j->directories_by_path, d->path);
1503
1504         if (d->is_root)
1505                 log_debug("Root directory %s got removed.", d->path);
1506         else
1507                 log_debug("Directory %s got removed.", d->path);
1508
1509         free(d->path);
1510         free(d);
1511
1512         return 0;
1513 }
1514
1515 static int add_search_paths(sd_journal *j) {
1516         int r;
1517         const char search_paths[] =
1518                 "/run/log/journal\0"
1519                 "/var/log/journal\0";
1520         const char *p;
1521
1522         assert(j);
1523
1524         /* We ignore most errors here, since the idea is to only open
1525          * what's actually accessible, and ignore the rest. */
1526
1527         NULSTR_FOREACH(p, search_paths) {
1528                 r = add_root_directory(j, p);
1529                 if (r < 0)
1530                         return set_put_error(j->errors, r);
1531         }
1532
1533         return 0;
1534 }
1535
1536 static int allocate_inotify(sd_journal *j) {
1537         assert(j);
1538
1539         if (j->inotify_fd < 0) {
1540                 j->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1541                 if (j->inotify_fd < 0)
1542                         return -errno;
1543         }
1544
1545         if (!j->directories_by_wd) {
1546                 j->directories_by_wd = hashmap_new(trivial_hash_func, trivial_compare_func);
1547                 if (!j->directories_by_wd)
1548                         return -ENOMEM;
1549         }
1550
1551         return 0;
1552 }
1553
1554 static sd_journal *journal_new(int flags, const char *path) {
1555         sd_journal *j;
1556
1557         j = new0(sd_journal, 1);
1558         if (!j)
1559                 return NULL;
1560
1561         j->inotify_fd = -1;
1562         j->flags = flags;
1563         j->data_threshold = DEFAULT_DATA_THRESHOLD;
1564
1565         if (path) {
1566                 j->path = strdup(path);
1567                 if (!j->path)
1568                         goto fail;
1569         }
1570
1571         j->files = hashmap_new(string_hash_func, string_compare_func);
1572         j->directories_by_path = hashmap_new(string_hash_func, string_compare_func);
1573         j->mmap = mmap_cache_new();
1574         j->errors = set_new(trivial_hash_func, trivial_compare_func);
1575         if (!j->files || !j->directories_by_path || !j->mmap || !j->errors)
1576                 goto fail;
1577
1578         return j;
1579
1580 fail:
1581         sd_journal_close(j);
1582         return NULL;
1583 }
1584
1585 _public_ int sd_journal_open(sd_journal **ret, int flags) {
1586         sd_journal *j;
1587         int r;
1588
1589         if (!ret)
1590                 return -EINVAL;
1591
1592         if (flags & ~(SD_JOURNAL_LOCAL_ONLY|
1593                       SD_JOURNAL_RUNTIME_ONLY|
1594                       SD_JOURNAL_SYSTEM_ONLY))
1595                 return -EINVAL;
1596
1597         j = journal_new(flags, NULL);
1598         if (!j)
1599                 return -ENOMEM;
1600
1601         r = add_search_paths(j);
1602         if (r < 0)
1603                 goto fail;
1604
1605         *ret = j;
1606         return 0;
1607
1608 fail:
1609         sd_journal_close(j);
1610
1611         return r;
1612 }
1613
1614 _public_ int sd_journal_open_directory(sd_journal **ret, const char *path, int flags) {
1615         sd_journal *j;
1616         int r;
1617
1618         if (!ret)
1619                 return -EINVAL;
1620
1621         if (!path)
1622                 return -EINVAL;
1623
1624         if (flags != 0)
1625                 return -EINVAL;
1626
1627         j = journal_new(flags, path);
1628         if (!j)
1629                 return -ENOMEM;
1630
1631         r = add_root_directory(j, path);
1632         if (r < 0) {
1633                 set_put_error(j->errors, r);
1634                 goto fail;
1635         }
1636
1637         *ret = j;
1638         return 0;
1639
1640 fail:
1641         sd_journal_close(j);
1642
1643         return r;
1644 }
1645
1646 _public_ void sd_journal_close(sd_journal *j) {
1647         Directory *d;
1648         JournalFile *f;
1649
1650         if (!j)
1651                 return;
1652
1653         while ((f = hashmap_steal_first(j->files)))
1654                 journal_file_close(f);
1655
1656         hashmap_free(j->files);
1657
1658         while ((d = hashmap_first(j->directories_by_path)))
1659                 remove_directory(j, d);
1660
1661         while ((d = hashmap_first(j->directories_by_wd)))
1662                 remove_directory(j, d);
1663
1664         hashmap_free(j->directories_by_path);
1665         hashmap_free(j->directories_by_wd);
1666
1667         if (j->inotify_fd >= 0)
1668                 close_nointr_nofail(j->inotify_fd);
1669
1670         sd_journal_flush_matches(j);
1671
1672         if (j->mmap)
1673                 mmap_cache_unref(j->mmap);
1674
1675         free(j->path);
1676         free(j->unique_field);
1677         set_free(j->errors);
1678         free(j);
1679 }
1680
1681 _public_ int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret) {
1682         Object *o;
1683         JournalFile *f;
1684         int r;
1685
1686         if (!j)
1687                 return -EINVAL;
1688         if (!ret)
1689                 return -EINVAL;
1690
1691         f = j->current_file;
1692         if (!f)
1693                 return -EADDRNOTAVAIL;
1694
1695         if (f->current_offset <= 0)
1696                 return -EADDRNOTAVAIL;
1697
1698         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1699         if (r < 0)
1700                 return r;
1701
1702         *ret = le64toh(o->entry.realtime);
1703         return 0;
1704 }
1705
1706 _public_ int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id128_t *ret_boot_id) {
1707         Object *o;
1708         JournalFile *f;
1709         int r;
1710         sd_id128_t id;
1711
1712         if (!j)
1713                 return -EINVAL;
1714
1715         f = j->current_file;
1716         if (!f)
1717                 return -EADDRNOTAVAIL;
1718
1719         if (f->current_offset <= 0)
1720                 return -EADDRNOTAVAIL;
1721
1722         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1723         if (r < 0)
1724                 return r;
1725
1726         if (ret_boot_id)
1727                 *ret_boot_id = o->entry.boot_id;
1728         else {
1729                 r = sd_id128_get_boot(&id);
1730                 if (r < 0)
1731                         return r;
1732
1733                 if (!sd_id128_equal(id, o->entry.boot_id))
1734                         return -ESTALE;
1735         }
1736
1737         if (ret)
1738                 *ret = le64toh(o->entry.monotonic);
1739
1740         return 0;
1741 }
1742
/* Returns true if field is an acceptable field name: non-empty, not
 * starting with "__", and made up exclusively of '_', 'A'-'Z' and
 * '0'-'9'. */
static bool field_is_valid(const char *field) {
        const char *c;

        assert(field);

        if (isempty(field) || startswith(field, "__"))
                return false;

        for (c = field; *c; c++) {
                bool allowed =
                        *c == '_' ||
                        (*c >= 'A' && *c <= 'Z') ||
                        (*c >= '0' && *c <= '9');

                if (!allowed)
                        return false;
        }

        return true;
}
1770
1771 _public_ int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *size) {
1772         JournalFile *f;
1773         uint64_t i, n;
1774         size_t field_length;
1775         int r;
1776         Object *o;
1777
1778         if (!j)
1779                 return -EINVAL;
1780         if (!field)
1781                 return -EINVAL;
1782         if (!data)
1783                 return -EINVAL;
1784         if (!size)
1785                 return -EINVAL;
1786
1787         if (!field_is_valid(field))
1788                 return -EINVAL;
1789
1790         f = j->current_file;
1791         if (!f)
1792                 return -EADDRNOTAVAIL;
1793
1794         if (f->current_offset <= 0)
1795                 return -EADDRNOTAVAIL;
1796
1797         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1798         if (r < 0)
1799                 return r;
1800
1801         field_length = strlen(field);
1802
1803         n = journal_file_entry_n_items(o);
1804         for (i = 0; i < n; i++) {
1805                 uint64_t p, l;
1806                 le64_t le_hash;
1807                 size_t t;
1808
1809                 p = le64toh(o->entry.items[i].object_offset);
1810                 le_hash = o->entry.items[i].hash;
1811                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1812                 if (r < 0)
1813                         return r;
1814
1815                 if (le_hash != o->data.hash)
1816                         return -EBADMSG;
1817
1818                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
1819
1820                 if (o->object.flags & OBJECT_COMPRESSED) {
1821
1822 #ifdef HAVE_XZ
1823                         if (uncompress_startswith(o->data.payload, l,
1824                                                   &f->compress_buffer, &f->compress_buffer_size,
1825                                                   field, field_length, '=')) {
1826
1827                                 uint64_t rsize;
1828
1829                                 if (!uncompress_blob(o->data.payload, l,
1830                                                      &f->compress_buffer, &f->compress_buffer_size, &rsize,
1831                                                      j->data_threshold))
1832                                         return -EBADMSG;
1833
1834                                 *data = f->compress_buffer;
1835                                 *size = (size_t) rsize;
1836
1837                                 return 0;
1838                         }
1839 #else
1840                         return -EPROTONOSUPPORT;
1841 #endif
1842
1843                 } else if (l >= field_length+1 &&
1844                            memcmp(o->data.payload, field, field_length) == 0 &&
1845                            o->data.payload[field_length] == '=') {
1846
1847                         t = (size_t) l;
1848
1849                         if ((uint64_t) t != l)
1850                                 return -E2BIG;
1851
1852                         *data = o->data.payload;
1853                         *size = t;
1854
1855                         return 0;
1856                 }
1857
1858                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1859                 if (r < 0)
1860                         return r;
1861         }
1862
1863         return -ENOENT;
1864 }
1865
1866 static int return_data(sd_journal *j, JournalFile *f, Object *o, const void **data, size_t *size) {
1867         size_t t;
1868         uint64_t l;
1869
1870         l = le64toh(o->object.size) - offsetof(Object, data.payload);
1871         t = (size_t) l;
1872
1873         /* We can't read objects larger than 4G on a 32bit machine */
1874         if ((uint64_t) t != l)
1875                 return -E2BIG;
1876
1877         if (o->object.flags & OBJECT_COMPRESSED) {
1878 #ifdef HAVE_XZ
1879                 uint64_t rsize;
1880
1881                 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize, j->data_threshold))
1882                         return -EBADMSG;
1883
1884                 *data = f->compress_buffer;
1885                 *size = (size_t) rsize;
1886 #else
1887                 return -EPROTONOSUPPORT;
1888 #endif
1889         } else {
1890                 *data = o->data.payload;
1891                 *size = t;
1892         }
1893
1894         return 0;
1895 }
1896
1897 _public_ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) {
1898         JournalFile *f;
1899         uint64_t p, n;
1900         le64_t le_hash;
1901         int r;
1902         Object *o;
1903
1904         if (!j)
1905                 return -EINVAL;
1906         if (!data)
1907                 return -EINVAL;
1908         if (!size)
1909                 return -EINVAL;
1910
1911         f = j->current_file;
1912         if (!f)
1913                 return -EADDRNOTAVAIL;
1914
1915         if (f->current_offset <= 0)
1916                 return -EADDRNOTAVAIL;
1917
1918         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1919         if (r < 0)
1920                 return r;
1921
1922         n = journal_file_entry_n_items(o);
1923         if (j->current_field >= n)
1924                 return 0;
1925
1926         p = le64toh(o->entry.items[j->current_field].object_offset);
1927         le_hash = o->entry.items[j->current_field].hash;
1928         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1929         if (r < 0)
1930                 return r;
1931
1932         if (le_hash != o->data.hash)
1933                 return -EBADMSG;
1934
1935         r = return_data(j, f, o, data, size);
1936         if (r < 0)
1937                 return r;
1938
1939         j->current_field ++;
1940
1941         return 1;
1942 }
1943
1944 _public_ void sd_journal_restart_data(sd_journal *j) {
1945         if (!j)
1946                 return;
1947
1948         j->current_field = 0;
1949 }
1950
1951 _public_ int sd_journal_get_fd(sd_journal *j) {
1952         int r;
1953
1954         if (!j)
1955                 return -EINVAL;
1956
1957         if (j->inotify_fd >= 0)
1958                 return j->inotify_fd;
1959
1960         r = allocate_inotify(j);
1961         if (r < 0)
1962                 return r;
1963
1964         /* Iterate through all dirs again, to add them to the
1965          * inotify */
1966         if (j->path)
1967                 r = add_root_directory(j, j->path);
1968         else
1969                 r = add_search_paths(j);
1970         if (r < 0)
1971                 return r;
1972
1973         return j->inotify_fd;
1974 }
1975
1976 static void process_inotify_event(sd_journal *j, struct inotify_event *e) {
1977         Directory *d;
1978         int r;
1979
1980         assert(j);
1981         assert(e);
1982
1983         /* Is this a subdirectory we watch? */
1984         d = hashmap_get(j->directories_by_wd, INT_TO_PTR(e->wd));
1985         if (d) {
1986                 sd_id128_t id;
1987
1988                 if (!(e->mask & IN_ISDIR) && e->len > 0 &&
1989                     (endswith(e->name, ".journal") ||
1990                      endswith(e->name, ".journal~"))) {
1991
1992                         /* Event for a journal file */
1993
1994                         if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) {
1995                                 r = add_file(j, d->path, e->name);
1996                                 if (r < 0) {
1997                                         log_debug("Failed to add file %s/%s: %s",
1998                                                   d->path, e->name, strerror(-r));
1999                                         set_put_error(j->errors, r);
2000                                 }
2001
2002                         } else if (e->mask & (IN_DELETE|IN_MOVED_FROM|IN_UNMOUNT)) {
2003
2004                                 r = remove_file(j, d->path, e->name);
2005                                 if (r < 0)
2006                                         log_debug("Failed to remove file %s/%s: %s", d->path, e->name, strerror(-r));
2007                         }
2008
2009                 } else if (!d->is_root && e->len == 0) {
2010
2011                         /* Event for a subdirectory */
2012
2013                         if (e->mask & (IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT)) {
2014                                 r = remove_directory(j, d);
2015                                 if (r < 0)
2016                                         log_debug("Failed to remove directory %s: %s", d->path, strerror(-r));
2017                         }
2018
2019
2020                 } else if (d->is_root && (e->mask & IN_ISDIR) && e->len > 0 && sd_id128_from_string(e->name, &id) >= 0) {
2021
2022                         /* Event for root directory */
2023
2024                         if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) {
2025                                 r = add_directory(j, d->path, e->name);
2026                                 if (r < 0)
2027                                         log_debug("Failed to add directory %s/%s: %s", d->path, e->name, strerror(-r));
2028                         }
2029                 }
2030
2031                 return;
2032         }
2033
2034         if (e->mask & IN_IGNORED)
2035                 return;
2036
2037         log_warning("Unknown inotify event.");
2038 }
2039
2040 static int determine_change(sd_journal *j) {
2041         bool b;
2042
2043         assert(j);
2044
2045         b = j->current_invalidate_counter != j->last_invalidate_counter;
2046         j->last_invalidate_counter = j->current_invalidate_counter;
2047
2048         return b ? SD_JOURNAL_INVALIDATE : SD_JOURNAL_APPEND;
2049 }
2050
2051 _public_ int sd_journal_process(sd_journal *j) {
2052         uint8_t buffer[sizeof(struct inotify_event) + FILENAME_MAX] _alignas_(struct inotify_event);
2053         bool got_something = false;
2054
2055         if (!j)
2056                 return -EINVAL;
2057
2058         for (;;) {
2059                 struct inotify_event *e;
2060                 ssize_t l;
2061
2062                 l = read(j->inotify_fd, buffer, sizeof(buffer));
2063                 if (l < 0) {
2064                         if (errno == EAGAIN || errno == EINTR)
2065                                 return got_something ? determine_change(j) : SD_JOURNAL_NOP;
2066
2067                         return -errno;
2068                 }
2069
2070                 got_something = true;
2071
2072                 e = (struct inotify_event*) buffer;
2073                 while (l > 0) {
2074                         size_t step;
2075
2076                         process_inotify_event(j, e);
2077
2078                         step = sizeof(struct inotify_event) + e->len;
2079                         assert(step <= (size_t) l);
2080
2081                         e = (struct inotify_event*) ((uint8_t*) e + step);
2082                         l -= step;
2083                 }
2084         }
2085
2086         return determine_change(j);
2087 }
2088
2089 _public_ int sd_journal_wait(sd_journal *j, uint64_t timeout_usec) {
2090         int r;
2091
2092         assert(j);
2093
2094         if (j->inotify_fd < 0) {
2095
2096                 /* This is the first invocation, hence create the
2097                  * inotify watch */
2098                 r = sd_journal_get_fd(j);
2099                 if (r < 0)
2100                         return r;
2101
2102                 /* The journal might have changed since the context
2103                  * object was created and we weren't watching before,
2104                  * hence don't wait for anything, and return
2105                  * immediately. */
2106                 return determine_change(j);
2107         }
2108
2109         if (j->on_network) {
2110                 /* If we are on the network we need to regularly check
2111                  * for changes manually */
2112
2113                 if (timeout_usec == (uint64_t) -1 || timeout_usec > JOURNAL_FILES_RECHECK_USEC)
2114                         timeout_usec = JOURNAL_FILES_RECHECK_USEC;
2115         }
2116
2117         do {
2118                 r = fd_wait_for_event(j->inotify_fd, POLLIN, timeout_usec);
2119         } while (r == -EINTR);
2120
2121         if (r < 0)
2122                 return r;
2123
2124         return sd_journal_process(j);
2125 }
2126
2127 _public_ int sd_journal_get_cutoff_realtime_usec(sd_journal *j, uint64_t *from, uint64_t *to) {
2128         Iterator i;
2129         JournalFile *f;
2130         bool first = true;
2131         int r;
2132
2133         if (!j)
2134                 return -EINVAL;
2135         if (!from && !to)
2136                 return -EINVAL;
2137
2138         HASHMAP_FOREACH(f, j->files, i) {
2139                 usec_t fr, t;
2140
2141                 r = journal_file_get_cutoff_realtime_usec(f, &fr, &t);
2142                 if (r == -ENOENT)
2143                         continue;
2144                 if (r < 0)
2145                         return r;
2146                 if (r == 0)
2147                         continue;
2148
2149                 if (first) {
2150                         if (from)
2151                                 *from = fr;
2152                         if (to)
2153                                 *to = t;
2154                         first = false;
2155                 } else {
2156                         if (from)
2157                                 *from = MIN(fr, *from);
2158                         if (to)
2159                                 *to = MAX(t, *to);
2160                 }
2161         }
2162
2163         return first ? 0 : 1;
2164 }
2165
2166 _public_ int sd_journal_get_cutoff_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t *from, uint64_t *to) {
2167         Iterator i;
2168         JournalFile *f;
2169         bool first = true;
2170         int r;
2171
2172         if (!j)
2173                 return -EINVAL;
2174         if (!from && !to)
2175                 return -EINVAL;
2176
2177         HASHMAP_FOREACH(f, j->files, i) {
2178                 usec_t fr, t;
2179
2180                 r = journal_file_get_cutoff_monotonic_usec(f, boot_id, &fr, &t);
2181                 if (r == -ENOENT)
2182                         continue;
2183                 if (r < 0)
2184                         return r;
2185                 if (r == 0)
2186                         continue;
2187
2188                 if (first) {
2189                         if (from)
2190                                 *from = fr;
2191                         if (to)
2192                                 *to = t;
2193                         first = false;
2194                 } else {
2195                         if (from)
2196                                 *from = MIN(fr, *from);
2197                         if (to)
2198                                 *to = MAX(t, *to);
2199                 }
2200         }
2201
2202         return first ? 0 : 1;
2203 }
2204
2205 void journal_print_header(sd_journal *j) {
2206         Iterator i;
2207         JournalFile *f;
2208         bool newline = false;
2209
2210         assert(j);
2211
2212         HASHMAP_FOREACH(f, j->files, i) {
2213                 if (newline)
2214                         putchar('\n');
2215                 else
2216                         newline = true;
2217
2218                 journal_file_print_header(f);
2219         }
2220 }
2221
2222 _public_ int sd_journal_get_usage(sd_journal *j, uint64_t *bytes) {
2223         Iterator i;
2224         JournalFile *f;
2225         uint64_t sum = 0;
2226
2227         if (!j)
2228                 return -EINVAL;
2229         if (!bytes)
2230                 return -EINVAL;
2231
2232         HASHMAP_FOREACH(f, j->files, i) {
2233                 struct stat st;
2234
2235                 if (fstat(f->fd, &st) < 0)
2236                         return -errno;
2237
2238                 sum += (uint64_t) st.st_blocks * 512ULL;
2239         }
2240
2241         *bytes = sum;
2242         return 0;
2243 }
2244
2245 _public_ int sd_journal_query_unique(sd_journal *j, const char *field) {
2246         char *f;
2247
2248         if (!j)
2249                 return -EINVAL;
2250         if (isempty(field))
2251                 return -EINVAL;
2252         if (!field_is_valid(field))
2253                 return -EINVAL;
2254
2255         f = strdup(field);
2256         if (!f)
2257                 return -ENOMEM;
2258
2259         free(j->unique_field);
2260         j->unique_field = f;
2261         j->unique_file = NULL;
2262         j->unique_offset = 0;
2263
2264         return 0;
2265 }
2266
2267 _public_ int sd_journal_enumerate_unique(sd_journal *j, const void **data, size_t *l) {
2268         Object *o;
2269         size_t k;
2270         int r;
2271
2272         if (!j)
2273                 return -EINVAL;
2274         if (!data)
2275                 return -EINVAL;
2276         if (!l)
2277                 return -EINVAL;
2278         if (!j->unique_field)
2279                 return -EINVAL;
2280
2281         k = strlen(j->unique_field);
2282
2283         if (!j->unique_file) {
2284                 j->unique_file = hashmap_first(j->files);
2285                 if (!j->unique_file)
2286                         return 0;
2287                 j->unique_offset = 0;
2288         }
2289
2290         for (;;) {
2291                 JournalFile *of;
2292                 Iterator i;
2293                 const void *odata;
2294                 size_t ol;
2295                 bool found;
2296
2297                 /* Proceed to next data object in the field's linked list */
2298                 if (j->unique_offset == 0) {
2299                         r = journal_file_find_field_object(j->unique_file, j->unique_field, k, &o, NULL);
2300                         if (r < 0)
2301                                 return r;
2302
2303                         j->unique_offset = r > 0 ? le64toh(o->field.head_data_offset) : 0;
2304                 } else {
2305                         r = journal_file_move_to_object(j->unique_file, OBJECT_DATA, j->unique_offset, &o);
2306                         if (r < 0)
2307                                 return r;
2308
2309                         j->unique_offset = le64toh(o->data.next_field_offset);
2310                 }
2311
2312                 /* We reached the end of the list? Then start again, with the next file */
2313                 if (j->unique_offset == 0) {
2314                         JournalFile *n;
2315
2316                         n = hashmap_next(j->files, j->unique_file->path);
2317                         if (!n)
2318                                 return 0;
2319
2320                         j->unique_file = n;
2321                         continue;
2322                 }
2323
2324                 /* We do not use the type context here, but 0 instead,
2325                  * so that we can look at this data object at the same
2326                  * time as one on another file */
2327                 r = journal_file_move_to_object(j->unique_file, 0, j->unique_offset, &o);
2328                 if (r < 0)
2329                         return r;
2330
2331                 /* Let's do the type check by hand, since we used 0 context above. */
2332                 if (o->object.type != OBJECT_DATA)
2333                         return -EBADMSG;
2334
2335                 r = return_data(j, j->unique_file, o, &odata, &ol);
2336                 if (r < 0)
2337                         return r;
2338
2339                 /* OK, now let's see if we already returned this data
2340                  * object by checking if it exists in the earlier
2341                  * traversed files. */
2342                 found = false;
2343                 HASHMAP_FOREACH(of, j->files, i) {
2344                         Object *oo;
2345                         uint64_t op;
2346
2347                         if (of == j->unique_file)
2348                                 break;
2349
2350                         /* Skip this file it didn't have any fields
2351                          * indexed */
2352                         if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) &&
2353                             le64toh(of->header->n_fields) <= 0)
2354                                 continue;
2355
2356                         r = journal_file_find_data_object_with_hash(of, odata, ol, le64toh(o->data.hash), &oo, &op);
2357                         if (r < 0)
2358                                 return r;
2359
2360                         if (r > 0)
2361                                 found = true;
2362                 }
2363
2364                 if (found)
2365                         continue;
2366
2367                 r = return_data(j, j->unique_file, o, data, l);
2368                 if (r < 0)
2369                         return r;
2370
2371                 return 1;
2372         }
2373 }
2374
2375 _public_ void sd_journal_restart_unique(sd_journal *j) {
2376         if (!j)
2377                 return;
2378
2379         j->unique_file = NULL;
2380         j->unique_offset = 0;
2381 }
2382
2383 _public_ int sd_journal_reliable_fd(sd_journal *j) {
2384         if (!j)
2385                 return -EINVAL;
2386
2387         return !j->on_network;
2388 }
2389
2390 static char *lookup_field(const char *field, void *userdata) {
2391         sd_journal *j = userdata;
2392         const void *data;
2393         size_t size, d;
2394         int r;
2395
2396         assert(field);
2397         assert(j);
2398
2399         r = sd_journal_get_data(j, field, &data, &size);
2400         if (r < 0 ||
2401             size > REPLACE_VAR_MAX)
2402                 return strdup(field);
2403
2404         d = strlen(field) + 1;
2405
2406         return strndup((const char*) data + d, size - d);
2407 }
2408
2409 _public_ int sd_journal_get_catalog(sd_journal *j, char **ret) {
2410         const void *data;
2411         size_t size;
2412         sd_id128_t id;
2413         _cleanup_free_ char *text = NULL, *cid = NULL;
2414         char *t;
2415         int r;
2416
2417         if (!j)
2418                 return -EINVAL;
2419         if (!ret)
2420                 return -EINVAL;
2421
2422         r = sd_journal_get_data(j, "MESSAGE_ID", &data, &size);
2423         if (r < 0)
2424                 return r;
2425
2426         cid = strndup((const char*) data + 11, size - 11);
2427         if (!cid)
2428                 return -ENOMEM;
2429
2430         r = sd_id128_from_string(cid, &id);
2431         if (r < 0)
2432                 return r;
2433
2434         r = catalog_get(id, &text);
2435         if (r < 0)
2436                 return r;
2437
2438         t = replace_var(text, lookup_field, j);
2439         if (!t)
2440                 return -ENOMEM;
2441
2442         *ret = t;
2443         return 0;
2444 }
2445
2446 _public_ int sd_journal_get_catalog_for_message_id(sd_id128_t id, char **ret) {
2447         if (!ret)
2448                 return -EINVAL;
2449
2450         return catalog_get(id, ret);
2451 }
2452
2453 _public_ int sd_journal_set_data_threshold(sd_journal *j, size_t sz) {
2454         if (!j)
2455                 return -EINVAL;
2456
2457         j->data_threshold = sz;
2458         return 0;
2459 }
2460
2461 _public_ int sd_journal_get_data_threshold(sd_journal *j, size_t *sz) {
2462         if (!j)
2463                 return -EINVAL;
2464         if (!sz)
2465                 return -EINVAL;
2466
2467         *sz = j->data_threshold;
2468         return 0;
2469 }