chiark / gitweb /
82cacf3674446cd6cc600277da4a8e13364d1539
[elogind.git] / src / journal / sd-journal.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <stddef.h>
25 #include <unistd.h>
26 #include <sys/inotify.h>
27 #include <sys/poll.h>
28 #include <sys/vfs.h>
29 #include <linux/magic.h>
30
31 #include "sd-journal.h"
32 #include "journal-def.h"
33 #include "journal-file.h"
34 #include "hashmap.h"
35 #include "list.h"
36 #include "path-util.h"
37 #include "lookup3.h"
38 #include "compress.h"
39 #include "journal-internal.h"
40 #include "missing.h"
41 #include "catalog.h"
42 #include "replace-var.h"
43
44 #define JOURNAL_FILES_MAX 1024
45
46 #define JOURNAL_FILES_RECHECK_USEC (2 * USEC_PER_SEC)
47
48 #define REPLACE_VAR_MAX 256
49
50 #define DEFAULT_DATA_THRESHOLD (64*1024)
51
52 /* We return an error here only if we didn't manage to
53    memorize the real error. */
54 static int set_put_error(sd_journal *j, int r) {
55         int k;
56
57         if (r >= 0)
58                 return r;
59
60         k = set_ensure_allocated(&j->errors, trivial_hash_func, trivial_compare_func);
61         if (k < 0)
62                 return k;
63
64         return set_put(j->errors, INT_TO_PTR(r));
65 }
66
67 static void detach_location(sd_journal *j) {
68         Iterator i;
69         JournalFile *f;
70
71         assert(j);
72
73         j->current_file = NULL;
74         j->current_field = 0;
75
76         HASHMAP_FOREACH(f, j->files, i)
77                 f->current_offset = 0;
78 }
79
80 static void reset_location(sd_journal *j) {
81         assert(j);
82
83         detach_location(j);
84         zero(j->current_location);
85 }
86
87 static void init_location(Location *l, LocationType type, JournalFile *f, Object *o) {
88         assert(l);
89         assert(type == LOCATION_DISCRETE || type == LOCATION_SEEK);
90         assert(f);
91         assert(o->object.type == OBJECT_ENTRY);
92
93         l->type = type;
94         l->seqnum = le64toh(o->entry.seqnum);
95         l->seqnum_id = f->header->seqnum_id;
96         l->realtime = le64toh(o->entry.realtime);
97         l->monotonic = le64toh(o->entry.monotonic);
98         l->boot_id = o->entry.boot_id;
99         l->xor_hash = le64toh(o->entry.xor_hash);
100
101         l->seqnum_set = l->realtime_set = l->monotonic_set = l->xor_hash_set = true;
102 }
103
104 static void set_location(sd_journal *j, LocationType type, JournalFile *f, Object *o, uint64_t offset) {
105         assert(j);
106         assert(type == LOCATION_DISCRETE || type == LOCATION_SEEK);
107         assert(f);
108         assert(o);
109
110         init_location(&j->current_location, type, f, o);
111
112         j->current_file = f;
113         j->current_field = 0;
114
115         f->current_offset = offset;
116 }
117
118 static int match_is_valid(const void *data, size_t size) {
119         const char *b, *p;
120
121         assert(data);
122
123         if (size < 2)
124                 return false;
125
126         if (startswith(data, "__"))
127                 return false;
128
129         b = data;
130         for (p = b; p < b + size; p++) {
131
132                 if (*p == '=')
133                         return p > b;
134
135                 if (*p == '_')
136                         continue;
137
138                 if (*p >= 'A' && *p <= 'Z')
139                         continue;
140
141                 if (*p >= '0' && *p <= '9')
142                         continue;
143
144                 return false;
145         }
146
147         return false;
148 }
149
/* Tells whether two match expressions (_a of s bytes, _b of t bytes)
 * refer to the same field, i.e. agree byte for byte up to and
 * including the first '='. If the shorter buffer ends before any
 * difference or '=' is seen, the result is true as well. */
static bool same_field(const void *_a, size_t s, const void *_b, size_t t) {
        const uint8_t *x = _a, *y = _b;
        size_t n = s < t ? s : t;
        size_t k = 0;

        while (k < n) {
                if (x[k] != y[k])
                        return false;

                /* Identical up to the separator: same field name */
                if (x[k] == '=')
                        return true;

                k++;
        }

        return true;
}
165
166 static Match *match_new(Match *p, MatchType t) {
167         Match *m;
168
169         m = new0(Match, 1);
170         if (!m)
171                 return NULL;
172
173         m->type = t;
174
175         if (p) {
176                 m->parent = p;
177                 LIST_PREPEND(Match, matches, p->matches, m);
178         }
179
180         return m;
181 }
182
183 static void match_free(Match *m) {
184         assert(m);
185
186         while (m->matches)
187                 match_free(m->matches);
188
189         if (m->parent)
190                 LIST_REMOVE(Match, matches, m->parent->matches, m);
191
192         free(m->data);
193         free(m);
194 }
195
196 static void match_free_if_empty(Match *m) {
197         assert(m);
198
199         if (m->matches)
200                 return;
201
202         match_free(m);
203 }
204
205 _public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size) {
206         Match *l2, *l3, *add_here = NULL, *m;
207         le64_t le_hash;
208
209         if (!j)
210                 return -EINVAL;
211
212         if (!data)
213                 return -EINVAL;
214
215         if (size == 0)
216                 size = strlen(data);
217
218         if (!match_is_valid(data, size))
219                 return -EINVAL;
220
221         /* level 0: OR term
222          * level 1: AND terms
223          * level 2: OR terms
224          * level 3: concrete matches */
225
226         if (!j->level0) {
227                 j->level0 = match_new(NULL, MATCH_OR_TERM);
228                 if (!j->level0)
229                         return -ENOMEM;
230         }
231
232         if (!j->level1) {
233                 j->level1 = match_new(j->level0, MATCH_AND_TERM);
234                 if (!j->level1)
235                         return -ENOMEM;
236         }
237
238         assert(j->level0->type == MATCH_OR_TERM);
239         assert(j->level1->type == MATCH_AND_TERM);
240
241         le_hash = htole64(hash64(data, size));
242
243         LIST_FOREACH(matches, l2, j->level1->matches) {
244                 assert(l2->type == MATCH_OR_TERM);
245
246                 LIST_FOREACH(matches, l3, l2->matches) {
247                         assert(l3->type == MATCH_DISCRETE);
248
249                         /* Exactly the same match already? Then ignore
250                          * this addition */
251                         if (l3->le_hash == le_hash &&
252                             l3->size == size &&
253                             memcmp(l3->data, data, size) == 0)
254                                 return 0;
255
256                         /* Same field? Then let's add this to this OR term */
257                         if (same_field(data, size, l3->data, l3->size)) {
258                                 add_here = l2;
259                                 break;
260                         }
261                 }
262
263                 if (add_here)
264                         break;
265         }
266
267         if (!add_here) {
268                 add_here = match_new(j->level1, MATCH_OR_TERM);
269                 if (!add_here)
270                         goto fail;
271         }
272
273         m = match_new(add_here, MATCH_DISCRETE);
274         if (!m)
275                 goto fail;
276
277         m->le_hash = le_hash;
278         m->size = size;
279         m->data = memdup(data, size);
280         if (!m->data)
281                 goto fail;
282
283         detach_location(j);
284
285         return 0;
286
287 fail:
288         if (add_here)
289                 match_free_if_empty(add_here);
290
291         if (j->level1)
292                 match_free_if_empty(j->level1);
293
294         if (j->level0)
295                 match_free_if_empty(j->level0);
296
297         return -ENOMEM;
298 }
299
300 _public_ int sd_journal_add_disjunction(sd_journal *j) {
301         Match *m;
302
303         assert(j);
304
305         if (!j->level0)
306                 return 0;
307
308         if (!j->level1)
309                 return 0;
310
311         if (!j->level1->matches)
312                 return 0;
313
314         m = match_new(j->level0, MATCH_AND_TERM);
315         if (!m)
316                 return -ENOMEM;
317
318         j->level1 = m;
319         return 0;
320 }
321
322 static char *match_make_string(Match *m) {
323         char *p, *r;
324         Match *i;
325         bool enclose = false;
326
327         if (!m)
328                 return strdup("");
329
330         if (m->type == MATCH_DISCRETE)
331                 return strndup(m->data, m->size);
332
333         p = NULL;
334         LIST_FOREACH(matches, i, m->matches) {
335                 char *t, *k;
336
337                 t = match_make_string(i);
338                 if (!t) {
339                         free(p);
340                         return NULL;
341                 }
342
343                 if (p) {
344                         k = strjoin(p, m->type == MATCH_OR_TERM ? " OR " : " AND ", t, NULL);
345                         free(p);
346                         free(t);
347
348                         if (!k)
349                                 return NULL;
350
351                         p = k;
352
353                         enclose = true;
354                 } else {
355                         free(p);
356                         p = t;
357                 }
358         }
359
360         if (enclose) {
361                 r = strjoin("(", p, ")", NULL);
362                 free(p);
363                 return r;
364         }
365
366         return p;
367 }
368
369 char *journal_make_match_string(sd_journal *j) {
370         assert(j);
371
372         return match_make_string(j->level0);
373 }
374
375 _public_ void sd_journal_flush_matches(sd_journal *j) {
376
377         if (!j)
378                 return;
379
380         if (j->level0)
381                 match_free(j->level0);
382
383         j->level0 = j->level1 = NULL;
384
385         detach_location(j);
386 }
387
388 static int compare_entry_order(JournalFile *af, Object *_ao,
389                          JournalFile *bf, uint64_t bp) {
390
391         uint64_t a, b;
392         Object *ao, *bo;
393         int r;
394
395         assert(af);
396         assert(bf);
397         assert(_ao);
398
399         /* The mmap cache might invalidate the object from the first
400          * file if we look at the one from the second file. Hence
401          * temporarily copy the header of the first one, and look at
402          * that only. */
403         ao = alloca(offsetof(EntryObject, items));
404         memcpy(ao, _ao, offsetof(EntryObject, items));
405
406         r = journal_file_move_to_object(bf, OBJECT_ENTRY, bp, &bo);
407         if (r < 0)
408                 return strcmp(af->path, bf->path);
409
410         /* We operate on two different files here, hence we can access
411          * two objects at the same time, which we normally can't.
412          *
413          * If contents and timestamps match, these entries are
414          * identical, even if the seqnum does not match */
415
416         if (sd_id128_equal(ao->entry.boot_id, bo->entry.boot_id) &&
417             ao->entry.monotonic == bo->entry.monotonic &&
418             ao->entry.realtime == bo->entry.realtime &&
419             ao->entry.xor_hash == bo->entry.xor_hash)
420                 return 0;
421
422         if (sd_id128_equal(af->header->seqnum_id, bf->header->seqnum_id)) {
423
424                 /* If this is from the same seqnum source, compare
425                  * seqnums */
426                 a = le64toh(ao->entry.seqnum);
427                 b = le64toh(bo->entry.seqnum);
428
429                 if (a < b)
430                         return -1;
431                 if (a > b)
432                         return 1;
433
434                 /* Wow! This is weird, different data but the same
435                  * seqnums? Something is borked, but let's make the
436                  * best of it and compare by time. */
437         }
438
439         if (sd_id128_equal(ao->entry.boot_id, bo->entry.boot_id)) {
440
441                 /* If the boot id matches compare monotonic time */
442                 a = le64toh(ao->entry.monotonic);
443                 b = le64toh(bo->entry.monotonic);
444
445                 if (a < b)
446                         return -1;
447                 if (a > b)
448                         return 1;
449         }
450
451         /* Otherwise compare UTC time */
452         a = le64toh(ao->entry.realtime);
453         b = le64toh(bo->entry.realtime);
454
455         if (a < b)
456                 return -1;
457         if (a > b)
458                 return 1;
459
460         /* Finally, compare by contents */
461         a = le64toh(ao->entry.xor_hash);
462         b = le64toh(bo->entry.xor_hash);
463
464         if (a < b)
465                 return -1;
466         if (a > b)
467                 return 1;
468
469         return 0;
470 }
471
472 static int compare_with_location(JournalFile *af, Object *ao, Location *l) {
473         uint64_t a;
474
475         assert(af);
476         assert(ao);
477         assert(l);
478         assert(l->type == LOCATION_DISCRETE || l->type == LOCATION_SEEK);
479
480         if (l->monotonic_set &&
481             sd_id128_equal(ao->entry.boot_id, l->boot_id) &&
482             l->realtime_set &&
483             le64toh(ao->entry.realtime) == l->realtime &&
484             l->xor_hash_set &&
485             le64toh(ao->entry.xor_hash) == l->xor_hash)
486                 return 0;
487
488         if (l->seqnum_set &&
489             sd_id128_equal(af->header->seqnum_id, l->seqnum_id)) {
490
491                 a = le64toh(ao->entry.seqnum);
492
493                 if (a < l->seqnum)
494                         return -1;
495                 if (a > l->seqnum)
496                         return 1;
497         }
498
499         if (l->monotonic_set &&
500             sd_id128_equal(ao->entry.boot_id, l->boot_id)) {
501
502                 a = le64toh(ao->entry.monotonic);
503
504                 if (a < l->monotonic)
505                         return -1;
506                 if (a > l->monotonic)
507                         return 1;
508         }
509
510         if (l->realtime_set) {
511
512                 a = le64toh(ao->entry.realtime);
513
514                 if (a < l->realtime)
515                         return -1;
516                 if (a > l->realtime)
517                         return 1;
518         }
519
520         if (l->xor_hash_set) {
521                 a = le64toh(ao->entry.xor_hash);
522
523                 if (a < l->xor_hash)
524                         return -1;
525                 if (a > l->xor_hash)
526                         return 1;
527         }
528
529         return 0;
530 }
531
532 static int next_for_match(
533                 sd_journal *j,
534                 Match *m,
535                 JournalFile *f,
536                 uint64_t after_offset,
537                 direction_t direction,
538                 Object **ret,
539                 uint64_t *offset) {
540
541         int r;
542         uint64_t np = 0;
543         Object *n;
544
545         assert(j);
546         assert(m);
547         assert(f);
548
549         if (m->type == MATCH_DISCRETE) {
550                 uint64_t dp;
551
552                 r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
553                 if (r <= 0)
554                         return r;
555
556                 return journal_file_move_to_entry_by_offset_for_data(f, dp, after_offset, direction, ret, offset);
557
558         } else if (m->type == MATCH_OR_TERM) {
559                 Match *i;
560
561                 /* Find the earliest match beyond after_offset */
562
563                 LIST_FOREACH(matches, i, m->matches) {
564                         uint64_t cp;
565
566                         r = next_for_match(j, i, f, after_offset, direction, NULL, &cp);
567                         if (r < 0)
568                                 return r;
569                         else if (r > 0) {
570                                 if (np == 0 || (direction == DIRECTION_DOWN ? np > cp : np < cp))
571                                         np = cp;
572                         }
573                 }
574
575         } else if (m->type == MATCH_AND_TERM) {
576                 Match *i;
577                 bool continue_looking;
578
579                 /* Always jump to the next matching entry and repeat
580                  * this until we fine and offset that matches for all
581                  * matches. */
582
583                 if (!m->matches)
584                         return 0;
585
586                 np = 0;
587                 do {
588                         continue_looking = false;
589
590                         LIST_FOREACH(matches, i, m->matches) {
591                                 uint64_t cp, limit;
592
593                                 if (np == 0)
594                                         limit = after_offset;
595                                 else if (direction == DIRECTION_DOWN)
596                                         limit = MAX(np, after_offset);
597                                 else
598                                         limit = MIN(np, after_offset);
599
600                                 r = next_for_match(j, i, f, limit, direction, NULL, &cp);
601                                 if (r <= 0)
602                                         return r;
603
604                                 if ((direction == DIRECTION_DOWN ? cp >= after_offset : cp <= after_offset) &&
605                                     (np == 0 || (direction == DIRECTION_DOWN ? cp > np : np < cp))) {
606                                         np = cp;
607                                         continue_looking = true;
608                                 }
609                         }
610
611                 } while (continue_looking);
612         }
613
614         if (np == 0)
615                 return 0;
616
617         r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
618         if (r < 0)
619                 return r;
620
621         if (ret)
622                 *ret = n;
623         if (offset)
624                 *offset = np;
625
626         return 1;
627 }
628
629 static int find_location_for_match(
630                 sd_journal *j,
631                 Match *m,
632                 JournalFile *f,
633                 direction_t direction,
634                 Object **ret,
635                 uint64_t *offset) {
636
637         int r;
638
639         assert(j);
640         assert(m);
641         assert(f);
642
643         if (m->type == MATCH_DISCRETE) {
644                 uint64_t dp;
645
646                 r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
647                 if (r <= 0)
648                         return r;
649
650                 /* FIXME: missing: find by monotonic */
651
652                 if (j->current_location.type == LOCATION_HEAD)
653                         return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_DOWN, ret, offset);
654                 if (j->current_location.type == LOCATION_TAIL)
655                         return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_UP, ret, offset);
656                 if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
657                         return journal_file_move_to_entry_by_seqnum_for_data(f, dp, j->current_location.seqnum, direction, ret, offset);
658                 if (j->current_location.monotonic_set) {
659                         r = journal_file_move_to_entry_by_monotonic_for_data(f, dp, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
660                         if (r != -ENOENT)
661                                 return r;
662                 }
663                 if (j->current_location.realtime_set)
664                         return journal_file_move_to_entry_by_realtime_for_data(f, dp, j->current_location.realtime, direction, ret, offset);
665
666                 return journal_file_next_entry_for_data(f, NULL, 0, dp, direction, ret, offset);
667
668         } else if (m->type == MATCH_OR_TERM) {
669                 uint64_t np = 0;
670                 Object *n;
671                 Match *i;
672
673                 /* Find the earliest match */
674
675                 LIST_FOREACH(matches, i, m->matches) {
676                         uint64_t cp;
677
678                         r = find_location_for_match(j, i, f, direction, NULL, &cp);
679                         if (r < 0)
680                                 return r;
681                         else if (r > 0) {
682                                 if (np == 0 || (direction == DIRECTION_DOWN ? np > cp : np < cp))
683                                         np = cp;
684                         }
685                 }
686
687                 if (np == 0)
688                         return 0;
689
690                 r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
691                 if (r < 0)
692                         return r;
693
694                 if (ret)
695                         *ret = n;
696                 if (offset)
697                         *offset = np;
698
699                 return 1;
700
701         } else {
702                 Match *i;
703                 uint64_t np = 0;
704
705                 assert(m->type == MATCH_AND_TERM);
706
707                 /* First jump to the last match, and then find the
708                  * next one where all matches match */
709
710                 if (!m->matches)
711                         return 0;
712
713                 LIST_FOREACH(matches, i, m->matches) {
714                         uint64_t cp;
715
716                         r = find_location_for_match(j, i, f, direction, NULL, &cp);
717                         if (r <= 0)
718                                 return r;
719
720                         if (np == 0 || (direction == DIRECTION_DOWN ? np < cp : np > cp))
721                                 np = cp;
722                 }
723
724                 return next_for_match(j, m, f, np, direction, ret, offset);
725         }
726 }
727
728 static int find_location_with_matches(
729                 sd_journal *j,
730                 JournalFile *f,
731                 direction_t direction,
732                 Object **ret,
733                 uint64_t *offset) {
734
735         int r;
736
737         assert(j);
738         assert(f);
739         assert(ret);
740         assert(offset);
741
742         if (!j->level0) {
743                 /* No matches is simple */
744
745                 if (j->current_location.type == LOCATION_HEAD)
746                         return journal_file_next_entry(f, NULL, 0, DIRECTION_DOWN, ret, offset);
747                 if (j->current_location.type == LOCATION_TAIL)
748                         return journal_file_next_entry(f, NULL, 0, DIRECTION_UP, ret, offset);
749                 if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
750                         return journal_file_move_to_entry_by_seqnum(f, j->current_location.seqnum, direction, ret, offset);
751                 if (j->current_location.monotonic_set) {
752                         r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
753                         if (r != -ENOENT)
754                                 return r;
755                 }
756                 if (j->current_location.realtime_set)
757                         return journal_file_move_to_entry_by_realtime(f, j->current_location.realtime, direction, ret, offset);
758
759                 return journal_file_next_entry(f, NULL, 0, direction, ret, offset);
760         } else
761                 return find_location_for_match(j, j->level0, f, direction, ret, offset);
762 }
763
764 static int next_with_matches(
765                 sd_journal *j,
766                 JournalFile *f,
767                 direction_t direction,
768                 Object **ret,
769                 uint64_t *offset) {
770
771         Object *c;
772         uint64_t cp;
773
774         assert(j);
775         assert(f);
776         assert(ret);
777         assert(offset);
778
779         c = *ret;
780         cp = *offset;
781
782         /* No matches is easy. We simple advance the file
783          * pointer by one. */
784         if (!j->level0)
785                 return journal_file_next_entry(f, c, cp, direction, ret, offset);
786
787         /* If we have a match then we look for the next matching entry
788          * with an offset at least one step larger */
789         return next_for_match(j, j->level0, f, direction == DIRECTION_DOWN ? cp+1 : cp-1, direction, ret, offset);
790 }
791
792 static int next_beyond_location(sd_journal *j, JournalFile *f, direction_t direction, Object **ret, uint64_t *offset) {
793         Object *c;
794         uint64_t cp;
795         int r;
796
797         assert(j);
798         assert(f);
799
800         if (f->current_offset > 0) {
801                 cp = f->current_offset;
802
803                 r = journal_file_move_to_object(f, OBJECT_ENTRY, cp, &c);
804                 if (r < 0)
805                         return r;
806
807                 r = next_with_matches(j, f, direction, &c, &cp);
808                 if (r <= 0)
809                         return r;
810         } else {
811                 r = find_location_with_matches(j, f, direction, &c, &cp);
812                 if (r <= 0)
813                         return r;
814         }
815
816         /* OK, we found the spot, now let's advance until to an entry
817          * that is actually different from what we were previously
818          * looking at. This is necessary to handle entries which exist
819          * in two (or more) journal files, and which shall all be
820          * suppressed but one. */
821
822         for (;;) {
823                 bool found;
824
825                 if (j->current_location.type == LOCATION_DISCRETE) {
826                         int k;
827
828                         k = compare_with_location(f, c, &j->current_location);
829                         if (direction == DIRECTION_DOWN)
830                                 found = k > 0;
831                         else
832                                 found = k < 0;
833                 } else
834                         found = true;
835
836                 if (found) {
837                         if (ret)
838                                 *ret = c;
839                         if (offset)
840                                 *offset = cp;
841                         return 1;
842                 }
843
844                 r = next_with_matches(j, f, direction, &c, &cp);
845                 if (r <= 0)
846                         return r;
847         }
848 }
849
850 static int real_journal_next(sd_journal *j, direction_t direction) {
851         JournalFile *f, *new_file = NULL;
852         uint64_t new_offset = 0;
853         Object *o;
854         uint64_t p;
855         Iterator i;
856         int r;
857
858         if (!j)
859                 return -EINVAL;
860
861         HASHMAP_FOREACH(f, j->files, i) {
862                 bool found;
863
864                 r = next_beyond_location(j, f, direction, &o, &p);
865                 if (r < 0) {
866                         log_debug("Can't iterate through %s, ignoring: %s", f->path, strerror(-r));
867                         continue;
868                 } else if (r == 0)
869                         continue;
870
871                 if (!new_file)
872                         found = true;
873                 else {
874                         int k;
875
876                         k = compare_entry_order(f, o, new_file, new_offset);
877
878                         if (direction == DIRECTION_DOWN)
879                                 found = k < 0;
880                         else
881                                 found = k > 0;
882                 }
883
884                 if (found) {
885                         new_file = f;
886                         new_offset = p;
887                 }
888         }
889
890         if (!new_file)
891                 return 0;
892
893         r = journal_file_move_to_object(new_file, OBJECT_ENTRY, new_offset, &o);
894         if (r < 0)
895                 return r;
896
897         set_location(j, LOCATION_DISCRETE, new_file, o, new_offset);
898
899         return 1;
900 }
901
902 _public_ int sd_journal_next(sd_journal *j) {
903         return real_journal_next(j, DIRECTION_DOWN);
904 }
905
906 _public_ int sd_journal_previous(sd_journal *j) {
907         return real_journal_next(j, DIRECTION_UP);
908 }
909
910 static int real_journal_next_skip(sd_journal *j, direction_t direction, uint64_t skip) {
911         int c = 0, r;
912
913         if (!j)
914                 return -EINVAL;
915
916         if (skip == 0) {
917                 /* If this is not a discrete skip, then at least
918                  * resolve the current location */
919                 if (j->current_location.type != LOCATION_DISCRETE)
920                         return real_journal_next(j, direction);
921
922                 return 0;
923         }
924
925         do {
926                 r = real_journal_next(j, direction);
927                 if (r < 0)
928                         return r;
929
930                 if (r == 0)
931                         return c;
932
933                 skip--;
934                 c++;
935         } while (skip > 0);
936
937         return c;
938 }
939
940 _public_ int sd_journal_next_skip(sd_journal *j, uint64_t skip) {
941         return real_journal_next_skip(j, DIRECTION_DOWN, skip);
942 }
943
944 _public_ int sd_journal_previous_skip(sd_journal *j, uint64_t skip) {
945         return real_journal_next_skip(j, DIRECTION_UP, skip);
946 }
947
948 _public_ int sd_journal_get_cursor(sd_journal *j, char **cursor) {
949         Object *o;
950         int r;
951         char bid[33], sid[33];
952
953         if (!j)
954                 return -EINVAL;
955         if (!cursor)
956                 return -EINVAL;
957
958         if (!j->current_file || j->current_file->current_offset <= 0)
959                 return -EADDRNOTAVAIL;
960
961         r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
962         if (r < 0)
963                 return r;
964
965         sd_id128_to_string(j->current_file->header->seqnum_id, sid);
966         sd_id128_to_string(o->entry.boot_id, bid);
967
968         if (asprintf(cursor,
969                      "s=%s;i=%llx;b=%s;m=%llx;t=%llx;x=%llx",
970                      sid, (unsigned long long) le64toh(o->entry.seqnum),
971                      bid, (unsigned long long) le64toh(o->entry.monotonic),
972                      (unsigned long long) le64toh(o->entry.realtime),
973                      (unsigned long long) le64toh(o->entry.xor_hash)) < 0)
974                 return -ENOMEM;
975
976         return 0;
977 }
978
979 _public_ int sd_journal_seek_cursor(sd_journal *j, const char *cursor) {
980         char *w, *state;
981         size_t l;
982         unsigned long long seqnum, monotonic, realtime, xor_hash;
983         bool
984                 seqnum_id_set = false,
985                 seqnum_set = false,
986                 boot_id_set = false,
987                 monotonic_set = false,
988                 realtime_set = false,
989                 xor_hash_set = false;
990         sd_id128_t seqnum_id, boot_id;
991
992         if (!j)
993                 return -EINVAL;
994         if (isempty(cursor))
995                 return -EINVAL;
996
997         FOREACH_WORD_SEPARATOR(w, l, cursor, ";", state) {
998                 char *item;
999                 int k = 0;
1000
1001                 if (l < 2 || w[1] != '=')
1002                         return -EINVAL;
1003
1004                 item = strndup(w, l);
1005                 if (!item)
1006                         return -ENOMEM;
1007
1008                 switch (w[0]) {
1009
1010                 case 's':
1011                         seqnum_id_set = true;
1012                         k = sd_id128_from_string(item+2, &seqnum_id);
1013                         break;
1014
1015                 case 'i':
1016                         seqnum_set = true;
1017                         if (sscanf(item+2, "%llx", &seqnum) != 1)
1018                                 k = -EINVAL;
1019                         break;
1020
1021                 case 'b':
1022                         boot_id_set = true;
1023                         k = sd_id128_from_string(item+2, &boot_id);
1024                         break;
1025
1026                 case 'm':
1027                         monotonic_set = true;
1028                         if (sscanf(item+2, "%llx", &monotonic) != 1)
1029                                 k = -EINVAL;
1030                         break;
1031
1032                 case 't':
1033                         realtime_set = true;
1034                         if (sscanf(item+2, "%llx", &realtime) != 1)
1035                                 k = -EINVAL;
1036                         break;
1037
1038                 case 'x':
1039                         xor_hash_set = true;
1040                         if (sscanf(item+2, "%llx", &xor_hash) != 1)
1041                                 k = -EINVAL;
1042                         break;
1043                 }
1044
1045                 free(item);
1046
1047                 if (k < 0)
1048                         return k;
1049         }
1050
1051         if ((!seqnum_set || !seqnum_id_set) &&
1052             (!monotonic_set || !boot_id_set) &&
1053             !realtime_set)
1054                 return -EINVAL;
1055
1056         reset_location(j);
1057
1058         j->current_location.type = LOCATION_SEEK;
1059
1060         if (realtime_set) {
1061                 j->current_location.realtime = (uint64_t) realtime;
1062                 j->current_location.realtime_set = true;
1063         }
1064
1065         if (seqnum_set && seqnum_id_set) {
1066                 j->current_location.seqnum = (uint64_t) seqnum;
1067                 j->current_location.seqnum_id = seqnum_id;
1068                 j->current_location.seqnum_set = true;
1069         }
1070
1071         if (monotonic_set && boot_id_set) {
1072                 j->current_location.monotonic = (uint64_t) monotonic;
1073                 j->current_location.boot_id = boot_id;
1074                 j->current_location.monotonic_set = true;
1075         }
1076
1077         if (xor_hash_set) {
1078                 j->current_location.xor_hash = (uint64_t) xor_hash;
1079                 j->current_location.xor_hash_set = true;
1080         }
1081
1082         return 0;
1083 }
1084
1085 _public_ int sd_journal_test_cursor(sd_journal *j, const char *cursor) {
1086         int r;
1087         char *w, *state;
1088         size_t l;
1089         Object *o;
1090
1091         if (!j)
1092                 return -EINVAL;
1093         if (isempty(cursor))
1094                 return -EINVAL;
1095
1096         if (!j->current_file || j->current_file->current_offset <= 0)
1097                 return -EADDRNOTAVAIL;
1098
1099         r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
1100         if (r < 0)
1101                 return r;
1102
1103         FOREACH_WORD_SEPARATOR(w, l, cursor, ";", state) {
1104                 _cleanup_free_ char *item = NULL;
1105                 sd_id128_t id;
1106                 unsigned long long ll;
1107                 int k = 0;
1108
1109                 if (l < 2 || w[1] != '=')
1110                         return -EINVAL;
1111
1112                 item = strndup(w, l);
1113                 if (!item)
1114                         return -ENOMEM;
1115
1116                 switch (w[0]) {
1117
1118                 case 's':
1119                         k = sd_id128_from_string(item+2, &id);
1120                         if (k < 0)
1121                                 return k;
1122                         if (!sd_id128_equal(id, j->current_file->header->seqnum_id))
1123                                 return 0;
1124                         break;
1125
1126                 case 'i':
1127                         if (sscanf(item+2, "%llx", &ll) != 1)
1128                                 return -EINVAL;
1129                         if (ll != le64toh(o->entry.seqnum))
1130                                 return 0;
1131                         break;
1132
1133                 case 'b':
1134                         k = sd_id128_from_string(item+2, &id);
1135                         if (k < 0)
1136                                 return k;
1137                         if (!sd_id128_equal(id, o->entry.boot_id))
1138                                 return 0;
1139                         break;
1140
1141                 case 'm':
1142                         if (sscanf(item+2, "%llx", &ll) != 1)
1143                                 return -EINVAL;
1144                         if (ll != le64toh(o->entry.monotonic))
1145                                 return 0;
1146                         break;
1147
1148                 case 't':
1149                         if (sscanf(item+2, "%llx", &ll) != 1)
1150                                 return -EINVAL;
1151                         if (ll != le64toh(o->entry.realtime))
1152                                 return 0;
1153                         break;
1154
1155                 case 'x':
1156                         if (sscanf(item+2, "%llx", &ll) != 1)
1157                                 return -EINVAL;
1158                         if (ll != le64toh(o->entry.xor_hash))
1159                                 return 0;
1160                         break;
1161                 }
1162         }
1163
1164         return 1;
1165 }
1166
1167
1168 _public_ int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t usec) {
1169         if (!j)
1170                 return -EINVAL;
1171
1172         reset_location(j);
1173         j->current_location.type = LOCATION_SEEK;
1174         j->current_location.boot_id = boot_id;
1175         j->current_location.monotonic = usec;
1176         j->current_location.monotonic_set = true;
1177
1178         return 0;
1179 }
1180
1181 _public_ int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec) {
1182         if (!j)
1183                 return -EINVAL;
1184
1185         reset_location(j);
1186         j->current_location.type = LOCATION_SEEK;
1187         j->current_location.realtime = usec;
1188         j->current_location.realtime_set = true;
1189
1190         return 0;
1191 }
1192
1193 _public_ int sd_journal_seek_head(sd_journal *j) {
1194         if (!j)
1195                 return -EINVAL;
1196
1197         reset_location(j);
1198         j->current_location.type = LOCATION_HEAD;
1199
1200         return 0;
1201 }
1202
1203 _public_ int sd_journal_seek_tail(sd_journal *j) {
1204         if (!j)
1205                 return -EINVAL;
1206
1207         reset_location(j);
1208         j->current_location.type = LOCATION_TAIL;
1209
1210         return 0;
1211 }
1212
1213 static void check_network(sd_journal *j, int fd) {
1214         struct statfs sfs;
1215
1216         assert(j);
1217
1218         if (j->on_network)
1219                 return;
1220
1221         if (fstatfs(fd, &sfs) < 0)
1222                 return;
1223
1224         j->on_network =
1225                 (long)sfs.f_type == (long)CIFS_MAGIC_NUMBER ||
1226                 sfs.f_type == CODA_SUPER_MAGIC ||
1227                 sfs.f_type == NCP_SUPER_MAGIC ||
1228                 sfs.f_type == NFS_SUPER_MAGIC ||
1229                 sfs.f_type == SMB_SUPER_MAGIC;
1230 }
1231
1232 static int add_file(sd_journal *j, const char *prefix, const char *filename) {
1233         char _cleanup_free_ *path = NULL;
1234         int r;
1235         JournalFile *f;
1236
1237         assert(j);
1238         assert(prefix);
1239         assert(filename);
1240
1241         if ((j->flags & SD_JOURNAL_SYSTEM_ONLY) &&
1242             !(streq(filename, "system.journal") ||
1243               streq(filename, "system.journal~") ||
1244               (startswith(filename, "system@") &&
1245                (endswith(filename, ".journal") || endswith(filename, ".journal~")))))
1246                 return 0;
1247
1248         path = strjoin(prefix, "/", filename, NULL);
1249         if (!path)
1250                 return -ENOMEM;
1251
1252         if (hashmap_get(j->files, path))
1253                 return 0;
1254
1255         if (hashmap_size(j->files) >= JOURNAL_FILES_MAX) {
1256                 log_debug("Too many open journal files, not adding %s, ignoring.", path);
1257                 return set_put_error(j, -ETOOMANYREFS);
1258         }
1259
1260         r = journal_file_open(path, O_RDONLY, 0, false, false, NULL, j->mmap, NULL, &f);
1261         if (r < 0) {
1262                 if (errno == ENOENT)
1263                         return 0;
1264
1265                 return r;
1266         }
1267
1268         /* journal_file_dump(f); */
1269
1270         r = hashmap_put(j->files, f->path, f);
1271         if (r < 0) {
1272                 journal_file_close(f);
1273                 return r;
1274         }
1275
1276         log_debug("File %s got added.", f->path);
1277
1278         check_network(j, f->fd);
1279
1280         j->current_invalidate_counter ++;
1281
1282         return 0;
1283 }
1284
1285 static int remove_file(sd_journal *j, const char *prefix, const char *filename) {
1286         char *path;
1287         JournalFile *f;
1288
1289         assert(j);
1290         assert(prefix);
1291         assert(filename);
1292
1293         path = strjoin(prefix, "/", filename, NULL);
1294         if (!path)
1295                 return -ENOMEM;
1296
1297         f = hashmap_get(j->files, path);
1298         free(path);
1299         if (!f)
1300                 return 0;
1301
1302         hashmap_remove(j->files, f->path);
1303
1304         log_debug("File %s got removed.", f->path);
1305
1306         if (j->current_file == f) {
1307                 j->current_file = NULL;
1308                 j->current_field = 0;
1309         }
1310
1311         if (j->unique_file == f) {
1312                 j->unique_file = NULL;
1313                 j->unique_offset = 0;
1314         }
1315
1316         journal_file_close(f);
1317
1318         j->current_invalidate_counter ++;
1319
1320         return 0;
1321 }
1322
1323 static int add_directory(sd_journal *j, const char *prefix, const char *dirname) {
1324         char _cleanup_free_ *path = NULL;
1325         int r;
1326         DIR _cleanup_closedir_ *d = NULL;
1327         sd_id128_t id, mid;
1328         Directory *m;
1329
1330         assert(j);
1331         assert(prefix);
1332         assert(dirname);
1333
1334         log_debug("Considering %s/%s.", prefix, dirname);
1335
1336         if ((j->flags & SD_JOURNAL_LOCAL_ONLY) &&
1337             (sd_id128_from_string(dirname, &id) < 0 ||
1338              sd_id128_get_machine(&mid) < 0 ||
1339              !(sd_id128_equal(id, mid) || path_startswith(prefix, "/run"))))
1340             return 0;
1341
1342         path = strjoin(prefix, "/", dirname, NULL);
1343         if (!path)
1344                 return -ENOMEM;
1345
1346         d = opendir(path);
1347         if (!d) {
1348                 log_debug("Failed to open %s: %m", path);
1349                 if (errno == ENOENT)
1350                         return 0;
1351                 return -errno;
1352         }
1353
1354         m = hashmap_get(j->directories_by_path, path);
1355         if (!m) {
1356                 m = new0(Directory, 1);
1357                 if (!m)
1358                         return -ENOMEM;
1359
1360                 m->is_root = false;
1361                 m->path = path;
1362
1363                 if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
1364                         free(m);
1365                         return -ENOMEM;
1366                 }
1367
1368                 path = NULL; /* avoid freeing in cleanup */
1369                 j->current_invalidate_counter ++;
1370
1371                 log_debug("Directory %s got added.", m->path);
1372
1373         } else if (m->is_root)
1374                 return 0;
1375
1376         if (m->wd <= 0 && j->inotify_fd >= 0) {
1377
1378                 m->wd = inotify_add_watch(j->inotify_fd, m->path,
1379                                           IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1380                                           IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM|
1381                                           IN_ONLYDIR);
1382
1383                 if (m->wd > 0 && hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m) < 0)
1384                         inotify_rm_watch(j->inotify_fd, m->wd);
1385         }
1386
1387         for (;;) {
1388                 struct dirent *de;
1389                 union dirent_storage buf;
1390
1391                 r = readdir_r(d, &buf.de, &de);
1392                 if (r != 0 || !de)
1393                         break;
1394
1395                 if (dirent_is_file_with_suffix(de, ".journal") ||
1396                     dirent_is_file_with_suffix(de, ".journal~")) {
1397                         r = add_file(j, m->path, de->d_name);
1398                         if (r < 0) {
1399                                 log_debug("Failed to add file %s/%s: %s",
1400                                           m->path, de->d_name, strerror(-r));
1401                                 r = set_put_error(j, r);
1402                                 if (r < 0)
1403                                         return r;
1404                         }
1405                 }
1406         }
1407
1408         check_network(j, dirfd(d));
1409
1410         return 0;
1411 }
1412
1413 static int add_root_directory(sd_journal *j, const char *p) {
1414         DIR _cleanup_closedir_ *d = NULL;
1415         Directory *m;
1416         int r;
1417
1418         assert(j);
1419         assert(p);
1420
1421         if ((j->flags & SD_JOURNAL_RUNTIME_ONLY) &&
1422             !path_startswith(p, "/run"))
1423                 return -EINVAL;
1424
1425         d = opendir(p);
1426         if (!d)
1427                 return -errno;
1428
1429         m = hashmap_get(j->directories_by_path, p);
1430         if (!m) {
1431                 m = new0(Directory, 1);
1432                 if (!m)
1433                         return -ENOMEM;
1434
1435                 m->is_root = true;
1436                 m->path = strdup(p);
1437                 if (!m->path) {
1438                         free(m);
1439                         return -ENOMEM;
1440                 }
1441
1442                 if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
1443                         free(m->path);
1444                         free(m);
1445                         return -ENOMEM;
1446                 }
1447
1448                 j->current_invalidate_counter ++;
1449
1450                 log_debug("Root directory %s got added.", m->path);
1451
1452         } else if (!m->is_root)
1453                 return 0;
1454
1455         if (m->wd <= 0 && j->inotify_fd >= 0) {
1456
1457                 m->wd = inotify_add_watch(j->inotify_fd, m->path,
1458                                           IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1459                                           IN_ONLYDIR);
1460
1461                 if (m->wd > 0 && hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m) < 0)
1462                         inotify_rm_watch(j->inotify_fd, m->wd);
1463         }
1464
1465         for (;;) {
1466                 struct dirent *de;
1467                 union dirent_storage buf;
1468                 sd_id128_t id;
1469
1470                 r = readdir_r(d, &buf.de, &de);
1471                 if (r != 0 || !de)
1472                         break;
1473
1474                 if (dirent_is_file_with_suffix(de, ".journal") ||
1475                     dirent_is_file_with_suffix(de, ".journal~")) {
1476                         r = add_file(j, m->path, de->d_name);
1477                         if (r < 0) {
1478                                 log_debug("Failed to add file %s/%s: %s",
1479                                           m->path, de->d_name, strerror(-r));
1480                                 r = set_put_error(j, r);
1481                                 if (r < 0)
1482                                         return r;
1483                         }
1484                 } else if ((de->d_type == DT_DIR || de->d_type == DT_LNK || de->d_type == DT_UNKNOWN) &&
1485                            sd_id128_from_string(de->d_name, &id) >= 0) {
1486
1487                         r = add_directory(j, m->path, de->d_name);
1488                         if (r < 0)
1489                                 log_debug("Failed to add directory %s/%s: %s", m->path, de->d_name, strerror(-r));
1490                 }
1491         }
1492
1493         check_network(j, dirfd(d));
1494
1495         return 0;
1496 }
1497
1498 static int remove_directory(sd_journal *j, Directory *d) {
1499         assert(j);
1500
1501         if (d->wd > 0) {
1502                 hashmap_remove(j->directories_by_wd, INT_TO_PTR(d->wd));
1503
1504                 if (j->inotify_fd >= 0)
1505                         inotify_rm_watch(j->inotify_fd, d->wd);
1506         }
1507
1508         hashmap_remove(j->directories_by_path, d->path);
1509
1510         if (d->is_root)
1511                 log_debug("Root directory %s got removed.", d->path);
1512         else
1513                 log_debug("Directory %s got removed.", d->path);
1514
1515         free(d->path);
1516         free(d);
1517
1518         return 0;
1519 }
1520
1521 static int add_search_paths(sd_journal *j) {
1522         int r;
1523         const char search_paths[] =
1524                 "/run/log/journal\0"
1525                 "/var/log/journal\0";
1526         const char *p;
1527
1528         assert(j);
1529
1530         /* We ignore most errors here, since the idea is to only open
1531          * what's actually accessible, and ignore the rest. */
1532
1533         NULSTR_FOREACH(p, search_paths) {
1534                 r = add_root_directory(j, p);
1535                 if (r < 0 && r != -ENOENT) {
1536                         r = set_put_error(j, r);
1537                         if (r < 0)
1538                                 return r;
1539                 }
1540         }
1541
1542         return 0;
1543 }
1544
1545 static int allocate_inotify(sd_journal *j) {
1546         assert(j);
1547
1548         if (j->inotify_fd < 0) {
1549                 j->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1550                 if (j->inotify_fd < 0)
1551                         return -errno;
1552         }
1553
1554         if (!j->directories_by_wd) {
1555                 j->directories_by_wd = hashmap_new(trivial_hash_func, trivial_compare_func);
1556                 if (!j->directories_by_wd)
1557                         return -ENOMEM;
1558         }
1559
1560         return 0;
1561 }
1562
1563 static sd_journal *journal_new(int flags, const char *path) {
1564         sd_journal *j;
1565
1566         j = new0(sd_journal, 1);
1567         if (!j)
1568                 return NULL;
1569
1570         j->inotify_fd = -1;
1571         j->flags = flags;
1572         j->data_threshold = DEFAULT_DATA_THRESHOLD;
1573
1574         if (path) {
1575                 j->path = strdup(path);
1576                 if (!j->path)
1577                         goto fail;
1578         }
1579
1580         j->files = hashmap_new(string_hash_func, string_compare_func);
1581         j->directories_by_path = hashmap_new(string_hash_func, string_compare_func);
1582         j->mmap = mmap_cache_new();
1583         if (!j->files || !j->directories_by_path || !j->mmap)
1584                 goto fail;
1585
1586         return j;
1587
1588 fail:
1589         sd_journal_close(j);
1590         return NULL;
1591 }
1592
1593 _public_ int sd_journal_open(sd_journal **ret, int flags) {
1594         sd_journal *j;
1595         int r;
1596
1597         if (!ret)
1598                 return -EINVAL;
1599
1600         if (flags & ~(SD_JOURNAL_LOCAL_ONLY|
1601                       SD_JOURNAL_RUNTIME_ONLY|
1602                       SD_JOURNAL_SYSTEM_ONLY))
1603                 return -EINVAL;
1604
1605         j = journal_new(flags, NULL);
1606         if (!j)
1607                 return -ENOMEM;
1608
1609         r = add_search_paths(j);
1610         if (r < 0)
1611                 goto fail;
1612
1613         *ret = j;
1614         return 0;
1615
1616 fail:
1617         sd_journal_close(j);
1618
1619         return r;
1620 }
1621
1622 _public_ int sd_journal_open_directory(sd_journal **ret, const char *path, int flags) {
1623         sd_journal *j;
1624         int r;
1625
1626         if (!ret)
1627                 return -EINVAL;
1628
1629         if (!path)
1630                 return -EINVAL;
1631
1632         if (flags != 0)
1633                 return -EINVAL;
1634
1635         j = journal_new(flags, path);
1636         if (!j)
1637                 return -ENOMEM;
1638
1639         r = add_root_directory(j, path);
1640         if (r < 0) {
1641                 set_put_error(j, r);
1642                 goto fail;
1643         }
1644
1645         *ret = j;
1646         return 0;
1647
1648 fail:
1649         sd_journal_close(j);
1650
1651         return r;
1652 }
1653
1654 _public_ void sd_journal_close(sd_journal *j) {
1655         Directory *d;
1656         JournalFile *f;
1657
1658         if (!j)
1659                 return;
1660
1661         sd_journal_flush_matches(j);
1662
1663         while ((f = hashmap_steal_first(j->files)))
1664                 journal_file_close(f);
1665
1666         hashmap_free(j->files);
1667
1668         while ((d = hashmap_first(j->directories_by_path)))
1669                 remove_directory(j, d);
1670
1671         while ((d = hashmap_first(j->directories_by_wd)))
1672                 remove_directory(j, d);
1673
1674         hashmap_free(j->directories_by_path);
1675         hashmap_free(j->directories_by_wd);
1676
1677         if (j->inotify_fd >= 0)
1678                 close_nointr_nofail(j->inotify_fd);
1679
1680         if (j->mmap)
1681                 mmap_cache_unref(j->mmap);
1682
1683         free(j->path);
1684         free(j->unique_field);
1685         set_free(j->errors);
1686         free(j);
1687 }
1688
1689 _public_ int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret) {
1690         Object *o;
1691         JournalFile *f;
1692         int r;
1693
1694         if (!j)
1695                 return -EINVAL;
1696         if (!ret)
1697                 return -EINVAL;
1698
1699         f = j->current_file;
1700         if (!f)
1701                 return -EADDRNOTAVAIL;
1702
1703         if (f->current_offset <= 0)
1704                 return -EADDRNOTAVAIL;
1705
1706         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1707         if (r < 0)
1708                 return r;
1709
1710         *ret = le64toh(o->entry.realtime);
1711         return 0;
1712 }
1713
1714 _public_ int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id128_t *ret_boot_id) {
1715         Object *o;
1716         JournalFile *f;
1717         int r;
1718         sd_id128_t id;
1719
1720         if (!j)
1721                 return -EINVAL;
1722
1723         f = j->current_file;
1724         if (!f)
1725                 return -EADDRNOTAVAIL;
1726
1727         if (f->current_offset <= 0)
1728                 return -EADDRNOTAVAIL;
1729
1730         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1731         if (r < 0)
1732                 return r;
1733
1734         if (ret_boot_id)
1735                 *ret_boot_id = o->entry.boot_id;
1736         else {
1737                 r = sd_id128_get_boot(&id);
1738                 if (r < 0)
1739                         return r;
1740
1741                 if (!sd_id128_equal(id, o->entry.boot_id))
1742                         return -ESTALE;
1743         }
1744
1745         if (ret)
1746                 *ret = le64toh(o->entry.monotonic);
1747
1748         return 0;
1749 }
1750
/* Checks whether field is an acceptable field name: non-empty, not
 * starting with "__", and made up solely of '_', 'A'-'Z' and
 * '0'-'9'. */
static bool field_is_valid(const char *field) {
        const char *c;

        assert(field);

        if (isempty(field) || startswith(field, "__"))
                return false;

        for (c = field; *c; c++) {
                bool allowed;

                allowed =
                        *c == '_' ||
                        (*c >= 'A' && *c <= 'Z') ||
                        (*c >= '0' && *c <= '9');

                if (!allowed)
                        return false;
        }

        return true;
}
1778
1779 _public_ int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *size) {
1780         JournalFile *f;
1781         uint64_t i, n;
1782         size_t field_length;
1783         int r;
1784         Object *o;
1785
1786         if (!j)
1787                 return -EINVAL;
1788         if (!field)
1789                 return -EINVAL;
1790         if (!data)
1791                 return -EINVAL;
1792         if (!size)
1793                 return -EINVAL;
1794
1795         if (!field_is_valid(field))
1796                 return -EINVAL;
1797
1798         f = j->current_file;
1799         if (!f)
1800                 return -EADDRNOTAVAIL;
1801
1802         if (f->current_offset <= 0)
1803                 return -EADDRNOTAVAIL;
1804
1805         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1806         if (r < 0)
1807                 return r;
1808
1809         field_length = strlen(field);
1810
1811         n = journal_file_entry_n_items(o);
1812         for (i = 0; i < n; i++) {
1813                 uint64_t p, l;
1814                 le64_t le_hash;
1815                 size_t t;
1816
1817                 p = le64toh(o->entry.items[i].object_offset);
1818                 le_hash = o->entry.items[i].hash;
1819                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1820                 if (r < 0)
1821                         return r;
1822
1823                 if (le_hash != o->data.hash)
1824                         return -EBADMSG;
1825
1826                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
1827
1828                 if (o->object.flags & OBJECT_COMPRESSED) {
1829
1830 #ifdef HAVE_XZ
1831                         if (uncompress_startswith(o->data.payload, l,
1832                                                   &f->compress_buffer, &f->compress_buffer_size,
1833                                                   field, field_length, '=')) {
1834
1835                                 uint64_t rsize;
1836
1837                                 if (!uncompress_blob(o->data.payload, l,
1838                                                      &f->compress_buffer, &f->compress_buffer_size, &rsize,
1839                                                      j->data_threshold))
1840                                         return -EBADMSG;
1841
1842                                 *data = f->compress_buffer;
1843                                 *size = (size_t) rsize;
1844
1845                                 return 0;
1846                         }
1847 #else
1848                         return -EPROTONOSUPPORT;
1849 #endif
1850
1851                 } else if (l >= field_length+1 &&
1852                            memcmp(o->data.payload, field, field_length) == 0 &&
1853                            o->data.payload[field_length] == '=') {
1854
1855                         t = (size_t) l;
1856
1857                         if ((uint64_t) t != l)
1858                                 return -E2BIG;
1859
1860                         *data = o->data.payload;
1861                         *size = t;
1862
1863                         return 0;
1864                 }
1865
1866                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1867                 if (r < 0)
1868                         return r;
1869         }
1870
1871         return -ENOENT;
1872 }
1873
1874 static int return_data(sd_journal *j, JournalFile *f, Object *o, const void **data, size_t *size) {
1875         size_t t;
1876         uint64_t l;
1877
1878         l = le64toh(o->object.size) - offsetof(Object, data.payload);
1879         t = (size_t) l;
1880
1881         /* We can't read objects larger than 4G on a 32bit machine */
1882         if ((uint64_t) t != l)
1883                 return -E2BIG;
1884
1885         if (o->object.flags & OBJECT_COMPRESSED) {
1886 #ifdef HAVE_XZ
1887                 uint64_t rsize;
1888
1889                 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize, j->data_threshold))
1890                         return -EBADMSG;
1891
1892                 *data = f->compress_buffer;
1893                 *size = (size_t) rsize;
1894 #else
1895                 return -EPROTONOSUPPORT;
1896 #endif
1897         } else {
1898                 *data = o->data.payload;
1899                 *size = t;
1900         }
1901
1902         return 0;
1903 }
1904
1905 _public_ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) {
1906         JournalFile *f;
1907         uint64_t p, n;
1908         le64_t le_hash;
1909         int r;
1910         Object *o;
1911
1912         if (!j)
1913                 return -EINVAL;
1914         if (!data)
1915                 return -EINVAL;
1916         if (!size)
1917                 return -EINVAL;
1918
1919         f = j->current_file;
1920         if (!f)
1921                 return -EADDRNOTAVAIL;
1922
1923         if (f->current_offset <= 0)
1924                 return -EADDRNOTAVAIL;
1925
1926         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1927         if (r < 0)
1928                 return r;
1929
1930         n = journal_file_entry_n_items(o);
1931         if (j->current_field >= n)
1932                 return 0;
1933
1934         p = le64toh(o->entry.items[j->current_field].object_offset);
1935         le_hash = o->entry.items[j->current_field].hash;
1936         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1937         if (r < 0)
1938                 return r;
1939
1940         if (le_hash != o->data.hash)
1941                 return -EBADMSG;
1942
1943         r = return_data(j, f, o, data, size);
1944         if (r < 0)
1945                 return r;
1946
1947         j->current_field ++;
1948
1949         return 1;
1950 }
1951
1952 _public_ void sd_journal_restart_data(sd_journal *j) {
1953         if (!j)
1954                 return;
1955
1956         j->current_field = 0;
1957 }
1958
1959 _public_ int sd_journal_get_fd(sd_journal *j) {
1960         int r;
1961
1962         if (!j)
1963                 return -EINVAL;
1964
1965         if (j->inotify_fd >= 0)
1966                 return j->inotify_fd;
1967
1968         r = allocate_inotify(j);
1969         if (r < 0)
1970                 return r;
1971
1972         /* Iterate through all dirs again, to add them to the
1973          * inotify */
1974         if (j->path)
1975                 r = add_root_directory(j, j->path);
1976         else
1977                 r = add_search_paths(j);
1978         if (r < 0)
1979                 return r;
1980
1981         return j->inotify_fd;
1982 }
1983
1984 static void process_inotify_event(sd_journal *j, struct inotify_event *e) {
1985         Directory *d;
1986         int r;
1987
1988         assert(j);
1989         assert(e);
1990
1991         /* Is this a subdirectory we watch? */
1992         d = hashmap_get(j->directories_by_wd, INT_TO_PTR(e->wd));
1993         if (d) {
1994                 sd_id128_t id;
1995
1996                 if (!(e->mask & IN_ISDIR) && e->len > 0 &&
1997                     (endswith(e->name, ".journal") ||
1998                      endswith(e->name, ".journal~"))) {
1999
2000                         /* Event for a journal file */
2001
2002                         if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) {
2003                                 r = add_file(j, d->path, e->name);
2004                                 if (r < 0) {
2005                                         log_debug("Failed to add file %s/%s: %s",
2006                                                   d->path, e->name, strerror(-r));
2007                                         set_put_error(j, r);
2008                                 }
2009
2010                         } else if (e->mask & (IN_DELETE|IN_MOVED_FROM|IN_UNMOUNT)) {
2011
2012                                 r = remove_file(j, d->path, e->name);
2013                                 if (r < 0)
2014                                         log_debug("Failed to remove file %s/%s: %s", d->path, e->name, strerror(-r));
2015                         }
2016
2017                 } else if (!d->is_root && e->len == 0) {
2018
2019                         /* Event for a subdirectory */
2020
2021                         if (e->mask & (IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT)) {
2022                                 r = remove_directory(j, d);
2023                                 if (r < 0)
2024                                         log_debug("Failed to remove directory %s: %s", d->path, strerror(-r));
2025                         }
2026
2027
2028                 } else if (d->is_root && (e->mask & IN_ISDIR) && e->len > 0 && sd_id128_from_string(e->name, &id) >= 0) {
2029
2030                         /* Event for root directory */
2031
2032                         if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) {
2033                                 r = add_directory(j, d->path, e->name);
2034                                 if (r < 0)
2035                                         log_debug("Failed to add directory %s/%s: %s", d->path, e->name, strerror(-r));
2036                         }
2037                 }
2038
2039                 return;
2040         }
2041
2042         if (e->mask & IN_IGNORED)
2043                 return;
2044
2045         log_warning("Unknown inotify event.");
2046 }
2047
2048 static int determine_change(sd_journal *j) {
2049         bool b;
2050
2051         assert(j);
2052
2053         b = j->current_invalidate_counter != j->last_invalidate_counter;
2054         j->last_invalidate_counter = j->current_invalidate_counter;
2055
2056         return b ? SD_JOURNAL_INVALIDATE : SD_JOURNAL_APPEND;
2057 }
2058
2059 _public_ int sd_journal_process(sd_journal *j) {
2060         uint8_t buffer[sizeof(struct inotify_event) + FILENAME_MAX] _alignas_(struct inotify_event);
2061         bool got_something = false;
2062
2063         if (!j)
2064                 return -EINVAL;
2065
2066         for (;;) {
2067                 struct inotify_event *e;
2068                 ssize_t l;
2069
2070                 l = read(j->inotify_fd, buffer, sizeof(buffer));
2071                 if (l < 0) {
2072                         if (errno == EAGAIN || errno == EINTR)
2073                                 return got_something ? determine_change(j) : SD_JOURNAL_NOP;
2074
2075                         return -errno;
2076                 }
2077
2078                 got_something = true;
2079
2080                 e = (struct inotify_event*) buffer;
2081                 while (l > 0) {
2082                         size_t step;
2083
2084                         process_inotify_event(j, e);
2085
2086                         step = sizeof(struct inotify_event) + e->len;
2087                         assert(step <= (size_t) l);
2088
2089                         e = (struct inotify_event*) ((uint8_t*) e + step);
2090                         l -= step;
2091                 }
2092         }
2093
2094         return determine_change(j);
2095 }
2096
2097 _public_ int sd_journal_wait(sd_journal *j, uint64_t timeout_usec) {
2098         int r;
2099
2100         assert(j);
2101
2102         if (j->inotify_fd < 0) {
2103
2104                 /* This is the first invocation, hence create the
2105                  * inotify watch */
2106                 r = sd_journal_get_fd(j);
2107                 if (r < 0)
2108                         return r;
2109
2110                 /* The journal might have changed since the context
2111                  * object was created and we weren't watching before,
2112                  * hence don't wait for anything, and return
2113                  * immediately. */
2114                 return determine_change(j);
2115         }
2116
2117         if (j->on_network) {
2118                 /* If we are on the network we need to regularly check
2119                  * for changes manually */
2120
2121                 if (timeout_usec == (uint64_t) -1 || timeout_usec > JOURNAL_FILES_RECHECK_USEC)
2122                         timeout_usec = JOURNAL_FILES_RECHECK_USEC;
2123         }
2124
2125         do {
2126                 r = fd_wait_for_event(j->inotify_fd, POLLIN, timeout_usec);
2127         } while (r == -EINTR);
2128
2129         if (r < 0)
2130                 return r;
2131
2132         return sd_journal_process(j);
2133 }
2134
2135 _public_ int sd_journal_get_cutoff_realtime_usec(sd_journal *j, uint64_t *from, uint64_t *to) {
2136         Iterator i;
2137         JournalFile *f;
2138         bool first = true;
2139         int r;
2140
2141         if (!j)
2142                 return -EINVAL;
2143         if (!from && !to)
2144                 return -EINVAL;
2145
2146         HASHMAP_FOREACH(f, j->files, i) {
2147                 usec_t fr, t;
2148
2149                 r = journal_file_get_cutoff_realtime_usec(f, &fr, &t);
2150                 if (r == -ENOENT)
2151                         continue;
2152                 if (r < 0)
2153                         return r;
2154                 if (r == 0)
2155                         continue;
2156
2157                 if (first) {
2158                         if (from)
2159                                 *from = fr;
2160                         if (to)
2161                                 *to = t;
2162                         first = false;
2163                 } else {
2164                         if (from)
2165                                 *from = MIN(fr, *from);
2166                         if (to)
2167                                 *to = MAX(t, *to);
2168                 }
2169         }
2170
2171         return first ? 0 : 1;
2172 }
2173
2174 _public_ int sd_journal_get_cutoff_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t *from, uint64_t *to) {
2175         Iterator i;
2176         JournalFile *f;
2177         bool first = true;
2178         int r;
2179
2180         if (!j)
2181                 return -EINVAL;
2182         if (!from && !to)
2183                 return -EINVAL;
2184
2185         HASHMAP_FOREACH(f, j->files, i) {
2186                 usec_t fr, t;
2187
2188                 r = journal_file_get_cutoff_monotonic_usec(f, boot_id, &fr, &t);
2189                 if (r == -ENOENT)
2190                         continue;
2191                 if (r < 0)
2192                         return r;
2193                 if (r == 0)
2194                         continue;
2195
2196                 if (first) {
2197                         if (from)
2198                                 *from = fr;
2199                         if (to)
2200                                 *to = t;
2201                         first = false;
2202                 } else {
2203                         if (from)
2204                                 *from = MIN(fr, *from);
2205                         if (to)
2206                                 *to = MAX(t, *to);
2207                 }
2208         }
2209
2210         return first ? 0 : 1;
2211 }
2212
2213 void journal_print_header(sd_journal *j) {
2214         Iterator i;
2215         JournalFile *f;
2216         bool newline = false;
2217
2218         assert(j);
2219
2220         HASHMAP_FOREACH(f, j->files, i) {
2221                 if (newline)
2222                         putchar('\n');
2223                 else
2224                         newline = true;
2225
2226                 journal_file_print_header(f);
2227         }
2228 }
2229
2230 _public_ int sd_journal_get_usage(sd_journal *j, uint64_t *bytes) {
2231         Iterator i;
2232         JournalFile *f;
2233         uint64_t sum = 0;
2234
2235         if (!j)
2236                 return -EINVAL;
2237         if (!bytes)
2238                 return -EINVAL;
2239
2240         HASHMAP_FOREACH(f, j->files, i) {
2241                 struct stat st;
2242
2243                 if (fstat(f->fd, &st) < 0)
2244                         return -errno;
2245
2246                 sum += (uint64_t) st.st_blocks * 512ULL;
2247         }
2248
2249         *bytes = sum;
2250         return 0;
2251 }
2252
2253 _public_ int sd_journal_query_unique(sd_journal *j, const char *field) {
2254         char *f;
2255
2256         if (!j)
2257                 return -EINVAL;
2258         if (isempty(field))
2259                 return -EINVAL;
2260         if (!field_is_valid(field))
2261                 return -EINVAL;
2262
2263         f = strdup(field);
2264         if (!f)
2265                 return -ENOMEM;
2266
2267         free(j->unique_field);
2268         j->unique_field = f;
2269         j->unique_file = NULL;
2270         j->unique_offset = 0;
2271
2272         return 0;
2273 }
2274
2275 _public_ int sd_journal_enumerate_unique(sd_journal *j, const void **data, size_t *l) {
2276         Object *o;
2277         size_t k;
2278         int r;
2279
2280         if (!j)
2281                 return -EINVAL;
2282         if (!data)
2283                 return -EINVAL;
2284         if (!l)
2285                 return -EINVAL;
2286         if (!j->unique_field)
2287                 return -EINVAL;
2288
2289         k = strlen(j->unique_field);
2290
2291         if (!j->unique_file) {
2292                 j->unique_file = hashmap_first(j->files);
2293                 if (!j->unique_file)
2294                         return 0;
2295                 j->unique_offset = 0;
2296         }
2297
2298         for (;;) {
2299                 JournalFile *of;
2300                 Iterator i;
2301                 const void *odata;
2302                 size_t ol;
2303                 bool found;
2304
2305                 /* Proceed to next data object in the field's linked list */
2306                 if (j->unique_offset == 0) {
2307                         r = journal_file_find_field_object(j->unique_file, j->unique_field, k, &o, NULL);
2308                         if (r < 0)
2309                                 return r;
2310
2311                         j->unique_offset = r > 0 ? le64toh(o->field.head_data_offset) : 0;
2312                 } else {
2313                         r = journal_file_move_to_object(j->unique_file, OBJECT_DATA, j->unique_offset, &o);
2314                         if (r < 0)
2315                                 return r;
2316
2317                         j->unique_offset = le64toh(o->data.next_field_offset);
2318                 }
2319
2320                 /* We reached the end of the list? Then start again, with the next file */
2321                 if (j->unique_offset == 0) {
2322                         JournalFile *n;
2323
2324                         n = hashmap_next(j->files, j->unique_file->path);
2325                         if (!n)
2326                                 return 0;
2327
2328                         j->unique_file = n;
2329                         continue;
2330                 }
2331
2332                 /* We do not use the type context here, but 0 instead,
2333                  * so that we can look at this data object at the same
2334                  * time as one on another file */
2335                 r = journal_file_move_to_object(j->unique_file, 0, j->unique_offset, &o);
2336                 if (r < 0)
2337                         return r;
2338
2339                 /* Let's do the type check by hand, since we used 0 context above. */
2340                 if (o->object.type != OBJECT_DATA)
2341                         return -EBADMSG;
2342
2343                 r = return_data(j, j->unique_file, o, &odata, &ol);
2344                 if (r < 0)
2345                         return r;
2346
2347                 /* OK, now let's see if we already returned this data
2348                  * object by checking if it exists in the earlier
2349                  * traversed files. */
2350                 found = false;
2351                 HASHMAP_FOREACH(of, j->files, i) {
2352                         Object *oo;
2353                         uint64_t op;
2354
2355                         if (of == j->unique_file)
2356                                 break;
2357
2358                         /* Skip this file it didn't have any fields
2359                          * indexed */
2360                         if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) &&
2361                             le64toh(of->header->n_fields) <= 0)
2362                                 continue;
2363
2364                         r = journal_file_find_data_object_with_hash(of, odata, ol, le64toh(o->data.hash), &oo, &op);
2365                         if (r < 0)
2366                                 return r;
2367
2368                         if (r > 0)
2369                                 found = true;
2370                 }
2371
2372                 if (found)
2373                         continue;
2374
2375                 r = return_data(j, j->unique_file, o, data, l);
2376                 if (r < 0)
2377                         return r;
2378
2379                 return 1;
2380         }
2381 }
2382
2383 _public_ void sd_journal_restart_unique(sd_journal *j) {
2384         if (!j)
2385                 return;
2386
2387         j->unique_file = NULL;
2388         j->unique_offset = 0;
2389 }
2390
2391 _public_ int sd_journal_reliable_fd(sd_journal *j) {
2392         if (!j)
2393                 return -EINVAL;
2394
2395         return !j->on_network;
2396 }
2397
2398 static char *lookup_field(const char *field, void *userdata) {
2399         sd_journal *j = userdata;
2400         const void *data;
2401         size_t size, d;
2402         int r;
2403
2404         assert(field);
2405         assert(j);
2406
2407         r = sd_journal_get_data(j, field, &data, &size);
2408         if (r < 0 ||
2409             size > REPLACE_VAR_MAX)
2410                 return strdup(field);
2411
2412         d = strlen(field) + 1;
2413
2414         return strndup((const char*) data + d, size - d);
2415 }
2416
2417 _public_ int sd_journal_get_catalog(sd_journal *j, char **ret) {
2418         const void *data;
2419         size_t size;
2420         sd_id128_t id;
2421         _cleanup_free_ char *text = NULL, *cid = NULL;
2422         char *t;
2423         int r;
2424
2425         if (!j)
2426                 return -EINVAL;
2427         if (!ret)
2428                 return -EINVAL;
2429
2430         r = sd_journal_get_data(j, "MESSAGE_ID", &data, &size);
2431         if (r < 0)
2432                 return r;
2433
2434         cid = strndup((const char*) data + 11, size - 11);
2435         if (!cid)
2436                 return -ENOMEM;
2437
2438         r = sd_id128_from_string(cid, &id);
2439         if (r < 0)
2440                 return r;
2441
2442         r = catalog_get(id, &text);
2443         if (r < 0)
2444                 return r;
2445
2446         t = replace_var(text, lookup_field, j);
2447         if (!t)
2448                 return -ENOMEM;
2449
2450         *ret = t;
2451         return 0;
2452 }
2453
2454 _public_ int sd_journal_get_catalog_for_message_id(sd_id128_t id, char **ret) {
2455         if (!ret)
2456                 return -EINVAL;
2457
2458         return catalog_get(id, ret);
2459 }
2460
2461 _public_ int sd_journal_set_data_threshold(sd_journal *j, size_t sz) {
2462         if (!j)
2463                 return -EINVAL;
2464
2465         j->data_threshold = sz;
2466         return 0;
2467 }
2468
2469 _public_ int sd_journal_get_data_threshold(sd_journal *j, size_t *sz) {
2470         if (!j)
2471                 return -EINVAL;
2472         if (!sz)
2473                 return -EINVAL;
2474
2475         *sz = j->data_threshold;
2476         return 0;
2477 }