chiark / gitweb /
journal: log user units for coredumps and show them in systemctl status
[elogind.git] / src / journal / sd-journal.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <stddef.h>
25 #include <unistd.h>
26 #include <sys/inotify.h>
27 #include <sys/poll.h>
28 #include <sys/vfs.h>
29 #include <linux/magic.h>
30
31 #include "sd-journal.h"
32 #include "journal-def.h"
33 #include "journal-file.h"
34 #include "hashmap.h"
35 #include "list.h"
36 #include "path-util.h"
37 #include "lookup3.h"
38 #include "compress.h"
39 #include "journal-internal.h"
40 #include "missing.h"
41 #include "catalog.h"
42 #include "replace-var.h"
43
44 #define JOURNAL_FILES_MAX 1024
45
46 #define JOURNAL_FILES_RECHECK_USEC (2 * USEC_PER_SEC)
47
48 #define REPLACE_VAR_MAX 256
49
50 #define DEFAULT_DATA_THRESHOLD (64*1024)
51
52 static void detach_location(sd_journal *j) {
53         Iterator i;
54         JournalFile *f;
55
56         assert(j);
57
58         j->current_file = NULL;
59         j->current_field = 0;
60
61         HASHMAP_FOREACH(f, j->files, i)
62                 f->current_offset = 0;
63 }
64
65 static void reset_location(sd_journal *j) {
66         assert(j);
67
68         detach_location(j);
69         zero(j->current_location);
70 }
71
72 static void init_location(Location *l, LocationType type, JournalFile *f, Object *o) {
73         assert(l);
74         assert(type == LOCATION_DISCRETE || type == LOCATION_SEEK);
75         assert(f);
76         assert(o->object.type == OBJECT_ENTRY);
77
78         l->type = type;
79         l->seqnum = le64toh(o->entry.seqnum);
80         l->seqnum_id = f->header->seqnum_id;
81         l->realtime = le64toh(o->entry.realtime);
82         l->monotonic = le64toh(o->entry.monotonic);
83         l->boot_id = o->entry.boot_id;
84         l->xor_hash = le64toh(o->entry.xor_hash);
85
86         l->seqnum_set = l->realtime_set = l->monotonic_set = l->xor_hash_set = true;
87 }
88
89 static void set_location(sd_journal *j, LocationType type, JournalFile *f, Object *o, uint64_t offset) {
90         assert(j);
91         assert(type == LOCATION_DISCRETE || type == LOCATION_SEEK);
92         assert(f);
93         assert(o);
94
95         init_location(&j->current_location, type, f, o);
96
97         j->current_file = f;
98         j->current_field = 0;
99
100         f->current_offset = offset;
101 }
102
/* Checks whether the size-byte blob at data is an acceptable match
 * expression of the form FIELD=value.
 *
 * Accepted: at least two bytes, not starting with "__", a non-empty
 * field name made only of 'A'-'Z', '0'-'9' and '_', followed by '='.
 * Everything after the first '=' is not inspected.
 *
 * The blob is length-delimited and need not be NUL-terminated, so
 * only bounded accesses (at most size bytes) are performed. */
static bool match_is_valid(const void *data, size_t size) {
        const char *field = data;
        size_t n;

        assert(data);

        if (size < 2)
                return false;

        /* size >= 2 here, so comparing two bytes stays in bounds */
        if (memcmp(field, "__", 2) == 0)
                return false;

        for (n = 0; n < size; n++) {
                char c = field[n];

                /* End of the field name: valid only if the name is non-empty */
                if (c == '=')
                        return n > 0;

                if (c == '_')
                        continue;

                if (c >= 'A' && c <= 'Z')
                        continue;

                if (c >= '0' && c <= '9')
                        continue;

                return false;
        }

        /* No '=' found at all */
        return false;
}
134
/* Tells whether two FIELD=value blobs (of s and t bytes) refer to the
 * same field. Bytes are compared pairwise up to and including the
 * first '='; any difference before that yields false. If the shorter
 * blob is exhausted without a difference the result is true. */
static bool same_field(const void *_a, size_t s, const void *_b, size_t t) {
        const uint8_t *lhs = _a;
        const uint8_t *rhs = _b;
        size_t limit = s < t ? s : t;
        size_t pos = 0;

        while (pos < limit) {
                uint8_t c = lhs[pos];

                if (c != rhs[pos])
                        return false;

                if (c == '=')
                        return true;

                pos++;
        }

        return true;
}
150
151 static Match *match_new(Match *p, MatchType t) {
152         Match *m;
153
154         m = new0(Match, 1);
155         if (!m)
156                 return NULL;
157
158         m->type = t;
159
160         if (p) {
161                 m->parent = p;
162                 LIST_PREPEND(Match, matches, p->matches, m);
163         }
164
165         return m;
166 }
167
168 static void match_free(Match *m) {
169         assert(m);
170
171         while (m->matches)
172                 match_free(m->matches);
173
174         if (m->parent)
175                 LIST_REMOVE(Match, matches, m->parent->matches, m);
176
177         free(m->data);
178         free(m);
179 }
180
181 static void match_free_if_empty(Match *m) {
182         assert(m);
183
184         if (m->matches)
185                 return;
186
187         match_free(m);
188 }
189
190 _public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size) {
191         Match *l2, *l3, *add_here = NULL, *m;
192         le64_t le_hash;
193
194         if (!j)
195                 return -EINVAL;
196
197         if (!data)
198                 return -EINVAL;
199
200         if (size == 0)
201                 size = strlen(data);
202
203         if (!match_is_valid(data, size))
204                 return -EINVAL;
205
206         /* level 0: OR term
207          * level 1: AND terms
208          * level 2: OR terms
209          * level 3: concrete matches */
210
211         if (!j->level0) {
212                 j->level0 = match_new(NULL, MATCH_OR_TERM);
213                 if (!j->level0)
214                         return -ENOMEM;
215         }
216
217         if (!j->level1) {
218                 j->level1 = match_new(j->level0, MATCH_AND_TERM);
219                 if (!j->level1)
220                         return -ENOMEM;
221         }
222
223         assert(j->level0->type == MATCH_OR_TERM);
224         assert(j->level1->type == MATCH_AND_TERM);
225
226         le_hash = htole64(hash64(data, size));
227
228         LIST_FOREACH(matches, l2, j->level1->matches) {
229                 assert(l2->type == MATCH_OR_TERM);
230
231                 LIST_FOREACH(matches, l3, l2->matches) {
232                         assert(l3->type == MATCH_DISCRETE);
233
234                         /* Exactly the same match already? Then ignore
235                          * this addition */
236                         if (l3->le_hash == le_hash &&
237                             l3->size == size &&
238                             memcmp(l3->data, data, size) == 0)
239                                 return 0;
240
241                         /* Same field? Then let's add this to this OR term */
242                         if (same_field(data, size, l3->data, l3->size)) {
243                                 add_here = l2;
244                                 break;
245                         }
246                 }
247
248                 if (add_here)
249                         break;
250         }
251
252         if (!add_here) {
253                 add_here = match_new(j->level1, MATCH_OR_TERM);
254                 if (!add_here)
255                         goto fail;
256         }
257
258         m = match_new(add_here, MATCH_DISCRETE);
259         if (!m)
260                 goto fail;
261
262         m->le_hash = le_hash;
263         m->size = size;
264         m->data = memdup(data, size);
265         if (!m->data)
266                 goto fail;
267
268         detach_location(j);
269
270         return 0;
271
272 fail:
273         if (add_here)
274                 match_free_if_empty(add_here);
275
276         if (j->level1)
277                 match_free_if_empty(j->level1);
278
279         if (j->level0)
280                 match_free_if_empty(j->level0);
281
282         return -ENOMEM;
283 }
284
285 _public_ int sd_journal_add_disjunction(sd_journal *j) {
286         Match *m;
287
288         assert(j);
289
290         if (!j->level0)
291                 return 0;
292
293         if (!j->level1)
294                 return 0;
295
296         if (!j->level1->matches)
297                 return 0;
298
299         m = match_new(j->level0, MATCH_AND_TERM);
300         if (!m)
301                 return -ENOMEM;
302
303         j->level1 = m;
304         return 0;
305 }
306
307 static char *match_make_string(Match *m) {
308         char *p, *r;
309         Match *i;
310         bool enclose = false;
311
312         if (!m)
313                 return strdup("");
314
315         if (m->type == MATCH_DISCRETE)
316                 return strndup(m->data, m->size);
317
318         p = NULL;
319         LIST_FOREACH(matches, i, m->matches) {
320                 char *t, *k;
321
322                 t = match_make_string(i);
323                 if (!t) {
324                         free(p);
325                         return NULL;
326                 }
327
328                 if (p) {
329                         k = strjoin(p, m->type == MATCH_OR_TERM ? " OR " : " AND ", t, NULL);
330                         free(p);
331                         free(t);
332
333                         if (!k)
334                                 return NULL;
335
336                         p = k;
337
338                         enclose = true;
339                 } else {
340                         free(p);
341                         p = t;
342                 }
343         }
344
345         if (enclose) {
346                 r = strjoin("(", p, ")", NULL);
347                 free(p);
348                 return r;
349         }
350
351         return p;
352 }
353
354 char *journal_make_match_string(sd_journal *j) {
355         assert(j);
356
357         return match_make_string(j->level0);
358 }
359
360 _public_ void sd_journal_flush_matches(sd_journal *j) {
361
362         if (!j)
363                 return;
364
365         if (j->level0)
366                 match_free(j->level0);
367
368         j->level0 = j->level1 = NULL;
369
370         detach_location(j);
371 }
372
373 static int compare_entry_order(JournalFile *af, Object *_ao,
374                          JournalFile *bf, uint64_t bp) {
375
376         uint64_t a, b;
377         Object *ao, *bo;
378         int r;
379
380         assert(af);
381         assert(bf);
382         assert(_ao);
383
384         /* The mmap cache might invalidate the object from the first
385          * file if we look at the one from the second file. Hence
386          * temporarily copy the header of the first one, and look at
387          * that only. */
388         ao = alloca(offsetof(EntryObject, items));
389         memcpy(ao, _ao, offsetof(EntryObject, items));
390
391         r = journal_file_move_to_object(bf, OBJECT_ENTRY, bp, &bo);
392         if (r < 0)
393                 return strcmp(af->path, bf->path);
394
395         /* We operate on two different files here, hence we can access
396          * two objects at the same time, which we normally can't.
397          *
398          * If contents and timestamps match, these entries are
399          * identical, even if the seqnum does not match */
400
401         if (sd_id128_equal(ao->entry.boot_id, bo->entry.boot_id) &&
402             ao->entry.monotonic == bo->entry.monotonic &&
403             ao->entry.realtime == bo->entry.realtime &&
404             ao->entry.xor_hash == bo->entry.xor_hash)
405                 return 0;
406
407         if (sd_id128_equal(af->header->seqnum_id, bf->header->seqnum_id)) {
408
409                 /* If this is from the same seqnum source, compare
410                  * seqnums */
411                 a = le64toh(ao->entry.seqnum);
412                 b = le64toh(bo->entry.seqnum);
413
414                 if (a < b)
415                         return -1;
416                 if (a > b)
417                         return 1;
418
419                 /* Wow! This is weird, different data but the same
420                  * seqnums? Something is borked, but let's make the
421                  * best of it and compare by time. */
422         }
423
424         if (sd_id128_equal(ao->entry.boot_id, bo->entry.boot_id)) {
425
426                 /* If the boot id matches compare monotonic time */
427                 a = le64toh(ao->entry.monotonic);
428                 b = le64toh(bo->entry.monotonic);
429
430                 if (a < b)
431                         return -1;
432                 if (a > b)
433                         return 1;
434         }
435
436         /* Otherwise compare UTC time */
437         a = le64toh(ao->entry.realtime);
438         b = le64toh(bo->entry.realtime);
439
440         if (a < b)
441                 return -1;
442         if (a > b)
443                 return 1;
444
445         /* Finally, compare by contents */
446         a = le64toh(ao->entry.xor_hash);
447         b = le64toh(bo->entry.xor_hash);
448
449         if (a < b)
450                 return -1;
451         if (a > b)
452                 return 1;
453
454         return 0;
455 }
456
457 static int compare_with_location(JournalFile *af, Object *ao, Location *l) {
458         uint64_t a;
459
460         assert(af);
461         assert(ao);
462         assert(l);
463         assert(l->type == LOCATION_DISCRETE || l->type == LOCATION_SEEK);
464
465         if (l->monotonic_set &&
466             sd_id128_equal(ao->entry.boot_id, l->boot_id) &&
467             l->realtime_set &&
468             le64toh(ao->entry.realtime) == l->realtime &&
469             l->xor_hash_set &&
470             le64toh(ao->entry.xor_hash) == l->xor_hash)
471                 return 0;
472
473         if (l->seqnum_set &&
474             sd_id128_equal(af->header->seqnum_id, l->seqnum_id)) {
475
476                 a = le64toh(ao->entry.seqnum);
477
478                 if (a < l->seqnum)
479                         return -1;
480                 if (a > l->seqnum)
481                         return 1;
482         }
483
484         if (l->monotonic_set &&
485             sd_id128_equal(ao->entry.boot_id, l->boot_id)) {
486
487                 a = le64toh(ao->entry.monotonic);
488
489                 if (a < l->monotonic)
490                         return -1;
491                 if (a > l->monotonic)
492                         return 1;
493         }
494
495         if (l->realtime_set) {
496
497                 a = le64toh(ao->entry.realtime);
498
499                 if (a < l->realtime)
500                         return -1;
501                 if (a > l->realtime)
502                         return 1;
503         }
504
505         if (l->xor_hash_set) {
506                 a = le64toh(ao->entry.xor_hash);
507
508                 if (a < l->xor_hash)
509                         return -1;
510                 if (a > l->xor_hash)
511                         return 1;
512         }
513
514         return 0;
515 }
516
517 static int next_for_match(
518                 sd_journal *j,
519                 Match *m,
520                 JournalFile *f,
521                 uint64_t after_offset,
522                 direction_t direction,
523                 Object **ret,
524                 uint64_t *offset) {
525
526         int r;
527         uint64_t np = 0;
528         Object *n;
529
530         assert(j);
531         assert(m);
532         assert(f);
533
534         if (m->type == MATCH_DISCRETE) {
535                 uint64_t dp;
536
537                 r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
538                 if (r <= 0)
539                         return r;
540
541                 return journal_file_move_to_entry_by_offset_for_data(f, dp, after_offset, direction, ret, offset);
542
543         } else if (m->type == MATCH_OR_TERM) {
544                 Match *i;
545
546                 /* Find the earliest match beyond after_offset */
547
548                 LIST_FOREACH(matches, i, m->matches) {
549                         uint64_t cp;
550
551                         r = next_for_match(j, i, f, after_offset, direction, NULL, &cp);
552                         if (r < 0)
553                                 return r;
554                         else if (r > 0) {
555                                 if (np == 0 || (direction == DIRECTION_DOWN ? np > cp : np < cp))
556                                         np = cp;
557                         }
558                 }
559
560         } else if (m->type == MATCH_AND_TERM) {
561                 Match *i;
562                 bool continue_looking;
563
564                 /* Always jump to the next matching entry and repeat
565                  * this until we fine and offset that matches for all
566                  * matches. */
567
568                 if (!m->matches)
569                         return 0;
570
571                 np = 0;
572                 do {
573                         continue_looking = false;
574
575                         LIST_FOREACH(matches, i, m->matches) {
576                                 uint64_t cp, limit;
577
578                                 if (np == 0)
579                                         limit = after_offset;
580                                 else if (direction == DIRECTION_DOWN)
581                                         limit = MAX(np, after_offset);
582                                 else
583                                         limit = MIN(np, after_offset);
584
585                                 r = next_for_match(j, i, f, limit, direction, NULL, &cp);
586                                 if (r <= 0)
587                                         return r;
588
589                                 if ((direction == DIRECTION_DOWN ? cp >= after_offset : cp <= after_offset) &&
590                                     (np == 0 || (direction == DIRECTION_DOWN ? cp > np : np < cp))) {
591                                         np = cp;
592                                         continue_looking = true;
593                                 }
594                         }
595
596                 } while (continue_looking);
597         }
598
599         if (np == 0)
600                 return 0;
601
602         r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
603         if (r < 0)
604                 return r;
605
606         if (ret)
607                 *ret = n;
608         if (offset)
609                 *offset = np;
610
611         return 1;
612 }
613
614 static int find_location_for_match(
615                 sd_journal *j,
616                 Match *m,
617                 JournalFile *f,
618                 direction_t direction,
619                 Object **ret,
620                 uint64_t *offset) {
621
622         int r;
623
624         assert(j);
625         assert(m);
626         assert(f);
627
628         if (m->type == MATCH_DISCRETE) {
629                 uint64_t dp;
630
631                 r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
632                 if (r <= 0)
633                         return r;
634
635                 /* FIXME: missing: find by monotonic */
636
637                 if (j->current_location.type == LOCATION_HEAD)
638                         return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_DOWN, ret, offset);
639                 if (j->current_location.type == LOCATION_TAIL)
640                         return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_UP, ret, offset);
641                 if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
642                         return journal_file_move_to_entry_by_seqnum_for_data(f, dp, j->current_location.seqnum, direction, ret, offset);
643                 if (j->current_location.monotonic_set) {
644                         r = journal_file_move_to_entry_by_monotonic_for_data(f, dp, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
645                         if (r != -ENOENT)
646                                 return r;
647                 }
648                 if (j->current_location.realtime_set)
649                         return journal_file_move_to_entry_by_realtime_for_data(f, dp, j->current_location.realtime, direction, ret, offset);
650
651                 return journal_file_next_entry_for_data(f, NULL, 0, dp, direction, ret, offset);
652
653         } else if (m->type == MATCH_OR_TERM) {
654                 uint64_t np = 0;
655                 Object *n;
656                 Match *i;
657
658                 /* Find the earliest match */
659
660                 LIST_FOREACH(matches, i, m->matches) {
661                         uint64_t cp;
662
663                         r = find_location_for_match(j, i, f, direction, NULL, &cp);
664                         if (r < 0)
665                                 return r;
666                         else if (r > 0) {
667                                 if (np == 0 || (direction == DIRECTION_DOWN ? np > cp : np < cp))
668                                         np = cp;
669                         }
670                 }
671
672                 if (np == 0)
673                         return 0;
674
675                 r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
676                 if (r < 0)
677                         return r;
678
679                 if (ret)
680                         *ret = n;
681                 if (offset)
682                         *offset = np;
683
684                 return 1;
685
686         } else {
687                 Match *i;
688                 uint64_t np = 0;
689
690                 assert(m->type == MATCH_AND_TERM);
691
692                 /* First jump to the last match, and then find the
693                  * next one where all matches match */
694
695                 if (!m->matches)
696                         return 0;
697
698                 LIST_FOREACH(matches, i, m->matches) {
699                         uint64_t cp;
700
701                         r = find_location_for_match(j, i, f, direction, NULL, &cp);
702                         if (r <= 0)
703                                 return r;
704
705                         if (np == 0 || (direction == DIRECTION_DOWN ? np < cp : np > cp))
706                                 np = cp;
707                 }
708
709                 return next_for_match(j, m, f, np, direction, ret, offset);
710         }
711 }
712
713 static int find_location_with_matches(
714                 sd_journal *j,
715                 JournalFile *f,
716                 direction_t direction,
717                 Object **ret,
718                 uint64_t *offset) {
719
720         int r;
721
722         assert(j);
723         assert(f);
724         assert(ret);
725         assert(offset);
726
727         if (!j->level0) {
728                 /* No matches is simple */
729
730                 if (j->current_location.type == LOCATION_HEAD)
731                         return journal_file_next_entry(f, NULL, 0, DIRECTION_DOWN, ret, offset);
732                 if (j->current_location.type == LOCATION_TAIL)
733                         return journal_file_next_entry(f, NULL, 0, DIRECTION_UP, ret, offset);
734                 if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
735                         return journal_file_move_to_entry_by_seqnum(f, j->current_location.seqnum, direction, ret, offset);
736                 if (j->current_location.monotonic_set) {
737                         r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
738                         if (r != -ENOENT)
739                                 return r;
740                 }
741                 if (j->current_location.realtime_set)
742                         return journal_file_move_to_entry_by_realtime(f, j->current_location.realtime, direction, ret, offset);
743
744                 return journal_file_next_entry(f, NULL, 0, direction, ret, offset);
745         } else
746                 return find_location_for_match(j, j->level0, f, direction, ret, offset);
747 }
748
749 static int next_with_matches(
750                 sd_journal *j,
751                 JournalFile *f,
752                 direction_t direction,
753                 Object **ret,
754                 uint64_t *offset) {
755
756         Object *c;
757         uint64_t cp;
758
759         assert(j);
760         assert(f);
761         assert(ret);
762         assert(offset);
763
764         c = *ret;
765         cp = *offset;
766
767         /* No matches is easy. We simple advance the file
768          * pointer by one. */
769         if (!j->level0)
770                 return journal_file_next_entry(f, c, cp, direction, ret, offset);
771
772         /* If we have a match then we look for the next matching entry
773          * with an offset at least one step larger */
774         return next_for_match(j, j->level0, f, direction == DIRECTION_DOWN ? cp+1 : cp-1, direction, ret, offset);
775 }
776
777 static int next_beyond_location(sd_journal *j, JournalFile *f, direction_t direction, Object **ret, uint64_t *offset) {
778         Object *c;
779         uint64_t cp;
780         int r;
781
782         assert(j);
783         assert(f);
784
785         if (f->current_offset > 0) {
786                 cp = f->current_offset;
787
788                 r = journal_file_move_to_object(f, OBJECT_ENTRY, cp, &c);
789                 if (r < 0)
790                         return r;
791
792                 r = next_with_matches(j, f, direction, &c, &cp);
793                 if (r <= 0)
794                         return r;
795         } else {
796                 r = find_location_with_matches(j, f, direction, &c, &cp);
797                 if (r <= 0)
798                         return r;
799         }
800
801         /* OK, we found the spot, now let's advance until to an entry
802          * that is actually different from what we were previously
803          * looking at. This is necessary to handle entries which exist
804          * in two (or more) journal files, and which shall all be
805          * suppressed but one. */
806
807         for (;;) {
808                 bool found;
809
810                 if (j->current_location.type == LOCATION_DISCRETE) {
811                         int k;
812
813                         k = compare_with_location(f, c, &j->current_location);
814                         if (direction == DIRECTION_DOWN)
815                                 found = k > 0;
816                         else
817                                 found = k < 0;
818                 } else
819                         found = true;
820
821                 if (found) {
822                         if (ret)
823                                 *ret = c;
824                         if (offset)
825                                 *offset = cp;
826                         return 1;
827                 }
828
829                 r = next_with_matches(j, f, direction, &c, &cp);
830                 if (r <= 0)
831                         return r;
832         }
833 }
834
835 static int real_journal_next(sd_journal *j, direction_t direction) {
836         JournalFile *f, *new_file = NULL;
837         uint64_t new_offset = 0;
838         Object *o;
839         uint64_t p;
840         Iterator i;
841         int r;
842
843         if (!j)
844                 return -EINVAL;
845
846         HASHMAP_FOREACH(f, j->files, i) {
847                 bool found;
848
849                 r = next_beyond_location(j, f, direction, &o, &p);
850                 if (r < 0) {
851                         log_debug("Can't iterate through %s, ignoring: %s", f->path, strerror(-r));
852                         continue;
853                 } else if (r == 0)
854                         continue;
855
856                 if (!new_file)
857                         found = true;
858                 else {
859                         int k;
860
861                         k = compare_entry_order(f, o, new_file, new_offset);
862
863                         if (direction == DIRECTION_DOWN)
864                                 found = k < 0;
865                         else
866                                 found = k > 0;
867                 }
868
869                 if (found) {
870                         new_file = f;
871                         new_offset = p;
872                 }
873         }
874
875         if (!new_file)
876                 return 0;
877
878         r = journal_file_move_to_object(new_file, OBJECT_ENTRY, new_offset, &o);
879         if (r < 0)
880                 return r;
881
882         set_location(j, LOCATION_DISCRETE, new_file, o, new_offset);
883
884         return 1;
885 }
886
887 _public_ int sd_journal_next(sd_journal *j) {
888         return real_journal_next(j, DIRECTION_DOWN);
889 }
890
891 _public_ int sd_journal_previous(sd_journal *j) {
892         return real_journal_next(j, DIRECTION_UP);
893 }
894
895 static int real_journal_next_skip(sd_journal *j, direction_t direction, uint64_t skip) {
896         int c = 0, r;
897
898         if (!j)
899                 return -EINVAL;
900
901         if (skip == 0) {
902                 /* If this is not a discrete skip, then at least
903                  * resolve the current location */
904                 if (j->current_location.type != LOCATION_DISCRETE)
905                         return real_journal_next(j, direction);
906
907                 return 0;
908         }
909
910         do {
911                 r = real_journal_next(j, direction);
912                 if (r < 0)
913                         return r;
914
915                 if (r == 0)
916                         return c;
917
918                 skip--;
919                 c++;
920         } while (skip > 0);
921
922         return c;
923 }
924
925 _public_ int sd_journal_next_skip(sd_journal *j, uint64_t skip) {
926         return real_journal_next_skip(j, DIRECTION_DOWN, skip);
927 }
928
929 _public_ int sd_journal_previous_skip(sd_journal *j, uint64_t skip) {
930         return real_journal_next_skip(j, DIRECTION_UP, skip);
931 }
932
933 _public_ int sd_journal_get_cursor(sd_journal *j, char **cursor) {
934         Object *o;
935         int r;
936         char bid[33], sid[33];
937
938         if (!j)
939                 return -EINVAL;
940         if (!cursor)
941                 return -EINVAL;
942
943         if (!j->current_file || j->current_file->current_offset <= 0)
944                 return -EADDRNOTAVAIL;
945
946         r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
947         if (r < 0)
948                 return r;
949
950         sd_id128_to_string(j->current_file->header->seqnum_id, sid);
951         sd_id128_to_string(o->entry.boot_id, bid);
952
953         if (asprintf(cursor,
954                      "s=%s;i=%llx;b=%s;m=%llx;t=%llx;x=%llx",
955                      sid, (unsigned long long) le64toh(o->entry.seqnum),
956                      bid, (unsigned long long) le64toh(o->entry.monotonic),
957                      (unsigned long long) le64toh(o->entry.realtime),
958                      (unsigned long long) le64toh(o->entry.xor_hash)) < 0)
959                 return -ENOMEM;
960
961         return 1;
962 }
963
964 _public_ int sd_journal_seek_cursor(sd_journal *j, const char *cursor) {
965         char *w, *state;
966         size_t l;
967         unsigned long long seqnum, monotonic, realtime, xor_hash;
968         bool
969                 seqnum_id_set = false,
970                 seqnum_set = false,
971                 boot_id_set = false,
972                 monotonic_set = false,
973                 realtime_set = false,
974                 xor_hash_set = false;
975         sd_id128_t seqnum_id, boot_id;
976
977         if (!j)
978                 return -EINVAL;
979         if (isempty(cursor))
980                 return -EINVAL;
981
982         FOREACH_WORD_SEPARATOR(w, l, cursor, ";", state) {
983                 char *item;
984                 int k = 0;
985
986                 if (l < 2 || w[1] != '=')
987                         return -EINVAL;
988
989                 item = strndup(w, l);
990                 if (!item)
991                         return -ENOMEM;
992
993                 switch (w[0]) {
994
995                 case 's':
996                         seqnum_id_set = true;
997                         k = sd_id128_from_string(item+2, &seqnum_id);
998                         break;
999
1000                 case 'i':
1001                         seqnum_set = true;
1002                         if (sscanf(item+2, "%llx", &seqnum) != 1)
1003                                 k = -EINVAL;
1004                         break;
1005
1006                 case 'b':
1007                         boot_id_set = true;
1008                         k = sd_id128_from_string(item+2, &boot_id);
1009                         break;
1010
1011                 case 'm':
1012                         monotonic_set = true;
1013                         if (sscanf(item+2, "%llx", &monotonic) != 1)
1014                                 k = -EINVAL;
1015                         break;
1016
1017                 case 't':
1018                         realtime_set = true;
1019                         if (sscanf(item+2, "%llx", &realtime) != 1)
1020                                 k = -EINVAL;
1021                         break;
1022
1023                 case 'x':
1024                         xor_hash_set = true;
1025                         if (sscanf(item+2, "%llx", &xor_hash) != 1)
1026                                 k = -EINVAL;
1027                         break;
1028                 }
1029
1030                 free(item);
1031
1032                 if (k < 0)
1033                         return k;
1034         }
1035
1036         if ((!seqnum_set || !seqnum_id_set) &&
1037             (!monotonic_set || !boot_id_set) &&
1038             !realtime_set)
1039                 return -EINVAL;
1040
1041         reset_location(j);
1042
1043         j->current_location.type = LOCATION_SEEK;
1044
1045         if (realtime_set) {
1046                 j->current_location.realtime = (uint64_t) realtime;
1047                 j->current_location.realtime_set = true;
1048         }
1049
1050         if (seqnum_set && seqnum_id_set) {
1051                 j->current_location.seqnum = (uint64_t) seqnum;
1052                 j->current_location.seqnum_id = seqnum_id;
1053                 j->current_location.seqnum_set = true;
1054         }
1055
1056         if (monotonic_set && boot_id_set) {
1057                 j->current_location.monotonic = (uint64_t) monotonic;
1058                 j->current_location.boot_id = boot_id;
1059                 j->current_location.monotonic_set = true;
1060         }
1061
1062         if (xor_hash_set) {
1063                 j->current_location.xor_hash = (uint64_t) xor_hash;
1064                 j->current_location.xor_hash_set = true;
1065         }
1066
1067         return 0;
1068 }
1069
1070 _public_ int sd_journal_test_cursor(sd_journal *j, const char *cursor) {
1071         int r;
1072         char *w, *state;
1073         size_t l;
1074         Object *o;
1075
1076         if (!j)
1077                 return -EINVAL;
1078         if (isempty(cursor))
1079                 return -EINVAL;
1080
1081         if (!j->current_file || j->current_file->current_offset <= 0)
1082                 return -EADDRNOTAVAIL;
1083
1084         r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
1085         if (r < 0)
1086                 return r;
1087
1088         FOREACH_WORD_SEPARATOR(w, l, cursor, ";", state) {
1089                 _cleanup_free_ char *item = NULL;
1090                 sd_id128_t id;
1091                 unsigned long long ll;
1092                 int k = 0;
1093
1094                 if (l < 2 || w[1] != '=')
1095                         return -EINVAL;
1096
1097                 item = strndup(w, l);
1098                 if (!item)
1099                         return -ENOMEM;
1100
1101                 switch (w[0]) {
1102
1103                 case 's':
1104                         k = sd_id128_from_string(item+2, &id);
1105                         if (k < 0)
1106                                 return k;
1107                         if (!sd_id128_equal(id, j->current_file->header->seqnum_id))
1108                                 return 0;
1109                         break;
1110
1111                 case 'i':
1112                         if (sscanf(item+2, "%llx", &ll) != 1)
1113                                 return -EINVAL;
1114                         if (ll != le64toh(o->entry.seqnum))
1115                                 return 0;
1116                         break;
1117
1118                 case 'b':
1119                         k = sd_id128_from_string(item+2, &id);
1120                         if (k < 0)
1121                                 return k;
1122                         if (!sd_id128_equal(id, o->entry.boot_id))
1123                                 return 0;
1124                         break;
1125
1126                 case 'm':
1127                         if (sscanf(item+2, "%llx", &ll) != 1)
1128                                 return -EINVAL;
1129                         if (ll != le64toh(o->entry.monotonic))
1130                                 return 0;
1131                         break;
1132
1133                 case 't':
1134                         if (sscanf(item+2, "%llx", &ll) != 1)
1135                                 return -EINVAL;
1136                         if (ll != le64toh(o->entry.realtime))
1137                                 return 0;
1138                         break;
1139
1140                 case 'x':
1141                         if (sscanf(item+2, "%llx", &ll) != 1)
1142                                 return -EINVAL;
1143                         if (ll != le64toh(o->entry.xor_hash))
1144                                 return 0;
1145                         break;
1146                 }
1147         }
1148
1149         return 1;
1150 }
1151
1152
1153 _public_ int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t usec) {
1154         if (!j)
1155                 return -EINVAL;
1156
1157         reset_location(j);
1158         j->current_location.type = LOCATION_SEEK;
1159         j->current_location.boot_id = boot_id;
1160         j->current_location.monotonic = usec;
1161         j->current_location.monotonic_set = true;
1162
1163         return 0;
1164 }
1165
1166 _public_ int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec) {
1167         if (!j)
1168                 return -EINVAL;
1169
1170         reset_location(j);
1171         j->current_location.type = LOCATION_SEEK;
1172         j->current_location.realtime = usec;
1173         j->current_location.realtime_set = true;
1174
1175         return 0;
1176 }
1177
1178 _public_ int sd_journal_seek_head(sd_journal *j) {
1179         if (!j)
1180                 return -EINVAL;
1181
1182         reset_location(j);
1183         j->current_location.type = LOCATION_HEAD;
1184
1185         return 0;
1186 }
1187
1188 _public_ int sd_journal_seek_tail(sd_journal *j) {
1189         if (!j)
1190                 return -EINVAL;
1191
1192         reset_location(j);
1193         j->current_location.type = LOCATION_TAIL;
1194
1195         return 0;
1196 }
1197
1198 static void check_network(sd_journal *j, int fd) {
1199         struct statfs sfs;
1200
1201         assert(j);
1202
1203         if (j->on_network)
1204                 return;
1205
1206         if (fstatfs(fd, &sfs) < 0)
1207                 return;
1208
1209         j->on_network =
1210                 (long)sfs.f_type == (long)CIFS_MAGIC_NUMBER ||
1211                 sfs.f_type == CODA_SUPER_MAGIC ||
1212                 sfs.f_type == NCP_SUPER_MAGIC ||
1213                 sfs.f_type == NFS_SUPER_MAGIC ||
1214                 sfs.f_type == SMB_SUPER_MAGIC;
1215 }
1216
1217 static int add_file(sd_journal *j, const char *prefix, const char *filename) {
1218         char *path;
1219         int r;
1220         JournalFile *f;
1221
1222         assert(j);
1223         assert(prefix);
1224         assert(filename);
1225
1226         if ((j->flags & SD_JOURNAL_SYSTEM_ONLY) &&
1227             !(streq(filename, "system.journal") ||
1228               streq(filename, "system.journal~") ||
1229               (startswith(filename, "system@") &&
1230                (endswith(filename, ".journal") || endswith(filename, ".journal~")))))
1231                 return 0;
1232
1233         path = strjoin(prefix, "/", filename, NULL);
1234         if (!path)
1235                 return -ENOMEM;
1236
1237         if (hashmap_get(j->files, path)) {
1238                 free(path);
1239                 return 0;
1240         }
1241
1242         if (hashmap_size(j->files) >= JOURNAL_FILES_MAX) {
1243                 log_debug("Too many open journal files, not adding %s, ignoring.", path);
1244                 free(path);
1245                 return 0;
1246         }
1247
1248         r = journal_file_open(path, O_RDONLY, 0, false, false, NULL, j->mmap, NULL, &f);
1249         free(path);
1250
1251         if (r < 0) {
1252                 if (errno == ENOENT)
1253                         return 0;
1254
1255                 return r;
1256         }
1257
1258         /* journal_file_dump(f); */
1259
1260         r = hashmap_put(j->files, f->path, f);
1261         if (r < 0) {
1262                 journal_file_close(f);
1263                 return r;
1264         }
1265
1266         check_network(j, f->fd);
1267
1268         j->current_invalidate_counter ++;
1269
1270         log_debug("File %s got added.", f->path);
1271
1272         return 0;
1273 }
1274
1275 static int remove_file(sd_journal *j, const char *prefix, const char *filename) {
1276         char *path;
1277         JournalFile *f;
1278
1279         assert(j);
1280         assert(prefix);
1281         assert(filename);
1282
1283         path = strjoin(prefix, "/", filename, NULL);
1284         if (!path)
1285                 return -ENOMEM;
1286
1287         f = hashmap_get(j->files, path);
1288         free(path);
1289         if (!f)
1290                 return 0;
1291
1292         hashmap_remove(j->files, f->path);
1293
1294         log_debug("File %s got removed.", f->path);
1295
1296         if (j->current_file == f) {
1297                 j->current_file = NULL;
1298                 j->current_field = 0;
1299         }
1300
1301         if (j->unique_file == f) {
1302                 j->unique_file = NULL;
1303                 j->unique_offset = 0;
1304         }
1305
1306         journal_file_close(f);
1307
1308         j->current_invalidate_counter ++;
1309
1310         return 0;
1311 }
1312
1313 static int add_directory(sd_journal *j, const char *prefix, const char *dirname) {
1314         char *path;
1315         int r;
1316         DIR *d;
1317         sd_id128_t id, mid;
1318         Directory *m;
1319
1320         assert(j);
1321         assert(prefix);
1322         assert(dirname);
1323
1324         if ((j->flags & SD_JOURNAL_LOCAL_ONLY) &&
1325             (sd_id128_from_string(dirname, &id) < 0 ||
1326              sd_id128_get_machine(&mid) < 0 ||
1327              !sd_id128_equal(id, mid)))
1328             return 0;
1329
1330         path = strjoin(prefix, "/", dirname, NULL);
1331         if (!path)
1332                 return -ENOMEM;
1333
1334         d = opendir(path);
1335         if (!d) {
1336                 log_debug("Failed to open %s: %m", path);
1337                 free(path);
1338
1339                 if (errno == ENOENT)
1340                         return 0;
1341                 return -errno;
1342         }
1343
1344         m = hashmap_get(j->directories_by_path, path);
1345         if (!m) {
1346                 m = new0(Directory, 1);
1347                 if (!m) {
1348                         closedir(d);
1349                         free(path);
1350                         return -ENOMEM;
1351                 }
1352
1353                 m->is_root = false;
1354                 m->path = path;
1355
1356                 if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
1357                         closedir(d);
1358                         free(m->path);
1359                         free(m);
1360                         return -ENOMEM;
1361                 }
1362
1363                 j->current_invalidate_counter ++;
1364
1365                 log_debug("Directory %s got added.", m->path);
1366
1367         } else if (m->is_root) {
1368                 free (path);
1369                 closedir(d);
1370                 return 0;
1371         }  else
1372                 free(path);
1373
1374         if (m->wd <= 0 && j->inotify_fd >= 0) {
1375
1376                 m->wd = inotify_add_watch(j->inotify_fd, m->path,
1377                                           IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1378                                           IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM|
1379                                           IN_ONLYDIR);
1380
1381                 if (m->wd > 0 && hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m) < 0)
1382                         inotify_rm_watch(j->inotify_fd, m->wd);
1383         }
1384
1385         for (;;) {
1386                 struct dirent *de;
1387                 union dirent_storage buf;
1388
1389                 r = readdir_r(d, &buf.de, &de);
1390                 if (r != 0 || !de)
1391                         break;
1392
1393                 if (dirent_is_file_with_suffix(de, ".journal") ||
1394                     dirent_is_file_with_suffix(de, ".journal~")) {
1395                         r = add_file(j, m->path, de->d_name);
1396                         if (r < 0)
1397                                 log_debug("Failed to add file %s/%s: %s", m->path, de->d_name, strerror(-r));
1398                 }
1399         }
1400
1401         check_network(j, dirfd(d));
1402
1403         closedir(d);
1404
1405         return 0;
1406 }
1407
1408 static int add_root_directory(sd_journal *j, const char *p) {
1409         DIR *d;
1410         Directory *m;
1411         int r;
1412
1413         assert(j);
1414         assert(p);
1415
1416         if ((j->flags & SD_JOURNAL_RUNTIME_ONLY) &&
1417             !path_startswith(p, "/run"))
1418                 return -EINVAL;
1419
1420         d = opendir(p);
1421         if (!d)
1422                 return -errno;
1423
1424         m = hashmap_get(j->directories_by_path, p);
1425         if (!m) {
1426                 m = new0(Directory, 1);
1427                 if (!m) {
1428                         closedir(d);
1429                         return -ENOMEM;
1430                 }
1431
1432                 m->is_root = true;
1433                 m->path = strdup(p);
1434                 if (!m->path) {
1435                         closedir(d);
1436                         free(m);
1437                         return -ENOMEM;
1438                 }
1439
1440                 if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
1441                         closedir(d);
1442                         free(m->path);
1443                         free(m);
1444                         return -ENOMEM;
1445                 }
1446
1447                 j->current_invalidate_counter ++;
1448
1449                 log_debug("Root directory %s got added.", m->path);
1450
1451         } else if (!m->is_root) {
1452                 closedir(d);
1453                 return 0;
1454         }
1455
1456         if (m->wd <= 0 && j->inotify_fd >= 0) {
1457
1458                 m->wd = inotify_add_watch(j->inotify_fd, m->path,
1459                                           IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1460                                           IN_ONLYDIR);
1461
1462                 if (m->wd > 0 && hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m) < 0)
1463                         inotify_rm_watch(j->inotify_fd, m->wd);
1464         }
1465
1466         for (;;) {
1467                 struct dirent *de;
1468                 union dirent_storage buf;
1469                 sd_id128_t id;
1470
1471                 r = readdir_r(d, &buf.de, &de);
1472                 if (r != 0 || !de)
1473                         break;
1474
1475                 if (dirent_is_file_with_suffix(de, ".journal") ||
1476                     dirent_is_file_with_suffix(de, ".journal~")) {
1477                         r = add_file(j, m->path, de->d_name);
1478                         if (r < 0)
1479                                 log_debug("Failed to add file %s/%s: %s", m->path, de->d_name, strerror(-r));
1480
1481                 } else if ((de->d_type == DT_DIR || de->d_type == DT_LNK || de->d_type == DT_UNKNOWN) &&
1482                            sd_id128_from_string(de->d_name, &id) >= 0) {
1483
1484                         r = add_directory(j, m->path, de->d_name);
1485                         if (r < 0)
1486                                 log_debug("Failed to add directory %s/%s: %s", m->path, de->d_name, strerror(-r));
1487                 }
1488         }
1489
1490         check_network(j, dirfd(d));
1491
1492         closedir(d);
1493
1494         return 0;
1495 }
1496
1497 static int remove_directory(sd_journal *j, Directory *d) {
1498         assert(j);
1499
1500         if (d->wd > 0) {
1501                 hashmap_remove(j->directories_by_wd, INT_TO_PTR(d->wd));
1502
1503                 if (j->inotify_fd >= 0)
1504                         inotify_rm_watch(j->inotify_fd, d->wd);
1505         }
1506
1507         hashmap_remove(j->directories_by_path, d->path);
1508
1509         if (d->is_root)
1510                 log_debug("Root directory %s got removed.", d->path);
1511         else
1512                 log_debug("Directory %s got removed.", d->path);
1513
1514         free(d->path);
1515         free(d);
1516
1517         return 0;
1518 }
1519
1520 static int add_search_paths(sd_journal *j) {
1521
1522         const char search_paths[] =
1523                 "/run/log/journal\0"
1524                 "/var/log/journal\0";
1525         const char *p;
1526
1527         assert(j);
1528
1529         /* We ignore most errors here, since the idea is to only open
1530          * what's actually accessible, and ignore the rest. */
1531
1532         NULSTR_FOREACH(p, search_paths)
1533                 add_root_directory(j, p);
1534
1535         return 0;
1536 }
1537
1538 static int allocate_inotify(sd_journal *j) {
1539         assert(j);
1540
1541         if (j->inotify_fd < 0) {
1542                 j->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1543                 if (j->inotify_fd < 0)
1544                         return -errno;
1545         }
1546
1547         if (!j->directories_by_wd) {
1548                 j->directories_by_wd = hashmap_new(trivial_hash_func, trivial_compare_func);
1549                 if (!j->directories_by_wd)
1550                         return -ENOMEM;
1551         }
1552
1553         return 0;
1554 }
1555
1556 static sd_journal *journal_new(int flags, const char *path) {
1557         sd_journal *j;
1558
1559         j = new0(sd_journal, 1);
1560         if (!j)
1561                 return NULL;
1562
1563         j->inotify_fd = -1;
1564         j->flags = flags;
1565         j->data_threshold = DEFAULT_DATA_THRESHOLD;
1566
1567         if (path) {
1568                 j->path = strdup(path);
1569                 if (!j->path) {
1570                         free(j);
1571                         return NULL;
1572                 }
1573         }
1574
1575         j->files = hashmap_new(string_hash_func, string_compare_func);
1576         if (!j->files) {
1577                 free(j->path);
1578                 free(j);
1579                 return NULL;
1580         }
1581
1582         j->directories_by_path = hashmap_new(string_hash_func, string_compare_func);
1583         if (!j->directories_by_path) {
1584                 hashmap_free(j->files);
1585                 free(j->path);
1586                 free(j);
1587                 return NULL;
1588         }
1589
1590         j->mmap = mmap_cache_new();
1591         if (!j->mmap) {
1592                 hashmap_free(j->files);
1593                 hashmap_free(j->directories_by_path);
1594                 free(j->path);
1595                 free(j);
1596                 return NULL;
1597         }
1598
1599         return j;
1600 }
1601
1602 _public_ int sd_journal_open(sd_journal **ret, int flags) {
1603         sd_journal *j;
1604         int r;
1605
1606         if (!ret)
1607                 return -EINVAL;
1608
1609         if (flags & ~(SD_JOURNAL_LOCAL_ONLY|
1610                       SD_JOURNAL_RUNTIME_ONLY|
1611                       SD_JOURNAL_SYSTEM_ONLY))
1612                 return -EINVAL;
1613
1614         j = journal_new(flags, NULL);
1615         if (!j)
1616                 return -ENOMEM;
1617
1618         r = add_search_paths(j);
1619         if (r < 0)
1620                 goto fail;
1621
1622         *ret = j;
1623         return 0;
1624
1625 fail:
1626         sd_journal_close(j);
1627
1628         return r;
1629 }
1630
1631 _public_ int sd_journal_open_directory(sd_journal **ret, const char *path, int flags) {
1632         sd_journal *j;
1633         int r;
1634
1635         if (!ret)
1636                 return -EINVAL;
1637
1638         if (!path || !path_is_absolute(path))
1639                 return -EINVAL;
1640
1641         if (flags != 0)
1642                 return -EINVAL;
1643
1644         j = journal_new(flags, path);
1645         if (!j)
1646                 return -ENOMEM;
1647
1648         r = add_root_directory(j, path);
1649         if (r < 0)
1650                 goto fail;
1651
1652         *ret = j;
1653         return 0;
1654
1655 fail:
1656         sd_journal_close(j);
1657
1658         return r;
1659 }
1660
1661 _public_ void sd_journal_close(sd_journal *j) {
1662         Directory *d;
1663         JournalFile *f;
1664
1665         if (!j)
1666                 return;
1667
1668         while ((f = hashmap_steal_first(j->files)))
1669                 journal_file_close(f);
1670
1671         hashmap_free(j->files);
1672
1673         while ((d = hashmap_first(j->directories_by_path)))
1674                 remove_directory(j, d);
1675
1676         while ((d = hashmap_first(j->directories_by_wd)))
1677                 remove_directory(j, d);
1678
1679         hashmap_free(j->directories_by_path);
1680         hashmap_free(j->directories_by_wd);
1681
1682         if (j->inotify_fd >= 0)
1683                 close_nointr_nofail(j->inotify_fd);
1684
1685         sd_journal_flush_matches(j);
1686
1687         if (j->mmap)
1688                 mmap_cache_unref(j->mmap);
1689
1690         free(j->path);
1691         free(j->unique_field);
1692         free(j);
1693 }
1694
1695 _public_ int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret) {
1696         Object *o;
1697         JournalFile *f;
1698         int r;
1699
1700         if (!j)
1701                 return -EINVAL;
1702         if (!ret)
1703                 return -EINVAL;
1704
1705         f = j->current_file;
1706         if (!f)
1707                 return -EADDRNOTAVAIL;
1708
1709         if (f->current_offset <= 0)
1710                 return -EADDRNOTAVAIL;
1711
1712         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1713         if (r < 0)
1714                 return r;
1715
1716         *ret = le64toh(o->entry.realtime);
1717         return 0;
1718 }
1719
1720 _public_ int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id128_t *ret_boot_id) {
1721         Object *o;
1722         JournalFile *f;
1723         int r;
1724         sd_id128_t id;
1725
1726         if (!j)
1727                 return -EINVAL;
1728
1729         f = j->current_file;
1730         if (!f)
1731                 return -EADDRNOTAVAIL;
1732
1733         if (f->current_offset <= 0)
1734                 return -EADDRNOTAVAIL;
1735
1736         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1737         if (r < 0)
1738                 return r;
1739
1740         if (ret_boot_id)
1741                 *ret_boot_id = o->entry.boot_id;
1742         else {
1743                 r = sd_id128_get_boot(&id);
1744                 if (r < 0)
1745                         return r;
1746
1747                 if (!sd_id128_equal(id, o->entry.boot_id))
1748                         return -ESTALE;
1749         }
1750
1751         if (ret)
1752                 *ret = le64toh(o->entry.monotonic);
1753
1754         return 0;
1755 }
1756
/* Check whether 'field' is an acceptable field name: non-empty, not
 * starting with "__", and made up only of 'A'-'Z', '0'-'9' and '_'. */
static bool field_is_valid(const char *field) {
        size_t i;

        assert(field);

        /* Empty names are refused */
        if (field[0] == '\0')
                return false;

        /* Names with a double underscore prefix are refused */
        if (field[0] == '_' && field[1] == '_')
                return false;

        for (i = 0; field[i] != '\0'; i++) {
                const char c = field[i];
                const bool allowed =
                        c == '_' ||
                        (c >= 'A' && c <= 'Z') ||
                        (c >= '0' && c <= '9');

                if (!allowed)
                        return false;
        }

        return true;
}
1784
1785 _public_ int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *size) {
1786         JournalFile *f;
1787         uint64_t i, n;
1788         size_t field_length;
1789         int r;
1790         Object *o;
1791
1792         if (!j)
1793                 return -EINVAL;
1794         if (!field)
1795                 return -EINVAL;
1796         if (!data)
1797                 return -EINVAL;
1798         if (!size)
1799                 return -EINVAL;
1800
1801         if (!field_is_valid(field))
1802                 return -EINVAL;
1803
1804         f = j->current_file;
1805         if (!f)
1806                 return -EADDRNOTAVAIL;
1807
1808         if (f->current_offset <= 0)
1809                 return -EADDRNOTAVAIL;
1810
1811         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1812         if (r < 0)
1813                 return r;
1814
1815         field_length = strlen(field);
1816
1817         n = journal_file_entry_n_items(o);
1818         for (i = 0; i < n; i++) {
1819                 uint64_t p, l;
1820                 le64_t le_hash;
1821                 size_t t;
1822
1823                 p = le64toh(o->entry.items[i].object_offset);
1824                 le_hash = o->entry.items[i].hash;
1825                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1826                 if (r < 0)
1827                         return r;
1828
1829                 if (le_hash != o->data.hash)
1830                         return -EBADMSG;
1831
1832                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
1833
1834                 if (o->object.flags & OBJECT_COMPRESSED) {
1835
1836 #ifdef HAVE_XZ
1837                         if (uncompress_startswith(o->data.payload, l,
1838                                                   &f->compress_buffer, &f->compress_buffer_size,
1839                                                   field, field_length, '=')) {
1840
1841                                 uint64_t rsize;
1842
1843                                 if (!uncompress_blob(o->data.payload, l,
1844                                                      &f->compress_buffer, &f->compress_buffer_size, &rsize,
1845                                                      j->data_threshold))
1846                                         return -EBADMSG;
1847
1848                                 *data = f->compress_buffer;
1849                                 *size = (size_t) rsize;
1850
1851                                 return 0;
1852                         }
1853 #else
1854                         return -EPROTONOSUPPORT;
1855 #endif
1856
1857                 } else if (l >= field_length+1 &&
1858                            memcmp(o->data.payload, field, field_length) == 0 &&
1859                            o->data.payload[field_length] == '=') {
1860
1861                         t = (size_t) l;
1862
1863                         if ((uint64_t) t != l)
1864                                 return -E2BIG;
1865
1866                         *data = o->data.payload;
1867                         *size = t;
1868
1869                         return 1;
1870                 }
1871
1872                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1873                 if (r < 0)
1874                         return r;
1875         }
1876
1877         return -ENOENT;
1878 }
1879
1880 static int return_data(sd_journal *j, JournalFile *f, Object *o, const void **data, size_t *size) {
1881         size_t t;
1882         uint64_t l;
1883
1884         l = le64toh(o->object.size) - offsetof(Object, data.payload);
1885         t = (size_t) l;
1886
1887         /* We can't read objects larger than 4G on a 32bit machine */
1888         if ((uint64_t) t != l)
1889                 return -E2BIG;
1890
1891         if (o->object.flags & OBJECT_COMPRESSED) {
1892 #ifdef HAVE_XZ
1893                 uint64_t rsize;
1894
1895                 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize, j->data_threshold))
1896                         return -EBADMSG;
1897
1898                 *data = f->compress_buffer;
1899                 *size = (size_t) rsize;
1900 #else
1901                 return -EPROTONOSUPPORT;
1902 #endif
1903         } else {
1904                 *data = o->data.payload;
1905                 *size = t;
1906         }
1907
1908         return 0;
1909 }
1910
1911 _public_ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) {
1912         JournalFile *f;
1913         uint64_t p, n;
1914         le64_t le_hash;
1915         int r;
1916         Object *o;
1917
1918         if (!j)
1919                 return -EINVAL;
1920         if (!data)
1921                 return -EINVAL;
1922         if (!size)
1923                 return -EINVAL;
1924
1925         f = j->current_file;
1926         if (!f)
1927                 return -EADDRNOTAVAIL;
1928
1929         if (f->current_offset <= 0)
1930                 return -EADDRNOTAVAIL;
1931
1932         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1933         if (r < 0)
1934                 return r;
1935
1936         n = journal_file_entry_n_items(o);
1937         if (j->current_field >= n)
1938                 return 0;
1939
1940         p = le64toh(o->entry.items[j->current_field].object_offset);
1941         le_hash = o->entry.items[j->current_field].hash;
1942         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1943         if (r < 0)
1944                 return r;
1945
1946         if (le_hash != o->data.hash)
1947                 return -EBADMSG;
1948
1949         r = return_data(j, f, o, data, size);
1950         if (r < 0)
1951                 return r;
1952
1953         j->current_field ++;
1954
1955         return 1;
1956 }
1957
1958 _public_ void sd_journal_restart_data(sd_journal *j) {
1959         if (!j)
1960                 return;
1961
1962         j->current_field = 0;
1963 }
1964
1965 _public_ int sd_journal_get_fd(sd_journal *j) {
1966         int r;
1967
1968         if (!j)
1969                 return -EINVAL;
1970
1971         if (j->inotify_fd >= 0)
1972                 return j->inotify_fd;
1973
1974         r = allocate_inotify(j);
1975         if (r < 0)
1976                 return r;
1977
1978         /* Iterate through all dirs again, to add them to the
1979          * inotify */
1980         if (j->path)
1981                 r = add_root_directory(j, j->path);
1982         else
1983                 r = add_search_paths(j);
1984         if (r < 0)
1985                 return r;
1986
1987         return j->inotify_fd;
1988 }
1989
1990 static void process_inotify_event(sd_journal *j, struct inotify_event *e) {
1991         Directory *d;
1992         int r;
1993
1994         assert(j);
1995         assert(e);
1996
1997         /* Is this a subdirectory we watch? */
1998         d = hashmap_get(j->directories_by_wd, INT_TO_PTR(e->wd));
1999         if (d) {
2000                 sd_id128_t id;
2001
2002                 if (!(e->mask & IN_ISDIR) && e->len > 0 &&
2003                     (endswith(e->name, ".journal") ||
2004                      endswith(e->name, ".journal~"))) {
2005
2006                         /* Event for a journal file */
2007
2008                         if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) {
2009                                 r = add_file(j, d->path, e->name);
2010                                 if (r < 0)
2011                                         log_debug("Failed to add file %s/%s: %s", d->path, e->name, strerror(-r));
2012
2013                         } else if (e->mask & (IN_DELETE|IN_MOVED_FROM|IN_UNMOUNT)) {
2014
2015                                 r = remove_file(j, d->path, e->name);
2016                                 if (r < 0)
2017                                         log_debug("Failed to remove file %s/%s: %s", d->path, e->name, strerror(-r));
2018                         }
2019
2020                 } else if (!d->is_root && e->len == 0) {
2021
2022                         /* Event for a subdirectory */
2023
2024                         if (e->mask & (IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT)) {
2025                                 r = remove_directory(j, d);
2026                                 if (r < 0)
2027                                         log_debug("Failed to remove directory %s: %s", d->path, strerror(-r));
2028                         }
2029
2030
2031                 } else if (d->is_root && (e->mask & IN_ISDIR) && e->len > 0 && sd_id128_from_string(e->name, &id) >= 0) {
2032
2033                         /* Event for root directory */
2034
2035                         if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) {
2036                                 r = add_directory(j, d->path, e->name);
2037                                 if (r < 0)
2038                                         log_debug("Failed to add directory %s/%s: %s", d->path, e->name, strerror(-r));
2039                         }
2040                 }
2041
2042                 return;
2043         }
2044
2045         if (e->mask & IN_IGNORED)
2046                 return;
2047
2048         log_warning("Unknown inotify event.");
2049 }
2050
2051 static int determine_change(sd_journal *j) {
2052         bool b;
2053
2054         assert(j);
2055
2056         b = j->current_invalidate_counter != j->last_invalidate_counter;
2057         j->last_invalidate_counter = j->current_invalidate_counter;
2058
2059         return b ? SD_JOURNAL_INVALIDATE : SD_JOURNAL_APPEND;
2060 }
2061
2062 _public_ int sd_journal_process(sd_journal *j) {
2063         uint8_t buffer[sizeof(struct inotify_event) + FILENAME_MAX] _alignas_(struct inotify_event);
2064         bool got_something = false;
2065
2066         if (!j)
2067                 return -EINVAL;
2068
2069         for (;;) {
2070                 struct inotify_event *e;
2071                 ssize_t l;
2072
2073                 l = read(j->inotify_fd, buffer, sizeof(buffer));
2074                 if (l < 0) {
2075                         if (errno == EAGAIN || errno == EINTR)
2076                                 return got_something ? determine_change(j) : SD_JOURNAL_NOP;
2077
2078                         return -errno;
2079                 }
2080
2081                 got_something = true;
2082
2083                 e = (struct inotify_event*) buffer;
2084                 while (l > 0) {
2085                         size_t step;
2086
2087                         process_inotify_event(j, e);
2088
2089                         step = sizeof(struct inotify_event) + e->len;
2090                         assert(step <= (size_t) l);
2091
2092                         e = (struct inotify_event*) ((uint8_t*) e + step);
2093                         l -= step;
2094                 }
2095         }
2096
2097         return determine_change(j);
2098 }
2099
2100 _public_ int sd_journal_wait(sd_journal *j, uint64_t timeout_usec) {
2101         int r;
2102
2103         assert(j);
2104
2105         if (j->inotify_fd < 0) {
2106
2107                 /* This is the first invocation, hence create the
2108                  * inotify watch */
2109                 r = sd_journal_get_fd(j);
2110                 if (r < 0)
2111                         return r;
2112
2113                 /* The journal might have changed since the context
2114                  * object was created and we weren't watching before,
2115                  * hence don't wait for anything, and return
2116                  * immediately. */
2117                 return determine_change(j);
2118         }
2119
2120         if (j->on_network) {
2121                 /* If we are on the network we need to regularly check
2122                  * for changes manually */
2123
2124                 if (timeout_usec == (uint64_t) -1 || timeout_usec > JOURNAL_FILES_RECHECK_USEC)
2125                         timeout_usec = JOURNAL_FILES_RECHECK_USEC;
2126         }
2127
2128         do {
2129                 r = fd_wait_for_event(j->inotify_fd, POLLIN, timeout_usec);
2130         } while (r == -EINTR);
2131
2132         if (r < 0)
2133                 return r;
2134
2135         return sd_journal_process(j);
2136 }
2137
2138 _public_ int sd_journal_get_cutoff_realtime_usec(sd_journal *j, uint64_t *from, uint64_t *to) {
2139         Iterator i;
2140         JournalFile *f;
2141         bool first = true;
2142         int r;
2143
2144         if (!j)
2145                 return -EINVAL;
2146         if (!from && !to)
2147                 return -EINVAL;
2148
2149         HASHMAP_FOREACH(f, j->files, i) {
2150                 usec_t fr, t;
2151
2152                 r = journal_file_get_cutoff_realtime_usec(f, &fr, &t);
2153                 if (r == -ENOENT)
2154                         continue;
2155                 if (r < 0)
2156                         return r;
2157                 if (r == 0)
2158                         continue;
2159
2160                 if (first) {
2161                         if (from)
2162                                 *from = fr;
2163                         if (to)
2164                                 *to = t;
2165                         first = false;
2166                 } else {
2167                         if (from)
2168                                 *from = MIN(fr, *from);
2169                         if (to)
2170                                 *to = MAX(t, *to);
2171                 }
2172         }
2173
2174         return first ? 0 : 1;
2175 }
2176
2177 _public_ int sd_journal_get_cutoff_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t *from, uint64_t *to) {
2178         Iterator i;
2179         JournalFile *f;
2180         bool first = true;
2181         int r;
2182
2183         if (!j)
2184                 return -EINVAL;
2185         if (!from && !to)
2186                 return -EINVAL;
2187
2188         HASHMAP_FOREACH(f, j->files, i) {
2189                 usec_t fr, t;
2190
2191                 r = journal_file_get_cutoff_monotonic_usec(f, boot_id, &fr, &t);
2192                 if (r == -ENOENT)
2193                         continue;
2194                 if (r < 0)
2195                         return r;
2196                 if (r == 0)
2197                         continue;
2198
2199                 if (first) {
2200                         if (from)
2201                                 *from = fr;
2202                         if (to)
2203                                 *to = t;
2204                         first = false;
2205                 } else {
2206                         if (from)
2207                                 *from = MIN(fr, *from);
2208                         if (to)
2209                                 *to = MAX(t, *to);
2210                 }
2211         }
2212
2213         return first ? 0 : 1;
2214 }
2215
2216 void journal_print_header(sd_journal *j) {
2217         Iterator i;
2218         JournalFile *f;
2219         bool newline = false;
2220
2221         assert(j);
2222
2223         HASHMAP_FOREACH(f, j->files, i) {
2224                 if (newline)
2225                         putchar('\n');
2226                 else
2227                         newline = true;
2228
2229                 journal_file_print_header(f);
2230         }
2231 }
2232
2233 _public_ int sd_journal_get_usage(sd_journal *j, uint64_t *bytes) {
2234         Iterator i;
2235         JournalFile *f;
2236         uint64_t sum = 0;
2237
2238         if (!j)
2239                 return -EINVAL;
2240         if (!bytes)
2241                 return -EINVAL;
2242
2243         HASHMAP_FOREACH(f, j->files, i) {
2244                 struct stat st;
2245
2246                 if (fstat(f->fd, &st) < 0)
2247                         return -errno;
2248
2249                 sum += (uint64_t) st.st_blocks * 512ULL;
2250         }
2251
2252         *bytes = sum;
2253         return 0;
2254 }
2255
2256 _public_ int sd_journal_query_unique(sd_journal *j, const char *field) {
2257         char *f;
2258
2259         if (!j)
2260                 return -EINVAL;
2261         if (isempty(field))
2262                 return -EINVAL;
2263         if (!field_is_valid(field))
2264                 return -EINVAL;
2265
2266         f = strdup(field);
2267         if (!f)
2268                 return -ENOMEM;
2269
2270         free(j->unique_field);
2271         j->unique_field = f;
2272         j->unique_file = NULL;
2273         j->unique_offset = 0;
2274
2275         return 0;
2276 }
2277
2278 _public_ int sd_journal_enumerate_unique(sd_journal *j, const void **data, size_t *l) {
2279         Object *o;
2280         size_t k;
2281         int r;
2282
2283         if (!j)
2284                 return -EINVAL;
2285         if (!data)
2286                 return -EINVAL;
2287         if (!l)
2288                 return -EINVAL;
2289         if (!j->unique_field)
2290                 return -EINVAL;
2291
2292         k = strlen(j->unique_field);
2293
2294         if (!j->unique_file) {
2295                 j->unique_file = hashmap_first(j->files);
2296                 if (!j->unique_file)
2297                         return 0;
2298                 j->unique_offset = 0;
2299         }
2300
2301         for (;;) {
2302                 JournalFile *of;
2303                 Iterator i;
2304                 const void *odata;
2305                 size_t ol;
2306                 bool found;
2307
2308                 /* Proceed to next data object in the field's linked list */
2309                 if (j->unique_offset == 0) {
2310                         r = journal_file_find_field_object(j->unique_file, j->unique_field, k, &o, NULL);
2311                         if (r < 0)
2312                                 return r;
2313
2314                         j->unique_offset = r > 0 ? le64toh(o->field.head_data_offset) : 0;
2315                 } else {
2316                         r = journal_file_move_to_object(j->unique_file, OBJECT_DATA, j->unique_offset, &o);
2317                         if (r < 0)
2318                                 return r;
2319
2320                         j->unique_offset = le64toh(o->data.next_field_offset);
2321                 }
2322
2323                 /* We reached the end of the list? Then start again, with the next file */
2324                 if (j->unique_offset == 0) {
2325                         JournalFile *n;
2326
2327                         n = hashmap_next(j->files, j->unique_file->path);
2328                         if (!n)
2329                                 return 0;
2330
2331                         j->unique_file = n;
2332                         continue;
2333                 }
2334
2335                 /* We do not use the type context here, but 0 instead,
2336                  * so that we can look at this data object at the same
2337                  * time as one on another file */
2338                 r = journal_file_move_to_object(j->unique_file, 0, j->unique_offset, &o);
2339                 if (r < 0)
2340                         return r;
2341
2342                 /* Let's do the type check by hand, since we used 0 context above. */
2343                 if (o->object.type != OBJECT_DATA)
2344                         return -EBADMSG;
2345
2346                 r = return_data(j, j->unique_file, o, &odata, &ol);
2347                 if (r < 0)
2348                         return r;
2349
2350                 /* OK, now let's see if we already returned this data
2351                  * object by checking if it exists in the earlier
2352                  * traversed files. */
2353                 found = false;
2354                 HASHMAP_FOREACH(of, j->files, i) {
2355                         Object *oo;
2356                         uint64_t op;
2357
2358                         if (of == j->unique_file)
2359                                 break;
2360
2361                         /* Skip this file it didn't have any fields
2362                          * indexed */
2363                         if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) &&
2364                             le64toh(of->header->n_fields) <= 0)
2365                                 continue;
2366
2367                         r = journal_file_find_data_object_with_hash(of, odata, ol, le64toh(o->data.hash), &oo, &op);
2368                         if (r < 0)
2369                                 return r;
2370
2371                         if (r > 0)
2372                                 found = true;
2373                 }
2374
2375                 if (found)
2376                         continue;
2377
2378                 r = return_data(j, j->unique_file, o, data, l);
2379                 if (r < 0)
2380                         return r;
2381
2382                 return 1;
2383         }
2384 }
2385
2386 _public_ void sd_journal_restart_unique(sd_journal *j) {
2387         if (!j)
2388                 return;
2389
2390         j->unique_file = NULL;
2391         j->unique_offset = 0;
2392 }
2393
2394 _public_ int sd_journal_reliable_fd(sd_journal *j) {
2395         if (!j)
2396                 return -EINVAL;
2397
2398         return !j->on_network;
2399 }
2400
2401 static char *lookup_field(const char *field, void *userdata) {
2402         sd_journal *j = userdata;
2403         const void *data;
2404         size_t size, d;
2405         int r;
2406
2407         assert(field);
2408         assert(j);
2409
2410         r = sd_journal_get_data(j, field, &data, &size);
2411         if (r < 0 ||
2412             size > REPLACE_VAR_MAX)
2413                 return strdup(field);
2414
2415         d = strlen(field) + 1;
2416
2417         return strndup((const char*) data + d, size - d);
2418 }
2419
2420 _public_ int sd_journal_get_catalog(sd_journal *j, char **ret) {
2421         const void *data;
2422         size_t size;
2423         sd_id128_t id;
2424         _cleanup_free_ char *text = NULL, *cid = NULL;
2425         char *t;
2426         int r;
2427
2428         if (!j)
2429                 return -EINVAL;
2430         if (!ret)
2431                 return -EINVAL;
2432
2433         r = sd_journal_get_data(j, "MESSAGE_ID", &data, &size);
2434         if (r < 0)
2435                 return r;
2436
2437         cid = strndup((const char*) data + 11, size - 11);
2438         if (!cid)
2439                 return -ENOMEM;
2440
2441         r = sd_id128_from_string(cid, &id);
2442         if (r < 0)
2443                 return r;
2444
2445         r = catalog_get(id, &text);
2446         if (r < 0)
2447                 return r;
2448
2449         t = replace_var(text, lookup_field, j);
2450         if (!t)
2451                 return -ENOMEM;
2452
2453         *ret = t;
2454         return 0;
2455 }
2456
2457 _public_ int sd_journal_get_catalog_for_message_id(sd_id128_t id, char **ret) {
2458         if (!ret)
2459                 return -EINVAL;
2460
2461         return catalog_get(id, ret);
2462 }
2463
2464 _public_ int sd_journal_set_data_threshold(sd_journal *j, size_t sz) {
2465         if (!j)
2466                 return -EINVAL;
2467
2468         j->data_threshold = sz;
2469         return 0;
2470 }
2471
2472 _public_ int sd_journal_get_data_threshold(sd_journal *j, size_t *sz) {
2473         if (!j)
2474                 return -EINVAL;
2475         if (!sz)
2476                 return -EINVAL;
2477
2478         *sz = j->data_threshold;
2479         return 0;
2480 }