chiark / gitweb /
journal: delete unused function journal_file_skip_entry()
[elogind.git] / src / journal / sd-journal.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <stddef.h>
25 #include <unistd.h>
26 #include <sys/inotify.h>
27 #include <sys/poll.h>
28 #include <sys/vfs.h>
29 #include <linux/magic.h>
30
31 #include "sd-journal.h"
32 #include "journal-def.h"
33 #include "journal-file.h"
34 #include "hashmap.h"
35 #include "list.h"
36 #include "strv.h"
37 #include "path-util.h"
38 #include "lookup3.h"
39 #include "compress.h"
40 #include "journal-internal.h"
41 #include "missing.h"
42 #include "catalog.h"
43 #include "replace-var.h"
44 #include "fileio.h"
45
/* NOTE(review): presumably the upper bound on simultaneously opened journal
 * files; the code that enforces it is outside this chunk -- confirm. */
#define JOURNAL_FILES_MAX 1024

/* Two seconds, expressed via USEC_PER_SEC.
 * NOTE(review): the user of this interval is not visible here -- confirm. */
#define JOURNAL_FILES_RECHECK_USEC (2 * USEC_PER_SEC)

/* NOTE(review): presumably a size limit for catalog variable replacement;
 * not referenced in the visible part of the file -- confirm. */
#define REPLACE_VAR_MAX 256

/* 64 * 1024; NOTE(review): presumably a byte count -- confirm at the use site. */
#define DEFAULT_DATA_THRESHOLD (64*1024)

/* Forward declaration: real_journal_next() calls this before its definition. */
static void remove_file_real(sd_journal *j, JournalFile *f);
55
56 static bool journal_pid_changed(sd_journal *j) {
57         assert(j);
58
59         /* We don't support people creating a journal object and
60          * keeping it around over a fork(). Let's complain. */
61
62         return j->original_pid != getpid();
63 }
64
65 /* We return an error here only if we didn't manage to
66    memorize the real error. */
67 static int set_put_error(sd_journal *j, int r) {
68         int k;
69
70         if (r >= 0)
71                 return r;
72
73         k = set_ensure_allocated(&j->errors, NULL);
74         if (k < 0)
75                 return k;
76
77         return set_put(j->errors, INT_TO_PTR(r));
78 }
79
80 static void detach_location(sd_journal *j) {
81         Iterator i;
82         JournalFile *f;
83
84         assert(j);
85
86         j->current_file = NULL;
87         j->current_field = 0;
88
89         ORDERED_HASHMAP_FOREACH(f, j->files, i)
90                 f->current_offset = 0;
91 }
92
93 static void reset_location(sd_journal *j) {
94         assert(j);
95
96         detach_location(j);
97         zero(j->current_location);
98 }
99
100 static void init_location(Location *l, LocationType type, JournalFile *f, Object *o) {
101         assert(l);
102         assert(type == LOCATION_DISCRETE || type == LOCATION_SEEK);
103         assert(f);
104         assert(o->object.type == OBJECT_ENTRY);
105
106         l->type = type;
107         l->seqnum = le64toh(o->entry.seqnum);
108         l->seqnum_id = f->header->seqnum_id;
109         l->realtime = le64toh(o->entry.realtime);
110         l->monotonic = le64toh(o->entry.monotonic);
111         l->boot_id = o->entry.boot_id;
112         l->xor_hash = le64toh(o->entry.xor_hash);
113
114         l->seqnum_set = l->realtime_set = l->monotonic_set = l->xor_hash_set = true;
115 }
116
117 static void set_location(sd_journal *j, LocationType type, JournalFile *f, Object *o,
118                          direction_t direction, uint64_t offset) {
119         assert(j);
120         assert(type == LOCATION_DISCRETE || type == LOCATION_SEEK);
121         assert(f);
122         assert(o);
123
124         init_location(&j->current_location, type, f, o);
125
126         j->current_file = f;
127         j->current_field = 0;
128
129         f->last_direction = direction;
130         f->current_offset = offset;
131 }
132
/* Checks whether the `size` bytes at `data` form an acceptable match
 * expression "FIELD=...": at least two bytes, not starting with "__", and a
 * non-empty field name made only of 'A'-'Z', '0'-'9' and '_' followed by
 * '='. Returns non-zero when valid, zero otherwise. */
static int match_is_valid(const void *data, size_t size) {
        const char *bytes = data;
        size_t i;

        assert(data);

        if (size < 2 || startswith(data, "__"))
                return false;

        for (i = 0; i < size; i++) {
                char c = bytes[i];

                /* The first '=' ends the field name; the name must not be empty. */
                if (c == '=')
                        return i > 0;

                if (c != '_' &&
                    !(c >= 'A' && c <= 'Z') &&
                    !(c >= '0' && c <= '9'))
                        return false;
        }

        /* Ran out of bytes without ever seeing '='. */
        return false;
}
164
/* Tells whether two "FIELD=value" buffers name the same field, i.e. agree
 * byte for byte up to and including the first '='. Both buffers are expected
 * to contain a '='; running off the end of the shorter one without finding
 * it is treated as a programming error. */
static bool same_field(const void *_a, size_t s, const void *_b, size_t t) {
        const uint8_t *x = _a, *y = _b;
        size_t limit = s < t ? s : t;
        size_t k;

        for (k = 0; k < limit; k++) {

                if (x[k] != y[k])
                        return false;

                if (x[k] == '=')
                        return true;
        }

        assert_not_reached("\"=\" not found");
}
180
181 static Match *match_new(Match *p, MatchType t) {
182         Match *m;
183
184         m = new0(Match, 1);
185         if (!m)
186                 return NULL;
187
188         m->type = t;
189
190         if (p) {
191                 m->parent = p;
192                 LIST_PREPEND(matches, p->matches, m);
193         }
194
195         return m;
196 }
197
198 static void match_free(Match *m) {
199         assert(m);
200
201         while (m->matches)
202                 match_free(m->matches);
203
204         if (m->parent)
205                 LIST_REMOVE(matches, m->parent->matches, m);
206
207         free(m->data);
208         free(m);
209 }
210
211 static void match_free_if_empty(Match *m) {
212         if (!m || m->matches)
213                 return;
214
215         match_free(m);
216 }
217
218 _public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size) {
219         Match *l3, *l4, *add_here = NULL, *m;
220         le64_t le_hash;
221
222         assert_return(j, -EINVAL);
223         assert_return(!journal_pid_changed(j), -ECHILD);
224         assert_return(data, -EINVAL);
225
226         if (size == 0)
227                 size = strlen(data);
228
229         assert_return(match_is_valid(data, size), -EINVAL);
230
231         /* level 0: AND term
232          * level 1: OR terms
233          * level 2: AND terms
234          * level 3: OR terms
235          * level 4: concrete matches */
236
237         if (!j->level0) {
238                 j->level0 = match_new(NULL, MATCH_AND_TERM);
239                 if (!j->level0)
240                         return -ENOMEM;
241         }
242
243         if (!j->level1) {
244                 j->level1 = match_new(j->level0, MATCH_OR_TERM);
245                 if (!j->level1)
246                         return -ENOMEM;
247         }
248
249         if (!j->level2) {
250                 j->level2 = match_new(j->level1, MATCH_AND_TERM);
251                 if (!j->level2)
252                         return -ENOMEM;
253         }
254
255         assert(j->level0->type == MATCH_AND_TERM);
256         assert(j->level1->type == MATCH_OR_TERM);
257         assert(j->level2->type == MATCH_AND_TERM);
258
259         le_hash = htole64(hash64(data, size));
260
261         LIST_FOREACH(matches, l3, j->level2->matches) {
262                 assert(l3->type == MATCH_OR_TERM);
263
264                 LIST_FOREACH(matches, l4, l3->matches) {
265                         assert(l4->type == MATCH_DISCRETE);
266
267                         /* Exactly the same match already? Then ignore
268                          * this addition */
269                         if (l4->le_hash == le_hash &&
270                             l4->size == size &&
271                             memcmp(l4->data, data, size) == 0)
272                                 return 0;
273
274                         /* Same field? Then let's add this to this OR term */
275                         if (same_field(data, size, l4->data, l4->size)) {
276                                 add_here = l3;
277                                 break;
278                         }
279                 }
280
281                 if (add_here)
282                         break;
283         }
284
285         if (!add_here) {
286                 add_here = match_new(j->level2, MATCH_OR_TERM);
287                 if (!add_here)
288                         goto fail;
289         }
290
291         m = match_new(add_here, MATCH_DISCRETE);
292         if (!m)
293                 goto fail;
294
295         m->le_hash = le_hash;
296         m->size = size;
297         m->data = memdup(data, size);
298         if (!m->data)
299                 goto fail;
300
301         detach_location(j);
302
303         return 0;
304
305 fail:
306         match_free_if_empty(add_here);
307         match_free_if_empty(j->level2);
308         match_free_if_empty(j->level1);
309         match_free_if_empty(j->level0);
310
311         return -ENOMEM;
312 }
313
314 _public_ int sd_journal_add_conjunction(sd_journal *j) {
315         assert_return(j, -EINVAL);
316         assert_return(!journal_pid_changed(j), -ECHILD);
317
318         if (!j->level0)
319                 return 0;
320
321         if (!j->level1)
322                 return 0;
323
324         if (!j->level1->matches)
325                 return 0;
326
327         j->level1 = NULL;
328         j->level2 = NULL;
329
330         return 0;
331 }
332
333 _public_ int sd_journal_add_disjunction(sd_journal *j) {
334         assert_return(j, -EINVAL);
335         assert_return(!journal_pid_changed(j), -ECHILD);
336
337         if (!j->level0)
338                 return 0;
339
340         if (!j->level1)
341                 return 0;
342
343         if (!j->level2)
344                 return 0;
345
346         if (!j->level2->matches)
347                 return 0;
348
349         j->level2 = NULL;
350         return 0;
351 }
352
353 static char *match_make_string(Match *m) {
354         char *p, *r;
355         Match *i;
356         bool enclose = false;
357
358         if (!m)
359                 return strdup("none");
360
361         if (m->type == MATCH_DISCRETE)
362                 return strndup(m->data, m->size);
363
364         p = NULL;
365         LIST_FOREACH(matches, i, m->matches) {
366                 char *t, *k;
367
368                 t = match_make_string(i);
369                 if (!t) {
370                         free(p);
371                         return NULL;
372                 }
373
374                 if (p) {
375                         k = strjoin(p, m->type == MATCH_OR_TERM ? " OR " : " AND ", t, NULL);
376                         free(p);
377                         free(t);
378
379                         if (!k)
380                                 return NULL;
381
382                         p = k;
383
384                         enclose = true;
385                 } else
386                         p = t;
387         }
388
389         if (enclose) {
390                 r = strjoin("(", p, ")", NULL);
391                 free(p);
392                 return r;
393         }
394
395         return p;
396 }
397
/* Returns a newly allocated, human-readable rendering of the journal's
 * complete match expression (the tree rooted at j->level0), as produced by
 * match_make_string(). The result is "none" when no match tree exists and
 * may be NULL on allocation failure. */
char *journal_make_match_string(sd_journal *j) {
        assert(j);

        return match_make_string(j->level0);
}
403
404 _public_ void sd_journal_flush_matches(sd_journal *j) {
405         if (!j)
406                 return;
407
408         if (j->level0)
409                 match_free(j->level0);
410
411         j->level0 = j->level1 = j->level2 = NULL;
412
413         detach_location(j);
414 }
415
/* Determines the relative order of two entries from (possibly) different
 * files: the entry object _ao of file af, and the entry at offset bp of
 * file bf.
 *
 * Returns a value < 0 if the first entry sorts before the second, > 0 if it
 * sorts after it, and 0 if both are considered the same entry. If the second
 * entry cannot be mapped, the result of comparing the two file paths is
 * returned instead. */
static int compare_entry_order(JournalFile *af, Object *_ao,
                               JournalFile *bf, uint64_t bp) {

        uint64_t a, b;
        Object *ao, *bo;
        int r;

        assert(af);
        assert(bf);
        assert(_ao);

        /* The mmap cache might invalidate the object from the first
         * file if we look at the one from the second file. Hence
         * temporarily copy the header of the first one, and look at
         * that only. */
        ao = alloca(offsetof(EntryObject, items));
        memcpy(ao, _ao, offsetof(EntryObject, items));

        r = journal_file_move_to_object(bf, OBJECT_ENTRY, bp, &bo);
        if (r < 0)
                /* Cannot read the second entry: fall back to ordering by path. */
                return strcmp(af->path, bf->path);

        /* We operate on two different files here, hence we can access
         * two objects at the same time, which we normally can't.
         *
         * If contents and timestamps match, these entries are
         * identical, even if the seqnum does not match */

        /* Equality only: the raw little-endian values can be compared
         * without conversion. */
        if (sd_id128_equal(ao->entry.boot_id, bo->entry.boot_id) &&
            ao->entry.monotonic == bo->entry.monotonic &&
            ao->entry.realtime == bo->entry.realtime &&
            ao->entry.xor_hash == bo->entry.xor_hash)
                return 0;

        if (sd_id128_equal(af->header->seqnum_id, bf->header->seqnum_id)) {

                /* If this is from the same seqnum source, compare
                 * seqnums */
                a = le64toh(ao->entry.seqnum);
                b = le64toh(bo->entry.seqnum);

                if (a < b)
                        return -1;
                if (a > b)
                        return 1;

                /* Wow! This is weird, different data but the same
                 * seqnums? Something is borked, but let's make the
                 * best of it and compare by time. */
        }

        if (sd_id128_equal(ao->entry.boot_id, bo->entry.boot_id)) {

                /* If the boot id matches, compare monotonic time */
                a = le64toh(ao->entry.monotonic);
                b = le64toh(bo->entry.monotonic);

                if (a < b)
                        return -1;
                if (a > b)
                        return 1;
        }

        /* Otherwise, compare UTC time */
        a = le64toh(ao->entry.realtime);
        b = le64toh(bo->entry.realtime);

        if (a < b)
                return -1;
        if (a > b)
                return 1;

        /* Finally, compare by contents */
        a = le64toh(ao->entry.xor_hash);
        b = le64toh(bo->entry.xor_hash);

        if (a < b)
                return -1;
        if (a > b)
                return 1;

        return 0;
}
499
500 static bool whole_file_precedes_location(JournalFile *f, Location *l, direction_t direction) {
501         assert(f);
502         assert(l);
503
504         if (l->type != LOCATION_DISCRETE && l->type != LOCATION_SEEK)
505                 return false;
506
507         if (l->seqnum_set && sd_id128_equal(l->seqnum_id, f->header->seqnum_id))
508                 return direction == DIRECTION_DOWN ?
509                         l->seqnum > le64toh(f->header->tail_entry_seqnum) :
510                         l->seqnum < le64toh(f->header->head_entry_seqnum);
511
512         if (l->realtime_set)
513                 return direction == DIRECTION_DOWN ?
514                         l->realtime > le64toh(f->header->tail_entry_realtime) :
515                         l->realtime < le64toh(f->header->head_entry_realtime);
516
517         return false;
518 }
519
520 static bool file_may_have_preceding_entry(JournalFile *f, JournalFile *of, uint64_t op, direction_t direction) {
521         Object *o;
522         int r;
523
524         assert(f);
525         assert(of);
526
527         r = journal_file_move_to_object(of, OBJECT_ENTRY, op, &o);
528         if (r < 0)
529                 return true;
530
531         if (sd_id128_equal(f->header->seqnum_id, of->header->seqnum_id))
532                 return direction == DIRECTION_DOWN ?
533                         le64toh(o->entry.seqnum) >= le64toh(f->header->head_entry_seqnum) :
534                         le64toh(o->entry.seqnum) <= le64toh(f->header->tail_entry_seqnum);
535
536         return direction == DIRECTION_DOWN ?
537                 le64toh(o->entry.realtime) >= le64toh(f->header->head_entry_realtime) :
538                 le64toh(o->entry.realtime) <= le64toh(f->header->tail_entry_realtime);
539 }
540
541 _pure_ static int compare_with_location(JournalFile *af, Object *ao, Location *l) {
542         uint64_t a;
543
544         assert(af);
545         assert(ao);
546         assert(l);
547         assert(l->type == LOCATION_DISCRETE || l->type == LOCATION_SEEK);
548
549         if (l->monotonic_set &&
550             sd_id128_equal(ao->entry.boot_id, l->boot_id) &&
551             l->realtime_set &&
552             le64toh(ao->entry.realtime) == l->realtime &&
553             l->xor_hash_set &&
554             le64toh(ao->entry.xor_hash) == l->xor_hash)
555                 return 0;
556
557         if (l->seqnum_set &&
558             sd_id128_equal(af->header->seqnum_id, l->seqnum_id)) {
559
560                 a = le64toh(ao->entry.seqnum);
561
562                 if (a < l->seqnum)
563                         return -1;
564                 if (a > l->seqnum)
565                         return 1;
566         }
567
568         if (l->monotonic_set &&
569             sd_id128_equal(ao->entry.boot_id, l->boot_id)) {
570
571                 a = le64toh(ao->entry.monotonic);
572
573                 if (a < l->monotonic)
574                         return -1;
575                 if (a > l->monotonic)
576                         return 1;
577         }
578
579         if (l->realtime_set) {
580
581                 a = le64toh(ao->entry.realtime);
582
583                 if (a < l->realtime)
584                         return -1;
585                 if (a > l->realtime)
586                         return 1;
587         }
588
589         if (l->xor_hash_set) {
590                 a = le64toh(ao->entry.xor_hash);
591
592                 if (a < l->xor_hash)
593                         return -1;
594                 if (a > l->xor_hash)
595                         return 1;
596         }
597
598         return 0;
599 }
600
/* Finds, within file f, the nearest entry at or beyond after_offset (in the
 * given direction) that satisfies the match tree m.
 *
 * Returns 1 and stores the entry object in *ret and its offset in *offset
 * (either may be NULL) when one was found, 0 when there is none, and a
 * negative value on error. */
static int next_for_match(
                sd_journal *j,
                Match *m,
                JournalFile *f,
                uint64_t after_offset,
                direction_t direction,
                Object **ret,
                uint64_t *offset) {

        int r;
        /* Offset of the best candidate so far; 0 means "none yet". */
        uint64_t np = 0;
        Object *n;

        assert(j);
        assert(m);
        assert(f);

        if (m->type == MATCH_DISCRETE) {
                uint64_t dp;

                /* Look up the data object for this exact FIELD=value; if the
                 * file does not have it, no entry in it can match. */
                r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
                if (r <= 0)
                        return r;

                return journal_file_move_to_entry_by_offset_for_data(f, dp, after_offset, direction, ret, offset);

        } else if (m->type == MATCH_OR_TERM) {
                Match *i;

                /* Find the earliest match beyond after_offset */

                LIST_FOREACH(matches, i, m->matches) {
                        uint64_t cp;

                        r = next_for_match(j, i, f, after_offset, direction, NULL, &cp);
                        if (r < 0)
                                return r;
                        else if (r > 0) {
                                /* Keep the candidate nearest to after_offset
                                 * in the travelling direction. */
                                if (np == 0 || (direction == DIRECTION_DOWN ? cp < np : cp > np))
                                        np = cp;
                        }
                }

                if (np == 0)
                        return 0;

        } else if (m->type == MATCH_AND_TERM) {
                Match *i, *last_moved;

                /* Always jump to the next matching entry and repeat
                 * this until we find an offset that matches for all
                 * matches. */

                if (!m->matches)
                        return 0;

                r = next_for_match(j, m->matches, f, after_offset, direction, NULL, &np);
                if (r <= 0)
                        return r;

                assert(direction == DIRECTION_DOWN ? np >= after_offset : np <= after_offset);
                last_moved = m->matches;

                /* last_moved is the sub-match that most recently pushed np
                 * further; the loop is bounded by it via LIST_LOOP_BUT_ONE. */
                LIST_LOOP_BUT_ONE(matches, i, m->matches, last_moved) {
                        uint64_t cp;

                        r = next_for_match(j, i, f, np, direction, NULL, &cp);
                        if (r <= 0)
                                return r;

                        assert(direction == DIRECTION_DOWN ? cp >= np : cp <= np);
                        if (direction == DIRECTION_DOWN ? cp > np : cp < np) {
                                np = cp;
                                last_moved = i;
                        }
                }
        }

        assert(np > 0);

        /* Map the winning entry so that the caller gets a valid object. */
        r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
        if (r < 0)
                return r;

        if (ret)
                *ret = n;
        if (offset)
                *offset = np;

        return 1;
}
692
/* Finds, within file f, the entry satisfying match tree m that corresponds
 * to the journal's current logical location (j->current_location), for
 * iteration in the given direction.
 *
 * Returns 1 and stores the entry object in *ret and its offset in *offset
 * (either may be NULL) when one was found, 0 when there is none, and a
 * negative value on error. */
static int find_location_for_match(
                sd_journal *j,
                Match *m,
                JournalFile *f,
                direction_t direction,
                Object **ret,
                uint64_t *offset) {

        int r;

        assert(j);
        assert(m);
        assert(f);

        if (m->type == MATCH_DISCRETE) {
                uint64_t dp;

                r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
                if (r <= 0)
                        return r;

                /* FIXME: missing: find by monotonic */
                /* NOTE(review): the monotonic branch below appears to cover
                 * the FIXME above -- confirm whether it is stale. */

                /* Head/tail: start from the respective end of the file.
                 * Otherwise use the most specific criterion available:
                 * seqnum (same source only), monotonic, then realtime. */
                if (j->current_location.type == LOCATION_HEAD)
                        return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_DOWN, ret, offset);
                if (j->current_location.type == LOCATION_TAIL)
                        return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_UP, ret, offset);
                if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
                        return journal_file_move_to_entry_by_seqnum_for_data(f, dp, j->current_location.seqnum, direction, ret, offset);
                if (j->current_location.monotonic_set) {
                        r = journal_file_move_to_entry_by_monotonic_for_data(f, dp, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
                        /* -ENOENT falls through to the realtime lookup. */
                        if (r != -ENOENT)
                                return r;
                }
                if (j->current_location.realtime_set)
                        return journal_file_move_to_entry_by_realtime_for_data(f, dp, j->current_location.realtime, direction, ret, offset);

                return journal_file_next_entry_for_data(f, NULL, 0, dp, direction, ret, offset);

        } else if (m->type == MATCH_OR_TERM) {
                /* Offset of the best candidate so far; 0 means "none yet". */
                uint64_t np = 0;
                Object *n;
                Match *i;

                /* Find the earliest match */

                LIST_FOREACH(matches, i, m->matches) {
                        uint64_t cp;

                        r = find_location_for_match(j, i, f, direction, NULL, &cp);
                        if (r < 0)
                                return r;
                        else if (r > 0) {
                                if (np == 0 || (direction == DIRECTION_DOWN ? np > cp : np < cp))
                                        np = cp;
                        }
                }

                if (np == 0)
                        return 0;

                r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
                if (r < 0)
                        return r;

                if (ret)
                        *ret = n;
                if (offset)
                        *offset = np;

                return 1;

        } else {
                Match *i;
                uint64_t np = 0;

                assert(m->type == MATCH_AND_TERM);

                /* First jump to the last match, and then find the
                 * next one where all matches match */

                if (!m->matches)
                        return 0;

                LIST_FOREACH(matches, i, m->matches) {
                        uint64_t cp;

                        /* Every sub-match must be present for an AND term
                         * to match at all. */
                        r = find_location_for_match(j, i, f, direction, NULL, &cp);
                        if (r <= 0)
                                return r;

                        if (np == 0 || (direction == DIRECTION_DOWN ? cp > np : cp < np))
                                np = cp;
                }

                return next_for_match(j, m, f, np, direction, ret, offset);
        }
}
791
792 static int find_location_with_matches(
793                 sd_journal *j,
794                 JournalFile *f,
795                 direction_t direction,
796                 Object **ret,
797                 uint64_t *offset) {
798
799         int r;
800
801         assert(j);
802         assert(f);
803         assert(ret);
804         assert(offset);
805
806         if (!j->level0) {
807                 /* No matches is simple */
808
809                 if (j->current_location.type == LOCATION_HEAD)
810                         return journal_file_next_entry(f, NULL, 0, DIRECTION_DOWN, ret, offset);
811                 if (j->current_location.type == LOCATION_TAIL)
812                         return journal_file_next_entry(f, NULL, 0, DIRECTION_UP, ret, offset);
813                 if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
814                         return journal_file_move_to_entry_by_seqnum(f, j->current_location.seqnum, direction, ret, offset);
815                 if (j->current_location.monotonic_set) {
816                         r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
817                         if (r != -ENOENT)
818                                 return r;
819                 }
820                 if (j->current_location.realtime_set)
821                         return journal_file_move_to_entry_by_realtime(f, j->current_location.realtime, direction, ret, offset);
822
823                 return journal_file_next_entry(f, NULL, 0, direction, ret, offset);
824         } else
825                 return find_location_for_match(j, j->level0, f, direction, ret, offset);
826 }
827
828 static int next_with_matches(
829                 sd_journal *j,
830                 JournalFile *f,
831                 direction_t direction,
832                 Object **ret,
833                 uint64_t *offset) {
834
835         Object *c;
836         uint64_t cp;
837
838         assert(j);
839         assert(f);
840         assert(ret);
841         assert(offset);
842
843         c = *ret;
844         cp = *offset;
845
846         /* No matches is easy. We simple advance the file
847          * pointer by one. */
848         if (!j->level0)
849                 return journal_file_next_entry(f, c, cp, direction, ret, offset);
850
851         /* If we have a match then we look for the next matching entry
852          * with an offset at least one step larger */
853         return next_for_match(j, j->level0, f, direction == DIRECTION_DOWN ? cp+1 : cp-1, direction, ret, offset);
854 }
855
/* Finds, within file f, the first entry that lies strictly beyond the
 * journal's current location in the given direction.
 *
 * Returns 1 and stores the entry object in *ret and its offset in *offset
 * (either may be NULL) when one was found, 0 when the file has no such
 * entry, and a negative value on error. */
static int next_beyond_location(sd_journal *j, JournalFile *f, direction_t direction, Object **ret, uint64_t *offset) {
        Object *c;
        uint64_t cp;
        int r;

        assert(j);
        assert(f);

        if (f->last_direction == direction && f->current_offset > 0) {
                /* We have a cached position in this file for the same
                 * direction: continue from there. */
                cp = f->current_offset;

                r = journal_file_move_to_object(f, OBJECT_ENTRY, cp, &c);
                if (r < 0)
                        return r;

                r = next_with_matches(j, f, direction, &c, &cp);
                if (r <= 0)
                        return r;
        } else {
                /* No usable cached position: look the location up from scratch. */
                r = find_location_with_matches(j, f, direction, &c, &cp);
                if (r <= 0)
                        return r;
        }

        /* OK, we found the spot, now let's advance until an entry
         * that is actually different from what we were previously
         * looking at. This is necessary to handle entries which exist
         * in two (or more) journal files, and which shall all be
         * suppressed but one. */

        for (;;) {
                bool found;

                if (j->current_location.type == LOCATION_DISCRETE) {
                        int k;

                        k = compare_with_location(f, c, &j->current_location);

                        /* Accept only entries strictly beyond the location. */
                        found = direction == DIRECTION_DOWN ? k > 0 : k < 0;
                } else
                        found = true;

                if (found) {
                        if (ret)
                                *ret = c;
                        if (offset)
                                *offset = cp;
                        return 1;
                }

                r = next_with_matches(j, f, direction, &c, &cp);
                if (r <= 0)
                        return r;
        }
}
911
/* Moves the journal's position by one entry in the given direction, across
 * all files in j->files.
 *
 * Each file is asked for its next entry beyond the current location; the
 * one that comes first in iteration order becomes the new current entry.
 *
 * Returns 1 if the position was advanced, 0 if there is no further entry,
 * and a negative value on error. */
static int real_journal_next(sd_journal *j, direction_t direction) {
        /* Best candidate so far: the file it is in and its offset there. */
        JournalFile *f, *new_file = NULL;
        uint64_t new_offset = 0;
        uint64_t p = 0;
        Iterator i;
        Object *o;
        int r;

        assert_return(j, -EINVAL);
        assert_return(!journal_pid_changed(j), -ECHILD);

        ORDERED_HASHMAP_FOREACH(f, j->files, i) {
                bool found;

                /* Cheap header-based checks first, to avoid touching files
                 * that cannot contribute the next entry. */
                if (whole_file_precedes_location(f, &j->current_location, direction))
                        continue;

                if (new_file && !file_may_have_preceding_entry(f, new_file, new_offset, direction))
                        continue;

                r = next_beyond_location(j, f, direction, &o, &p);
                if (r < 0) {
                        /* A file we cannot iterate is dropped via
                         * remove_file_real() instead of failing the call. */
                        log_debug_errno(r, "Can't iterate through %s, ignoring: %m", f->path);
                        remove_file_real(j, f);
                        continue;
                } else if (r == 0)
                        continue;

                if (!new_file)
                        found = true;
                else {
                        int k;

                        k = compare_entry_order(f, o, new_file, new_offset);

                        /* Prefer the entry that comes first in iteration order. */
                        found = direction == DIRECTION_DOWN ? k < 0 : k > 0;
                }

                if (found) {
                        new_file = f;
                        new_offset = p;
                }
        }

        if (!new_file)
                return 0;

        /* Map the winner again: looking at other files in the loop above
         * may have invalidated the object pointer we got for it. */
        r = journal_file_move_to_object(new_file, OBJECT_ENTRY, new_offset, &o);
        if (r < 0)
                return r;

        set_location(j, LOCATION_DISCRETE, new_file, o, direction, new_offset);

        return 1;
}
967
968 _public_ int sd_journal_next(sd_journal *j) {
969         return real_journal_next(j, DIRECTION_DOWN);
970 }
971
972 _public_ int sd_journal_previous(sd_journal *j) {
973         return real_journal_next(j, DIRECTION_UP);
974 }
975
976 static int real_journal_next_skip(sd_journal *j, direction_t direction, uint64_t skip) {
977         int c = 0, r;
978
979         assert_return(j, -EINVAL);
980         assert_return(!journal_pid_changed(j), -ECHILD);
981
982         if (skip == 0) {
983                 /* If this is not a discrete skip, then at least
984                  * resolve the current location */
985                 if (j->current_location.type != LOCATION_DISCRETE)
986                         return real_journal_next(j, direction);
987
988                 return 0;
989         }
990
991         do {
992                 r = real_journal_next(j, direction);
993                 if (r < 0)
994                         return r;
995
996                 if (r == 0)
997                         return c;
998
999                 skip--;
1000                 c++;
1001         } while (skip > 0);
1002
1003         return c;
1004 }
1005
1006 _public_ int sd_journal_next_skip(sd_journal *j, uint64_t skip) {
1007         return real_journal_next_skip(j, DIRECTION_DOWN, skip);
1008 }
1009
1010 _public_ int sd_journal_previous_skip(sd_journal *j, uint64_t skip) {
1011         return real_journal_next_skip(j, DIRECTION_UP, skip);
1012 }
1013
1014 _public_ int sd_journal_get_cursor(sd_journal *j, char **cursor) {
1015         Object *o;
1016         int r;
1017         char bid[33], sid[33];
1018
1019         assert_return(j, -EINVAL);
1020         assert_return(!journal_pid_changed(j), -ECHILD);
1021         assert_return(cursor, -EINVAL);
1022
1023         if (!j->current_file || j->current_file->current_offset <= 0)
1024                 return -EADDRNOTAVAIL;
1025
1026         r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
1027         if (r < 0)
1028                 return r;
1029
1030         sd_id128_to_string(j->current_file->header->seqnum_id, sid);
1031         sd_id128_to_string(o->entry.boot_id, bid);
1032
1033         if (asprintf(cursor,
1034                      "s=%s;i=%"PRIx64";b=%s;m=%"PRIx64";t=%"PRIx64";x=%"PRIx64,
1035                      sid, le64toh(o->entry.seqnum),
1036                      bid, le64toh(o->entry.monotonic),
1037                      le64toh(o->entry.realtime),
1038                      le64toh(o->entry.xor_hash)) < 0)
1039                 return -ENOMEM;
1040
1041         return 0;
1042 }
1043
1044 _public_ int sd_journal_seek_cursor(sd_journal *j, const char *cursor) {
1045         const char *word, *state;
1046         size_t l;
1047         unsigned long long seqnum, monotonic, realtime, xor_hash;
1048         bool
1049                 seqnum_id_set = false,
1050                 seqnum_set = false,
1051                 boot_id_set = false,
1052                 monotonic_set = false,
1053                 realtime_set = false,
1054                 xor_hash_set = false;
1055         sd_id128_t seqnum_id, boot_id;
1056
1057         assert_return(j, -EINVAL);
1058         assert_return(!journal_pid_changed(j), -ECHILD);
1059         assert_return(!isempty(cursor), -EINVAL);
1060
1061         FOREACH_WORD_SEPARATOR(word, l, cursor, ";", state) {
1062                 char *item;
1063                 int k = 0;
1064
1065                 if (l < 2 || word[1] != '=')
1066                         return -EINVAL;
1067
1068                 item = strndup(word, l);
1069                 if (!item)
1070                         return -ENOMEM;
1071
1072                 switch (word[0]) {
1073
1074                 case 's':
1075                         seqnum_id_set = true;
1076                         k = sd_id128_from_string(item+2, &seqnum_id);
1077                         break;
1078
1079                 case 'i':
1080                         seqnum_set = true;
1081                         if (sscanf(item+2, "%llx", &seqnum) != 1)
1082                                 k = -EINVAL;
1083                         break;
1084
1085                 case 'b':
1086                         boot_id_set = true;
1087                         k = sd_id128_from_string(item+2, &boot_id);
1088                         break;
1089
1090                 case 'm':
1091                         monotonic_set = true;
1092                         if (sscanf(item+2, "%llx", &monotonic) != 1)
1093                                 k = -EINVAL;
1094                         break;
1095
1096                 case 't':
1097                         realtime_set = true;
1098                         if (sscanf(item+2, "%llx", &realtime) != 1)
1099                                 k = -EINVAL;
1100                         break;
1101
1102                 case 'x':
1103                         xor_hash_set = true;
1104                         if (sscanf(item+2, "%llx", &xor_hash) != 1)
1105                                 k = -EINVAL;
1106                         break;
1107                 }
1108
1109                 free(item);
1110
1111                 if (k < 0)
1112                         return k;
1113         }
1114
1115         if ((!seqnum_set || !seqnum_id_set) &&
1116             (!monotonic_set || !boot_id_set) &&
1117             !realtime_set)
1118                 return -EINVAL;
1119
1120         reset_location(j);
1121
1122         j->current_location.type = LOCATION_SEEK;
1123
1124         if (realtime_set) {
1125                 j->current_location.realtime = (uint64_t) realtime;
1126                 j->current_location.realtime_set = true;
1127         }
1128
1129         if (seqnum_set && seqnum_id_set) {
1130                 j->current_location.seqnum = (uint64_t) seqnum;
1131                 j->current_location.seqnum_id = seqnum_id;
1132                 j->current_location.seqnum_set = true;
1133         }
1134
1135         if (monotonic_set && boot_id_set) {
1136                 j->current_location.monotonic = (uint64_t) monotonic;
1137                 j->current_location.boot_id = boot_id;
1138                 j->current_location.monotonic_set = true;
1139         }
1140
1141         if (xor_hash_set) {
1142                 j->current_location.xor_hash = (uint64_t) xor_hash;
1143                 j->current_location.xor_hash_set = true;
1144         }
1145
1146         return 0;
1147 }
1148
1149 _public_ int sd_journal_test_cursor(sd_journal *j, const char *cursor) {
1150         int r;
1151         const char *word, *state;
1152         size_t l;
1153         Object *o;
1154
1155         assert_return(j, -EINVAL);
1156         assert_return(!journal_pid_changed(j), -ECHILD);
1157         assert_return(!isempty(cursor), -EINVAL);
1158
1159         if (!j->current_file || j->current_file->current_offset <= 0)
1160                 return -EADDRNOTAVAIL;
1161
1162         r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
1163         if (r < 0)
1164                 return r;
1165
1166         FOREACH_WORD_SEPARATOR(word, l, cursor, ";", state) {
1167                 _cleanup_free_ char *item = NULL;
1168                 sd_id128_t id;
1169                 unsigned long long ll;
1170                 int k = 0;
1171
1172                 if (l < 2 || word[1] != '=')
1173                         return -EINVAL;
1174
1175                 item = strndup(word, l);
1176                 if (!item)
1177                         return -ENOMEM;
1178
1179                 switch (word[0]) {
1180
1181                 case 's':
1182                         k = sd_id128_from_string(item+2, &id);
1183                         if (k < 0)
1184                                 return k;
1185                         if (!sd_id128_equal(id, j->current_file->header->seqnum_id))
1186                                 return 0;
1187                         break;
1188
1189                 case 'i':
1190                         if (sscanf(item+2, "%llx", &ll) != 1)
1191                                 return -EINVAL;
1192                         if (ll != le64toh(o->entry.seqnum))
1193                                 return 0;
1194                         break;
1195
1196                 case 'b':
1197                         k = sd_id128_from_string(item+2, &id);
1198                         if (k < 0)
1199                                 return k;
1200                         if (!sd_id128_equal(id, o->entry.boot_id))
1201                                 return 0;
1202                         break;
1203
1204                 case 'm':
1205                         if (sscanf(item+2, "%llx", &ll) != 1)
1206                                 return -EINVAL;
1207                         if (ll != le64toh(o->entry.monotonic))
1208                                 return 0;
1209                         break;
1210
1211                 case 't':
1212                         if (sscanf(item+2, "%llx", &ll) != 1)
1213                                 return -EINVAL;
1214                         if (ll != le64toh(o->entry.realtime))
1215                                 return 0;
1216                         break;
1217
1218                 case 'x':
1219                         if (sscanf(item+2, "%llx", &ll) != 1)
1220                                 return -EINVAL;
1221                         if (ll != le64toh(o->entry.xor_hash))
1222                                 return 0;
1223                         break;
1224                 }
1225         }
1226
1227         return 1;
1228 }
1229
1230
1231 _public_ int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t usec) {
1232         assert_return(j, -EINVAL);
1233         assert_return(!journal_pid_changed(j), -ECHILD);
1234
1235         reset_location(j);
1236         j->current_location.type = LOCATION_SEEK;
1237         j->current_location.boot_id = boot_id;
1238         j->current_location.monotonic = usec;
1239         j->current_location.monotonic_set = true;
1240
1241         return 0;
1242 }
1243
1244 _public_ int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec) {
1245         assert_return(j, -EINVAL);
1246         assert_return(!journal_pid_changed(j), -ECHILD);
1247
1248         reset_location(j);
1249         j->current_location.type = LOCATION_SEEK;
1250         j->current_location.realtime = usec;
1251         j->current_location.realtime_set = true;
1252
1253         return 0;
1254 }
1255
1256 _public_ int sd_journal_seek_head(sd_journal *j) {
1257         assert_return(j, -EINVAL);
1258         assert_return(!journal_pid_changed(j), -ECHILD);
1259
1260         reset_location(j);
1261         j->current_location.type = LOCATION_HEAD;
1262
1263         return 0;
1264 }
1265
1266 _public_ int sd_journal_seek_tail(sd_journal *j) {
1267         assert_return(j, -EINVAL);
1268         assert_return(!journal_pid_changed(j), -ECHILD);
1269
1270         reset_location(j);
1271         j->current_location.type = LOCATION_TAIL;
1272
1273         return 0;
1274 }
1275
1276 static void check_network(sd_journal *j, int fd) {
1277         struct statfs sfs;
1278
1279         assert(j);
1280
1281         if (j->on_network)
1282                 return;
1283
1284         if (fstatfs(fd, &sfs) < 0)
1285                 return;
1286
1287         j->on_network =
1288                 F_TYPE_EQUAL(sfs.f_type, CIFS_MAGIC_NUMBER) ||
1289                 F_TYPE_EQUAL(sfs.f_type, CODA_SUPER_MAGIC) ||
1290                 F_TYPE_EQUAL(sfs.f_type, NCP_SUPER_MAGIC) ||
1291                 F_TYPE_EQUAL(sfs.f_type, NFS_SUPER_MAGIC) ||
1292                 F_TYPE_EQUAL(sfs.f_type, SMB_SUPER_MAGIC);
1293 }
1294
/* Returns true if filename is exactly "<prefix>.journal" or
 * "<prefix>.journal~", or if it begins with "<prefix>@". */
static bool file_has_type_prefix(const char *prefix, const char *filename) {
        const char *plain, *with_tilde, *with_at;

        /* All three strings are built before any comparison, as the
         * second one is derived from the first. */
        plain = strappenda(prefix, ".journal");
        with_tilde = strappenda(plain, "~");
        with_at = strappenda(prefix, "@");

        if (streq(filename, plain))
                return true;

        if (streq(filename, with_tilde))
                return true;

        return startswith(filename, with_at) != NULL;
}
1306
1307 static bool file_type_wanted(int flags, const char *filename) {
1308         if (!endswith(filename, ".journal") && !endswith(filename, ".journal~"))
1309                 return false;
1310
1311         /* no flags set â†’ every type is OK */
1312         if (!(flags & (SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER)))
1313                 return true;
1314
1315         if (flags & SD_JOURNAL_SYSTEM && file_has_type_prefix("system", filename))
1316                 return true;
1317
1318         if (flags & SD_JOURNAL_CURRENT_USER) {
1319                 char prefix[5 + DECIMAL_STR_MAX(uid_t) + 1];
1320
1321                 assert_se(snprintf(prefix, sizeof(prefix), "user-"UID_FMT, getuid())
1322                           < (int) sizeof(prefix));
1323
1324                 if (file_has_type_prefix(prefix, filename))
1325                         return true;
1326         }
1327
1328         return false;
1329 }
1330
1331 static int add_any_file(sd_journal *j, const char *path) {
1332         JournalFile *f = NULL;
1333         int r;
1334
1335         assert(j);
1336         assert(path);
1337
1338         if (ordered_hashmap_get(j->files, path))
1339                 return 0;
1340
1341         if (ordered_hashmap_size(j->files) >= JOURNAL_FILES_MAX) {
1342                 log_warning("Too many open journal files, not adding %s.", path);
1343                 return set_put_error(j, -ETOOMANYREFS);
1344         }
1345
1346         r = journal_file_open(path, O_RDONLY, 0, false, false, NULL, j->mmap, NULL, &f);
1347         if (r < 0)
1348                 return r;
1349
1350         /* journal_file_dump(f); */
1351
1352         r = ordered_hashmap_put(j->files, f->path, f);
1353         if (r < 0) {
1354                 journal_file_close(f);
1355                 return r;
1356         }
1357
1358         log_debug("File %s added.", f->path);
1359
1360         check_network(j, f->fd);
1361
1362         j->current_invalidate_counter ++;
1363
1364         return 0;
1365 }
1366
1367 static int add_file(sd_journal *j, const char *prefix, const char *filename) {
1368         _cleanup_free_ char *path = NULL;
1369         int r;
1370
1371         assert(j);
1372         assert(prefix);
1373         assert(filename);
1374
1375         if (j->no_new_files ||
1376             !file_type_wanted(j->flags, filename))
1377                 return 0;
1378
1379         path = strjoin(prefix, "/", filename, NULL);
1380         if (!path)
1381                 return -ENOMEM;
1382
1383         r = add_any_file(j, path);
1384         if (r == -ENOENT)
1385                 return 0;
1386         return 0;
1387 }
1388
1389 static int remove_file(sd_journal *j, const char *prefix, const char *filename) {
1390         _cleanup_free_ char *path;
1391         JournalFile *f;
1392
1393         assert(j);
1394         assert(prefix);
1395         assert(filename);
1396
1397         path = strjoin(prefix, "/", filename, NULL);
1398         if (!path)
1399                 return -ENOMEM;
1400
1401         f = ordered_hashmap_get(j->files, path);
1402         if (!f)
1403                 return 0;
1404
1405         remove_file_real(j, f);
1406         return 0;
1407 }
1408
1409 static void remove_file_real(sd_journal *j, JournalFile *f) {
1410         assert(j);
1411         assert(f);
1412
1413         ordered_hashmap_remove(j->files, f->path);
1414
1415         log_debug("File %s removed.", f->path);
1416
1417         if (j->current_file == f) {
1418                 j->current_file = NULL;
1419                 j->current_field = 0;
1420         }
1421
1422         if (j->unique_file == f) {
1423                 /* Jump to the next unique_file or NULL if that one was last */
1424                 j->unique_file = ordered_hashmap_next(j->files, j->unique_file->path);
1425                 j->unique_offset = 0;
1426                 if (!j->unique_file)
1427                         j->unique_file_lost = true;
1428         }
1429
1430         journal_file_close(f);
1431
1432         j->current_invalidate_counter ++;
1433 }
1434
1435 static int add_directory(sd_journal *j, const char *prefix, const char *dirname) {
1436         _cleanup_free_ char *path = NULL;
1437         int r;
1438         _cleanup_closedir_ DIR *d = NULL;
1439         sd_id128_t id, mid;
1440         Directory *m;
1441
1442         assert(j);
1443         assert(prefix);
1444         assert(dirname);
1445
1446         log_debug("Considering %s/%s.", prefix, dirname);
1447
1448         if ((j->flags & SD_JOURNAL_LOCAL_ONLY) &&
1449             (sd_id128_from_string(dirname, &id) < 0 ||
1450              sd_id128_get_machine(&mid) < 0 ||
1451              !(sd_id128_equal(id, mid) || path_startswith(prefix, "/run"))))
1452             return 0;
1453
1454         path = strjoin(prefix, "/", dirname, NULL);
1455         if (!path)
1456                 return -ENOMEM;
1457
1458         d = opendir(path);
1459         if (!d) {
1460                 log_debug_errno(errno, "Failed to open %s: %m", path);
1461                 if (errno == ENOENT)
1462                         return 0;
1463                 return -errno;
1464         }
1465
1466         m = hashmap_get(j->directories_by_path, path);
1467         if (!m) {
1468                 m = new0(Directory, 1);
1469                 if (!m)
1470                         return -ENOMEM;
1471
1472                 m->is_root = false;
1473                 m->path = path;
1474
1475                 if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
1476                         free(m);
1477                         return -ENOMEM;
1478                 }
1479
1480                 path = NULL; /* avoid freeing in cleanup */
1481                 j->current_invalidate_counter ++;
1482
1483                 log_debug("Directory %s added.", m->path);
1484
1485         } else if (m->is_root)
1486                 return 0;
1487
1488         if (m->wd <= 0 && j->inotify_fd >= 0) {
1489
1490                 m->wd = inotify_add_watch(j->inotify_fd, m->path,
1491                                           IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1492                                           IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM|
1493                                           IN_ONLYDIR);
1494
1495                 if (m->wd > 0 && hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m) < 0)
1496                         inotify_rm_watch(j->inotify_fd, m->wd);
1497         }
1498
1499         for (;;) {
1500                 struct dirent *de;
1501
1502                 errno = 0;
1503                 de = readdir(d);
1504                 if (!de && errno != 0) {
1505                         r = -errno;
1506                         log_debug_errno(errno, "Failed to read directory %s: %m", m->path);
1507                         return r;
1508                 }
1509                 if (!de)
1510                         break;
1511
1512                 if (dirent_is_file_with_suffix(de, ".journal") ||
1513                     dirent_is_file_with_suffix(de, ".journal~")) {
1514                         r = add_file(j, m->path, de->d_name);
1515                         if (r < 0) {
1516                                 log_debug_errno(r, "Failed to add file %s/%s: %m",
1517                                                 m->path, de->d_name);
1518                                 r = set_put_error(j, r);
1519                                 if (r < 0)
1520                                         return r;
1521                         }
1522                 }
1523         }
1524
1525         check_network(j, dirfd(d));
1526
1527         return 0;
1528 }
1529
1530 static int add_root_directory(sd_journal *j, const char *p) {
1531         _cleanup_closedir_ DIR *d = NULL;
1532         Directory *m;
1533         int r;
1534
1535         assert(j);
1536         assert(p);
1537
1538         if ((j->flags & SD_JOURNAL_RUNTIME_ONLY) &&
1539             !path_startswith(p, "/run"))
1540                 return -EINVAL;
1541
1542         if (j->prefix)
1543                 p = strappenda(j->prefix, p);
1544
1545         d = opendir(p);
1546         if (!d)
1547                 return -errno;
1548
1549         m = hashmap_get(j->directories_by_path, p);
1550         if (!m) {
1551                 m = new0(Directory, 1);
1552                 if (!m)
1553                         return -ENOMEM;
1554
1555                 m->is_root = true;
1556                 m->path = strdup(p);
1557                 if (!m->path) {
1558                         free(m);
1559                         return -ENOMEM;
1560                 }
1561
1562                 if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
1563                         free(m->path);
1564                         free(m);
1565                         return -ENOMEM;
1566                 }
1567
1568                 j->current_invalidate_counter ++;
1569
1570                 log_debug("Root directory %s added.", m->path);
1571
1572         } else if (!m->is_root)
1573                 return 0;
1574
1575         if (m->wd <= 0 && j->inotify_fd >= 0) {
1576
1577                 m->wd = inotify_add_watch(j->inotify_fd, m->path,
1578                                           IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1579                                           IN_ONLYDIR);
1580
1581                 if (m->wd > 0 && hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m) < 0)
1582                         inotify_rm_watch(j->inotify_fd, m->wd);
1583         }
1584
1585         if (j->no_new_files)
1586                 return 0;
1587
1588         for (;;) {
1589                 struct dirent *de;
1590                 sd_id128_t id;
1591
1592                 errno = 0;
1593                 de = readdir(d);
1594                 if (!de && errno != 0) {
1595                         r = -errno;
1596                         log_debug_errno(errno, "Failed to read directory %s: %m", m->path);
1597                         return r;
1598                 }
1599                 if (!de)
1600                         break;
1601
1602                 if (dirent_is_file_with_suffix(de, ".journal") ||
1603                     dirent_is_file_with_suffix(de, ".journal~")) {
1604                         r = add_file(j, m->path, de->d_name);
1605                         if (r < 0) {
1606                                 log_debug_errno(r, "Failed to add file %s/%s: %m",
1607                                                 m->path, de->d_name);
1608                                 r = set_put_error(j, r);
1609                                 if (r < 0)
1610                                         return r;
1611                         }
1612                 } else if ((de->d_type == DT_DIR || de->d_type == DT_LNK || de->d_type == DT_UNKNOWN) &&
1613                            sd_id128_from_string(de->d_name, &id) >= 0) {
1614
1615                         r = add_directory(j, m->path, de->d_name);
1616                         if (r < 0)
1617                                 log_debug_errno(r, "Failed to add directory %s/%s: %m", m->path, de->d_name);
1618                 }
1619         }
1620
1621         check_network(j, dirfd(d));
1622
1623         return 0;
1624 }
1625
1626 static int remove_directory(sd_journal *j, Directory *d) {
1627         assert(j);
1628
1629         if (d->wd > 0) {
1630                 hashmap_remove(j->directories_by_wd, INT_TO_PTR(d->wd));
1631
1632                 if (j->inotify_fd >= 0)
1633                         inotify_rm_watch(j->inotify_fd, d->wd);
1634         }
1635
1636         hashmap_remove(j->directories_by_path, d->path);
1637
1638         if (d->is_root)
1639                 log_debug("Root directory %s removed.", d->path);
1640         else
1641                 log_debug("Directory %s removed.", d->path);
1642
1643         free(d->path);
1644         free(d);
1645
1646         return 0;
1647 }
1648
1649 static int add_search_paths(sd_journal *j) {
1650         int r;
1651         const char search_paths[] =
1652                 "/run/log/journal\0"
1653                 "/var/log/journal\0";
1654         const char *p;
1655
1656         assert(j);
1657
1658         /* We ignore most errors here, since the idea is to only open
1659          * what's actually accessible, and ignore the rest. */
1660
1661         NULSTR_FOREACH(p, search_paths) {
1662                 r = add_root_directory(j, p);
1663                 if (r < 0 && r != -ENOENT) {
1664                         r = set_put_error(j, r);
1665                         if (r < 0)
1666                                 return r;
1667                 }
1668         }
1669
1670         return 0;
1671 }
1672
1673 static int add_current_paths(sd_journal *j) {
1674         Iterator i;
1675         JournalFile *f;
1676
1677         assert(j);
1678         assert(j->no_new_files);
1679
1680         /* Simply adds all directories for files we have open as
1681          * "root" directories. We don't expect errors here, so we
1682          * treat them as fatal. */
1683
1684         ORDERED_HASHMAP_FOREACH(f, j->files, i) {
1685                 _cleanup_free_ char *dir;
1686                 int r;
1687
1688                 dir = dirname_malloc(f->path);
1689                 if (!dir)
1690                         return -ENOMEM;
1691
1692                 r = add_root_directory(j, dir);
1693                 if (r < 0) {
1694                         set_put_error(j, r);
1695                         return r;
1696                 }
1697         }
1698
1699         return 0;
1700 }
1701
1702
1703 static int allocate_inotify(sd_journal *j) {
1704         assert(j);
1705
1706         if (j->inotify_fd < 0) {
1707                 j->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1708                 if (j->inotify_fd < 0)
1709                         return -errno;
1710         }
1711
1712         if (!j->directories_by_wd) {
1713                 j->directories_by_wd = hashmap_new(NULL);
1714                 if (!j->directories_by_wd)
1715                         return -ENOMEM;
1716         }
1717
1718         return 0;
1719 }
1720
1721 static sd_journal *journal_new(int flags, const char *path) {
1722         sd_journal *j;
1723
1724         j = new0(sd_journal, 1);
1725         if (!j)
1726                 return NULL;
1727
1728         j->original_pid = getpid();
1729         j->inotify_fd = -1;
1730         j->flags = flags;
1731         j->data_threshold = DEFAULT_DATA_THRESHOLD;
1732
1733         if (path) {
1734                 j->path = strdup(path);
1735                 if (!j->path)
1736                         goto fail;
1737         }
1738
1739         j->files = ordered_hashmap_new(&string_hash_ops);
1740         j->directories_by_path = hashmap_new(&string_hash_ops);
1741         j->mmap = mmap_cache_new();
1742         if (!j->files || !j->directories_by_path || !j->mmap)
1743                 goto fail;
1744
1745         return j;
1746
1747 fail:
1748         sd_journal_close(j);
1749         return NULL;
1750 }
1751
1752 _public_ int sd_journal_open(sd_journal **ret, int flags) {
1753         sd_journal *j;
1754         int r;
1755
1756         assert_return(ret, -EINVAL);
1757         assert_return((flags & ~(SD_JOURNAL_LOCAL_ONLY|SD_JOURNAL_RUNTIME_ONLY|SD_JOURNAL_SYSTEM|SD_JOURNAL_CURRENT_USER)) == 0, -EINVAL);
1758
1759         j = journal_new(flags, NULL);
1760         if (!j)
1761                 return -ENOMEM;
1762
1763         r = add_search_paths(j);
1764         if (r < 0)
1765                 goto fail;
1766
1767         *ret = j;
1768         return 0;
1769
1770 fail:
1771         sd_journal_close(j);
1772
1773         return r;
1774 }
1775
1776 _public_ int sd_journal_open_container(sd_journal **ret, const char *machine, int flags) {
1777         _cleanup_free_ char *root = NULL, *class = NULL;
1778         sd_journal *j;
1779         char *p;
1780         int r;
1781
1782         assert_return(machine, -EINVAL);
1783         assert_return(ret, -EINVAL);
1784         assert_return((flags & ~(SD_JOURNAL_LOCAL_ONLY|SD_JOURNAL_SYSTEM)) == 0, -EINVAL);
1785         assert_return(machine_name_is_valid(machine), -EINVAL);
1786
1787         p = strappenda("/run/systemd/machines/", machine);
1788         r = parse_env_file(p, NEWLINE, "ROOT", &root, "CLASS", &class, NULL);
1789         if (r == -ENOENT)
1790                 return -EHOSTDOWN;
1791         if (r < 0)
1792                 return r;
1793         if (!root)
1794                 return -ENODATA;
1795
1796         if (!streq_ptr(class, "container"))
1797                 return -EIO;
1798
1799         j = journal_new(flags, NULL);
1800         if (!j)
1801                 return -ENOMEM;
1802
1803         j->prefix = root;
1804         root = NULL;
1805
1806         r = add_search_paths(j);
1807         if (r < 0)
1808                 goto fail;
1809
1810         *ret = j;
1811         return 0;
1812
1813 fail:
1814         sd_journal_close(j);
1815         return r;
1816 }
1817
1818 _public_ int sd_journal_open_directory(sd_journal **ret, const char *path, int flags) {
1819         sd_journal *j;
1820         int r;
1821
1822         assert_return(ret, -EINVAL);
1823         assert_return(path, -EINVAL);
1824         assert_return(flags == 0, -EINVAL);
1825
1826         j = journal_new(flags, path);
1827         if (!j)
1828                 return -ENOMEM;
1829
1830         r = add_root_directory(j, path);
1831         if (r < 0) {
1832                 set_put_error(j, r);
1833                 goto fail;
1834         }
1835
1836         *ret = j;
1837         return 0;
1838
1839 fail:
1840         sd_journal_close(j);
1841
1842         return r;
1843 }
1844
1845 _public_ int sd_journal_open_files(sd_journal **ret, const char **paths, int flags) {
1846         sd_journal *j;
1847         const char **path;
1848         int r;
1849
1850         assert_return(ret, -EINVAL);
1851         assert_return(flags == 0, -EINVAL);
1852
1853         j = journal_new(flags, NULL);
1854         if (!j)
1855                 return -ENOMEM;
1856
1857         STRV_FOREACH(path, paths) {
1858                 r = add_any_file(j, *path);
1859                 if (r < 0) {
1860                         log_error_errno(r, "Failed to open %s: %m", *path);
1861                         goto fail;
1862                 }
1863         }
1864
1865         j->no_new_files = true;
1866
1867         *ret = j;
1868         return 0;
1869
1870 fail:
1871         sd_journal_close(j);
1872
1873         return r;
1874 }
1875
1876 _public_ void sd_journal_close(sd_journal *j) {
1877         Directory *d;
1878         JournalFile *f;
1879
1880         if (!j)
1881                 return;
1882
1883         sd_journal_flush_matches(j);
1884
1885         while ((f = ordered_hashmap_steal_first(j->files)))
1886                 journal_file_close(f);
1887
1888         ordered_hashmap_free(j->files);
1889
1890         while ((d = hashmap_first(j->directories_by_path)))
1891                 remove_directory(j, d);
1892
1893         while ((d = hashmap_first(j->directories_by_wd)))
1894                 remove_directory(j, d);
1895
1896         hashmap_free(j->directories_by_path);
1897         hashmap_free(j->directories_by_wd);
1898
1899         safe_close(j->inotify_fd);
1900
1901         if (j->mmap) {
1902                 log_debug("mmap cache statistics: %u hit, %u miss", mmap_cache_get_hit(j->mmap), mmap_cache_get_missed(j->mmap));
1903                 mmap_cache_unref(j->mmap);
1904         }
1905
1906         free(j->path);
1907         free(j->prefix);
1908         free(j->unique_field);
1909         set_free(j->errors);
1910         free(j);
1911 }
1912
1913 _public_ int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret) {
1914         Object *o;
1915         JournalFile *f;
1916         int r;
1917
1918         assert_return(j, -EINVAL);
1919         assert_return(!journal_pid_changed(j), -ECHILD);
1920         assert_return(ret, -EINVAL);
1921
1922         f = j->current_file;
1923         if (!f)
1924                 return -EADDRNOTAVAIL;
1925
1926         if (f->current_offset <= 0)
1927                 return -EADDRNOTAVAIL;
1928
1929         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1930         if (r < 0)
1931                 return r;
1932
1933         *ret = le64toh(o->entry.realtime);
1934         return 0;
1935 }
1936
1937 _public_ int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id128_t *ret_boot_id) {
1938         Object *o;
1939         JournalFile *f;
1940         int r;
1941         sd_id128_t id;
1942
1943         assert_return(j, -EINVAL);
1944         assert_return(!journal_pid_changed(j), -ECHILD);
1945
1946         f = j->current_file;
1947         if (!f)
1948                 return -EADDRNOTAVAIL;
1949
1950         if (f->current_offset <= 0)
1951                 return -EADDRNOTAVAIL;
1952
1953         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1954         if (r < 0)
1955                 return r;
1956
1957         if (ret_boot_id)
1958                 *ret_boot_id = o->entry.boot_id;
1959         else {
1960                 r = sd_id128_get_boot(&id);
1961                 if (r < 0)
1962                         return r;
1963
1964                 if (!sd_id128_equal(id, o->entry.boot_id))
1965                         return -ESTALE;
1966         }
1967
1968         if (ret)
1969                 *ret = le64toh(o->entry.monotonic);
1970
1971         return 0;
1972 }
1973
/* Checks whether "field" is an acceptable field name: non-empty, not
 * starting with "__", and consisting solely of '_', 'A'-'Z' and '0'-'9'. */
static bool field_is_valid(const char *field) {
        const char *c;

        assert(field);

        if (isempty(field) || startswith(field, "__"))
                return false;

        for (c = field; *c != 0; c++) {
                bool allowed =
                        *c == '_' ||
                        (*c >= 'A' && *c <= 'Z') ||
                        (*c >= '0' && *c <= '9');

                if (!allowed)
                        return false;
        }

        return true;
}
2001
2002 _public_ int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *size) {
2003         JournalFile *f;
2004         uint64_t i, n;
2005         size_t field_length;
2006         int r;
2007         Object *o;
2008
2009         assert_return(j, -EINVAL);
2010         assert_return(!journal_pid_changed(j), -ECHILD);
2011         assert_return(field, -EINVAL);
2012         assert_return(data, -EINVAL);
2013         assert_return(size, -EINVAL);
2014         assert_return(field_is_valid(field), -EINVAL);
2015
2016         f = j->current_file;
2017         if (!f)
2018                 return -EADDRNOTAVAIL;
2019
2020         if (f->current_offset <= 0)
2021                 return -EADDRNOTAVAIL;
2022
2023         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2024         if (r < 0)
2025                 return r;
2026
2027         field_length = strlen(field);
2028
2029         n = journal_file_entry_n_items(o);
2030         for (i = 0; i < n; i++) {
2031                 uint64_t p, l;
2032                 le64_t le_hash;
2033                 size_t t;
2034                 int compression;
2035
2036                 p = le64toh(o->entry.items[i].object_offset);
2037                 le_hash = o->entry.items[i].hash;
2038                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2039                 if (r < 0)
2040                         return r;
2041
2042                 if (le_hash != o->data.hash)
2043                         return -EBADMSG;
2044
2045                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2046
2047                 compression = o->object.flags & OBJECT_COMPRESSION_MASK;
2048                 if (compression) {
2049 #if defined(HAVE_XZ) || defined(HAVE_LZ4)
2050                         if (decompress_startswith(compression,
2051                                                   o->data.payload, l,
2052                                                   &f->compress_buffer, &f->compress_buffer_size,
2053                                                   field, field_length, '=')) {
2054
2055                                 size_t rsize;
2056
2057                                 r = decompress_blob(compression,
2058                                                     o->data.payload, l,
2059                                                     &f->compress_buffer, &f->compress_buffer_size, &rsize,
2060                                                     j->data_threshold);
2061                                 if (r < 0)
2062                                         return r;
2063
2064                                 *data = f->compress_buffer;
2065                                 *size = (size_t) rsize;
2066
2067                                 return 0;
2068                         }
2069 #else
2070                         return -EPROTONOSUPPORT;
2071 #endif
2072                 } else if (l >= field_length+1 &&
2073                            memcmp(o->data.payload, field, field_length) == 0 &&
2074                            o->data.payload[field_length] == '=') {
2075
2076                         t = (size_t) l;
2077
2078                         if ((uint64_t) t != l)
2079                                 return -E2BIG;
2080
2081                         *data = o->data.payload;
2082                         *size = t;
2083
2084                         return 0;
2085                 }
2086
2087                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2088                 if (r < 0)
2089                         return r;
2090         }
2091
2092         return -ENOENT;
2093 }
2094
2095 static int return_data(sd_journal *j, JournalFile *f, Object *o, const void **data, size_t *size) {
2096         size_t t;
2097         uint64_t l;
2098         int compression;
2099
2100         l = le64toh(o->object.size) - offsetof(Object, data.payload);
2101         t = (size_t) l;
2102
2103         /* We can't read objects larger than 4G on a 32bit machine */
2104         if ((uint64_t) t != l)
2105                 return -E2BIG;
2106
2107         compression = o->object.flags & OBJECT_COMPRESSION_MASK;
2108         if (compression) {
2109 #if defined(HAVE_XZ) || defined(HAVE_LZ4)
2110                 size_t rsize;
2111                 int r;
2112
2113                 r = decompress_blob(compression,
2114                                     o->data.payload, l, &f->compress_buffer,
2115                                     &f->compress_buffer_size, &rsize, j->data_threshold);
2116                 if (r < 0)
2117                         return r;
2118
2119                 *data = f->compress_buffer;
2120                 *size = (size_t) rsize;
2121 #else
2122                 return -EPROTONOSUPPORT;
2123 #endif
2124         } else {
2125                 *data = o->data.payload;
2126                 *size = t;
2127         }
2128
2129         return 0;
2130 }
2131
2132 _public_ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) {
2133         JournalFile *f;
2134         uint64_t p, n;
2135         le64_t le_hash;
2136         int r;
2137         Object *o;
2138
2139         assert_return(j, -EINVAL);
2140         assert_return(!journal_pid_changed(j), -ECHILD);
2141         assert_return(data, -EINVAL);
2142         assert_return(size, -EINVAL);
2143
2144         f = j->current_file;
2145         if (!f)
2146                 return -EADDRNOTAVAIL;
2147
2148         if (f->current_offset <= 0)
2149                 return -EADDRNOTAVAIL;
2150
2151         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2152         if (r < 0)
2153                 return r;
2154
2155         n = journal_file_entry_n_items(o);
2156         if (j->current_field >= n)
2157                 return 0;
2158
2159         p = le64toh(o->entry.items[j->current_field].object_offset);
2160         le_hash = o->entry.items[j->current_field].hash;
2161         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2162         if (r < 0)
2163                 return r;
2164
2165         if (le_hash != o->data.hash)
2166                 return -EBADMSG;
2167
2168         r = return_data(j, f, o, data, size);
2169         if (r < 0)
2170                 return r;
2171
2172         j->current_field ++;
2173
2174         return 1;
2175 }
2176
2177 _public_ void sd_journal_restart_data(sd_journal *j) {
2178         if (!j)
2179                 return;
2180
2181         j->current_field = 0;
2182 }
2183
2184 _public_ int sd_journal_get_fd(sd_journal *j) {
2185         int r;
2186
2187         assert_return(j, -EINVAL);
2188         assert_return(!journal_pid_changed(j), -ECHILD);
2189
2190         if (j->inotify_fd >= 0)
2191                 return j->inotify_fd;
2192
2193         r = allocate_inotify(j);
2194         if (r < 0)
2195                 return r;
2196
2197         /* Iterate through all dirs again, to add them to the
2198          * inotify */
2199         if (j->no_new_files)
2200                 r = add_current_paths(j);
2201         else if (j->path)
2202                 r = add_root_directory(j, j->path);
2203         else
2204                 r = add_search_paths(j);
2205         if (r < 0)
2206                 return r;
2207
2208         return j->inotify_fd;
2209 }
2210
2211 _public_ int sd_journal_get_events(sd_journal *j) {
2212         int fd;
2213
2214         assert_return(j, -EINVAL);
2215         assert_return(!journal_pid_changed(j), -ECHILD);
2216
2217         fd = sd_journal_get_fd(j);
2218         if (fd < 0)
2219                 return fd;
2220
2221         return POLLIN;
2222 }
2223
2224 _public_ int sd_journal_get_timeout(sd_journal *j, uint64_t *timeout_usec) {
2225         int fd;
2226
2227         assert_return(j, -EINVAL);
2228         assert_return(!journal_pid_changed(j), -ECHILD);
2229         assert_return(timeout_usec, -EINVAL);
2230
2231         fd = sd_journal_get_fd(j);
2232         if (fd < 0)
2233                 return fd;
2234
2235         if (!j->on_network) {
2236                 *timeout_usec = (uint64_t) -1;
2237                 return 0;
2238         }
2239
2240         /* If we are on the network we need to regularly check for
2241          * changes manually */
2242
2243         *timeout_usec = j->last_process_usec + JOURNAL_FILES_RECHECK_USEC;
2244         return 1;
2245 }
2246
/* Handles one inotify event "e" for journal context "j".
 *
 * The event's watch descriptor is looked up in j->directories_by_wd. For a
 * known directory the event is classified as (a) an event on a
 * "*.journal" / "*.journal~" file, (b) an event on a non-root directory
 * itself, or (c) an event on a subdirectory of a root directory whose name
 * parses as a 128bit ID, and the file or directory is added or removed
 * accordingly. Failures are only logged at debug level; add_file()
 * failures are additionally recorded via set_put_error().
 *
 * Events for unknown watch descriptors are ignored if they carry
 * IN_IGNORED, otherwise a warning is logged. */
static void process_inotify_event(sd_journal *j, struct inotify_event *e) {
        Directory *d;
        int r;

        assert(j);
        assert(e);

        /* Is this a subdirectory we watch? */
        d = hashmap_get(j->directories_by_wd, INT_TO_PTR(e->wd));
        if (d) {
                sd_id128_t id;

                /* e->len > 0 means the event carries a name, i.e. refers to
                 * something inside the watched directory. */
                if (!(e->mask & IN_ISDIR) && e->len > 0 &&
                    (endswith(e->name, ".journal") ||
                     endswith(e->name, ".journal~"))) {

                        /* Event for a journal file */

                        if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) {
                                r = add_file(j, d->path, e->name);
                                if (r < 0) {
                                        log_debug_errno(r, "Failed to add file %s/%s: %m",
                                                        d->path, e->name);
                                        set_put_error(j, r);
                                }

                        } else if (e->mask & (IN_DELETE|IN_MOVED_FROM|IN_UNMOUNT)) {

                                r = remove_file(j, d->path, e->name);
                                if (r < 0)
                                        log_debug_errno(r, "Failed to remove file %s/%s: %m", d->path, e->name);
                        }

                } else if (!d->is_root && e->len == 0) {

                        /* Event for a subdirectory */

                        /* No name attached: the event is about the watched
                         * directory itself going away. */
                        if (e->mask & (IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT)) {
                                r = remove_directory(j, d);
                                if (r < 0)
                                        log_debug_errno(r, "Failed to remove directory %s: %m", d->path);
                        }


                } else if (d->is_root && (e->mask & IN_ISDIR) && e->len > 0 && sd_id128_from_string(e->name, &id) >= 0) {

                        /* Event for root directory */

                        /* Only subdirectories whose name parses as a 128bit
                         * ID are picked up. */
                        if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) {
                                r = add_directory(j, d->path, e->name);
                                if (r < 0)
                                        log_debug_errno(r, "Failed to add directory %s/%s: %m", d->path, e->name);
                        }
                }

                return;
        }

        /* Unknown watch descriptor: IN_IGNORED is silently accepted,
         * anything else is unexpected. */
        if (e->mask & IN_IGNORED)
                return;

        log_warning("Unknown inotify event.");
}
2310
2311 static int determine_change(sd_journal *j) {
2312         bool b;
2313
2314         assert(j);
2315
2316         b = j->current_invalidate_counter != j->last_invalidate_counter;
2317         j->last_invalidate_counter = j->current_invalidate_counter;
2318
2319         return b ? SD_JOURNAL_INVALIDATE : SD_JOURNAL_APPEND;
2320 }
2321
2322 _public_ int sd_journal_process(sd_journal *j) {
2323         bool got_something = false;
2324
2325         assert_return(j, -EINVAL);
2326         assert_return(!journal_pid_changed(j), -ECHILD);
2327
2328         j->last_process_usec = now(CLOCK_MONOTONIC);
2329
2330         for (;;) {
2331                 uint8_t buffer[INOTIFY_EVENT_MAX] _alignas_(struct inotify_event);
2332                 struct inotify_event *e;
2333                 ssize_t l;
2334
2335                 l = read(j->inotify_fd, buffer, sizeof(buffer));
2336                 if (l < 0) {
2337                         if (errno == EAGAIN || errno == EINTR)
2338                                 return got_something ? determine_change(j) : SD_JOURNAL_NOP;
2339
2340                         return -errno;
2341                 }
2342
2343                 got_something = true;
2344
2345                 FOREACH_INOTIFY_EVENT(e, buffer, l)
2346                         process_inotify_event(j, e);
2347         }
2348 }
2349
2350 _public_ int sd_journal_wait(sd_journal *j, uint64_t timeout_usec) {
2351         int r;
2352         uint64_t t;
2353
2354         assert_return(j, -EINVAL);
2355         assert_return(!journal_pid_changed(j), -ECHILD);
2356
2357         if (j->inotify_fd < 0) {
2358
2359                 /* This is the first invocation, hence create the
2360                  * inotify watch */
2361                 r = sd_journal_get_fd(j);
2362                 if (r < 0)
2363                         return r;
2364
2365                 /* The journal might have changed since the context
2366                  * object was created and we weren't watching before,
2367                  * hence don't wait for anything, and return
2368                  * immediately. */
2369                 return determine_change(j);
2370         }
2371
2372         r = sd_journal_get_timeout(j, &t);
2373         if (r < 0)
2374                 return r;
2375
2376         if (t != (uint64_t) -1) {
2377                 usec_t n;
2378
2379                 n = now(CLOCK_MONOTONIC);
2380                 t = t > n ? t - n : 0;
2381
2382                 if (timeout_usec == (uint64_t) -1 || timeout_usec > t)
2383                         timeout_usec = t;
2384         }
2385
2386         do {
2387                 r = fd_wait_for_event(j->inotify_fd, POLLIN, timeout_usec);
2388         } while (r == -EINTR);
2389
2390         if (r < 0)
2391                 return r;
2392
2393         return sd_journal_process(j);
2394 }
2395
2396 _public_ int sd_journal_get_cutoff_realtime_usec(sd_journal *j, uint64_t *from, uint64_t *to) {
2397         Iterator i;
2398         JournalFile *f;
2399         bool first = true;
2400         uint64_t fmin = 0, tmax = 0;
2401         int r;
2402
2403         assert_return(j, -EINVAL);
2404         assert_return(!journal_pid_changed(j), -ECHILD);
2405         assert_return(from || to, -EINVAL);
2406         assert_return(from != to, -EINVAL);
2407
2408         ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2409                 usec_t fr, t;
2410
2411                 r = journal_file_get_cutoff_realtime_usec(f, &fr, &t);
2412                 if (r == -ENOENT)
2413                         continue;
2414                 if (r < 0)
2415                         return r;
2416                 if (r == 0)
2417                         continue;
2418
2419                 if (first) {
2420                         fmin = fr;
2421                         tmax = t;
2422                         first = false;
2423                 } else {
2424                         fmin = MIN(fr, fmin);
2425                         tmax = MAX(t, tmax);
2426                 }
2427         }
2428
2429         if (from)
2430                 *from = fmin;
2431         if (to)
2432                 *to = tmax;
2433
2434         return first ? 0 : 1;
2435 }
2436
2437 _public_ int sd_journal_get_cutoff_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t *from, uint64_t *to) {
2438         Iterator i;
2439         JournalFile *f;
2440         bool found = false;
2441         int r;
2442
2443         assert_return(j, -EINVAL);
2444         assert_return(!journal_pid_changed(j), -ECHILD);
2445         assert_return(from || to, -EINVAL);
2446         assert_return(from != to, -EINVAL);
2447
2448         ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2449                 usec_t fr, t;
2450
2451                 r = journal_file_get_cutoff_monotonic_usec(f, boot_id, &fr, &t);
2452                 if (r == -ENOENT)
2453                         continue;
2454                 if (r < 0)
2455                         return r;
2456                 if (r == 0)
2457                         continue;
2458
2459                 if (found) {
2460                         if (from)
2461                                 *from = MIN(fr, *from);
2462                         if (to)
2463                                 *to = MAX(t, *to);
2464                 } else {
2465                         if (from)
2466                                 *from = fr;
2467                         if (to)
2468                                 *to = t;
2469                         found = true;
2470                 }
2471         }
2472
2473         return found;
2474 }
2475
2476 void journal_print_header(sd_journal *j) {
2477         Iterator i;
2478         JournalFile *f;
2479         bool newline = false;
2480
2481         assert(j);
2482
2483         ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2484                 if (newline)
2485                         putchar('\n');
2486                 else
2487                         newline = true;
2488
2489                 journal_file_print_header(f);
2490         }
2491 }
2492
2493 _public_ int sd_journal_get_usage(sd_journal *j, uint64_t *bytes) {
2494         Iterator i;
2495         JournalFile *f;
2496         uint64_t sum = 0;
2497
2498         assert_return(j, -EINVAL);
2499         assert_return(!journal_pid_changed(j), -ECHILD);
2500         assert_return(bytes, -EINVAL);
2501
2502         ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2503                 struct stat st;
2504
2505                 if (fstat(f->fd, &st) < 0)
2506                         return -errno;
2507
2508                 sum += (uint64_t) st.st_blocks * 512ULL;
2509         }
2510
2511         *bytes = sum;
2512         return 0;
2513 }
2514
2515 _public_ int sd_journal_query_unique(sd_journal *j, const char *field) {
2516         char *f;
2517
2518         assert_return(j, -EINVAL);
2519         assert_return(!journal_pid_changed(j), -ECHILD);
2520         assert_return(!isempty(field), -EINVAL);
2521         assert_return(field_is_valid(field), -EINVAL);
2522
2523         f = strdup(field);
2524         if (!f)
2525                 return -ENOMEM;
2526
2527         free(j->unique_field);
2528         j->unique_field = f;
2529         j->unique_file = NULL;
2530         j->unique_offset = 0;
2531         j->unique_file_lost = false;
2532
2533         return 0;
2534 }
2535
/* Returns the next unique value of the field selected with
 * sd_journal_query_unique().
 *
 * The data objects linked from the field object are walked file by file
 * (state is kept in j->unique_file / j->unique_offset). A data object is
 * skipped if an identical one exists in any file that precedes the current
 * one in j->files, since it was already returned when that file was
 * traversed.
 *
 * Returns 1 and sets *data / *l (payload including the "FIELD=" prefix) if
 * a value was found, 0 when the enumeration is finished, -EBADMSG if a
 * linked object is not a data object or does not start with "FIELD=", or
 * another negative errno-style code. */
_public_ int sd_journal_enumerate_unique(sd_journal *j, const void **data, size_t *l) {
        size_t k;

        assert_return(j, -EINVAL);
        assert_return(!journal_pid_changed(j), -ECHILD);
        assert_return(data, -EINVAL);
        assert_return(l, -EINVAL);
        assert_return(j->unique_field, -EINVAL);

        k = strlen(j->unique_field);

        /* No file selected yet: start with the first one, unless the
         * enumeration was marked as having lost its file. */
        if (!j->unique_file) {
                if (j->unique_file_lost)
                        return 0;

                j->unique_file = ordered_hashmap_first(j->files);
                if (!j->unique_file)
                        return 0;

                j->unique_offset = 0;
        }

        for (;;) {
                JournalFile *of;
                Iterator i;
                Object *o;
                const void *odata;
                size_t ol;
                bool found;
                int r;

                /* Proceed to next data object in the field's linked list */
                if (j->unique_offset == 0) {
                        /* Offset 0 means we have not started on this file:
                         * look up the field object and take the head of its
                         * data list (or stay at 0 if the field is absent). */
                        r = journal_file_find_field_object(j->unique_file, j->unique_field, k, &o, NULL);
                        if (r < 0)
                                return r;

                        j->unique_offset = r > 0 ? le64toh(o->field.head_data_offset) : 0;
                } else {
                        r = journal_file_move_to_object(j->unique_file, OBJECT_DATA, j->unique_offset, &o);
                        if (r < 0)
                                return r;

                        j->unique_offset = le64toh(o->data.next_field_offset);
                }

                /* We reached the end of the list? Then start again, with the next file */
                if (j->unique_offset == 0) {
                        j->unique_file = ordered_hashmap_next(j->files, j->unique_file->path);
                        if (!j->unique_file)
                                return 0;

                        continue;
                }

                /* We do not use OBJECT_DATA context here, but OBJECT_UNUSED
                 * instead, so that we can look at this data object at the same
                 * time as one on another file */
                r = journal_file_move_to_object(j->unique_file, OBJECT_UNUSED, j->unique_offset, &o);
                if (r < 0)
                        return r;

                /* Let's do the type check by hand, since we used 0 context above. */
                if (o->object.type != OBJECT_DATA) {
                        log_debug("%s:offset " OFSfmt ": object has type %d, expected %d",
                                  j->unique_file->path, j->unique_offset,
                                  o->object.type, OBJECT_DATA);
                        return -EBADMSG;
                }

                /* Fetch the payload into locals first; it is only needed
                 * for validation and the duplicate lookup below. */
                r = return_data(j, j->unique_file, o, &odata, &ol);
                if (r < 0)
                        return r;

                /* Check if we have at least the field name and "=". */
                if (ol <= k) {
                        log_debug("%s:offset " OFSfmt ": object has size %zu, expected at least %zu",
                                  j->unique_file->path, j->unique_offset,
                                  ol, k + 1);
                        return -EBADMSG;
                }

                if (memcmp(odata, j->unique_field, k) || ((const char*) odata)[k] != '=') {
                        log_debug("%s:offset " OFSfmt ": object does not start with \"%s=\"",
                                  j->unique_file->path, j->unique_offset,
                                  j->unique_field);
                        return -EBADMSG;
                }

                /* OK, now let's see if we already returned this data
                 * object by checking if it exists in the earlier
                 * traversed files. */
                found = false;
                ORDERED_HASHMAP_FOREACH(of, j->files, i) {
                        Object *oo;
                        uint64_t op;

                        /* Only files ordered before the current one count
                         * as "earlier traversed". */
                        if (of == j->unique_file)
                                break;

                        /* Skip this file it didn't have any fields
                         * indexed */
                        if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) &&
                            le64toh(of->header->n_fields) <= 0)
                                continue;

                        r = journal_file_find_data_object_with_hash(of, odata, ol, le64toh(o->data.hash), &oo, &op);
                        if (r < 0)
                                return r;

                        if (r > 0)
                                found = true;
                }

                if (found)
                        continue;

                /* Not a duplicate: hand the payload out to the caller. */
                r = return_data(j, j->unique_file, o, data, l);
                if (r < 0)
                        return r;

                return 1;
        }
}
2660
2661 _public_ void sd_journal_restart_unique(sd_journal *j) {
2662         if (!j)
2663                 return;
2664
2665         j->unique_file = NULL;
2666         j->unique_offset = 0;
2667         j->unique_file_lost = false;
2668 }
2669
2670 _public_ int sd_journal_reliable_fd(sd_journal *j) {
2671         assert_return(j, -EINVAL);
2672         assert_return(!journal_pid_changed(j), -ECHILD);
2673
2674         return !j->on_network;
2675 }
2676
2677 static char *lookup_field(const char *field, void *userdata) {
2678         sd_journal *j = userdata;
2679         const void *data;
2680         size_t size, d;
2681         int r;
2682
2683         assert(field);
2684         assert(j);
2685
2686         r = sd_journal_get_data(j, field, &data, &size);
2687         if (r < 0 ||
2688             size > REPLACE_VAR_MAX)
2689                 return strdup(field);
2690
2691         d = strlen(field) + 1;
2692
2693         return strndup((const char*) data + d, size - d);
2694 }
2695
2696 _public_ int sd_journal_get_catalog(sd_journal *j, char **ret) {
2697         const void *data;
2698         size_t size;
2699         sd_id128_t id;
2700         _cleanup_free_ char *text = NULL, *cid = NULL;
2701         char *t;
2702         int r;
2703
2704         assert_return(j, -EINVAL);
2705         assert_return(!journal_pid_changed(j), -ECHILD);
2706         assert_return(ret, -EINVAL);
2707
2708         r = sd_journal_get_data(j, "MESSAGE_ID", &data, &size);
2709         if (r < 0)
2710                 return r;
2711
2712         cid = strndup((const char*) data + 11, size - 11);
2713         if (!cid)
2714                 return -ENOMEM;
2715
2716         r = sd_id128_from_string(cid, &id);
2717         if (r < 0)
2718                 return r;
2719
2720         r = catalog_get(CATALOG_DATABASE, id, &text);
2721         if (r < 0)
2722                 return r;
2723
2724         t = replace_var(text, lookup_field, j);
2725         if (!t)
2726                 return -ENOMEM;
2727
2728         *ret = t;
2729         return 0;
2730 }
2731
2732 _public_ int sd_journal_get_catalog_for_message_id(sd_id128_t id, char **ret) {
2733         assert_return(ret, -EINVAL);
2734
2735         return catalog_get(CATALOG_DATABASE, id, ret);
2736 }
2737
2738 _public_ int sd_journal_set_data_threshold(sd_journal *j, size_t sz) {
2739         assert_return(j, -EINVAL);
2740         assert_return(!journal_pid_changed(j), -ECHILD);
2741
2742         j->data_threshold = sz;
2743         return 0;
2744 }
2745
2746 _public_ int sd_journal_get_data_threshold(sd_journal *j, size_t *sz) {
2747         assert_return(j, -EINVAL);
2748         assert_return(!journal_pid_changed(j), -ECHILD);
2749         assert_return(sz, -EINVAL);
2750
2751         *sz = j->data_threshold;
2752         return 0;
2753 }