chiark / gitweb /
b72a0867e78c7d5a6a2f089ef99b2c2f04089af8
[elogind.git] / src / journal / sd-journal.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <stddef.h>
25 #include <unistd.h>
26 #include <sys/inotify.h>
27 #include <sys/poll.h>
28 #include <sys/vfs.h>
29 #include <linux/magic.h>
30
31 #include "sd-journal.h"
32 #include "journal-def.h"
33 #include "journal-file.h"
34 #include "hashmap.h"
35 #include "list.h"
36 #include "strv.h"
37 #include "path-util.h"
38 #include "lookup3.h"
39 #include "compress.h"
40 #include "journal-internal.h"
41 #include "missing.h"
42 #include "catalog.h"
43 #include "replace-var.h"
44 #include "fileio.h"
45
46 #define JOURNAL_FILES_MAX 1024
47
48 #define JOURNAL_FILES_RECHECK_USEC (2 * USEC_PER_SEC)
49
50 #define REPLACE_VAR_MAX 256
51
52 #define DEFAULT_DATA_THRESHOLD (64*1024)
53
54 static void remove_file_real(sd_journal *j, JournalFile *f);
55
56 static bool journal_pid_changed(sd_journal *j) {
57         assert(j);
58
59         /* We don't support people creating a journal object and
60          * keeping it around over a fork(). Let's complain. */
61
62         return j->original_pid != getpid();
63 }
64
65 /* We return an error here only if we didn't manage to
66    memorize the real error. */
67 static int set_put_error(sd_journal *j, int r) {
68         int k;
69
70         if (r >= 0)
71                 return r;
72
73         k = set_ensure_allocated(&j->errors, NULL);
74         if (k < 0)
75                 return k;
76
77         return set_put(j->errors, INT_TO_PTR(r));
78 }
79
80 static void detach_location(sd_journal *j) {
81         Iterator i;
82         JournalFile *f;
83
84         assert(j);
85
86         j->current_file = NULL;
87         j->current_field = 0;
88
89         HASHMAP_FOREACH(f, j->files, i)
90                 f->current_offset = 0;
91 }
92
93 static void reset_location(sd_journal *j) {
94         assert(j);
95
96         detach_location(j);
97         zero(j->current_location);
98 }
99
100 static void init_location(Location *l, LocationType type, JournalFile *f, Object *o) {
101         assert(l);
102         assert(type == LOCATION_DISCRETE || type == LOCATION_SEEK);
103         assert(f);
104         assert(o->object.type == OBJECT_ENTRY);
105
106         l->type = type;
107         l->seqnum = le64toh(o->entry.seqnum);
108         l->seqnum_id = f->header->seqnum_id;
109         l->realtime = le64toh(o->entry.realtime);
110         l->monotonic = le64toh(o->entry.monotonic);
111         l->boot_id = o->entry.boot_id;
112         l->xor_hash = le64toh(o->entry.xor_hash);
113
114         l->seqnum_set = l->realtime_set = l->monotonic_set = l->xor_hash_set = true;
115 }
116
117 static void set_location(sd_journal *j, LocationType type, JournalFile *f, Object *o,
118                          direction_t direction, uint64_t offset) {
119         assert(j);
120         assert(type == LOCATION_DISCRETE || type == LOCATION_SEEK);
121         assert(f);
122         assert(o);
123
124         init_location(&j->current_location, type, f, o);
125
126         j->current_file = f;
127         j->current_field = 0;
128
129         f->last_direction = direction;
130         f->current_offset = offset;
131 }
132
133 static int match_is_valid(const void *data, size_t size) {
134         const char *b, *p;
135
136         assert(data);
137
138         if (size < 2)
139                 return false;
140
141         if (startswith(data, "__"))
142                 return false;
143
144         b = data;
145         for (p = b; p < b + size; p++) {
146
147                 if (*p == '=')
148                         return p > b;
149
150                 if (*p == '_')
151                         continue;
152
153                 if (*p >= 'A' && *p <= 'Z')
154                         continue;
155
156                 if (*p >= '0' && *p <= '9')
157                         continue;
158
159                 return false;
160         }
161
162         return false;
163 }
164
/* Tells whether two "FIELD=value" blobs (of s and t bytes) share the
 * same field name, i.e. are byte-wise identical up to and including the
 * first '='. Hits assert_not_reached() if no '=' is encountered within
 * the common length without a prior mismatch. */
static bool same_field(const void *_a, size_t s, const void *_b, size_t t) {
        const uint8_t *x = _a, *y = _b;
        size_t limit = s < t ? s : t;
        size_t k;

        for (k = 0; k < limit; k++) {

                if (x[k] != y[k])
                        return false;

                if (x[k] == '=')
                        return true;
        }

        assert_not_reached("\"=\" not found");
}
180
181 static Match *match_new(Match *p, MatchType t) {
182         Match *m;
183
184         m = new0(Match, 1);
185         if (!m)
186                 return NULL;
187
188         m->type = t;
189
190         if (p) {
191                 m->parent = p;
192                 LIST_PREPEND(matches, p->matches, m);
193         }
194
195         return m;
196 }
197
198 static void match_free(Match *m) {
199         assert(m);
200
201         while (m->matches)
202                 match_free(m->matches);
203
204         if (m->parent)
205                 LIST_REMOVE(matches, m->parent->matches, m);
206
207         free(m->data);
208         free(m);
209 }
210
211 static void match_free_if_empty(Match *m) {
212         if (!m || m->matches)
213                 return;
214
215         match_free(m);
216 }
217
218 _public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size) {
219         Match *l3, *l4, *add_here = NULL, *m;
220         le64_t le_hash;
221
222         assert_return(j, -EINVAL);
223         assert_return(!journal_pid_changed(j), -ECHILD);
224         assert_return(data, -EINVAL);
225
226         if (size == 0)
227                 size = strlen(data);
228
229         assert_return(match_is_valid(data, size), -EINVAL);
230
231         /* level 0: AND term
232          * level 1: OR terms
233          * level 2: AND terms
234          * level 3: OR terms
235          * level 4: concrete matches */
236
237         if (!j->level0) {
238                 j->level0 = match_new(NULL, MATCH_AND_TERM);
239                 if (!j->level0)
240                         return -ENOMEM;
241         }
242
243         if (!j->level1) {
244                 j->level1 = match_new(j->level0, MATCH_OR_TERM);
245                 if (!j->level1)
246                         return -ENOMEM;
247         }
248
249         if (!j->level2) {
250                 j->level2 = match_new(j->level1, MATCH_AND_TERM);
251                 if (!j->level2)
252                         return -ENOMEM;
253         }
254
255         assert(j->level0->type == MATCH_AND_TERM);
256         assert(j->level1->type == MATCH_OR_TERM);
257         assert(j->level2->type == MATCH_AND_TERM);
258
259         le_hash = htole64(hash64(data, size));
260
261         LIST_FOREACH(matches, l3, j->level2->matches) {
262                 assert(l3->type == MATCH_OR_TERM);
263
264                 LIST_FOREACH(matches, l4, l3->matches) {
265                         assert(l4->type == MATCH_DISCRETE);
266
267                         /* Exactly the same match already? Then ignore
268                          * this addition */
269                         if (l4->le_hash == le_hash &&
270                             l4->size == size &&
271                             memcmp(l4->data, data, size) == 0)
272                                 return 0;
273
274                         /* Same field? Then let's add this to this OR term */
275                         if (same_field(data, size, l4->data, l4->size)) {
276                                 add_here = l3;
277                                 break;
278                         }
279                 }
280
281                 if (add_here)
282                         break;
283         }
284
285         if (!add_here) {
286                 add_here = match_new(j->level2, MATCH_OR_TERM);
287                 if (!add_here)
288                         goto fail;
289         }
290
291         m = match_new(add_here, MATCH_DISCRETE);
292         if (!m)
293                 goto fail;
294
295         m->le_hash = le_hash;
296         m->size = size;
297         m->data = memdup(data, size);
298         if (!m->data)
299                 goto fail;
300
301         detach_location(j);
302
303         return 0;
304
305 fail:
306         match_free_if_empty(add_here);
307         match_free_if_empty(j->level2);
308         match_free_if_empty(j->level1);
309         match_free_if_empty(j->level0);
310
311         return -ENOMEM;
312 }
313
314 _public_ int sd_journal_add_conjunction(sd_journal *j) {
315         assert_return(j, -EINVAL);
316         assert_return(!journal_pid_changed(j), -ECHILD);
317
318         if (!j->level0)
319                 return 0;
320
321         if (!j->level1)
322                 return 0;
323
324         if (!j->level1->matches)
325                 return 0;
326
327         j->level1 = NULL;
328         j->level2 = NULL;
329
330         return 0;
331 }
332
333 _public_ int sd_journal_add_disjunction(sd_journal *j) {
334         assert_return(j, -EINVAL);
335         assert_return(!journal_pid_changed(j), -ECHILD);
336
337         if (!j->level0)
338                 return 0;
339
340         if (!j->level1)
341                 return 0;
342
343         if (!j->level2)
344                 return 0;
345
346         if (!j->level2->matches)
347                 return 0;
348
349         j->level2 = NULL;
350         return 0;
351 }
352
353 static char *match_make_string(Match *m) {
354         char *p, *r;
355         Match *i;
356         bool enclose = false;
357
358         if (!m)
359                 return strdup("none");
360
361         if (m->type == MATCH_DISCRETE)
362                 return strndup(m->data, m->size);
363
364         p = NULL;
365         LIST_FOREACH(matches, i, m->matches) {
366                 char *t, *k;
367
368                 t = match_make_string(i);
369                 if (!t) {
370                         free(p);
371                         return NULL;
372                 }
373
374                 if (p) {
375                         k = strjoin(p, m->type == MATCH_OR_TERM ? " OR " : " AND ", t, NULL);
376                         free(p);
377                         free(t);
378
379                         if (!k)
380                                 return NULL;
381
382                         p = k;
383
384                         enclose = true;
385                 } else
386                         p = t;
387         }
388
389         if (enclose) {
390                 r = strjoin("(", p, ")", NULL);
391                 free(p);
392                 return r;
393         }
394
395         return p;
396 }
397
398 char *journal_make_match_string(sd_journal *j) {
399         assert(j);
400
401         return match_make_string(j->level0);
402 }
403
404 _public_ void sd_journal_flush_matches(sd_journal *j) {
405         if (!j)
406                 return;
407
408         if (j->level0)
409                 match_free(j->level0);
410
411         j->level0 = j->level1 = j->level2 = NULL;
412
413         detach_location(j);
414 }
415
416 static int compare_entry_order(JournalFile *af, Object *_ao,
417                                JournalFile *bf, uint64_t bp) {
418
419         uint64_t a, b;
420         Object *ao, *bo;
421         int r;
422
423         assert(af);
424         assert(bf);
425         assert(_ao);
426
427         /* The mmap cache might invalidate the object from the first
428          * file if we look at the one from the second file. Hence
429          * temporarily copy the header of the first one, and look at
430          * that only. */
431         ao = alloca(offsetof(EntryObject, items));
432         memcpy(ao, _ao, offsetof(EntryObject, items));
433
434         r = journal_file_move_to_object(bf, OBJECT_ENTRY, bp, &bo);
435         if (r < 0)
436                 return strcmp(af->path, bf->path);
437
438         /* We operate on two different files here, hence we can access
439          * two objects at the same time, which we normally can't.
440          *
441          * If contents and timestamps match, these entries are
442          * identical, even if the seqnum does not match */
443
444         if (sd_id128_equal(ao->entry.boot_id, bo->entry.boot_id) &&
445             ao->entry.monotonic == bo->entry.monotonic &&
446             ao->entry.realtime == bo->entry.realtime &&
447             ao->entry.xor_hash == bo->entry.xor_hash)
448                 return 0;
449
450         if (sd_id128_equal(af->header->seqnum_id, bf->header->seqnum_id)) {
451
452                 /* If this is from the same seqnum source, compare
453                  * seqnums */
454                 a = le64toh(ao->entry.seqnum);
455                 b = le64toh(bo->entry.seqnum);
456
457                 if (a < b)
458                         return -1;
459                 if (a > b)
460                         return 1;
461
462                 /* Wow! This is weird, different data but the same
463                  * seqnums? Something is borked, but let's make the
464                  * best of it and compare by time. */
465         }
466
467         if (sd_id128_equal(ao->entry.boot_id, bo->entry.boot_id)) {
468
469                 /* If the boot id matches, compare monotonic time */
470                 a = le64toh(ao->entry.monotonic);
471                 b = le64toh(bo->entry.monotonic);
472
473                 if (a < b)
474                         return -1;
475                 if (a > b)
476                         return 1;
477         }
478
479         /* Otherwise, compare UTC time */
480         a = le64toh(ao->entry.realtime);
481         b = le64toh(bo->entry.realtime);
482
483         if (a < b)
484                 return -1;
485         if (a > b)
486                 return 1;
487
488         /* Finally, compare by contents */
489         a = le64toh(ao->entry.xor_hash);
490         b = le64toh(bo->entry.xor_hash);
491
492         if (a < b)
493                 return -1;
494         if (a > b)
495                 return 1;
496
497         return 0;
498 }
499
500 _pure_ static int compare_with_location(JournalFile *af, Object *ao, Location *l) {
501         uint64_t a;
502
503         assert(af);
504         assert(ao);
505         assert(l);
506         assert(l->type == LOCATION_DISCRETE || l->type == LOCATION_SEEK);
507
508         if (l->monotonic_set &&
509             sd_id128_equal(ao->entry.boot_id, l->boot_id) &&
510             l->realtime_set &&
511             le64toh(ao->entry.realtime) == l->realtime &&
512             l->xor_hash_set &&
513             le64toh(ao->entry.xor_hash) == l->xor_hash)
514                 return 0;
515
516         if (l->seqnum_set &&
517             sd_id128_equal(af->header->seqnum_id, l->seqnum_id)) {
518
519                 a = le64toh(ao->entry.seqnum);
520
521                 if (a < l->seqnum)
522                         return -1;
523                 if (a > l->seqnum)
524                         return 1;
525         }
526
527         if (l->monotonic_set &&
528             sd_id128_equal(ao->entry.boot_id, l->boot_id)) {
529
530                 a = le64toh(ao->entry.monotonic);
531
532                 if (a < l->monotonic)
533                         return -1;
534                 if (a > l->monotonic)
535                         return 1;
536         }
537
538         if (l->realtime_set) {
539
540                 a = le64toh(ao->entry.realtime);
541
542                 if (a < l->realtime)
543                         return -1;
544                 if (a > l->realtime)
545                         return 1;
546         }
547
548         if (l->xor_hash_set) {
549                 a = le64toh(ao->entry.xor_hash);
550
551                 if (a < l->xor_hash)
552                         return -1;
553                 if (a > l->xor_hash)
554                         return 1;
555         }
556
557         return 0;
558 }
559
560 static int next_for_match(
561                 sd_journal *j,
562                 Match *m,
563                 JournalFile *f,
564                 uint64_t after_offset,
565                 direction_t direction,
566                 Object **ret,
567                 uint64_t *offset) {
568
569         int r;
570         uint64_t np = 0;
571         Object *n;
572
573         assert(j);
574         assert(m);
575         assert(f);
576
577         if (m->type == MATCH_DISCRETE) {
578                 uint64_t dp;
579
580                 r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
581                 if (r <= 0)
582                         return r;
583
584                 return journal_file_move_to_entry_by_offset_for_data(f, dp, after_offset, direction, ret, offset);
585
586         } else if (m->type == MATCH_OR_TERM) {
587                 Match *i;
588
589                 /* Find the earliest match beyond after_offset */
590
591                 LIST_FOREACH(matches, i, m->matches) {
592                         uint64_t cp;
593
594                         r = next_for_match(j, i, f, after_offset, direction, NULL, &cp);
595                         if (r < 0)
596                                 return r;
597                         else if (r > 0) {
598                                 if (np == 0 || (direction == DIRECTION_DOWN ? cp < np : cp > np))
599                                         np = cp;
600                         }
601                 }
602
603                 if (np == 0)
604                         return 0;
605
606         } else if (m->type == MATCH_AND_TERM) {
607                 Match *i, *last_moved;
608
609                 /* Always jump to the next matching entry and repeat
610                  * this until we find an offset that matches for all
611                  * matches. */
612
613                 if (!m->matches)
614                         return 0;
615
616                 r = next_for_match(j, m->matches, f, after_offset, direction, NULL, &np);
617                 if (r <= 0)
618                         return r;
619
620                 assert(direction == DIRECTION_DOWN ? np >= after_offset : np <= after_offset);
621                 last_moved = m->matches;
622
623                 LIST_LOOP_BUT_ONE(matches, i, m->matches, last_moved) {
624                         uint64_t cp;
625
626                         r = next_for_match(j, i, f, np, direction, NULL, &cp);
627                         if (r <= 0)
628                                 return r;
629
630                         assert(direction == DIRECTION_DOWN ? cp >= np : cp <= np);
631                         if (direction == DIRECTION_DOWN ? cp > np : cp < np) {
632                                 np = cp;
633                                 last_moved = i;
634                         }
635                 }
636         }
637
638         assert(np > 0);
639
640         r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
641         if (r < 0)
642                 return r;
643
644         if (ret)
645                 *ret = n;
646         if (offset)
647                 *offset = np;
648
649         return 1;
650 }
651
652 static int find_location_for_match(
653                 sd_journal *j,
654                 Match *m,
655                 JournalFile *f,
656                 direction_t direction,
657                 Object **ret,
658                 uint64_t *offset) {
659
660         int r;
661
662         assert(j);
663         assert(m);
664         assert(f);
665
666         if (m->type == MATCH_DISCRETE) {
667                 uint64_t dp;
668
669                 r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
670                 if (r <= 0)
671                         return r;
672
673                 /* FIXME: missing: find by monotonic */
674
675                 if (j->current_location.type == LOCATION_HEAD)
676                         return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_DOWN, ret, offset);
677                 if (j->current_location.type == LOCATION_TAIL)
678                         return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_UP, ret, offset);
679                 if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
680                         return journal_file_move_to_entry_by_seqnum_for_data(f, dp, j->current_location.seqnum, direction, ret, offset);
681                 if (j->current_location.monotonic_set) {
682                         r = journal_file_move_to_entry_by_monotonic_for_data(f, dp, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
683                         if (r != -ENOENT)
684                                 return r;
685                 }
686                 if (j->current_location.realtime_set)
687                         return journal_file_move_to_entry_by_realtime_for_data(f, dp, j->current_location.realtime, direction, ret, offset);
688
689                 return journal_file_next_entry_for_data(f, NULL, 0, dp, direction, ret, offset);
690
691         } else if (m->type == MATCH_OR_TERM) {
692                 uint64_t np = 0;
693                 Object *n;
694                 Match *i;
695
696                 /* Find the earliest match */
697
698                 LIST_FOREACH(matches, i, m->matches) {
699                         uint64_t cp;
700
701                         r = find_location_for_match(j, i, f, direction, NULL, &cp);
702                         if (r < 0)
703                                 return r;
704                         else if (r > 0) {
705                                 if (np == 0 || (direction == DIRECTION_DOWN ? np > cp : np < cp))
706                                         np = cp;
707                         }
708                 }
709
710                 if (np == 0)
711                         return 0;
712
713                 r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
714                 if (r < 0)
715                         return r;
716
717                 if (ret)
718                         *ret = n;
719                 if (offset)
720                         *offset = np;
721
722                 return 1;
723
724         } else {
725                 Match *i;
726                 uint64_t np = 0;
727
728                 assert(m->type == MATCH_AND_TERM);
729
730                 /* First jump to the last match, and then find the
731                  * next one where all matches match */
732
733                 if (!m->matches)
734                         return 0;
735
736                 LIST_FOREACH(matches, i, m->matches) {
737                         uint64_t cp;
738
739                         r = find_location_for_match(j, i, f, direction, NULL, &cp);
740                         if (r <= 0)
741                                 return r;
742
743                         if (np == 0 || (direction == DIRECTION_DOWN ? cp > np : cp < np))
744                                 np = cp;
745                 }
746
747                 return next_for_match(j, m, f, np, direction, ret, offset);
748         }
749 }
750
751 static int find_location_with_matches(
752                 sd_journal *j,
753                 JournalFile *f,
754                 direction_t direction,
755                 Object **ret,
756                 uint64_t *offset) {
757
758         int r;
759
760         assert(j);
761         assert(f);
762         assert(ret);
763         assert(offset);
764
765         if (!j->level0) {
766                 /* No matches is simple */
767
768                 if (j->current_location.type == LOCATION_HEAD)
769                         return journal_file_next_entry(f, NULL, 0, DIRECTION_DOWN, ret, offset);
770                 if (j->current_location.type == LOCATION_TAIL)
771                         return journal_file_next_entry(f, NULL, 0, DIRECTION_UP, ret, offset);
772                 if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
773                         return journal_file_move_to_entry_by_seqnum(f, j->current_location.seqnum, direction, ret, offset);
774                 if (j->current_location.monotonic_set) {
775                         r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
776                         if (r != -ENOENT)
777                                 return r;
778                 }
779                 if (j->current_location.realtime_set)
780                         return journal_file_move_to_entry_by_realtime(f, j->current_location.realtime, direction, ret, offset);
781
782                 return journal_file_next_entry(f, NULL, 0, direction, ret, offset);
783         } else
784                 return find_location_for_match(j, j->level0, f, direction, ret, offset);
785 }
786
787 static int next_with_matches(
788                 sd_journal *j,
789                 JournalFile *f,
790                 direction_t direction,
791                 Object **ret,
792                 uint64_t *offset) {
793
794         Object *c;
795         uint64_t cp;
796
797         assert(j);
798         assert(f);
799         assert(ret);
800         assert(offset);
801
802         c = *ret;
803         cp = *offset;
804
805         /* No matches is easy. We simple advance the file
806          * pointer by one. */
807         if (!j->level0)
808                 return journal_file_next_entry(f, c, cp, direction, ret, offset);
809
810         /* If we have a match then we look for the next matching entry
811          * with an offset at least one step larger */
812         return next_for_match(j, j->level0, f, direction == DIRECTION_DOWN ? cp+1 : cp-1, direction, ret, offset);
813 }
814
815 static int next_beyond_location(sd_journal *j, JournalFile *f, direction_t direction, Object **ret, uint64_t *offset) {
816         Object *c;
817         uint64_t cp;
818         int r;
819
820         assert(j);
821         assert(f);
822
823         if (f->last_direction == direction && f->current_offset > 0) {
824                 cp = f->current_offset;
825
826                 r = journal_file_move_to_object(f, OBJECT_ENTRY, cp, &c);
827                 if (r < 0)
828                         return r;
829
830                 r = next_with_matches(j, f, direction, &c, &cp);
831                 if (r <= 0)
832                         return r;
833         } else {
834                 r = find_location_with_matches(j, f, direction, &c, &cp);
835                 if (r <= 0)
836                         return r;
837         }
838
839         /* OK, we found the spot, now let's advance until an entry
840          * that is actually different from what we were previously
841          * looking at. This is necessary to handle entries which exist
842          * in two (or more) journal files, and which shall all be
843          * suppressed but one. */
844
845         for (;;) {
846                 bool found;
847
848                 if (j->current_location.type == LOCATION_DISCRETE) {
849                         int k;
850
851                         k = compare_with_location(f, c, &j->current_location);
852                         if (direction == DIRECTION_DOWN)
853                                 found = k > 0;
854                         else
855                                 found = k < 0;
856                 } else
857                         found = true;
858
859                 if (found) {
860                         if (ret)
861                                 *ret = c;
862                         if (offset)
863                                 *offset = cp;
864                         return 1;
865                 }
866
867                 r = next_with_matches(j, f, direction, &c, &cp);
868                 if (r <= 0)
869                         return r;
870         }
871 }
872
873 static int real_journal_next(sd_journal *j, direction_t direction) {
874         JournalFile *f, *new_file = NULL;
875         uint64_t new_offset = 0;
876         uint64_t p = 0;
877         Iterator i;
878         Object *o;
879         int r;
880
881         assert_return(j, -EINVAL);
882         assert_return(!journal_pid_changed(j), -ECHILD);
883
884         HASHMAP_FOREACH(f, j->files, i) {
885                 bool found;
886
887                 r = next_beyond_location(j, f, direction, &o, &p);
888                 if (r < 0) {
889                         log_debug("Can't iterate through %s, ignoring: %s", f->path, strerror(-r));
890                         remove_file_real(j, f);
891                         continue;
892                 } else if (r == 0)
893                         continue;
894
895                 if (!new_file)
896                         found = true;
897                 else {
898                         int k;
899
900                         k = compare_entry_order(f, o, new_file, new_offset);
901
902                         found = direction == DIRECTION_DOWN ? k < 0 : k > 0;
903                 }
904
905                 if (found) {
906                         new_file = f;
907                         new_offset = p;
908                 }
909         }
910
911         if (!new_file)
912                 return 0;
913
914         r = journal_file_move_to_object(new_file, OBJECT_ENTRY, new_offset, &o);
915         if (r < 0)
916                 return r;
917
918         set_location(j, LOCATION_DISCRETE, new_file, o, direction, new_offset);
919
920         return 1;
921 }
922
923 _public_ int sd_journal_next(sd_journal *j) {
924         return real_journal_next(j, DIRECTION_DOWN);
925 }
926
927 _public_ int sd_journal_previous(sd_journal *j) {
928         return real_journal_next(j, DIRECTION_UP);
929 }
930
931 static int real_journal_next_skip(sd_journal *j, direction_t direction, uint64_t skip) {
932         int c = 0, r;
933
934         assert_return(j, -EINVAL);
935         assert_return(!journal_pid_changed(j), -ECHILD);
936
937         if (skip == 0) {
938                 /* If this is not a discrete skip, then at least
939                  * resolve the current location */
940                 if (j->current_location.type != LOCATION_DISCRETE)
941                         return real_journal_next(j, direction);
942
943                 return 0;
944         }
945
946         do {
947                 r = real_journal_next(j, direction);
948                 if (r < 0)
949                         return r;
950
951                 if (r == 0)
952                         return c;
953
954                 skip--;
955                 c++;
956         } while (skip > 0);
957
958         return c;
959 }
960
961 _public_ int sd_journal_next_skip(sd_journal *j, uint64_t skip) {
962         return real_journal_next_skip(j, DIRECTION_DOWN, skip);
963 }
964
965 _public_ int sd_journal_previous_skip(sd_journal *j, uint64_t skip) {
966         return real_journal_next_skip(j, DIRECTION_UP, skip);
967 }
968
969 _public_ int sd_journal_get_cursor(sd_journal *j, char **cursor) {
970         Object *o;
971         int r;
972         char bid[33], sid[33];
973
974         assert_return(j, -EINVAL);
975         assert_return(!journal_pid_changed(j), -ECHILD);
976         assert_return(cursor, -EINVAL);
977
978         if (!j->current_file || j->current_file->current_offset <= 0)
979                 return -EADDRNOTAVAIL;
980
981         r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
982         if (r < 0)
983                 return r;
984
985         sd_id128_to_string(j->current_file->header->seqnum_id, sid);
986         sd_id128_to_string(o->entry.boot_id, bid);
987
988         if (asprintf(cursor,
989                      "s=%s;i=%"PRIx64";b=%s;m=%"PRIx64";t=%"PRIx64";x=%"PRIx64,
990                      sid, le64toh(o->entry.seqnum),
991                      bid, le64toh(o->entry.monotonic),
992                      le64toh(o->entry.realtime),
993                      le64toh(o->entry.xor_hash)) < 0)
994                 return -ENOMEM;
995
996         return 0;
997 }
998
999 _public_ int sd_journal_seek_cursor(sd_journal *j, const char *cursor) {
1000         const char *word, *state;
1001         size_t l;
1002         unsigned long long seqnum, monotonic, realtime, xor_hash;
1003         bool
1004                 seqnum_id_set = false,
1005                 seqnum_set = false,
1006                 boot_id_set = false,
1007                 monotonic_set = false,
1008                 realtime_set = false,
1009                 xor_hash_set = false;
1010         sd_id128_t seqnum_id, boot_id;
1011
1012         assert_return(j, -EINVAL);
1013         assert_return(!journal_pid_changed(j), -ECHILD);
1014         assert_return(!isempty(cursor), -EINVAL);
1015
1016         FOREACH_WORD_SEPARATOR(word, l, cursor, ";", state) {
1017                 char *item;
1018                 int k = 0;
1019
1020                 if (l < 2 || word[1] != '=')
1021                         return -EINVAL;
1022
1023                 item = strndup(word, l);
1024                 if (!item)
1025                         return -ENOMEM;
1026
1027                 switch (word[0]) {
1028
1029                 case 's':
1030                         seqnum_id_set = true;
1031                         k = sd_id128_from_string(item+2, &seqnum_id);
1032                         break;
1033
1034                 case 'i':
1035                         seqnum_set = true;
1036                         if (sscanf(item+2, "%llx", &seqnum) != 1)
1037                                 k = -EINVAL;
1038                         break;
1039
1040                 case 'b':
1041                         boot_id_set = true;
1042                         k = sd_id128_from_string(item+2, &boot_id);
1043                         break;
1044
1045                 case 'm':
1046                         monotonic_set = true;
1047                         if (sscanf(item+2, "%llx", &monotonic) != 1)
1048                                 k = -EINVAL;
1049                         break;
1050
1051                 case 't':
1052                         realtime_set = true;
1053                         if (sscanf(item+2, "%llx", &realtime) != 1)
1054                                 k = -EINVAL;
1055                         break;
1056
1057                 case 'x':
1058                         xor_hash_set = true;
1059                         if (sscanf(item+2, "%llx", &xor_hash) != 1)
1060                                 k = -EINVAL;
1061                         break;
1062                 }
1063
1064                 free(item);
1065
1066                 if (k < 0)
1067                         return k;
1068         }
1069
1070         if ((!seqnum_set || !seqnum_id_set) &&
1071             (!monotonic_set || !boot_id_set) &&
1072             !realtime_set)
1073                 return -EINVAL;
1074
1075         reset_location(j);
1076
1077         j->current_location.type = LOCATION_SEEK;
1078
1079         if (realtime_set) {
1080                 j->current_location.realtime = (uint64_t) realtime;
1081                 j->current_location.realtime_set = true;
1082         }
1083
1084         if (seqnum_set && seqnum_id_set) {
1085                 j->current_location.seqnum = (uint64_t) seqnum;
1086                 j->current_location.seqnum_id = seqnum_id;
1087                 j->current_location.seqnum_set = true;
1088         }
1089
1090         if (monotonic_set && boot_id_set) {
1091                 j->current_location.monotonic = (uint64_t) monotonic;
1092                 j->current_location.boot_id = boot_id;
1093                 j->current_location.monotonic_set = true;
1094         }
1095
1096         if (xor_hash_set) {
1097                 j->current_location.xor_hash = (uint64_t) xor_hash;
1098                 j->current_location.xor_hash_set = true;
1099         }
1100
1101         return 0;
1102 }
1103
1104 _public_ int sd_journal_test_cursor(sd_journal *j, const char *cursor) {
1105         int r;
1106         const char *word, *state;
1107         size_t l;
1108         Object *o;
1109
1110         assert_return(j, -EINVAL);
1111         assert_return(!journal_pid_changed(j), -ECHILD);
1112         assert_return(!isempty(cursor), -EINVAL);
1113
1114         if (!j->current_file || j->current_file->current_offset <= 0)
1115                 return -EADDRNOTAVAIL;
1116
1117         r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
1118         if (r < 0)
1119                 return r;
1120
1121         FOREACH_WORD_SEPARATOR(word, l, cursor, ";", state) {
1122                 _cleanup_free_ char *item = NULL;
1123                 sd_id128_t id;
1124                 unsigned long long ll;
1125                 int k = 0;
1126
1127                 if (l < 2 || word[1] != '=')
1128                         return -EINVAL;
1129
1130                 item = strndup(word, l);
1131                 if (!item)
1132                         return -ENOMEM;
1133
1134                 switch (word[0]) {
1135
1136                 case 's':
1137                         k = sd_id128_from_string(item+2, &id);
1138                         if (k < 0)
1139                                 return k;
1140                         if (!sd_id128_equal(id, j->current_file->header->seqnum_id))
1141                                 return 0;
1142                         break;
1143
1144                 case 'i':
1145                         if (sscanf(item+2, "%llx", &ll) != 1)
1146                                 return -EINVAL;
1147                         if (ll != le64toh(o->entry.seqnum))
1148                                 return 0;
1149                         break;
1150
1151                 case 'b':
1152                         k = sd_id128_from_string(item+2, &id);
1153                         if (k < 0)
1154                                 return k;
1155                         if (!sd_id128_equal(id, o->entry.boot_id))
1156                                 return 0;
1157                         break;
1158
1159                 case 'm':
1160                         if (sscanf(item+2, "%llx", &ll) != 1)
1161                                 return -EINVAL;
1162                         if (ll != le64toh(o->entry.monotonic))
1163                                 return 0;
1164                         break;
1165
1166                 case 't':
1167                         if (sscanf(item+2, "%llx", &ll) != 1)
1168                                 return -EINVAL;
1169                         if (ll != le64toh(o->entry.realtime))
1170                                 return 0;
1171                         break;
1172
1173                 case 'x':
1174                         if (sscanf(item+2, "%llx", &ll) != 1)
1175                                 return -EINVAL;
1176                         if (ll != le64toh(o->entry.xor_hash))
1177                                 return 0;
1178                         break;
1179                 }
1180         }
1181
1182         return 1;
1183 }
1184
1185
1186 _public_ int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t usec) {
1187         assert_return(j, -EINVAL);
1188         assert_return(!journal_pid_changed(j), -ECHILD);
1189
1190         reset_location(j);
1191         j->current_location.type = LOCATION_SEEK;
1192         j->current_location.boot_id = boot_id;
1193         j->current_location.monotonic = usec;
1194         j->current_location.monotonic_set = true;
1195
1196         return 0;
1197 }
1198
1199 _public_ int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec) {
1200         assert_return(j, -EINVAL);
1201         assert_return(!journal_pid_changed(j), -ECHILD);
1202
1203         reset_location(j);
1204         j->current_location.type = LOCATION_SEEK;
1205         j->current_location.realtime = usec;
1206         j->current_location.realtime_set = true;
1207
1208         return 0;
1209 }
1210
1211 _public_ int sd_journal_seek_head(sd_journal *j) {
1212         assert_return(j, -EINVAL);
1213         assert_return(!journal_pid_changed(j), -ECHILD);
1214
1215         reset_location(j);
1216         j->current_location.type = LOCATION_HEAD;
1217
1218         return 0;
1219 }
1220
1221 _public_ int sd_journal_seek_tail(sd_journal *j) {
1222         assert_return(j, -EINVAL);
1223         assert_return(!journal_pid_changed(j), -ECHILD);
1224
1225         reset_location(j);
1226         j->current_location.type = LOCATION_TAIL;
1227
1228         return 0;
1229 }
1230
1231 static void check_network(sd_journal *j, int fd) {
1232         struct statfs sfs;
1233
1234         assert(j);
1235
1236         if (j->on_network)
1237                 return;
1238
1239         if (fstatfs(fd, &sfs) < 0)
1240                 return;
1241
1242         j->on_network =
1243                 F_TYPE_EQUAL(sfs.f_type, CIFS_MAGIC_NUMBER) ||
1244                 F_TYPE_EQUAL(sfs.f_type, CODA_SUPER_MAGIC) ||
1245                 F_TYPE_EQUAL(sfs.f_type, NCP_SUPER_MAGIC) ||
1246                 F_TYPE_EQUAL(sfs.f_type, NFS_SUPER_MAGIC) ||
1247                 F_TYPE_EQUAL(sfs.f_type, SMB_SUPER_MAGIC);
1248 }
1249
/* Returns true if 'filename' is "<prefix>.journal" or "<prefix>.journal~",
 * or if it starts with "<prefix>@". */
static bool file_has_type_prefix(const char *prefix, const char *filename) {
        const char *journal_name, *tilde_name, *at_prefix;

        /* strappenda() results are only used within this function */
        journal_name = strappenda(prefix, ".journal");
        if (streq(filename, journal_name))
                return true;

        tilde_name = strappenda(journal_name, "~");
        if (streq(filename, tilde_name))
                return true;

        at_prefix = strappenda(prefix, "@");
        return !!startswith(filename, at_prefix);
}
1261
1262 static bool file_type_wanted(int flags, const char *filename) {
1263         if (!endswith(filename, ".journal") && !endswith(filename, ".journal~"))
1264                 return false;
1265
1266         /* no flags set â†’ every type is OK */
1267         if (!(flags & (SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER)))
1268                 return true;
1269
1270         if (flags & SD_JOURNAL_SYSTEM && file_has_type_prefix("system", filename))
1271                 return true;
1272
1273         if (flags & SD_JOURNAL_CURRENT_USER) {
1274                 char prefix[5 + DECIMAL_STR_MAX(uid_t) + 1];
1275
1276                 assert_se(snprintf(prefix, sizeof(prefix), "user-"UID_FMT, getuid())
1277                           < (int) sizeof(prefix));
1278
1279                 if (file_has_type_prefix(prefix, filename))
1280                         return true;
1281         }
1282
1283         return false;
1284 }
1285
1286 static int add_any_file(sd_journal *j, const char *path) {
1287         JournalFile *f = NULL;
1288         int r;
1289
1290         assert(j);
1291         assert(path);
1292
1293         if (hashmap_get(j->files, path))
1294                 return 0;
1295
1296         if (hashmap_size(j->files) >= JOURNAL_FILES_MAX) {
1297                 log_warning("Too many open journal files, not adding %s.", path);
1298                 return set_put_error(j, -ETOOMANYREFS);
1299         }
1300
1301         r = journal_file_open(path, O_RDONLY, 0, false, false, NULL, j->mmap, NULL, &f);
1302         if (r < 0)
1303                 return r;
1304
1305         /* journal_file_dump(f); */
1306
1307         r = hashmap_put(j->files, f->path, f);
1308         if (r < 0) {
1309                 journal_file_close(f);
1310                 return r;
1311         }
1312
1313         log_debug("File %s added.", f->path);
1314
1315         check_network(j, f->fd);
1316
1317         j->current_invalidate_counter ++;
1318
1319         return 0;
1320 }
1321
1322 static int add_file(sd_journal *j, const char *prefix, const char *filename) {
1323         _cleanup_free_ char *path = NULL;
1324         int r;
1325
1326         assert(j);
1327         assert(prefix);
1328         assert(filename);
1329
1330         if (j->no_new_files ||
1331             !file_type_wanted(j->flags, filename))
1332                 return 0;
1333
1334         path = strjoin(prefix, "/", filename, NULL);
1335         if (!path)
1336                 return -ENOMEM;
1337
1338         r = add_any_file(j, path);
1339         if (r == -ENOENT)
1340                 return 0;
1341         return 0;
1342 }
1343
1344 static int remove_file(sd_journal *j, const char *prefix, const char *filename) {
1345         _cleanup_free_ char *path;
1346         JournalFile *f;
1347
1348         assert(j);
1349         assert(prefix);
1350         assert(filename);
1351
1352         path = strjoin(prefix, "/", filename, NULL);
1353         if (!path)
1354                 return -ENOMEM;
1355
1356         f = hashmap_get(j->files, path);
1357         if (!f)
1358                 return 0;
1359
1360         remove_file_real(j, f);
1361         return 0;
1362 }
1363
1364 static void remove_file_real(sd_journal *j, JournalFile *f) {
1365         assert(j);
1366         assert(f);
1367
1368         hashmap_remove(j->files, f->path);
1369
1370         log_debug("File %s removed.", f->path);
1371
1372         if (j->current_file == f) {
1373                 j->current_file = NULL;
1374                 j->current_field = 0;
1375         }
1376
1377         if (j->unique_file == f) {
1378                 /* Jump to the next unique_file or NULL if that one was last */
1379                 j->unique_file = hashmap_next(j->files, j->unique_file->path);
1380                 j->unique_offset = 0;
1381                 if (!j->unique_file)
1382                         j->unique_file_lost = true;
1383         }
1384
1385         journal_file_close(f);
1386
1387         j->current_invalidate_counter ++;
1388 }
1389
1390 static int add_directory(sd_journal *j, const char *prefix, const char *dirname) {
1391         _cleanup_free_ char *path = NULL;
1392         int r;
1393         _cleanup_closedir_ DIR *d = NULL;
1394         sd_id128_t id, mid;
1395         Directory *m;
1396
1397         assert(j);
1398         assert(prefix);
1399         assert(dirname);
1400
1401         log_debug("Considering %s/%s.", prefix, dirname);
1402
1403         if ((j->flags & SD_JOURNAL_LOCAL_ONLY) &&
1404             (sd_id128_from_string(dirname, &id) < 0 ||
1405              sd_id128_get_machine(&mid) < 0 ||
1406              !(sd_id128_equal(id, mid) || path_startswith(prefix, "/run"))))
1407             return 0;
1408
1409         path = strjoin(prefix, "/", dirname, NULL);
1410         if (!path)
1411                 return -ENOMEM;
1412
1413         d = opendir(path);
1414         if (!d) {
1415                 log_debug("Failed to open %s: %m", path);
1416                 if (errno == ENOENT)
1417                         return 0;
1418                 return -errno;
1419         }
1420
1421         m = hashmap_get(j->directories_by_path, path);
1422         if (!m) {
1423                 m = new0(Directory, 1);
1424                 if (!m)
1425                         return -ENOMEM;
1426
1427                 m->is_root = false;
1428                 m->path = path;
1429
1430                 if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
1431                         free(m);
1432                         return -ENOMEM;
1433                 }
1434
1435                 path = NULL; /* avoid freeing in cleanup */
1436                 j->current_invalidate_counter ++;
1437
1438                 log_debug("Directory %s added.", m->path);
1439
1440         } else if (m->is_root)
1441                 return 0;
1442
1443         if (m->wd <= 0 && j->inotify_fd >= 0) {
1444
1445                 m->wd = inotify_add_watch(j->inotify_fd, m->path,
1446                                           IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1447                                           IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM|
1448                                           IN_ONLYDIR);
1449
1450                 if (m->wd > 0 && hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m) < 0)
1451                         inotify_rm_watch(j->inotify_fd, m->wd);
1452         }
1453
1454         for (;;) {
1455                 struct dirent *de;
1456
1457                 errno = 0;
1458                 de = readdir(d);
1459                 if (!de && errno != 0) {
1460                         r = -errno;
1461                         log_debug("Failed to read directory %s: %m", m->path);
1462                         return r;
1463                 }
1464                 if (!de)
1465                         break;
1466
1467                 if (dirent_is_file_with_suffix(de, ".journal") ||
1468                     dirent_is_file_with_suffix(de, ".journal~")) {
1469                         r = add_file(j, m->path, de->d_name);
1470                         if (r < 0) {
1471                                 log_debug("Failed to add file %s/%s: %s",
1472                                           m->path, de->d_name, strerror(-r));
1473                                 r = set_put_error(j, r);
1474                                 if (r < 0)
1475                                         return r;
1476                         }
1477                 }
1478         }
1479
1480         check_network(j, dirfd(d));
1481
1482         return 0;
1483 }
1484
1485 static int add_root_directory(sd_journal *j, const char *p) {
1486         _cleanup_closedir_ DIR *d = NULL;
1487         Directory *m;
1488         int r;
1489
1490         assert(j);
1491         assert(p);
1492
1493         if ((j->flags & SD_JOURNAL_RUNTIME_ONLY) &&
1494             !path_startswith(p, "/run"))
1495                 return -EINVAL;
1496
1497         if (j->prefix)
1498                 p = strappenda(j->prefix, p);
1499
1500         d = opendir(p);
1501         if (!d)
1502                 return -errno;
1503
1504         m = hashmap_get(j->directories_by_path, p);
1505         if (!m) {
1506                 m = new0(Directory, 1);
1507                 if (!m)
1508                         return -ENOMEM;
1509
1510                 m->is_root = true;
1511                 m->path = strdup(p);
1512                 if (!m->path) {
1513                         free(m);
1514                         return -ENOMEM;
1515                 }
1516
1517                 if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
1518                         free(m->path);
1519                         free(m);
1520                         return -ENOMEM;
1521                 }
1522
1523                 j->current_invalidate_counter ++;
1524
1525                 log_debug("Root directory %s added.", m->path);
1526
1527         } else if (!m->is_root)
1528                 return 0;
1529
1530         if (m->wd <= 0 && j->inotify_fd >= 0) {
1531
1532                 m->wd = inotify_add_watch(j->inotify_fd, m->path,
1533                                           IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1534                                           IN_ONLYDIR);
1535
1536                 if (m->wd > 0 && hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m) < 0)
1537                         inotify_rm_watch(j->inotify_fd, m->wd);
1538         }
1539
1540         if (j->no_new_files)
1541                 return 0;
1542
1543         for (;;) {
1544                 struct dirent *de;
1545                 sd_id128_t id;
1546
1547                 errno = 0;
1548                 de = readdir(d);
1549                 if (!de && errno != 0) {
1550                         r = -errno;
1551                         log_debug("Failed to read directory %s: %m", m->path);
1552                         return r;
1553                 }
1554                 if (!de)
1555                         break;
1556
1557                 if (dirent_is_file_with_suffix(de, ".journal") ||
1558                     dirent_is_file_with_suffix(de, ".journal~")) {
1559                         r = add_file(j, m->path, de->d_name);
1560                         if (r < 0) {
1561                                 log_debug("Failed to add file %s/%s: %s",
1562                                           m->path, de->d_name, strerror(-r));
1563                                 r = set_put_error(j, r);
1564                                 if (r < 0)
1565                                         return r;
1566                         }
1567                 } else if ((de->d_type == DT_DIR || de->d_type == DT_LNK || de->d_type == DT_UNKNOWN) &&
1568                            sd_id128_from_string(de->d_name, &id) >= 0) {
1569
1570                         r = add_directory(j, m->path, de->d_name);
1571                         if (r < 0)
1572                                 log_debug("Failed to add directory %s/%s: %s", m->path, de->d_name, strerror(-r));
1573                 }
1574         }
1575
1576         check_network(j, dirfd(d));
1577
1578         return 0;
1579 }
1580
1581 static int remove_directory(sd_journal *j, Directory *d) {
1582         assert(j);
1583
1584         if (d->wd > 0) {
1585                 hashmap_remove(j->directories_by_wd, INT_TO_PTR(d->wd));
1586
1587                 if (j->inotify_fd >= 0)
1588                         inotify_rm_watch(j->inotify_fd, d->wd);
1589         }
1590
1591         hashmap_remove(j->directories_by_path, d->path);
1592
1593         if (d->is_root)
1594                 log_debug("Root directory %s removed.", d->path);
1595         else
1596                 log_debug("Directory %s removed.", d->path);
1597
1598         free(d->path);
1599         free(d);
1600
1601         return 0;
1602 }
1603
1604 static int add_search_paths(sd_journal *j) {
1605         int r;
1606         const char search_paths[] =
1607                 "/run/log/journal\0"
1608                 "/var/log/journal\0";
1609         const char *p;
1610
1611         assert(j);
1612
1613         /* We ignore most errors here, since the idea is to only open
1614          * what's actually accessible, and ignore the rest. */
1615
1616         NULSTR_FOREACH(p, search_paths) {
1617                 r = add_root_directory(j, p);
1618                 if (r < 0 && r != -ENOENT) {
1619                         r = set_put_error(j, r);
1620                         if (r < 0)
1621                                 return r;
1622                 }
1623         }
1624
1625         return 0;
1626 }
1627
1628 static int add_current_paths(sd_journal *j) {
1629         Iterator i;
1630         JournalFile *f;
1631
1632         assert(j);
1633         assert(j->no_new_files);
1634
1635         /* Simply adds all directories for files we have open as
1636          * "root" directories. We don't expect errors here, so we
1637          * treat them as fatal. */
1638
1639         HASHMAP_FOREACH(f, j->files, i) {
1640                 _cleanup_free_ char *dir;
1641                 int r;
1642
1643                 dir = dirname_malloc(f->path);
1644                 if (!dir)
1645                         return -ENOMEM;
1646
1647                 r = add_root_directory(j, dir);
1648                 if (r < 0) {
1649                         set_put_error(j, r);
1650                         return r;
1651                 }
1652         }
1653
1654         return 0;
1655 }
1656
1657
1658 static int allocate_inotify(sd_journal *j) {
1659         assert(j);
1660
1661         if (j->inotify_fd < 0) {
1662                 j->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1663                 if (j->inotify_fd < 0)
1664                         return -errno;
1665         }
1666
1667         if (!j->directories_by_wd) {
1668                 j->directories_by_wd = hashmap_new(NULL);
1669                 if (!j->directories_by_wd)
1670                         return -ENOMEM;
1671         }
1672
1673         return 0;
1674 }
1675
1676 static sd_journal *journal_new(int flags, const char *path) {
1677         sd_journal *j;
1678
1679         j = new0(sd_journal, 1);
1680         if (!j)
1681                 return NULL;
1682
1683         j->original_pid = getpid();
1684         j->inotify_fd = -1;
1685         j->flags = flags;
1686         j->data_threshold = DEFAULT_DATA_THRESHOLD;
1687
1688         if (path) {
1689                 j->path = strdup(path);
1690                 if (!j->path)
1691                         goto fail;
1692         }
1693
1694         j->files = hashmap_new(&string_hash_ops);
1695         j->directories_by_path = hashmap_new(&string_hash_ops);
1696         j->mmap = mmap_cache_new();
1697         if (!j->files || !j->directories_by_path || !j->mmap)
1698                 goto fail;
1699
1700         return j;
1701
1702 fail:
1703         sd_journal_close(j);
1704         return NULL;
1705 }
1706
1707 _public_ int sd_journal_open(sd_journal **ret, int flags) {
1708         sd_journal *j;
1709         int r;
1710
1711         assert_return(ret, -EINVAL);
1712         assert_return((flags & ~(SD_JOURNAL_LOCAL_ONLY|SD_JOURNAL_RUNTIME_ONLY|SD_JOURNAL_SYSTEM|SD_JOURNAL_CURRENT_USER)) == 0, -EINVAL);
1713
1714         j = journal_new(flags, NULL);
1715         if (!j)
1716                 return -ENOMEM;
1717
1718         r = add_search_paths(j);
1719         if (r < 0)
1720                 goto fail;
1721
1722         *ret = j;
1723         return 0;
1724
1725 fail:
1726         sd_journal_close(j);
1727
1728         return r;
1729 }
1730
1731 _public_ int sd_journal_open_container(sd_journal **ret, const char *machine, int flags) {
1732         _cleanup_free_ char *root = NULL, *class = NULL;
1733         sd_journal *j;
1734         char *p;
1735         int r;
1736
1737         assert_return(machine, -EINVAL);
1738         assert_return(ret, -EINVAL);
1739         assert_return((flags & ~(SD_JOURNAL_LOCAL_ONLY|SD_JOURNAL_SYSTEM)) == 0, -EINVAL);
1740         assert_return(filename_is_safe(machine), -EINVAL);
1741
1742         p = strappenda("/run/systemd/machines/", machine);
1743         r = parse_env_file(p, NEWLINE, "ROOT", &root, "CLASS", &class, NULL);
1744         if (r == -ENOENT)
1745                 return -EHOSTDOWN;
1746         if (r < 0)
1747                 return r;
1748         if (!root)
1749                 return -ENODATA;
1750
1751         if (!streq_ptr(class, "container"))
1752                 return -EIO;
1753
1754         j = journal_new(flags, NULL);
1755         if (!j)
1756                 return -ENOMEM;
1757
1758         j->prefix = root;
1759         root = NULL;
1760
1761         r = add_search_paths(j);
1762         if (r < 0)
1763                 goto fail;
1764
1765         *ret = j;
1766         return 0;
1767
1768 fail:
1769         sd_journal_close(j);
1770         return r;
1771 }
1772
1773 _public_ int sd_journal_open_directory(sd_journal **ret, const char *path, int flags) {
1774         sd_journal *j;
1775         int r;
1776
1777         assert_return(ret, -EINVAL);
1778         assert_return(path, -EINVAL);
1779         assert_return(flags == 0, -EINVAL);
1780
1781         j = journal_new(flags, path);
1782         if (!j)
1783                 return -ENOMEM;
1784
1785         r = add_root_directory(j, path);
1786         if (r < 0) {
1787                 set_put_error(j, r);
1788                 goto fail;
1789         }
1790
1791         *ret = j;
1792         return 0;
1793
1794 fail:
1795         sd_journal_close(j);
1796
1797         return r;
1798 }
1799
1800 _public_ int sd_journal_open_files(sd_journal **ret, const char **paths, int flags) {
1801         sd_journal *j;
1802         const char **path;
1803         int r;
1804
1805         assert_return(ret, -EINVAL);
1806         assert_return(flags == 0, -EINVAL);
1807
1808         j = journal_new(flags, NULL);
1809         if (!j)
1810                 return -ENOMEM;
1811
1812         STRV_FOREACH(path, paths) {
1813                 r = add_any_file(j, *path);
1814                 if (r < 0) {
1815                         log_error("Failed to open %s: %s", *path, strerror(-r));
1816                         goto fail;
1817                 }
1818         }
1819
1820         j->no_new_files = true;
1821
1822         *ret = j;
1823         return 0;
1824
1825 fail:
1826         sd_journal_close(j);
1827
1828         return r;
1829 }
1830
1831 _public_ void sd_journal_close(sd_journal *j) {
1832         Directory *d;
1833         JournalFile *f;
1834
1835         if (!j)
1836                 return;
1837
1838         sd_journal_flush_matches(j);
1839
1840         while ((f = hashmap_steal_first(j->files)))
1841                 journal_file_close(f);
1842
1843         hashmap_free(j->files);
1844
1845         while ((d = hashmap_first(j->directories_by_path)))
1846                 remove_directory(j, d);
1847
1848         while ((d = hashmap_first(j->directories_by_wd)))
1849                 remove_directory(j, d);
1850
1851         hashmap_free(j->directories_by_path);
1852         hashmap_free(j->directories_by_wd);
1853
1854         safe_close(j->inotify_fd);
1855
1856         if (j->mmap) {
1857                 log_debug("mmap cache statistics: %u hit, %u miss", mmap_cache_get_hit(j->mmap), mmap_cache_get_missed(j->mmap));
1858                 mmap_cache_unref(j->mmap);
1859         }
1860
1861         free(j->path);
1862         free(j->prefix);
1863         free(j->unique_field);
1864         set_free(j->errors);
1865         free(j);
1866 }
1867
1868 _public_ int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret) {
1869         Object *o;
1870         JournalFile *f;
1871         int r;
1872
1873         assert_return(j, -EINVAL);
1874         assert_return(!journal_pid_changed(j), -ECHILD);
1875         assert_return(ret, -EINVAL);
1876
1877         f = j->current_file;
1878         if (!f)
1879                 return -EADDRNOTAVAIL;
1880
1881         if (f->current_offset <= 0)
1882                 return -EADDRNOTAVAIL;
1883
1884         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1885         if (r < 0)
1886                 return r;
1887
1888         *ret = le64toh(o->entry.realtime);
1889         return 0;
1890 }
1891
1892 _public_ int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id128_t *ret_boot_id) {
1893         Object *o;
1894         JournalFile *f;
1895         int r;
1896         sd_id128_t id;
1897
1898         assert_return(j, -EINVAL);
1899         assert_return(!journal_pid_changed(j), -ECHILD);
1900
1901         f = j->current_file;
1902         if (!f)
1903                 return -EADDRNOTAVAIL;
1904
1905         if (f->current_offset <= 0)
1906                 return -EADDRNOTAVAIL;
1907
1908         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1909         if (r < 0)
1910                 return r;
1911
1912         if (ret_boot_id)
1913                 *ret_boot_id = o->entry.boot_id;
1914         else {
1915                 r = sd_id128_get_boot(&id);
1916                 if (r < 0)
1917                         return r;
1918
1919                 if (!sd_id128_equal(id, o->entry.boot_id))
1920                         return -ESTALE;
1921         }
1922
1923         if (ret)
1924                 *ret = le64toh(o->entry.monotonic);
1925
1926         return 0;
1927 }
1928
/* Returns true if the specified string is acceptable as a field name
 * here: non-empty, not beginning with two underscores, and consisting
 * solely of the characters 'A'-'Z', '0'-'9' and '_'. */
static bool field_is_valid(const char *field) {
        const char *c;

        assert(field);

        /* The empty string is never a valid name */
        if (field[0] == '\0')
                return false;

        /* Names with a double underscore prefix are refused */
        if (field[0] == '_' && field[1] == '_')
                return false;

        for (c = field; *c != '\0'; c++) {
                bool allowed =
                        *c == '_' ||
                        (*c >= 'A' && *c <= 'Z') ||
                        (*c >= '0' && *c <= '9');

                if (!allowed)
                        return false;
        }

        return true;
}
1956
1957 _public_ int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *size) {
1958         JournalFile *f;
1959         uint64_t i, n;
1960         size_t field_length;
1961         int r;
1962         Object *o;
1963
1964         assert_return(j, -EINVAL);
1965         assert_return(!journal_pid_changed(j), -ECHILD);
1966         assert_return(field, -EINVAL);
1967         assert_return(data, -EINVAL);
1968         assert_return(size, -EINVAL);
1969         assert_return(field_is_valid(field), -EINVAL);
1970
1971         f = j->current_file;
1972         if (!f)
1973                 return -EADDRNOTAVAIL;
1974
1975         if (f->current_offset <= 0)
1976                 return -EADDRNOTAVAIL;
1977
1978         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1979         if (r < 0)
1980                 return r;
1981
1982         field_length = strlen(field);
1983
1984         n = journal_file_entry_n_items(o);
1985         for (i = 0; i < n; i++) {
1986                 uint64_t p, l;
1987                 le64_t le_hash;
1988                 size_t t;
1989                 int compression;
1990
1991                 p = le64toh(o->entry.items[i].object_offset);
1992                 le_hash = o->entry.items[i].hash;
1993                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1994                 if (r < 0)
1995                         return r;
1996
1997                 if (le_hash != o->data.hash)
1998                         return -EBADMSG;
1999
2000                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2001
2002                 compression = o->object.flags & OBJECT_COMPRESSION_MASK;
2003                 if (compression) {
2004 #if defined(HAVE_XZ) || defined(HAVE_LZ4)
2005                         if (decompress_startswith(compression,
2006                                                   o->data.payload, l,
2007                                                   &f->compress_buffer, &f->compress_buffer_size,
2008                                                   field, field_length, '=')) {
2009
2010                                 size_t rsize;
2011
2012                                 r = decompress_blob(compression,
2013                                                     o->data.payload, l,
2014                                                     &f->compress_buffer, &f->compress_buffer_size, &rsize,
2015                                                     j->data_threshold);
2016                                 if (r < 0)
2017                                         return r;
2018
2019                                 *data = f->compress_buffer;
2020                                 *size = (size_t) rsize;
2021
2022                                 return 0;
2023                         }
2024 #else
2025                         return -EPROTONOSUPPORT;
2026 #endif
2027                 } else if (l >= field_length+1 &&
2028                            memcmp(o->data.payload, field, field_length) == 0 &&
2029                            o->data.payload[field_length] == '=') {
2030
2031                         t = (size_t) l;
2032
2033                         if ((uint64_t) t != l)
2034                                 return -E2BIG;
2035
2036                         *data = o->data.payload;
2037                         *size = t;
2038
2039                         return 0;
2040                 }
2041
2042                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2043                 if (r < 0)
2044                         return r;
2045         }
2046
2047         return -ENOENT;
2048 }
2049
2050 static int return_data(sd_journal *j, JournalFile *f, Object *o, const void **data, size_t *size) {
2051         size_t t;
2052         uint64_t l;
2053         int compression;
2054
2055         l = le64toh(o->object.size) - offsetof(Object, data.payload);
2056         t = (size_t) l;
2057
2058         /* We can't read objects larger than 4G on a 32bit machine */
2059         if ((uint64_t) t != l)
2060                 return -E2BIG;
2061
2062         compression = o->object.flags & OBJECT_COMPRESSION_MASK;
2063         if (compression) {
2064 #if defined(HAVE_XZ) || defined(HAVE_LZ4)
2065                 size_t rsize;
2066                 int r;
2067
2068                 r = decompress_blob(compression,
2069                                     o->data.payload, l, &f->compress_buffer,
2070                                     &f->compress_buffer_size, &rsize, j->data_threshold);
2071                 if (r < 0)
2072                         return r;
2073
2074                 *data = f->compress_buffer;
2075                 *size = (size_t) rsize;
2076 #else
2077                 return -EPROTONOSUPPORT;
2078 #endif
2079         } else {
2080                 *data = o->data.payload;
2081                 *size = t;
2082         }
2083
2084         return 0;
2085 }
2086
2087 _public_ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) {
2088         JournalFile *f;
2089         uint64_t p, n;
2090         le64_t le_hash;
2091         int r;
2092         Object *o;
2093
2094         assert_return(j, -EINVAL);
2095         assert_return(!journal_pid_changed(j), -ECHILD);
2096         assert_return(data, -EINVAL);
2097         assert_return(size, -EINVAL);
2098
2099         f = j->current_file;
2100         if (!f)
2101                 return -EADDRNOTAVAIL;
2102
2103         if (f->current_offset <= 0)
2104                 return -EADDRNOTAVAIL;
2105
2106         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2107         if (r < 0)
2108                 return r;
2109
2110         n = journal_file_entry_n_items(o);
2111         if (j->current_field >= n)
2112                 return 0;
2113
2114         p = le64toh(o->entry.items[j->current_field].object_offset);
2115         le_hash = o->entry.items[j->current_field].hash;
2116         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2117         if (r < 0)
2118                 return r;
2119
2120         if (le_hash != o->data.hash)
2121                 return -EBADMSG;
2122
2123         r = return_data(j, f, o, data, size);
2124         if (r < 0)
2125                 return r;
2126
2127         j->current_field ++;
2128
2129         return 1;
2130 }
2131
2132 _public_ void sd_journal_restart_data(sd_journal *j) {
2133         if (!j)
2134                 return;
2135
2136         j->current_field = 0;
2137 }
2138
2139 _public_ int sd_journal_get_fd(sd_journal *j) {
2140         int r;
2141
2142         assert_return(j, -EINVAL);
2143         assert_return(!journal_pid_changed(j), -ECHILD);
2144
2145         if (j->inotify_fd >= 0)
2146                 return j->inotify_fd;
2147
2148         r = allocate_inotify(j);
2149         if (r < 0)
2150                 return r;
2151
2152         /* Iterate through all dirs again, to add them to the
2153          * inotify */
2154         if (j->no_new_files)
2155                 r = add_current_paths(j);
2156         else if (j->path)
2157                 r = add_root_directory(j, j->path);
2158         else
2159                 r = add_search_paths(j);
2160         if (r < 0)
2161                 return r;
2162
2163         return j->inotify_fd;
2164 }
2165
2166 _public_ int sd_journal_get_events(sd_journal *j) {
2167         int fd;
2168
2169         assert_return(j, -EINVAL);
2170         assert_return(!journal_pid_changed(j), -ECHILD);
2171
2172         fd = sd_journal_get_fd(j);
2173         if (fd < 0)
2174                 return fd;
2175
2176         return POLLIN;
2177 }
2178
2179 _public_ int sd_journal_get_timeout(sd_journal *j, uint64_t *timeout_usec) {
2180         int fd;
2181
2182         assert_return(j, -EINVAL);
2183         assert_return(!journal_pid_changed(j), -ECHILD);
2184         assert_return(timeout_usec, -EINVAL);
2185
2186         fd = sd_journal_get_fd(j);
2187         if (fd < 0)
2188                 return fd;
2189
2190         if (!j->on_network) {
2191                 *timeout_usec = (uint64_t) -1;
2192                 return 0;
2193         }
2194
2195         /* If we are on the network we need to regularly check for
2196          * changes manually */
2197
2198         *timeout_usec = j->last_process_usec + JOURNAL_FILES_RECHECK_USEC;
2199         return 1;
2200 }
2201
2202 static void process_inotify_event(sd_journal *j, struct inotify_event *e) {
2203         Directory *d;
2204         int r;
2205
2206         assert(j);
2207         assert(e);
2208
2209         /* Is this a subdirectory we watch? */
2210         d = hashmap_get(j->directories_by_wd, INT_TO_PTR(e->wd));
2211         if (d) {
2212                 sd_id128_t id;
2213
2214                 if (!(e->mask & IN_ISDIR) && e->len > 0 &&
2215                     (endswith(e->name, ".journal") ||
2216                      endswith(e->name, ".journal~"))) {
2217
2218                         /* Event for a journal file */
2219
2220                         if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) {
2221                                 r = add_file(j, d->path, e->name);
2222                                 if (r < 0) {
2223                                         log_debug("Failed to add file %s/%s: %s",
2224                                                   d->path, e->name, strerror(-r));
2225                                         set_put_error(j, r);
2226                                 }
2227
2228                         } else if (e->mask & (IN_DELETE|IN_MOVED_FROM|IN_UNMOUNT)) {
2229
2230                                 r = remove_file(j, d->path, e->name);
2231                                 if (r < 0)
2232                                         log_debug("Failed to remove file %s/%s: %s", d->path, e->name, strerror(-r));
2233                         }
2234
2235                 } else if (!d->is_root && e->len == 0) {
2236
2237                         /* Event for a subdirectory */
2238
2239                         if (e->mask & (IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT)) {
2240                                 r = remove_directory(j, d);
2241                                 if (r < 0)
2242                                         log_debug("Failed to remove directory %s: %s", d->path, strerror(-r));
2243                         }
2244
2245
2246                 } else if (d->is_root && (e->mask & IN_ISDIR) && e->len > 0 && sd_id128_from_string(e->name, &id) >= 0) {
2247
2248                         /* Event for root directory */
2249
2250                         if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) {
2251                                 r = add_directory(j, d->path, e->name);
2252                                 if (r < 0)
2253                                         log_debug("Failed to add directory %s/%s: %s", d->path, e->name, strerror(-r));
2254                         }
2255                 }
2256
2257                 return;
2258         }
2259
2260         if (e->mask & IN_IGNORED)
2261                 return;
2262
2263         log_warning("Unknown inotify event.");
2264 }
2265
2266 static int determine_change(sd_journal *j) {
2267         bool b;
2268
2269         assert(j);
2270
2271         b = j->current_invalidate_counter != j->last_invalidate_counter;
2272         j->last_invalidate_counter = j->current_invalidate_counter;
2273
2274         return b ? SD_JOURNAL_INVALIDATE : SD_JOURNAL_APPEND;
2275 }
2276
2277 _public_ int sd_journal_process(sd_journal *j) {
2278         uint8_t buffer[sizeof(struct inotify_event) + FILENAME_MAX] _alignas_(struct inotify_event);
2279         bool got_something = false;
2280
2281         assert_return(j, -EINVAL);
2282         assert_return(!journal_pid_changed(j), -ECHILD);
2283
2284         j->last_process_usec = now(CLOCK_MONOTONIC);
2285
2286         for (;;) {
2287                 struct inotify_event *e;
2288                 ssize_t l;
2289
2290                 l = read(j->inotify_fd, buffer, sizeof(buffer));
2291                 if (l < 0) {
2292                         if (errno == EAGAIN || errno == EINTR)
2293                                 return got_something ? determine_change(j) : SD_JOURNAL_NOP;
2294
2295                         return -errno;
2296                 }
2297
2298                 got_something = true;
2299
2300                 e = (struct inotify_event*) buffer;
2301                 while (l > 0) {
2302                         size_t step;
2303
2304                         process_inotify_event(j, e);
2305
2306                         step = sizeof(struct inotify_event) + e->len;
2307                         assert(step <= (size_t) l);
2308
2309                         e = (struct inotify_event*) ((uint8_t*) e + step);
2310                         l -= step;
2311                 }
2312         }
2313 }
2314
2315 _public_ int sd_journal_wait(sd_journal *j, uint64_t timeout_usec) {
2316         int r;
2317         uint64_t t;
2318
2319         assert_return(j, -EINVAL);
2320         assert_return(!journal_pid_changed(j), -ECHILD);
2321
2322         if (j->inotify_fd < 0) {
2323
2324                 /* This is the first invocation, hence create the
2325                  * inotify watch */
2326                 r = sd_journal_get_fd(j);
2327                 if (r < 0)
2328                         return r;
2329
2330                 /* The journal might have changed since the context
2331                  * object was created and we weren't watching before,
2332                  * hence don't wait for anything, and return
2333                  * immediately. */
2334                 return determine_change(j);
2335         }
2336
2337         r = sd_journal_get_timeout(j, &t);
2338         if (r < 0)
2339                 return r;
2340
2341         if (t != (uint64_t) -1) {
2342                 usec_t n;
2343
2344                 n = now(CLOCK_MONOTONIC);
2345                 t = t > n ? t - n : 0;
2346
2347                 if (timeout_usec == (uint64_t) -1 || timeout_usec > t)
2348                         timeout_usec = t;
2349         }
2350
2351         do {
2352                 r = fd_wait_for_event(j->inotify_fd, POLLIN, timeout_usec);
2353         } while (r == -EINTR);
2354
2355         if (r < 0)
2356                 return r;
2357
2358         return sd_journal_process(j);
2359 }
2360
2361 _public_ int sd_journal_get_cutoff_realtime_usec(sd_journal *j, uint64_t *from, uint64_t *to) {
2362         Iterator i;
2363         JournalFile *f;
2364         bool first = true;
2365         uint64_t fmin = 0, tmax = 0;
2366         int r;
2367
2368         assert_return(j, -EINVAL);
2369         assert_return(!journal_pid_changed(j), -ECHILD);
2370         assert_return(from || to, -EINVAL);
2371         assert_return(from != to, -EINVAL);
2372
2373         HASHMAP_FOREACH(f, j->files, i) {
2374                 usec_t fr, t;
2375
2376                 r = journal_file_get_cutoff_realtime_usec(f, &fr, &t);
2377                 if (r == -ENOENT)
2378                         continue;
2379                 if (r < 0)
2380                         return r;
2381                 if (r == 0)
2382                         continue;
2383
2384                 if (first) {
2385                         fmin = fr;
2386                         tmax = t;
2387                         first = false;
2388                 } else {
2389                         fmin = MIN(fr, fmin);
2390                         tmax = MAX(t, tmax);
2391                 }
2392         }
2393
2394         if (from)
2395                 *from = fmin;
2396         if (to)
2397                 *to = tmax;
2398
2399         return first ? 0 : 1;
2400 }
2401
2402 _public_ int sd_journal_get_cutoff_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t *from, uint64_t *to) {
2403         Iterator i;
2404         JournalFile *f;
2405         bool found = false;
2406         int r;
2407
2408         assert_return(j, -EINVAL);
2409         assert_return(!journal_pid_changed(j), -ECHILD);
2410         assert_return(from || to, -EINVAL);
2411         assert_return(from != to, -EINVAL);
2412
2413         HASHMAP_FOREACH(f, j->files, i) {
2414                 usec_t fr, t;
2415
2416                 r = journal_file_get_cutoff_monotonic_usec(f, boot_id, &fr, &t);
2417                 if (r == -ENOENT)
2418                         continue;
2419                 if (r < 0)
2420                         return r;
2421                 if (r == 0)
2422                         continue;
2423
2424                 if (found) {
2425                         if (from)
2426                                 *from = MIN(fr, *from);
2427                         if (to)
2428                                 *to = MAX(t, *to);
2429                 } else {
2430                         if (from)
2431                                 *from = fr;
2432                         if (to)
2433                                 *to = t;
2434                         found = true;
2435                 }
2436         }
2437
2438         return found;
2439 }
2440
2441 void journal_print_header(sd_journal *j) {
2442         Iterator i;
2443         JournalFile *f;
2444         bool newline = false;
2445
2446         assert(j);
2447
2448         HASHMAP_FOREACH(f, j->files, i) {
2449                 if (newline)
2450                         putchar('\n');
2451                 else
2452                         newline = true;
2453
2454                 journal_file_print_header(f);
2455         }
2456 }
2457
2458 _public_ int sd_journal_get_usage(sd_journal *j, uint64_t *bytes) {
2459         Iterator i;
2460         JournalFile *f;
2461         uint64_t sum = 0;
2462
2463         assert_return(j, -EINVAL);
2464         assert_return(!journal_pid_changed(j), -ECHILD);
2465         assert_return(bytes, -EINVAL);
2466
2467         HASHMAP_FOREACH(f, j->files, i) {
2468                 struct stat st;
2469
2470                 if (fstat(f->fd, &st) < 0)
2471                         return -errno;
2472
2473                 sum += (uint64_t) st.st_blocks * 512ULL;
2474         }
2475
2476         *bytes = sum;
2477         return 0;
2478 }
2479
2480 _public_ int sd_journal_query_unique(sd_journal *j, const char *field) {
2481         char *f;
2482
2483         assert_return(j, -EINVAL);
2484         assert_return(!journal_pid_changed(j), -ECHILD);
2485         assert_return(!isempty(field), -EINVAL);
2486         assert_return(field_is_valid(field), -EINVAL);
2487
2488         f = strdup(field);
2489         if (!f)
2490                 return -ENOMEM;
2491
2492         free(j->unique_field);
2493         j->unique_field = f;
2494         j->unique_file = NULL;
2495         j->unique_offset = 0;
2496         j->unique_file_lost = false;
2497
2498         return 0;
2499 }
2500
2501 _public_ int sd_journal_enumerate_unique(sd_journal *j, const void **data, size_t *l) {
2502         size_t k;
2503
2504         assert_return(j, -EINVAL);
2505         assert_return(!journal_pid_changed(j), -ECHILD);
2506         assert_return(data, -EINVAL);
2507         assert_return(l, -EINVAL);
2508         assert_return(j->unique_field, -EINVAL);
2509
2510         k = strlen(j->unique_field);
2511
2512         if (!j->unique_file) {
2513                 if (j->unique_file_lost)
2514                         return 0;
2515
2516                 j->unique_file = hashmap_first(j->files);
2517                 if (!j->unique_file)
2518                         return 0;
2519
2520                 j->unique_offset = 0;
2521         }
2522
2523         for (;;) {
2524                 JournalFile *of;
2525                 Iterator i;
2526                 Object *o;
2527                 const void *odata;
2528                 size_t ol;
2529                 bool found;
2530                 int r;
2531
2532                 /* Proceed to next data object in the field's linked list */
2533                 if (j->unique_offset == 0) {
2534                         r = journal_file_find_field_object(j->unique_file, j->unique_field, k, &o, NULL);
2535                         if (r < 0)
2536                                 return r;
2537
2538                         j->unique_offset = r > 0 ? le64toh(o->field.head_data_offset) : 0;
2539                 } else {
2540                         r = journal_file_move_to_object(j->unique_file, OBJECT_DATA, j->unique_offset, &o);
2541                         if (r < 0)
2542                                 return r;
2543
2544                         j->unique_offset = le64toh(o->data.next_field_offset);
2545                 }
2546
2547                 /* We reached the end of the list? Then start again, with the next file */
2548                 if (j->unique_offset == 0) {
2549                         j->unique_file = hashmap_next(j->files, j->unique_file->path);
2550                         if (!j->unique_file)
2551                                 return 0;
2552
2553                         continue;
2554                 }
2555
2556                 /* We do not use the type context here, but 0 instead,
2557                  * so that we can look at this data object at the same
2558                  * time as one on another file */
2559                 r = journal_file_move_to_object(j->unique_file, 0, j->unique_offset, &o);
2560                 if (r < 0)
2561                         return r;
2562
2563                 /* Let's do the type check by hand, since we used 0 context above. */
2564                 if (o->object.type != OBJECT_DATA) {
2565                         log_debug("%s:offset " OFSfmt ": object has type %d, expected %d",
2566                                   j->unique_file->path, j->unique_offset,
2567                                   o->object.type, OBJECT_DATA);
2568                         return -EBADMSG;
2569                 }
2570
2571                 r = journal_file_object_keep(j->unique_file, o, j->unique_offset);
2572                 if (r < 0)
2573                         return r;
2574
2575                 r = return_data(j, j->unique_file, o, &odata, &ol);
2576                 if (r < 0)
2577                         return r;
2578
2579                 /* Check if we have at least the field name and "=". */
2580                 if (ol <= k) {
2581                         log_debug("%s:offset " OFSfmt ": object has size %zu, expected at least %zu",
2582                                   j->unique_file->path, j->unique_offset,
2583                                   ol, k + 1);
2584                         return -EBADMSG;
2585                 }
2586
2587                 if (memcmp(odata, j->unique_field, k) || ((const char*) odata)[k] != '=') {
2588                         log_debug("%s:offset " OFSfmt ": object does not start with \"%s=\"",
2589                                   j->unique_file->path, j->unique_offset,
2590                                   j->unique_field);
2591                         return -EBADMSG;
2592                 }
2593
2594                 /* OK, now let's see if we already returned this data
2595                  * object by checking if it exists in the earlier
2596                  * traversed files. */
2597                 found = false;
2598                 HASHMAP_FOREACH(of, j->files, i) {
2599                         Object *oo;
2600                         uint64_t op;
2601
2602                         if (of == j->unique_file)
2603                                 break;
2604
2605                         /* Skip this file it didn't have any fields
2606                          * indexed */
2607                         if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) &&
2608                             le64toh(of->header->n_fields) <= 0)
2609                                 continue;
2610
2611                         r = journal_file_find_data_object_with_hash(of, odata, ol, le64toh(o->data.hash), &oo, &op);
2612                         if (r < 0)
2613                                 return r;
2614
2615                         if (r > 0)
2616                                 found = true;
2617                 }
2618
2619                 if (found)
2620                         continue;
2621
2622                 r = journal_file_object_release(j->unique_file, o, j->unique_offset);
2623                 if (r < 0)
2624                         return r;
2625
2626                 r = return_data(j, j->unique_file, o, data, l);
2627                 if (r < 0)
2628                         return r;
2629
2630                 return 1;
2631         }
2632 }
2633
2634 _public_ void sd_journal_restart_unique(sd_journal *j) {
2635         if (!j)
2636                 return;
2637
2638         j->unique_file = NULL;
2639         j->unique_offset = 0;
2640         j->unique_file_lost = false;
2641 }
2642
2643 _public_ int sd_journal_reliable_fd(sd_journal *j) {
2644         assert_return(j, -EINVAL);
2645         assert_return(!journal_pid_changed(j), -ECHILD);
2646
2647         return !j->on_network;
2648 }
2649
2650 static char *lookup_field(const char *field, void *userdata) {
2651         sd_journal *j = userdata;
2652         const void *data;
2653         size_t size, d;
2654         int r;
2655
2656         assert(field);
2657         assert(j);
2658
2659         r = sd_journal_get_data(j, field, &data, &size);
2660         if (r < 0 ||
2661             size > REPLACE_VAR_MAX)
2662                 return strdup(field);
2663
2664         d = strlen(field) + 1;
2665
2666         return strndup((const char*) data + d, size - d);
2667 }
2668
2669 _public_ int sd_journal_get_catalog(sd_journal *j, char **ret) {
2670         const void *data;
2671         size_t size;
2672         sd_id128_t id;
2673         _cleanup_free_ char *text = NULL, *cid = NULL;
2674         char *t;
2675         int r;
2676
2677         assert_return(j, -EINVAL);
2678         assert_return(!journal_pid_changed(j), -ECHILD);
2679         assert_return(ret, -EINVAL);
2680
2681         r = sd_journal_get_data(j, "MESSAGE_ID", &data, &size);
2682         if (r < 0)
2683                 return r;
2684
2685         cid = strndup((const char*) data + 11, size - 11);
2686         if (!cid)
2687                 return -ENOMEM;
2688
2689         r = sd_id128_from_string(cid, &id);
2690         if (r < 0)
2691                 return r;
2692
2693         r = catalog_get(CATALOG_DATABASE, id, &text);
2694         if (r < 0)
2695                 return r;
2696
2697         t = replace_var(text, lookup_field, j);
2698         if (!t)
2699                 return -ENOMEM;
2700
2701         *ret = t;
2702         return 0;
2703 }
2704
2705 _public_ int sd_journal_get_catalog_for_message_id(sd_id128_t id, char **ret) {
2706         assert_return(ret, -EINVAL);
2707
2708         return catalog_get(CATALOG_DATABASE, id, ret);
2709 }
2710
2711 _public_ int sd_journal_set_data_threshold(sd_journal *j, size_t sz) {
2712         assert_return(j, -EINVAL);
2713         assert_return(!journal_pid_changed(j), -ECHILD);
2714
2715         j->data_threshold = sz;
2716         return 0;
2717 }
2718
2719 _public_ int sd_journal_get_data_threshold(sd_journal *j, size_t *sz) {
2720         assert_return(j, -EINVAL);
2721         assert_return(!journal_pid_changed(j), -ECHILD);
2722         assert_return(sz, -EINVAL);
2723
2724         *sz = j->data_threshold;
2725         return 0;
2726 }