chiark / gitweb /
ecf47fda3d36481d5426eb3c323fd5674a36b6e8
[elogind.git] / src / journal / sd-journal.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <stddef.h>
25 #include <unistd.h>
26 #include <sys/inotify.h>
27 #include <sys/poll.h>
28 #include <sys/vfs.h>
29 #include <linux/magic.h>
30
31 #include "sd-journal.h"
32 #include "journal-def.h"
33 #include "journal-file.h"
34 #include "hashmap.h"
35 #include "list.h"
36 #include "strv.h"
37 #include "path-util.h"
38 #include "lookup3.h"
39 #include "compress.h"
40 #include "journal-internal.h"
41 #include "missing.h"
42 #include "catalog.h"
43 #include "replace-var.h"
44 #include "fileio.h"
45
/* Numeric limit related to journal files. Not referenced in the visible
 * part of this file -- presumably the maximum number of files a single
 * sd_journal may track (TODO confirm against its users). */
46 #define JOURNAL_FILES_MAX 7168
47
/* Two seconds, expressed in microseconds. Presumably the interval between
 * re-checks of the journal file set (TODO confirm against its users). */
48 #define JOURNAL_FILES_RECHECK_USEC (2 * USEC_PER_SEC)
49
/* Not referenced in the visible part of this file. */
50 #define REPLACE_VAR_MAX 256
51
/* 64 KiB. */
52 #define DEFAULT_DATA_THRESHOLD (64*1024)
53
/* Forward declaration: real_journal_next() calls this before a definition
 * has been seen. */
54 static void remove_file_real(sd_journal *j, JournalFile *f);
55
56 static bool journal_pid_changed(sd_journal *j) {
57         assert(j);
58
59         /* We don't support people creating a journal object and
60          * keeping it around over a fork(). Let's complain. */
61
62         return j->original_pid != getpid();
63 }
64
65 /* We return an error here only if we didn't manage to
66    memorize the real error. */
67 static int set_put_error(sd_journal *j, int r) {
68         int k;
69
70         if (r >= 0)
71                 return r;
72
73         k = set_ensure_allocated(&j->errors, NULL);
74         if (k < 0)
75                 return k;
76
77         return set_put(j->errors, INT_TO_PTR(r));
78 }
79
80 static void detach_location(sd_journal *j) {
81         Iterator i;
82         JournalFile *f;
83
84         assert(j);
85
86         j->current_file = NULL;
87         j->current_field = 0;
88
89         ORDERED_HASHMAP_FOREACH(f, j->files, i)
90                 journal_file_reset_location(f);
91 }
92
93 static void reset_location(sd_journal *j) {
94         assert(j);
95
96         detach_location(j);
97         zero(j->current_location);
98 }
99
100 static void init_location(Location *l, LocationType type, JournalFile *f, Object *o) {
101         assert(l);
102         assert(type == LOCATION_DISCRETE || type == LOCATION_SEEK);
103         assert(f);
104         assert(o->object.type == OBJECT_ENTRY);
105
106         l->type = type;
107         l->seqnum = le64toh(o->entry.seqnum);
108         l->seqnum_id = f->header->seqnum_id;
109         l->realtime = le64toh(o->entry.realtime);
110         l->monotonic = le64toh(o->entry.monotonic);
111         l->boot_id = o->entry.boot_id;
112         l->xor_hash = le64toh(o->entry.xor_hash);
113
114         l->seqnum_set = l->realtime_set = l->monotonic_set = l->xor_hash_set = true;
115 }
116
117 static void set_location(sd_journal *j, JournalFile *f, Object *o) {
118         assert(j);
119         assert(f);
120         assert(o);
121
122         init_location(&j->current_location, LOCATION_DISCRETE, f, o);
123
124         j->current_file = f;
125         j->current_field = 0;
126
127         /* Let f know its candidate entry was picked. */
128         assert(f->location_type == LOCATION_SEEK);
129         f->location_type = LOCATION_DISCRETE;
130 }
131
/* Check whether the size bytes at data form an acceptable match string.
 *
 * Accepted is a non-empty field name made only of 'A'-'Z', '0'-'9' and
 * '_', followed by '='. Anything shorter than two bytes, anything
 * starting with "__", and anything without '=' is rejected. Returns
 * non-zero when valid, zero otherwise. */
static int match_is_valid(const void *data, size_t size) {
        const char *s;
        size_t n;

        assert(data);

        if (size < 2)
                return false;

        if (startswith(data, "__"))
                return false;

        s = data;
        for (n = 0; n < size; n++) {
                char c = s[n];

                /* The first '=' ends the field name, which must not be empty. */
                if (c == '=')
                        return n > 0;

                if (c != '_' &&
                    !(c >= 'A' && c <= 'Z') &&
                    !(c >= '0' && c <= '9'))
                        return false;
        }

        /* Ran out of data without seeing '='. */
        return false;
}
163
/* Tell whether two match strings (_a of s bytes, _b of t bytes) name the
 * same field, i.e. agree byte for byte up to and including the first '='.
 *
 * Both inputs are expected to contain '='; running off the end of the
 * shorter one without a decision is treated as unreachable. */
static bool same_field(const void *_a, size_t s, const void *_b, size_t t) {
        const uint8_t *lhs = _a, *rhs = _b;
        size_t limit = s < t ? s : t;
        size_t k;

        for (k = 0; k < limit; k++) {
                if (lhs[k] != rhs[k])
                        return false;

                if (lhs[k] == '=')
                        return true;
        }

        assert_not_reached("\"=\" not found");
}
179
180 static Match *match_new(Match *p, MatchType t) {
181         Match *m;
182
183         m = new0(Match, 1);
184         if (!m)
185                 return NULL;
186
187         m->type = t;
188
189         if (p) {
190                 m->parent = p;
191                 LIST_PREPEND(matches, p->matches, m);
192         }
193
194         return m;
195 }
196
197 static void match_free(Match *m) {
198         assert(m);
199
200         while (m->matches)
201                 match_free(m->matches);
202
203         if (m->parent)
204                 LIST_REMOVE(matches, m->parent->matches, m);
205
206         free(m->data);
207         free(m);
208 }
209
210 static void match_free_if_empty(Match *m) {
211         if (!m || m->matches)
212                 return;
213
214         match_free(m);
215 }
216
217 _public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size) {
218         Match *l3, *l4, *add_here = NULL, *m;
219         le64_t le_hash;
220
221         assert_return(j, -EINVAL);
222         assert_return(!journal_pid_changed(j), -ECHILD);
223         assert_return(data, -EINVAL);
224
225         if (size == 0)
226                 size = strlen(data);
227
228         assert_return(match_is_valid(data, size), -EINVAL);
229
230         /* level 0: AND term
231          * level 1: OR terms
232          * level 2: AND terms
233          * level 3: OR terms
234          * level 4: concrete matches */
235
236         if (!j->level0) {
237                 j->level0 = match_new(NULL, MATCH_AND_TERM);
238                 if (!j->level0)
239                         return -ENOMEM;
240         }
241
242         if (!j->level1) {
243                 j->level1 = match_new(j->level0, MATCH_OR_TERM);
244                 if (!j->level1)
245                         return -ENOMEM;
246         }
247
248         if (!j->level2) {
249                 j->level2 = match_new(j->level1, MATCH_AND_TERM);
250                 if (!j->level2)
251                         return -ENOMEM;
252         }
253
254         assert(j->level0->type == MATCH_AND_TERM);
255         assert(j->level1->type == MATCH_OR_TERM);
256         assert(j->level2->type == MATCH_AND_TERM);
257
258         le_hash = htole64(hash64(data, size));
259
260         LIST_FOREACH(matches, l3, j->level2->matches) {
261                 assert(l3->type == MATCH_OR_TERM);
262
263                 LIST_FOREACH(matches, l4, l3->matches) {
264                         assert(l4->type == MATCH_DISCRETE);
265
266                         /* Exactly the same match already? Then ignore
267                          * this addition */
268                         if (l4->le_hash == le_hash &&
269                             l4->size == size &&
270                             memcmp(l4->data, data, size) == 0)
271                                 return 0;
272
273                         /* Same field? Then let's add this to this OR term */
274                         if (same_field(data, size, l4->data, l4->size)) {
275                                 add_here = l3;
276                                 break;
277                         }
278                 }
279
280                 if (add_here)
281                         break;
282         }
283
284         if (!add_here) {
285                 add_here = match_new(j->level2, MATCH_OR_TERM);
286                 if (!add_here)
287                         goto fail;
288         }
289
290         m = match_new(add_here, MATCH_DISCRETE);
291         if (!m)
292                 goto fail;
293
294         m->le_hash = le_hash;
295         m->size = size;
296         m->data = memdup(data, size);
297         if (!m->data)
298                 goto fail;
299
300         detach_location(j);
301
302         return 0;
303
304 fail:
305         match_free_if_empty(add_here);
306         match_free_if_empty(j->level2);
307         match_free_if_empty(j->level1);
308         match_free_if_empty(j->level0);
309
310         return -ENOMEM;
311 }
312
313 _public_ int sd_journal_add_conjunction(sd_journal *j) {
314         assert_return(j, -EINVAL);
315         assert_return(!journal_pid_changed(j), -ECHILD);
316
317         if (!j->level0)
318                 return 0;
319
320         if (!j->level1)
321                 return 0;
322
323         if (!j->level1->matches)
324                 return 0;
325
326         j->level1 = NULL;
327         j->level2 = NULL;
328
329         return 0;
330 }
331
332 _public_ int sd_journal_add_disjunction(sd_journal *j) {
333         assert_return(j, -EINVAL);
334         assert_return(!journal_pid_changed(j), -ECHILD);
335
336         if (!j->level0)
337                 return 0;
338
339         if (!j->level1)
340                 return 0;
341
342         if (!j->level2)
343                 return 0;
344
345         if (!j->level2->matches)
346                 return 0;
347
348         j->level2 = NULL;
349         return 0;
350 }
351
352 static char *match_make_string(Match *m) {
353         char *p, *r;
354         Match *i;
355         bool enclose = false;
356
357         if (!m)
358                 return strdup("none");
359
360         if (m->type == MATCH_DISCRETE)
361                 return strndup(m->data, m->size);
362
363         p = NULL;
364         LIST_FOREACH(matches, i, m->matches) {
365                 char *t, *k;
366
367                 t = match_make_string(i);
368                 if (!t) {
369                         free(p);
370                         return NULL;
371                 }
372
373                 if (p) {
374                         k = strjoin(p, m->type == MATCH_OR_TERM ? " OR " : " AND ", t, NULL);
375                         free(p);
376                         free(t);
377
378                         if (!k)
379                                 return NULL;
380
381                         p = k;
382
383                         enclose = true;
384                 } else
385                         p = t;
386         }
387
388         if (enclose) {
389                 r = strjoin("(", p, ")", NULL);
390                 free(p);
391                 return r;
392         }
393
394         return p;
395 }
396
397 char *journal_make_match_string(sd_journal *j) {
398         assert(j);
399
400         return match_make_string(j->level0);
401 }
402
403 _public_ void sd_journal_flush_matches(sd_journal *j) {
404         if (!j)
405                 return;
406
407         if (j->level0)
408                 match_free(j->level0);
409
410         j->level0 = j->level1 = j->level2 = NULL;
411
412         detach_location(j);
413 }
414
415 _pure_ static int compare_with_location(JournalFile *f, Location *l) {
416         assert(f);
417         assert(l);
418         assert(f->location_type == LOCATION_SEEK);
419         assert(l->type == LOCATION_DISCRETE || l->type == LOCATION_SEEK);
420
421         if (l->monotonic_set &&
422             sd_id128_equal(f->current_boot_id, l->boot_id) &&
423             l->realtime_set &&
424             f->current_realtime == l->realtime &&
425             l->xor_hash_set &&
426             f->current_xor_hash == l->xor_hash)
427                 return 0;
428
429         if (l->seqnum_set &&
430             sd_id128_equal(f->header->seqnum_id, l->seqnum_id)) {
431
432                 if (f->current_seqnum < l->seqnum)
433                         return -1;
434                 if (f->current_seqnum > l->seqnum)
435                         return 1;
436         }
437
438         if (l->monotonic_set &&
439             sd_id128_equal(f->current_boot_id, l->boot_id)) {
440
441                 if (f->current_monotonic < l->monotonic)
442                         return -1;
443                 if (f->current_monotonic > l->monotonic)
444                         return 1;
445         }
446
447         if (l->realtime_set) {
448
449                 if (f->current_realtime < l->realtime)
450                         return -1;
451                 if (f->current_realtime > l->realtime)
452                         return 1;
453         }
454
455         if (l->xor_hash_set) {
456
457                 if (f->current_xor_hash < l->xor_hash)
458                         return -1;
459                 if (f->current_xor_hash > l->xor_hash)
460                         return 1;
461         }
462
463         return 0;
464 }
465
466 static int next_for_match(
467                 sd_journal *j,
468                 Match *m,
469                 JournalFile *f,
470                 uint64_t after_offset,
471                 direction_t direction,
472                 Object **ret,
473                 uint64_t *offset) {
474
475         int r;
476         uint64_t np = 0;
477         Object *n;
478
479         assert(j);
480         assert(m);
481         assert(f);
482
483         if (m->type == MATCH_DISCRETE) {
484                 uint64_t dp;
485
486                 r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
487                 if (r <= 0)
488                         return r;
489
490                 return journal_file_move_to_entry_by_offset_for_data(f, dp, after_offset, direction, ret, offset);
491
492         } else if (m->type == MATCH_OR_TERM) {
493                 Match *i;
494
495                 /* Find the earliest match beyond after_offset */
496
497                 LIST_FOREACH(matches, i, m->matches) {
498                         uint64_t cp;
499
500                         r = next_for_match(j, i, f, after_offset, direction, NULL, &cp);
501                         if (r < 0)
502                                 return r;
503                         else if (r > 0) {
504                                 if (np == 0 || (direction == DIRECTION_DOWN ? cp < np : cp > np))
505                                         np = cp;
506                         }
507                 }
508
509                 if (np == 0)
510                         return 0;
511
512         } else if (m->type == MATCH_AND_TERM) {
513                 Match *i, *last_moved;
514
515                 /* Always jump to the next matching entry and repeat
516                  * this until we find an offset that matches for all
517                  * matches. */
518
519                 if (!m->matches)
520                         return 0;
521
522                 r = next_for_match(j, m->matches, f, after_offset, direction, NULL, &np);
523                 if (r <= 0)
524                         return r;
525
526                 assert(direction == DIRECTION_DOWN ? np >= after_offset : np <= after_offset);
527                 last_moved = m->matches;
528
529                 LIST_LOOP_BUT_ONE(matches, i, m->matches, last_moved) {
530                         uint64_t cp;
531
532                         r = next_for_match(j, i, f, np, direction, NULL, &cp);
533                         if (r <= 0)
534                                 return r;
535
536                         assert(direction == DIRECTION_DOWN ? cp >= np : cp <= np);
537                         if (direction == DIRECTION_DOWN ? cp > np : cp < np) {
538                                 np = cp;
539                                 last_moved = i;
540                         }
541                 }
542         }
543
544         assert(np > 0);
545
546         r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
547         if (r < 0)
548                 return r;
549
550         if (ret)
551                 *ret = n;
552         if (offset)
553                 *offset = np;
554
555         return 1;
556 }
557
558 static int find_location_for_match(
559                 sd_journal *j,
560                 Match *m,
561                 JournalFile *f,
562                 direction_t direction,
563                 Object **ret,
564                 uint64_t *offset) {
565
566         int r;
567
568         assert(j);
569         assert(m);
570         assert(f);
571
572         if (m->type == MATCH_DISCRETE) {
573                 uint64_t dp;
574
575                 r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
576                 if (r <= 0)
577                         return r;
578
579                 /* FIXME: missing: find by monotonic */
580
581                 if (j->current_location.type == LOCATION_HEAD)
582                         return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_DOWN, ret, offset);
583                 if (j->current_location.type == LOCATION_TAIL)
584                         return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_UP, ret, offset);
585                 if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
586                         return journal_file_move_to_entry_by_seqnum_for_data(f, dp, j->current_location.seqnum, direction, ret, offset);
587                 if (j->current_location.monotonic_set) {
588                         r = journal_file_move_to_entry_by_monotonic_for_data(f, dp, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
589                         if (r != -ENOENT)
590                                 return r;
591                 }
592                 if (j->current_location.realtime_set)
593                         return journal_file_move_to_entry_by_realtime_for_data(f, dp, j->current_location.realtime, direction, ret, offset);
594
595                 return journal_file_next_entry_for_data(f, NULL, 0, dp, direction, ret, offset);
596
597         } else if (m->type == MATCH_OR_TERM) {
598                 uint64_t np = 0;
599                 Object *n;
600                 Match *i;
601
602                 /* Find the earliest match */
603
604                 LIST_FOREACH(matches, i, m->matches) {
605                         uint64_t cp;
606
607                         r = find_location_for_match(j, i, f, direction, NULL, &cp);
608                         if (r < 0)
609                                 return r;
610                         else if (r > 0) {
611                                 if (np == 0 || (direction == DIRECTION_DOWN ? np > cp : np < cp))
612                                         np = cp;
613                         }
614                 }
615
616                 if (np == 0)
617                         return 0;
618
619                 r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
620                 if (r < 0)
621                         return r;
622
623                 if (ret)
624                         *ret = n;
625                 if (offset)
626                         *offset = np;
627
628                 return 1;
629
630         } else {
631                 Match *i;
632                 uint64_t np = 0;
633
634                 assert(m->type == MATCH_AND_TERM);
635
636                 /* First jump to the last match, and then find the
637                  * next one where all matches match */
638
639                 if (!m->matches)
640                         return 0;
641
642                 LIST_FOREACH(matches, i, m->matches) {
643                         uint64_t cp;
644
645                         r = find_location_for_match(j, i, f, direction, NULL, &cp);
646                         if (r <= 0)
647                                 return r;
648
649                         if (np == 0 || (direction == DIRECTION_DOWN ? cp > np : cp < np))
650                                 np = cp;
651                 }
652
653                 return next_for_match(j, m, f, np, direction, ret, offset);
654         }
655 }
656
657 static int find_location_with_matches(
658                 sd_journal *j,
659                 JournalFile *f,
660                 direction_t direction,
661                 Object **ret,
662                 uint64_t *offset) {
663
664         int r;
665
666         assert(j);
667         assert(f);
668         assert(ret);
669         assert(offset);
670
671         if (!j->level0) {
672                 /* No matches is simple */
673
674                 if (j->current_location.type == LOCATION_HEAD)
675                         return journal_file_next_entry(f, 0, DIRECTION_DOWN, ret, offset);
676                 if (j->current_location.type == LOCATION_TAIL)
677                         return journal_file_next_entry(f, 0, DIRECTION_UP, ret, offset);
678                 if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
679                         return journal_file_move_to_entry_by_seqnum(f, j->current_location.seqnum, direction, ret, offset);
680                 if (j->current_location.monotonic_set) {
681                         r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
682                         if (r != -ENOENT)
683                                 return r;
684                 }
685                 if (j->current_location.realtime_set)
686                         return journal_file_move_to_entry_by_realtime(f, j->current_location.realtime, direction, ret, offset);
687
688                 return journal_file_next_entry(f, 0, direction, ret, offset);
689         } else
690                 return find_location_for_match(j, j->level0, f, direction, ret, offset);
691 }
692
693 static int next_with_matches(
694                 sd_journal *j,
695                 JournalFile *f,
696                 direction_t direction,
697                 Object **ret,
698                 uint64_t *offset) {
699
700         assert(j);
701         assert(f);
702         assert(ret);
703         assert(offset);
704
705         /* No matches is easy. We simple advance the file
706          * pointer by one. */
707         if (!j->level0)
708                 return journal_file_next_entry(f, f->current_offset, direction, ret, offset);
709
710         /* If we have a match then we look for the next matching entry
711          * with an offset at least one step larger */
712         return next_for_match(j, j->level0, f,
713                               direction == DIRECTION_DOWN ? f->current_offset + 1
714                                                           : f->current_offset - 1,
715                               direction, ret, offset);
716 }
717
718 static int next_beyond_location(sd_journal *j, JournalFile *f, direction_t direction) {
719         Object *c;
720         uint64_t cp, n_entries;
721         int r;
722
723         assert(j);
724         assert(f);
725
726         if (f->last_direction == direction && f->current_offset > 0) {
727                 /* If we hit EOF before, recheck if any new entries arrived. */
728                 n_entries = le64toh(f->header->n_entries);
729                 if (f->location_type == LOCATION_TAIL && n_entries == f->last_n_entries)
730                         return 0;
731                 f->last_n_entries = n_entries;
732
733                 /* LOCATION_SEEK here means we did the work in a previous
734                  * iteration and the current location already points to a
735                  * candidate entry. */
736                 if (f->location_type != LOCATION_SEEK) {
737                         r = next_with_matches(j, f, direction, &c, &cp);
738                         if (r <= 0)
739                                 return r;
740
741                         journal_file_save_location(f, direction, c, cp);
742                 }
743         } else {
744                 r = find_location_with_matches(j, f, direction, &c, &cp);
745                 if (r <= 0)
746                         return r;
747
748                 journal_file_save_location(f, direction, c, cp);
749         }
750
751         /* OK, we found the spot, now let's advance until an entry
752          * that is actually different from what we were previously
753          * looking at. This is necessary to handle entries which exist
754          * in two (or more) journal files, and which shall all be
755          * suppressed but one. */
756
757         for (;;) {
758                 bool found;
759
760                 if (j->current_location.type == LOCATION_DISCRETE) {
761                         int k;
762
763                         k = compare_with_location(f, &j->current_location);
764
765                         found = direction == DIRECTION_DOWN ? k > 0 : k < 0;
766                 } else
767                         found = true;
768
769                 if (found)
770                         return 1;
771
772                 r = next_with_matches(j, f, direction, &c, &cp);
773                 if (r <= 0)
774                         return r;
775
776                 journal_file_save_location(f, direction, c, cp);
777         }
778 }
779
780 static int real_journal_next(sd_journal *j, direction_t direction) {
781         JournalFile *f, *new_file = NULL;
782         Iterator i;
783         Object *o;
784         int r;
785
786         assert_return(j, -EINVAL);
787         assert_return(!journal_pid_changed(j), -ECHILD);
788
789         ORDERED_HASHMAP_FOREACH(f, j->files, i) {
790                 bool found;
791
792                 r = next_beyond_location(j, f, direction);
793                 if (r < 0) {
794                         log_debug_errno(r, "Can't iterate through %s, ignoring: %m", f->path);
795                         remove_file_real(j, f);
796                         continue;
797                 } else if (r == 0) {
798                         f->location_type = LOCATION_TAIL;
799                         continue;
800                 }
801
802                 if (!new_file)
803                         found = true;
804                 else {
805                         int k;
806
807                         k = journal_file_compare_locations(f, new_file);
808
809                         found = direction == DIRECTION_DOWN ? k < 0 : k > 0;
810                 }
811
812                 if (found)
813                         new_file = f;
814         }
815
816         if (!new_file)
817                 return 0;
818
819         r = journal_file_move_to_object(new_file, OBJECT_ENTRY, new_file->current_offset, &o);
820         if (r < 0)
821                 return r;
822
823         set_location(j, new_file, o);
824
825         return 1;
826 }
827
828 _public_ int sd_journal_next(sd_journal *j) {
829         return real_journal_next(j, DIRECTION_DOWN);
830 }
831
832 _public_ int sd_journal_previous(sd_journal *j) {
833         return real_journal_next(j, DIRECTION_UP);
834 }
835
836 static int real_journal_next_skip(sd_journal *j, direction_t direction, uint64_t skip) {
837         int c = 0, r;
838
839         assert_return(j, -EINVAL);
840         assert_return(!journal_pid_changed(j), -ECHILD);
841
842         if (skip == 0) {
843                 /* If this is not a discrete skip, then at least
844                  * resolve the current location */
845                 if (j->current_location.type != LOCATION_DISCRETE)
846                         return real_journal_next(j, direction);
847
848                 return 0;
849         }
850
851         do {
852                 r = real_journal_next(j, direction);
853                 if (r < 0)
854                         return r;
855
856                 if (r == 0)
857                         return c;
858
859                 skip--;
860                 c++;
861         } while (skip > 0);
862
863         return c;
864 }
865
866 _public_ int sd_journal_next_skip(sd_journal *j, uint64_t skip) {
867         return real_journal_next_skip(j, DIRECTION_DOWN, skip);
868 }
869
870 _public_ int sd_journal_previous_skip(sd_journal *j, uint64_t skip) {
871         return real_journal_next_skip(j, DIRECTION_UP, skip);
872 }
873
874 _public_ int sd_journal_get_cursor(sd_journal *j, char **cursor) {
875         Object *o;
876         int r;
877         char bid[33], sid[33];
878
879         assert_return(j, -EINVAL);
880         assert_return(!journal_pid_changed(j), -ECHILD);
881         assert_return(cursor, -EINVAL);
882
883         if (!j->current_file || j->current_file->current_offset <= 0)
884                 return -EADDRNOTAVAIL;
885
886         r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
887         if (r < 0)
888                 return r;
889
890         sd_id128_to_string(j->current_file->header->seqnum_id, sid);
891         sd_id128_to_string(o->entry.boot_id, bid);
892
893         if (asprintf(cursor,
894                      "s=%s;i=%"PRIx64";b=%s;m=%"PRIx64";t=%"PRIx64";x=%"PRIx64,
895                      sid, le64toh(o->entry.seqnum),
896                      bid, le64toh(o->entry.monotonic),
897                      le64toh(o->entry.realtime),
898                      le64toh(o->entry.xor_hash)) < 0)
899                 return -ENOMEM;
900
901         return 0;
902 }
903
904 _public_ int sd_journal_seek_cursor(sd_journal *j, const char *cursor) {
905         const char *word, *state;
906         size_t l;
907         unsigned long long seqnum, monotonic, realtime, xor_hash;
908         bool
909                 seqnum_id_set = false,
910                 seqnum_set = false,
911                 boot_id_set = false,
912                 monotonic_set = false,
913                 realtime_set = false,
914                 xor_hash_set = false;
915         sd_id128_t seqnum_id, boot_id;
916
917         assert_return(j, -EINVAL);
918         assert_return(!journal_pid_changed(j), -ECHILD);
919         assert_return(!isempty(cursor), -EINVAL);
920
921         FOREACH_WORD_SEPARATOR(word, l, cursor, ";", state) {
922                 char *item;
923                 int k = 0;
924
925                 if (l < 2 || word[1] != '=')
926                         return -EINVAL;
927
928                 item = strndup(word, l);
929                 if (!item)
930                         return -ENOMEM;
931
932                 switch (word[0]) {
933
934                 case 's':
935                         seqnum_id_set = true;
936                         k = sd_id128_from_string(item+2, &seqnum_id);
937                         break;
938
939                 case 'i':
940                         seqnum_set = true;
941                         if (sscanf(item+2, "%llx", &seqnum) != 1)
942                                 k = -EINVAL;
943                         break;
944
945                 case 'b':
946                         boot_id_set = true;
947                         k = sd_id128_from_string(item+2, &boot_id);
948                         break;
949
950                 case 'm':
951                         monotonic_set = true;
952                         if (sscanf(item+2, "%llx", &monotonic) != 1)
953                                 k = -EINVAL;
954                         break;
955
956                 case 't':
957                         realtime_set = true;
958                         if (sscanf(item+2, "%llx", &realtime) != 1)
959                                 k = -EINVAL;
960                         break;
961
962                 case 'x':
963                         xor_hash_set = true;
964                         if (sscanf(item+2, "%llx", &xor_hash) != 1)
965                                 k = -EINVAL;
966                         break;
967                 }
968
969                 free(item);
970
971                 if (k < 0)
972                         return k;
973         }
974
975         if ((!seqnum_set || !seqnum_id_set) &&
976             (!monotonic_set || !boot_id_set) &&
977             !realtime_set)
978                 return -EINVAL;
979
980         reset_location(j);
981
982         j->current_location.type = LOCATION_SEEK;
983
984         if (realtime_set) {
985                 j->current_location.realtime = (uint64_t) realtime;
986                 j->current_location.realtime_set = true;
987         }
988
989         if (seqnum_set && seqnum_id_set) {
990                 j->current_location.seqnum = (uint64_t) seqnum;
991                 j->current_location.seqnum_id = seqnum_id;
992                 j->current_location.seqnum_set = true;
993         }
994
995         if (monotonic_set && boot_id_set) {
996                 j->current_location.monotonic = (uint64_t) monotonic;
997                 j->current_location.boot_id = boot_id;
998                 j->current_location.monotonic_set = true;
999         }
1000
1001         if (xor_hash_set) {
1002                 j->current_location.xor_hash = (uint64_t) xor_hash;
1003                 j->current_location.xor_hash_set = true;
1004         }
1005
1006         return 0;
1007 }
1008
1009 _public_ int sd_journal_test_cursor(sd_journal *j, const char *cursor) {
1010         int r;
1011         const char *word, *state;
1012         size_t l;
1013         Object *o;
1014
1015         assert_return(j, -EINVAL);
1016         assert_return(!journal_pid_changed(j), -ECHILD);
1017         assert_return(!isempty(cursor), -EINVAL);
1018
1019         if (!j->current_file || j->current_file->current_offset <= 0)
1020                 return -EADDRNOTAVAIL;
1021
1022         r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
1023         if (r < 0)
1024                 return r;
1025
1026         FOREACH_WORD_SEPARATOR(word, l, cursor, ";", state) {
1027                 _cleanup_free_ char *item = NULL;
1028                 sd_id128_t id;
1029                 unsigned long long ll;
1030                 int k = 0;
1031
1032                 if (l < 2 || word[1] != '=')
1033                         return -EINVAL;
1034
1035                 item = strndup(word, l);
1036                 if (!item)
1037                         return -ENOMEM;
1038
1039                 switch (word[0]) {
1040
1041                 case 's':
1042                         k = sd_id128_from_string(item+2, &id);
1043                         if (k < 0)
1044                                 return k;
1045                         if (!sd_id128_equal(id, j->current_file->header->seqnum_id))
1046                                 return 0;
1047                         break;
1048
1049                 case 'i':
1050                         if (sscanf(item+2, "%llx", &ll) != 1)
1051                                 return -EINVAL;
1052                         if (ll != le64toh(o->entry.seqnum))
1053                                 return 0;
1054                         break;
1055
1056                 case 'b':
1057                         k = sd_id128_from_string(item+2, &id);
1058                         if (k < 0)
1059                                 return k;
1060                         if (!sd_id128_equal(id, o->entry.boot_id))
1061                                 return 0;
1062                         break;
1063
1064                 case 'm':
1065                         if (sscanf(item+2, "%llx", &ll) != 1)
1066                                 return -EINVAL;
1067                         if (ll != le64toh(o->entry.monotonic))
1068                                 return 0;
1069                         break;
1070
1071                 case 't':
1072                         if (sscanf(item+2, "%llx", &ll) != 1)
1073                                 return -EINVAL;
1074                         if (ll != le64toh(o->entry.realtime))
1075                                 return 0;
1076                         break;
1077
1078                 case 'x':
1079                         if (sscanf(item+2, "%llx", &ll) != 1)
1080                                 return -EINVAL;
1081                         if (ll != le64toh(o->entry.xor_hash))
1082                                 return 0;
1083                         break;
1084                 }
1085         }
1086
1087         return 1;
1088 }
1089
1090
1091 _public_ int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t usec) {
1092         assert_return(j, -EINVAL);
1093         assert_return(!journal_pid_changed(j), -ECHILD);
1094
1095         reset_location(j);
1096         j->current_location.type = LOCATION_SEEK;
1097         j->current_location.boot_id = boot_id;
1098         j->current_location.monotonic = usec;
1099         j->current_location.monotonic_set = true;
1100
1101         return 0;
1102 }
1103
1104 _public_ int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec) {
1105         assert_return(j, -EINVAL);
1106         assert_return(!journal_pid_changed(j), -ECHILD);
1107
1108         reset_location(j);
1109         j->current_location.type = LOCATION_SEEK;
1110         j->current_location.realtime = usec;
1111         j->current_location.realtime_set = true;
1112
1113         return 0;
1114 }
1115
1116 _public_ int sd_journal_seek_head(sd_journal *j) {
1117         assert_return(j, -EINVAL);
1118         assert_return(!journal_pid_changed(j), -ECHILD);
1119
1120         reset_location(j);
1121         j->current_location.type = LOCATION_HEAD;
1122
1123         return 0;
1124 }
1125
1126 _public_ int sd_journal_seek_tail(sd_journal *j) {
1127         assert_return(j, -EINVAL);
1128         assert_return(!journal_pid_changed(j), -ECHILD);
1129
1130         reset_location(j);
1131         j->current_location.type = LOCATION_TAIL;
1132
1133         return 0;
1134 }
1135
1136 static void check_network(sd_journal *j, int fd) {
1137         struct statfs sfs;
1138
1139         assert(j);
1140
1141         if (j->on_network)
1142                 return;
1143
1144         if (fstatfs(fd, &sfs) < 0)
1145                 return;
1146
1147         j->on_network =
1148                 F_TYPE_EQUAL(sfs.f_type, CIFS_MAGIC_NUMBER) ||
1149                 F_TYPE_EQUAL(sfs.f_type, CODA_SUPER_MAGIC) ||
1150                 F_TYPE_EQUAL(sfs.f_type, NCP_SUPER_MAGIC) ||
1151                 F_TYPE_EQUAL(sfs.f_type, NFS_SUPER_MAGIC) ||
1152                 F_TYPE_EQUAL(sfs.f_type, SMB_SUPER_MAGIC);
1153 }
1154
/* Returns true if filename is "<prefix>.journal" or "<prefix>.journal~", or
 * if it begins with "<prefix>@". */
static bool file_has_type_prefix(const char *prefix, const char *filename) {
        const char *plain, *with_tilde, *with_at;

        plain = strjoina(prefix, ".journal");
        with_tilde = strjoina(plain, "~");
        with_at = strjoina(prefix, "@");

        if (streq(filename, plain))
                return true;

        if (streq(filename, with_tilde))
                return true;

        return startswith(filename, with_at);
}
1166
1167 static bool file_type_wanted(int flags, const char *filename) {
1168         if (!endswith(filename, ".journal") && !endswith(filename, ".journal~"))
1169                 return false;
1170
1171         /* no flags set â†’ every type is OK */
1172         if (!(flags & (SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER)))
1173                 return true;
1174
1175         if (flags & SD_JOURNAL_SYSTEM && file_has_type_prefix("system", filename))
1176                 return true;
1177
1178         if (flags & SD_JOURNAL_CURRENT_USER) {
1179                 char prefix[5 + DECIMAL_STR_MAX(uid_t) + 1];
1180
1181                 xsprintf(prefix, "user-"UID_FMT, getuid());
1182
1183                 if (file_has_type_prefix(prefix, filename))
1184                         return true;
1185         }
1186
1187         return false;
1188 }
1189
1190 static int add_any_file(sd_journal *j, const char *path) {
1191         JournalFile *f = NULL;
1192         int r;
1193
1194         assert(j);
1195         assert(path);
1196
1197         if (ordered_hashmap_get(j->files, path))
1198                 return 0;
1199
1200         if (ordered_hashmap_size(j->files) >= JOURNAL_FILES_MAX) {
1201                 log_warning("Too many open journal files, not adding %s.", path);
1202                 return set_put_error(j, -ETOOMANYREFS);
1203         }
1204
1205         r = journal_file_open(path, O_RDONLY, 0, false, false, NULL, j->mmap, NULL, &f);
1206         if (r < 0)
1207                 return r;
1208
1209         /* journal_file_dump(f); */
1210
1211         r = ordered_hashmap_put(j->files, f->path, f);
1212         if (r < 0) {
1213                 journal_file_close(f);
1214                 return r;
1215         }
1216
1217         log_debug("File %s added.", f->path);
1218
1219         check_network(j, f->fd);
1220
1221         j->current_invalidate_counter ++;
1222
1223         return 0;
1224 }
1225
1226 static int add_file(sd_journal *j, const char *prefix, const char *filename) {
1227         _cleanup_free_ char *path = NULL;
1228         int r;
1229
1230         assert(j);
1231         assert(prefix);
1232         assert(filename);
1233
1234         if (j->no_new_files ||
1235             !file_type_wanted(j->flags, filename))
1236                 return 0;
1237
1238         path = strjoin(prefix, "/", filename, NULL);
1239         if (!path)
1240                 return -ENOMEM;
1241
1242         r = add_any_file(j, path);
1243         if (r == -ENOENT)
1244                 return 0;
1245         return 0;
1246 }
1247
1248 static int remove_file(sd_journal *j, const char *prefix, const char *filename) {
1249         _cleanup_free_ char *path;
1250         JournalFile *f;
1251
1252         assert(j);
1253         assert(prefix);
1254         assert(filename);
1255
1256         path = strjoin(prefix, "/", filename, NULL);
1257         if (!path)
1258                 return -ENOMEM;
1259
1260         f = ordered_hashmap_get(j->files, path);
1261         if (!f)
1262                 return 0;
1263
1264         remove_file_real(j, f);
1265         return 0;
1266 }
1267
1268 static void remove_file_real(sd_journal *j, JournalFile *f) {
1269         assert(j);
1270         assert(f);
1271
1272         ordered_hashmap_remove(j->files, f->path);
1273
1274         log_debug("File %s removed.", f->path);
1275
1276         if (j->current_file == f) {
1277                 j->current_file = NULL;
1278                 j->current_field = 0;
1279         }
1280
1281         if (j->unique_file == f) {
1282                 /* Jump to the next unique_file or NULL if that one was last */
1283                 j->unique_file = ordered_hashmap_next(j->files, j->unique_file->path);
1284                 j->unique_offset = 0;
1285                 if (!j->unique_file)
1286                         j->unique_file_lost = true;
1287         }
1288
1289         journal_file_close(f);
1290
1291         j->current_invalidate_counter ++;
1292 }
1293
1294 static int add_directory(sd_journal *j, const char *prefix, const char *dirname) {
1295         _cleanup_free_ char *path = NULL;
1296         int r;
1297         _cleanup_closedir_ DIR *d = NULL;
1298         sd_id128_t id, mid;
1299         Directory *m;
1300
1301         assert(j);
1302         assert(prefix);
1303         assert(dirname);
1304
1305         log_debug("Considering %s/%s.", prefix, dirname);
1306
1307         if ((j->flags & SD_JOURNAL_LOCAL_ONLY) &&
1308             (sd_id128_from_string(dirname, &id) < 0 ||
1309              sd_id128_get_machine(&mid) < 0 ||
1310              !(sd_id128_equal(id, mid) || path_startswith(prefix, "/run"))))
1311             return 0;
1312
1313         path = strjoin(prefix, "/", dirname, NULL);
1314         if (!path)
1315                 return -ENOMEM;
1316
1317         d = opendir(path);
1318         if (!d) {
1319                 log_debug_errno(errno, "Failed to open %s: %m", path);
1320                 if (errno == ENOENT)
1321                         return 0;
1322                 return -errno;
1323         }
1324
1325         m = hashmap_get(j->directories_by_path, path);
1326         if (!m) {
1327                 m = new0(Directory, 1);
1328                 if (!m)
1329                         return -ENOMEM;
1330
1331                 m->is_root = false;
1332                 m->path = path;
1333
1334                 if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
1335                         free(m);
1336                         return -ENOMEM;
1337                 }
1338
1339                 path = NULL; /* avoid freeing in cleanup */
1340                 j->current_invalidate_counter ++;
1341
1342                 log_debug("Directory %s added.", m->path);
1343
1344         } else if (m->is_root)
1345                 return 0;
1346
1347         if (m->wd <= 0 && j->inotify_fd >= 0) {
1348
1349                 m->wd = inotify_add_watch(j->inotify_fd, m->path,
1350                                           IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1351                                           IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM|
1352                                           IN_ONLYDIR);
1353
1354                 if (m->wd > 0 && hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m) < 0)
1355                         inotify_rm_watch(j->inotify_fd, m->wd);
1356         }
1357
1358         for (;;) {
1359                 struct dirent *de;
1360
1361                 errno = 0;
1362                 de = readdir(d);
1363                 if (!de && errno != 0) {
1364                         r = -errno;
1365                         log_debug_errno(errno, "Failed to read directory %s: %m", m->path);
1366                         return r;
1367                 }
1368                 if (!de)
1369                         break;
1370
1371                 if (dirent_is_file_with_suffix(de, ".journal") ||
1372                     dirent_is_file_with_suffix(de, ".journal~")) {
1373                         r = add_file(j, m->path, de->d_name);
1374                         if (r < 0) {
1375                                 log_debug_errno(r, "Failed to add file %s/%s: %m",
1376                                                 m->path, de->d_name);
1377                                 r = set_put_error(j, r);
1378                                 if (r < 0)
1379                                         return r;
1380                         }
1381                 }
1382         }
1383
1384         check_network(j, dirfd(d));
1385
1386         return 0;
1387 }
1388
1389 static int add_root_directory(sd_journal *j, const char *p) {
1390         _cleanup_closedir_ DIR *d = NULL;
1391         Directory *m;
1392         int r;
1393
1394         assert(j);
1395         assert(p);
1396
1397         if ((j->flags & SD_JOURNAL_RUNTIME_ONLY) &&
1398             !path_startswith(p, "/run"))
1399                 return -EINVAL;
1400
1401         if (j->prefix)
1402                 p = strjoina(j->prefix, p);
1403
1404         d = opendir(p);
1405         if (!d)
1406                 return -errno;
1407
1408         m = hashmap_get(j->directories_by_path, p);
1409         if (!m) {
1410                 m = new0(Directory, 1);
1411                 if (!m)
1412                         return -ENOMEM;
1413
1414                 m->is_root = true;
1415                 m->path = strdup(p);
1416                 if (!m->path) {
1417                         free(m);
1418                         return -ENOMEM;
1419                 }
1420
1421                 if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
1422                         free(m->path);
1423                         free(m);
1424                         return -ENOMEM;
1425                 }
1426
1427                 j->current_invalidate_counter ++;
1428
1429                 log_debug("Root directory %s added.", m->path);
1430
1431         } else if (!m->is_root)
1432                 return 0;
1433
1434         if (m->wd <= 0 && j->inotify_fd >= 0) {
1435
1436                 m->wd = inotify_add_watch(j->inotify_fd, m->path,
1437                                           IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1438                                           IN_ONLYDIR);
1439
1440                 if (m->wd > 0 && hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m) < 0)
1441                         inotify_rm_watch(j->inotify_fd, m->wd);
1442         }
1443
1444         if (j->no_new_files)
1445                 return 0;
1446
1447         for (;;) {
1448                 struct dirent *de;
1449                 sd_id128_t id;
1450
1451                 errno = 0;
1452                 de = readdir(d);
1453                 if (!de && errno != 0) {
1454                         r = -errno;
1455                         log_debug_errno(errno, "Failed to read directory %s: %m", m->path);
1456                         return r;
1457                 }
1458                 if (!de)
1459                         break;
1460
1461                 if (dirent_is_file_with_suffix(de, ".journal") ||
1462                     dirent_is_file_with_suffix(de, ".journal~")) {
1463                         r = add_file(j, m->path, de->d_name);
1464                         if (r < 0) {
1465                                 log_debug_errno(r, "Failed to add file %s/%s: %m",
1466                                                 m->path, de->d_name);
1467                                 r = set_put_error(j, r);
1468                                 if (r < 0)
1469                                         return r;
1470                         }
1471                 } else if ((de->d_type == DT_DIR || de->d_type == DT_LNK || de->d_type == DT_UNKNOWN) &&
1472                            sd_id128_from_string(de->d_name, &id) >= 0) {
1473
1474                         r = add_directory(j, m->path, de->d_name);
1475                         if (r < 0)
1476                                 log_debug_errno(r, "Failed to add directory %s/%s: %m", m->path, de->d_name);
1477                 }
1478         }
1479
1480         check_network(j, dirfd(d));
1481
1482         return 0;
1483 }
1484
1485 static int remove_directory(sd_journal *j, Directory *d) {
1486         assert(j);
1487
1488         if (d->wd > 0) {
1489                 hashmap_remove(j->directories_by_wd, INT_TO_PTR(d->wd));
1490
1491                 if (j->inotify_fd >= 0)
1492                         inotify_rm_watch(j->inotify_fd, d->wd);
1493         }
1494
1495         hashmap_remove(j->directories_by_path, d->path);
1496
1497         if (d->is_root)
1498                 log_debug("Root directory %s removed.", d->path);
1499         else
1500                 log_debug("Directory %s removed.", d->path);
1501
1502         free(d->path);
1503         free(d);
1504
1505         return 0;
1506 }
1507
1508 static int add_search_paths(sd_journal *j) {
1509         int r;
1510         const char search_paths[] =
1511                 "/run/log/journal\0"
1512                 "/var/log/journal\0";
1513         const char *p;
1514
1515         assert(j);
1516
1517         /* We ignore most errors here, since the idea is to only open
1518          * what's actually accessible, and ignore the rest. */
1519
1520         NULSTR_FOREACH(p, search_paths) {
1521                 r = add_root_directory(j, p);
1522                 if (r < 0 && r != -ENOENT) {
1523                         r = set_put_error(j, r);
1524                         if (r < 0)
1525                                 return r;
1526                 }
1527         }
1528
1529         return 0;
1530 }
1531
1532 static int add_current_paths(sd_journal *j) {
1533         Iterator i;
1534         JournalFile *f;
1535
1536         assert(j);
1537         assert(j->no_new_files);
1538
1539         /* Simply adds all directories for files we have open as
1540          * "root" directories. We don't expect errors here, so we
1541          * treat them as fatal. */
1542
1543         ORDERED_HASHMAP_FOREACH(f, j->files, i) {
1544                 _cleanup_free_ char *dir;
1545                 int r;
1546
1547                 dir = dirname_malloc(f->path);
1548                 if (!dir)
1549                         return -ENOMEM;
1550
1551                 r = add_root_directory(j, dir);
1552                 if (r < 0) {
1553                         set_put_error(j, r);
1554                         return r;
1555                 }
1556         }
1557
1558         return 0;
1559 }
1560
1561
1562 static int allocate_inotify(sd_journal *j) {
1563         assert(j);
1564
1565         if (j->inotify_fd < 0) {
1566                 j->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1567                 if (j->inotify_fd < 0)
1568                         return -errno;
1569         }
1570
1571         if (!j->directories_by_wd) {
1572                 j->directories_by_wd = hashmap_new(NULL);
1573                 if (!j->directories_by_wd)
1574                         return -ENOMEM;
1575         }
1576
1577         return 0;
1578 }
1579
1580 static sd_journal *journal_new(int flags, const char *path) {
1581         sd_journal *j;
1582
1583         j = new0(sd_journal, 1);
1584         if (!j)
1585                 return NULL;
1586
1587         j->original_pid = getpid();
1588         j->inotify_fd = -1;
1589         j->flags = flags;
1590         j->data_threshold = DEFAULT_DATA_THRESHOLD;
1591
1592         if (path) {
1593                 j->path = strdup(path);
1594                 if (!j->path)
1595                         goto fail;
1596         }
1597
1598         j->files = ordered_hashmap_new(&string_hash_ops);
1599         j->directories_by_path = hashmap_new(&string_hash_ops);
1600         j->mmap = mmap_cache_new();
1601         if (!j->files || !j->directories_by_path || !j->mmap)
1602                 goto fail;
1603
1604         return j;
1605
1606 fail:
1607         sd_journal_close(j);
1608         return NULL;
1609 }
1610
1611 _public_ int sd_journal_open(sd_journal **ret, int flags) {
1612         sd_journal *j;
1613         int r;
1614
1615         assert_return(ret, -EINVAL);
1616         assert_return((flags & ~(SD_JOURNAL_LOCAL_ONLY|SD_JOURNAL_RUNTIME_ONLY|SD_JOURNAL_SYSTEM|SD_JOURNAL_CURRENT_USER)) == 0, -EINVAL);
1617
1618         j = journal_new(flags, NULL);
1619         if (!j)
1620                 return -ENOMEM;
1621
1622         r = add_search_paths(j);
1623         if (r < 0)
1624                 goto fail;
1625
1626         *ret = j;
1627         return 0;
1628
1629 fail:
1630         sd_journal_close(j);
1631
1632         return r;
1633 }
1634
1635 _public_ int sd_journal_open_container(sd_journal **ret, const char *machine, int flags) {
1636         _cleanup_free_ char *root = NULL, *class = NULL;
1637         sd_journal *j;
1638         char *p;
1639         int r;
1640
1641         assert_return(machine, -EINVAL);
1642         assert_return(ret, -EINVAL);
1643         assert_return((flags & ~(SD_JOURNAL_LOCAL_ONLY|SD_JOURNAL_SYSTEM)) == 0, -EINVAL);
1644         assert_return(machine_name_is_valid(machine), -EINVAL);
1645
1646         p = strjoina("/run/systemd/machines/", machine);
1647         r = parse_env_file(p, NEWLINE, "ROOT", &root, "CLASS", &class, NULL);
1648         if (r == -ENOENT)
1649                 return -EHOSTDOWN;
1650         if (r < 0)
1651                 return r;
1652         if (!root)
1653                 return -ENODATA;
1654
1655         if (!streq_ptr(class, "container"))
1656                 return -EIO;
1657
1658         j = journal_new(flags, NULL);
1659         if (!j)
1660                 return -ENOMEM;
1661
1662         j->prefix = root;
1663         root = NULL;
1664
1665         r = add_search_paths(j);
1666         if (r < 0)
1667                 goto fail;
1668
1669         *ret = j;
1670         return 0;
1671
1672 fail:
1673         sd_journal_close(j);
1674         return r;
1675 }
1676
1677 _public_ int sd_journal_open_directory(sd_journal **ret, const char *path, int flags) {
1678         sd_journal *j;
1679         int r;
1680
1681         assert_return(ret, -EINVAL);
1682         assert_return(path, -EINVAL);
1683         assert_return(flags == 0, -EINVAL);
1684
1685         j = journal_new(flags, path);
1686         if (!j)
1687                 return -ENOMEM;
1688
1689         r = add_root_directory(j, path);
1690         if (r < 0) {
1691                 set_put_error(j, r);
1692                 goto fail;
1693         }
1694
1695         *ret = j;
1696         return 0;
1697
1698 fail:
1699         sd_journal_close(j);
1700
1701         return r;
1702 }
1703
1704 _public_ int sd_journal_open_files(sd_journal **ret, const char **paths, int flags) {
1705         sd_journal *j;
1706         const char **path;
1707         int r;
1708
1709         assert_return(ret, -EINVAL);
1710         assert_return(flags == 0, -EINVAL);
1711
1712         j = journal_new(flags, NULL);
1713         if (!j)
1714                 return -ENOMEM;
1715
1716         STRV_FOREACH(path, paths) {
1717                 r = add_any_file(j, *path);
1718                 if (r < 0) {
1719                         log_error_errno(r, "Failed to open %s: %m", *path);
1720                         goto fail;
1721                 }
1722         }
1723
1724         j->no_new_files = true;
1725
1726         *ret = j;
1727         return 0;
1728
1729 fail:
1730         sd_journal_close(j);
1731
1732         return r;
1733 }
1734
1735 _public_ void sd_journal_close(sd_journal *j) {
1736         Directory *d;
1737         JournalFile *f;
1738
1739         if (!j)
1740                 return;
1741
1742         sd_journal_flush_matches(j);
1743
1744         while ((f = ordered_hashmap_steal_first(j->files)))
1745                 journal_file_close(f);
1746
1747         ordered_hashmap_free(j->files);
1748
1749         while ((d = hashmap_first(j->directories_by_path)))
1750                 remove_directory(j, d);
1751
1752         while ((d = hashmap_first(j->directories_by_wd)))
1753                 remove_directory(j, d);
1754
1755         hashmap_free(j->directories_by_path);
1756         hashmap_free(j->directories_by_wd);
1757
1758         safe_close(j->inotify_fd);
1759
1760         if (j->mmap) {
1761                 log_debug("mmap cache statistics: %u hit, %u miss", mmap_cache_get_hit(j->mmap), mmap_cache_get_missed(j->mmap));
1762                 mmap_cache_unref(j->mmap);
1763         }
1764
1765         free(j->path);
1766         free(j->prefix);
1767         free(j->unique_field);
1768         set_free(j->errors);
1769         free(j);
1770 }
1771
1772 _public_ int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret) {
1773         Object *o;
1774         JournalFile *f;
1775         int r;
1776
1777         assert_return(j, -EINVAL);
1778         assert_return(!journal_pid_changed(j), -ECHILD);
1779         assert_return(ret, -EINVAL);
1780
1781         f = j->current_file;
1782         if (!f)
1783                 return -EADDRNOTAVAIL;
1784
1785         if (f->current_offset <= 0)
1786                 return -EADDRNOTAVAIL;
1787
1788         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1789         if (r < 0)
1790                 return r;
1791
1792         *ret = le64toh(o->entry.realtime);
1793         return 0;
1794 }
1795
1796 _public_ int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id128_t *ret_boot_id) {
1797         Object *o;
1798         JournalFile *f;
1799         int r;
1800         sd_id128_t id;
1801
1802         assert_return(j, -EINVAL);
1803         assert_return(!journal_pid_changed(j), -ECHILD);
1804
1805         f = j->current_file;
1806         if (!f)
1807                 return -EADDRNOTAVAIL;
1808
1809         if (f->current_offset <= 0)
1810                 return -EADDRNOTAVAIL;
1811
1812         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1813         if (r < 0)
1814                 return r;
1815
1816         if (ret_boot_id)
1817                 *ret_boot_id = o->entry.boot_id;
1818         else {
1819                 r = sd_id128_get_boot(&id);
1820                 if (r < 0)
1821                         return r;
1822
1823                 if (!sd_id128_equal(id, o->entry.boot_id))
1824                         return -ESTALE;
1825         }
1826
1827         if (ret)
1828                 *ret = le64toh(o->entry.monotonic);
1829
1830         return 0;
1831 }
1832
/* Returns true if field is a non-empty name that does not start with "__"
 * and consists only of '_', 'A'-'Z' and '0'-'9'. */
static bool field_is_valid(const char *field) {
        const char *c;

        assert(field);

        if (isempty(field) || startswith(field, "__"))
                return false;

        for (c = field; *c; c++) {
                bool allowed;

                allowed = *c == '_' ||
                          (*c >= 'A' && *c <= 'Z') ||
                          (*c >= '0' && *c <= '9');

                if (!allowed)
                        return false;
        }

        return true;
}
1860
1861 _public_ int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *size) {
1862         JournalFile *f;
1863         uint64_t i, n;
1864         size_t field_length;
1865         int r;
1866         Object *o;
1867
1868         assert_return(j, -EINVAL);
1869         assert_return(!journal_pid_changed(j), -ECHILD);
1870         assert_return(field, -EINVAL);
1871         assert_return(data, -EINVAL);
1872         assert_return(size, -EINVAL);
1873         assert_return(field_is_valid(field), -EINVAL);
1874
1875         f = j->current_file;
1876         if (!f)
1877                 return -EADDRNOTAVAIL;
1878
1879         if (f->current_offset <= 0)
1880                 return -EADDRNOTAVAIL;
1881
1882         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1883         if (r < 0)
1884                 return r;
1885
1886         field_length = strlen(field);
1887
1888         n = journal_file_entry_n_items(o);
1889         for (i = 0; i < n; i++) {
1890                 uint64_t p, l;
1891                 le64_t le_hash;
1892                 size_t t;
1893                 int compression;
1894
1895                 p = le64toh(o->entry.items[i].object_offset);
1896                 le_hash = o->entry.items[i].hash;
1897                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1898                 if (r < 0)
1899                         return r;
1900
1901                 if (le_hash != o->data.hash)
1902                         return -EBADMSG;
1903
1904                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
1905
1906                 compression = o->object.flags & OBJECT_COMPRESSION_MASK;
1907                 if (compression) {
1908 #if defined(HAVE_XZ) || defined(HAVE_LZ4)
1909                         if (decompress_startswith(compression,
1910                                                   o->data.payload, l,
1911                                                   &f->compress_buffer, &f->compress_buffer_size,
1912                                                   field, field_length, '=')) {
1913
1914                                 size_t rsize;
1915
1916                                 r = decompress_blob(compression,
1917                                                     o->data.payload, l,
1918                                                     &f->compress_buffer, &f->compress_buffer_size, &rsize,
1919                                                     j->data_threshold);
1920                                 if (r < 0)
1921                                         return r;
1922
1923                                 *data = f->compress_buffer;
1924                                 *size = (size_t) rsize;
1925
1926                                 return 0;
1927                         }
1928 #else
1929                         return -EPROTONOSUPPORT;
1930 #endif
1931                 } else if (l >= field_length+1 &&
1932                            memcmp(o->data.payload, field, field_length) == 0 &&
1933                            o->data.payload[field_length] == '=') {
1934
1935                         t = (size_t) l;
1936
1937                         if ((uint64_t) t != l)
1938                                 return -E2BIG;
1939
1940                         *data = o->data.payload;
1941                         *size = t;
1942
1943                         return 0;
1944                 }
1945
1946                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1947                 if (r < 0)
1948                         return r;
1949         }
1950
1951         return -ENOENT;
1952 }
1953
1954 static int return_data(sd_journal *j, JournalFile *f, Object *o, const void **data, size_t *size) {
1955         size_t t;
1956         uint64_t l;
1957         int compression;
1958
1959         l = le64toh(o->object.size) - offsetof(Object, data.payload);
1960         t = (size_t) l;
1961
1962         /* We can't read objects larger than 4G on a 32bit machine */
1963         if ((uint64_t) t != l)
1964                 return -E2BIG;
1965
1966         compression = o->object.flags & OBJECT_COMPRESSION_MASK;
1967         if (compression) {
1968 #if defined(HAVE_XZ) || defined(HAVE_LZ4)
1969                 size_t rsize;
1970                 int r;
1971
1972                 r = decompress_blob(compression,
1973                                     o->data.payload, l, &f->compress_buffer,
1974                                     &f->compress_buffer_size, &rsize, j->data_threshold);
1975                 if (r < 0)
1976                         return r;
1977
1978                 *data = f->compress_buffer;
1979                 *size = (size_t) rsize;
1980 #else
1981                 return -EPROTONOSUPPORT;
1982 #endif
1983         } else {
1984                 *data = o->data.payload;
1985                 *size = t;
1986         }
1987
1988         return 0;
1989 }
1990
1991 _public_ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) {
1992         JournalFile *f;
1993         uint64_t p, n;
1994         le64_t le_hash;
1995         int r;
1996         Object *o;
1997
1998         assert_return(j, -EINVAL);
1999         assert_return(!journal_pid_changed(j), -ECHILD);
2000         assert_return(data, -EINVAL);
2001         assert_return(size, -EINVAL);
2002
2003         f = j->current_file;
2004         if (!f)
2005                 return -EADDRNOTAVAIL;
2006
2007         if (f->current_offset <= 0)
2008                 return -EADDRNOTAVAIL;
2009
2010         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2011         if (r < 0)
2012                 return r;
2013
2014         n = journal_file_entry_n_items(o);
2015         if (j->current_field >= n)
2016                 return 0;
2017
2018         p = le64toh(o->entry.items[j->current_field].object_offset);
2019         le_hash = o->entry.items[j->current_field].hash;
2020         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2021         if (r < 0)
2022                 return r;
2023
2024         if (le_hash != o->data.hash)
2025                 return -EBADMSG;
2026
2027         r = return_data(j, f, o, data, size);
2028         if (r < 0)
2029                 return r;
2030
2031         j->current_field ++;
2032
2033         return 1;
2034 }
2035
2036 _public_ void sd_journal_restart_data(sd_journal *j) {
2037         if (!j)
2038                 return;
2039
2040         j->current_field = 0;
2041 }
2042
2043 _public_ int sd_journal_get_fd(sd_journal *j) {
2044         int r;
2045
2046         assert_return(j, -EINVAL);
2047         assert_return(!journal_pid_changed(j), -ECHILD);
2048
2049         if (j->inotify_fd >= 0)
2050                 return j->inotify_fd;
2051
2052         r = allocate_inotify(j);
2053         if (r < 0)
2054                 return r;
2055
2056         /* Iterate through all dirs again, to add them to the
2057          * inotify */
2058         if (j->no_new_files)
2059                 r = add_current_paths(j);
2060         else if (j->path)
2061                 r = add_root_directory(j, j->path);
2062         else
2063                 r = add_search_paths(j);
2064         if (r < 0)
2065                 return r;
2066
2067         return j->inotify_fd;
2068 }
2069
2070 _public_ int sd_journal_get_events(sd_journal *j) {
2071         int fd;
2072
2073         assert_return(j, -EINVAL);
2074         assert_return(!journal_pid_changed(j), -ECHILD);
2075
2076         fd = sd_journal_get_fd(j);
2077         if (fd < 0)
2078                 return fd;
2079
2080         return POLLIN;
2081 }
2082
2083 _public_ int sd_journal_get_timeout(sd_journal *j, uint64_t *timeout_usec) {
2084         int fd;
2085
2086         assert_return(j, -EINVAL);
2087         assert_return(!journal_pid_changed(j), -ECHILD);
2088         assert_return(timeout_usec, -EINVAL);
2089
2090         fd = sd_journal_get_fd(j);
2091         if (fd < 0)
2092                 return fd;
2093
2094         if (!j->on_network) {
2095                 *timeout_usec = (uint64_t) -1;
2096                 return 0;
2097         }
2098
2099         /* If we are on the network we need to regularly check for
2100          * changes manually */
2101
2102         *timeout_usec = j->last_process_usec + JOURNAL_FILES_RECHECK_USEC;
2103         return 1;
2104 }
2105
2106 static void process_inotify_event(sd_journal *j, struct inotify_event *e) {
2107         Directory *d;
2108         int r;
2109
2110         assert(j);
2111         assert(e);
2112
2113         /* Is this a subdirectory we watch? */
2114         d = hashmap_get(j->directories_by_wd, INT_TO_PTR(e->wd));
2115         if (d) {
2116                 sd_id128_t id;
2117
2118                 if (!(e->mask & IN_ISDIR) && e->len > 0 &&
2119                     (endswith(e->name, ".journal") ||
2120                      endswith(e->name, ".journal~"))) {
2121
2122                         /* Event for a journal file */
2123
2124                         if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) {
2125                                 r = add_file(j, d->path, e->name);
2126                                 if (r < 0) {
2127                                         log_debug_errno(r, "Failed to add file %s/%s: %m",
2128                                                         d->path, e->name);
2129                                         set_put_error(j, r);
2130                                 }
2131
2132                         } else if (e->mask & (IN_DELETE|IN_MOVED_FROM|IN_UNMOUNT)) {
2133
2134                                 r = remove_file(j, d->path, e->name);
2135                                 if (r < 0)
2136                                         log_debug_errno(r, "Failed to remove file %s/%s: %m", d->path, e->name);
2137                         }
2138
2139                 } else if (!d->is_root && e->len == 0) {
2140
2141                         /* Event for a subdirectory */
2142
2143                         if (e->mask & (IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT)) {
2144                                 r = remove_directory(j, d);
2145                                 if (r < 0)
2146                                         log_debug_errno(r, "Failed to remove directory %s: %m", d->path);
2147                         }
2148
2149
2150                 } else if (d->is_root && (e->mask & IN_ISDIR) && e->len > 0 && sd_id128_from_string(e->name, &id) >= 0) {
2151
2152                         /* Event for root directory */
2153
2154                         if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) {
2155                                 r = add_directory(j, d->path, e->name);
2156                                 if (r < 0)
2157                                         log_debug_errno(r, "Failed to add directory %s/%s: %m", d->path, e->name);
2158                         }
2159                 }
2160
2161                 return;
2162         }
2163
2164         if (e->mask & IN_IGNORED)
2165                 return;
2166
2167         log_warning("Unknown inotify event.");
2168 }
2169
2170 static int determine_change(sd_journal *j) {
2171         bool b;
2172
2173         assert(j);
2174
2175         b = j->current_invalidate_counter != j->last_invalidate_counter;
2176         j->last_invalidate_counter = j->current_invalidate_counter;
2177
2178         return b ? SD_JOURNAL_INVALIDATE : SD_JOURNAL_APPEND;
2179 }
2180
2181 _public_ int sd_journal_process(sd_journal *j) {
2182         bool got_something = false;
2183
2184         assert_return(j, -EINVAL);
2185         assert_return(!journal_pid_changed(j), -ECHILD);
2186
2187         j->last_process_usec = now(CLOCK_MONOTONIC);
2188
2189         for (;;) {
2190                 union inotify_event_buffer buffer;
2191                 struct inotify_event *e;
2192                 ssize_t l;
2193
2194                 l = read(j->inotify_fd, &buffer, sizeof(buffer));
2195                 if (l < 0) {
2196                         if (errno == EAGAIN || errno == EINTR)
2197                                 return got_something ? determine_change(j) : SD_JOURNAL_NOP;
2198
2199                         return -errno;
2200                 }
2201
2202                 got_something = true;
2203
2204                 FOREACH_INOTIFY_EVENT(e, buffer, l)
2205                         process_inotify_event(j, e);
2206         }
2207 }
2208
2209 _public_ int sd_journal_wait(sd_journal *j, uint64_t timeout_usec) {
2210         int r;
2211         uint64_t t;
2212
2213         assert_return(j, -EINVAL);
2214         assert_return(!journal_pid_changed(j), -ECHILD);
2215
2216         if (j->inotify_fd < 0) {
2217
2218                 /* This is the first invocation, hence create the
2219                  * inotify watch */
2220                 r = sd_journal_get_fd(j);
2221                 if (r < 0)
2222                         return r;
2223
2224                 /* The journal might have changed since the context
2225                  * object was created and we weren't watching before,
2226                  * hence don't wait for anything, and return
2227                  * immediately. */
2228                 return determine_change(j);
2229         }
2230
2231         r = sd_journal_get_timeout(j, &t);
2232         if (r < 0)
2233                 return r;
2234
2235         if (t != (uint64_t) -1) {
2236                 usec_t n;
2237
2238                 n = now(CLOCK_MONOTONIC);
2239                 t = t > n ? t - n : 0;
2240
2241                 if (timeout_usec == (uint64_t) -1 || timeout_usec > t)
2242                         timeout_usec = t;
2243         }
2244
2245         do {
2246                 r = fd_wait_for_event(j->inotify_fd, POLLIN, timeout_usec);
2247         } while (r == -EINTR);
2248
2249         if (r < 0)
2250                 return r;
2251
2252         return sd_journal_process(j);
2253 }
2254
2255 _public_ int sd_journal_get_cutoff_realtime_usec(sd_journal *j, uint64_t *from, uint64_t *to) {
2256         Iterator i;
2257         JournalFile *f;
2258         bool first = true;
2259         uint64_t fmin = 0, tmax = 0;
2260         int r;
2261
2262         assert_return(j, -EINVAL);
2263         assert_return(!journal_pid_changed(j), -ECHILD);
2264         assert_return(from || to, -EINVAL);
2265         assert_return(from != to, -EINVAL);
2266
2267         ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2268                 usec_t fr, t;
2269
2270                 r = journal_file_get_cutoff_realtime_usec(f, &fr, &t);
2271                 if (r == -ENOENT)
2272                         continue;
2273                 if (r < 0)
2274                         return r;
2275                 if (r == 0)
2276                         continue;
2277
2278                 if (first) {
2279                         fmin = fr;
2280                         tmax = t;
2281                         first = false;
2282                 } else {
2283                         fmin = MIN(fr, fmin);
2284                         tmax = MAX(t, tmax);
2285                 }
2286         }
2287
2288         if (from)
2289                 *from = fmin;
2290         if (to)
2291                 *to = tmax;
2292
2293         return first ? 0 : 1;
2294 }
2295
2296 _public_ int sd_journal_get_cutoff_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t *from, uint64_t *to) {
2297         Iterator i;
2298         JournalFile *f;
2299         bool found = false;
2300         int r;
2301
2302         assert_return(j, -EINVAL);
2303         assert_return(!journal_pid_changed(j), -ECHILD);
2304         assert_return(from || to, -EINVAL);
2305         assert_return(from != to, -EINVAL);
2306
2307         ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2308                 usec_t fr, t;
2309
2310                 r = journal_file_get_cutoff_monotonic_usec(f, boot_id, &fr, &t);
2311                 if (r == -ENOENT)
2312                         continue;
2313                 if (r < 0)
2314                         return r;
2315                 if (r == 0)
2316                         continue;
2317
2318                 if (found) {
2319                         if (from)
2320                                 *from = MIN(fr, *from);
2321                         if (to)
2322                                 *to = MAX(t, *to);
2323                 } else {
2324                         if (from)
2325                                 *from = fr;
2326                         if (to)
2327                                 *to = t;
2328                         found = true;
2329                 }
2330         }
2331
2332         return found;
2333 }
2334
2335 void journal_print_header(sd_journal *j) {
2336         Iterator i;
2337         JournalFile *f;
2338         bool newline = false;
2339
2340         assert(j);
2341
2342         ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2343                 if (newline)
2344                         putchar('\n');
2345                 else
2346                         newline = true;
2347
2348                 journal_file_print_header(f);
2349         }
2350 }
2351
2352 _public_ int sd_journal_get_usage(sd_journal *j, uint64_t *bytes) {
2353         Iterator i;
2354         JournalFile *f;
2355         uint64_t sum = 0;
2356
2357         assert_return(j, -EINVAL);
2358         assert_return(!journal_pid_changed(j), -ECHILD);
2359         assert_return(bytes, -EINVAL);
2360
2361         ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2362                 struct stat st;
2363
2364                 if (fstat(f->fd, &st) < 0)
2365                         return -errno;
2366
2367                 sum += (uint64_t) st.st_blocks * 512ULL;
2368         }
2369
2370         *bytes = sum;
2371         return 0;
2372 }
2373
2374 _public_ int sd_journal_query_unique(sd_journal *j, const char *field) {
2375         char *f;
2376
2377         assert_return(j, -EINVAL);
2378         assert_return(!journal_pid_changed(j), -ECHILD);
2379         assert_return(!isempty(field), -EINVAL);
2380         assert_return(field_is_valid(field), -EINVAL);
2381
2382         f = strdup(field);
2383         if (!f)
2384                 return -ENOMEM;
2385
2386         free(j->unique_field);
2387         j->unique_field = f;
2388         j->unique_file = NULL;
2389         j->unique_offset = 0;
2390         j->unique_file_lost = false;
2391
2392         return 0;
2393 }
2394
2395 _public_ int sd_journal_enumerate_unique(sd_journal *j, const void **data, size_t *l) {
2396         size_t k;
2397
2398         assert_return(j, -EINVAL);
2399         assert_return(!journal_pid_changed(j), -ECHILD);
2400         assert_return(data, -EINVAL);
2401         assert_return(l, -EINVAL);
2402         assert_return(j->unique_field, -EINVAL);
2403
2404         k = strlen(j->unique_field);
2405
2406         if (!j->unique_file) {
2407                 if (j->unique_file_lost)
2408                         return 0;
2409
2410                 j->unique_file = ordered_hashmap_first(j->files);
2411                 if (!j->unique_file)
2412                         return 0;
2413
2414                 j->unique_offset = 0;
2415         }
2416
2417         for (;;) {
2418                 JournalFile *of;
2419                 Iterator i;
2420                 Object *o;
2421                 const void *odata;
2422                 size_t ol;
2423                 bool found;
2424                 int r;
2425
2426                 /* Proceed to next data object in the field's linked list */
2427                 if (j->unique_offset == 0) {
2428                         r = journal_file_find_field_object(j->unique_file, j->unique_field, k, &o, NULL);
2429                         if (r < 0)
2430                                 return r;
2431
2432                         j->unique_offset = r > 0 ? le64toh(o->field.head_data_offset) : 0;
2433                 } else {
2434                         r = journal_file_move_to_object(j->unique_file, OBJECT_DATA, j->unique_offset, &o);
2435                         if (r < 0)
2436                                 return r;
2437
2438                         j->unique_offset = le64toh(o->data.next_field_offset);
2439                 }
2440
2441                 /* We reached the end of the list? Then start again, with the next file */
2442                 if (j->unique_offset == 0) {
2443                         j->unique_file = ordered_hashmap_next(j->files, j->unique_file->path);
2444                         if (!j->unique_file)
2445                                 return 0;
2446
2447                         continue;
2448                 }
2449
2450                 /* We do not use OBJECT_DATA context here, but OBJECT_UNUSED
2451                  * instead, so that we can look at this data object at the same
2452                  * time as one on another file */
2453                 r = journal_file_move_to_object(j->unique_file, OBJECT_UNUSED, j->unique_offset, &o);
2454                 if (r < 0)
2455                         return r;
2456
2457                 /* Let's do the type check by hand, since we used 0 context above. */
2458                 if (o->object.type != OBJECT_DATA) {
2459                         log_debug("%s:offset " OFSfmt ": object has type %d, expected %d",
2460                                   j->unique_file->path, j->unique_offset,
2461                                   o->object.type, OBJECT_DATA);
2462                         return -EBADMSG;
2463                 }
2464
2465                 r = return_data(j, j->unique_file, o, &odata, &ol);
2466                 if (r < 0)
2467                         return r;
2468
2469                 /* Check if we have at least the field name and "=". */
2470                 if (ol <= k) {
2471                         log_debug("%s:offset " OFSfmt ": object has size %zu, expected at least %zu",
2472                                   j->unique_file->path, j->unique_offset,
2473                                   ol, k + 1);
2474                         return -EBADMSG;
2475                 }
2476
2477                 if (memcmp(odata, j->unique_field, k) || ((const char*) odata)[k] != '=') {
2478                         log_debug("%s:offset " OFSfmt ": object does not start with \"%s=\"",
2479                                   j->unique_file->path, j->unique_offset,
2480                                   j->unique_field);
2481                         return -EBADMSG;
2482                 }
2483
2484                 /* OK, now let's see if we already returned this data
2485                  * object by checking if it exists in the earlier
2486                  * traversed files. */
2487                 found = false;
2488                 ORDERED_HASHMAP_FOREACH(of, j->files, i) {
2489                         Object *oo;
2490                         uint64_t op;
2491
2492                         if (of == j->unique_file)
2493                                 break;
2494
2495                         /* Skip this file it didn't have any fields
2496                          * indexed */
2497                         if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) &&
2498                             le64toh(of->header->n_fields) <= 0)
2499                                 continue;
2500
2501                         r = journal_file_find_data_object_with_hash(of, odata, ol, le64toh(o->data.hash), &oo, &op);
2502                         if (r < 0)
2503                                 return r;
2504
2505                         if (r > 0)
2506                                 found = true;
2507                 }
2508
2509                 if (found)
2510                         continue;
2511
2512                 r = return_data(j, j->unique_file, o, data, l);
2513                 if (r < 0)
2514                         return r;
2515
2516                 return 1;
2517         }
2518 }
2519
2520 _public_ void sd_journal_restart_unique(sd_journal *j) {
2521         if (!j)
2522                 return;
2523
2524         j->unique_file = NULL;
2525         j->unique_offset = 0;
2526         j->unique_file_lost = false;
2527 }
2528
2529 _public_ int sd_journal_reliable_fd(sd_journal *j) {
2530         assert_return(j, -EINVAL);
2531         assert_return(!journal_pid_changed(j), -ECHILD);
2532
2533         return !j->on_network;
2534 }
2535
2536 static char *lookup_field(const char *field, void *userdata) {
2537         sd_journal *j = userdata;
2538         const void *data;
2539         size_t size, d;
2540         int r;
2541
2542         assert(field);
2543         assert(j);
2544
2545         r = sd_journal_get_data(j, field, &data, &size);
2546         if (r < 0 ||
2547             size > REPLACE_VAR_MAX)
2548                 return strdup(field);
2549
2550         d = strlen(field) + 1;
2551
2552         return strndup((const char*) data + d, size - d);
2553 }
2554
2555 _public_ int sd_journal_get_catalog(sd_journal *j, char **ret) {
2556         const void *data;
2557         size_t size;
2558         sd_id128_t id;
2559         _cleanup_free_ char *text = NULL, *cid = NULL;
2560         char *t;
2561         int r;
2562
2563         assert_return(j, -EINVAL);
2564         assert_return(!journal_pid_changed(j), -ECHILD);
2565         assert_return(ret, -EINVAL);
2566
2567         r = sd_journal_get_data(j, "MESSAGE_ID", &data, &size);
2568         if (r < 0)
2569                 return r;
2570
2571         cid = strndup((const char*) data + 11, size - 11);
2572         if (!cid)
2573                 return -ENOMEM;
2574
2575         r = sd_id128_from_string(cid, &id);
2576         if (r < 0)
2577                 return r;
2578
2579         r = catalog_get(CATALOG_DATABASE, id, &text);
2580         if (r < 0)
2581                 return r;
2582
2583         t = replace_var(text, lookup_field, j);
2584         if (!t)
2585                 return -ENOMEM;
2586
2587         *ret = t;
2588         return 0;
2589 }
2590
2591 _public_ int sd_journal_get_catalog_for_message_id(sd_id128_t id, char **ret) {
2592         assert_return(ret, -EINVAL);
2593
2594         return catalog_get(CATALOG_DATABASE, id, ret);
2595 }
2596
2597 _public_ int sd_journal_set_data_threshold(sd_journal *j, size_t sz) {
2598         assert_return(j, -EINVAL);
2599         assert_return(!journal_pid_changed(j), -ECHILD);
2600
2601         j->data_threshold = sz;
2602         return 0;
2603 }
2604
2605 _public_ int sd_journal_get_data_threshold(sd_journal *j, size_t *sz) {
2606         assert_return(j, -EINVAL);
2607         assert_return(!journal_pid_changed(j), -ECHILD);
2608         assert_return(sz, -EINVAL);
2609
2610         *sz = j->data_threshold;
2611         return 0;
2612 }