chiark / gitweb /
machined: Move image discovery logic into src/shared, so that we can make use of...
[elogind.git] / src / journal / sd-journal.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <stddef.h>
25 #include <unistd.h>
26 #include <sys/inotify.h>
27 #include <sys/poll.h>
28 #include <sys/vfs.h>
29 #include <linux/magic.h>
30
31 #include "sd-journal.h"
32 #include "journal-def.h"
33 #include "journal-file.h"
34 #include "hashmap.h"
35 #include "list.h"
36 #include "strv.h"
37 #include "path-util.h"
38 #include "lookup3.h"
39 #include "compress.h"
40 #include "journal-internal.h"
41 #include "missing.h"
42 #include "catalog.h"
43 #include "replace-var.h"
44 #include "fileio.h"
45
/* Numeric cap related to journal files. It is not referenced in this part
 * of the file; presumably the maximum number of files one sd_journal
 * tracks -- TODO confirm at the use site. */
46 #define JOURNAL_FILES_MAX 1024
47
/* A two second interval expressed in microseconds. NOTE(review): what gets
 * rechecked at this interval is not visible in this part of the file. */
48 #define JOURNAL_FILES_RECHECK_USEC (2 * USEC_PER_SEC)
49
/* Numeric limit of 256; presumably bounds variable replacement (see
 * replace-var.h) -- TODO confirm at the use site. */
50 #define REPLACE_VAR_MAX 256
51
/* 64 KiB. Presumably the initial value of the journal's data threshold --
 * TODO confirm at the use site. */
52 #define DEFAULT_DATA_THRESHOLD (64*1024)
53
/* Forward declaration: real_journal_next() calls this before the
 * definition appears in the file. */
54 static void remove_file_real(sd_journal *j, JournalFile *f);
55
56 static bool journal_pid_changed(sd_journal *j) {
57         assert(j);
58
59         /* We don't support people creating a journal object and
60          * keeping it around over a fork(). Let's complain. */
61
62         return j->original_pid != getpid();
63 }
64
65 /* We return an error here only if we didn't manage to
66    memorize the real error. */
67 static int set_put_error(sd_journal *j, int r) {
68         int k;
69
70         if (r >= 0)
71                 return r;
72
73         k = set_ensure_allocated(&j->errors, NULL);
74         if (k < 0)
75                 return k;
76
77         return set_put(j->errors, INT_TO_PTR(r));
78 }
79
80 static void detach_location(sd_journal *j) {
81         Iterator i;
82         JournalFile *f;
83
84         assert(j);
85
86         j->current_file = NULL;
87         j->current_field = 0;
88
89         ORDERED_HASHMAP_FOREACH(f, j->files, i)
90                 journal_file_reset_location(f);
91 }
92
93 static void reset_location(sd_journal *j) {
94         assert(j);
95
96         detach_location(j);
97         zero(j->current_location);
98 }
99
100 static void init_location(Location *l, LocationType type, JournalFile *f, Object *o) {
101         assert(l);
102         assert(type == LOCATION_DISCRETE || type == LOCATION_SEEK);
103         assert(f);
104         assert(o->object.type == OBJECT_ENTRY);
105
106         l->type = type;
107         l->seqnum = le64toh(o->entry.seqnum);
108         l->seqnum_id = f->header->seqnum_id;
109         l->realtime = le64toh(o->entry.realtime);
110         l->monotonic = le64toh(o->entry.monotonic);
111         l->boot_id = o->entry.boot_id;
112         l->xor_hash = le64toh(o->entry.xor_hash);
113
114         l->seqnum_set = l->realtime_set = l->monotonic_set = l->xor_hash_set = true;
115 }
116
117 static void set_location(sd_journal *j, JournalFile *f, Object *o) {
118         assert(j);
119         assert(f);
120         assert(o);
121
122         init_location(&j->current_location, LOCATION_DISCRETE, f, o);
123
124         j->current_file = f;
125         j->current_field = 0;
126
127         /* Let f know its candidate entry was picked. */
128         assert(f->location_type == LOCATION_SEEK);
129         f->location_type = LOCATION_DISCRETE;
130 }
131
/* Checks whether the size bytes at data form an acceptable match
 * expression "FIELD=value". Returns true only if:
 *   - there are at least two bytes,
 *   - the data does not begin with "__",
 *   - a '=' is found, preceded by at least one character, and
 *   - every byte before that '=' is 'A'-'Z', '0'-'9' or '_'.
 * Bytes after the first '=' are not inspected. */
static int match_is_valid(const void *data, size_t size) {
        const char *field = data;
        size_t n;

        assert(data);

        if (size < 2)
                return false;

        /* Safe to look at two bytes here: size >= 2 was just verified. */
        if (field[0] == '_' && field[1] == '_')
                return false;

        for (n = 0; n < size; n++) {
                char c = field[n];

                if (c == '=')
                        return n > 0;

                if (c != '_' &&
                    !(c >= 'A' && c <= 'Z') &&
                    !(c >= '0' && c <= '9'))
                        return false;
        }

        /* Ran out of data without ever seeing '=' */
        return false;
}
163
/* Compares the field-name part (everything up to and including the first
 * '=') of two match expressions of s and t bytes. Returns true when both
 * carry the same field name, false at the first differing byte. Reaching
 * the end of the shorter buffer without having seen '=' is treated as a
 * programming error. */
static bool same_field(const void *_a, size_t s, const void *_b, size_t t) {
        const uint8_t *lhs = _a, *rhs = _b;
        size_t limit = s < t ? s : t;
        size_t k;

        for (k = 0; k < limit; k++) {
                if (lhs[k] != rhs[k])
                        return false;

                if (lhs[k] == '=')
                        return true;
        }

        assert_not_reached("\"=\" not found");
}
179
180 static Match *match_new(Match *p, MatchType t) {
181         Match *m;
182
183         m = new0(Match, 1);
184         if (!m)
185                 return NULL;
186
187         m->type = t;
188
189         if (p) {
190                 m->parent = p;
191                 LIST_PREPEND(matches, p->matches, m);
192         }
193
194         return m;
195 }
196
197 static void match_free(Match *m) {
198         assert(m);
199
200         while (m->matches)
201                 match_free(m->matches);
202
203         if (m->parent)
204                 LIST_REMOVE(matches, m->parent->matches, m);
205
206         free(m->data);
207         free(m);
208 }
209
210 static void match_free_if_empty(Match *m) {
211         if (!m || m->matches)
212                 return;
213
214         match_free(m);
215 }
216
217 _public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size) {
218         Match *l3, *l4, *add_here = NULL, *m;
219         le64_t le_hash;
220
221         assert_return(j, -EINVAL);
222         assert_return(!journal_pid_changed(j), -ECHILD);
223         assert_return(data, -EINVAL);
224
225         if (size == 0)
226                 size = strlen(data);
227
228         assert_return(match_is_valid(data, size), -EINVAL);
229
230         /* level 0: AND term
231          * level 1: OR terms
232          * level 2: AND terms
233          * level 3: OR terms
234          * level 4: concrete matches */
235
236         if (!j->level0) {
237                 j->level0 = match_new(NULL, MATCH_AND_TERM);
238                 if (!j->level0)
239                         return -ENOMEM;
240         }
241
242         if (!j->level1) {
243                 j->level1 = match_new(j->level0, MATCH_OR_TERM);
244                 if (!j->level1)
245                         return -ENOMEM;
246         }
247
248         if (!j->level2) {
249                 j->level2 = match_new(j->level1, MATCH_AND_TERM);
250                 if (!j->level2)
251                         return -ENOMEM;
252         }
253
254         assert(j->level0->type == MATCH_AND_TERM);
255         assert(j->level1->type == MATCH_OR_TERM);
256         assert(j->level2->type == MATCH_AND_TERM);
257
258         le_hash = htole64(hash64(data, size));
259
260         LIST_FOREACH(matches, l3, j->level2->matches) {
261                 assert(l3->type == MATCH_OR_TERM);
262
263                 LIST_FOREACH(matches, l4, l3->matches) {
264                         assert(l4->type == MATCH_DISCRETE);
265
266                         /* Exactly the same match already? Then ignore
267                          * this addition */
268                         if (l4->le_hash == le_hash &&
269                             l4->size == size &&
270                             memcmp(l4->data, data, size) == 0)
271                                 return 0;
272
273                         /* Same field? Then let's add this to this OR term */
274                         if (same_field(data, size, l4->data, l4->size)) {
275                                 add_here = l3;
276                                 break;
277                         }
278                 }
279
280                 if (add_here)
281                         break;
282         }
283
284         if (!add_here) {
285                 add_here = match_new(j->level2, MATCH_OR_TERM);
286                 if (!add_here)
287                         goto fail;
288         }
289
290         m = match_new(add_here, MATCH_DISCRETE);
291         if (!m)
292                 goto fail;
293
294         m->le_hash = le_hash;
295         m->size = size;
296         m->data = memdup(data, size);
297         if (!m->data)
298                 goto fail;
299
300         detach_location(j);
301
302         return 0;
303
304 fail:
305         match_free_if_empty(add_here);
306         match_free_if_empty(j->level2);
307         match_free_if_empty(j->level1);
308         match_free_if_empty(j->level0);
309
310         return -ENOMEM;
311 }
312
313 _public_ int sd_journal_add_conjunction(sd_journal *j) {
314         assert_return(j, -EINVAL);
315         assert_return(!journal_pid_changed(j), -ECHILD);
316
317         if (!j->level0)
318                 return 0;
319
320         if (!j->level1)
321                 return 0;
322
323         if (!j->level1->matches)
324                 return 0;
325
326         j->level1 = NULL;
327         j->level2 = NULL;
328
329         return 0;
330 }
331
332 _public_ int sd_journal_add_disjunction(sd_journal *j) {
333         assert_return(j, -EINVAL);
334         assert_return(!journal_pid_changed(j), -ECHILD);
335
336         if (!j->level0)
337                 return 0;
338
339         if (!j->level1)
340                 return 0;
341
342         if (!j->level2)
343                 return 0;
344
345         if (!j->level2->matches)
346                 return 0;
347
348         j->level2 = NULL;
349         return 0;
350 }
351
352 static char *match_make_string(Match *m) {
353         char *p, *r;
354         Match *i;
355         bool enclose = false;
356
357         if (!m)
358                 return strdup("none");
359
360         if (m->type == MATCH_DISCRETE)
361                 return strndup(m->data, m->size);
362
363         p = NULL;
364         LIST_FOREACH(matches, i, m->matches) {
365                 char *t, *k;
366
367                 t = match_make_string(i);
368                 if (!t) {
369                         free(p);
370                         return NULL;
371                 }
372
373                 if (p) {
374                         k = strjoin(p, m->type == MATCH_OR_TERM ? " OR " : " AND ", t, NULL);
375                         free(p);
376                         free(t);
377
378                         if (!k)
379                                 return NULL;
380
381                         p = k;
382
383                         enclose = true;
384                 } else
385                         p = t;
386         }
387
388         if (enclose) {
389                 r = strjoin("(", p, ")", NULL);
390                 free(p);
391                 return r;
392         }
393
394         return p;
395 }
396
397 char *journal_make_match_string(sd_journal *j) {
398         assert(j);
399
400         return match_make_string(j->level0);
401 }
402
403 _public_ void sd_journal_flush_matches(sd_journal *j) {
404         if (!j)
405                 return;
406
407         if (j->level0)
408                 match_free(j->level0);
409
410         j->level0 = j->level1 = j->level2 = NULL;
411
412         detach_location(j);
413 }
414
415 _pure_ static int compare_with_location(JournalFile *f, Location *l) {
416         assert(f);
417         assert(l);
418         assert(f->location_type == LOCATION_SEEK);
419         assert(l->type == LOCATION_DISCRETE || l->type == LOCATION_SEEK);
420
421         if (l->monotonic_set &&
422             sd_id128_equal(f->current_boot_id, l->boot_id) &&
423             l->realtime_set &&
424             f->current_realtime == l->realtime &&
425             l->xor_hash_set &&
426             f->current_xor_hash == l->xor_hash)
427                 return 0;
428
429         if (l->seqnum_set &&
430             sd_id128_equal(f->header->seqnum_id, l->seqnum_id)) {
431
432                 if (f->current_seqnum < l->seqnum)
433                         return -1;
434                 if (f->current_seqnum > l->seqnum)
435                         return 1;
436         }
437
438         if (l->monotonic_set &&
439             sd_id128_equal(f->current_boot_id, l->boot_id)) {
440
441                 if (f->current_monotonic < l->monotonic)
442                         return -1;
443                 if (f->current_monotonic > l->monotonic)
444                         return 1;
445         }
446
447         if (l->realtime_set) {
448
449                 if (f->current_realtime < l->realtime)
450                         return -1;
451                 if (f->current_realtime > l->realtime)
452                         return 1;
453         }
454
455         if (l->xor_hash_set) {
456
457                 if (f->current_xor_hash < l->xor_hash)
458                         return -1;
459                 if (f->current_xor_hash > l->xor_hash)
460                         return 1;
461         }
462
463         return 0;
464 }
465
/* Looks in file f for the next entry that satisfies the match (sub)tree
 * m, searching from after_offset in the given direction.
 *
 * For OR and AND terms: returns 1 when an entry was found, storing the
 * entry object in *ret and its offset in *offset (each only if non-NULL);
 * 0 when nothing matches; a negative value on error. For a discrete match
 * the result of journal_file_move_to_entry_by_offset_for_data() is passed
 * through unchanged (presumably the same convention -- confirm there). */
466 static int next_for_match(
467                 sd_journal *j,
468                 Match *m,
469                 JournalFile *f,
470                 uint64_t after_offset,
471                 direction_t direction,
472                 Object **ret,
473                 uint64_t *offset) {
474
475         int r;
        /* Offset of the best candidate so far; 0 doubles as "none yet" */
476         uint64_t np = 0;
477         Object *n;
478
479         assert(j);
480         assert(m);
481         assert(f);
482
483         if (m->type == MATCH_DISCRETE) {
484                 uint64_t dp;
485
                /* Resolve the match data to its data object in this file;
                 * r <= 0 (error or not found) is passed on as is */
486                 r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
487                 if (r <= 0)
488                         return r;
489
490                 return journal_file_move_to_entry_by_offset_for_data(f, dp, after_offset, direction, ret, offset);
491
492         } else if (m->type == MATCH_OR_TERM) {
493                 Match *i;
494
495                 /* Find the earliest match beyond after_offset */
496
497                 LIST_FOREACH(matches, i, m->matches) {
498                         uint64_t cp;
499
500                         r = next_for_match(j, i, f, after_offset, direction, NULL, &cp);
501                         if (r < 0)
502                                 return r;
503                         else if (r > 0) {
                                /* Keep the candidate nearest in the search
                                 * direction: smallest offset when going
                                 * down, largest when going up */
504                                 if (np == 0 || (direction == DIRECTION_DOWN ? cp < np : cp > np))
505                                         np = cp;
506                         }
507                 }
508
                /* No child produced a candidate */
509                 if (np == 0)
510                         return 0;
511
512         } else if (m->type == MATCH_AND_TERM) {
513                 Match *i, *last_moved;
514
515                 /* Always jump to the next matching entry and repeat
516                  * this until we find an offset that matches for all
517                  * matches. */
518
519                 if (!m->matches)
520                         return 0;
521
                /* Seed the candidate offset from the first child */
522                 r = next_for_match(j, m->matches, f, after_offset, direction, NULL, &np);
523                 if (r <= 0)
524                         return r;
525
526                 assert(direction == DIRECTION_DOWN ? np >= after_offset : np <= after_offset);
527                 last_moved = m->matches;
528
                /* NOTE(review): LIST_LOOP_BUT_ONE is defined elsewhere;
                 * presumably it cycles through the other children until
                 * it arrives back at last_moved -- confirm in list.h.
                 * Whenever a child's next match lies beyond np, np is
                 * advanced to it and that child becomes last_moved. */
529                 LIST_LOOP_BUT_ONE(matches, i, m->matches, last_moved) {
530                         uint64_t cp;
531
532                         r = next_for_match(j, i, f, np, direction, NULL, &cp);
533                         if (r <= 0)
534                                 return r;
535
536                         assert(direction == DIRECTION_DOWN ? cp >= np : cp <= np);
537                         if (direction == DIRECTION_DOWN ? cp > np : cp < np) {
538                                 np = cp;
539                                 last_moved = i;
540                         }
541                 }
542         }
543
544         assert(np > 0);
545
        /* Map the winning offset to its entry object */
546         r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
547         if (r < 0)
548                 return r;
549
550         if (ret)
551                 *ret = n;
552         if (offset)
553                 *offset = np;
554
555         return 1;
556 }
557
/* Finds, in file f, an entry satisfying the match (sub)tree m that
 * corresponds to the journal's current location (j->current_location),
 * searching in the given direction.
 *
 * For OR terms: returns 1 when an entry was found (entry object in *ret
 * and offset in *offset, each only if non-NULL), 0 when there is none, a
 * negative value on error. Discrete matches and AND terms pass through
 * the result of the journal_file_*() helper or of next_for_match(). */
558 static int find_location_for_match(
559                 sd_journal *j,
560                 Match *m,
561                 JournalFile *f,
562                 direction_t direction,
563                 Object **ret,
564                 uint64_t *offset) {
565
566         int r;
567
568         assert(j);
569         assert(m);
570         assert(f);
571
572         if (m->type == MATCH_DISCRETE) {
573                 uint64_t dp;
574
                /* Resolve the match data to its data object in this file;
                 * r <= 0 (error or not found) is passed on as is */
575                 r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
576                 if (r <= 0)
577                         return r;
578
579                 /* FIXME: missing: find by monotonic */
                /* NOTE(review): the FIXME above looks stale -- a lookup
                 * by monotonic timestamp is done further down. Confirm
                 * and drop it. */
580
                /* Pick the lookup by what the current location provides:
                 * head, tail, sequence number (only if it stems from this
                 * file's seqnum ID), monotonic time (falling through to
                 * the next key on -ENOENT), realtime, and finally simply
                 * the first entry in the requested direction. */
581                 if (j->current_location.type == LOCATION_HEAD)
582                         return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_DOWN, ret, offset);
583                 if (j->current_location.type == LOCATION_TAIL)
584                         return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_UP, ret, offset);
585                 if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
586                         return journal_file_move_to_entry_by_seqnum_for_data(f, dp, j->current_location.seqnum, direction, ret, offset);
587                 if (j->current_location.monotonic_set) {
588                         r = journal_file_move_to_entry_by_monotonic_for_data(f, dp, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
589                         if (r != -ENOENT)
590                                 return r;
591                 }
592                 if (j->current_location.realtime_set)
593                         return journal_file_move_to_entry_by_realtime_for_data(f, dp, j->current_location.realtime, direction, ret, offset);
594
595                 return journal_file_next_entry_for_data(f, NULL, 0, dp, direction, ret, offset);
596
597         } else if (m->type == MATCH_OR_TERM) {
                /* Offset of the best candidate so far; 0 means "none yet" */
598                 uint64_t np = 0;
599                 Object *n;
600                 Match *i;
601
602                 /* Find the earliest match */
603
604                 LIST_FOREACH(matches, i, m->matches) {
605                         uint64_t cp;
606
607                         r = find_location_for_match(j, i, f, direction, NULL, &cp);
608                         if (r < 0)
609                                 return r;
610                         else if (r > 0) {
                                /* Keep the candidate nearest in the search
                                 * direction: smallest offset when going
                                 * down, largest when going up */
611                                 if (np == 0 || (direction == DIRECTION_DOWN ? np > cp : np < cp))
612                                         np = cp;
613                         }
614                 }
615
616                 if (np == 0)
617                         return 0;
618
619                 r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
620                 if (r < 0)
621                         return r;
622
623                 if (ret)
624                         *ret = n;
625                 if (offset)
626                         *offset = np;
627
628                 return 1;
629
630         } else {
631                 Match *i;
632                 uint64_t np = 0;
633
634                 assert(m->type == MATCH_AND_TERM);
635
636                 /* First jump to the last match, and then find the
637                  * next one where all matches match */
638
639                 if (!m->matches)
640                         return 0;
641
642                 LIST_FOREACH(matches, i, m->matches) {
643                         uint64_t cp;
644
                        /* Every child must have a location; r == 0 from
                         * any of them ends the search right away */
645                         r = find_location_for_match(j, i, f, direction, NULL, &cp);
646                         if (r <= 0)
647                                 return r;
648
                        /* Keep the candidate furthest in the search
                         * direction: largest offset when going down,
                         * smallest when going up */
649                         if (np == 0 || (direction == DIRECTION_DOWN ? cp > np : cp < np))
650                                 np = cp;
651                 }
652
653                 return next_for_match(j, m, f, np, direction, ret, offset);
654         }
655 }
656
657 static int find_location_with_matches(
658                 sd_journal *j,
659                 JournalFile *f,
660                 direction_t direction,
661                 Object **ret,
662                 uint64_t *offset) {
663
664         int r;
665
666         assert(j);
667         assert(f);
668         assert(ret);
669         assert(offset);
670
671         if (!j->level0) {
672                 /* No matches is simple */
673
674                 if (j->current_location.type == LOCATION_HEAD)
675                         return journal_file_next_entry(f, 0, DIRECTION_DOWN, ret, offset);
676                 if (j->current_location.type == LOCATION_TAIL)
677                         return journal_file_next_entry(f, 0, DIRECTION_UP, ret, offset);
678                 if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
679                         return journal_file_move_to_entry_by_seqnum(f, j->current_location.seqnum, direction, ret, offset);
680                 if (j->current_location.monotonic_set) {
681                         r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
682                         if (r != -ENOENT)
683                                 return r;
684                 }
685                 if (j->current_location.realtime_set)
686                         return journal_file_move_to_entry_by_realtime(f, j->current_location.realtime, direction, ret, offset);
687
688                 return journal_file_next_entry(f, 0, direction, ret, offset);
689         } else
690                 return find_location_for_match(j, j->level0, f, direction, ret, offset);
691 }
692
693 static int next_with_matches(
694                 sd_journal *j,
695                 JournalFile *f,
696                 direction_t direction,
697                 Object **ret,
698                 uint64_t *offset) {
699
700         assert(j);
701         assert(f);
702         assert(ret);
703         assert(offset);
704
705         /* No matches is easy. We simple advance the file
706          * pointer by one. */
707         if (!j->level0)
708                 return journal_file_next_entry(f, f->current_offset, direction, ret, offset);
709
710         /* If we have a match then we look for the next matching entry
711          * with an offset at least one step larger */
712         return next_for_match(j, j->level0, f,
713                               direction == DIRECTION_DOWN ? f->current_offset + 1
714                                                           : f->current_offset - 1,
715                               direction, ret, offset);
716 }
717
/* Moves file f to a candidate entry that lies beyond the journal's
 * current location in the given direction. Returns 1 when f now points at
 * such a candidate, 0 when the file has nothing (more) to offer, and a
 * negative value on error. */
718 static int next_beyond_location(sd_journal *j, JournalFile *f, direction_t direction) {
719         Object *c;
720         uint64_t cp, n_entries;
721         int r;
722
723         assert(j);
724         assert(f);
725
        /* Continuing in the same direction from an already known offset:
         * step on from there. Otherwise position the file from scratch. */
726         if (f->last_direction == direction && f->current_offset > 0) {
727                 /* If we hit EOF before, recheck if any new entries arrived. */
728                 n_entries = le64toh(f->header->n_entries);
729                 if (f->location_type == LOCATION_TAIL && n_entries == f->last_n_entries)
730                         return 0;
731                 f->last_n_entries = n_entries;
732
733                 /* LOCATION_SEEK here means we did the work in a previous
734                  * iteration and the current location already points to a
735                  * candidate entry. */
736                 if (f->location_type != LOCATION_SEEK) {
737                         r = next_with_matches(j, f, direction, &c, &cp);
738                         if (r <= 0)
739                                 return r;
740
741                         journal_file_save_location(f, direction, c, cp);
742                 }
743         } else {
744                 r = find_location_with_matches(j, f, direction, &c, &cp);
745                 if (r <= 0)
746                         return r;
747
748                 journal_file_save_location(f, direction, c, cp);
749         }
750
751         /* OK, we found the spot, now let's advance until an entry
752          * that is actually different from what we were previously
753          * looking at. This is necessary to handle entries which exist
754          * in two (or more) journal files, and which shall all be
755          * suppressed but one. */
756
757         for (;;) {
758                 bool found;
759
                /* Only a discrete current location can be compared with;
                 * for any other location type the candidate is accepted
                 * as it is. */
760                 if (j->current_location.type == LOCATION_DISCRETE) {
761                         int k;
762
763                         k = compare_with_location(f, &j->current_location);
764
                        /* The candidate must sort strictly beyond the
                         * current location; equal (k == 0) means the same
                         * entry and is skipped. */
765                         found = direction == DIRECTION_DOWN ? k > 0 : k < 0;
766                 } else
767                         found = true;
768
769                 if (found)
770                         return 1;
771
772                 r = next_with_matches(j, f, direction, &c, &cp);
773                 if (r <= 0)
774                         return r;
775
776                 journal_file_save_location(f, direction, c, cp);
777         }
778 }
779
780 static int real_journal_next(sd_journal *j, direction_t direction) {
781         JournalFile *f, *new_file = NULL;
782         Iterator i;
783         Object *o;
784         int r;
785
786         assert_return(j, -EINVAL);
787         assert_return(!journal_pid_changed(j), -ECHILD);
788
789         ORDERED_HASHMAP_FOREACH(f, j->files, i) {
790                 bool found;
791
792                 r = next_beyond_location(j, f, direction);
793                 if (r < 0) {
794                         log_debug_errno(r, "Can't iterate through %s, ignoring: %m", f->path);
795                         remove_file_real(j, f);
796                         continue;
797                 } else if (r == 0) {
798                         f->location_type = LOCATION_TAIL;
799                         continue;
800                 }
801
802                 if (!new_file)
803                         found = true;
804                 else {
805                         int k;
806
807                         k = journal_file_compare_locations(f, new_file);
808
809                         found = direction == DIRECTION_DOWN ? k < 0 : k > 0;
810                 }
811
812                 if (found)
813                         new_file = f;
814         }
815
816         if (!new_file)
817                 return 0;
818
819         r = journal_file_move_to_object(new_file, OBJECT_ENTRY, new_file->current_offset, &o);
820         if (r < 0)
821                 return r;
822
823         set_location(j, new_file, o);
824
825         return 1;
826 }
827
828 _public_ int sd_journal_next(sd_journal *j) {
829         return real_journal_next(j, DIRECTION_DOWN);
830 }
831
832 _public_ int sd_journal_previous(sd_journal *j) {
833         return real_journal_next(j, DIRECTION_UP);
834 }
835
836 static int real_journal_next_skip(sd_journal *j, direction_t direction, uint64_t skip) {
837         int c = 0, r;
838
839         assert_return(j, -EINVAL);
840         assert_return(!journal_pid_changed(j), -ECHILD);
841
842         if (skip == 0) {
843                 /* If this is not a discrete skip, then at least
844                  * resolve the current location */
845                 if (j->current_location.type != LOCATION_DISCRETE)
846                         return real_journal_next(j, direction);
847
848                 return 0;
849         }
850
851         do {
852                 r = real_journal_next(j, direction);
853                 if (r < 0)
854                         return r;
855
856                 if (r == 0)
857                         return c;
858
859                 skip--;
860                 c++;
861         } while (skip > 0);
862
863         return c;
864 }
865
866 _public_ int sd_journal_next_skip(sd_journal *j, uint64_t skip) {
867         return real_journal_next_skip(j, DIRECTION_DOWN, skip);
868 }
869
870 _public_ int sd_journal_previous_skip(sd_journal *j, uint64_t skip) {
871         return real_journal_next_skip(j, DIRECTION_UP, skip);
872 }
873
874 _public_ int sd_journal_get_cursor(sd_journal *j, char **cursor) {
875         Object *o;
876         int r;
877         char bid[33], sid[33];
878
879         assert_return(j, -EINVAL);
880         assert_return(!journal_pid_changed(j), -ECHILD);
881         assert_return(cursor, -EINVAL);
882
883         if (!j->current_file || j->current_file->current_offset <= 0)
884                 return -EADDRNOTAVAIL;
885
886         r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
887         if (r < 0)
888                 return r;
889
890         sd_id128_to_string(j->current_file->header->seqnum_id, sid);
891         sd_id128_to_string(o->entry.boot_id, bid);
892
893         if (asprintf(cursor,
894                      "s=%s;i=%"PRIx64";b=%s;m=%"PRIx64";t=%"PRIx64";x=%"PRIx64,
895                      sid, le64toh(o->entry.seqnum),
896                      bid, le64toh(o->entry.monotonic),
897                      le64toh(o->entry.realtime),
898                      le64toh(o->entry.xor_hash)) < 0)
899                 return -ENOMEM;
900
901         return 0;
902 }
903
904 _public_ int sd_journal_seek_cursor(sd_journal *j, const char *cursor) {
905         const char *word, *state;
906         size_t l;
907         unsigned long long seqnum, monotonic, realtime, xor_hash;
908         bool
909                 seqnum_id_set = false,
910                 seqnum_set = false,
911                 boot_id_set = false,
912                 monotonic_set = false,
913                 realtime_set = false,
914                 xor_hash_set = false;
915         sd_id128_t seqnum_id, boot_id;
916
917         assert_return(j, -EINVAL);
918         assert_return(!journal_pid_changed(j), -ECHILD);
919         assert_return(!isempty(cursor), -EINVAL);
920
921         FOREACH_WORD_SEPARATOR(word, l, cursor, ";", state) {
922                 char *item;
923                 int k = 0;
924
925                 if (l < 2 || word[1] != '=')
926                         return -EINVAL;
927
928                 item = strndup(word, l);
929                 if (!item)
930                         return -ENOMEM;
931
932                 switch (word[0]) {
933
934                 case 's':
935                         seqnum_id_set = true;
936                         k = sd_id128_from_string(item+2, &seqnum_id);
937                         break;
938
939                 case 'i':
940                         seqnum_set = true;
941                         if (sscanf(item+2, "%llx", &seqnum) != 1)
942                                 k = -EINVAL;
943                         break;
944
945                 case 'b':
946                         boot_id_set = true;
947                         k = sd_id128_from_string(item+2, &boot_id);
948                         break;
949
950                 case 'm':
951                         monotonic_set = true;
952                         if (sscanf(item+2, "%llx", &monotonic) != 1)
953                                 k = -EINVAL;
954                         break;
955
956                 case 't':
957                         realtime_set = true;
958                         if (sscanf(item+2, "%llx", &realtime) != 1)
959                                 k = -EINVAL;
960                         break;
961
962                 case 'x':
963                         xor_hash_set = true;
964                         if (sscanf(item+2, "%llx", &xor_hash) != 1)
965                                 k = -EINVAL;
966                         break;
967                 }
968
969                 free(item);
970
971                 if (k < 0)
972                         return k;
973         }
974
975         if ((!seqnum_set || !seqnum_id_set) &&
976             (!monotonic_set || !boot_id_set) &&
977             !realtime_set)
978                 return -EINVAL;
979
980         reset_location(j);
981
982         j->current_location.type = LOCATION_SEEK;
983
984         if (realtime_set) {
985                 j->current_location.realtime = (uint64_t) realtime;
986                 j->current_location.realtime_set = true;
987         }
988
989         if (seqnum_set && seqnum_id_set) {
990                 j->current_location.seqnum = (uint64_t) seqnum;
991                 j->current_location.seqnum_id = seqnum_id;
992                 j->current_location.seqnum_set = true;
993         }
994
995         if (monotonic_set && boot_id_set) {
996                 j->current_location.monotonic = (uint64_t) monotonic;
997                 j->current_location.boot_id = boot_id;
998                 j->current_location.monotonic_set = true;
999         }
1000
1001         if (xor_hash_set) {
1002                 j->current_location.xor_hash = (uint64_t) xor_hash;
1003                 j->current_location.xor_hash_set = true;
1004         }
1005
1006         return 0;
1007 }
1008
1009 _public_ int sd_journal_test_cursor(sd_journal *j, const char *cursor) {
1010         int r;
1011         const char *word, *state;
1012         size_t l;
1013         Object *o;
1014
1015         assert_return(j, -EINVAL);
1016         assert_return(!journal_pid_changed(j), -ECHILD);
1017         assert_return(!isempty(cursor), -EINVAL);
1018
1019         if (!j->current_file || j->current_file->current_offset <= 0)
1020                 return -EADDRNOTAVAIL;
1021
1022         r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
1023         if (r < 0)
1024                 return r;
1025
1026         FOREACH_WORD_SEPARATOR(word, l, cursor, ";", state) {
1027                 _cleanup_free_ char *item = NULL;
1028                 sd_id128_t id;
1029                 unsigned long long ll;
1030                 int k = 0;
1031
1032                 if (l < 2 || word[1] != '=')
1033                         return -EINVAL;
1034
1035                 item = strndup(word, l);
1036                 if (!item)
1037                         return -ENOMEM;
1038
1039                 switch (word[0]) {
1040
1041                 case 's':
1042                         k = sd_id128_from_string(item+2, &id);
1043                         if (k < 0)
1044                                 return k;
1045                         if (!sd_id128_equal(id, j->current_file->header->seqnum_id))
1046                                 return 0;
1047                         break;
1048
1049                 case 'i':
1050                         if (sscanf(item+2, "%llx", &ll) != 1)
1051                                 return -EINVAL;
1052                         if (ll != le64toh(o->entry.seqnum))
1053                                 return 0;
1054                         break;
1055
1056                 case 'b':
1057                         k = sd_id128_from_string(item+2, &id);
1058                         if (k < 0)
1059                                 return k;
1060                         if (!sd_id128_equal(id, o->entry.boot_id))
1061                                 return 0;
1062                         break;
1063
1064                 case 'm':
1065                         if (sscanf(item+2, "%llx", &ll) != 1)
1066                                 return -EINVAL;
1067                         if (ll != le64toh(o->entry.monotonic))
1068                                 return 0;
1069                         break;
1070
1071                 case 't':
1072                         if (sscanf(item+2, "%llx", &ll) != 1)
1073                                 return -EINVAL;
1074                         if (ll != le64toh(o->entry.realtime))
1075                                 return 0;
1076                         break;
1077
1078                 case 'x':
1079                         if (sscanf(item+2, "%llx", &ll) != 1)
1080                                 return -EINVAL;
1081                         if (ll != le64toh(o->entry.xor_hash))
1082                                 return 0;
1083                         break;
1084                 }
1085         }
1086
1087         return 1;
1088 }
1089
1090
1091 _public_ int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t usec) {
1092         assert_return(j, -EINVAL);
1093         assert_return(!journal_pid_changed(j), -ECHILD);
1094
1095         reset_location(j);
1096         j->current_location.type = LOCATION_SEEK;
1097         j->current_location.boot_id = boot_id;
1098         j->current_location.monotonic = usec;
1099         j->current_location.monotonic_set = true;
1100
1101         return 0;
1102 }
1103
1104 _public_ int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec) {
1105         assert_return(j, -EINVAL);
1106         assert_return(!journal_pid_changed(j), -ECHILD);
1107
1108         reset_location(j);
1109         j->current_location.type = LOCATION_SEEK;
1110         j->current_location.realtime = usec;
1111         j->current_location.realtime_set = true;
1112
1113         return 0;
1114 }
1115
/* Resets the journal's current location and marks it as LOCATION_HEAD.
 * Returns 0 on success, -EINVAL if j is NULL, -ECHILD if the journal
 * object is used from a different process than the one that created
 * it. */
_public_ int sd_journal_seek_head(sd_journal *j) {
        assert_return(j, -EINVAL);
        assert_return(!journal_pid_changed(j), -ECHILD);

        reset_location(j);
        j->current_location.type = LOCATION_HEAD;

        return 0;
}
1125
/* Resets the journal's current location and marks it as LOCATION_TAIL.
 * Returns 0 on success, -EINVAL if j is NULL, -ECHILD if the journal
 * object is used from a different process than the one that created
 * it. */
_public_ int sd_journal_seek_tail(sd_journal *j) {
        assert_return(j, -EINVAL);
        assert_return(!journal_pid_changed(j), -ECHILD);

        reset_location(j);
        j->current_location.type = LOCATION_TAIL;

        return 0;
}
1135
1136 static void check_network(sd_journal *j, int fd) {
1137         struct statfs sfs;
1138
1139         assert(j);
1140
1141         if (j->on_network)
1142                 return;
1143
1144         if (fstatfs(fd, &sfs) < 0)
1145                 return;
1146
1147         j->on_network =
1148                 F_TYPE_EQUAL(sfs.f_type, CIFS_MAGIC_NUMBER) ||
1149                 F_TYPE_EQUAL(sfs.f_type, CODA_SUPER_MAGIC) ||
1150                 F_TYPE_EQUAL(sfs.f_type, NCP_SUPER_MAGIC) ||
1151                 F_TYPE_EQUAL(sfs.f_type, NFS_SUPER_MAGIC) ||
1152                 F_TYPE_EQUAL(sfs.f_type, SMB_SUPER_MAGIC);
1153 }
1154
/* Returns true if filename is exactly "<prefix>.journal" or
 * "<prefix>.journal~", or if it starts with "<prefix>@". */
static bool file_has_type_prefix(const char *prefix, const char *filename) {
        const char *plain, *backup, *archived;

        plain = strappenda(prefix, ".journal");
        backup = strappenda(plain, "~");
        archived = strappenda(prefix, "@");

        if (streq(filename, plain))
                return true;

        if (streq(filename, backup))
                return true;

        return startswith(filename, archived) != NULL;
}
1166
1167 static bool file_type_wanted(int flags, const char *filename) {
1168         if (!endswith(filename, ".journal") && !endswith(filename, ".journal~"))
1169                 return false;
1170
1171         /* no flags set â†’ every type is OK */
1172         if (!(flags & (SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER)))
1173                 return true;
1174
1175         if (flags & SD_JOURNAL_SYSTEM && file_has_type_prefix("system", filename))
1176                 return true;
1177
1178         if (flags & SD_JOURNAL_CURRENT_USER) {
1179                 char prefix[5 + DECIMAL_STR_MAX(uid_t) + 1];
1180
1181                 assert_se(snprintf(prefix, sizeof(prefix), "user-"UID_FMT, getuid())
1182                           < (int) sizeof(prefix));
1183
1184                 if (file_has_type_prefix(prefix, filename))
1185                         return true;
1186         }
1187
1188         return false;
1189 }
1190
1191 static int add_any_file(sd_journal *j, const char *path) {
1192         JournalFile *f = NULL;
1193         int r;
1194
1195         assert(j);
1196         assert(path);
1197
1198         if (ordered_hashmap_get(j->files, path))
1199                 return 0;
1200
1201         if (ordered_hashmap_size(j->files) >= JOURNAL_FILES_MAX) {
1202                 log_warning("Too many open journal files, not adding %s.", path);
1203                 return set_put_error(j, -ETOOMANYREFS);
1204         }
1205
1206         r = journal_file_open(path, O_RDONLY, 0, false, false, NULL, j->mmap, NULL, &f);
1207         if (r < 0)
1208                 return r;
1209
1210         /* journal_file_dump(f); */
1211
1212         r = ordered_hashmap_put(j->files, f->path, f);
1213         if (r < 0) {
1214                 journal_file_close(f);
1215                 return r;
1216         }
1217
1218         log_debug("File %s added.", f->path);
1219
1220         check_network(j, f->fd);
1221
1222         j->current_invalidate_counter ++;
1223
1224         return 0;
1225 }
1226
1227 static int add_file(sd_journal *j, const char *prefix, const char *filename) {
1228         _cleanup_free_ char *path = NULL;
1229         int r;
1230
1231         assert(j);
1232         assert(prefix);
1233         assert(filename);
1234
1235         if (j->no_new_files ||
1236             !file_type_wanted(j->flags, filename))
1237                 return 0;
1238
1239         path = strjoin(prefix, "/", filename, NULL);
1240         if (!path)
1241                 return -ENOMEM;
1242
1243         r = add_any_file(j, path);
1244         if (r == -ENOENT)
1245                 return 0;
1246         return 0;
1247 }
1248
1249 static int remove_file(sd_journal *j, const char *prefix, const char *filename) {
1250         _cleanup_free_ char *path;
1251         JournalFile *f;
1252
1253         assert(j);
1254         assert(prefix);
1255         assert(filename);
1256
1257         path = strjoin(prefix, "/", filename, NULL);
1258         if (!path)
1259                 return -ENOMEM;
1260
1261         f = ordered_hashmap_get(j->files, path);
1262         if (!f)
1263                 return 0;
1264
1265         remove_file_real(j, f);
1266         return 0;
1267 }
1268
1269 static void remove_file_real(sd_journal *j, JournalFile *f) {
1270         assert(j);
1271         assert(f);
1272
1273         ordered_hashmap_remove(j->files, f->path);
1274
1275         log_debug("File %s removed.", f->path);
1276
1277         if (j->current_file == f) {
1278                 j->current_file = NULL;
1279                 j->current_field = 0;
1280         }
1281
1282         if (j->unique_file == f) {
1283                 /* Jump to the next unique_file or NULL if that one was last */
1284                 j->unique_file = ordered_hashmap_next(j->files, j->unique_file->path);
1285                 j->unique_offset = 0;
1286                 if (!j->unique_file)
1287                         j->unique_file_lost = true;
1288         }
1289
1290         journal_file_close(f);
1291
1292         j->current_invalidate_counter ++;
1293 }
1294
1295 static int add_directory(sd_journal *j, const char *prefix, const char *dirname) {
1296         _cleanup_free_ char *path = NULL;
1297         int r;
1298         _cleanup_closedir_ DIR *d = NULL;
1299         sd_id128_t id, mid;
1300         Directory *m;
1301
1302         assert(j);
1303         assert(prefix);
1304         assert(dirname);
1305
1306         log_debug("Considering %s/%s.", prefix, dirname);
1307
1308         if ((j->flags & SD_JOURNAL_LOCAL_ONLY) &&
1309             (sd_id128_from_string(dirname, &id) < 0 ||
1310              sd_id128_get_machine(&mid) < 0 ||
1311              !(sd_id128_equal(id, mid) || path_startswith(prefix, "/run"))))
1312             return 0;
1313
1314         path = strjoin(prefix, "/", dirname, NULL);
1315         if (!path)
1316                 return -ENOMEM;
1317
1318         d = opendir(path);
1319         if (!d) {
1320                 log_debug_errno(errno, "Failed to open %s: %m", path);
1321                 if (errno == ENOENT)
1322                         return 0;
1323                 return -errno;
1324         }
1325
1326         m = hashmap_get(j->directories_by_path, path);
1327         if (!m) {
1328                 m = new0(Directory, 1);
1329                 if (!m)
1330                         return -ENOMEM;
1331
1332                 m->is_root = false;
1333                 m->path = path;
1334
1335                 if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
1336                         free(m);
1337                         return -ENOMEM;
1338                 }
1339
1340                 path = NULL; /* avoid freeing in cleanup */
1341                 j->current_invalidate_counter ++;
1342
1343                 log_debug("Directory %s added.", m->path);
1344
1345         } else if (m->is_root)
1346                 return 0;
1347
1348         if (m->wd <= 0 && j->inotify_fd >= 0) {
1349
1350                 m->wd = inotify_add_watch(j->inotify_fd, m->path,
1351                                           IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1352                                           IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM|
1353                                           IN_ONLYDIR);
1354
1355                 if (m->wd > 0 && hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m) < 0)
1356                         inotify_rm_watch(j->inotify_fd, m->wd);
1357         }
1358
1359         for (;;) {
1360                 struct dirent *de;
1361
1362                 errno = 0;
1363                 de = readdir(d);
1364                 if (!de && errno != 0) {
1365                         r = -errno;
1366                         log_debug_errno(errno, "Failed to read directory %s: %m", m->path);
1367                         return r;
1368                 }
1369                 if (!de)
1370                         break;
1371
1372                 if (dirent_is_file_with_suffix(de, ".journal") ||
1373                     dirent_is_file_with_suffix(de, ".journal~")) {
1374                         r = add_file(j, m->path, de->d_name);
1375                         if (r < 0) {
1376                                 log_debug_errno(r, "Failed to add file %s/%s: %m",
1377                                                 m->path, de->d_name);
1378                                 r = set_put_error(j, r);
1379                                 if (r < 0)
1380                                         return r;
1381                         }
1382                 }
1383         }
1384
1385         check_network(j, dirfd(d));
1386
1387         return 0;
1388 }
1389
1390 static int add_root_directory(sd_journal *j, const char *p) {
1391         _cleanup_closedir_ DIR *d = NULL;
1392         Directory *m;
1393         int r;
1394
1395         assert(j);
1396         assert(p);
1397
1398         if ((j->flags & SD_JOURNAL_RUNTIME_ONLY) &&
1399             !path_startswith(p, "/run"))
1400                 return -EINVAL;
1401
1402         if (j->prefix)
1403                 p = strappenda(j->prefix, p);
1404
1405         d = opendir(p);
1406         if (!d)
1407                 return -errno;
1408
1409         m = hashmap_get(j->directories_by_path, p);
1410         if (!m) {
1411                 m = new0(Directory, 1);
1412                 if (!m)
1413                         return -ENOMEM;
1414
1415                 m->is_root = true;
1416                 m->path = strdup(p);
1417                 if (!m->path) {
1418                         free(m);
1419                         return -ENOMEM;
1420                 }
1421
1422                 if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
1423                         free(m->path);
1424                         free(m);
1425                         return -ENOMEM;
1426                 }
1427
1428                 j->current_invalidate_counter ++;
1429
1430                 log_debug("Root directory %s added.", m->path);
1431
1432         } else if (!m->is_root)
1433                 return 0;
1434
1435         if (m->wd <= 0 && j->inotify_fd >= 0) {
1436
1437                 m->wd = inotify_add_watch(j->inotify_fd, m->path,
1438                                           IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1439                                           IN_ONLYDIR);
1440
1441                 if (m->wd > 0 && hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m) < 0)
1442                         inotify_rm_watch(j->inotify_fd, m->wd);
1443         }
1444
1445         if (j->no_new_files)
1446                 return 0;
1447
1448         for (;;) {
1449                 struct dirent *de;
1450                 sd_id128_t id;
1451
1452                 errno = 0;
1453                 de = readdir(d);
1454                 if (!de && errno != 0) {
1455                         r = -errno;
1456                         log_debug_errno(errno, "Failed to read directory %s: %m", m->path);
1457                         return r;
1458                 }
1459                 if (!de)
1460                         break;
1461
1462                 if (dirent_is_file_with_suffix(de, ".journal") ||
1463                     dirent_is_file_with_suffix(de, ".journal~")) {
1464                         r = add_file(j, m->path, de->d_name);
1465                         if (r < 0) {
1466                                 log_debug_errno(r, "Failed to add file %s/%s: %m",
1467                                                 m->path, de->d_name);
1468                                 r = set_put_error(j, r);
1469                                 if (r < 0)
1470                                         return r;
1471                         }
1472                 } else if ((de->d_type == DT_DIR || de->d_type == DT_LNK || de->d_type == DT_UNKNOWN) &&
1473                            sd_id128_from_string(de->d_name, &id) >= 0) {
1474
1475                         r = add_directory(j, m->path, de->d_name);
1476                         if (r < 0)
1477                                 log_debug_errno(r, "Failed to add directory %s/%s: %m", m->path, de->d_name);
1478                 }
1479         }
1480
1481         check_network(j, dirfd(d));
1482
1483         return 0;
1484 }
1485
1486 static int remove_directory(sd_journal *j, Directory *d) {
1487         assert(j);
1488
1489         if (d->wd > 0) {
1490                 hashmap_remove(j->directories_by_wd, INT_TO_PTR(d->wd));
1491
1492                 if (j->inotify_fd >= 0)
1493                         inotify_rm_watch(j->inotify_fd, d->wd);
1494         }
1495
1496         hashmap_remove(j->directories_by_path, d->path);
1497
1498         if (d->is_root)
1499                 log_debug("Root directory %s removed.", d->path);
1500         else
1501                 log_debug("Directory %s removed.", d->path);
1502
1503         free(d->path);
1504         free(d);
1505
1506         return 0;
1507 }
1508
1509 static int add_search_paths(sd_journal *j) {
1510         int r;
1511         const char search_paths[] =
1512                 "/run/log/journal\0"
1513                 "/var/log/journal\0";
1514         const char *p;
1515
1516         assert(j);
1517
1518         /* We ignore most errors here, since the idea is to only open
1519          * what's actually accessible, and ignore the rest. */
1520
1521         NULSTR_FOREACH(p, search_paths) {
1522                 r = add_root_directory(j, p);
1523                 if (r < 0 && r != -ENOENT) {
1524                         r = set_put_error(j, r);
1525                         if (r < 0)
1526                                 return r;
1527                 }
1528         }
1529
1530         return 0;
1531 }
1532
1533 static int add_current_paths(sd_journal *j) {
1534         Iterator i;
1535         JournalFile *f;
1536
1537         assert(j);
1538         assert(j->no_new_files);
1539
1540         /* Simply adds all directories for files we have open as
1541          * "root" directories. We don't expect errors here, so we
1542          * treat them as fatal. */
1543
1544         ORDERED_HASHMAP_FOREACH(f, j->files, i) {
1545                 _cleanup_free_ char *dir;
1546                 int r;
1547
1548                 dir = dirname_malloc(f->path);
1549                 if (!dir)
1550                         return -ENOMEM;
1551
1552                 r = add_root_directory(j, dir);
1553                 if (r < 0) {
1554                         set_put_error(j, r);
1555                         return r;
1556                 }
1557         }
1558
1559         return 0;
1560 }
1561
1562
1563 static int allocate_inotify(sd_journal *j) {
1564         assert(j);
1565
1566         if (j->inotify_fd < 0) {
1567                 j->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1568                 if (j->inotify_fd < 0)
1569                         return -errno;
1570         }
1571
1572         if (!j->directories_by_wd) {
1573                 j->directories_by_wd = hashmap_new(NULL);
1574                 if (!j->directories_by_wd)
1575                         return -ENOMEM;
1576         }
1577
1578         return 0;
1579 }
1580
1581 static sd_journal *journal_new(int flags, const char *path) {
1582         sd_journal *j;
1583
1584         j = new0(sd_journal, 1);
1585         if (!j)
1586                 return NULL;
1587
1588         j->original_pid = getpid();
1589         j->inotify_fd = -1;
1590         j->flags = flags;
1591         j->data_threshold = DEFAULT_DATA_THRESHOLD;
1592
1593         if (path) {
1594                 j->path = strdup(path);
1595                 if (!j->path)
1596                         goto fail;
1597         }
1598
1599         j->files = ordered_hashmap_new(&string_hash_ops);
1600         j->directories_by_path = hashmap_new(&string_hash_ops);
1601         j->mmap = mmap_cache_new();
1602         if (!j->files || !j->directories_by_path || !j->mmap)
1603                 goto fail;
1604
1605         return j;
1606
1607 fail:
1608         sd_journal_close(j);
1609         return NULL;
1610 }
1611
1612 _public_ int sd_journal_open(sd_journal **ret, int flags) {
1613         sd_journal *j;
1614         int r;
1615
1616         assert_return(ret, -EINVAL);
1617         assert_return((flags & ~(SD_JOURNAL_LOCAL_ONLY|SD_JOURNAL_RUNTIME_ONLY|SD_JOURNAL_SYSTEM|SD_JOURNAL_CURRENT_USER)) == 0, -EINVAL);
1618
1619         j = journal_new(flags, NULL);
1620         if (!j)
1621                 return -ENOMEM;
1622
1623         r = add_search_paths(j);
1624         if (r < 0)
1625                 goto fail;
1626
1627         *ret = j;
1628         return 0;
1629
1630 fail:
1631         sd_journal_close(j);
1632
1633         return r;
1634 }
1635
1636 _public_ int sd_journal_open_container(sd_journal **ret, const char *machine, int flags) {
1637         _cleanup_free_ char *root = NULL, *class = NULL;
1638         sd_journal *j;
1639         char *p;
1640         int r;
1641
1642         assert_return(machine, -EINVAL);
1643         assert_return(ret, -EINVAL);
1644         assert_return((flags & ~(SD_JOURNAL_LOCAL_ONLY|SD_JOURNAL_SYSTEM)) == 0, -EINVAL);
1645         assert_return(machine_name_is_valid(machine), -EINVAL);
1646
1647         p = strappenda("/run/systemd/machines/", machine);
1648         r = parse_env_file(p, NEWLINE, "ROOT", &root, "CLASS", &class, NULL);
1649         if (r == -ENOENT)
1650                 return -EHOSTDOWN;
1651         if (r < 0)
1652                 return r;
1653         if (!root)
1654                 return -ENODATA;
1655
1656         if (!streq_ptr(class, "container"))
1657                 return -EIO;
1658
1659         j = journal_new(flags, NULL);
1660         if (!j)
1661                 return -ENOMEM;
1662
1663         j->prefix = root;
1664         root = NULL;
1665
1666         r = add_search_paths(j);
1667         if (r < 0)
1668                 goto fail;
1669
1670         *ret = j;
1671         return 0;
1672
1673 fail:
1674         sd_journal_close(j);
1675         return r;
1676 }
1677
1678 _public_ int sd_journal_open_directory(sd_journal **ret, const char *path, int flags) {
1679         sd_journal *j;
1680         int r;
1681
1682         assert_return(ret, -EINVAL);
1683         assert_return(path, -EINVAL);
1684         assert_return(flags == 0, -EINVAL);
1685
1686         j = journal_new(flags, path);
1687         if (!j)
1688                 return -ENOMEM;
1689
1690         r = add_root_directory(j, path);
1691         if (r < 0) {
1692                 set_put_error(j, r);
1693                 goto fail;
1694         }
1695
1696         *ret = j;
1697         return 0;
1698
1699 fail:
1700         sd_journal_close(j);
1701
1702         return r;
1703 }
1704
1705 _public_ int sd_journal_open_files(sd_journal **ret, const char **paths, int flags) {
1706         sd_journal *j;
1707         const char **path;
1708         int r;
1709
1710         assert_return(ret, -EINVAL);
1711         assert_return(flags == 0, -EINVAL);
1712
1713         j = journal_new(flags, NULL);
1714         if (!j)
1715                 return -ENOMEM;
1716
1717         STRV_FOREACH(path, paths) {
1718                 r = add_any_file(j, *path);
1719                 if (r < 0) {
1720                         log_error_errno(r, "Failed to open %s: %m", *path);
1721                         goto fail;
1722                 }
1723         }
1724
1725         j->no_new_files = true;
1726
1727         *ret = j;
1728         return 0;
1729
1730 fail:
1731         sd_journal_close(j);
1732
1733         return r;
1734 }
1735
1736 _public_ void sd_journal_close(sd_journal *j) {
1737         Directory *d;
1738         JournalFile *f;
1739
1740         if (!j)
1741                 return;
1742
1743         sd_journal_flush_matches(j);
1744
1745         while ((f = ordered_hashmap_steal_first(j->files)))
1746                 journal_file_close(f);
1747
1748         ordered_hashmap_free(j->files);
1749
1750         while ((d = hashmap_first(j->directories_by_path)))
1751                 remove_directory(j, d);
1752
1753         while ((d = hashmap_first(j->directories_by_wd)))
1754                 remove_directory(j, d);
1755
1756         hashmap_free(j->directories_by_path);
1757         hashmap_free(j->directories_by_wd);
1758
1759         safe_close(j->inotify_fd);
1760
1761         if (j->mmap) {
1762                 log_debug("mmap cache statistics: %u hit, %u miss", mmap_cache_get_hit(j->mmap), mmap_cache_get_missed(j->mmap));
1763                 mmap_cache_unref(j->mmap);
1764         }
1765
1766         free(j->path);
1767         free(j->prefix);
1768         free(j->unique_field);
1769         set_free(j->errors);
1770         free(j);
1771 }
1772
1773 _public_ int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret) {
1774         Object *o;
1775         JournalFile *f;
1776         int r;
1777
1778         assert_return(j, -EINVAL);
1779         assert_return(!journal_pid_changed(j), -ECHILD);
1780         assert_return(ret, -EINVAL);
1781
1782         f = j->current_file;
1783         if (!f)
1784                 return -EADDRNOTAVAIL;
1785
1786         if (f->current_offset <= 0)
1787                 return -EADDRNOTAVAIL;
1788
1789         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1790         if (r < 0)
1791                 return r;
1792
1793         *ret = le64toh(o->entry.realtime);
1794         return 0;
1795 }
1796
1797 _public_ int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id128_t *ret_boot_id) {
1798         Object *o;
1799         JournalFile *f;
1800         int r;
1801         sd_id128_t id;
1802
1803         assert_return(j, -EINVAL);
1804         assert_return(!journal_pid_changed(j), -ECHILD);
1805
1806         f = j->current_file;
1807         if (!f)
1808                 return -EADDRNOTAVAIL;
1809
1810         if (f->current_offset <= 0)
1811                 return -EADDRNOTAVAIL;
1812
1813         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1814         if (r < 0)
1815                 return r;
1816
1817         if (ret_boot_id)
1818                 *ret_boot_id = o->entry.boot_id;
1819         else {
1820                 r = sd_id128_get_boot(&id);
1821                 if (r < 0)
1822                         return r;
1823
1824                 if (!sd_id128_equal(id, o->entry.boot_id))
1825                         return -ESTALE;
1826         }
1827
1828         if (ret)
1829                 *ret = le64toh(o->entry.monotonic);
1830
1831         return 0;
1832 }
1833
/* Returns true if field is acceptable as a journal field name here: it
 * must be non-empty, must not start with "__", and may only consist of
 * the characters 'A'-'Z', '0'-'9' and '_'. */
static bool field_is_valid(const char *field) {
        const char *c;

        assert(field);

        if (isempty(field) || startswith(field, "__"))
                return false;

        for (c = field; *c; c++) {
                bool allowed;

                allowed = *c == '_' ||
                          (*c >= 'A' && *c <= 'Z') ||
                          (*c >= '0' && *c <= '9');

                if (!allowed)
                        return false;
        }

        return true;
}
1861
/* Looks up the data item of the current entry whose payload starts with
 * "<field>=" and returns a pointer to the payload and its size in *data
 * and *size.
 *
 * For an uncompressed item, *data points at the payload of the data
 * object itself. For a compressed item it points into the file's
 * compress_buffer, and decompression is limited by j->data_threshold.
 *
 * Returns 0 on success, -EINVAL for invalid arguments (including a field
 * name rejected by field_is_valid()), -ECHILD if the journal object is
 * used from a different process than the one that created it,
 * -EADDRNOTAVAIL if the journal is not positioned at an entry, -ENOENT
 * if no item of the entry matches, -EBADMSG if the hash stored in the
 * entry disagrees with the one in the data object, -E2BIG if the payload
 * size does not fit into size_t, -EPROTONOSUPPORT if a compressed item
 * is hit but neither XZ nor LZ4 support is compiled in, or another
 * negative error from the lower layers. */
_public_ int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *size) {
        JournalFile *f;
        uint64_t i, n;
        size_t field_length;
        int r;
        Object *o;

        assert_return(j, -EINVAL);
        assert_return(!journal_pid_changed(j), -ECHILD);
        assert_return(field, -EINVAL);
        assert_return(data, -EINVAL);
        assert_return(size, -EINVAL);
        assert_return(field_is_valid(field), -EINVAL);

        f = j->current_file;
        if (!f)
                return -EADDRNOTAVAIL;

        if (f->current_offset <= 0)
                return -EADDRNOTAVAIL;

        r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
        if (r < 0)
                return r;

        field_length = strlen(field);

        n = journal_file_entry_n_items(o);
        for (i = 0; i < n; i++) {
                uint64_t p, l;
                le64_t le_hash;
                size_t t;
                int compression;

                /* Copy offset and hash out of the entry first: o is
                 * repointed to the data object right below */
                p = le64toh(o->entry.items[i].object_offset);
                le_hash = o->entry.items[i].hash;
                r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
                if (r < 0)
                        return r;

                /* The hash recorded in the entry has to agree with the
                 * one stored in the data object */
                if (le_hash != o->data.hash)
                        return -EBADMSG;

                /* Payload length: object size minus everything that
                 * precedes the payload */
                l = le64toh(o->object.size) - offsetof(Object, data.payload);

                compression = o->object.flags & OBJECT_COMPRESSION_MASK;
                if (compression) {
#if defined(HAVE_XZ) || defined(HAVE_LZ4)
                        /* Check the "<field>=" prefix first, and decompress
                         * the whole blob only if it matches */
                        if (decompress_startswith(compression,
                                                  o->data.payload, l,
                                                  &f->compress_buffer, &f->compress_buffer_size,
                                                  field, field_length, '=')) {

                                size_t rsize;

                                r = decompress_blob(compression,
                                                    o->data.payload, l,
                                                    &f->compress_buffer, &f->compress_buffer_size, &rsize,
                                                    j->data_threshold);
                                if (r < 0)
                                        return r;

                                *data = f->compress_buffer;
                                *size = (size_t) rsize;

                                return 0;
                        }
#else
                        return -EPROTONOSUPPORT;
#endif
                } else if (l >= field_length+1 &&
                           memcmp(o->data.payload, field, field_length) == 0 &&
                           o->data.payload[field_length] == '=') {

                        t = (size_t) l;

                        /* Detect truncation where size_t is narrower than
                         * 64 bits */
                        if ((uint64_t) t != l)
                                return -E2BIG;

                        *data = o->data.payload;
                        *size = t;

                        return 0;
                }

                /* Point o back at the entry object before looking at the
                 * next item; presumably the earlier pointer may no longer
                 * be valid after moving to the data object (confirm) */
                r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
                if (r < 0)
                        return r;
        }

        return -ENOENT;
}
1954
1955 static int return_data(sd_journal *j, JournalFile *f, Object *o, const void **data, size_t *size) {
1956         size_t t;
1957         uint64_t l;
1958         int compression;
1959
1960         l = le64toh(o->object.size) - offsetof(Object, data.payload);
1961         t = (size_t) l;
1962
1963         /* We can't read objects larger than 4G on a 32bit machine */
1964         if ((uint64_t) t != l)
1965                 return -E2BIG;
1966
1967         compression = o->object.flags & OBJECT_COMPRESSION_MASK;
1968         if (compression) {
1969 #if defined(HAVE_XZ) || defined(HAVE_LZ4)
1970                 size_t rsize;
1971                 int r;
1972
1973                 r = decompress_blob(compression,
1974                                     o->data.payload, l, &f->compress_buffer,
1975                                     &f->compress_buffer_size, &rsize, j->data_threshold);
1976                 if (r < 0)
1977                         return r;
1978
1979                 *data = f->compress_buffer;
1980                 *size = (size_t) rsize;
1981 #else
1982                 return -EPROTONOSUPPORT;
1983 #endif
1984         } else {
1985                 *data = o->data.payload;
1986                 *size = t;
1987         }
1988
1989         return 0;
1990 }
1991
1992 _public_ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) {
1993         JournalFile *f;
1994         uint64_t p, n;
1995         le64_t le_hash;
1996         int r;
1997         Object *o;
1998
1999         assert_return(j, -EINVAL);
2000         assert_return(!journal_pid_changed(j), -ECHILD);
2001         assert_return(data, -EINVAL);
2002         assert_return(size, -EINVAL);
2003
2004         f = j->current_file;
2005         if (!f)
2006                 return -EADDRNOTAVAIL;
2007
2008         if (f->current_offset <= 0)
2009                 return -EADDRNOTAVAIL;
2010
2011         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2012         if (r < 0)
2013                 return r;
2014
2015         n = journal_file_entry_n_items(o);
2016         if (j->current_field >= n)
2017                 return 0;
2018
2019         p = le64toh(o->entry.items[j->current_field].object_offset);
2020         le_hash = o->entry.items[j->current_field].hash;
2021         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2022         if (r < 0)
2023                 return r;
2024
2025         if (le_hash != o->data.hash)
2026                 return -EBADMSG;
2027
2028         r = return_data(j, f, o, data, size);
2029         if (r < 0)
2030                 return r;
2031
2032         j->current_field ++;
2033
2034         return 1;
2035 }
2036
2037 _public_ void sd_journal_restart_data(sd_journal *j) {
2038         if (!j)
2039                 return;
2040
2041         j->current_field = 0;
2042 }
2043
2044 _public_ int sd_journal_get_fd(sd_journal *j) {
2045         int r;
2046
2047         assert_return(j, -EINVAL);
2048         assert_return(!journal_pid_changed(j), -ECHILD);
2049
2050         if (j->inotify_fd >= 0)
2051                 return j->inotify_fd;
2052
2053         r = allocate_inotify(j);
2054         if (r < 0)
2055                 return r;
2056
2057         /* Iterate through all dirs again, to add them to the
2058          * inotify */
2059         if (j->no_new_files)
2060                 r = add_current_paths(j);
2061         else if (j->path)
2062                 r = add_root_directory(j, j->path);
2063         else
2064                 r = add_search_paths(j);
2065         if (r < 0)
2066                 return r;
2067
2068         return j->inotify_fd;
2069 }
2070
2071 _public_ int sd_journal_get_events(sd_journal *j) {
2072         int fd;
2073
2074         assert_return(j, -EINVAL);
2075         assert_return(!journal_pid_changed(j), -ECHILD);
2076
2077         fd = sd_journal_get_fd(j);
2078         if (fd < 0)
2079                 return fd;
2080
2081         return POLLIN;
2082 }
2083
2084 _public_ int sd_journal_get_timeout(sd_journal *j, uint64_t *timeout_usec) {
2085         int fd;
2086
2087         assert_return(j, -EINVAL);
2088         assert_return(!journal_pid_changed(j), -ECHILD);
2089         assert_return(timeout_usec, -EINVAL);
2090
2091         fd = sd_journal_get_fd(j);
2092         if (fd < 0)
2093                 return fd;
2094
2095         if (!j->on_network) {
2096                 *timeout_usec = (uint64_t) -1;
2097                 return 0;
2098         }
2099
2100         /* If we are on the network we need to regularly check for
2101          * changes manually */
2102
2103         *timeout_usec = j->last_process_usec + JOURNAL_FILES_RECHECK_USEC;
2104         return 1;
2105 }
2106
2107 static void process_inotify_event(sd_journal *j, struct inotify_event *e) {
2108         Directory *d;
2109         int r;
2110
2111         assert(j);
2112         assert(e);
2113
2114         /* Is this a subdirectory we watch? */
2115         d = hashmap_get(j->directories_by_wd, INT_TO_PTR(e->wd));
2116         if (d) {
2117                 sd_id128_t id;
2118
2119                 if (!(e->mask & IN_ISDIR) && e->len > 0 &&
2120                     (endswith(e->name, ".journal") ||
2121                      endswith(e->name, ".journal~"))) {
2122
2123                         /* Event for a journal file */
2124
2125                         if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) {
2126                                 r = add_file(j, d->path, e->name);
2127                                 if (r < 0) {
2128                                         log_debug_errno(r, "Failed to add file %s/%s: %m",
2129                                                         d->path, e->name);
2130                                         set_put_error(j, r);
2131                                 }
2132
2133                         } else if (e->mask & (IN_DELETE|IN_MOVED_FROM|IN_UNMOUNT)) {
2134
2135                                 r = remove_file(j, d->path, e->name);
2136                                 if (r < 0)
2137                                         log_debug_errno(r, "Failed to remove file %s/%s: %m", d->path, e->name);
2138                         }
2139
2140                 } else if (!d->is_root && e->len == 0) {
2141
2142                         /* Event for a subdirectory */
2143
2144                         if (e->mask & (IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT)) {
2145                                 r = remove_directory(j, d);
2146                                 if (r < 0)
2147                                         log_debug_errno(r, "Failed to remove directory %s: %m", d->path);
2148                         }
2149
2150
2151                 } else if (d->is_root && (e->mask & IN_ISDIR) && e->len > 0 && sd_id128_from_string(e->name, &id) >= 0) {
2152
2153                         /* Event for root directory */
2154
2155                         if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) {
2156                                 r = add_directory(j, d->path, e->name);
2157                                 if (r < 0)
2158                                         log_debug_errno(r, "Failed to add directory %s/%s: %m", d->path, e->name);
2159                         }
2160                 }
2161
2162                 return;
2163         }
2164
2165         if (e->mask & IN_IGNORED)
2166                 return;
2167
2168         log_warning("Unknown inotify event.");
2169 }
2170
2171 static int determine_change(sd_journal *j) {
2172         bool b;
2173
2174         assert(j);
2175
2176         b = j->current_invalidate_counter != j->last_invalidate_counter;
2177         j->last_invalidate_counter = j->current_invalidate_counter;
2178
2179         return b ? SD_JOURNAL_INVALIDATE : SD_JOURNAL_APPEND;
2180 }
2181
2182 _public_ int sd_journal_process(sd_journal *j) {
2183         bool got_something = false;
2184
2185         assert_return(j, -EINVAL);
2186         assert_return(!journal_pid_changed(j), -ECHILD);
2187
2188         j->last_process_usec = now(CLOCK_MONOTONIC);
2189
2190         for (;;) {
2191                 union inotify_event_buffer buffer;
2192                 struct inotify_event *e;
2193                 ssize_t l;
2194
2195                 l = read(j->inotify_fd, &buffer, sizeof(buffer));
2196                 if (l < 0) {
2197                         if (errno == EAGAIN || errno == EINTR)
2198                                 return got_something ? determine_change(j) : SD_JOURNAL_NOP;
2199
2200                         return -errno;
2201                 }
2202
2203                 got_something = true;
2204
2205                 FOREACH_INOTIFY_EVENT(e, buffer, l)
2206                         process_inotify_event(j, e);
2207         }
2208 }
2209
2210 _public_ int sd_journal_wait(sd_journal *j, uint64_t timeout_usec) {
2211         int r;
2212         uint64_t t;
2213
2214         assert_return(j, -EINVAL);
2215         assert_return(!journal_pid_changed(j), -ECHILD);
2216
2217         if (j->inotify_fd < 0) {
2218
2219                 /* This is the first invocation, hence create the
2220                  * inotify watch */
2221                 r = sd_journal_get_fd(j);
2222                 if (r < 0)
2223                         return r;
2224
2225                 /* The journal might have changed since the context
2226                  * object was created and we weren't watching before,
2227                  * hence don't wait for anything, and return
2228                  * immediately. */
2229                 return determine_change(j);
2230         }
2231
2232         r = sd_journal_get_timeout(j, &t);
2233         if (r < 0)
2234                 return r;
2235
2236         if (t != (uint64_t) -1) {
2237                 usec_t n;
2238
2239                 n = now(CLOCK_MONOTONIC);
2240                 t = t > n ? t - n : 0;
2241
2242                 if (timeout_usec == (uint64_t) -1 || timeout_usec > t)
2243                         timeout_usec = t;
2244         }
2245
2246         do {
2247                 r = fd_wait_for_event(j->inotify_fd, POLLIN, timeout_usec);
2248         } while (r == -EINTR);
2249
2250         if (r < 0)
2251                 return r;
2252
2253         return sd_journal_process(j);
2254 }
2255
2256 _public_ int sd_journal_get_cutoff_realtime_usec(sd_journal *j, uint64_t *from, uint64_t *to) {
2257         Iterator i;
2258         JournalFile *f;
2259         bool first = true;
2260         uint64_t fmin = 0, tmax = 0;
2261         int r;
2262
2263         assert_return(j, -EINVAL);
2264         assert_return(!journal_pid_changed(j), -ECHILD);
2265         assert_return(from || to, -EINVAL);
2266         assert_return(from != to, -EINVAL);
2267
2268         ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2269                 usec_t fr, t;
2270
2271                 r = journal_file_get_cutoff_realtime_usec(f, &fr, &t);
2272                 if (r == -ENOENT)
2273                         continue;
2274                 if (r < 0)
2275                         return r;
2276                 if (r == 0)
2277                         continue;
2278
2279                 if (first) {
2280                         fmin = fr;
2281                         tmax = t;
2282                         first = false;
2283                 } else {
2284                         fmin = MIN(fr, fmin);
2285                         tmax = MAX(t, tmax);
2286                 }
2287         }
2288
2289         if (from)
2290                 *from = fmin;
2291         if (to)
2292                 *to = tmax;
2293
2294         return first ? 0 : 1;
2295 }
2296
2297 _public_ int sd_journal_get_cutoff_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t *from, uint64_t *to) {
2298         Iterator i;
2299         JournalFile *f;
2300         bool found = false;
2301         int r;
2302
2303         assert_return(j, -EINVAL);
2304         assert_return(!journal_pid_changed(j), -ECHILD);
2305         assert_return(from || to, -EINVAL);
2306         assert_return(from != to, -EINVAL);
2307
2308         ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2309                 usec_t fr, t;
2310
2311                 r = journal_file_get_cutoff_monotonic_usec(f, boot_id, &fr, &t);
2312                 if (r == -ENOENT)
2313                         continue;
2314                 if (r < 0)
2315                         return r;
2316                 if (r == 0)
2317                         continue;
2318
2319                 if (found) {
2320                         if (from)
2321                                 *from = MIN(fr, *from);
2322                         if (to)
2323                                 *to = MAX(t, *to);
2324                 } else {
2325                         if (from)
2326                                 *from = fr;
2327                         if (to)
2328                                 *to = t;
2329                         found = true;
2330                 }
2331         }
2332
2333         return found;
2334 }
2335
2336 void journal_print_header(sd_journal *j) {
2337         Iterator i;
2338         JournalFile *f;
2339         bool newline = false;
2340
2341         assert(j);
2342
2343         ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2344                 if (newline)
2345                         putchar('\n');
2346                 else
2347                         newline = true;
2348
2349                 journal_file_print_header(f);
2350         }
2351 }
2352
2353 _public_ int sd_journal_get_usage(sd_journal *j, uint64_t *bytes) {
2354         Iterator i;
2355         JournalFile *f;
2356         uint64_t sum = 0;
2357
2358         assert_return(j, -EINVAL);
2359         assert_return(!journal_pid_changed(j), -ECHILD);
2360         assert_return(bytes, -EINVAL);
2361
2362         ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2363                 struct stat st;
2364
2365                 if (fstat(f->fd, &st) < 0)
2366                         return -errno;
2367
2368                 sum += (uint64_t) st.st_blocks * 512ULL;
2369         }
2370
2371         *bytes = sum;
2372         return 0;
2373 }
2374
2375 _public_ int sd_journal_query_unique(sd_journal *j, const char *field) {
2376         char *f;
2377
2378         assert_return(j, -EINVAL);
2379         assert_return(!journal_pid_changed(j), -ECHILD);
2380         assert_return(!isempty(field), -EINVAL);
2381         assert_return(field_is_valid(field), -EINVAL);
2382
2383         f = strdup(field);
2384         if (!f)
2385                 return -ENOMEM;
2386
2387         free(j->unique_field);
2388         j->unique_field = f;
2389         j->unique_file = NULL;
2390         j->unique_offset = 0;
2391         j->unique_file_lost = false;
2392
2393         return 0;
2394 }
2395
2396 _public_ int sd_journal_enumerate_unique(sd_journal *j, const void **data, size_t *l) {
2397         size_t k;
2398
2399         assert_return(j, -EINVAL);
2400         assert_return(!journal_pid_changed(j), -ECHILD);
2401         assert_return(data, -EINVAL);
2402         assert_return(l, -EINVAL);
2403         assert_return(j->unique_field, -EINVAL);
2404
2405         k = strlen(j->unique_field);
2406
2407         if (!j->unique_file) {
2408                 if (j->unique_file_lost)
2409                         return 0;
2410
2411                 j->unique_file = ordered_hashmap_first(j->files);
2412                 if (!j->unique_file)
2413                         return 0;
2414
2415                 j->unique_offset = 0;
2416         }
2417
2418         for (;;) {
2419                 JournalFile *of;
2420                 Iterator i;
2421                 Object *o;
2422                 const void *odata;
2423                 size_t ol;
2424                 bool found;
2425                 int r;
2426
2427                 /* Proceed to next data object in the field's linked list */
2428                 if (j->unique_offset == 0) {
2429                         r = journal_file_find_field_object(j->unique_file, j->unique_field, k, &o, NULL);
2430                         if (r < 0)
2431                                 return r;
2432
2433                         j->unique_offset = r > 0 ? le64toh(o->field.head_data_offset) : 0;
2434                 } else {
2435                         r = journal_file_move_to_object(j->unique_file, OBJECT_DATA, j->unique_offset, &o);
2436                         if (r < 0)
2437                                 return r;
2438
2439                         j->unique_offset = le64toh(o->data.next_field_offset);
2440                 }
2441
2442                 /* We reached the end of the list? Then start again, with the next file */
2443                 if (j->unique_offset == 0) {
2444                         j->unique_file = ordered_hashmap_next(j->files, j->unique_file->path);
2445                         if (!j->unique_file)
2446                                 return 0;
2447
2448                         continue;
2449                 }
2450
2451                 /* We do not use OBJECT_DATA context here, but OBJECT_UNUSED
2452                  * instead, so that we can look at this data object at the same
2453                  * time as one on another file */
2454                 r = journal_file_move_to_object(j->unique_file, OBJECT_UNUSED, j->unique_offset, &o);
2455                 if (r < 0)
2456                         return r;
2457
2458                 /* Let's do the type check by hand, since we used 0 context above. */
2459                 if (o->object.type != OBJECT_DATA) {
2460                         log_debug("%s:offset " OFSfmt ": object has type %d, expected %d",
2461                                   j->unique_file->path, j->unique_offset,
2462                                   o->object.type, OBJECT_DATA);
2463                         return -EBADMSG;
2464                 }
2465
2466                 r = return_data(j, j->unique_file, o, &odata, &ol);
2467                 if (r < 0)
2468                         return r;
2469
2470                 /* Check if we have at least the field name and "=". */
2471                 if (ol <= k) {
2472                         log_debug("%s:offset " OFSfmt ": object has size %zu, expected at least %zu",
2473                                   j->unique_file->path, j->unique_offset,
2474                                   ol, k + 1);
2475                         return -EBADMSG;
2476                 }
2477
2478                 if (memcmp(odata, j->unique_field, k) || ((const char*) odata)[k] != '=') {
2479                         log_debug("%s:offset " OFSfmt ": object does not start with \"%s=\"",
2480                                   j->unique_file->path, j->unique_offset,
2481                                   j->unique_field);
2482                         return -EBADMSG;
2483                 }
2484
2485                 /* OK, now let's see if we already returned this data
2486                  * object by checking if it exists in the earlier
2487                  * traversed files. */
2488                 found = false;
2489                 ORDERED_HASHMAP_FOREACH(of, j->files, i) {
2490                         Object *oo;
2491                         uint64_t op;
2492
2493                         if (of == j->unique_file)
2494                                 break;
2495
2496                         /* Skip this file it didn't have any fields
2497                          * indexed */
2498                         if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) &&
2499                             le64toh(of->header->n_fields) <= 0)
2500                                 continue;
2501
2502                         r = journal_file_find_data_object_with_hash(of, odata, ol, le64toh(o->data.hash), &oo, &op);
2503                         if (r < 0)
2504                                 return r;
2505
2506                         if (r > 0)
2507                                 found = true;
2508                 }
2509
2510                 if (found)
2511                         continue;
2512
2513                 r = return_data(j, j->unique_file, o, data, l);
2514                 if (r < 0)
2515                         return r;
2516
2517                 return 1;
2518         }
2519 }
2520
2521 _public_ void sd_journal_restart_unique(sd_journal *j) {
2522         if (!j)
2523                 return;
2524
2525         j->unique_file = NULL;
2526         j->unique_offset = 0;
2527         j->unique_file_lost = false;
2528 }
2529
2530 _public_ int sd_journal_reliable_fd(sd_journal *j) {
2531         assert_return(j, -EINVAL);
2532         assert_return(!journal_pid_changed(j), -ECHILD);
2533
2534         return !j->on_network;
2535 }
2536
2537 static char *lookup_field(const char *field, void *userdata) {
2538         sd_journal *j = userdata;
2539         const void *data;
2540         size_t size, d;
2541         int r;
2542
2543         assert(field);
2544         assert(j);
2545
2546         r = sd_journal_get_data(j, field, &data, &size);
2547         if (r < 0 ||
2548             size > REPLACE_VAR_MAX)
2549                 return strdup(field);
2550
2551         d = strlen(field) + 1;
2552
2553         return strndup((const char*) data + d, size - d);
2554 }
2555
2556 _public_ int sd_journal_get_catalog(sd_journal *j, char **ret) {
2557         const void *data;
2558         size_t size;
2559         sd_id128_t id;
2560         _cleanup_free_ char *text = NULL, *cid = NULL;
2561         char *t;
2562         int r;
2563
2564         assert_return(j, -EINVAL);
2565         assert_return(!journal_pid_changed(j), -ECHILD);
2566         assert_return(ret, -EINVAL);
2567
2568         r = sd_journal_get_data(j, "MESSAGE_ID", &data, &size);
2569         if (r < 0)
2570                 return r;
2571
2572         cid = strndup((const char*) data + 11, size - 11);
2573         if (!cid)
2574                 return -ENOMEM;
2575
2576         r = sd_id128_from_string(cid, &id);
2577         if (r < 0)
2578                 return r;
2579
2580         r = catalog_get(CATALOG_DATABASE, id, &text);
2581         if (r < 0)
2582                 return r;
2583
2584         t = replace_var(text, lookup_field, j);
2585         if (!t)
2586                 return -ENOMEM;
2587
2588         *ret = t;
2589         return 0;
2590 }
2591
2592 _public_ int sd_journal_get_catalog_for_message_id(sd_id128_t id, char **ret) {
2593         assert_return(ret, -EINVAL);
2594
2595         return catalog_get(CATALOG_DATABASE, id, ret);
2596 }
2597
2598 _public_ int sd_journal_set_data_threshold(sd_journal *j, size_t sz) {
2599         assert_return(j, -EINVAL);
2600         assert_return(!journal_pid_changed(j), -ECHILD);
2601
2602         j->data_threshold = sz;
2603         return 0;
2604 }
2605
2606 _public_ int sd_journal_get_data_threshold(sd_journal *j, size_t *sz) {
2607         assert_return(j, -EINVAL);
2608         assert_return(!journal_pid_changed(j), -ECHILD);
2609         assert_return(sz, -EINVAL);
2610
2611         *sz = j->data_threshold;
2612         return 0;
2613 }