chiark / gitweb /
journal: don't mind too much if we can't find a monotonic timestamp
[elogind.git] / src / journal / sd-journal.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 2 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <stddef.h>
25 #include <unistd.h>
26 #include <sys/inotify.h>
27
28 #include "sd-journal.h"
29 #include "journal-def.h"
30 #include "journal-file.h"
31 #include "hashmap.h"
32 #include "list.h"
33 #include "lookup3.h"
34 #include "compress.h"
35 #include "journal-internal.h"
36
/* Upper bound on the number of journal files kept open at the same time;
 * add_file() logs and ignores any file beyond this limit. */
37 #define JOURNAL_FILES_MAX 1024
38
39 static void detach_location(sd_journal *j) {
40         Iterator i;
41         JournalFile *f;
42
43         assert(j);
44
45         j->current_file = NULL;
46         j->current_field = 0;
47
48         HASHMAP_FOREACH(f, j->files, i)
49                 f->current_offset = 0;
50 }
51
52 static void reset_location(sd_journal *j) {
53         assert(j);
54
55         detach_location(j);
56         zero(j->current_location);
57 }
58
59 static void init_location(Location *l, JournalFile *f, Object *o) {
60         assert(l);
61         assert(f);
62         assert(o->object.type == OBJECT_ENTRY);
63
64         l->type = LOCATION_DISCRETE;
65         l->seqnum = le64toh(o->entry.seqnum);
66         l->seqnum_id = f->header->seqnum_id;
67         l->realtime = le64toh(o->entry.realtime);
68         l->monotonic = le64toh(o->entry.monotonic);
69         l->boot_id = le64toh(o->entry.boot_id);
70         l->xor_hash = le64toh(o->entry.xor_hash);
71
72         l->seqnum_set = l->realtime_set = l->monotonic_set = l->xor_hash_set = true;
73 }
74
75 static void set_location(sd_journal *j, JournalFile *f, Object *o, uint64_t offset) {
76         assert(j);
77         assert(f);
78         assert(o);
79
80         init_location(&j->current_location, f, o);
81
82         j->current_file = f;
83         j->current_field = 0;
84
85         f->current_offset = offset;
86 }
87
/* Check whether two match expressions of the form "FIELD=value" refer to the
 * same field.
 *
 * _a/s and _b/t are the two expressions and their sizes in bytes (they need
 * not be NUL terminated). The field name is everything before the first '='.
 *
 * Returns 1 if both field names are identical, 0 if they differ, and -EINVAL
 * if either expression contains no '=' at all.
 *
 * Each expression is scanned over its own full length, so two valid
 * expressions always yield 0 or 1, even when one of them is shorter than the
 * other's field name. */
static int same_field(const void *_a, size_t s, const void *_b, size_t t) {
        const uint8_t *a = _a, *b = _b;
        size_t name_a, name_b, k;

        /* Length of the field name == index of the first '=' */
        for (name_a = 0; name_a < s; name_a++)
                if (a[name_a] == '=')
                        break;

        for (name_b = 0; name_b < t; name_b++)
                if (b[name_b] == '=')
                        break;

        /* No '=' found in at least one of the two: not a valid match */
        if (name_a >= s || name_b >= t)
                return -EINVAL;

        if (name_a != name_b)
                return 0;

        for (k = 0; k < name_a; k++)
                if (a[k] != b[k])
                        return 0;

        return 1;
}
108
109 _public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size) {
110         Match *m, *after = NULL;
111         uint64_t le_hash;
112
113         if (!j)
114                 return -EINVAL;
115         if (!data)
116                 return -EINVAL;
117         if (size <= 0)
118                 return -EINVAL;
119
120         le_hash = htole64(hash64(data, size));
121
122         LIST_FOREACH(matches, m, j->matches) {
123                 int r;
124
125                 if (m->le_hash == le_hash &&
126                     m->size == size &&
127                     memcmp(m->data, data, size) == 0)
128                         return 0;
129
130                 r = same_field(data, size, m->data, m->size);
131                 if (r < 0)
132                         return r;
133                 else if (r > 0)
134                         after = m;
135         }
136
137         m = new0(Match, 1);
138         if (!m)
139                 return -ENOMEM;
140
141         m->size = size;
142
143         m->data = malloc(m->size);
144         if (!m->data) {
145                 free(m);
146                 return -ENOMEM;
147         }
148
149         memcpy(m->data, data, size);
150         m->le_hash = le_hash;
151
152         /* Matches for the same fields we order adjacent to each
153          * other */
154         LIST_INSERT_AFTER(Match, matches, j->matches, after, m);
155         j->n_matches ++;
156
157         detach_location(j);
158
159         return 0;
160 }
161
162 _public_ void sd_journal_flush_matches(sd_journal *j) {
163         if (!j)
164                 return;
165
166         while (j->matches) {
167                 Match *m = j->matches;
168
169                 LIST_REMOVE(Match, matches, j->matches, m);
170                 free(m->data);
171                 free(m);
172         }
173
174         j->n_matches = 0;
175
176         detach_location(j);
177 }
178
179 static int compare_order(JournalFile *af, Object *ao,
180                          JournalFile *bf, Object *bo) {
181
182         uint64_t a, b;
183
184         assert(af);
185         assert(ao);
186         assert(bf);
187         assert(bo);
188
189         /* We operate on two different files here, hence we can access
190          * two objects at the same time, which we normally can't.
191          *
192          * If contents and timestamps match, these entries are
193          * identical, even if the seqnum does not match */
194
195         if (sd_id128_equal(ao->entry.boot_id, bo->entry.boot_id) &&
196             ao->entry.monotonic == bo->entry.monotonic &&
197             ao->entry.realtime == bo->entry.realtime &&
198             ao->entry.xor_hash == bo->entry.xor_hash)
199                 return 0;
200
201         if (sd_id128_equal(af->header->seqnum_id, bf->header->seqnum_id)) {
202
203                 /* If this is from the same seqnum source, compare
204                  * seqnums */
205                 a = le64toh(ao->entry.seqnum);
206                 b = le64toh(bo->entry.seqnum);
207
208                 if (a < b)
209                         return -1;
210                 if (a > b)
211                         return 1;
212
213                 /* Wow! This is weird, different data but the same
214                  * seqnums? Something is borked, but let's make the
215                  * best of it and compare by time. */
216         }
217
218         if (sd_id128_equal(ao->entry.boot_id, bo->entry.boot_id)) {
219
220                 /* If the boot id matches compare monotonic time */
221                 a = le64toh(ao->entry.monotonic);
222                 b = le64toh(bo->entry.monotonic);
223
224                 if (a < b)
225                         return -1;
226                 if (a > b)
227                         return 1;
228         }
229
230         /* Otherwise compare UTC time */
231         a = le64toh(ao->entry.realtime);
232         b = le64toh(ao->entry.realtime);
233
234         if (a < b)
235                 return -1;
236         if (a > b)
237                 return 1;
238
239         /* Finally, compare by contents */
240         a = le64toh(ao->entry.xor_hash);
241         b = le64toh(ao->entry.xor_hash);
242
243         if (a < b)
244                 return -1;
245         if (a > b)
246                 return 1;
247
248         return 0;
249 }
250
251 static int compare_with_location(JournalFile *af, Object *ao, Location *l) {
252         uint64_t a;
253
254         assert(af);
255         assert(ao);
256         assert(l);
257         assert(l->type == LOCATION_DISCRETE);
258
259         if (l->monotonic_set &&
260             sd_id128_equal(ao->entry.boot_id, l->boot_id) &&
261             l->realtime_set &&
262             le64toh(ao->entry.realtime) == l->realtime &&
263             l->xor_hash_set &&
264             le64toh(ao->entry.xor_hash) == l->xor_hash)
265                 return 0;
266
267         if (l->seqnum_set &&
268             sd_id128_equal(af->header->seqnum_id, l->seqnum_id)) {
269
270                 a = le64toh(ao->entry.seqnum);
271
272                 if (a < l->seqnum)
273                         return -1;
274                 if (a > l->seqnum)
275                         return 1;
276         }
277
278         if (l->monotonic_set &&
279             sd_id128_equal(ao->entry.boot_id, l->boot_id)) {
280
281                 a = le64toh(ao->entry.monotonic);
282
283                 if (a < l->monotonic)
284                         return -1;
285                 if (a > l->monotonic)
286                         return 1;
287         }
288
289         if (l->realtime_set) {
290
291                 a = le64toh(ao->entry.realtime);
292
293                 if (a < l->realtime)
294                         return -1;
295                 if (a > l->realtime)
296                         return 1;
297         }
298
299         if (l->xor_hash_set) {
300                 a = le64toh(ao->entry.xor_hash);
301
302                 if (a < l->xor_hash)
303                         return -1;
304                 if (a > l->xor_hash)
305                         return 1;
306         }
307
308         return 0;
309 }
310
/* Find, within file f, the entry to start from for j->current_location when
 * iterating in `direction`.
 *
 * On success returns 1 and stores the entry object in *ret and its offset in
 * *offset (either pointer may be NULL). Returns 0 if no suitable entry exists
 * in this file; other values returned by the journal_file_* helpers
 * (presumably negative error codes) are passed through.
 *
 * Without matches a single seek by head/tail/seqnum/monotonic/realtime is
 * done. With matches, the list is walked term by term (a term being a run of
 * adjacent matches on the same field): within a term the nearest candidate in
 * `direction` is chosen, across terms the furthest one. */
311 static int find_location(sd_journal *j, JournalFile *f, direction_t direction, Object **ret, uint64_t *offset) {
312         Object *o = NULL;
313         uint64_t p = 0;
314         int r;
315
316         assert(j);
317
318         if (!j->matches) {
319                 /* No matches is simple */
320
321                 if (j->current_location.type == LOCATION_HEAD)
322                         r = journal_file_next_entry(f, NULL, 0, DIRECTION_DOWN, &o, &p);
323                 else if (j->current_location.type == LOCATION_TAIL)
324                         r = journal_file_next_entry(f, NULL, 0, DIRECTION_UP, &o, &p);
325                 else if (j->current_location.seqnum_set &&
326                          sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
327                         r = journal_file_move_to_entry_by_seqnum(f, j->current_location.seqnum, direction, &o, &p);
328                 else if (j->current_location.monotonic_set)
329                         r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id, j->current_location.monotonic, direction, &o, &p);
330                 else if (j->current_location.realtime_set)
331                         r = journal_file_move_to_entry_by_realtime(f, j->current_location.realtime, direction, &o, &p);
332                 else
333                         r = journal_file_next_entry(f, NULL, 0, direction, &o, &p);
334
                /* Nothing found (0) or failure: hand the result to the caller as is */
335                 if (r <= 0)
336                         return r;
337
338         } else  {
339                 Match *m, *term_match = NULL;
340                 Object *to = NULL;
341                 uint64_t tp = 0;
342
343                 /* We have matches, first, let's jump to the monotonic
344                  * position if we have any, since it implies a
345                  * match. */
346
347                 if (j->current_location.type == LOCATION_DISCRETE &&
348                     j->current_location.monotonic_set) {
349
350                         r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id, j->current_location.monotonic, direction, &o, &p);
                        /* -ENOENT is reported as "no entry in this file" (0)
                         * rather than as a failure */
351                         if (r <= 0)
352                                 return r == -ENOENT ? 0 : r;
353                 }
354
355                 LIST_FOREACH(matches, m, j->matches) {
356                         Object *c, *d;
357                         uint64_t cp, dp;
358
                        /* Look up the data object for this match; if the file
                         * does not contain it (or the lookup fails) we return
                         * right away, without looking at the other matches */
359                         r = journal_file_find_data_object_with_hash(f, m->data, m->size, m->le_hash, &d, &dp);
360                         if (r <= 0)
361                                 return r;
362
363                         if (j->current_location.type == LOCATION_HEAD)
364                                 r = journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_DOWN, &c, &cp);
365                         else if (j->current_location.type == LOCATION_TAIL)
366                                 r = journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_UP, &c, &cp);
367                         else if (j->current_location.seqnum_set &&
368                                  sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
369                                 r = journal_file_move_to_entry_by_seqnum_for_data(f, dp, j->current_location.seqnum, direction, &c, &cp);
370                         else if (j->current_location.realtime_set)
371                                 r = journal_file_move_to_entry_by_realtime_for_data(f, dp, j->current_location.realtime, direction, &c, &cp);
372                         else
373                                 r = journal_file_next_entry_for_data(f, NULL, 0, dp, direction, &c, &cp);
374
                        /* NOTE(review): below r is only tested for r > 0, so a
                         * negative result of the seek above is handled like
                         * "no entry" and not propagated -- confirm this is
                         * intended. */
375                         if (!term_match) {
376                                 term_match = m;
377
378                                 if (r > 0) {
379                                         to = c;
380                                         tp = cp;
381                                 }
                        /* NOTE(review): same_field() may return -EINVAL, which
                         * is non-zero and hence takes the "same field" branch
                         * here. */
382                         } else if (same_field(term_match->data, term_match->size, m->data, m->size)) {
383
384                                 /* Same field as previous match... */
385                                 if (r > 0) {
386
387                                         /* Find the earliest of the OR matches */
388
389                                         if (!to ||
390                                             (direction == DIRECTION_DOWN && cp < tp) ||
391                                             (direction == DIRECTION_UP && cp > tp)) {
392                                                 to = c;
393                                                 tp = cp;
394                                         }
395
396                                 }
397
398                         } else {
399
400                                 /* Previous term is finished, did anything match? */
401                                 if (!to)
402                                         return 0;
403
404                                 /* Find the last of the AND matches */
405                                 if (!o ||
406                                     (direction == DIRECTION_DOWN && tp > p) ||
407                                     (direction == DIRECTION_UP && tp < p)) {
408                                         o = to;
409                                         p = tp;
410                                 }
411
                                /* Start collecting candidates for the new term */
412                                 term_match = m;
413
414                                 if (r > 0) {
415                                         to = c;
416                                         tp = cp;
417                                 } else {
418                                         to = NULL;
419                                         tp = 0;
420                                 }
421                         }
422                 }
423
424                 /* Last term is finished, did anything match? */
425                 if (!to)
426                         return 0;
427
                /* Fold the candidate of the last term into the overall result,
                 * same rule as for the AND matches above */
428                 if (!o ||
429                     (direction == DIRECTION_DOWN && tp > p) ||
430                     (direction == DIRECTION_UP && tp < p)) {
431                         o = to;
432                         p = tp;
433                 }
434
435                 if (!o)
436                         return 0;
437         }
438
439         if (ret)
440                 *ret = o;
441
442         if (offset)
443                 *offset = p;
444
445         return 1;
446 }
447
448 static int next_with_matches(sd_journal *j, JournalFile *f, direction_t direction, Object **ret, uint64_t *offset) {
449         int r;
450         uint64_t cp;
451         Object *c;
452
453         assert(j);
454         assert(f);
455         assert(ret);
456         assert(offset);
457
458         c = *ret;
459         cp = *offset;
460
461         if (!j->matches) {
462                 /* No matches is easy */
463
464                 r = journal_file_next_entry(f, c, cp, direction, &c, &cp);
465                 if (r <= 0)
466                         return r;
467
468                 if (ret)
469                         *ret = c;
470                 if (offset)
471                         *offset = cp;
472                 return 1;
473         }
474
475         /* So there are matches we have to adhere to, let's find the
476          * first entry that matches all of them */
477
478         for (;;) {
479                 uint64_t np, n;
480                 bool found, term_result = false;
481                 Match *m, *term_match = NULL;
482                 Object *npo = NULL;
483
484                 n = journal_file_entry_n_items(c);
485
486                 /* Make sure we don't match the entry we are starting
487                  * from. */
488                 found = cp > *offset;
489
490                 np = 0;
491                 LIST_FOREACH(matches, m, j->matches) {
492                         uint64_t q, k;
493                         Object *qo = NULL;
494
495                         /* Let's check if this is the beginning of a
496                          * new term, i.e. has a different field prefix
497                          * as the preceeding match. */
498                         if (!term_match) {
499                                 term_match = m;
500                                 term_result = false;
501                         } else if (!same_field(term_match->data, term_match->size, m->data, m->size)) {
502                                 if (!term_result)
503                                         found = false;
504
505                                 term_match = m;
506                                 term_result = false;
507                         }
508
509                         for (k = 0; k < n; k++)
510                                 if (c->entry.items[k].hash == m->le_hash)
511                                         break;
512
513                         if (k >= n) {
514                                 /* Hmm, didn't find any field that
515                                  * matched this rule, so ignore this
516                                  * match. Go on with next match */
517                                 continue;
518                         }
519
520                         term_result = true;
521
522                         /* Hmm, so, this field matched, let's remember
523                          * where we'd have to try next, in case the other
524                          * matches are not OK */
525
526                         r = journal_file_next_entry_for_data(f, c, cp, le64toh(c->entry.items[k].object_offset), direction, &qo, &q);
527                         if (r < 0)
528                                 return r;
529
530                         if (r > 0) {
531
532                                 if (direction == DIRECTION_DOWN) {
533                                         if (q > np) {
534                                                 np = q;
535                                                 npo = qo;
536                                         }
537                                 } else {
538                                         if (np == 0 || q < np) {
539                                                 np = q;
540                                                 npo = qo;
541                                         }
542                                 }
543                         }
544                 }
545
546                 /* Check the last term */
547                 if (term_match && !term_result)
548                         found = false;
549
550                 /* Did this entry match against all matches? */
551                 if (found) {
552                         if (ret)
553                                 *ret = c;
554                         if (offset)
555                                 *offset = cp;
556                         return 1;
557                 }
558
559                 /* Did we find a subsequent entry? */
560                 if (np == 0)
561                         return 0;
562
563                 /* Hmm, ok, this entry only matched partially, so
564                  * let's try another one */
565                 cp = np;
566                 c = npo;
567         }
568 }
569
570 static int next_beyond_location(sd_journal *j, JournalFile *f, direction_t direction, Object **ret, uint64_t *offset) {
571         Object *c;
572         uint64_t cp;
573         int compare_value, r;
574
575         assert(j);
576         assert(f);
577
578         if (f->current_offset > 0) {
579                 cp = f->current_offset;
580
581                 r = journal_file_move_to_object(f, OBJECT_ENTRY, cp, &c);
582                 if (r < 0)
583                         return r;
584
585                 r = next_with_matches(j, f, direction, &c, &cp);
586                 if (r <= 0)
587                         return r;
588
589                 compare_value = 1;
590         } else {
591                 r = find_location(j, f, direction, &c, &cp);
592                 if (r <= 0)
593                         return r;
594
595                 compare_value = 0;
596         }
597
598         for (;;) {
599                 bool found;
600
601                 if (j->current_location.type == LOCATION_DISCRETE) {
602                         int k;
603
604                         k = compare_with_location(f, c, &j->current_location);
605                         if (direction == DIRECTION_DOWN)
606                                 found = k >= compare_value;
607                         else
608                                 found = k <= -compare_value;
609                 } else
610                         found = true;
611
612                 if (found) {
613                         if (ret)
614                                 *ret = c;
615                         if (offset)
616                                 *offset = cp;
617                         return 1;
618                 }
619
620                 r = next_with_matches(j, f, direction, &c, &cp);
621                 if (r <= 0)
622                         return r;
623         }
624 }
625
626 static int real_journal_next(sd_journal *j, direction_t direction) {
627         JournalFile *f, *new_current = NULL;
628         Iterator i;
629         int r;
630         uint64_t new_offset = 0;
631         Object *new_entry = NULL;
632
633         if (!j)
634                 return -EINVAL;
635
636         HASHMAP_FOREACH(f, j->files, i) {
637                 Object *o;
638                 uint64_t p;
639                 bool found;
640
641                 r = next_beyond_location(j, f, direction, &o, &p);
642                 if (r < 0)
643                         return r;
644                 else if (r == 0)
645                         continue;
646
647                 if (!new_current)
648                         found = true;
649                 else {
650                         int k;
651
652                         k = compare_order(f, o, new_current, new_entry);
653
654                         if (direction == DIRECTION_DOWN)
655                                 found = k < 0;
656                         else
657                                 found = k > 0;
658                 }
659
660                 if (found) {
661                         new_current = f;
662                         new_entry = o;
663                         new_offset = p;
664                 }
665         }
666
667         if (!new_current)
668                 return 0;
669
670         set_location(j, new_current, new_entry, new_offset);
671
672         return 1;
673 }
674
675 _public_ int sd_journal_next(sd_journal *j) {
676         return real_journal_next(j, DIRECTION_DOWN);
677 }
678
679 _public_ int sd_journal_previous(sd_journal *j) {
680         return real_journal_next(j, DIRECTION_UP);
681 }
682
683 _public_ int sd_journal_next_skip(sd_journal *j, uint64_t skip) {
684         int c = 0, r;
685
686         if (!j)
687                 return -EINVAL;
688
689         while (skip > 0) {
690                 r = sd_journal_next(j);
691                 if (r < 0)
692                         return r;
693
694                 if (r == 0)
695                         return c;
696
697                 skip--;
698                 c++;
699         }
700
701         return c;
702 }
703
704 _public_ int sd_journal_previous_skip(sd_journal *j, uint64_t skip) {
705         int c = 0, r;
706
707         if (!j)
708                 return -EINVAL;
709
710         while (skip > 0) {
711                 r = sd_journal_previous(j);
712                 if (r < 0)
713                         return r;
714
715                 if (r == 0)
716                         return c;
717
718                 skip--;
719                 c++;
720         }
721
722         return 1;
723 }
724
725 _public_ int sd_journal_get_cursor(sd_journal *j, char **cursor) {
726         Object *o;
727         int r;
728         char bid[33], sid[33];
729
730         if (!j)
731                 return -EINVAL;
732         if (!cursor)
733                 return -EINVAL;
734
735         if (!j->current_file || j->current_file->current_offset <= 0)
736                 return -EADDRNOTAVAIL;
737
738         r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
739         if (r < 0)
740                 return r;
741
742         sd_id128_to_string(j->current_file->header->seqnum_id, sid);
743         sd_id128_to_string(o->entry.boot_id, bid);
744
745         if (asprintf(cursor,
746                      "s=%s;i=%llx;b=%s;m=%llx;t=%llx;x=%llx;p=%s",
747                      sid, (unsigned long long) le64toh(o->entry.seqnum),
748                      bid, (unsigned long long) le64toh(o->entry.monotonic),
749                      (unsigned long long) le64toh(o->entry.realtime),
750                      (unsigned long long) le64toh(o->entry.xor_hash),
751                      file_name_from_path(j->current_file->path)) < 0)
752                 return -ENOMEM;
753
754         return 1;
755 }
756
757 _public_ int sd_journal_seek_cursor(sd_journal *j, const char *cursor) {
758         char *w;
759         size_t l;
760         char *state;
761         unsigned long long seqnum, monotonic, realtime, xor_hash;
762         bool
763                 seqnum_id_set = false,
764                 seqnum_set = false,
765                 boot_id_set = false,
766                 monotonic_set = false,
767                 realtime_set = false,
768                 xor_hash_set = false;
769         sd_id128_t seqnum_id, boot_id;
770
771         if (!j)
772                 return -EINVAL;
773         if (!cursor)
774                 return -EINVAL;
775
776         FOREACH_WORD_SEPARATOR(w, l, cursor, ";", state) {
777                 char *item;
778                 int k = 0;
779
780                 if (l < 2 || w[1] != '=')
781                         return -EINVAL;
782
783                 item = strndup(w, l);
784                 if (!item)
785                         return -ENOMEM;
786
787                 switch (w[0]) {
788
789                 case 's':
790                         seqnum_id_set = true;
791                         k = sd_id128_from_string(w+2, &seqnum_id);
792                         break;
793
794                 case 'i':
795                         seqnum_set = true;
796                         if (sscanf(w+2, "%llx", &seqnum) != 1)
797                                 k = -EINVAL;
798                         break;
799
800                 case 'b':
801                         boot_id_set = true;
802                         k = sd_id128_from_string(w+2, &boot_id);
803                         break;
804
805                 case 'm':
806                         monotonic_set = true;
807                         if (sscanf(w+2, "%llx", &monotonic) != 1)
808                                 k = -EINVAL;
809                         break;
810
811                 case 't':
812                         realtime_set = true;
813                         if (sscanf(w+2, "%llx", &realtime) != 1)
814                                 k = -EINVAL;
815                         break;
816
817                 case 'x':
818                         xor_hash_set = true;
819                         if (sscanf(w+2, "%llx", &xor_hash) != 1)
820                                 k = -EINVAL;
821                         break;
822                 }
823
824                 free(item);
825
826                 if (k < 0)
827                         return k;
828         }
829
830         if ((!seqnum_set || !seqnum_id_set) &&
831             (!monotonic_set || !boot_id_set) &&
832             !realtime_set)
833                 return -EINVAL;
834
835         reset_location(j);
836
837         j->current_location.type = LOCATION_DISCRETE;
838
839         if (realtime_set) {
840                 j->current_location.realtime = (uint64_t) realtime;
841                 j->current_location.realtime_set = true;
842         }
843
844         if (seqnum_set && seqnum_id_set) {
845                 j->current_location.seqnum = (uint64_t) seqnum;
846                 j->current_location.seqnum_id = seqnum_id;
847                 j->current_location.seqnum_set = true;
848         }
849
850         if (monotonic_set && boot_id_set) {
851                 j->current_location.monotonic = (uint64_t) monotonic;
852                 j->current_location.boot_id = boot_id;
853                 j->current_location.monotonic_set = true;
854         }
855
856         if (xor_hash_set) {
857                 j->current_location.xor_hash = (uint64_t) xor_hash;
858                 j->current_location.xor_hash_set = true;
859         }
860
861         return 0;
862 }
863
864 _public_ int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t usec) {
865         if (!j)
866                 return -EINVAL;
867
868         reset_location(j);
869         j->current_location.type = LOCATION_DISCRETE;
870         j->current_location.boot_id = boot_id;
871         j->current_location.monotonic = usec;
872         j->current_location.monotonic_set = true;
873
874         return 0;
875 }
876
877 _public_ int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec) {
878         if (!j)
879                 return -EINVAL;
880
881         reset_location(j);
882         j->current_location.type = LOCATION_DISCRETE;
883         j->current_location.realtime = usec;
884         j->current_location.realtime_set = true;
885
886         return 0;
887 }
888
889 _public_ int sd_journal_seek_head(sd_journal *j) {
890         if (!j)
891                 return -EINVAL;
892
893         reset_location(j);
894         j->current_location.type = LOCATION_HEAD;
895
896         return 0;
897 }
898
899 _public_ int sd_journal_seek_tail(sd_journal *j) {
900         if (!j)
901                 return -EINVAL;
902
903         reset_location(j);
904         j->current_location.type = LOCATION_TAIL;
905
906         return 0;
907 }
908
909 static int add_file(sd_journal *j, const char *prefix, const char *dir, const char *filename) {
910         char *fn;
911         int r;
912         JournalFile *f;
913
914         assert(j);
915         assert(prefix);
916         assert(filename);
917
918         if ((j->flags & SD_JOURNAL_SYSTEM_ONLY) &&
919             !startswith(filename, "system.journal"))
920                 return 0;
921
922         if (dir)
923                 fn = join(prefix, "/", dir, "/", filename, NULL);
924         else
925                 fn = join(prefix, "/", filename, NULL);
926
927         if (!fn)
928                 return -ENOMEM;
929
930         if (hashmap_get(j->files, fn)) {
931                 free(fn);
932                 return 0;
933         }
934
935         if (hashmap_size(j->files) >= JOURNAL_FILES_MAX) {
936                 log_debug("Too many open journal files, not adding %s, ignoring.", fn);
937                 free(fn);
938                 return 0;
939         }
940
941         r = journal_file_open(fn, O_RDONLY, 0, NULL, &f);
942         free(fn);
943
944         if (r < 0) {
945                 if (errno == ENOENT)
946                         return 0;
947
948                 return r;
949         }
950
951         /* journal_file_dump(f); */
952
953         r = hashmap_put(j->files, f->path, f);
954         if (r < 0) {
955                 journal_file_close(f);
956                 return r;
957         }
958
959         log_debug("File %s got added.", f->path);
960
961         return 0;
962 }
963
964 static int remove_file(sd_journal *j, const char *prefix, const char *dir, const char *filename) {
965         char *fn;
966         JournalFile *f;
967
968         assert(j);
969         assert(prefix);
970         assert(filename);
971
972         if (dir)
973                 fn = join(prefix, "/", dir, "/", filename, NULL);
974         else
975                 fn = join(prefix, "/", filename, NULL);
976
977         if (!fn)
978                 return -ENOMEM;
979
980         f = hashmap_get(j->files, fn);
981         free(fn);
982
983         if (!f)
984                 return 0;
985
986         hashmap_remove(j->files, f->path);
987         journal_file_close(f);
988
989         log_debug("File %s got removed.", f->path);
990         return 0;
991 }
992
993 static int add_directory(sd_journal *j, const char *prefix, const char *dir) {
994         char *fn;
995         int r;
996         DIR *d;
997         int wd;
998         sd_id128_t id, mid;
999
1000         assert(j);
1001         assert(prefix);
1002         assert(dir);
1003
1004         if ((j->flags & SD_JOURNAL_LOCAL_ONLY) &&
1005             (sd_id128_from_string(dir, &id) < 0 ||
1006              sd_id128_get_machine(&mid) < 0 ||
1007              !sd_id128_equal(id, mid)))
1008             return 0;
1009
1010         fn = join(prefix, "/", dir, NULL);
1011         if (!fn)
1012                 return -ENOMEM;
1013
1014         d = opendir(fn);
1015
1016         if (!d) {
1017                 free(fn);
1018                 if (errno == ENOENT)
1019                         return 0;
1020
1021                 return -errno;
1022         }
1023
1024         wd = inotify_add_watch(j->inotify_fd, fn,
1025                                IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1026                                IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|
1027                                IN_DONT_FOLLOW|IN_ONLYDIR);
1028         if (wd > 0) {
1029                 if (hashmap_put(j->inotify_wd_dirs, INT_TO_PTR(wd), fn) < 0)
1030                         inotify_rm_watch(j->inotify_fd, wd);
1031                 else
1032                         fn = NULL;
1033         }
1034
1035         free(fn);
1036
1037         for (;;) {
1038                 struct dirent buf, *de;
1039
1040                 r = readdir_r(d, &buf, &de);
1041                 if (r != 0 || !de)
1042                         break;
1043
1044                 if (!dirent_is_file_with_suffix(de, ".journal"))
1045                         continue;
1046
1047                 r = add_file(j, prefix, dir, de->d_name);
1048                 if (r < 0)
1049                         log_debug("Failed to add file %s/%s/%s: %s", prefix, dir, de->d_name, strerror(-r));
1050         }
1051
1052         closedir(d);
1053
1054         log_debug("Directory %s/%s got added.", prefix, dir);
1055
1056         return 0;
1057 }
1058
1059 static void remove_directory_wd(sd_journal *j, int wd) {
1060         char *p;
1061
1062         assert(j);
1063         assert(wd > 0);
1064
1065         if (j->inotify_fd >= 0)
1066                 inotify_rm_watch(j->inotify_fd, wd);
1067
1068         p = hashmap_remove(j->inotify_wd_dirs, INT_TO_PTR(wd));
1069
1070         if (p) {
1071                 log_debug("Directory %s got removed.", p);
1072                 free(p);
1073         }
1074 }
1075
1076 static void add_root_wd(sd_journal *j, const char *p) {
1077         int wd;
1078         char *k;
1079
1080         assert(j);
1081         assert(p);
1082
1083         wd = inotify_add_watch(j->inotify_fd, p,
1084                                IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1085                                IN_DONT_FOLLOW|IN_ONLYDIR);
1086         if (wd <= 0)
1087                 return;
1088
1089         k = strdup(p);
1090         if (!k || hashmap_put(j->inotify_wd_roots, INT_TO_PTR(wd), k) < 0) {
1091                 inotify_rm_watch(j->inotify_fd, wd);
1092                 free(k);
1093         }
1094 }
1095
1096 static void remove_root_wd(sd_journal *j, int wd) {
1097         char *p;
1098
1099         assert(j);
1100         assert(wd > 0);
1101
1102         if (j->inotify_fd >= 0)
1103                 inotify_rm_watch(j->inotify_fd, wd);
1104
1105         p = hashmap_remove(j->inotify_wd_roots, INT_TO_PTR(wd));
1106
1107         if (p) {
1108                 log_debug("Root %s got removed.", p);
1109                 free(p);
1110         }
1111 }
1112
1113 _public_ int sd_journal_open(sd_journal **ret, int flags) {
1114         sd_journal *j;
1115         const char *p;
1116         const char search_paths[] =
1117                 "/run/log/journal\0"
1118                 "/var/log/journal\0";
1119         int r;
1120
1121         if (!ret)
1122                 return -EINVAL;
1123
1124         if (flags & ~(SD_JOURNAL_LOCAL_ONLY|
1125                       SD_JOURNAL_RUNTIME_ONLY|
1126                       SD_JOURNAL_SYSTEM_ONLY))
1127                 return -EINVAL;
1128
1129         j = new0(sd_journal, 1);
1130         if (!j)
1131                 return -ENOMEM;
1132
1133         j->flags = flags;
1134
1135         j->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1136         if (j->inotify_fd < 0) {
1137                 r = -errno;
1138                 goto fail;
1139         }
1140
1141         j->files = hashmap_new(string_hash_func, string_compare_func);
1142         if (!j->files) {
1143                 r = -ENOMEM;
1144                 goto fail;
1145         }
1146
1147         j->inotify_wd_dirs = hashmap_new(trivial_hash_func, trivial_compare_func);
1148         j->inotify_wd_roots = hashmap_new(trivial_hash_func, trivial_compare_func);
1149
1150         if (!j->inotify_wd_dirs || !j->inotify_wd_roots) {
1151                 r = -ENOMEM;
1152                 goto fail;
1153         }
1154
1155         /* We ignore most errors here, since the idea is to only open
1156          * what's actually accessible, and ignore the rest. */
1157
1158         NULSTR_FOREACH(p, search_paths) {
1159                 DIR *d;
1160
1161                 if ((flags & SD_JOURNAL_RUNTIME_ONLY) &&
1162                     !path_startswith(p, "/run"))
1163                         continue;
1164
1165                 d = opendir(p);
1166                 if (!d) {
1167                         if (errno != ENOENT)
1168                                 log_debug("Failed to open %s: %m", p);
1169                         continue;
1170                 }
1171
1172                 add_root_wd(j, p);
1173
1174                 for (;;) {
1175                         struct dirent buf, *de;
1176                         sd_id128_t id;
1177
1178                         r = readdir_r(d, &buf, &de);
1179                         if (r != 0 || !de)
1180                                 break;
1181
1182                         if (dirent_is_file_with_suffix(de, ".journal")) {
1183                                 r = add_file(j, p, NULL, de->d_name);
1184                                 if (r < 0)
1185                                         log_debug("Failed to add file %s/%s: %s", p, de->d_name, strerror(-r));
1186
1187                         } else if ((de->d_type == DT_DIR || de->d_type == DT_UNKNOWN) &&
1188                                    sd_id128_from_string(de->d_name, &id) >= 0) {
1189
1190                                 r = add_directory(j, p, de->d_name);
1191                                 if (r < 0)
1192                                         log_debug("Failed to add directory %s/%s: %s", p, de->d_name, strerror(-r));
1193                         }
1194                 }
1195
1196                 closedir(d);
1197         }
1198
1199         *ret = j;
1200         return 0;
1201
1202 fail:
1203         sd_journal_close(j);
1204
1205         return r;
1206 };
1207
1208 _public_ void sd_journal_close(sd_journal *j) {
1209         if (!j)
1210                 return;
1211
1212         if (j->inotify_wd_dirs) {
1213                 void *k;
1214
1215                 while ((k = hashmap_first_key(j->inotify_wd_dirs)))
1216                         remove_directory_wd(j, PTR_TO_INT(k));
1217
1218                 hashmap_free(j->inotify_wd_dirs);
1219         }
1220
1221         if (j->inotify_wd_roots) {
1222                 void *k;
1223
1224                 while ((k = hashmap_first_key(j->inotify_wd_roots)))
1225                         remove_root_wd(j, PTR_TO_INT(k));
1226
1227                 hashmap_free(j->inotify_wd_roots);
1228         }
1229
1230         if (j->files) {
1231                 JournalFile *f;
1232
1233                 while ((f = hashmap_steal_first(j->files)))
1234                         journal_file_close(f);
1235
1236                 hashmap_free(j->files);
1237         }
1238
1239         sd_journal_flush_matches(j);
1240
1241         if (j->inotify_fd >= 0)
1242                 close_nointr_nofail(j->inotify_fd);
1243
1244         free(j);
1245 }
1246
1247 _public_ int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret) {
1248         Object *o;
1249         JournalFile *f;
1250         int r;
1251
1252         if (!j)
1253                 return -EINVAL;
1254         if (!ret)
1255                 return -EINVAL;
1256
1257         f = j->current_file;
1258         if (!f)
1259                 return -EADDRNOTAVAIL;
1260
1261         if (f->current_offset <= 0)
1262                 return -EADDRNOTAVAIL;
1263
1264         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1265         if (r < 0)
1266                 return r;
1267
1268         *ret = le64toh(o->entry.realtime);
1269         return 0;
1270 }
1271
1272 _public_ int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id128_t *ret_boot_id) {
1273         Object *o;
1274         JournalFile *f;
1275         int r;
1276         sd_id128_t id;
1277
1278         if (!j)
1279                 return -EINVAL;
1280         if (!ret)
1281                 return -EINVAL;
1282
1283         f = j->current_file;
1284         if (!f)
1285                 return -EADDRNOTAVAIL;
1286
1287         if (f->current_offset <= 0)
1288                 return -EADDRNOTAVAIL;
1289
1290         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1291         if (r < 0)
1292                 return r;
1293
1294         if (ret_boot_id)
1295                 *ret_boot_id = o->entry.boot_id;
1296         else {
1297                 r = sd_id128_get_boot(&id);
1298                 if (r < 0)
1299                         return r;
1300
1301                 if (!sd_id128_equal(id, o->entry.boot_id))
1302                         return -ENOENT;
1303         }
1304
1305         *ret = le64toh(o->entry.monotonic);
1306         return 0;
1307 }
1308
1309 _public_ int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *size) {
1310         JournalFile *f;
1311         uint64_t i, n;
1312         size_t field_length;
1313         int r;
1314         Object *o;
1315
1316         if (!j)
1317                 return -EINVAL;
1318         if (!field)
1319                 return -EINVAL;
1320         if (!data)
1321                 return -EINVAL;
1322         if (!size)
1323                 return -EINVAL;
1324
1325         if (isempty(field) || strchr(field, '='))
1326                 return -EINVAL;
1327
1328         f = j->current_file;
1329         if (!f)
1330                 return -EADDRNOTAVAIL;
1331
1332         if (f->current_offset <= 0)
1333                 return -EADDRNOTAVAIL;
1334
1335         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1336         if (r < 0)
1337                 return r;
1338
1339         field_length = strlen(field);
1340
1341         n = journal_file_entry_n_items(o);
1342         for (i = 0; i < n; i++) {
1343                 uint64_t p, l, le_hash;
1344                 size_t t;
1345
1346                 p = le64toh(o->entry.items[i].object_offset);
1347                 le_hash = o->entry.items[i].hash;
1348                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1349                 if (r < 0)
1350                         return r;
1351
1352                 if (le_hash != o->data.hash)
1353                         return -EBADMSG;
1354
1355                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
1356
1357                 if (o->object.flags & OBJECT_COMPRESSED) {
1358
1359 #ifdef HAVE_XZ
1360                         if (uncompress_startswith(o->data.payload, l,
1361                                                   &f->compress_buffer, &f->compress_buffer_size,
1362                                                   field, field_length, '=')) {
1363
1364                                 uint64_t rsize;
1365
1366                                 if (!uncompress_blob(o->data.payload, l,
1367                                                      &f->compress_buffer, &f->compress_buffer_size, &rsize))
1368                                         return -EBADMSG;
1369
1370                                 *data = f->compress_buffer;
1371                                 *size = (size_t) rsize;
1372
1373                                 return 0;
1374                         }
1375 #else
1376                         return -EPROTONOSUPPORT;
1377 #endif
1378
1379                 } else if (l >= field_length+1 &&
1380                            memcmp(o->data.payload, field, field_length) == 0 &&
1381                            o->data.payload[field_length] == '=') {
1382
1383                         t = (size_t) l;
1384
1385                         if ((uint64_t) t != l)
1386                                 return -E2BIG;
1387
1388                         *data = o->data.payload;
1389                         *size = t;
1390
1391                         return 0;
1392                 }
1393
1394                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1395                 if (r < 0)
1396                         return r;
1397         }
1398
1399         return -ENOENT;
1400 }
1401
1402 _public_ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) {
1403         JournalFile *f;
1404         uint64_t p, l, n, le_hash;
1405         int r;
1406         Object *o;
1407         size_t t;
1408
1409         if (!j)
1410                 return -EINVAL;
1411         if (!data)
1412                 return -EINVAL;
1413         if (!size)
1414                 return -EINVAL;
1415
1416         f = j->current_file;
1417         if (!f)
1418                 return -EADDRNOTAVAIL;
1419
1420         if (f->current_offset <= 0)
1421                 return -EADDRNOTAVAIL;
1422
1423         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1424         if (r < 0)
1425                 return r;
1426
1427         n = journal_file_entry_n_items(o);
1428         if (j->current_field >= n)
1429                 return 0;
1430
1431         p = le64toh(o->entry.items[j->current_field].object_offset);
1432         le_hash = o->entry.items[j->current_field].hash;
1433         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1434         if (r < 0)
1435                 return r;
1436
1437         if (le_hash != o->data.hash)
1438                 return -EBADMSG;
1439
1440         l = le64toh(o->object.size) - offsetof(Object, data.payload);
1441         t = (size_t) l;
1442
1443         /* We can't read objects larger than 4G on a 32bit machine */
1444         if ((uint64_t) t != l)
1445                 return -E2BIG;
1446
1447         if (o->object.flags & OBJECT_COMPRESSED) {
1448 #ifdef HAVE_XZ
1449                 uint64_t rsize;
1450
1451                 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
1452                         return -EBADMSG;
1453
1454                 *data = f->compress_buffer;
1455                 *size = (size_t) rsize;
1456 #else
1457                 return -EPROTONOSUPPORT;
1458 #endif
1459         } else {
1460                 *data = o->data.payload;
1461                 *size = t;
1462         }
1463
1464         j->current_field ++;
1465
1466         return 1;
1467 }
1468
1469 _public_ void sd_journal_restart_data(sd_journal *j) {
1470         if (!j)
1471                 return;
1472
1473         j->current_field = 0;
1474 }
1475
1476 _public_ int sd_journal_get_fd(sd_journal *j) {
1477         if (!j)
1478                 return -EINVAL;
1479
1480         return j->inotify_fd;
1481 }
1482
1483 static void process_inotify_event(sd_journal *j, struct inotify_event *e) {
1484         char *p;
1485         int r;
1486
1487         assert(j);
1488         assert(e);
1489
1490         /* Is this a subdirectory we watch? */
1491         p = hashmap_get(j->inotify_wd_dirs, INT_TO_PTR(e->wd));
1492         if (p) {
1493
1494                 if (!(e->mask & IN_ISDIR) && e->len > 0 && endswith(e->name, ".journal")) {
1495
1496                         /* Event for a journal file */
1497
1498                         if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) {
1499                                 r = add_file(j, p, NULL, e->name);
1500                                 if (r < 0)
1501                                         log_debug("Failed to add file %s/%s: %s", p, e->name, strerror(-r));
1502                         } else if (e->mask & (IN_DELETE|IN_UNMOUNT)) {
1503
1504                                 r = remove_file(j, p, NULL, e->name);
1505                                 if (r < 0)
1506                                         log_debug("Failed to remove file %s/%s: %s", p, e->name, strerror(-r));
1507                         }
1508
1509                 } else if (e->len == 0) {
1510
1511                         /* Event for the directory itself */
1512
1513                         if (e->mask & (IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT))
1514                                 remove_directory_wd(j, e->wd);
1515                 }
1516
1517                 return;
1518         }
1519
1520         /* Must be the root directory then? */
1521         p = hashmap_get(j->inotify_wd_roots, INT_TO_PTR(e->wd));
1522         if (p) {
1523                 sd_id128_t id;
1524
1525                 if (!(e->mask & IN_ISDIR) && e->len > 0 && endswith(e->name, ".journal")) {
1526
1527                         /* Event for a journal file */
1528
1529                         if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) {
1530                                 r = add_file(j, p, NULL, e->name);
1531                                 if (r < 0)
1532                                         log_debug("Failed to add file %s/%s: %s", p, e->name, strerror(-r));
1533                         } else if (e->mask & (IN_DELETE|IN_UNMOUNT)) {
1534
1535                                 r = remove_file(j, p, NULL, e->name);
1536                                 if (r < 0)
1537                                         log_debug("Failed to remove file %s/%s: %s", p, e->name, strerror(-r));
1538                         }
1539
1540                 } else if ((e->mask & IN_ISDIR) && e->len > 0 && sd_id128_from_string(e->name, &id) >= 0) {
1541
1542                         /* Event for subdirectory */
1543
1544                         if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) {
1545
1546                                 r = add_directory(j, p, e->name);
1547                                 if (r < 0)
1548                                         log_debug("Failed to add directory %s/%s: %s", p, e->name, strerror(-r));
1549                         }
1550                 }
1551
1552                 return;
1553         }
1554
1555         if (e->mask & IN_IGNORED)
1556                 return;
1557
1558         log_warning("Unknown inotify event.");
1559 }
1560
1561 _public_ int sd_journal_process(sd_journal *j) {
1562         uint8_t buffer[sizeof(struct inotify_event) + FILENAME_MAX];
1563
1564         if (!j)
1565                 return -EINVAL;
1566
1567         for (;;) {
1568                 struct inotify_event *e;
1569                 ssize_t l;
1570
1571                 l = read(j->inotify_fd, buffer, sizeof(buffer));
1572                 if (l < 0) {
1573                         if (errno == EINTR || errno == EAGAIN)
1574                                 return 0;
1575
1576                         return -errno;
1577                 }
1578
1579                 e = (struct inotify_event*) buffer;
1580                 while (l > 0) {
1581                         size_t step;
1582
1583                         process_inotify_event(j, e);
1584
1585                         step = sizeof(struct inotify_event) + e->len;
1586                         assert(step <= (size_t) l);
1587
1588                         e = (struct inotify_event*) ((uint8_t*) e + step);
1589                         l -= step;
1590                 }
1591         }
1592 }
1593
1594 _public_ int sd_journal_query_unique(sd_journal *j, const char *field) {
1595         if (!j)
1596                 return -EINVAL;
1597         if (!field)
1598                 return -EINVAL;
1599
1600         return -ENOTSUP;
1601 }
1602
1603 _public_ int sd_journal_enumerate_unique(sd_journal *j, const void **data, size_t *l) {
1604         if (!j)
1605                 return -EINVAL;
1606         if (!data)
1607                 return -EINVAL;
1608         if (!l)
1609                 return -EINVAL;
1610
1611         return -ENOTSUP;
1612 }
1613
1614 _public_ void sd_journal_restart_unique(sd_journal *j) {
1615         if (!j)
1616                 return;
1617 }