chiark / gitweb /
journal: fix matches
[elogind.git] / src / journal / sd-journal.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 2 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <stddef.h>
25 #include <unistd.h>
26 #include <sys/inotify.h>
27
28 #include "sd-journal.h"
29 #include "journal-def.h"
30 #include "journal-file.h"
31 #include "hashmap.h"
32 #include "list.h"
33 #include "lookup3.h"
34
/* Upper bound on the number of journal files a context keeps open;
 * add_file() ignores further files once this many are registered. */
#define JOURNAL_FILES_MAX 1024
36
37 typedef struct Match Match;
38
39 struct Match {
40         char *data;
41         size_t size;
42         uint64_t le_hash;
43
44         LIST_FIELDS(Match, matches);
45 };
46
/* Kind of position stored in a Location. LOCATION_HEAD is the first
 * enumerator, hence the value a zeroed Location carries. */
enum location_type {
        LOCATION_HEAD,
        LOCATION_TAIL,
        LOCATION_DISCRETE
};

typedef enum location_type location_type_t;
52
53 typedef struct Location {
54         location_type_t type;
55
56         uint64_t seqnum;
57         sd_id128_t seqnum_id;
58         bool seqnum_set;
59
60         uint64_t realtime;
61         bool realtime_set;
62
63         uint64_t monotonic;
64         sd_id128_t boot_id;
65         bool monotonic_set;
66
67         uint64_t xor_hash;
68         bool xor_hash_set;
69 } Location;
70
71 struct sd_journal {
72         Hashmap *files;
73
74         Location current_location;
75         JournalFile *current_file;
76         uint64_t current_field;
77
78         int inotify_fd;
79         Hashmap *inotify_wd_dirs;
80         Hashmap *inotify_wd_roots;
81
82         LIST_HEAD(Match, matches);
83         unsigned n_matches;
84 };
85
86 static void detach_location(sd_journal *j) {
87         Iterator i;
88         JournalFile *f;
89
90         assert(j);
91
92         j->current_file = NULL;
93         j->current_field = 0;
94
95         HASHMAP_FOREACH(f, j->files, i)
96                 f->current_offset = 0;
97 }
98
99 static void reset_location(sd_journal *j) {
100         assert(j);
101
102         detach_location(j);
103         zero(j->current_location);
104 }
105
106 static void init_location(Location *l, JournalFile *f, Object *o) {
107         assert(l);
108         assert(f);
109         assert(o->object.type == OBJECT_ENTRY);
110
111         l->type = LOCATION_DISCRETE;
112         l->seqnum = le64toh(o->entry.seqnum);
113         l->seqnum_id = f->header->seqnum_id;
114         l->realtime = le64toh(o->entry.realtime);
115         l->monotonic = le64toh(o->entry.monotonic);
116         l->boot_id = le64toh(o->entry.boot_id);
117         l->xor_hash = le64toh(o->entry.xor_hash);
118
119         l->seqnum_set = l->realtime_set = l->monotonic_set = l->xor_hash_set = true;
120 }
121
122 static void set_location(sd_journal *j, JournalFile *f, Object *o, uint64_t offset) {
123         assert(j);
124         assert(f);
125         assert(o);
126
127         init_location(&j->current_location, f, o);
128
129         j->current_file = f;
130         j->current_field = 0;
131
132         f->current_offset = offset;
133 }
134
/* Checks whether two "FIELD=value" blobs refer to the same field.
 *
 * Both buffers are scanned in lockstep until a '=' has been seen in
 * each of them. Returns 1 if all bytes scanned up to that point were
 * equal, 0 if any of them differed, and -EINVAL if the shorter buffer
 * ends before both '=' were seen. */
static int same_field(const void *_a, size_t s, const void *_b, size_t t) {
        const uint8_t *lhs = _a, *rhs = _b;
        const size_t limit = s < t ? s : t;
        bool lhs_has_eq = false, rhs_has_eq = false, mismatch = false;
        size_t pos;

        for (pos = 0; pos < limit; pos++) {
                lhs_has_eq = lhs_has_eq || lhs[pos] == '=';
                rhs_has_eq = rhs_has_eq || rhs[pos] == '=';
                mismatch = mismatch || lhs[pos] != rhs[pos];

                if (lhs_has_eq && rhs_has_eq)
                        return mismatch ? 0 : 1;
        }

        return -EINVAL;
}
155
156 int sd_journal_add_match(sd_journal *j, const void *data, size_t size) {
157         Match *m, *after = NULL;
158         uint64_t le_hash;
159
160         assert(j);
161
162         if (size <= 0)
163                 return -EINVAL;
164
165         assert(data);
166
167         le_hash = htole64(hash64(data, size));
168
169         LIST_FOREACH(matches, m, j->matches) {
170                 int r;
171
172                 if (m->le_hash == le_hash &&
173                     m->size == size &&
174                     memcmp(m->data, data, size) == 0)
175                         return 0;
176
177                 r = same_field(data, size, m->data, m->size);
178                 if (r < 0)
179                         return r;
180                 else if (r > 0)
181                         after = m;
182         }
183
184         m = new0(Match, 1);
185         if (!m)
186                 return -ENOMEM;
187
188         m->size = size;
189
190         m->data = malloc(m->size);
191         if (!m->data) {
192                 free(m);
193                 return -ENOMEM;
194         }
195
196         memcpy(m->data, data, size);
197         m->le_hash = le_hash;
198
199         /* Matches for the same fields we order adjacent to each
200          * other */
201         LIST_INSERT_AFTER(Match, matches, j->matches, after, m);
202         j->n_matches ++;
203
204         detach_location(j);
205
206         return 0;
207 }
208
209 void sd_journal_flush_matches(sd_journal *j) {
210         assert(j);
211
212         while (j->matches) {
213                 Match *m = j->matches;
214
215                 LIST_REMOVE(Match, matches, j->matches, m);
216                 free(m->data);
217                 free(m);
218         }
219
220         j->n_matches = 0;
221
222         detach_location(j);
223 }
224
225 static int compare_order(JournalFile *af, Object *ao,
226                          JournalFile *bf, Object *bo) {
227
228         uint64_t a, b;
229
230         assert(af);
231         assert(ao);
232         assert(bf);
233         assert(bo);
234
235         /* We operate on two different files here, hence we can access
236          * two objects at the same time, which we normally can't.
237          *
238          * If contents and timestamps match, these entries are
239          * identical, even if the seqnum does not match */
240
241         if (sd_id128_equal(ao->entry.boot_id, bo->entry.boot_id) &&
242             ao->entry.monotonic == bo->entry.monotonic &&
243             ao->entry.realtime == bo->entry.realtime &&
244             ao->entry.xor_hash == bo->entry.xor_hash)
245                 return 0;
246
247         if (sd_id128_equal(af->header->seqnum_id, bf->header->seqnum_id)) {
248
249                 /* If this is from the same seqnum source, compare
250                  * seqnums */
251                 a = le64toh(ao->entry.seqnum);
252                 b = le64toh(bo->entry.seqnum);
253
254                 if (a < b)
255                         return -1;
256                 if (a > b)
257                         return 1;
258
259                 /* Wow! This is weird, different data but the same
260                  * seqnums? Something is borked, but let's make the
261                  * best of it and compare by time. */
262         }
263
264         if (sd_id128_equal(ao->entry.boot_id, bo->entry.boot_id)) {
265
266                 /* If the boot id matches compare monotonic time */
267                 a = le64toh(ao->entry.monotonic);
268                 b = le64toh(bo->entry.monotonic);
269
270                 if (a < b)
271                         return -1;
272                 if (a > b)
273                         return 1;
274         }
275
276         /* Otherwise compare UTC time */
277         a = le64toh(ao->entry.realtime);
278         b = le64toh(ao->entry.realtime);
279
280         if (a < b)
281                 return -1;
282         if (a > b)
283                 return 1;
284
285         /* Finally, compare by contents */
286         a = le64toh(ao->entry.xor_hash);
287         b = le64toh(ao->entry.xor_hash);
288
289         if (a < b)
290                 return -1;
291         if (a > b)
292                 return 1;
293
294         return 0;
295 }
296
297 static int compare_with_location(JournalFile *af, Object *ao, Location *l) {
298         uint64_t a;
299
300         assert(af);
301         assert(ao);
302         assert(l);
303         assert(l->type == LOCATION_DISCRETE);
304
305         if (l->monotonic_set &&
306             sd_id128_equal(ao->entry.boot_id, l->boot_id) &&
307             l->realtime_set &&
308             le64toh(ao->entry.realtime) == l->realtime &&
309             l->xor_hash_set &&
310             le64toh(ao->entry.xor_hash) == l->xor_hash)
311                 return 0;
312
313         if (l->seqnum_set &&
314             sd_id128_equal(af->header->seqnum_id, l->seqnum_id)) {
315
316                 a = le64toh(ao->entry.seqnum);
317
318                 if (a < l->seqnum)
319                         return -1;
320                 if (a > l->seqnum)
321                         return 1;
322         }
323
324         if (l->monotonic_set &&
325             sd_id128_equal(ao->entry.boot_id, l->boot_id)) {
326
327                 a = le64toh(ao->entry.monotonic);
328
329                 if (a < l->monotonic)
330                         return -1;
331                 if (a > l->monotonic)
332                         return 1;
333         }
334
335         if (l->realtime_set) {
336
337                 a = le64toh(ao->entry.realtime);
338
339                 if (a < l->realtime)
340                         return -1;
341                 if (a > l->realtime)
342                         return 1;
343         }
344
345         if (l->xor_hash_set) {
346                 a = le64toh(ao->entry.xor_hash);
347
348                 if (a < l->xor_hash)
349                         return -1;
350                 if (a > l->xor_hash)
351                         return 1;
352         }
353
354         return 0;
355 }
356
357 static int find_location(sd_journal *j, JournalFile *f, direction_t direction, Object **ret, uint64_t *offset) {
358         Object *o = NULL;
359         uint64_t p = 0;
360         int r;
361
362         assert(j);
363
364         if (!j->matches) {
365                 /* No matches is simple */
366
367                 if (j->current_location.type == LOCATION_HEAD)
368                         r = journal_file_next_entry(f, NULL, 0, DIRECTION_DOWN, &o, &p);
369                 else if (j->current_location.type == LOCATION_TAIL)
370                         r = journal_file_next_entry(f, NULL, 0, DIRECTION_UP, &o, &p);
371                 else if (j->current_location.seqnum_set &&
372                          sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
373                         r = journal_file_move_to_entry_by_seqnum(f, j->current_location.seqnum, direction, &o, &p);
374                 else if (j->current_location.monotonic_set)
375                         r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id, j->current_location.monotonic, direction, &o, &p);
376                 else if (j->current_location.realtime_set)
377                         r = journal_file_move_to_entry_by_realtime(f, j->current_location.realtime, direction, &o, &p);
378                 else
379                         r = journal_file_next_entry(f, NULL, 0, direction, &o, &p);
380
381                 if (r <= 0)
382                         return r;
383
384         } else  {
385                 Match *m, *term_match = NULL;
386                 Object *to = NULL;
387                 uint64_t tp = 0;
388
389                 /* We have matches, first, let's jump to the monotonic
390                  * position if we have any, since it implies a
391                  * match. */
392
393                 if (j->current_location.type == LOCATION_DISCRETE &&
394                     j->current_location.monotonic_set) {
395
396                         r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id, j->current_location.monotonic, direction, &o, &p);
397                         if (r <= 0)
398                                 return r;
399                 }
400
401                 LIST_FOREACH(matches, m, j->matches) {
402                         Object *c, *d;
403                         uint64_t cp, dp;
404
405                         r = journal_file_find_data_object_with_hash(f, m->data, m->size, m->le_hash, &d, &dp);
406                         if (r <= 0)
407                                 return r;
408
409                         if (j->current_location.type == LOCATION_HEAD)
410                                 r = journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_DOWN, &c, &cp);
411                         else if (j->current_location.type == LOCATION_TAIL)
412                                 r = journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_UP, &c, &cp);
413                         else if (j->current_location.seqnum_set &&
414                                  sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
415                                 r = journal_file_move_to_entry_by_seqnum_for_data(f, dp, j->current_location.seqnum, direction, &c, &cp);
416                         else if (j->current_location.realtime_set)
417                                 r = journal_file_move_to_entry_by_realtime_for_data(f, dp, j->current_location.realtime, direction, &c, &cp);
418                         else
419                                 r = journal_file_next_entry_for_data(f, NULL, 0, dp, direction, &c, &cp);
420
421                         if (!term_match) {
422                                 term_match = m;
423
424                                 if (r > 0) {
425                                         to = c;
426                                         tp = cp;
427                                 }
428                         } else if (same_field(term_match->data, term_match->size, m->data, m->size)) {
429
430                                 /* Same field as previous match... */
431                                 if (r > 0) {
432
433                                         /* Find the earliest of the OR matches */
434
435                                         if (!to ||
436                                             (direction == DIRECTION_DOWN && cp < tp) ||
437                                             (direction == DIRECTION_UP && cp > tp)) {
438                                                 to = c;
439                                                 tp = tp;
440                                         }
441
442                                 }
443
444                         } else {
445
446                                 /* Previous term is finished, did anything match? */
447                                 if (!to)
448                                         return 0;
449
450                                 /* Find the last of the AND matches */
451                                 if (!o ||
452                                     (direction == DIRECTION_DOWN && tp > p) ||
453                                     (direction == DIRECTION_UP && tp < p)) {
454                                         o = to;
455                                         p = tp;
456                                 }
457
458                                 term_match = m;
459
460                                 if (r > 0) {
461                                         to = c;
462                                         tp = cp;
463                                 } else {
464                                         to = NULL;
465                                         tp = 0;
466                                 }
467                         }
468                 }
469
470                 /* Last term is finished, did anything match? */
471                 if (!to)
472                         return 0;
473
474                 if (!o ||
475                     (direction == DIRECTION_DOWN && tp > p) ||
476                     (direction == DIRECTION_UP && tp < p)) {
477                         o = to;
478                         p = tp;
479                 }
480
481                 if (!o)
482                         return 0;
483         }
484
485         if (ret)
486                 *ret = o;
487
488         if (offset)
489                 *offset = p;
490
491         return 1;
492 }
493
494 static int next_with_matches(sd_journal *j, JournalFile *f, direction_t direction, Object **ret, uint64_t *offset) {
495         int r;
496         uint64_t cp;
497         Object *c;
498
499         assert(j);
500         assert(f);
501         assert(ret);
502         assert(offset);
503
504         c = *ret;
505         cp = *offset;
506
507         if (!j->matches) {
508                 /* No matches is easy */
509
510                 r = journal_file_next_entry(f, c, cp, direction, &c, &cp);
511                 if (r <= 0)
512                         return r;
513
514                 if (ret)
515                         *ret = c;
516                 if (offset)
517                         *offset = cp;
518                 return 1;
519         }
520
521         /* So there are matches we have to adhere to, let's find the
522          * first entry that matches all of them */
523
524         for (;;) {
525                 uint64_t np, n;
526                 bool found, term_result = false;
527                 Match *m, *term_match = NULL;
528                 Object *npo = NULL;
529
530                 n = journal_file_entry_n_items(c);
531
532                 /* Make sure we don't match the entry we are starting
533                  * from. */
534                 found = cp > *offset;
535
536                 np = 0;
537                 LIST_FOREACH(matches, m, j->matches) {
538                         uint64_t q, k;
539                         Object *qo = NULL;
540
541                         /* Let's check if this is the beginning of a
542                          * new term, i.e. has a different field prefix
543                          * as the preceeding match. */
544                         if (!term_match) {
545                                 term_match = m;
546                                 term_result = false;
547                         } else if (!same_field(term_match->data, term_match->size, m->data, m->size)) {
548                                 if (!term_result)
549                                         found = false;
550
551                                 term_match = m;
552                                 term_result = false;
553                         }
554
555                         for (k = 0; k < n; k++)
556                                 if (c->entry.items[k].hash == m->le_hash)
557                                         break;
558
559                         if (k >= n) {
560                                 /* Hmm, didn't find any field that
561                                  * matched this rule, so ignore this
562                                  * match. Go on with next match */
563                                 continue;
564                         }
565
566                         term_result = true;
567
568                         /* Hmm, so, this field matched, let's remember
569                          * where we'd have to try next, in case the other
570                          * matches are not OK */
571
572                         r = journal_file_next_entry_for_data(f, c, cp, le64toh(c->entry.items[k].object_offset), direction, &qo, &q);
573                         if (r > 0) {
574
575                                 if (direction == DIRECTION_DOWN) {
576                                         if (q > np) {
577                                                 np = q;
578                                                 npo = qo;
579                                         }
580                                 } else {
581                                         if (np == 0 || q < np) {
582                                                 np = q;
583                                                 npo = qo;
584                                         }
585                                 }
586                         }
587                 }
588
589                 /* Check the last term */
590                 if (term_match && !term_result)
591                         found = false;
592
593                 /* Did this entry match against all matches? */
594                 if (found) {
595                         if (ret)
596                                 *ret = c;
597                         if (offset)
598                                 *offset = cp;
599                         return 1;
600                 }
601
602                 /* Did we find a subsequent entry? */
603                 if (np == 0)
604                         return 0;
605
606                 /* Hmm, ok, this entry only matched partially, so
607                  * let's try another one */
608                 cp = np;
609                 c = npo;
610         }
611 }
612
613 static int next_beyond_location(sd_journal *j, JournalFile *f, direction_t direction, Object **ret, uint64_t *offset) {
614         Object *c;
615         uint64_t cp;
616         int compare_value, r;
617
618         assert(j);
619         assert(f);
620
621         if (f->current_offset > 0) {
622                 cp = f->current_offset;
623
624                 r = journal_file_move_to_object(f, OBJECT_ENTRY, cp, &c);
625                 if (r < 0)
626                         return r;
627
628                 r = next_with_matches(j, f, direction, &c, &cp);
629                 if (r <= 0)
630                         return r;
631
632                 compare_value = 1;
633         } else {
634                 r = find_location(j, f, direction, &c, &cp);
635                 if (r <= 0)
636                         return r;
637
638                 compare_value = 0;
639         }
640
641         for (;;) {
642                 bool found;
643
644                 if (j->current_location.type == LOCATION_DISCRETE) {
645                         int k;
646
647                         k = compare_with_location(f, c, &j->current_location);
648                         if (direction == DIRECTION_DOWN)
649                                 found = k >= compare_value;
650                         else
651                                 found = k <= -compare_value;
652                 } else
653                         found = true;
654
655                 if (found) {
656                         if (ret)
657                                 *ret = c;
658                         if (offset)
659                                 *offset = cp;
660                         return 1;
661                 }
662
663                 r = next_with_matches(j, f, direction, &c, &cp);
664                 if (r <= 0)
665                         return r;
666         }
667 }
668
669 static int real_journal_next(sd_journal *j, direction_t direction) {
670         JournalFile *f, *new_current = NULL;
671         Iterator i;
672         int r;
673         uint64_t new_offset = 0;
674         Object *new_entry = NULL;
675
676         assert(j);
677
678         HASHMAP_FOREACH(f, j->files, i) {
679                 Object *o;
680                 uint64_t p;
681                 bool found;
682
683                 r = next_beyond_location(j, f, direction, &o, &p);
684                 if (r < 0)
685                         return r;
686                 else if (r == 0)
687                         continue;
688
689                 if (!new_current)
690                         found = true;
691                 else {
692                         int k;
693
694                         k = compare_order(f, o, new_current, new_entry);
695
696                         if (direction == DIRECTION_DOWN)
697                                 found = k < 0;
698                         else
699                                 found = k > 0;
700                 }
701
702                 if (found) {
703                         new_current = f;
704                         new_entry = o;
705                         new_offset = p;
706                 }
707         }
708
709         if (!new_current)
710                 return 0;
711
712         set_location(j, new_current, new_entry, new_offset);
713
714         return 1;
715 }
716
717 int sd_journal_next(sd_journal *j) {
718         return real_journal_next(j, DIRECTION_DOWN);
719 }
720
721 int sd_journal_previous(sd_journal *j) {
722         return real_journal_next(j, DIRECTION_UP);
723 }
724
725 int sd_journal_next_skip(sd_journal *j, uint64_t skip) {
726         int c = 0, r;
727
728         assert(j);
729
730         while (skip > 0) {
731                 r = sd_journal_next(j);
732                 if (r < 0)
733                         return r;
734
735                 if (r == 0)
736                         return c;
737
738                 skip--;
739                 c++;
740         }
741
742         return c;
743 }
744
745 int sd_journal_previous_skip(sd_journal *j, uint64_t skip) {
746         int c = 0, r;
747
748         assert(j);
749
750         while (skip > 0) {
751                 r = sd_journal_previous(j);
752                 if (r < 0)
753                         return r;
754
755                 if (r == 0)
756                         return c;
757
758                 skip--;
759                 c++;
760         }
761
762         return 1;
763 }
764
765 int sd_journal_get_cursor(sd_journal *j, char **cursor) {
766         Object *o;
767         int r;
768         char bid[33], sid[33];
769
770         assert(j);
771         assert(cursor);
772
773         if (!j->current_file || j->current_file->current_offset <= 0)
774                 return -EADDRNOTAVAIL;
775
776         r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
777         if (r < 0)
778                 return r;
779
780         sd_id128_to_string(j->current_file->header->seqnum_id, sid);
781         sd_id128_to_string(o->entry.boot_id, bid);
782
783         if (asprintf(cursor,
784                      "s=%s;i=%llx;b=%s;m=%llx;t=%llx;x=%llx;p=%s",
785                      sid, (unsigned long long) le64toh(o->entry.seqnum),
786                      bid, (unsigned long long) le64toh(o->entry.monotonic),
787                      (unsigned long long) le64toh(o->entry.realtime),
788                      (unsigned long long) le64toh(o->entry.xor_hash),
789                      file_name_from_path(j->current_file->path)) < 0)
790                 return -ENOMEM;
791
792         return 1;
793 }
794
795 int sd_journal_seek_cursor(sd_journal *j, const char *cursor) {
796         char *w;
797         size_t l;
798         char *state;
799         unsigned long long seqnum, monotonic, realtime, xor_hash;
800         bool
801                 seqnum_id_set = false,
802                 seqnum_set = false,
803                 boot_id_set = false,
804                 monotonic_set = false,
805                 realtime_set = false,
806                 xor_hash_set = false;
807         sd_id128_t seqnum_id, boot_id;
808
809         assert(j);
810         assert(cursor);
811
812         FOREACH_WORD_SEPARATOR(w, l, cursor, ";", state) {
813                 char *item;
814                 int k = 0;
815
816                 if (l < 2 || w[1] != '=')
817                         return -EINVAL;
818
819                 item = strndup(w, l);
820                 if (!item)
821                         return -ENOMEM;
822
823                 switch (w[0]) {
824
825                 case 's':
826                         seqnum_id_set = true;
827                         k = sd_id128_from_string(w+2, &seqnum_id);
828                         break;
829
830                 case 'i':
831                         seqnum_set = true;
832                         if (sscanf(w+2, "%llx", &seqnum) != 1)
833                                 k = -EINVAL;
834                         break;
835
836                 case 'b':
837                         boot_id_set = true;
838                         k = sd_id128_from_string(w+2, &boot_id);
839                         break;
840
841                 case 'm':
842                         monotonic_set = true;
843                         if (sscanf(w+2, "%llx", &monotonic) != 1)
844                                 k = -EINVAL;
845                         break;
846
847                 case 't':
848                         realtime_set = true;
849                         if (sscanf(w+2, "%llx", &realtime) != 1)
850                                 k = -EINVAL;
851                         break;
852
853                 case 'x':
854                         xor_hash_set = true;
855                         if (sscanf(w+2, "%llx", &xor_hash) != 1)
856                                 k = -EINVAL;
857                         break;
858                 }
859
860                 free(item);
861
862                 if (k < 0)
863                         return k;
864         }
865
866         if ((!seqnum_set || !seqnum_id_set) &&
867             (!monotonic_set || !boot_id_set) &&
868             !realtime_set)
869                 return -EINVAL;
870
871         reset_location(j);
872
873         j->current_location.type = LOCATION_DISCRETE;
874
875         if (realtime_set) {
876                 j->current_location.realtime = (uint64_t) realtime;
877                 j->current_location.realtime_set = true;
878         }
879
880         if (seqnum_set && seqnum_id_set) {
881                 j->current_location.seqnum = (uint64_t) seqnum;
882                 j->current_location.seqnum_id = seqnum_id;
883                 j->current_location.seqnum_set = true;
884         }
885
886         if (monotonic_set && boot_id_set) {
887                 j->current_location.monotonic = (uint64_t) monotonic;
888                 j->current_location.boot_id = boot_id;
889                 j->current_location.monotonic_set = true;
890         }
891
892         if (xor_hash_set) {
893                 j->current_location.xor_hash = (uint64_t) xor_hash;
894                 j->current_location.xor_hash_set = true;
895         }
896
897         return 0;
898 }
899
900 int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t usec) {
901         assert(j);
902
903         reset_location(j);
904         j->current_location.type = LOCATION_DISCRETE;
905         j->current_location.boot_id = boot_id;
906         j->current_location.monotonic = usec;
907         j->current_location.monotonic_set = true;
908
909         return 0;
910 }
911
912 int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec) {
913         assert(j);
914
915         reset_location(j);
916         j->current_location.type = LOCATION_DISCRETE;
917         j->current_location.realtime = usec;
918         j->current_location.realtime_set = true;
919
920         return 0;
921 }
922
923 int sd_journal_seek_head(sd_journal *j) {
924         assert(j);
925
926         reset_location(j);
927         j->current_location.type = LOCATION_HEAD;
928
929         return 0;
930 }
931
932 int sd_journal_seek_tail(sd_journal *j) {
933         assert(j);
934
935         reset_location(j);
936         j->current_location.type = LOCATION_TAIL;
937
938         return 0;
939 }
940
941 static int add_file(sd_journal *j, const char *prefix, const char *dir, const char *filename) {
942         char *fn;
943         int r;
944         JournalFile *f;
945
946         assert(j);
947         assert(prefix);
948         assert(filename);
949
950         if (dir)
951                 fn = join(prefix, "/", dir, "/", filename, NULL);
952         else
953                 fn = join(prefix, "/", filename, NULL);
954
955         if (!fn)
956                 return -ENOMEM;
957
958         if (hashmap_get(j->files, fn)) {
959                 free(fn);
960                 return 0;
961         }
962
963         if (hashmap_size(j->files) >= JOURNAL_FILES_MAX) {
964                 log_debug("Too many open journal files, not adding %s, ignoring.", fn);
965                 free(fn);
966                 return 0;
967         }
968
969         r = journal_file_open(fn, O_RDONLY, 0, NULL, &f);
970         free(fn);
971
972         if (r < 0) {
973                 if (errno == ENOENT)
974                         return 0;
975
976                 return r;
977         }
978
979         journal_file_dump(f);
980
981         r = hashmap_put(j->files, f->path, f);
982         if (r < 0) {
983                 journal_file_close(f);
984                 return r;
985         }
986
987         log_debug("File %s got added.", f->path);
988
989         return 0;
990 }
991
992 static int remove_file(sd_journal *j, const char *prefix, const char *dir, const char *filename) {
993         char *fn;
994         JournalFile *f;
995
996         assert(j);
997         assert(prefix);
998         assert(filename);
999
1000         if (dir)
1001                 fn = join(prefix, "/", dir, "/", filename, NULL);
1002         else
1003                 fn = join(prefix, "/", filename, NULL);
1004
1005         if (!fn)
1006                 return -ENOMEM;
1007
1008         f = hashmap_get(j->files, fn);
1009         free(fn);
1010
1011         if (!f)
1012                 return 0;
1013
1014         hashmap_remove(j->files, f->path);
1015         journal_file_close(f);
1016
1017         log_debug("File %s got removed.", f->path);
1018         return 0;
1019 }
1020
1021 static int add_directory(sd_journal *j, const char *prefix, const char *dir) {
1022         char *fn;
1023         int r;
1024         DIR *d;
1025         int wd;
1026
1027         assert(j);
1028         assert(prefix);
1029         assert(dir);
1030
1031         fn = join(prefix, "/", dir, NULL);
1032         if (!fn)
1033                 return -ENOMEM;
1034
1035         d = opendir(fn);
1036
1037         if (!d) {
1038                 free(fn);
1039                 if (errno == ENOENT)
1040                         return 0;
1041
1042                 return -errno;
1043         }
1044
1045         wd = inotify_add_watch(j->inotify_fd, fn,
1046                                IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1047                                IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|
1048                                IN_DONT_FOLLOW|IN_ONLYDIR);
1049         if (wd > 0) {
1050                 if (hashmap_put(j->inotify_wd_dirs, INT_TO_PTR(wd), fn) < 0)
1051                         inotify_rm_watch(j->inotify_fd, wd);
1052                 else
1053                         fn = NULL;
1054         }
1055
1056         free(fn);
1057
1058         for (;;) {
1059                 struct dirent buf, *de;
1060
1061                 r = readdir_r(d, &buf, &de);
1062                 if (r != 0 || !de)
1063                         break;
1064
1065                 if (!dirent_is_file_with_suffix(de, ".journal"))
1066                         continue;
1067
1068                 r = add_file(j, prefix, dir, de->d_name);
1069                 if (r < 0)
1070                         log_debug("Failed to add file %s/%s/%s: %s", prefix, dir, de->d_name, strerror(-r));
1071         }
1072
1073         closedir(d);
1074
1075         log_debug("Directory %s/%s got added.", prefix, dir);
1076
1077         return 0;
1078 }
1079
1080 static void remove_directory_wd(sd_journal *j, int wd) {
1081         char *p;
1082
1083         assert(j);
1084         assert(wd > 0);
1085
1086         if (j->inotify_fd >= 0)
1087                 inotify_rm_watch(j->inotify_fd, wd);
1088
1089         p = hashmap_remove(j->inotify_wd_dirs, INT_TO_PTR(wd));
1090
1091         if (p) {
1092                 log_debug("Directory %s got removed.", p);
1093                 free(p);
1094         }
1095 }
1096
1097 static void add_root_wd(sd_journal *j, const char *p) {
1098         int wd;
1099         char *k;
1100
1101         assert(j);
1102         assert(p);
1103
1104         wd = inotify_add_watch(j->inotify_fd, p,
1105                                IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1106                                IN_DONT_FOLLOW|IN_ONLYDIR);
1107         if (wd <= 0)
1108                 return;
1109
1110         k = strdup(p);
1111         if (!k || hashmap_put(j->inotify_wd_roots, INT_TO_PTR(wd), k) < 0) {
1112                 inotify_rm_watch(j->inotify_fd, wd);
1113                 free(k);
1114         }
1115 }
1116
1117 static void remove_root_wd(sd_journal *j, int wd) {
1118         char *p;
1119
1120         assert(j);
1121         assert(wd > 0);
1122
1123         if (j->inotify_fd >= 0)
1124                 inotify_rm_watch(j->inotify_fd, wd);
1125
1126         p = hashmap_remove(j->inotify_wd_roots, INT_TO_PTR(wd));
1127
1128         if (p) {
1129                 log_debug("Root %s got removed.", p);
1130                 free(p);
1131         }
1132 }
1133
1134 int sd_journal_open(sd_journal **ret) {
1135         sd_journal *j;
1136         const char *p;
1137         const char search_paths[] =
1138                 "/run/log/journal\0"
1139                 "/var/log/journal\0";
1140         int r;
1141
1142         assert(ret);
1143
1144         j = new0(sd_journal, 1);
1145         if (!j)
1146                 return -ENOMEM;
1147
1148         j->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1149         if (j->inotify_fd < 0) {
1150                 r = -errno;
1151                 goto fail;
1152         }
1153
1154         j->files = hashmap_new(string_hash_func, string_compare_func);
1155         if (!j->files) {
1156                 r = -ENOMEM;
1157                 goto fail;
1158         }
1159
1160         j->inotify_wd_dirs = hashmap_new(trivial_hash_func, trivial_compare_func);
1161         j->inotify_wd_roots = hashmap_new(trivial_hash_func, trivial_compare_func);
1162
1163         if (!j->inotify_wd_dirs || !j->inotify_wd_roots) {
1164                 r = -ENOMEM;
1165                 goto fail;
1166         }
1167
1168         /* We ignore most errors here, since the idea is to only open
1169          * what's actually accessible, and ignore the rest. */
1170
1171         NULSTR_FOREACH(p, search_paths) {
1172                 DIR *d;
1173
1174                 d = opendir(p);
1175                 if (!d) {
1176                         if (errno != ENOENT)
1177                                 log_debug("Failed to open %s: %m", p);
1178                         continue;
1179                 }
1180
1181                 add_root_wd(j, p);
1182
1183                 for (;;) {
1184                         struct dirent buf, *de;
1185                         sd_id128_t id;
1186
1187                         r = readdir_r(d, &buf, &de);
1188                         if (r != 0 || !de)
1189                                 break;
1190
1191                         if (dirent_is_file_with_suffix(de, ".journal")) {
1192                                 r = add_file(j, p, NULL, de->d_name);
1193                                 if (r < 0)
1194                                         log_debug("Failed to add file %s/%s: %s", p, de->d_name, strerror(-r));
1195
1196                         } else if ((de->d_type == DT_DIR || de->d_type == DT_UNKNOWN) &&
1197                                    sd_id128_from_string(de->d_name, &id) >= 0) {
1198
1199                                 r = add_directory(j, p, de->d_name);
1200                                 if (r < 0)
1201                                         log_debug("Failed to add directory %s/%s: %s", p, de->d_name, strerror(-r));
1202                         }
1203                 }
1204
1205                 closedir(d);
1206         }
1207
1208         *ret = j;
1209         return 0;
1210
1211 fail:
1212         sd_journal_close(j);
1213
1214         return r;
1215 };
1216
1217 void sd_journal_close(sd_journal *j) {
1218         assert(j);
1219
1220         if (j->inotify_wd_dirs) {
1221                 void *k;
1222
1223                 while ((k = hashmap_first_key(j->inotify_wd_dirs)))
1224                         remove_directory_wd(j, PTR_TO_INT(k));
1225
1226                 hashmap_free(j->inotify_wd_dirs);
1227         }
1228
1229         if (j->inotify_wd_roots) {
1230                 void *k;
1231
1232                 while ((k = hashmap_first_key(j->inotify_wd_roots)))
1233                         remove_root_wd(j, PTR_TO_INT(k));
1234
1235                 hashmap_free(j->inotify_wd_roots);
1236         }
1237
1238         if (j->files) {
1239                 JournalFile *f;
1240
1241                 while ((f = hashmap_steal_first(j->files)))
1242                         journal_file_close(f);
1243
1244                 hashmap_free(j->files);
1245         }
1246
1247         sd_journal_flush_matches(j);
1248
1249         if (j->inotify_fd >= 0)
1250                 close_nointr_nofail(j->inotify_fd);
1251
1252         free(j);
1253 }
1254
1255 int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret) {
1256         Object *o;
1257         JournalFile *f;
1258         int r;
1259
1260         assert(j);
1261         assert(ret);
1262
1263         f = j->current_file;
1264         if (!f)
1265                 return -EADDRNOTAVAIL;
1266
1267         if (f->current_offset <= 0)
1268                 return -EADDRNOTAVAIL;
1269
1270         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1271         if (r < 0)
1272                 return r;
1273
1274         *ret = le64toh(o->entry.realtime);
1275         return 0;
1276 }
1277
1278 int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id128_t *ret_boot_id) {
1279         Object *o;
1280         JournalFile *f;
1281         int r;
1282         sd_id128_t id;
1283
1284         assert(j);
1285         assert(ret);
1286
1287         f = j->current_file;
1288         if (!f)
1289                 return -EADDRNOTAVAIL;
1290
1291         if (f->current_offset <= 0)
1292                 return -EADDRNOTAVAIL;
1293
1294         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1295         if (r < 0)
1296                 return r;
1297
1298         if (ret_boot_id)
1299                 *ret_boot_id = o->entry.boot_id;
1300         else {
1301                 r = sd_id128_get_boot(&id);
1302                 if (r < 0)
1303                         return r;
1304
1305                 if (!sd_id128_equal(id, o->entry.boot_id))
1306                         return -ENOENT;
1307         }
1308
1309         *ret = le64toh(o->entry.monotonic);
1310         return 0;
1311 }
1312
1313 int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *size) {
1314         JournalFile *f;
1315         uint64_t i, n;
1316         size_t field_length;
1317         int r;
1318         Object *o;
1319
1320         assert(j);
1321         assert(field);
1322         assert(data);
1323         assert(size);
1324
1325         if (isempty(field) || strchr(field, '='))
1326                 return -EINVAL;
1327
1328         f = j->current_file;
1329         if (!f)
1330                 return -EADDRNOTAVAIL;
1331
1332         if (f->current_offset <= 0)
1333                 return -EADDRNOTAVAIL;
1334
1335         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1336         if (r < 0)
1337                 return r;
1338
1339         field_length = strlen(field);
1340
1341         n = journal_file_entry_n_items(o);
1342         for (i = 0; i < n; i++) {
1343                 uint64_t p, l, le_hash;
1344                 size_t t;
1345
1346                 p = le64toh(o->entry.items[i].object_offset);
1347                 le_hash = o->entry.items[j->current_field].hash;
1348                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1349                 if (r < 0)
1350                         return r;
1351
1352                 if (le_hash != o->data.hash)
1353                         return -EBADMSG;
1354
1355                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
1356
1357                 if (l >= field_length+1 &&
1358                     memcmp(o->data.payload, field, field_length) == 0 &&
1359                     o->data.payload[field_length] == '=') {
1360
1361                         t = (size_t) l;
1362
1363                         if ((uint64_t) t != l)
1364                                 return -E2BIG;
1365
1366                         *data = o->data.payload;
1367                         *size = t;
1368
1369                         return 0;
1370                 }
1371
1372                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1373                 if (r < 0)
1374                         return r;
1375         }
1376
1377         return -ENOENT;
1378 }
1379
1380 int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) {
1381         JournalFile *f;
1382         uint64_t p, l, n, le_hash;
1383         int r;
1384         Object *o;
1385         size_t t;
1386
1387         assert(j);
1388         assert(data);
1389         assert(size);
1390
1391         f = j->current_file;
1392         if (!f)
1393                 return -EADDRNOTAVAIL;
1394
1395         if (f->current_offset <= 0)
1396                 return -EADDRNOTAVAIL;
1397
1398         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1399         if (r < 0)
1400                 return r;
1401
1402         n = journal_file_entry_n_items(o);
1403         if (j->current_field >= n)
1404                 return 0;
1405
1406         p = le64toh(o->entry.items[j->current_field].object_offset);
1407         le_hash = o->entry.items[j->current_field].hash;
1408         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1409         if (r < 0)
1410                 return r;
1411
1412         if (le_hash != o->data.hash)
1413                 return -EBADMSG;
1414
1415         l = le64toh(o->object.size) - offsetof(Object, data.payload);
1416         t = (size_t) l;
1417
1418         /* We can't read objects larger than 4G on a 32bit machine */
1419         if ((uint64_t) t != l)
1420                 return -E2BIG;
1421
1422         *data = o->data.payload;
1423         *size = t;
1424
1425         j->current_field ++;
1426
1427         return 1;
1428 }
1429
1430 void sd_journal_restart_data(sd_journal *j) {
1431         assert(j);
1432
1433         j->current_field = 0;
1434 }
1435
1436 int sd_journal_get_fd(sd_journal *j) {
1437         assert(j);
1438
1439         return j->inotify_fd;
1440 }
1441
1442 static void process_inotify_event(sd_journal *j, struct inotify_event *e) {
1443         char *p;
1444         int r;
1445
1446         assert(j);
1447         assert(e);
1448
1449         /* Is this a subdirectory we watch? */
1450         p = hashmap_get(j->inotify_wd_dirs, INT_TO_PTR(e->wd));
1451         if (p) {
1452
1453                 if (!(e->mask & IN_ISDIR) && e->len > 0 && endswith(e->name, ".journal")) {
1454
1455                         /* Event for a journal file */
1456
1457                         if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) {
1458                                 r = add_file(j, p, NULL, e->name);
1459                                 if (r < 0)
1460                                         log_debug("Failed to add file %s/%s: %s", p, e->name, strerror(-r));
1461                         } else if (e->mask & (IN_DELETE|IN_UNMOUNT)) {
1462
1463                                 r = remove_file(j, p, NULL, e->name);
1464                                 if (r < 0)
1465                                         log_debug("Failed to remove file %s/%s: %s", p, e->name, strerror(-r));
1466                         }
1467
1468                 } else if (e->len == 0) {
1469
1470                         /* Event for the directory itself */
1471
1472                         if (e->mask & (IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT))
1473                                 remove_directory_wd(j, e->wd);
1474                 }
1475
1476                 return;
1477         }
1478
1479         /* Must be the root directory then? */
1480         p = hashmap_get(j->inotify_wd_roots, INT_TO_PTR(e->wd));
1481         if (p) {
1482                 sd_id128_t id;
1483
1484                 if (!(e->mask & IN_ISDIR) && e->len > 0 && endswith(e->name, ".journal")) {
1485
1486                         /* Event for a journal file */
1487
1488                         if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) {
1489                                 r = add_file(j, p, NULL, e->name);
1490                                 if (r < 0)
1491                                         log_debug("Failed to add file %s/%s: %s", p, e->name, strerror(-r));
1492                         } else if (e->mask & (IN_DELETE|IN_UNMOUNT)) {
1493
1494                                 r = remove_file(j, p, NULL, e->name);
1495                                 if (r < 0)
1496                                         log_debug("Failed to remove file %s/%s: %s", p, e->name, strerror(-r));
1497                         }
1498
1499                 } else if ((e->mask & IN_ISDIR) && e->len > 0 && sd_id128_from_string(e->name, &id) >= 0) {
1500
1501                         /* Event for subdirectory */
1502
1503                         if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) {
1504
1505                                 r = add_directory(j, p, e->name);
1506                                 if (r < 0)
1507                                         log_debug("Failed to add directory %s/%s: %s", p, e->name, strerror(-r));
1508                         }
1509                 }
1510
1511                 return;
1512         }
1513
1514         if (e->mask & IN_IGNORED)
1515                 return;
1516
1517         log_warning("Unknown inotify event.");
1518 }
1519
1520 int sd_journal_process(sd_journal *j) {
1521         uint8_t buffer[sizeof(struct inotify_event) + FILENAME_MAX];
1522
1523         assert(j);
1524
1525         for (;;) {
1526                 struct inotify_event *e;
1527                 ssize_t l;
1528
1529                 l = read(j->inotify_fd, buffer, sizeof(buffer));
1530                 if (l < 0) {
1531                         if (errno == EINTR || errno == EAGAIN)
1532                                 return 0;
1533
1534                         return -errno;
1535                 }
1536
1537                 e = (struct inotify_event*) buffer;
1538                 while (l > 0) {
1539                         size_t step;
1540
1541                         process_inotify_event(j, e);
1542
1543                         step = sizeof(struct inotify_event) + e->len;
1544                         assert(step <= (size_t) l);
1545
1546                         e = (struct inotify_event*) ((uint8_t*) e + step);
1547                         l -= step;
1548                 }
1549         }
1550 }