X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=elogind.git;a=blobdiff_plain;f=src%2Fjournal%2Fsd-journal.c;h=149dc10bdf60dd6767a7e9ab8adc9b3eb9ce82a4;hp=8bca300f93f4468531b483788d3ca35f47d54c1a;hb=e02d1cf72d115d1d61defdca5b551672d876c6bd;hpb=260a2be45522f03ce8d8aca38e471d7b0882ff05 diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index 8bca300f9..149dc10bd 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -6,1344 +6,1733 @@ Copyright 2011 Lennart Poettering systemd is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. systemd is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + Lesser General Public License for more details. - You should have received a copy of the GNU General Public License + You should have received a copy of the GNU Lesser General Public License along with systemd; If not, see . ***/ -#include #include -#include -#include -#include #include #include +#include +#include +#include #include "sd-journal.h" #include "journal-def.h" -#include "journal-private.h" -#include "lookup3.h" -#include "list.h" +#include "journal-file.h" #include "hashmap.h" +#include "list.h" +#include "path-util.h" +#include "lookup3.h" +#include "compress.h" +#include "journal-internal.h" -#define DEFAULT_ARENA_MAX_SIZE (16ULL*1024ULL*1024ULL*1024ULL) -#define DEFAULT_ARENA_MIN_SIZE (256ULL*1024ULL) -#define DEFAULT_ARENA_KEEP_FREE (1ULL*1024ULL*1024ULL) - -#define DEFAULT_HASH_TABLE_SIZE (2047ULL*16ULL) -#define DEFAULT_BISECT_TABLE_SIZE ((DEFAULT_ARENA_MAX_SIZE/(64ULL*1024ULL))*8ULL) - -#define DEFAULT_WINDOW_SIZE (128ULL*1024ULL*1024ULL) - -struct JournalFile { - int fd; - char *path; - struct stat last_stat; - int prot; - bool writable; - - Header *header; - - HashItem *hash_table; - void *hash_table_window; - uint64_t hash_table_window_size; - - uint64_t *bisect_table; - void *bisect_table_window; - uint64_t bisect_table_window_size; +#define JOURNAL_FILES_MAX 1024 - void *window; - uint64_t window_offset; - uint64_t window_size; +static void detach_location(sd_journal *j) { + Iterator i; + JournalFile *f; - Object *current; - uint64_t current_offset; + assert(j); - LIST_FIELDS(JournalFile, files); -}; + j->current_file = NULL; + j->current_field = 0; -struct sd_journal { - Hashmap *files; -}; + HASHMAP_FOREACH(f, j->files, i) + f->current_offset = 0; +} -static const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' }; +static void reset_location(sd_journal *j) { + assert(j); -#define ALIGN64(x) (((x) + 7ULL) & ~7ULL) + detach_location(j); + zero(j->current_location); +} -void journal_file_close(JournalFile *f) { +static void init_location(Location *l, JournalFile *f, Object *o) { + assert(l); assert(f); + assert(o->object.type == OBJECT_ENTRY); - if (f->fd >= 0) - close_nointr_nofail(f->fd); - - if (f->header) - munmap(f->header, PAGE_ALIGN(sizeof(Header))); + l->type = LOCATION_DISCRETE; + l->seqnum = le64toh(o->entry.seqnum); + l->seqnum_id = f->header->seqnum_id; + l->realtime = le64toh(o->entry.realtime); + l->monotonic = le64toh(o->entry.monotonic); + l->boot_id = o->entry.boot_id; + l->xor_hash = le64toh(o->entry.xor_hash); - if (f->hash_table_window) - munmap(f->hash_table_window, f->hash_table_window_size); - - if (f->bisect_table_window) - munmap(f->bisect_table_window, f->bisect_table_window_size); - - if (f->window) - munmap(f->window, f->window_size); - - free(f->path); - free(f); + l->seqnum_set = l->realtime_set = l->monotonic_set = l->xor_hash_set = true; } -static int journal_file_init_header(JournalFile *f) { - Header h; - ssize_t k; - int r; - +static void set_location(sd_journal *j, JournalFile *f, Object *o, uint64_t offset) { + assert(j); assert(f); + assert(o); - zero(h); - memcpy(h.signature, signature, 8); - h.arena_offset = htole64(ALIGN64(sizeof(h))); - h.arena_max_size = htole64(DEFAULT_ARENA_MAX_SIZE); - h.arena_min_size = htole64(DEFAULT_ARENA_MIN_SIZE); - h.arena_keep_free = htole64(DEFAULT_ARENA_KEEP_FREE); - - r = sd_id128_randomize(&h.file_id); - if (r < 0) - return r; - - k = pwrite(f->fd, &h, sizeof(h), 0); - if (k < 0) - return -errno; + init_location(&j->current_location, f, o); - if (k != sizeof(h)) - return -EIO; + j->current_file = f; + j->current_field = 0; - return 0; + f->current_offset = offset; } -static int journal_file_refresh_header(JournalFile *f) { - int r; +static int same_field(const void *_a, size_t s, const void *_b, size_t t) { + const uint8_t *a = _a, *b = _b; + size_t j; + bool a_good = false, b_good = false, different = false; - assert(f); + for (j = 0; j < s && j < t; j++) { - r = sd_id128_get_machine(&f->header->machine_id); - if (r < 0) - return r; + if (a[j] == '=') + a_good = true; + if (b[j] == '=') + b_good = true; + if (a[j] != b[j]) + different = true; - r = sd_id128_get_boot(&f->header->boot_id); - if (r < 0) - return r; + if (a_good && b_good) + return different ? 0 : 1; + } - f->header->state = htole32(STATE_ONLINE); - return 0; + return -EINVAL; } -static int journal_file_verify_header(JournalFile *f) { - assert(f); +_public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size) { + Match *m, *after = NULL; + le64_t le_hash; - if (memcmp(f->header, signature, 8)) - return -EBADMSG; + if (!j) + return -EINVAL; + if (!data) + return -EINVAL; + if (size <= 1) + return -EINVAL; + if (!memchr(data, '=', size)) + return -EINVAL; + if (*(char*) data == '=') + return -EINVAL; - if (f->header->incompatible_flags != 0) - return -EPROTONOSUPPORT; + /* FIXME: iterating with multiple matches is currently + * broken */ + if (j->matches) + return -ENOTSUP; - if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->arena_offset) + le64toh(f->header->arena_size))) - return -ENODATA; + le_hash = htole64(hash64(data, size)); - if (f->writable) { - uint32_t state; - sd_id128_t machine_id; + LIST_FOREACH(matches, m, j->matches) { int r; - r = sd_id128_get_machine(&machine_id); + if (m->le_hash == le_hash && + m->size == size && + memcmp(m->data, data, size) == 0) + return 0; + + r = same_field(data, size, m->data, m->size); if (r < 0) return r; + else if (r > 0) + after = m; + } - if (!sd_id128_equal(machine_id, f->header->machine_id)) - return -EHOSTDOWN; + m = new0(Match, 1); + if (!m) + return -ENOMEM; - state = le32toh(f->header->state); + m->size = size; - if (state == STATE_ONLINE) - log_debug("Journal file %s is already online. Assuming unclean closing. Ignoring.", f->path); - else if (state == STATE_ARCHIVED) - return -ESHUTDOWN; - else if (state != STATE_OFFLINE) - log_debug("Journal file %s has unknown state %u. Ignoring.", f->path, state); + m->data = malloc(m->size); + if (!m->data) { + free(m); + return -ENOMEM; } + memcpy(m->data, data, size); + m->le_hash = le_hash; + + /* Matches for the same fields we order adjacent to each + * other */ + LIST_INSERT_AFTER(Match, matches, j->matches, after, m); + j->n_matches ++; + + detach_location(j); + return 0; } -static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) { - uint64_t asize; - uint64_t old_size, new_size; +_public_ void sd_journal_flush_matches(sd_journal *j) { + if (!j) + return; - assert(f); + while (j->matches) { + Match *m = j->matches; - if (offset < le64toh(f->header->arena_offset)) - return -EINVAL; + LIST_REMOVE(Match, matches, j->matches, m); + free(m->data); + free(m); + } + + j->n_matches = 0; + + detach_location(j); +} + +static int compare_order(JournalFile *af, Object *ao, + JournalFile *bf, Object *bo) { - new_size = PAGE_ALIGN(offset + size); + uint64_t a, b; - /* We assume that this file is not sparse, and we know that - * for sure, since we alway call posix_fallocate() - * ourselves */ + assert(af); + assert(ao); + assert(bf); + assert(bo); - old_size = - le64toh(f->header->arena_offset) + - le64toh(f->header->arena_size); + /* We operate on two different files here, hence we can access + * two objects at the same time, which we normally can't. + * + * If contents and timestamps match, these entries are + * identical, even if the seqnum does not match */ - if (old_size >= new_size) + if (sd_id128_equal(ao->entry.boot_id, bo->entry.boot_id) && + ao->entry.monotonic == bo->entry.monotonic && + ao->entry.realtime == bo->entry.realtime && + ao->entry.xor_hash == bo->entry.xor_hash) return 0; - asize = new_size - le64toh(f->header->arena_offset); + if (sd_id128_equal(af->header->seqnum_id, bf->header->seqnum_id)) { - if (asize > le64toh(f->header->arena_min_size)) { - struct statvfs svfs; + /* If this is from the same seqnum source, compare + * seqnums */ + a = le64toh(ao->entry.seqnum); + b = le64toh(bo->entry.seqnum); + + if (a < b) + return -1; + if (a > b) + return 1; - if (fstatvfs(f->fd, &svfs) >= 0) { - uint64_t available; + /* Wow! This is weird, different data but the same + * seqnums? Something is borked, but let's make the + * best of it and compare by time. */ + } - available = svfs.f_bfree * svfs.f_bsize; + if (sd_id128_equal(ao->entry.boot_id, bo->entry.boot_id)) { - if (available >= f->header->arena_keep_free) - available -= f->header->arena_keep_free; - else - available = 0; + /* If the boot id matches compare monotonic time */ + a = le64toh(ao->entry.monotonic); + b = le64toh(bo->entry.monotonic); - if (new_size - old_size > available) - return -E2BIG; - } + if (a < b) + return -1; + if (a > b) + return 1; } - if (asize > le64toh(f->header->arena_max_size)) - return -E2BIG; + /* Otherwise compare UTC time */ + a = le64toh(ao->entry.realtime); + b = le64toh(ao->entry.realtime); - if (posix_fallocate(f->fd, 0, new_size) < 0) - return -errno; + if (a < b) + return -1; + if (a > b) + return 1; - if (fstat(f->fd, &f->last_stat) < 0) - return -errno; + /* Finally, compare by contents */ + a = le64toh(ao->entry.xor_hash); + b = le64toh(ao->entry.xor_hash); - f->header->arena_size = htole64(asize); + if (a < b) + return -1; + if (a > b) + return 1; return 0; } -static int journal_file_map( - JournalFile *f, - uint64_t offset, - uint64_t size, - void **_window, - uint64_t *_woffset, - uint64_t *_wsize, - void **ret) { +static int compare_with_location(JournalFile *af, Object *ao, Location *l) { + uint64_t a; - uint64_t woffset, wsize; - void *window; + assert(af); + assert(ao); + assert(l); + assert(l->type == LOCATION_DISCRETE); - assert(f); - assert(size > 0); - assert(ret); + if (l->monotonic_set && + sd_id128_equal(ao->entry.boot_id, l->boot_id) && + l->realtime_set && + le64toh(ao->entry.realtime) == l->realtime && + l->xor_hash_set && + le64toh(ao->entry.xor_hash) == l->xor_hash) + return 0; - woffset = offset & ~((uint64_t) page_size() - 1ULL); - wsize = size + (offset - woffset); - wsize = PAGE_ALIGN(wsize); + if (l->seqnum_set && + sd_id128_equal(af->header->seqnum_id, l->seqnum_id)) { - window = mmap(NULL, wsize, f->prot, MAP_SHARED, f->fd, woffset); - if (window == MAP_FAILED) - return -errno; + a = le64toh(ao->entry.seqnum); - if (_window) - *_window = window; + if (a < l->seqnum) + return -1; + if (a > l->seqnum) + return 1; + } - if (_woffset) - *_woffset = woffset; + if (l->monotonic_set && + sd_id128_equal(ao->entry.boot_id, l->boot_id)) { - if (_wsize) - *_wsize = wsize; + a = le64toh(ao->entry.monotonic); - *ret = (uint8_t*) window + (offset - woffset); + if (a < l->monotonic) + return -1; + if (a > l->monotonic) + return 1; + } - return 0; -} + if (l->realtime_set) { -static int journal_file_move_to(JournalFile *f, uint64_t offset, uint64_t size, void **ret) { - void *p; - uint64_t delta; - int r; + a = le64toh(ao->entry.realtime); - assert(f); - assert(ret); + if (a < l->realtime) + return -1; + if (a > l->realtime) + return 1; + } - if (_likely_(f->window && - f->window_offset <= offset && - f->window_offset+f->window_size >= offset + size)) { + if (l->xor_hash_set) { + a = le64toh(ao->entry.xor_hash); - *ret = (uint8_t*) f->window + (offset - f->window_offset); - return 0; + if (a < l->xor_hash) + return -1; + if (a > l->xor_hash) + return 1; } - if (f->window) { - if (munmap(f->window, f->window_size) < 0) - return -errno; + return 0; +} - f->window = NULL; - f->window_size = f->window_offset = 0; - } +static int find_location(sd_journal *j, JournalFile *f, direction_t direction, Object **ret, uint64_t *offset) { + Object *o = NULL; + uint64_t p = 0; + int r; - if (size < DEFAULT_WINDOW_SIZE) { - /* If the default window size is larger then what was - * asked for extend the mapping a bit in the hope to - * minimize needed remappings later on. We add half - * the window space before and half behind the - * requested mapping */ + assert(j); - delta = PAGE_ALIGN((DEFAULT_WINDOW_SIZE - size) / 2); + if (!j->matches) { + /* No matches is simple */ + + if (j->current_location.type == LOCATION_HEAD) + r = journal_file_next_entry(f, NULL, 0, DIRECTION_DOWN, &o, &p); + else if (j->current_location.type == LOCATION_TAIL) + r = journal_file_next_entry(f, NULL, 0, DIRECTION_UP, &o, &p); + else if (j->current_location.seqnum_set && + sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id)) + r = journal_file_move_to_entry_by_seqnum(f, j->current_location.seqnum, direction, &o, &p); + else if (j->current_location.monotonic_set) { + r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id, j->current_location.monotonic, direction, &o, &p); + + if (r == -ENOENT) { + /* boot id unknown in this file */ + if (j->current_location.realtime_set) + r = journal_file_move_to_entry_by_realtime(f, j->current_location.realtime, direction, &o, &p); + else + r = journal_file_next_entry(f, NULL, 0, direction, &o, &p); + } + } else if (j->current_location.realtime_set) + r = journal_file_move_to_entry_by_realtime(f, j->current_location.realtime, direction, &o, &p); + else + r = journal_file_next_entry(f, NULL, 0, direction, &o, &p); - if (offset < delta) - delta = offset; + if (r <= 0) + return r; - offset -= delta; - size += (DEFAULT_WINDOW_SIZE - delta); - } else - delta = 0; + } else { + Match *m, *term_match = NULL; + Object *to = NULL; + uint64_t tp = 0; - r = journal_file_map(f, - offset, size, - &f->window, &f->window_offset, &f->window_size, - & p); + /* We have matches, first, let's jump to the monotonic + * position if we have any, since it implies a + * match. */ - if (r < 0) - return r; + if (j->current_location.type == LOCATION_DISCRETE && + j->current_location.monotonic_set) { - *ret = (uint8_t*) p + delta; - return 0; -} + r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id, j->current_location.monotonic, direction, &o, &p); + if (r <= 0) + return r == -ENOENT ? 0 : r; + } -static bool verify_hash(Object *o) { - uint64_t t; + LIST_FOREACH(matches, m, j->matches) { + Object *c, *d; + uint64_t cp, dp; - assert(o); + r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), &d, &dp); + if (r <= 0) + return r; - t = le64toh(o->object.type); - if (t == OBJECT_DATA) { - uint64_t s, h1, h2; + if (j->current_location.type == LOCATION_HEAD) + r = journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_DOWN, &c, &cp); + else if (j->current_location.type == LOCATION_TAIL) + r = journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_UP, &c, &cp); + else if (j->current_location.seqnum_set && + sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id)) + r = journal_file_move_to_entry_by_seqnum_for_data(f, dp, j->current_location.seqnum, direction, &c, &cp); + else if (j->current_location.realtime_set) + r = journal_file_move_to_entry_by_realtime_for_data(f, dp, j->current_location.realtime, direction, &c, &cp); + else + r = journal_file_next_entry_for_data(f, NULL, 0, dp, direction, &c, &cp); - s = le64toh(o->object.size); + if (r < 0) + return r; - h1 = le64toh(o->data.hash); - h2 = hash64(o->data.payload, s - offsetof(Object, data.payload)); + if (!term_match) { + term_match = m; - return h1 == h2; - } + if (r > 0) { + to = c; + tp = cp; + } + } else if (same_field(term_match->data, term_match->size, m->data, m->size)) { - return true; -} + /* Same field as previous match... */ + if (r > 0) { -int journal_file_move_to_object(JournalFile *f, uint64_t offset, Object **ret) { - int r; - void *t; - Object *o; - uint64_t s; + /* Find the earliest of the OR matches */ - assert(f); - assert(ret); + if (!to || + (direction == DIRECTION_DOWN && cp < tp) || + (direction == DIRECTION_UP && cp > tp)) { + to = c; + tp = cp; + } - r = journal_file_move_to(f, offset, sizeof(ObjectHeader), &t); - if (r < 0) - return r; + } - o = (Object*) t; - s = le64toh(o->object.size); + } else { - if (s < sizeof(ObjectHeader)) - return -EBADMSG; + /* Previous term is finished, did anything match? */ + if (!to) + return 0; - if (s > sizeof(ObjectHeader)) { - r = journal_file_move_to(f, offset, s, &t); - if (r < 0) - return r; + /* Find the last of the AND matches */ + if (!o || + (direction == DIRECTION_DOWN && tp > p) || + (direction == DIRECTION_UP && tp < p)) { + o = to; + p = tp; + } - o = (Object*) t; - } + term_match = m; - if (!verify_hash(o)) - return -EBADMSG; + if (r > 0) { + to = c; + tp = cp; + } else { + to = NULL; + tp = 0; + } + } + } - *ret = o; - return 0; -} + /* Last term is finished, did anything match? */ + if (!to) + return 0; + + if (!o || + (direction == DIRECTION_DOWN && tp > p) || + (direction == DIRECTION_UP && tp < p)) { + o = to; + p = tp; + } -static uint64_t journal_file_seqnum(JournalFile *f) { - uint64_t r; + if (!o) + return 0; + } - assert(f); + if (ret) + *ret = o; - r = le64toh(f->header->seqnum) + 1; - f->header->seqnum = htole64(r); + if (offset) + *offset = p; - return r; + return 1; } -static int journal_file_append_object(JournalFile *f, uint64_t size, Object **ret, uint64_t *offset) { +static int next_with_matches(sd_journal *j, JournalFile *f, direction_t direction, Object **ret, uint64_t *offset) { int r; - uint64_t p; - Object *tail, *o; - void *t; + uint64_t cp; + Object *c; + assert(j); assert(f); - assert(size >= sizeof(ObjectHeader)); - assert(offset); assert(ret); + assert(offset); - p = le64toh(f->header->tail_object_offset); + c = *ret; + cp = *offset; - if (p == 0) - p = le64toh(f->header->arena_offset); - else { - r = journal_file_move_to_object(f, p, &tail); - if (r < 0) + if (!j->matches) { + /* No matches is easy */ + + r = journal_file_next_entry(f, c, cp, direction, &c, &cp); + if (r <= 0) return r; - p += ALIGN64(le64toh(tail->object.size)); + if (ret) + *ret = c; + if (offset) + *offset = cp; + return 1; } - r = journal_file_allocate(f, p, size); - if (r < 0) - return r; + /* So there are matches we have to adhere to, let's find the + * first entry that matches all of them */ - r = journal_file_move_to(f, p, size, &t); - if (r < 0) - return r; + for (;;) { + uint64_t np, n; + bool found, term_result = false; + Match *m, *term_match = NULL; + Object *npo = NULL; + + n = journal_file_entry_n_items(c); + + /* Make sure we don't match the entry we are starting + * from. */ + found = cp != *offset; + + np = 0; + LIST_FOREACH(matches, m, j->matches) { + uint64_t q, k; + Object *qo = NULL; + + /* Let's check if this is the beginning of a + * new term, i.e. has a different field prefix + * as the preceeding match. */ + if (!term_match) { + term_match = m; + term_result = false; + } else if (!same_field(term_match->data, term_match->size, m->data, m->size)) { + if (!term_result) + found = false; + + term_match = m; + term_result = false; + } + + for (k = 0; k < n; k++) + if (c->entry.items[k].hash == m->le_hash) + break; + + if (k >= n) { + /* Hmm, didn't find any field that + * matched this rule, so ignore this + * match. Go on with next match */ + continue; + } + + term_result = true; - o = (Object*) t; + /* Hmm, so, this field matched, let's remember + * where we'd have to try next, in case the other + * matches are not OK */ - zero(o->object); - o->object.type = htole64(OBJECT_UNUSED); - zero(o->object.reserved); - o->object.size = htole64(size); + r = journal_file_next_entry_for_data(f, c, cp, le64toh(c->entry.items[k].object_offset), direction, &qo, &q); + /* This pointer is invalidated if the window was + * remapped. May need to re-fetch it later */ + c = NULL; + if (r < 0) + return r; - f->header->tail_object_offset = htole64(p); - if (f->header->head_object_offset == 0) - f->header->head_object_offset = htole64(p); + if (r > 0) { + + if (direction == DIRECTION_DOWN) { + if (q > np) { + np = q; + npo = qo; + } + } else { + if (np == 0 || q < np) { + np = q; + npo = qo; + } + } + } + } - f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1); + /* Check the last term */ + if (term_match && !term_result) + found = false; + + /* Did this entry match against all matches? */ + if (found) { + if (ret) { + if (c == NULL) { + /* Re-fetch the entry */ + r = journal_file_move_to_object(f, OBJECT_ENTRY, cp, &c); + if (r < 0) + return r; + } + *ret = c; + } + if (offset) + *offset = cp; + return 1; + } - *ret = o; - *offset = p; + /* Did we find a subsequent entry? */ + if (np == 0) + return 0; - return 0; + /* Hmm, ok, this entry only matched partially, so + * let's try another one */ + cp = np; + c = npo; + } } -static int journal_file_setup_hash_table(JournalFile *f) { - uint64_t s, p; - Object *o; - int r; +static int next_beyond_location(sd_journal *j, JournalFile *f, direction_t direction, Object **ret, uint64_t *offset) { + Object *c; + uint64_t cp; + int compare_value, r; + assert(j); assert(f); - s = DEFAULT_HASH_TABLE_SIZE; - r = journal_file_append_object(f, offsetof(Object, hash_table.table) + s, &o, &p); - if (r < 0) - return r; + if (f->current_offset > 0) { + cp = f->current_offset; - o->object.type = htole64(OBJECT_HASH_TABLE); - memset(o->hash_table.table, 0, s); + r = journal_file_move_to_object(f, OBJECT_ENTRY, cp, &c); + if (r < 0) + return r; - f->header->hash_table_offset = htole64(p + offsetof(Object, hash_table.table)); - f->header->hash_table_size = htole64(s); + r = next_with_matches(j, f, direction, &c, &cp); + if (r <= 0) + return r; - return 0; -} + compare_value = 1; + } else { + r = find_location(j, f, direction, &c, &cp); + if (r <= 0) + return r; -static int journal_file_setup_bisect_table(JournalFile *f) { - uint64_t s, p; - Object *o; - int r; + compare_value = 0; + } - assert(f); + for (;;) { + bool found; - s = DEFAULT_BISECT_TABLE_SIZE; - r = journal_file_append_object(f, offsetof(Object, bisect_table.table) + s, &o, &p); - if (r < 0) - return r; + if (j->current_location.type == LOCATION_DISCRETE) { + int k; - o->object.type = htole64(OBJECT_BISECT_TABLE); - memset(o->bisect_table.table, 0, s); + k = compare_with_location(f, c, &j->current_location); + if (direction == DIRECTION_DOWN) + found = k >= compare_value; + else + found = k <= -compare_value; + } else + found = true; - f->header->bisect_table_offset = htole64(p + offsetof(Object, bisect_table.table)); - f->header->bisect_table_size = htole64(s); + if (found) { + if (ret) + *ret = c; + if (offset) + *offset = cp; + return 1; + } - return 0; + r = next_with_matches(j, f, direction, &c, &cp); + if (r <= 0) + return r; + } } -static int journal_file_map_hash_table(JournalFile *f) { - uint64_t s, p; - void *t; +static int real_journal_next(sd_journal *j, direction_t direction) { + JournalFile *f, *new_current = NULL; + Iterator i; int r; + uint64_t new_offset = 0; + Object *new_entry = NULL; - assert(f); + if (!j) + return -EINVAL; - p = le64toh(f->header->hash_table_offset); - s = le64toh(f->header->hash_table_size); + HASHMAP_FOREACH(f, j->files, i) { + Object *o; + uint64_t p; + bool found; - r = journal_file_map(f, - p, s, - &f->hash_table_window, NULL, &f->hash_table_window_size, - &t); - if (r < 0) - return r; + r = next_beyond_location(j, f, direction, &o, &p); + if (r < 0) { + log_debug("Can't iterate through %s, ignoring: %s", f->path, strerror(-r)); + continue; + } else if (r == 0) + continue; - f->hash_table = t; - return 0; -} + if (!new_current) + found = true; + else { + int k; -static int journal_file_map_bisect_table(JournalFile *f) { - uint64_t s, p; - void *t; - int r; + k = compare_order(f, o, new_current, new_entry); - assert(f); + if (direction == DIRECTION_DOWN) + found = k < 0; + else + found = k > 0; + } - p = le64toh(f->header->bisect_table_offset); - s = le64toh(f->header->bisect_table_size); + if (found) { + new_current = f; + new_entry = o; + new_offset = p; + } + } - r = journal_file_map(f, - p, s, - &f->bisect_table_window, NULL, &f->bisect_table_window_size, - &t); + if (!new_current) + return 0; - if (r < 0) - return r; + set_location(j, new_current, new_entry, new_offset); - f->bisect_table = t; - return 0; + return 1; } -static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash_index) { - uint64_t p; - int r; +_public_ int sd_journal_next(sd_journal *j) { + return real_journal_next(j, DIRECTION_DOWN); +} - assert(f); - assert(o); - assert(offset > 0); - assert(o->object.type == htole64(OBJECT_DATA)); +_public_ int sd_journal_previous(sd_journal *j) { + return real_journal_next(j, DIRECTION_UP); +} - o->data.head_entry_offset = o->data.tail_entry_offset = 0; - o->data.next_hash_offset = 0; +static int real_journal_next_skip(sd_journal *j, direction_t direction, uint64_t skip) { + int c = 0, r; - p = le64toh(f->hash_table[hash_index].tail_hash_offset); - if (p == 0) { - /* Only entry in the hash table is easy */ + if (!j) + return -EINVAL; - o->data.prev_hash_offset = 0; - f->hash_table[hash_index].head_hash_offset = htole64(offset); - } else { - o->data.prev_hash_offset = htole64(p); + if (skip == 0) { + /* If this is not a discrete skip, then at least + * resolve the current location */ + if (j->current_location.type != LOCATION_DISCRETE) + return real_journal_next(j, direction); - /* Temporarily move back to the previous data object, - * to patch in pointer */ + return 0; + } - r = journal_file_move_to_object(f, p, &o); + do { + r = real_journal_next(j, direction); if (r < 0) return r; - o->data.next_hash_offset = offset; + if (r == 0) + return c; - r = journal_file_move_to_object(f, offset, &o); - if (r < 0) - return r; - } + skip--; + c++; + } while (skip > 0); + + return c; +} - f->hash_table[hash_index].tail_hash_offset = htole64(offset); +_public_ int sd_journal_next_skip(sd_journal *j, uint64_t skip) { + return real_journal_next_skip(j, DIRECTION_DOWN, skip); +} - return 0; +_public_ int sd_journal_previous_skip(sd_journal *j, uint64_t skip) { + return real_journal_next_skip(j, DIRECTION_UP, skip); } -static int journal_file_append_data(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) { - uint64_t hash, h, p, np; - uint64_t osize; +_public_ int sd_journal_get_cursor(sd_journal *j, char **cursor) { Object *o; int r; + char bid[33], sid[33]; - assert(f); - assert(data || size == 0); - - osize = offsetof(Object, data.payload) + size; + if (!j) + return -EINVAL; + if (!cursor) + return -EINVAL; - hash = hash64(data, size); - h = hash % (le64toh(f->header->hash_table_size) / sizeof(HashItem)); - p = le64toh(f->hash_table[h].head_hash_offset); + if (!j->current_file || j->current_file->current_offset <= 0) + return -EADDRNOTAVAIL; - while (p != 0) { - /* Look for this data object in the hash table */ + r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o); + if (r < 0) + return r; - r = journal_file_move_to_object(f, p, &o); - if (r < 0) - return r; + sd_id128_to_string(j->current_file->header->seqnum_id, sid); + sd_id128_to_string(o->entry.boot_id, bid); - if (le64toh(o->object.type) != OBJECT_DATA) - return -EBADMSG; + if (asprintf(cursor, + "s=%s;i=%llx;b=%s;m=%llx;t=%llx;x=%llx;p=%s", + sid, (unsigned long long) le64toh(o->entry.seqnum), + bid, (unsigned long long) le64toh(o->entry.monotonic), + (unsigned long long) le64toh(o->entry.realtime), + (unsigned long long) le64toh(o->entry.xor_hash), + path_get_file_name(j->current_file->path)) < 0) + return -ENOMEM; - if (le64toh(o->object.size) == osize && - memcmp(o->data.payload, data, size) == 0) { + return 1; +} - if (le64toh(o->data.hash) != hash) - return -EBADMSG; +_public_ int sd_journal_seek_cursor(sd_journal *j, const char *cursor) { + char *w; + size_t l; + char *state; + unsigned long long seqnum, monotonic, realtime, xor_hash; + bool + seqnum_id_set = false, + seqnum_set = false, + boot_id_set = false, + monotonic_set = false, + realtime_set = false, + xor_hash_set = false; + sd_id128_t seqnum_id, boot_id; - if (ret) - *ret = o; + if (!j) + return -EINVAL; + if (!cursor) + return -EINVAL; - if (offset) - *offset = p; + FOREACH_WORD_SEPARATOR(w, l, cursor, ";", state) { + char *item; + int k = 0; - return 0; + if (l < 2 || w[1] != '=') + return -EINVAL; + + item = strndup(w, l); + if (!item) + return -ENOMEM; + + switch (w[0]) { + + case 's': + seqnum_id_set = true; + k = sd_id128_from_string(w+2, &seqnum_id); + break; + + case 'i': + seqnum_set = true; + if (sscanf(w+2, "%llx", &seqnum) != 1) + k = -EINVAL; + break; + + case 'b': + boot_id_set = true; + k = sd_id128_from_string(w+2, &boot_id); + break; + + case 'm': + monotonic_set = true; + if (sscanf(w+2, "%llx", &monotonic) != 1) + k = -EINVAL; + break; + + case 't': + realtime_set = true; + if (sscanf(w+2, "%llx", &realtime) != 1) + k = -EINVAL; + break; + + case 'x': + xor_hash_set = true; + if (sscanf(w+2, "%llx", &xor_hash) != 1) + k = -EINVAL; + break; } - p = le64toh(o->data.next_hash_offset); + free(item); + + if (k < 0) + return k; } - r = journal_file_append_object(f, osize, &o, &np); - if (r < 0) - return r; + if ((!seqnum_set || !seqnum_id_set) && + (!monotonic_set || !boot_id_set) && + !realtime_set) + return -EINVAL; - o->object.type = htole64(OBJECT_DATA); - o->data.hash = htole64(hash); - memcpy(o->data.payload, data, size); + reset_location(j); - r = journal_file_link_data(f, o, np, h); - if (r < 0) - return r; + j->current_location.type = LOCATION_DISCRETE; - if (ret) - *ret = o; + if (realtime_set) { + j->current_location.realtime = (uint64_t) realtime; + j->current_location.realtime_set = true; + } - if (offset) - *offset = np; + if (seqnum_set && seqnum_id_set) { + j->current_location.seqnum = (uint64_t) seqnum; + j->current_location.seqnum_id = seqnum_id; + j->current_location.seqnum_set = true; + } - return 0; -} + if (monotonic_set && boot_id_set) { + j->current_location.monotonic = (uint64_t) monotonic; + j->current_location.boot_id = boot_id; + j->current_location.monotonic_set = true; + } -uint64_t journal_file_entry_n_items(Object *o) { - assert(o); - assert(o->object.type == htole64(OBJECT_ENTRY)); + if (xor_hash_set) { + j->current_location.xor_hash = (uint64_t) xor_hash; + j->current_location.xor_hash_set = true; + } - return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem); + return 0; } -static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) { - uint64_t p, q; - int r; - assert(f); - assert(o); - assert(offset > 0); - - p = le64toh(o->entry.items[i].object_offset); - if (p == 0) +_public_ int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t usec) { + if (!j) return -EINVAL; - o->entry.items[i].next_entry_offset = 0; - - /* Move to the data object */ - r = journal_file_move_to_object(f, p, &o); - if (r < 0) - return r; + reset_location(j); + j->current_location.type = LOCATION_DISCRETE; + j->current_location.boot_id = boot_id; + j->current_location.monotonic = usec; + j->current_location.monotonic_set = true; - if (o->object.type != htole64(OBJECT_DATA)) - return -EBADMSG; + return 0; +} - q = le64toh(o->data.tail_entry_offset); - o->data.tail_entry_offset = htole64(offset); +_public_ int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec) { + if (!j) + return -EINVAL; - if (q == 0) - o->data.head_entry_offset = htole64(offset); - else { - uint64_t n, j; + reset_location(j); + j->current_location.type = LOCATION_DISCRETE; + j->current_location.realtime = usec; + j->current_location.realtime_set = true; - /* Move to previous entry */ - r = journal_file_move_to_object(f, q, &o); - if (r < 0) - return r; + return 0; +} - if (o->object.type != htole64(OBJECT_ENTRY)) - return -EBADMSG; +_public_ int sd_journal_seek_head(sd_journal *j) { + if (!j) + return -EINVAL; - n = journal_file_entry_n_items(o); - for (j = 0; j < n; j++) - if (le64toh(o->entry.items[j].object_offset) == p) - break; + reset_location(j); + j->current_location.type = LOCATION_HEAD; - if (j >= n) - return -EBADMSG; + return 0; +} - o->entry.items[j].next_entry_offset = offset; - } +_public_ int sd_journal_seek_tail(sd_journal *j) { + if (!j) + return -EINVAL; - /* Move back to original entry */ - r = journal_file_move_to_object(f, offset, &o); - if (r < 0) - return r; + reset_location(j); + j->current_location.type = LOCATION_TAIL; - o->entry.items[i].prev_entry_offset = q; return 0; } -static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) { - uint64_t p, i, n, k, a, b; +static int add_file(sd_journal *j, const char *prefix, const char *dir, const char *filename) { + char *fn; int r; + JournalFile *f; - assert(f); - assert(o); - assert(offset > 0); - assert(o->object.type == htole64(OBJECT_ENTRY)); - - /* Link up the entry itself */ - p = le64toh(f->header->tail_entry_offset); + assert(j); + assert(prefix); + assert(filename); - o->entry.prev_entry_offset = f->header->tail_entry_offset; - o->entry.next_entry_offset = 0; + if ((j->flags & SD_JOURNAL_SYSTEM_ONLY) && + !(streq(filename, "system.journal") || + (startswith(filename, "system@") && endswith(filename, ".journal")))) + return 0; - if (p == 0) - f->header->head_entry_offset = htole64(offset); - else { - /* Temporarily move back to the previous entry, to - * patch in pointer */ + if (dir) + fn = join(prefix, "/", dir, "/", filename, NULL); + else + fn = join(prefix, "/", filename, NULL); - r = journal_file_move_to_object(f, p, &o); - if (r < 0) - return r; + if (!fn) + return -ENOMEM; - o->entry.next_entry_offset = htole64(offset); + if (hashmap_get(j->files, fn)) { + free(fn); + return 0; + } - r = journal_file_move_to_object(f, offset, &o); - if (r < 0) - return r; + if (hashmap_size(j->files) >= JOURNAL_FILES_MAX) { + log_debug("Too many open journal files, not adding %s, ignoring.", fn); + free(fn); + return 0; } - f->header->tail_entry_offset = htole64(offset); + r = journal_file_open(fn, O_RDONLY, 0, NULL, &f); + free(fn); - /* Link up the items */ - n = journal_file_entry_n_items(o); - for (i = 0; i < n; i++) { - r = journal_file_link_entry_item(f, o, offset, i); - if (r < 0) - return r; - } + if (r < 0) { + if (errno == ENOENT) + return 0; - /* Link up the entry in the bisect table */ - n = le64toh(f->header->bisect_table_size) / sizeof(uint64_t); - k = le64toh(f->header->arena_max_size) / n; + return r; + } - a = (le64toh(f->header->last_bisect_offset) + k - 1) / k; - b = offset / k; + /* journal_file_dump(f); */ - for (; a <= b; a++) - f->bisect_table[a] = htole64(offset); + r = hashmap_put(j->files, f->path, f); + if (r < 0) { + journal_file_close(f); + return r; + } - f->header->last_bisect_offset = htole64(offset + le64toh(o->object.size)); + log_debug("File %s got added.", f->path); return 0; } -static int journal_file_append_entry_internal( - JournalFile *f, - const dual_timestamp *ts, - uint64_t xor_hash, - const EntryItem items[], unsigned n_items, - Object **ret, uint64_t *offset) { - uint64_t np; - uint64_t osize; - Object *o; - int r; - - assert(f); - assert(items || n_items == 0); +static int remove_file(sd_journal *j, const char *prefix, const char *dir, const char *filename) { + char *fn; + JournalFile *f; - osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem)); + assert(j); + assert(prefix); + assert(filename); - r = journal_file_append_object(f, osize, &o, &np); - if (r < 0) - return r; + if (dir) + fn = join(prefix, "/", dir, "/", filename, NULL); + else + fn = join(prefix, "/", filename, NULL); - o->object.type = htole64(OBJECT_ENTRY); - o->entry.seqnum = htole64(journal_file_seqnum(f)); - memcpy(o->entry.items, items, n_items * sizeof(EntryItem)); - o->entry.realtime = ts ? htole64(ts->realtime) : 0; - o->entry.monotonic = ts ? htole64(ts->monotonic) : 0; - o->entry.xor_hash = htole64(xor_hash); + if (!fn) + return -ENOMEM; - r = journal_file_link_entry(f, o, np); - if (r < 0) - return r; + f = hashmap_get(j->files, fn); + free(fn); - if (ret) - *ret = o; + if (!f) + return 0; - if (offset) - *offset = np; + hashmap_remove(j->files, f->path); + journal_file_close(f); + log_debug("File %s got removed.", f->path); return 0; } -int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, Object **ret, uint64_t *offset) { - unsigned i; - EntryItem *items; +static int add_directory(sd_journal *j, const char *prefix, const char *dir) { + char *fn; int r; - uint64_t xor_hash = 0; + DIR *d; + int wd; + sd_id128_t id, mid; - assert(f); - assert(iovec || n_iovec == 0); + assert(j); + assert(prefix); + assert(dir); - items = new(EntryItem, n_iovec); - if (!items) + if ((j->flags & SD_JOURNAL_LOCAL_ONLY) && + (sd_id128_from_string(dir, &id) < 0 || + sd_id128_get_machine(&mid) < 0 || + !sd_id128_equal(id, mid))) + return 0; + + fn = join(prefix, "/", dir, NULL); + if (!fn) return -ENOMEM; - for (i = 0; i < n_iovec; i++) { - uint64_t p; - Object *o; + d = opendir(fn); - r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p); - if (r < 0) - goto finish; + if (!d) { + free(fn); + if (errno == ENOENT) + return 0; - xor_hash ^= le64toh(o->data.hash); - items[i].object_offset = htole64(p); + return -errno; } - r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, ret, offset); + wd = inotify_add_watch(j->inotify_fd, fn, + IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE| + IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT| + IN_DONT_FOLLOW|IN_ONLYDIR); + if (wd > 0) { + if (hashmap_put(j->inotify_wd_dirs, INT_TO_PTR(wd), fn) < 0) + inotify_rm_watch(j->inotify_fd, wd); + else + fn = NULL; + } -finish: - free(items); + free(fn); - return r; -} + for (;;) { + struct dirent buf, *de; -int journal_file_move_to_entry(JournalFile *f, uint64_t seqnum, Object **ret, uint64_t *offset) { - Object *o; - uint64_t lower, upper, p, n, k; - int r; + r = readdir_r(d, &buf, &de); + if (r != 0 || !de) + break; - assert(f); + if (!dirent_is_file_with_suffix(de, ".journal")) + continue; - n = le64toh(f->header->bisect_table_size) / sizeof(uint64_t); - k = le64toh(f->header->arena_max_size) / n; + r = add_file(j, prefix, dir, de->d_name); + if (r < 0) + log_debug("Failed to add file %s/%s/%s: %s", prefix, dir, de->d_name, strerror(-r)); + } - lower = 0; - upper = le64toh(f->header->last_bisect_offset)/k+1; + closedir(d); - while (lower < upper) { - k = (upper + lower) / 2; - p = le64toh(f->bisect_table[k]); + log_debug("Directory %s/%s got added.", prefix, dir); - if (p == 0) { - upper = k; - continue; - } + return 0; +} - r = journal_file_move_to_object(f, p, &o); - if (r < 0) - return r; +static void remove_directory_wd(sd_journal *j, int wd) { + char *p; - if (o->object.type != htole64(OBJECT_ENTRY)) - return -EBADMSG; + assert(j); + assert(wd > 0); - if (o->entry.seqnum == seqnum) { - if (ret) - *ret = o; + if (j->inotify_fd >= 0) + inotify_rm_watch(j->inotify_fd, wd); - if (offset) - *offset = p; + p = hashmap_remove(j->inotify_wd_dirs, INT_TO_PTR(wd)); - return 1; - } else if (seqnum < o->entry.seqnum) - upper = k; - else if (seqnum > o->entry.seqnum) - lower = k+1; + if (p) { + log_debug("Directory %s got removed.", p); + free(p); } +} - assert(lower == upper); +static void add_root_wd(sd_journal *j, const char *p) { + int wd; + char *k; - if (lower <= 0) - return 0; + assert(j); + assert(p); + + wd = inotify_add_watch(j->inotify_fd, p, + IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE| + IN_DONT_FOLLOW|IN_ONLYDIR); + if (wd <= 0) + return; + + k = strdup(p); + if (!k || hashmap_put(j->inotify_wd_roots, INT_TO_PTR(wd), k) < 0) { + inotify_rm_watch(j->inotify_fd, wd); + free(k); + } +} - /* The object we are looking for is between - * bisect_table[lower-1] and bisect_table[lower] */ +static void remove_root_wd(sd_journal *j, int wd) { + char *p; - p = le64toh(f->bisect_table[lower-1]); + assert(j); + assert(wd > 0); - for (;;) { - r = journal_file_move_to_object(f, p, &o); - if (r < 0) - return r; + if (j->inotify_fd >= 0) + inotify_rm_watch(j->inotify_fd, wd); - if (o->entry.seqnum == seqnum) { - if (ret) - *ret = o; + p = hashmap_remove(j->inotify_wd_roots, INT_TO_PTR(wd)); - if (offset) - *offset = p; + if (p) { + log_debug("Root %s got removed.", p); + free(p); + } +} - return 1; +_public_ int sd_journal_open(sd_journal **ret, int flags) { + sd_journal *j; + const char *p; + const char search_paths[] = + "/run/log/journal\0" + "/var/log/journal\0"; + int r; - } if (seqnum < o->entry.seqnum) - return 0; + if (!ret) + return -EINVAL; - if (o->entry.next_entry_offset == 0) - return 0; + if (flags & ~(SD_JOURNAL_LOCAL_ONLY| + SD_JOURNAL_RUNTIME_ONLY| + SD_JOURNAL_SYSTEM_ONLY)) + return -EINVAL; - p = le64toh(o->entry.next_entry_offset); - } + j = new0(sd_journal, 1); + if (!j) + return -ENOMEM; - return 0; -} + j->flags = flags; -int journal_file_next_entry(JournalFile *f, Object *o, Object **ret, uint64_t *offset) { - uint64_t np; - int r; + j->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC); + if (j->inotify_fd < 0) { + r = -errno; + goto fail; + } - assert(f); + j->files = hashmap_new(string_hash_func, string_compare_func); + if (!j->files) { + r = -ENOMEM; + goto fail; + } - if (!o) - np = le64toh(f->header->head_entry_offset); - else { - if (le64toh(o->object.type) != OBJECT_ENTRY) - return -EINVAL; + j->inotify_wd_dirs = hashmap_new(trivial_hash_func, trivial_compare_func); + j->inotify_wd_roots = hashmap_new(trivial_hash_func, trivial_compare_func); - np = le64toh(o->entry.next_entry_offset); + if (!j->inotify_wd_dirs || !j->inotify_wd_roots) { + r = -ENOMEM; + goto fail; } - if (np == 0) - return 0; + /* We ignore most errors here, since the idea is to only open + * what's actually accessible, and ignore the rest. */ - r = journal_file_move_to_object(f, np, &o); - if (r < 0) - return r; + NULSTR_FOREACH(p, search_paths) { + DIR *d; - if (le64toh(o->object.type) != OBJECT_ENTRY) - return -EBADMSG; + if ((flags & SD_JOURNAL_RUNTIME_ONLY) && + !path_startswith(p, "/run")) + continue; - if (ret) - *ret = o; + d = opendir(p); + if (!d) { + if (errno != ENOENT) + log_debug("Failed to open %s: %m", p); + continue; + } - if (offset) - *offset = np; + add_root_wd(j, p); - return 1; -} + for (;;) { + struct dirent buf, *de; + sd_id128_t id; -int journal_file_prev_entry(JournalFile *f, Object *o, Object **ret, uint64_t *offset) { - uint64_t np; - int r; + r = readdir_r(d, &buf, &de); + if (r != 0 || !de) + break; - assert(f); + if (dirent_is_file_with_suffix(de, ".journal")) { + r = add_file(j, p, NULL, de->d_name); + if (r < 0) + log_debug("Failed to add file %s/%s: %s", p, de->d_name, strerror(-r)); - if (!o) - np = le64toh(f->header->tail_entry_offset); - else { - if (le64toh(o->object.type) != OBJECT_ENTRY) - return -EINVAL; + } else if ((de->d_type == DT_DIR || de->d_type == DT_UNKNOWN) && + sd_id128_from_string(de->d_name, &id) >= 0) { - np = le64toh(o->entry.prev_entry_offset); + r = add_directory(j, p, de->d_name); + if (r < 0) + log_debug("Failed to add directory %s/%s: %s", p, de->d_name, strerror(-r)); + } + } + + closedir(d); } - if (np == 0) - return 0; + *ret = j; + return 0; - r = journal_file_move_to_object(f, np, &o); - if (r < 0) - return r; +fail: + sd_journal_close(j); - if (le64toh(o->object.type) != OBJECT_ENTRY) - return -EBADMSG; + return r; +}; - if (ret) - *ret = o; +_public_ void sd_journal_close(sd_journal *j) { + if (!j) + return; - if (offset) - *offset = np; + if (j->inotify_wd_dirs) { + void *k; - return 1; -} + while ((k = hashmap_first_key(j->inotify_wd_dirs))) + remove_directory_wd(j, PTR_TO_INT(k)); -int journal_file_find_first_entry(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) { - uint64_t p, osize, hash, h; - int r; + hashmap_free(j->inotify_wd_dirs); + } - assert(f); - assert(data || size == 0); + if (j->inotify_wd_roots) { + void *k; - osize = offsetof(Object, data.payload) + size; + while ((k = hashmap_first_key(j->inotify_wd_roots))) + remove_root_wd(j, PTR_TO_INT(k)); - hash = hash64(data, size); - h = hash % (le64toh(f->header->hash_table_size) / sizeof(HashItem)); - p = le64toh(f->hash_table[h].head_hash_offset); + hashmap_free(j->inotify_wd_roots); + } - while (p != 0) { - Object *o; + if (j->files) { + JournalFile *f; - r = journal_file_move_to_object(f, p, &o); - if (r < 0) - return r; + while ((f = hashmap_steal_first(j->files))) + journal_file_close(f); - if (le64toh(o->object.type) != OBJECT_DATA) - return -EBADMSG; + hashmap_free(j->files); + } - if (le64toh(o->object.size) == osize && - memcmp(o->data.payload, data, size) == 0) { + sd_journal_flush_matches(j); - if (le64toh(o->data.hash) != hash) - return -EBADMSG; + if (j->inotify_fd >= 0) + close_nointr_nofail(j->inotify_fd); - if (o->data.head_entry_offset == 0) - return 0; + free(j); +} - p = le64toh(o->data.head_entry_offset); - r = journal_file_move_to_object(f, p, &o); - if (r < 0) - return r; +_public_ int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret) { + Object *o; + JournalFile *f; + int r; - if (le64toh(o->object.type) != OBJECT_ENTRY) - return -EBADMSG; + if (!j) + return -EINVAL; + if (!ret) + return -EINVAL; - if (ret) - *ret = o; + f = j->current_file; + if (!f) + return -EADDRNOTAVAIL; - if (offset) - *offset = p; + if (f->current_offset <= 0) + return -EADDRNOTAVAIL; - return 1; - } + r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o); + if (r < 0) + return r; + + *ret = le64toh(o->entry.realtime); + return 0; +} + +_public_ int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id128_t *ret_boot_id) { + Object *o; + JournalFile *f; + int r; + sd_id128_t id; + + if (!j) + return -EINVAL; + if (!ret) + return -EINVAL; + + f = j->current_file; + if (!f) + return -EADDRNOTAVAIL; + + if (f->current_offset <= 0) + return -EADDRNOTAVAIL; + + r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o); + if (r < 0) + return r; - p = le64toh(o->data.next_hash_offset); + if (ret_boot_id) + *ret_boot_id = o->entry.boot_id; + else { + r = sd_id128_get_boot(&id); + if (r < 0) + return r; + + if (!sd_id128_equal(id, o->entry.boot_id)) + return -ESTALE; } + *ret = le64toh(o->entry.monotonic); return 0; } -int journal_file_find_last_entry(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) { - uint64_t p, osize, hash, h; +_public_ int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *size) { + JournalFile *f; + uint64_t i, n; + size_t field_length; int r; + Object *o; - assert(f); - assert(data || size == 0); + if (!j) + return -EINVAL; + if (!field) + return -EINVAL; + if (!data) + return -EINVAL; + if (!size) + return -EINVAL; - osize = offsetof(Object, data.payload) + size; + if (isempty(field) || strchr(field, '=')) + return -EINVAL; - hash = hash64(data, size); - h = hash % (le64toh(f->header->hash_table_size) / sizeof(HashItem)); - p = le64toh(f->hash_table[h].tail_hash_offset); + f = j->current_file; + if (!f) + return -EADDRNOTAVAIL; - while (p != 0) { - Object *o; + if (f->current_offset <= 0) + return -EADDRNOTAVAIL; + + r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o); + if (r < 0) + return r; + + field_length = strlen(field); + + n = journal_file_entry_n_items(o); + for (i = 0; i < n; i++) { + uint64_t p, l; + le64_t le_hash; + size_t t; - r = journal_file_move_to_object(f, p, &o); + p = le64toh(o->entry.items[i].object_offset); + le_hash = o->entry.items[i].hash; + r = journal_file_move_to_object(f, OBJECT_DATA, p, &o); if (r < 0) return r; - if (le64toh(o->object.type) != OBJECT_DATA) + if (le_hash != o->data.hash) return -EBADMSG; - if (le64toh(o->object.size) == osize && - memcmp(o->data.payload, data, size) == 0) { + l = le64toh(o->object.size) - offsetof(Object, data.payload); - if (le64toh(o->data.hash) != hash) - return -EBADMSG; + if (o->object.flags & OBJECT_COMPRESSED) { + +#ifdef HAVE_XZ + if (uncompress_startswith(o->data.payload, l, + &f->compress_buffer, &f->compress_buffer_size, + field, field_length, '=')) { + + uint64_t rsize; + + if (!uncompress_blob(o->data.payload, l, + &f->compress_buffer, &f->compress_buffer_size, &rsize)) + return -EBADMSG; + + *data = f->compress_buffer; + *size = (size_t) rsize; - if (o->data.tail_entry_offset == 0) return 0; + } +#else + return -EPROTONOSUPPORT; +#endif - p = le64toh(o->data.tail_entry_offset); - r = journal_file_move_to_object(f, p, &o); - if (r < 0) - return r; + } else if (l >= field_length+1 && + memcmp(o->data.payload, field, field_length) == 0 && + o->data.payload[field_length] == '=') { - if (le64toh(o->object.type) != OBJECT_ENTRY) - return -EBADMSG; + t = (size_t) l; - if (ret) - *ret = o; + if ((uint64_t) t != l) + return -E2BIG; - if (offset) - *offset = p; + *data = o->data.payload; + *size = t; - return 1; + return 0; } - p = le64toh(o->data.prev_hash_offset); + r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o); + if (r < 0) + return r; } - return 0; + return -ENOENT; } -void journal_file_dump(JournalFile *f) { - char a[33], b[33], c[33]; - Object *o; +_public_ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) { + JournalFile *f; + uint64_t p, l, n; + le64_t le_hash; int r; - uint64_t p; + Object *o; + size_t t; - assert(f); + if (!j) + return -EINVAL; + if (!data) + return -EINVAL; + if (!size) + return -EINVAL; - printf("File ID: %s\n" - "Machine ID: %s\n" - "Boot ID: %s\n" - "Arena size: %llu\n", - sd_id128_to_string(f->header->file_id, a), - sd_id128_to_string(f->header->machine_id, b), - sd_id128_to_string(f->header->boot_id, c), - (unsigned long long) le64toh(f->header->arena_size)); - - p = le64toh(f->header->head_object_offset); - while (p != 0) { - r = journal_file_move_to_object(f, p, &o); - if (r < 0) - goto fail; + f = j->current_file; + if (!f) + return -EADDRNOTAVAIL; - switch (o->object.type) { + if (f->current_offset <= 0) + return -EADDRNOTAVAIL; - case OBJECT_UNUSED: - printf("Type: OBJECT_UNUSED\n"); - break; + r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o); + if (r < 0) + return r; - case OBJECT_DATA: - printf("Type: OBJECT_DATA\n"); - break; + n = journal_file_entry_n_items(o); + if (j->current_field >= n) + return 0; - case OBJECT_ENTRY: - printf("Type: OBJECT_ENTRY %llu\n", (unsigned long long) le64toh(o->entry.seqnum)); - break; + p = le64toh(o->entry.items[j->current_field].object_offset); + le_hash = o->entry.items[j->current_field].hash; + r = journal_file_move_to_object(f, OBJECT_DATA, p, &o); + if (r < 0) + return r; - case OBJECT_HASH_TABLE: - printf("Type: OBJECT_HASH_TABLE\n"); - break; + if (le_hash != o->data.hash) + return -EBADMSG; - case OBJECT_BISECT_TABLE: - printf("Type: OBJECT_BISECT_TABLE\n"); - break; - } + l = le64toh(o->object.size) - offsetof(Object, data.payload); + t = (size_t) l; - if (p == le64toh(f->header->tail_object_offset)) - p = 0; - else - p = p + ALIGN64(le64toh(o->object.size)); + /* We can't read objects larger than 4G on a 32bit machine */ + if ((uint64_t) t != l) + return -E2BIG; + + if (o->object.flags & OBJECT_COMPRESSED) { +#ifdef HAVE_XZ + uint64_t rsize; + + if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize)) + return -EBADMSG; + + *data = f->compress_buffer; + *size = (size_t) rsize; +#else + return -EPROTONOSUPPORT; +#endif + } else { + *data = o->data.payload; + *size = t; } - return; -fail: - log_error("File corrupt"); -} + j->current_field ++; -int journal_file_open( - const char *fname, - int flags, - mode_t mode, - JournalFile **ret) { + return 1; +} - JournalFile *f; - int r; - bool newly_created = false; +_public_ void sd_journal_restart_data(sd_journal *j) { + if (!j) + return; - assert(fname); + j->current_field = 0; +} - if ((flags & O_ACCMODE) != O_RDONLY && - (flags & O_ACCMODE) != O_RDWR) +_public_ int sd_journal_get_fd(sd_journal *j) { + if (!j) return -EINVAL; - f = new0(JournalFile, 1); - if (!f) - return -ENOMEM; + return j->inotify_fd; +} - f->writable = (flags & O_ACCMODE) != O_RDONLY; - f->prot = prot_from_flags(flags); +static void process_inotify_event(sd_journal *j, struct inotify_event *e) { + char *p; + int r; - f->fd = open(fname, flags|O_CLOEXEC, mode); - if (f->fd < 0) { - r = -errno; - goto fail; - } + assert(j); + assert(e); - f->path = strdup(fname); - if (!f->path) { - r = -ENOMEM; - goto fail; - } + /* Is this a subdirectory we watch? */ + p = hashmap_get(j->inotify_wd_dirs, INT_TO_PTR(e->wd)); + if (p) { - if (fstat(f->fd, &f->last_stat) < 0) { - r = -errno; - goto fail; - } + if (!(e->mask & IN_ISDIR) && e->len > 0 && endswith(e->name, ".journal")) { - if (f->last_stat.st_size == 0 && f->writable) { - newly_created = true; + /* Event for a journal file */ - r = journal_file_init_header(f); - if (r < 0) - goto fail; + if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) { + r = add_file(j, p, NULL, e->name); + if (r < 0) + log_debug("Failed to add file %s/%s: %s", p, e->name, strerror(-r)); + } else if (e->mask & (IN_DELETE|IN_UNMOUNT)) { + + r = remove_file(j, p, NULL, e->name); + if (r < 0) + log_debug("Failed to remove file %s/%s: %s", p, e->name, strerror(-r)); + } - if (fstat(f->fd, &f->last_stat) < 0) { - r = -errno; - goto fail; + } else if (e->len == 0) { + + /* Event for the directory itself */ + + if (e->mask & (IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT)) + remove_directory_wd(j, e->wd); } - } - if (f->last_stat.st_size < (off_t) sizeof(Header)) { - r = -EIO; - goto fail; + return; } - f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0); - if (f->header == MAP_FAILED) { - f->header = NULL; - r = -errno; - goto fail; - } + /* Must be the root directory then? */ + p = hashmap_get(j->inotify_wd_roots, INT_TO_PTR(e->wd)); + if (p) { + sd_id128_t id; - if (!newly_created) { - r = journal_file_verify_header(f); - if (r < 0) - goto fail; - } + if (!(e->mask & IN_ISDIR) && e->len > 0 && endswith(e->name, ".journal")) { - if (f->writable) { - r = journal_file_refresh_header(f); - if (r < 0) - goto fail; - } + /* Event for a journal file */ - if (newly_created) { + if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) { + r = add_file(j, p, NULL, e->name); + if (r < 0) + log_debug("Failed to add file %s/%s: %s", p, e->name, strerror(-r)); + } else if (e->mask & (IN_DELETE|IN_UNMOUNT)) { - r = journal_file_setup_hash_table(f); - if (r < 0) - goto fail; + r = remove_file(j, p, NULL, e->name); + if (r < 0) + log_debug("Failed to remove file %s/%s: %s", p, e->name, strerror(-r)); + } - r = journal_file_setup_bisect_table(f); - if (r < 0) - goto fail; - } + } else if ((e->mask & IN_ISDIR) && e->len > 0 && sd_id128_from_string(e->name, &id) >= 0) { - r = journal_file_map_hash_table(f); - if (r < 0) - goto fail; + /* Event for subdirectory */ - r = journal_file_map_bisect_table(f); - if (r < 0) - goto fail; + if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) { - if (ret) - *ret = f; + r = add_directory(j, p, e->name); + if (r < 0) + log_debug("Failed to add directory %s/%s: %s", p, e->name, strerror(-r)); + } + } - return 0; + return; + } -fail: - journal_file_close(f); + if (e->mask & IN_IGNORED) + return; - return r; + log_warning("Unknown inotify event."); } -int sd_journal_open(sd_journal **ret) { - sd_journal *j; - char *fn; - const char *p; - int r = 0; - const char search_paths[] = - "/run/log/journal\0" - "/var/log/journal\0"; - - assert(ret); +_public_ int sd_journal_process(sd_journal *j) { + uint8_t buffer[sizeof(struct inotify_event) + FILENAME_MAX]; - j = new0(sd_journal, 1); if (!j) - return -ENOMEM; + return -EINVAL; - j->files = hashmap_new(string_hash_func, string_compare_func); - if (!j->files) - goto fail; + for (;;) { + struct inotify_event *e; + ssize_t l; - NULSTR_FOREACH(p, search_paths) { - DIR *d; + l = read(j->inotify_fd, buffer, sizeof(buffer)); + if (l < 0) { + if (errno == EINTR || errno == EAGAIN) + return 0; - d = opendir(p); - if (!d) { - if (errno != ENOENT && r == 0) - r = -errno; + return -errno; + } - continue; + e = (struct inotify_event*) buffer; + while (l > 0) { + size_t step; + + process_inotify_event(j, e); + + step = sizeof(struct inotify_event) + e->len; + assert(step <= (size_t) l); + + e = (struct inotify_event*) ((uint8_t*) e + step); + l -= step; } + } +} - for (;;) { - struct dirent buf, *de; - int k; - JournalFile *f; +_public_ int sd_journal_wait(sd_journal *j, uint64_t timeout_usec) { + int r, k; - k = readdir_r(d, &buf, &de); - if (k != 0) { - if (r == 0) - r = -k; + assert(j); - break; - } + r = fd_wait_for_event(j->inotify_fd, POLLIN, timeout_usec); + k = sd_journal_process(j); - if (!de) - break; + if (r < 0) + return r; - if (!dirent_is_file_with_suffix(de, ".journal")) - continue; + return k; +} - fn = join(p, "/", de->d_name, NULL); - if (!fn) { - r = -ENOMEM; - closedir(d); - goto fail; - } +_public_ int sd_journal_get_cutoff_realtime_usec(sd_journal *j, uint64_t *from, uint64_t *to) { + Iterator i; + JournalFile *f; + bool first = true; + int r; - k = journal_file_open(fn, O_RDONLY, 0, &f); - free(fn); + if (!j) + return -EINVAL; + if (!from && !to) + return -EINVAL; - if (k < 0) { + HASHMAP_FOREACH(f, j->files, i) { + usec_t fr, t; - if (r == 0) - r = -k; - } else { - k = hashmap_put(j->files, f->path, f); - if (k < 0) { - journal_file_close(f); - closedir(d); + r = journal_file_get_cutoff_realtime_usec(f, &fr, &t); + if (r < 0) + return r; + if (r == 0) + continue; - r = k; - goto fail; - } - } + if (first) { + if (from) + *from = fr; + if (to) + *to = t; + first = false; + } else { + if (from) + *from = MIN(fr, *from); + if (to) + *to = MIN(t, *to); } } - *ret = j; - return 0; - -fail: - sd_journal_close(j); + return first ? 0 : 1; +} - return r; -}; +_public_ int sd_journal_get_cutoff_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t *from, uint64_t *to) { + Iterator i; + JournalFile *f; + bool first = true; + int r; -void sd_journal_close(sd_journal *j) { - assert(j); + if (!j) + return -EINVAL; + if (!from && !to) + return -EINVAL; - if (j->files) { - JournalFile *f; + HASHMAP_FOREACH(f, j->files, i) { + usec_t fr, t; - while ((f = hashmap_steal_first(j->files))) - journal_file_close(f); + r = journal_file_get_cutoff_monotonic_usec(f, boot_id, &fr, &t); + if (r < 0) + return r; + if (r == 0) + continue; - hashmap_free(j->files); + if (first) { + if (from) + *from = fr; + if (to) + *to = t; + first = false; + } else { + if (from) + *from = MIN(fr, *from); + if (to) + *to = MIN(t, *to); + } } - free(j); + return first ? 0 : 1; } + + +/* _public_ int sd_journal_query_unique(sd_journal *j, const char *field) { */ +/* if (!j) */ +/* return -EINVAL; */ +/* if (!field) */ +/* return -EINVAL; */ + +/* return -ENOTSUP; */ +/* } */ + +/* _public_ int sd_journal_enumerate_unique(sd_journal *j, const void **data, size_t *l) { */ +/* if (!j) */ +/* return -EINVAL; */ +/* if (!data) */ +/* return -EINVAL; */ +/* if (!l) */ +/* return -EINVAL; */ + +/* return -ENOTSUP; */ +/* } */ + +/* _public_ void sd_journal_restart_unique(sd_journal *j) { */ +/* if (!j) */ +/* return; */ +/* } */