X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=elogind.git;a=blobdiff_plain;f=src%2Fjournal%2Fsd-journal.c;h=41f0c4dfb4bc677ae57df8b50bc2b60b5057bb1f;hp=d49f7179153ea4318dc5043c37256966255b17ed;hb=943aad8ca57a6b5c49c4ea60f9e8c13bf9b20e6c;hpb=dad503169b2665ecfd3f5bfb3c936897e44ecca7 diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index d49f71791..41f0c4dfb 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -6,1338 +6,2446 @@ Copyright 2011 Lennart Poettering systemd is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. systemd is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + Lesser General Public License for more details. - You should have received a copy of the GNU General Public License + You should have received a copy of the GNU Lesser General Public License along with systemd; If not, see . ***/ -#include #include -#include -#include -#include #include #include +#include +#include +#include +#include +#include #include "sd-journal.h" #include "journal-def.h" -#include "journal-private.h" -#include "lookup3.h" +#include "journal-file.h" +#include "hashmap.h" #include "list.h" +#include "path-util.h" +#include "lookup3.h" +#include "compress.h" +#include "journal-internal.h" +#include "missing.h" +#include "catalog.h" +#include "replace-var.h" -#define DEFAULT_ARENA_MAX_SIZE (16ULL*1024ULL*1024ULL*1024ULL) -#define DEFAULT_ARENA_MIN_SIZE (256ULL*1024ULL) -#define DEFAULT_ARENA_KEEP_FREE (1ULL*1024ULL*1024ULL) - -#define DEFAULT_HASH_TABLE_SIZE (2047ULL*16ULL) -#define DEFAULT_BISECT_TABLE_SIZE ((DEFAULT_ARENA_MAX_SIZE/(64ULL*1024ULL))*8ULL) - -#define DEFAULT_WINDOW_SIZE (128ULL*1024ULL*1024ULL) +#define JOURNAL_FILES_MAX 1024 -struct JournalFile { - sd_journal *journal; +#define JOURNAL_FILES_RECHECK_USEC (2 * USEC_PER_SEC) - int fd; - char *path; - struct stat last_stat; - int prot; - bool writable; +#define REPLACE_VAR_MAX 256 - Header *header; +static void detach_location(sd_journal *j) { + Iterator i; + JournalFile *f; - HashItem *hash_table; - void *hash_table_window; - uint64_t hash_table_window_size; + assert(j); - uint64_t *bisect_table; - void *bisect_table_window; - uint64_t bisect_table_window_size; + j->current_file = NULL; + j->current_field = 0; - void *window; - uint64_t window_offset; - uint64_t window_size; + HASHMAP_FOREACH(f, j->files, i) + f->current_offset = 0; +} - Object *current; - uint64_t current_offset; +static void reset_location(sd_journal *j) { + assert(j); - LIST_FIELDS(JournalFile, files); -}; + detach_location(j); + zero(j->current_location); +} -struct sd_journal { - LIST_HEAD(JournalFile, files); -}; +static void init_location(Location *l, LocationType type, JournalFile *f, Object *o) { + assert(l); + assert(type == LOCATION_DISCRETE || type == LOCATION_SEEK); + assert(f); + assert(o->object.type == OBJECT_ENTRY); -static const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' }; + l->type = type; + l->seqnum = le64toh(o->entry.seqnum); + l->seqnum_id = f->header->seqnum_id; + l->realtime = le64toh(o->entry.realtime); + l->monotonic = le64toh(o->entry.monotonic); + l->boot_id = o->entry.boot_id; + l->xor_hash = le64toh(o->entry.xor_hash); -#define ALIGN64(x) (((x) + 7ULL) & ~7ULL) + l->seqnum_set = l->realtime_set = l->monotonic_set = l->xor_hash_set = true; +} -void journal_file_close(JournalFile *f) { +static void set_location(sd_journal *j, LocationType type, JournalFile *f, Object *o, uint64_t offset) { + assert(j); + assert(type == LOCATION_DISCRETE || type == LOCATION_SEEK); assert(f); + assert(o); - if (f->journal) - LIST_REMOVE(JournalFile, files, f->journal->files, f); + init_location(&j->current_location, type, f, o); - if (f->fd >= 0) - close_nointr_nofail(f->fd); + j->current_file = f; + j->current_field = 0; - if (f->header) - munmap(f->header, PAGE_ALIGN(sizeof(Header))); + f->current_offset = offset; +} - if (f->hash_table_window) - munmap(f->hash_table_window, f->hash_table_window_size); +static int match_is_valid(const void *data, size_t size) { + const char *b, *p; - if (f->bisect_table_window) - munmap(f->bisect_table_window, f->bisect_table_window_size); + assert(data); - if (f->window) - munmap(f->window, f->window_size); + if (size < 2) + return false; - free(f->path); - free(f); -} + if (startswith(data, "__")) + return false; -static int journal_file_init_header(JournalFile *f) { - Header h; - ssize_t k; - int r; + b = data; + for (p = b; p < b + size; p++) { - assert(f); + if (*p == '=') + return p > b; - zero(h); - memcpy(h.signature, signature, 8); - h.arena_offset = htole64(ALIGN64(sizeof(h))); - h.arena_max_size = htole64(DEFAULT_ARENA_MAX_SIZE); - h.arena_min_size = htole64(DEFAULT_ARENA_MIN_SIZE); - h.arena_keep_free = htole64(DEFAULT_ARENA_KEEP_FREE); + if (*p == '_') + continue; - r = sd_id128_randomize(&h.file_id); - if (r < 0) - return r; + if (*p >= 'A' && *p <= 'Z') + continue; - k = pwrite(f->fd, &h, sizeof(h), 0); - if (k < 0) - return -errno; + if (*p >= '0' && *p <= '9') + continue; - if (k != sizeof(h)) - return -EIO; + return false; + } - return 0; + return false; } -static int journal_file_refresh_header(JournalFile *f) { - int r; +static bool same_field(const void *_a, size_t s, const void *_b, size_t t) { + const uint8_t *a = _a, *b = _b; + size_t j; - assert(f); + for (j = 0; j < s && j < t; j++) { - r = sd_id128_get_machine(&f->header->machine_id); - if (r < 0) - return r; + if (a[j] != b[j]) + return false; - r = sd_id128_get_boot(&f->header->boot_id); - if (r < 0) - return r; + if (a[j] == '=') + return true; + } - f->header->state = htole32(STATE_ONLINE); - return 0; + return true; } -static int journal_file_verify_header(JournalFile *f) { - assert(f); +static Match *match_new(Match *p, MatchType t) { + Match *m; - if (memcmp(f->header, signature, 8)) - return -EBADMSG; + m = new0(Match, 1); + if (!m) + return NULL; - if (f->header->incompatible_flags != 0) - return -EPROTONOSUPPORT; + m->type = t; - if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->arena_offset) + le64toh(f->header->arena_size))) - return -ENODATA; + if (p) { + m->parent = p; + LIST_PREPEND(Match, matches, p->matches, m); + } - if (f->writable) { - uint32_t state; - sd_id128_t machine_id; - int r; + return m; +} - r = sd_id128_get_machine(&machine_id); - if (r < 0) - return r; +static void match_free(Match *m) { + assert(m); - if (!sd_id128_equal(machine_id, f->header->machine_id)) - return -EHOSTDOWN; + while (m->matches) + match_free(m->matches); - state = le32toh(f->header->state); + if (m->parent) + LIST_REMOVE(Match, matches, m->parent->matches, m); - if (state == STATE_ONLINE) - log_debug("Journal file %s is already online. Assuming unclean closing. Ignoring.", f->path); - else if (state == STATE_ARCHIVED) - return -ESHUTDOWN; - else if (state != STATE_OFFLINE) - log_debug("Journal file %s has unknown state %u. Ignoring.", f->path, state); - } + free(m->data); + free(m); +} - return 0; +static void match_free_if_empty(Match *m) { + assert(m); + + if (m->matches) + return; + + match_free(m); } -static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) { - uint64_t asize; - uint64_t old_size, new_size; +_public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size) { + Match *l2, *l3, *add_here = NULL, *m; + le64_t le_hash; - assert(f); + if (!j) + return -EINVAL; - if (offset < le64toh(f->header->arena_offset)) + if (!data) return -EINVAL; - new_size = PAGE_ALIGN(offset + size); + if (size == 0) + size = strlen(data); - /* We assume that this file is not sparse, and we know that - * for sure, since we alway call posix_fallocate() - * ourselves */ + if (!match_is_valid(data, size)) + return -EINVAL; - old_size = - le64toh(f->header->arena_offset) + - le64toh(f->header->arena_size); + /* level 0: OR term + * level 1: AND terms + * level 2: OR terms + * level 3: concrete matches */ - if (old_size >= new_size) - return 0; + if (!j->level0) { + j->level0 = match_new(NULL, MATCH_OR_TERM); + if (!j->level0) + return -ENOMEM; + } + + if (!j->level1) { + j->level1 = match_new(j->level0, MATCH_AND_TERM); + if (!j->level1) + return -ENOMEM; + } - asize = new_size - le64toh(f->header->arena_offset); + assert(j->level0->type == MATCH_OR_TERM); + assert(j->level1->type == MATCH_AND_TERM); - if (asize > le64toh(f->header->arena_min_size)) { - struct statvfs svfs; + le_hash = htole64(hash64(data, size)); - if (fstatvfs(f->fd, &svfs) >= 0) { - uint64_t available; + LIST_FOREACH(matches, l2, j->level1->matches) { + assert(l2->type == MATCH_OR_TERM); - available = svfs.f_bfree * svfs.f_bsize; + LIST_FOREACH(matches, l3, l2->matches) { + assert(l3->type == MATCH_DISCRETE); - if (available >= f->header->arena_keep_free) - available -= f->header->arena_keep_free; - else - available = 0; + /* Exactly the same match already? Then ignore + * this addition */ + if (l3->le_hash == le_hash && + l3->size == size && + memcmp(l3->data, data, size) == 0) + return 0; - if (new_size - old_size > available) - return -E2BIG; + /* Same field? Then let's add this to this OR term */ + if (same_field(data, size, l3->data, l3->size)) { + add_here = l2; + break; + } } + + if (add_here) + break; } - if (asize > le64toh(f->header->arena_max_size)) - return -E2BIG; + if (!add_here) { + add_here = match_new(j->level1, MATCH_OR_TERM); + if (!add_here) + goto fail; + } - if (posix_fallocate(f->fd, 0, new_size) < 0) - return -errno; + m = match_new(add_here, MATCH_DISCRETE); + if (!m) + goto fail; - if (fstat(f->fd, &f->last_stat) < 0) - return -errno; + m->le_hash = le_hash; + m->size = size; + m->data = memdup(data, size); + if (!m->data) + goto fail; - f->header->arena_size = htole64(asize); + detach_location(j); return 0; -} -static int journal_file_map( - JournalFile *f, - uint64_t offset, - uint64_t size, - void **_window, - uint64_t *_woffset, - uint64_t *_wsize, - void **ret) { +fail: + if (add_here) + match_free_if_empty(add_here); - uint64_t woffset, wsize; - void *window; + if (j->level1) + match_free_if_empty(j->level1); - assert(f); - assert(size > 0); - assert(ret); + if (j->level0) + match_free_if_empty(j->level0); - woffset = offset & ~((uint64_t) page_size() - 1ULL); - wsize = size + (offset - woffset); - wsize = PAGE_ALIGN(wsize); + return -ENOMEM; +} - window = mmap(NULL, wsize, f->prot, MAP_SHARED, f->fd, woffset); - if (window == MAP_FAILED) - return -errno; +_public_ int sd_journal_add_disjunction(sd_journal *j) { + Match *m; + + assert(j); - if (_window) - *_window = window; + if (!j->level0) + return 0; - if (_woffset) - *_woffset = woffset; + if (!j->level1) + return 0; - if (_wsize) - *_wsize = wsize; + if (!j->level1->matches) + return 0; - *ret = (uint8_t*) window + (offset - woffset); + m = match_new(j->level0, MATCH_AND_TERM); + if (!m) + return -ENOMEM; + j->level1 = m; return 0; } -static int journal_file_move_to(JournalFile *f, uint64_t offset, uint64_t size, void **ret) { - void *p; - uint64_t delta; - int r; - - assert(f); - assert(ret); - - if (_likely_(f->window && - f->window_offset <= offset && - f->window_offset+f->window_size >= offset + size)) { +static char *match_make_string(Match *m) { + char *p, *r; + Match *i; + bool enclose = false; - *ret = (uint8_t*) f->window + (offset - f->window_offset); - return 0; - } + if (!m) + return strdup(""); - if (f->window) { - if (munmap(f->window, f->window_size) < 0) - return -errno; + if (m->type == MATCH_DISCRETE) + return strndup(m->data, m->size); - f->window = NULL; - f->window_size = f->window_offset = 0; - } + p = NULL; + LIST_FOREACH(matches, i, m->matches) { + char *t, *k; - if (size < DEFAULT_WINDOW_SIZE) { - /* If the default window size is larger then what was - * asked for extend the mapping a bit in the hope to - * minimize needed remappings later on. We add half - * the window space before and half behind the - * requested mapping */ + t = match_make_string(i); + if (!t) { + free(p); + return NULL; + } - delta = PAGE_ALIGN((DEFAULT_WINDOW_SIZE - size) / 2); + if (p) { + k = strjoin(p, m->type == MATCH_OR_TERM ? " OR " : " AND ", t, NULL); + free(p); + free(t); - if (offset < delta) - delta = offset; + if (!k) + return NULL; - offset -= delta; - size += (DEFAULT_WINDOW_SIZE - delta); - } else - delta = 0; + p = k; - r = journal_file_map(f, - offset, size, - &f->window, &f->window_offset, &f->window_size, - & p); + enclose = true; + } else { + free(p); + p = t; + } + } - if (r < 0) + if (enclose) { + r = strjoin("(", p, ")", NULL); + free(p); return r; + } - *ret = (uint8_t*) p + delta; - return 0; + return p; } -static bool verify_hash(Object *o) { - uint64_t t; +char *journal_make_match_string(sd_journal *j) { + assert(j); - assert(o); + return match_make_string(j->level0); +} - t = le64toh(o->object.type); - if (t == OBJECT_DATA) { - uint64_t s, h1, h2; +_public_ void sd_journal_flush_matches(sd_journal *j) { - s = le64toh(o->object.size); + if (!j) + return; - h1 = le64toh(o->data.hash); - h2 = hash64(o->data.payload, s - offsetof(Object, data.payload)); + if (j->level0) + match_free(j->level0); - return h1 == h2; - } + j->level0 = j->level1 = NULL; - return true; + detach_location(j); } -int journal_file_move_to_object(JournalFile *f, uint64_t offset, Object **ret) { +static int compare_entry_order(JournalFile *af, Object *_ao, + JournalFile *bf, uint64_t bp) { + + uint64_t a, b; + Object *ao, *bo; int r; - void *t; - Object *o; - uint64_t s; - assert(f); - assert(ret); + assert(af); + assert(bf); + assert(_ao); - r = journal_file_move_to(f, offset, sizeof(ObjectHeader), &t); + /* The mmap cache might invalidate the object from the first + * file if we look at the one from the second file. Hence + * temporarily copy the header of the first one, and look at + * that only. */ + ao = alloca(offsetof(EntryObject, items)); + memcpy(ao, _ao, offsetof(EntryObject, items)); + + r = journal_file_move_to_object(bf, OBJECT_ENTRY, bp, &bo); if (r < 0) - return r; + return strcmp(af->path, bf->path); + + /* We operate on two different files here, hence we can access + * two objects at the same time, which we normally can't. + * + * If contents and timestamps match, these entries are + * identical, even if the seqnum does not match */ + + if (sd_id128_equal(ao->entry.boot_id, bo->entry.boot_id) && + ao->entry.monotonic == bo->entry.monotonic && + ao->entry.realtime == bo->entry.realtime && + ao->entry.xor_hash == bo->entry.xor_hash) + return 0; - o = (Object*) t; - s = le64toh(o->object.size); + if (sd_id128_equal(af->header->seqnum_id, bf->header->seqnum_id)) { - if (s < sizeof(ObjectHeader)) - return -EBADMSG; + /* If this is from the same seqnum source, compare + * seqnums */ + a = le64toh(ao->entry.seqnum); + b = le64toh(bo->entry.seqnum); - if (s > sizeof(ObjectHeader)) { - r = journal_file_move_to(f, offset, s, &t); - if (r < 0) - return r; + if (a < b) + return -1; + if (a > b) + return 1; - o = (Object*) t; + /* Wow! This is weird, different data but the same + * seqnums? Something is borked, but let's make the + * best of it and compare by time. */ } - if (!verify_hash(o)) - return -EBADMSG; + if (sd_id128_equal(ao->entry.boot_id, bo->entry.boot_id)) { - *ret = o; - return 0; -} + /* If the boot id matches compare monotonic time */ + a = le64toh(ao->entry.monotonic); + b = le64toh(bo->entry.monotonic); -static uint64_t journal_file_seqnum(JournalFile *f) { - uint64_t r; + if (a < b) + return -1; + if (a > b) + return 1; + } - assert(f); + /* Otherwise compare UTC time */ + a = le64toh(ao->entry.realtime); + b = le64toh(bo->entry.realtime); - r = le64toh(f->header->seqnum) + 1; - f->header->seqnum = htole64(r); + if (a < b) + return -1; + if (a > b) + return 1; - return r; + /* Finally, compare by contents */ + a = le64toh(ao->entry.xor_hash); + b = le64toh(bo->entry.xor_hash); + + if (a < b) + return -1; + if (a > b) + return 1; + + return 0; } -static int journal_file_append_object(JournalFile *f, uint64_t size, Object **ret, uint64_t *offset) { - int r; - uint64_t p; - Object *tail, *o; - void *t; +static int compare_with_location(JournalFile *af, Object *ao, Location *l) { + uint64_t a; - assert(f); - assert(size >= sizeof(ObjectHeader)); - assert(offset); - assert(ret); + assert(af); + assert(ao); + assert(l); + assert(l->type == LOCATION_DISCRETE || l->type == LOCATION_SEEK); - p = le64toh(f->header->tail_object_offset); + if (l->monotonic_set && + sd_id128_equal(ao->entry.boot_id, l->boot_id) && + l->realtime_set && + le64toh(ao->entry.realtime) == l->realtime && + l->xor_hash_set && + le64toh(ao->entry.xor_hash) == l->xor_hash) + return 0; - if (p == 0) - p = le64toh(f->header->arena_offset); - else { - r = journal_file_move_to_object(f, p, &tail); - if (r < 0) - return r; + if (l->seqnum_set && + sd_id128_equal(af->header->seqnum_id, l->seqnum_id)) { - p += ALIGN64(le64toh(tail->object.size)); + a = le64toh(ao->entry.seqnum); + + if (a < l->seqnum) + return -1; + if (a > l->seqnum) + return 1; } - r = journal_file_allocate(f, p, size); - if (r < 0) - return r; + if (l->monotonic_set && + sd_id128_equal(ao->entry.boot_id, l->boot_id)) { - r = journal_file_move_to(f, p, size, &t); - if (r < 0) - return r; + a = le64toh(ao->entry.monotonic); + + if (a < l->monotonic) + return -1; + if (a > l->monotonic) + return 1; + } - o = (Object*) t; + if (l->realtime_set) { - zero(o->object); - o->object.type = htole64(OBJECT_UNUSED); - zero(o->object.reserved); - o->object.size = htole64(size); + a = le64toh(ao->entry.realtime); - f->header->tail_object_offset = htole64(p); - if (f->header->head_object_offset == 0) - f->header->head_object_offset = htole64(p); + if (a < l->realtime) + return -1; + if (a > l->realtime) + return 1; + } - f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1); + if (l->xor_hash_set) { + a = le64toh(ao->entry.xor_hash); - *ret = o; - *offset = p; + if (a < l->xor_hash) + return -1; + if (a > l->xor_hash) + return 1; + } return 0; } -static int journal_file_setup_hash_table(JournalFile *f) { - uint64_t s, p; - Object *o; +static int next_for_match( + sd_journal *j, + Match *m, + JournalFile *f, + uint64_t after_offset, + direction_t direction, + Object **ret, + uint64_t *offset) { + int r; + uint64_t np = 0; + Object *n; + assert(j); + assert(m); assert(f); - s = DEFAULT_HASH_TABLE_SIZE; - r = journal_file_append_object(f, offsetof(Object, hash_table.table) + s, &o, &p); - if (r < 0) - return r; + if (m->type == MATCH_DISCRETE) { + uint64_t dp; - o->object.type = htole64(OBJECT_HASH_TABLE); - memset(o->hash_table.table, 0, s); + r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp); + if (r <= 0) + return r; - f->header->hash_table_offset = htole64(p + offsetof(Object, hash_table.table)); - f->header->hash_table_size = htole64(s); + return journal_file_move_to_entry_by_offset_for_data(f, dp, after_offset, direction, ret, offset); - return 0; -} + } else if (m->type == MATCH_OR_TERM) { + Match *i; -static int journal_file_setup_bisect_table(JournalFile *f) { - uint64_t s, p; - Object *o; - int r; + /* Find the earliest match beyond after_offset */ - assert(f); + LIST_FOREACH(matches, i, m->matches) { + uint64_t cp; - s = DEFAULT_BISECT_TABLE_SIZE; - r = journal_file_append_object(f, offsetof(Object, bisect_table.table) + s, &o, &p); - if (r < 0) - return r; + r = next_for_match(j, i, f, after_offset, direction, NULL, &cp); + if (r < 0) + return r; + else if (r > 0) { + if (np == 0 || (direction == DIRECTION_DOWN ? np > cp : np < cp)) + np = cp; + } + } - o->object.type = htole64(OBJECT_BISECT_TABLE); - memset(o->bisect_table.table, 0, s); + } else if (m->type == MATCH_AND_TERM) { + Match *i; + bool continue_looking; - f->header->bisect_table_offset = htole64(p + offsetof(Object, bisect_table.table)); - f->header->bisect_table_size = htole64(s); + /* Always jump to the next matching entry and repeat + * this until we fine and offset that matches for all + * matches. */ - return 0; -} + if (!m->matches) + return 0; -static int journal_file_map_hash_table(JournalFile *f) { - uint64_t s, p; - void *t; - int r; + np = 0; + do { + continue_looking = false; + + LIST_FOREACH(matches, i, m->matches) { + uint64_t cp, limit; + + if (np == 0) + limit = after_offset; + else if (direction == DIRECTION_DOWN) + limit = MAX(np, after_offset); + else + limit = MIN(np, after_offset); + + r = next_for_match(j, i, f, limit, direction, NULL, &cp); + if (r <= 0) + return r; + + if ((direction == DIRECTION_DOWN ? cp >= after_offset : cp <= after_offset) && + (np == 0 || (direction == DIRECTION_DOWN ? cp > np : np < cp))) { + np = cp; + continue_looking = true; + } + } - assert(f); + } while (continue_looking); + } - p = le64toh(f->header->hash_table_offset); - s = le64toh(f->header->hash_table_size); + if (np == 0) + return 0; - r = journal_file_map(f, - p, s, - &f->hash_table_window, NULL, &f->hash_table_window_size, - &t); + r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n); if (r < 0) return r; - f->hash_table = t; - return 0; + if (ret) + *ret = n; + if (offset) + *offset = np; + + return 1; } -static int journal_file_map_bisect_table(JournalFile *f) { - uint64_t s, p; - void *t; +static int find_location_for_match( + sd_journal *j, + Match *m, + JournalFile *f, + direction_t direction, + Object **ret, + uint64_t *offset) { + int r; + assert(j); + assert(m); assert(f); - p = le64toh(f->header->bisect_table_offset); - s = le64toh(f->header->bisect_table_size); - - r = journal_file_map(f, - p, s, - &f->bisect_table_window, NULL, &f->bisect_table_window_size, - &t); - - if (r < 0) - return r; - - f->bisect_table = t; - return 0; -} + if (m->type == MATCH_DISCRETE) { + uint64_t dp; -static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash_index) { - uint64_t p; - int r; + r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp); + if (r <= 0) + return r; - assert(f); - assert(o); - assert(offset > 0); - assert(o->object.type == htole64(OBJECT_DATA)); + /* FIXME: missing: find by monotonic */ + + if (j->current_location.type == LOCATION_HEAD) + return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_DOWN, ret, offset); + if (j->current_location.type == LOCATION_TAIL) + return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_UP, ret, offset); + if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id)) + return journal_file_move_to_entry_by_seqnum_for_data(f, dp, j->current_location.seqnum, direction, ret, offset); + if (j->current_location.monotonic_set) { + r = journal_file_move_to_entry_by_monotonic_for_data(f, dp, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset); + if (r != -ENOENT) + return r; + } + if (j->current_location.realtime_set) + return journal_file_move_to_entry_by_realtime_for_data(f, dp, j->current_location.realtime, direction, ret, offset); - o->data.head_entry_offset = o->data.tail_entry_offset = 0; - o->data.next_hash_offset = 0; + return journal_file_next_entry_for_data(f, NULL, 0, dp, direction, ret, offset); - p = le64toh(f->hash_table[hash_index].tail_hash_offset); - if (p == 0) { - /* Only entry in the hash table is easy */ + } else if (m->type == MATCH_OR_TERM) { + uint64_t np = 0; + Object *n; + Match *i; - o->data.prev_hash_offset = 0; - f->hash_table[hash_index].head_hash_offset = htole64(offset); - } else { - o->data.prev_hash_offset = htole64(p); + /* Find the earliest match */ - /* Temporarily move back to the previous data object, - * to patch in pointer */ + LIST_FOREACH(matches, i, m->matches) { + uint64_t cp; - r = journal_file_move_to_object(f, p, &o); - if (r < 0) - return r; + r = find_location_for_match(j, i, f, direction, NULL, &cp); + if (r < 0) + return r; + else if (r > 0) { + if (np == 0 || (direction == DIRECTION_DOWN ? np > cp : np < cp)) + np = cp; + } + } - o->data.next_hash_offset = offset; + if (np == 0) + return 0; - r = journal_file_move_to_object(f, offset, &o); + r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n); if (r < 0) return r; - } - f->hash_table[hash_index].tail_hash_offset = htole64(offset); + if (ret) + *ret = n; + if (offset) + *offset = np; - return 0; -} + return 1; -static int journal_file_append_data(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) { - uint64_t hash, h, p, np; - uint64_t osize; - Object *o; - int r; + } else { + Match *i; + uint64_t np = 0; - assert(f); - assert(data || size == 0); + assert(m->type == MATCH_AND_TERM); - osize = offsetof(Object, data.payload) + size; + /* First jump to the last match, and then find the + * next one where all matches match */ - hash = hash64(data, size); - h = hash % (le64toh(f->header->hash_table_size) / sizeof(HashItem)); - p = le64toh(f->hash_table[h].head_hash_offset); + if (!m->matches) + return 0; - while (p != 0) { - /* Look for this data object in the hash table */ + LIST_FOREACH(matches, i, m->matches) { + uint64_t cp; - r = journal_file_move_to_object(f, p, &o); - if (r < 0) - return r; + r = find_location_for_match(j, i, f, direction, NULL, &cp); + if (r <= 0) + return r; - if (le64toh(o->object.type) != OBJECT_DATA) - return -EBADMSG; + if (np == 0 || (direction == DIRECTION_DOWN ? np < cp : np > cp)) + np = cp; + } - if (le64toh(o->object.size) == osize && - memcmp(o->data.payload, data, size) == 0) { + return next_for_match(j, m, f, np, direction, ret, offset); + } +} - if (le64toh(o->data.hash) != hash) - return -EBADMSG; +static int find_location_with_matches( + sd_journal *j, + JournalFile *f, + direction_t direction, + Object **ret, + uint64_t *offset) { - if (ret) - *ret = o; + int r; - if (offset) - *offset = p; + assert(j); + assert(f); + assert(ret); + assert(offset); - return 0; + if (!j->level0) { + /* No matches is simple */ + + if (j->current_location.type == LOCATION_HEAD) + return journal_file_next_entry(f, NULL, 0, DIRECTION_DOWN, ret, offset); + if (j->current_location.type == LOCATION_TAIL) + return journal_file_next_entry(f, NULL, 0, DIRECTION_UP, ret, offset); + if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id)) + return journal_file_move_to_entry_by_seqnum(f, j->current_location.seqnum, direction, ret, offset); + if (j->current_location.monotonic_set) { + r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset); + if (r != -ENOENT) + return r; } + if (j->current_location.realtime_set) + return journal_file_move_to_entry_by_realtime(f, j->current_location.realtime, direction, ret, offset); - p = le64toh(o->data.next_hash_offset); - } - - r = journal_file_append_object(f, osize, &o, &np); - if (r < 0) - return r; - - o->object.type = htole64(OBJECT_DATA); - o->data.hash = htole64(hash); - memcpy(o->data.payload, data, size); + return journal_file_next_entry(f, NULL, 0, direction, ret, offset); + } else + return find_location_for_match(j, j->level0, f, direction, ret, offset); +} - r = journal_file_link_data(f, o, np, h); - if (r < 0) - return r; +static int next_with_matches( + sd_journal *j, + JournalFile *f, + direction_t direction, + Object **ret, + uint64_t *offset) { - if (ret) - *ret = o; + Object *c; + uint64_t cp; - if (offset) - *offset = np; + assert(j); + assert(f); + assert(ret); + assert(offset); - return 0; -} + c = *ret; + cp = *offset; -uint64_t journal_file_entry_n_items(Object *o) { - assert(o); - assert(o->object.type == htole64(OBJECT_ENTRY)); + /* No matches is easy. We simple advance the file + * pointer by one. */ + if (!j->level0) + return journal_file_next_entry(f, c, cp, direction, ret, offset); - return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem); + /* If we have a match then we look for the next matching entry + * with an offset at least one step larger */ + return next_for_match(j, j->level0, f, direction == DIRECTION_DOWN ? cp+1 : cp-1, direction, ret, offset); } -static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) { - uint64_t p, q; +static int next_beyond_location(sd_journal *j, JournalFile *f, direction_t direction, Object **ret, uint64_t *offset) { + Object *c; + uint64_t cp; int r; + + assert(j); assert(f); - assert(o); - assert(offset > 0); - p = le64toh(o->entry.items[i].object_offset); - if (p == 0) + if (f->current_offset > 0) { + cp = f->current_offset; + + r = journal_file_move_to_object(f, OBJECT_ENTRY, cp, &c); + if (r < 0) + return r; + + r = next_with_matches(j, f, direction, &c, &cp); + if (r <= 0) + return r; + } else { + r = find_location_with_matches(j, f, direction, &c, &cp); + if (r <= 0) + return r; + } + + /* OK, we found the spot, now let's advance until to an entry + * that is actually different from what we were previously + * looking at. This is necessary to handle entries which exist + * in two (or more) journal files, and which shall all be + * suppressed but one. */ + + for (;;) { + bool found; + + if (j->current_location.type == LOCATION_DISCRETE) { + int k; + + k = compare_with_location(f, c, &j->current_location); + if (direction == DIRECTION_DOWN) + found = k > 0; + else + found = k < 0; + } else + found = true; + + if (found) { + if (ret) + *ret = c; + if (offset) + *offset = cp; + return 1; + } + + r = next_with_matches(j, f, direction, &c, &cp); + if (r <= 0) + return r; + } +} + +static int real_journal_next(sd_journal *j, direction_t direction) { + JournalFile *f, *new_file = NULL; + uint64_t new_offset = 0; + Object *o; + uint64_t p; + Iterator i; + int r; + + if (!j) + return -EINVAL; + + HASHMAP_FOREACH(f, j->files, i) { + bool found; + + r = next_beyond_location(j, f, direction, &o, &p); + if (r < 0) { + log_debug("Can't iterate through %s, ignoring: %s", f->path, strerror(-r)); + continue; + } else if (r == 0) + continue; + + if (!new_file) + found = true; + else { + int k; + + k = compare_entry_order(f, o, new_file, new_offset); + + if (direction == DIRECTION_DOWN) + found = k < 0; + else + found = k > 0; + } + + if (found) { + new_file = f; + new_offset = p; + } + } + + if (!new_file) + return 0; + + r = journal_file_move_to_object(new_file, OBJECT_ENTRY, new_offset, &o); + if (r < 0) + return r; + + set_location(j, LOCATION_DISCRETE, new_file, o, new_offset); + + return 1; +} + +_public_ int sd_journal_next(sd_journal *j) { + return real_journal_next(j, DIRECTION_DOWN); +} + +_public_ int sd_journal_previous(sd_journal *j) { + return real_journal_next(j, DIRECTION_UP); +} + +static int real_journal_next_skip(sd_journal *j, direction_t direction, uint64_t skip) { + int c = 0, r; + + if (!j) + return -EINVAL; + + if (skip == 0) { + /* If this is not a discrete skip, then at least + * resolve the current location */ + if (j->current_location.type != LOCATION_DISCRETE) + return real_journal_next(j, direction); + + return 0; + } + + do { + r = real_journal_next(j, direction); + if (r < 0) + return r; + + if (r == 0) + return c; + + skip--; + c++; + } while (skip > 0); + + return c; +} + +_public_ int sd_journal_next_skip(sd_journal *j, uint64_t skip) { + return real_journal_next_skip(j, DIRECTION_DOWN, skip); +} + +_public_ int sd_journal_previous_skip(sd_journal *j, uint64_t skip) { + return real_journal_next_skip(j, DIRECTION_UP, skip); +} + +_public_ int sd_journal_get_cursor(sd_journal *j, char **cursor) { + Object *o; + int r; + char bid[33], sid[33]; + + if (!j) + return -EINVAL; + if (!cursor) + return -EINVAL; + + if (!j->current_file || j->current_file->current_offset <= 0) + return -EADDRNOTAVAIL; + + r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o); + if (r < 0) + return r; + + sd_id128_to_string(j->current_file->header->seqnum_id, sid); + sd_id128_to_string(o->entry.boot_id, bid); + + if (asprintf(cursor, + "s=%s;i=%llx;b=%s;m=%llx;t=%llx;x=%llx", + sid, (unsigned long long) le64toh(o->entry.seqnum), + bid, (unsigned long long) le64toh(o->entry.monotonic), + (unsigned long long) le64toh(o->entry.realtime), + (unsigned long long) le64toh(o->entry.xor_hash)) < 0) + return -ENOMEM; + + return 1; +} + +_public_ int sd_journal_seek_cursor(sd_journal *j, const char *cursor) { + char *w, *state; + size_t l; + unsigned long long seqnum, monotonic, realtime, xor_hash; + bool + seqnum_id_set = false, + seqnum_set = false, + boot_id_set = false, + monotonic_set = false, + realtime_set = false, + xor_hash_set = false; + sd_id128_t seqnum_id, boot_id; + + if (!j) + return -EINVAL; + if (isempty(cursor)) + return -EINVAL; + + FOREACH_WORD_SEPARATOR(w, l, cursor, ";", state) { + char *item; + int k = 0; + + if (l < 2 || w[1] != '=') + return -EINVAL; + + item = strndup(w, l); + if (!item) + return -ENOMEM; + + switch (w[0]) { + + case 's': + seqnum_id_set = true; + k = sd_id128_from_string(item+2, &seqnum_id); + break; + + case 'i': + seqnum_set = true; + if (sscanf(item+2, "%llx", &seqnum) != 1) + k = -EINVAL; + break; + + case 'b': + boot_id_set = true; + k = sd_id128_from_string(item+2, &boot_id); + break; + + case 'm': + monotonic_set = true; + if (sscanf(item+2, "%llx", &monotonic) != 1) + k = -EINVAL; + break; + + case 't': + realtime_set = true; + if (sscanf(item+2, "%llx", &realtime) != 1) + k = -EINVAL; + break; + + case 'x': + xor_hash_set = true; + if (sscanf(item+2, "%llx", &xor_hash) != 1) + k = -EINVAL; + break; + } + + free(item); + + if (k < 0) + return k; + } + + if ((!seqnum_set || !seqnum_id_set) && + (!monotonic_set || !boot_id_set) && + !realtime_set) + return -EINVAL; + + reset_location(j); + + j->current_location.type = LOCATION_SEEK; + + if (realtime_set) { + j->current_location.realtime = (uint64_t) realtime; + j->current_location.realtime_set = true; + } + + if (seqnum_set && seqnum_id_set) { + j->current_location.seqnum = (uint64_t) seqnum; + j->current_location.seqnum_id = seqnum_id; + j->current_location.seqnum_set = true; + } + + if (monotonic_set && boot_id_set) { + j->current_location.monotonic = (uint64_t) monotonic; + j->current_location.boot_id = boot_id; + j->current_location.monotonic_set = true; + } + + if (xor_hash_set) { + j->current_location.xor_hash = (uint64_t) xor_hash; + j->current_location.xor_hash_set = true; + } + + return 0; +} + +_public_ int sd_journal_test_cursor(sd_journal *j, const char *cursor) { + int r; + char *w, *state; + size_t l; + Object *o; + + if (!j) + return -EINVAL; + if (isempty(cursor)) + return -EINVAL; + + if (!j->current_file || j->current_file->current_offset <= 0) + return -EADDRNOTAVAIL; + + r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o); + if (r < 0) + return r; + + FOREACH_WORD_SEPARATOR(w, l, cursor, ";", state) { + _cleanup_free_ char *item = NULL; + sd_id128_t id; + unsigned long long ll; + int k = 0; + + if (l < 2 || w[1] != '=') + return -EINVAL; + + item = strndup(w, l); + if (!item) + return -ENOMEM; + + switch (w[0]) { + + case 's': + k = sd_id128_from_string(item+2, &id); + if (k < 0) + return k; + if (!sd_id128_equal(id, j->current_file->header->seqnum_id)) + return 0; + break; + + case 'i': + if (sscanf(item+2, "%llx", &ll) != 1) + return -EINVAL; + if (ll != le64toh(o->entry.seqnum)) + return 0; + break; + + case 'b': + k = sd_id128_from_string(item+2, &id); + if (k < 0) + return k; + if (!sd_id128_equal(id, o->entry.boot_id)) + return 0; + break; + + case 'm': + if (sscanf(item+2, "%llx", &ll) != 1) + return -EINVAL; + if (ll != le64toh(o->entry.monotonic)) + return 0; + break; + + case 't': + if (sscanf(item+2, "%llx", &ll) != 1) + return -EINVAL; + if (ll != le64toh(o->entry.realtime)) + return 0; + break; + + case 'x': + if (sscanf(item+2, "%llx", &ll) != 1) + return -EINVAL; + if (ll != le64toh(o->entry.xor_hash)) + return 0; + break; + } + } + + return 1; +} + + +_public_ int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t usec) { + if (!j) + return -EINVAL; + + reset_location(j); + j->current_location.type = LOCATION_SEEK; + j->current_location.boot_id = boot_id; + j->current_location.monotonic = usec; + j->current_location.monotonic_set = true; + + return 0; +} + +_public_ int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec) { + if (!j) + return -EINVAL; + + reset_location(j); + j->current_location.type = LOCATION_SEEK; + j->current_location.realtime = usec; + j->current_location.realtime_set = true; + + return 0; +} + +_public_ int sd_journal_seek_head(sd_journal *j) { + if (!j) + return -EINVAL; + + reset_location(j); + j->current_location.type = LOCATION_HEAD; + + return 0; +} + +_public_ int sd_journal_seek_tail(sd_journal *j) { + if (!j) + return -EINVAL; + + reset_location(j); + j->current_location.type = LOCATION_TAIL; + + return 0; +} + +static void check_network(sd_journal *j, int fd) { + struct statfs sfs; + + assert(j); + + if (j->on_network) + return; + + if (fstatfs(fd, &sfs) < 0) + return; + + j->on_network = + (long)sfs.f_type == (long)CIFS_MAGIC_NUMBER || + sfs.f_type == CODA_SUPER_MAGIC || + sfs.f_type == NCP_SUPER_MAGIC || + sfs.f_type == NFS_SUPER_MAGIC || + sfs.f_type == SMB_SUPER_MAGIC; +} + +static int add_file(sd_journal *j, const char *prefix, const char *filename) { + char *path; + int r; + JournalFile *f; + + assert(j); + assert(prefix); + assert(filename); + + if ((j->flags & SD_JOURNAL_SYSTEM_ONLY) && + !(streq(filename, "system.journal") || + streq(filename, "system.journal~") || + (startswith(filename, "system@") && + (endswith(filename, ".journal") || endswith(filename, ".journal~"))))) + return 0; + + path = strjoin(prefix, "/", filename, NULL); + if (!path) + return -ENOMEM; + + if (hashmap_get(j->files, path)) { + free(path); + return 0; + } + + if (hashmap_size(j->files) >= JOURNAL_FILES_MAX) { + log_debug("Too many open journal files, not adding %s, ignoring.", path); + free(path); + return 0; + } + + r = journal_file_open(path, O_RDONLY, 0, false, false, NULL, j->mmap, NULL, &f); + free(path); + + if (r < 0) { + if (errno == ENOENT) + return 0; + + return r; + } + + /* journal_file_dump(f); */ + + r = hashmap_put(j->files, f->path, f); + if (r < 0) { + journal_file_close(f); + return r; + } + + check_network(j, f->fd); + + j->current_invalidate_counter ++; + + log_debug("File %s got added.", f->path); + + return 0; +} + +static int remove_file(sd_journal *j, const char *prefix, const char *filename) { + char *path; + JournalFile *f; + + assert(j); + assert(prefix); + assert(filename); + + path = strjoin(prefix, "/", filename, NULL); + if (!path) + return -ENOMEM; + + f = hashmap_get(j->files, path); + free(path); + if (!f) + return 0; + + hashmap_remove(j->files, f->path); + + log_debug("File %s got removed.", f->path); + + if (j->current_file == f) { + j->current_file = NULL; + j->current_field = 0; + } + + if (j->unique_file == f) { + j->unique_file = NULL; + j->unique_offset = 0; + } + + journal_file_close(f); + + j->current_invalidate_counter ++; + + return 0; +} + +static int add_directory(sd_journal *j, const char *prefix, const char *dirname) { + char *path; + int r; + DIR *d; + sd_id128_t id, mid; + Directory *m; + + assert(j); + assert(prefix); + assert(dirname); + + if ((j->flags & SD_JOURNAL_LOCAL_ONLY) && + (sd_id128_from_string(dirname, &id) < 0 || + sd_id128_get_machine(&mid) < 0 || + !sd_id128_equal(id, mid))) + return 0; + + path = strjoin(prefix, "/", dirname, NULL); + if (!path) + return -ENOMEM; + + d = opendir(path); + if (!d) { + log_debug("Failed to open %s: %m", path); + free(path); + + if (errno == ENOENT) + return 0; + return -errno; + } + + m = hashmap_get(j->directories_by_path, path); + if (!m) { + m = new0(Directory, 1); + if (!m) { + closedir(d); + free(path); + return -ENOMEM; + } + + m->is_root = false; + m->path = path; + + if (hashmap_put(j->directories_by_path, m->path, m) < 0) { + closedir(d); + free(m->path); + free(m); + return -ENOMEM; + } + + j->current_invalidate_counter ++; + + log_debug("Directory %s got added.", m->path); + + } else if (m->is_root) { + free (path); + closedir(d); + return 0; + } else + free(path); + + if (m->wd <= 0 && j->inotify_fd >= 0) { + + m->wd = inotify_add_watch(j->inotify_fd, m->path, + IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE| + IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM| + IN_ONLYDIR); + + if (m->wd > 0 && hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m) < 0) + inotify_rm_watch(j->inotify_fd, m->wd); + } + + for (;;) { + struct dirent *de; + union dirent_storage buf; + + r = readdir_r(d, &buf.de, &de); + if (r != 0 || !de) + break; + + if (dirent_is_file_with_suffix(de, ".journal") || + dirent_is_file_with_suffix(de, ".journal~")) { + r = add_file(j, m->path, de->d_name); + if (r < 0) + log_debug("Failed to add file %s/%s: %s", m->path, de->d_name, strerror(-r)); + } + } + + check_network(j, dirfd(d)); + + closedir(d); + + return 0; +} + +static int add_root_directory(sd_journal *j, const char *p) { + DIR *d; + Directory *m; + int r; + + assert(j); + assert(p); + + if ((j->flags & SD_JOURNAL_RUNTIME_ONLY) && + !path_startswith(p, "/run")) return -EINVAL; - o->entry.items[i].next_entry_offset = 0; + d = opendir(p); + if (!d) + return -errno; - /* Move to the data object */ - r = journal_file_move_to_object(f, p, &o); - if (r < 0) - return r; + m = hashmap_get(j->directories_by_path, p); + if (!m) { + m = new0(Directory, 1); + if (!m) { + closedir(d); + return -ENOMEM; + } - if (o->object.type != htole64(OBJECT_DATA)) - return -EBADMSG; + m->is_root = true; + m->path = strdup(p); + if (!m->path) { + closedir(d); + free(m); + return -ENOMEM; + } - q = le64toh(o->data.tail_entry_offset); - o->data.tail_entry_offset = htole64(offset); + if (hashmap_put(j->directories_by_path, m->path, m) < 0) { + closedir(d); + free(m->path); + free(m); + return -ENOMEM; + } - if (q == 0) - o->data.head_entry_offset = htole64(offset); - else { - uint64_t n, j; + j->current_invalidate_counter ++; - /* Move to previous entry */ - r = journal_file_move_to_object(f, q, &o); - if (r < 0) - return r; + log_debug("Root directory %s got added.", m->path); - if (o->object.type != htole64(OBJECT_ENTRY)) - return -EBADMSG; + } else if (!m->is_root) { + closedir(d); + return 0; + } - n = journal_file_entry_n_items(o); - for (j = 0; j < n; j++) - if (le64toh(o->entry.items[j].object_offset) == p) - break; + if (m->wd <= 0 && j->inotify_fd >= 0) { - if (j >= n) - return -EBADMSG; + m->wd = inotify_add_watch(j->inotify_fd, m->path, + IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE| + IN_ONLYDIR); - o->entry.items[j].next_entry_offset = offset; + if (m->wd > 0 && hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m) < 0) + inotify_rm_watch(j->inotify_fd, m->wd); } - /* Move back to original entry */ - r = journal_file_move_to_object(f, offset, &o); - if (r < 0) - return r; + for (;;) { + struct dirent *de; + union dirent_storage buf; + sd_id128_t id; + + r = readdir_r(d, &buf.de, &de); + if (r != 0 || !de) + break; + + if (dirent_is_file_with_suffix(de, ".journal") || + dirent_is_file_with_suffix(de, ".journal~")) { + r = add_file(j, m->path, de->d_name); + if (r < 0) + log_debug("Failed to add file %s/%s: %s", m->path, de->d_name, strerror(-r)); + + } else if ((de->d_type == DT_DIR || de->d_type == DT_LNK || de->d_type == DT_UNKNOWN) && + sd_id128_from_string(de->d_name, &id) >= 0) { + + r = add_directory(j, m->path, de->d_name); + if (r < 0) + log_debug("Failed to add directory %s/%s: %s", m->path, de->d_name, strerror(-r)); + } + } + + check_network(j, dirfd(d)); + + closedir(d); - o->entry.items[i].prev_entry_offset = q; return 0; } -static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) { - uint64_t p, i, n, k, a, b; - int r; +static int remove_directory(sd_journal *j, Directory *d) { + assert(j); - assert(f); - assert(o); - assert(offset > 0); - assert(o->object.type == htole64(OBJECT_ENTRY)); + if (d->wd > 0) { + hashmap_remove(j->directories_by_wd, INT_TO_PTR(d->wd)); - /* Link up the entry itself */ - p = le64toh(f->header->tail_entry_offset); + if (j->inotify_fd >= 0) + inotify_rm_watch(j->inotify_fd, d->wd); + } - o->entry.prev_entry_offset = f->header->tail_entry_offset; - o->entry.next_entry_offset = 0; + hashmap_remove(j->directories_by_path, d->path); - if (p == 0) - f->header->head_entry_offset = htole64(offset); - else { - /* Temporarily move back to the previous entry, to - * patch in pointer */ + if (d->is_root) + log_debug("Root directory %s got removed.", d->path); + else + log_debug("Directory %s got removed.", d->path); - r = journal_file_move_to_object(f, p, &o); - if (r < 0) - return r; + free(d->path); + free(d); - o->entry.next_entry_offset = htole64(offset); + return 0; +} - r = journal_file_move_to_object(f, offset, &o); - if (r < 0) - return r; +static int add_search_paths(sd_journal *j) { + + const char search_paths[] = + "/run/log/journal\0" + "/var/log/journal\0"; + const char *p; + + assert(j); + + /* We ignore most errors here, since the idea is to only open + * what's actually accessible, and ignore the rest. */ + + NULSTR_FOREACH(p, search_paths) + add_root_directory(j, p); + + return 0; +} + +static int allocate_inotify(sd_journal *j) { + assert(j); + + if (j->inotify_fd < 0) { + j->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC); + if (j->inotify_fd < 0) + return -errno; } - f->header->tail_entry_offset = htole64(offset); + if (!j->directories_by_wd) { + j->directories_by_wd = hashmap_new(trivial_hash_func, trivial_compare_func); + if (!j->directories_by_wd) + return -ENOMEM; + } - /* Link up the items */ - n = journal_file_entry_n_items(o); - for (i = 0; i < n; i++) { - r = journal_file_link_entry_item(f, o, offset, i); - if (r < 0) - return r; + return 0; +} + +static sd_journal *journal_new(int flags, const char *path) { + sd_journal *j; + + j = new0(sd_journal, 1); + if (!j) + return NULL; + + j->inotify_fd = -1; + j->flags = flags; + + if (path) { + j->path = strdup(path); + if (!j->path) { + free(j); + return NULL; + } + } + + j->files = hashmap_new(string_hash_func, string_compare_func); + if (!j->files) { + free(j->path); + free(j); + return NULL; + } + + j->directories_by_path = hashmap_new(string_hash_func, string_compare_func); + if (!j->directories_by_path) { + hashmap_free(j->files); + free(j->path); + free(j); + return NULL; } - /* Link up the entry in the bisect table */ - n = le64toh(f->header->bisect_table_size) / sizeof(uint64_t); - k = le64toh(f->header->arena_max_size) / n; + j->mmap = mmap_cache_new(); + if (!j->mmap) { + hashmap_free(j->files); + hashmap_free(j->directories_by_path); + free(j->path); + free(j); + return NULL; + } + + return j; +} + +_public_ int sd_journal_open(sd_journal **ret, int flags) { + sd_journal *j; + int r; + + if (!ret) + return -EINVAL; - a = (le64toh(f->header->last_bisect_offset) + k - 1) / k; - b = offset / k; + if (flags & ~(SD_JOURNAL_LOCAL_ONLY| + SD_JOURNAL_RUNTIME_ONLY| + SD_JOURNAL_SYSTEM_ONLY)) + return -EINVAL; - for (; a <= b; a++) - f->bisect_table[a] = htole64(offset); + j = journal_new(flags, NULL); + if (!j) + return -ENOMEM; - f->header->last_bisect_offset = htole64(offset + le64toh(o->object.size)); + r = add_search_paths(j); + if (r < 0) + goto fail; + *ret = j; return 0; + +fail: + sd_journal_close(j); + + return r; } -static int journal_file_append_entry_internal( - JournalFile *f, - const dual_timestamp *ts, - uint64_t xor_hash, - const EntryItem items[], unsigned n_items, - Object **ret, uint64_t *offset) { - uint64_t np; - uint64_t osize; +_public_ int sd_journal_open_directory(sd_journal **ret, const char *path, int flags) { + sd_journal *j; + int r; + + if (!ret) + return -EINVAL; + + if (!path || !path_is_absolute(path)) + return -EINVAL; + + if (flags != 0) + return -EINVAL; + + j = journal_new(flags, path); + if (!j) + return -ENOMEM; + + r = add_root_directory(j, path); + if (r < 0) + goto fail; + + *ret = j; + return 0; + +fail: + sd_journal_close(j); + + return r; +} + +_public_ void sd_journal_close(sd_journal *j) { + Directory *d; + JournalFile *f; + + if (!j) + return; + + while ((f = hashmap_steal_first(j->files))) + journal_file_close(f); + + hashmap_free(j->files); + + while ((d = hashmap_first(j->directories_by_path))) + remove_directory(j, d); + + while ((d = hashmap_first(j->directories_by_wd))) + remove_directory(j, d); + + hashmap_free(j->directories_by_path); + hashmap_free(j->directories_by_wd); + + if (j->inotify_fd >= 0) + close_nointr_nofail(j->inotify_fd); + + sd_journal_flush_matches(j); + + if (j->mmap) + mmap_cache_unref(j->mmap); + + free(j->path); + free(j->unique_field); + free(j); +} + +_public_ int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret) { Object *o; + JournalFile *f; int r; - assert(f); - assert(items || n_items == 0); + if (!j) + return -EINVAL; + if (!ret) + return -EINVAL; - osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem)); + f = j->current_file; + if (!f) + return -EADDRNOTAVAIL; + + if (f->current_offset <= 0) + return -EADDRNOTAVAIL; - r = journal_file_append_object(f, osize, &o, &np); + r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o); if (r < 0) return r; - o->object.type = htole64(OBJECT_ENTRY); - o->entry.seqnum = htole64(journal_file_seqnum(f)); - memcpy(o->entry.items, items, n_items * sizeof(EntryItem)); - o->entry.realtime = ts ? htole64(ts->realtime) : 0; - o->entry.monotonic = ts ? htole64(ts->monotonic) : 0; - o->entry.xor_hash = htole64(xor_hash); + *ret = le64toh(o->entry.realtime); + return 0; +} + +_public_ int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id128_t *ret_boot_id) { + Object *o; + JournalFile *f; + int r; + sd_id128_t id; + + if (!j) + return -EINVAL; + + f = j->current_file; + if (!f) + return -EADDRNOTAVAIL; + + if (f->current_offset <= 0) + return -EADDRNOTAVAIL; - r = journal_file_link_entry(f, o, np); + r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o); if (r < 0) return r; - if (ret) - *ret = o; + if (ret_boot_id) + *ret_boot_id = o->entry.boot_id; + else { + r = sd_id128_get_boot(&id); + if (r < 0) + return r; - if (offset) - *offset = np; + if (!sd_id128_equal(id, o->entry.boot_id)) + return -ESTALE; + } + + if (ret) + *ret = le64toh(o->entry.monotonic); return 0; } -int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, Object **ret, uint64_t *offset) { - unsigned i; - EntryItem *items; - int r; - uint64_t xor_hash = 0; +static bool field_is_valid(const char *field) { + const char *p; - assert(f); - assert(iovec || n_iovec == 0); + assert(field); - items = new(EntryItem, n_iovec); - if (!items) - return -ENOMEM; + if (isempty(field)) + return false; - for (i = 0; i < n_iovec; i++) { - uint64_t p; - Object *o; + if (startswith(field, "__")) + return false; - r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p); - if (r < 0) - goto finish; + for (p = field; *p; p++) { - xor_hash ^= le64toh(o->data.hash); - items[i].object_offset = htole64(p); - } + if (*p == '_') + continue; + + if (*p >= 'A' && *p <= 'Z') + continue; - r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, ret, offset); + if (*p >= '0' && *p <= '9') + continue; -finish: - free(items); + return false; + } - return r; + return true; } -int journal_file_move_to_entry(JournalFile *f, uint64_t seqnum, Object **ret, uint64_t *offset) { - Object *o; - uint64_t lower, upper, p, n, k; +_public_ int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *size) { + JournalFile *f; + uint64_t i, n; + size_t field_length; int r; + Object *o; - assert(f); + if (!j) + return -EINVAL; + if (!field) + return -EINVAL; + if (!data) + return -EINVAL; + if (!size) + return -EINVAL; - n = le64toh(f->header->bisect_table_size) / sizeof(uint64_t); - k = le64toh(f->header->arena_max_size) / n; + if (!field_is_valid(field)) + return -EINVAL; - lower = 0; - upper = le64toh(f->header->last_bisect_offset)/k+1; + f = j->current_file; + if (!f) + return -EADDRNOTAVAIL; - while (lower < upper) { - k = (upper + lower) / 2; - p = le64toh(f->bisect_table[k]); + if (f->current_offset <= 0) + return -EADDRNOTAVAIL; - if (p == 0) { - upper = k; - continue; - } + r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o); + if (r < 0) + return r; + + field_length = strlen(field); - r = journal_file_move_to_object(f, p, &o); + n = journal_file_entry_n_items(o); + for (i = 0; i < n; i++) { + uint64_t p, l; + le64_t le_hash; + size_t t; + + p = le64toh(o->entry.items[i].object_offset); + le_hash = o->entry.items[i].hash; + r = journal_file_move_to_object(f, OBJECT_DATA, p, &o); if (r < 0) return r; - if (o->object.type != htole64(OBJECT_ENTRY)) + if (le_hash != o->data.hash) return -EBADMSG; - if (o->entry.seqnum == seqnum) { - if (ret) - *ret = o; + l = le64toh(o->object.size) - offsetof(Object, data.payload); - if (offset) - *offset = p; + if (o->object.flags & OBJECT_COMPRESSED) { - return 1; - } else if (seqnum < o->entry.seqnum) - upper = k; - else if (seqnum > o->entry.seqnum) - lower = k+1; - } +#ifdef HAVE_XZ + if (uncompress_startswith(o->data.payload, l, + &f->compress_buffer, &f->compress_buffer_size, + field, field_length, '=')) { - assert(lower == upper); + uint64_t rsize; - if (lower <= 0) - return 0; + if (!uncompress_blob(o->data.payload, l, + &f->compress_buffer, &f->compress_buffer_size, &rsize)) + return -EBADMSG; - /* The object we are looking for is between - * bisect_table[lower-1] and bisect_table[lower] */ + *data = f->compress_buffer; + *size = (size_t) rsize; - p = le64toh(f->bisect_table[lower-1]); + return 0; + } +#else + return -EPROTONOSUPPORT; +#endif - for (;;) { - r = journal_file_move_to_object(f, p, &o); + } else if (l >= field_length+1 && + memcmp(o->data.payload, field, field_length) == 0 && + o->data.payload[field_length] == '=') { + + t = (size_t) l; + + if ((uint64_t) t != l) + return -E2BIG; + + *data = o->data.payload; + *size = t; + + return 0; + } + + r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o); if (r < 0) return r; + } - if (o->entry.seqnum == seqnum) { - if (ret) - *ret = o; + return -ENOENT; +} - if (offset) - *offset = p; +static int return_data(JournalFile *f, Object *o, const void **data, size_t *size) { + size_t t; + uint64_t l; - return 1; + l = le64toh(o->object.size) - offsetof(Object, data.payload); + t = (size_t) l; - } if (seqnum < o->entry.seqnum) - return 0; + /* We can't read objects larger than 4G on a 32bit machine */ + if ((uint64_t) t != l) + return -E2BIG; - if (o->entry.next_entry_offset == 0) - return 0; + if (o->object.flags & OBJECT_COMPRESSED) { +#ifdef HAVE_XZ + uint64_t rsize; + + if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize)) + return -EBADMSG; - p = le64toh(o->entry.next_entry_offset); + *data = f->compress_buffer; + *size = (size_t) rsize; +#else + return -EPROTONOSUPPORT; +#endif + } else { + *data = o->data.payload; + *size = t; } return 0; } -int journal_file_next_entry(JournalFile *f, Object *o, Object **ret, uint64_t *offset) { - uint64_t np; +_public_ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) { + JournalFile *f; + uint64_t p, n; + le64_t le_hash; int r; + Object *o; + + if (!j) + return -EINVAL; + if (!data) + return -EINVAL; + if (!size) + return -EINVAL; - assert(f); + f = j->current_file; + if (!f) + return -EADDRNOTAVAIL; - if (!o) - np = le64toh(f->header->head_entry_offset); - else { - if (le64toh(o->object.type) != OBJECT_ENTRY) - return -EINVAL; + if (f->current_offset <= 0) + return -EADDRNOTAVAIL; - np = le64toh(o->entry.next_entry_offset); - } + r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o); + if (r < 0) + return r; - if (np == 0) + n = journal_file_entry_n_items(o); + if (j->current_field >= n) return 0; - r = journal_file_move_to_object(f, np, &o); + p = le64toh(o->entry.items[j->current_field].object_offset); + le_hash = o->entry.items[j->current_field].hash; + r = journal_file_move_to_object(f, OBJECT_DATA, p, &o); if (r < 0) return r; - if (le64toh(o->object.type) != OBJECT_ENTRY) + if (le_hash != o->data.hash) return -EBADMSG; - if (ret) - *ret = o; + r = return_data(f, o, data, size); + if (r < 0) + return r; - if (offset) - *offset = np; + j->current_field ++; return 1; } -int journal_file_prev_entry(JournalFile *f, Object *o, Object **ret, uint64_t *offset) { - uint64_t np; - int r; +_public_ void sd_journal_restart_data(sd_journal *j) { + if (!j) + return; - assert(f); + j->current_field = 0; +} - if (!o) - np = le64toh(f->header->tail_entry_offset); - else { - if (le64toh(o->object.type) != OBJECT_ENTRY) - return -EINVAL; +_public_ int sd_journal_get_fd(sd_journal *j) { + int r; - np = le64toh(o->entry.prev_entry_offset); - } + if (!j) + return -EINVAL; - if (np == 0) - return 0; + if (j->inotify_fd >= 0) + return j->inotify_fd; - r = journal_file_move_to_object(f, np, &o); + r = allocate_inotify(j); if (r < 0) return r; - if (le64toh(o->object.type) != OBJECT_ENTRY) - return -EBADMSG; - - if (ret) - *ret = o; - - if (offset) - *offset = np; + /* Iterate through all dirs again, to add them to the + * inotify */ + if (j->path) + r = add_root_directory(j, j->path); + else + r = add_search_paths(j); + if (r < 0) + return r; - return 1; + return j->inotify_fd; } -int journal_file_find_first_entry(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) { - uint64_t p, osize, hash, h; +static void process_inotify_event(sd_journal *j, struct inotify_event *e) { + Directory *d; int r; - assert(f); - assert(data || size == 0); + assert(j); + assert(e); - osize = offsetof(Object, data.payload) + size; + /* Is this a subdirectory we watch? */ + d = hashmap_get(j->directories_by_wd, INT_TO_PTR(e->wd)); + if (d) { + sd_id128_t id; - hash = hash64(data, size); - h = hash % (le64toh(f->header->hash_table_size) / sizeof(HashItem)); - p = le64toh(f->hash_table[h].head_hash_offset); + if (!(e->mask & IN_ISDIR) && e->len > 0 && + (endswith(e->name, ".journal") || + endswith(e->name, ".journal~"))) { - while (p != 0) { - Object *o; + /* Event for a journal file */ - r = journal_file_move_to_object(f, p, &o); - if (r < 0) - return r; + if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) { + r = add_file(j, d->path, e->name); + if (r < 0) + log_debug("Failed to add file %s/%s: %s", d->path, e->name, strerror(-r)); - if (le64toh(o->object.type) != OBJECT_DATA) - return -EBADMSG; + } else if (e->mask & (IN_DELETE|IN_MOVED_FROM|IN_UNMOUNT)) { - if (le64toh(o->object.size) == osize && - memcmp(o->data.payload, data, size) == 0) { + r = remove_file(j, d->path, e->name); + if (r < 0) + log_debug("Failed to remove file %s/%s: %s", d->path, e->name, strerror(-r)); + } - if (le64toh(o->data.hash) != hash) - return -EBADMSG; + } else if (!d->is_root && e->len == 0) { - if (o->data.head_entry_offset == 0) - return 0; + /* Event for a subdirectory */ - p = le64toh(o->data.head_entry_offset); - r = journal_file_move_to_object(f, p, &o); - if (r < 0) - return r; + if (e->mask & (IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT)) { + r = remove_directory(j, d); + if (r < 0) + log_debug("Failed to remove directory %s: %s", d->path, strerror(-r)); + } - if (le64toh(o->object.type) != OBJECT_ENTRY) - return -EBADMSG; - if (ret) - *ret = o; + } else if (d->is_root && (e->mask & IN_ISDIR) && e->len > 0 && sd_id128_from_string(e->name, &id) >= 0) { - if (offset) - *offset = p; + /* Event for root directory */ - return 1; + if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) { + r = add_directory(j, d->path, e->name); + if (r < 0) + log_debug("Failed to add directory %s/%s: %s", d->path, e->name, strerror(-r)); + } } - p = le64toh(o->data.next_hash_offset); + return; } - return 0; -} + if (e->mask & IN_IGNORED) + return; -int journal_file_find_last_entry(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) { - uint64_t p, osize, hash, h; - int r; + log_warning("Unknown inotify event."); +} - assert(f); - assert(data || size == 0); +static int determine_change(sd_journal *j) { + bool b; - osize = offsetof(Object, data.payload) + size; + assert(j); - hash = hash64(data, size); - h = hash % (le64toh(f->header->hash_table_size) / sizeof(HashItem)); - p = le64toh(f->hash_table[h].tail_hash_offset); + b = j->current_invalidate_counter != j->last_invalidate_counter; + j->last_invalidate_counter = j->current_invalidate_counter; - while (p != 0) { - Object *o; + return b ? SD_JOURNAL_INVALIDATE : SD_JOURNAL_APPEND; +} - r = journal_file_move_to_object(f, p, &o); - if (r < 0) - return r; +_public_ int sd_journal_process(sd_journal *j) { + uint8_t buffer[sizeof(struct inotify_event) + FILENAME_MAX] _alignas_(struct inotify_event); + bool got_something = false; - if (le64toh(o->object.type) != OBJECT_DATA) - return -EBADMSG; + if (!j) + return -EINVAL; - if (le64toh(o->object.size) == osize && - memcmp(o->data.payload, data, size) == 0) { + for (;;) { + struct inotify_event *e; + ssize_t l; - if (le64toh(o->data.hash) != hash) - return -EBADMSG; + l = read(j->inotify_fd, buffer, sizeof(buffer)); + if (l < 0) { + if (errno == EAGAIN || errno == EINTR) + return got_something ? determine_change(j) : SD_JOURNAL_NOP; - if (o->data.tail_entry_offset == 0) - return 0; + return -errno; + } - p = le64toh(o->data.tail_entry_offset); - r = journal_file_move_to_object(f, p, &o); - if (r < 0) - return r; + got_something = true; - if (le64toh(o->object.type) != OBJECT_ENTRY) - return -EBADMSG; + e = (struct inotify_event*) buffer; + while (l > 0) { + size_t step; - if (ret) - *ret = o; + process_inotify_event(j, e); - if (offset) - *offset = p; + step = sizeof(struct inotify_event) + e->len; + assert(step <= (size_t) l); - return 1; + e = (struct inotify_event*) ((uint8_t*) e + step); + l -= step; } - - p = le64toh(o->data.prev_hash_offset); } - return 0; + return determine_change(j); } -void journal_file_dump(JournalFile *f) { - char a[33], b[33], c[33]; - Object *o; +_public_ int sd_journal_wait(sd_journal *j, uint64_t timeout_usec) { int r; - uint64_t p; - assert(f); - - printf("File ID: %s\n" - "Machine ID: %s\n" - "Boot ID: %s\n" - "Arena size: %llu\n", - sd_id128_to_string(f->header->file_id, a), - sd_id128_to_string(f->header->machine_id, b), - sd_id128_to_string(f->header->boot_id, c), - (unsigned long long) le64toh(f->header->arena_size)); - - p = le64toh(f->header->head_object_offset); - while (p != 0) { - r = journal_file_move_to_object(f, p, &o); - if (r < 0) - goto fail; + assert(j); - switch (o->object.type) { + if (j->inotify_fd < 0) { - case OBJECT_UNUSED: - printf("Type: OBJECT_UNUSED\n"); - break; + /* This is the first invocation, hence create the + * inotify watch */ + r = sd_journal_get_fd(j); + if (r < 0) + return r; - case OBJECT_DATA: - printf("Type: OBJECT_DATA\n"); - break; + /* The journal might have changed since the context + * object was created and we weren't watching before, + * hence don't wait for anything, and return + * immediately. */ + return determine_change(j); + } - case OBJECT_ENTRY: - printf("Type: OBJECT_ENTRY %llu\n", (unsigned long long) le64toh(o->entry.seqnum)); - break; + if (j->on_network) { + /* If we are on the network we need to regularly check + * for changes manually */ - case OBJECT_HASH_TABLE: - printf("Type: OBJECT_HASH_TABLE\n"); - break; + if (timeout_usec == (uint64_t) -1 || timeout_usec > JOURNAL_FILES_RECHECK_USEC) + timeout_usec = JOURNAL_FILES_RECHECK_USEC; + } - case OBJECT_BISECT_TABLE: - printf("Type: OBJECT_BISECT_TABLE\n"); - break; - } + do { + r = fd_wait_for_event(j->inotify_fd, POLLIN, timeout_usec); + } while (r == -EINTR); - if (p == le64toh(f->header->tail_object_offset)) - p = 0; - else - p = p + ALIGN64(le64toh(o->object.size)); - } + if (r < 0) + return r; - return; -fail: - log_error("File corrupt"); + return sd_journal_process(j); } -int journal_file_open( - sd_journal *j, - const char *fname, - int flags, - mode_t mode, - JournalFile **ret) { - +_public_ int sd_journal_get_cutoff_realtime_usec(sd_journal *j, uint64_t *from, uint64_t *to) { + Iterator i; JournalFile *f; + bool first = true; int r; - bool newly_created = false; - assert(fname); - - if ((flags & O_ACCMODE) != O_RDONLY && - (flags & O_ACCMODE) != O_RDWR) + if (!j) + return -EINVAL; + if (!from && !to) return -EINVAL; - f = new0(JournalFile, 1); - if (!f) - return -ENOMEM; + HASHMAP_FOREACH(f, j->files, i) { + usec_t fr, t; - f->writable = (flags & O_ACCMODE) != O_RDONLY; - f->prot = prot_from_flags(flags); + r = journal_file_get_cutoff_realtime_usec(f, &fr, &t); + if (r == -ENOENT) + continue; + if (r < 0) + return r; + if (r == 0) + continue; - f->fd = open(fname, flags|O_CLOEXEC, mode); - if (f->fd < 0) { - r = -errno; - goto fail; + if (first) { + if (from) + *from = fr; + if (to) + *to = t; + first = false; + } else { + if (from) + *from = MIN(fr, *from); + if (to) + *to = MAX(t, *to); + } } - f->path = strdup(fname); - if (!f->path) { - r = -ENOMEM; - goto fail; - } + return first ? 0 : 1; +} - if (fstat(f->fd, &f->last_stat) < 0) { - r = -errno; - goto fail; - } +_public_ int sd_journal_get_cutoff_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t *from, uint64_t *to) { + Iterator i; + JournalFile *f; + bool first = true; + int r; + + if (!j) + return -EINVAL; + if (!from && !to) + return -EINVAL; - if (f->last_stat.st_size == 0 && f->writable) { - newly_created = true; + HASHMAP_FOREACH(f, j->files, i) { + usec_t fr, t; - r = journal_file_init_header(f); + r = journal_file_get_cutoff_monotonic_usec(f, boot_id, &fr, &t); + if (r == -ENOENT) + continue; if (r < 0) - goto fail; + return r; + if (r == 0) + continue; - if (fstat(f->fd, &f->last_stat) < 0) { - r = -errno; - goto fail; + if (first) { + if (from) + *from = fr; + if (to) + *to = t; + first = false; + } else { + if (from) + *from = MIN(fr, *from); + if (to) + *to = MAX(t, *to); } } - if (f->last_stat.st_size < (off_t) sizeof(Header)) { - r = -EIO; - goto fail; - } + return first ? 0 : 1; +} - f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0); - if (f->header == MAP_FAILED) { - f->header = NULL; - r = -errno; - goto fail; - } +void journal_print_header(sd_journal *j) { + Iterator i; + JournalFile *f; + bool newline = false; - if (!newly_created) { - r = journal_file_verify_header(f); - if (r < 0) - goto fail; - } + assert(j); - if (f->writable) { - r = journal_file_refresh_header(f); - if (r < 0) - goto fail; + HASHMAP_FOREACH(f, j->files, i) { + if (newline) + putchar('\n'); + else + newline = true; + + journal_file_print_header(f); } +} - if (newly_created) { +_public_ int sd_journal_get_usage(sd_journal *j, uint64_t *bytes) { + Iterator i; + JournalFile *f; + uint64_t sum = 0; - r = journal_file_setup_hash_table(f); - if (r < 0) - goto fail; + if (!j) + return -EINVAL; + if (!bytes) + return -EINVAL; - r = journal_file_setup_bisect_table(f); - if (r < 0) - goto fail; + HASHMAP_FOREACH(f, j->files, i) { + struct stat st; + + if (fstat(f->fd, &st) < 0) + return -errno; + + sum += (uint64_t) st.st_blocks * 512ULL; } - r = journal_file_map_hash_table(f); - if (r < 0) - goto fail; + *bytes = sum; + return 0; +} - r = journal_file_map_bisect_table(f); - if (r < 0) - goto fail; +_public_ int sd_journal_query_unique(sd_journal *j, const char *field) { + char *f; - if (j) { - LIST_PREPEND(JournalFile, files, j->files, f); - f->journal = j; - } + if (!j) + return -EINVAL; + if (isempty(field)) + return -EINVAL; + if (!field_is_valid(field)) + return -EINVAL; - if (ret) - *ret = f; + f = strdup(field); + if (!f) + return -ENOMEM; + + free(j->unique_field); + j->unique_field = f; + j->unique_file = NULL; + j->unique_offset = 0; return 0; +} -fail: - journal_file_close(f); +_public_ int sd_journal_enumerate_unique(sd_journal *j, const void **data, size_t *l) { + Object *o; + size_t k; + int r; - return r; -} + if (!j) + return -EINVAL; + if (!data) + return -EINVAL; + if (!l) + return -EINVAL; + if (!j->unique_field) + return -EINVAL; -int sd_journal_open(sd_journal **ret) { - sd_journal *j; - char *fn; - const char *p; - int r = 0; - const char search_paths[] = - "/run/log/journal\0" - "/var/log/journal\0"; + k = strlen(j->unique_field); - assert(ret); + if (!j->unique_file) { + j->unique_file = hashmap_first(j->files); + if (!j->unique_file) + return 0; + j->unique_offset = 0; + } - j = new0(sd_journal, 1); - if (!j) - return -ENOMEM; + for (;;) { + JournalFile *of; + Iterator i; + const void *odata; + size_t ol; + bool found; + + /* Proceed to next data object in the field's linked list */ + if (j->unique_offset == 0) { + r = journal_file_find_field_object(j->unique_file, j->unique_field, k, &o, NULL); + if (r < 0) + return r; - NULSTR_FOREACH(p, search_paths) { - DIR *d; + j->unique_offset = r > 0 ? le64toh(o->field.head_data_offset) : 0; + } else { + r = journal_file_move_to_object(j->unique_file, OBJECT_DATA, j->unique_offset, &o); + if (r < 0) + return r; + + j->unique_offset = le64toh(o->data.next_field_offset); + } - d = opendir(p); - if (!d) { - if (errno != ENOENT && r == 0) - r = -errno; + /* We reached the end of the list? Then start again, with the next file */ + if (j->unique_offset == 0) { + JournalFile *n; + n = hashmap_next(j->files, j->unique_file->path); + if (!n) + return 0; + + j->unique_file = n; continue; } - for (;;) { - struct dirent buf, *de; - int k; + /* We do not use the type context here, but 0 instead, + * so that we can look at this data object at the same + * time as one on another file */ + r = journal_file_move_to_object(j->unique_file, 0, j->unique_offset, &o); + if (r < 0) + return r; - k = readdir_r(d, &buf, &de); - if (k != 0) { - if (r == 0) - r = -k; + /* Let's do the type check by hand, since we used 0 context above. */ + if (o->object.type != OBJECT_DATA) + return -EBADMSG; - break; - } + r = return_data(j->unique_file, o, &odata, &ol); + if (r < 0) + return r; - if (!de) + /* OK, now let's see if we already returned this data + * object by checking if it exists in the earlier + * traversed files. */ + found = false; + HASHMAP_FOREACH(of, j->files, i) { + Object *oo; + uint64_t op; + + if (of == j->unique_file) break; - if (!dirent_is_file_with_suffix(de, ".journal")) + /* Skip this file it didn't have any fields + * indexed */ + if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) && + le64toh(of->header->n_fields) <= 0) continue; - fn = join(p, "/", de->d_name, NULL); - if (!fn) { - r = -ENOMEM; - closedir(d); - goto fail; - } - - k = journal_file_open(j, fn, O_RDONLY, 0, NULL); - if (k < 0 && r == 0) - r = -k; + r = journal_file_find_data_object_with_hash(of, odata, ol, le64toh(o->data.hash), &oo, &op); + if (r < 0) + return r; - free(fn); + if (r > 0) + found = true; } - } - if (!j->files) { - if (r >= 0) - r = -ENOENT; + if (found) + continue; - goto fail; + r = return_data(j->unique_file, o, data, l); + if (r < 0) + return r; + + return 1; } +} - *ret = j; - return 0; +_public_ void sd_journal_restart_unique(sd_journal *j) { + if (!j) + return; -fail: - sd_journal_close(j); + j->unique_file = NULL; + j->unique_offset = 0; +} - return r; -}; +_public_ int sd_journal_reliable_fd(sd_journal *j) { + if (!j) + return -EINVAL; + + return !j->on_network; +} + +static char *lookup_field(const char *field, void *userdata) { + sd_journal *j = userdata; + const void *data; + size_t size, d; + int r; -void sd_journal_close(sd_journal *j) { + assert(field); assert(j); - while (j->files) - journal_file_close(j->files); + r = sd_journal_get_data(j, field, &data, &size); + if (r < 0 || + size > REPLACE_VAR_MAX) + return strdup(field); - free(j); + d = strlen(field) + 1; + + return strndup((const char*) data + d, size - d); +} + +_public_ int sd_journal_get_catalog(sd_journal *j, char **ret) { + const void *data; + size_t size; + sd_id128_t id; + _cleanup_free_ char *text = NULL, *cid = NULL; + char *t; + int r; + + if (!j) + return -EINVAL; + if (!ret) + return -EINVAL; + + r = sd_journal_get_data(j, "MESSAGE_ID", &data, &size); + if (r < 0) + return r; + + cid = strndup((const char*) data + 11, size - 11); + if (!cid) + return -ENOMEM; + + r = sd_id128_from_string(cid, &id); + if (r < 0) + return r; + + r = catalog_get(id, &text); + if (r < 0) + return r; + + t = replace_var(text, lookup_field, j); + if (!t) + return -ENOMEM; + + *ret = t; + return 0; }