chiark / gitweb /
build-sys: use glibc's xattr support instead of requiring libattr
[elogind.git] / src / journal / journal-file.c
index d606adaf8e38dba7d3b3237c7b01712d88951e74..b3b1ffc3c07138be9e809ebe1f8fa43e88a832c6 100644 (file)
 #include <sys/statvfs.h>
 #include <fcntl.h>
 #include <stddef.h>
-
-#ifdef HAVE_XATTR
-#include <attr/xattr.h>
-#endif
+#include <sys/xattr.h>
 
 #include "journal-def.h"
 #include "journal-file.h"
@@ -133,9 +130,7 @@ void journal_file_close(JournalFile *f) {
         if (f->header)
                 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
 
-        if (f->fd >= 0)
-                close_nointr_nofail(f->fd);
-
+        safe_close(f->fd);
         free(f->path);
 
         if (f->mmap)
@@ -315,7 +310,7 @@ static int journal_file_verify_header(JournalFile *f) {
 }
 
 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
-        uint64_t old_size, new_size, file_size;
+        uint64_t old_size, new_size;
         int r;
 
         assert(f);
@@ -356,6 +351,11 @@ static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size)
                 }
         }
 
+        /* Increase by larger blocks at once */
+        new_size = ((new_size+FILE_SIZE_INCREASE-1) / FILE_SIZE_INCREASE) * FILE_SIZE_INCREASE;
+        if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
+                new_size = f->metrics.max_size;
+
         /* Note that the glibc fallocate() fallback is very
            inefficient, hence we try to minimize the allocation area
            as we can. */
@@ -363,16 +363,8 @@ static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size)
         if (r != 0)
                 return -r;
 
-        /* Increase the file size a bit further than this, so that we
-         * we can create larger memory maps to cache */
-        file_size = ((new_size+FILE_SIZE_INCREASE-1) / FILE_SIZE_INCREASE) * FILE_SIZE_INCREASE;
-        if (file_size > (uint64_t) f->last_stat.st_size) {
-                if (file_size > new_size)
-                        ftruncate(f->fd, file_size);
-
-                if (fstat(f->fd, &f->last_stat) < 0)
-                        return -errno;
-        }
+        if (fstat(f->fd, &f->last_stat) < 0)
+                return -errno;
 
         f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
 
@@ -401,7 +393,7 @@ static int journal_file_move_to(JournalFile *f, int context, bool keep_always, u
 
 static uint64_t minimum_header_size(Object *o) {
 
-        static uint64_t table[] = {
+        static const uint64_t table[] = {
                 [OBJECT_DATA] = sizeof(DataObject),
                 [OBJECT_FIELD] = sizeof(FieldObject),
                 [OBJECT_ENTRY] = sizeof(EntryObject),
@@ -422,7 +414,6 @@ int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Objec
         void *t;
         Object *o;
         uint64_t s;
-        unsigned context;
 
         assert(f);
         assert(ret);
@@ -431,10 +422,8 @@ int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Objec
         if (!VALID64(offset))
                 return -EFAULT;
 
-        /* One context for each type, plus one catch-all for the rest */
-        context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
 
-        r = journal_file_move_to(f, context, false, offset, sizeof(ObjectHeader), &t);
+        r = journal_file_move_to(f, type_to_context(type), false, offset, sizeof(ObjectHeader), &t);
         if (r < 0)
                 return r;
 
@@ -566,7 +555,7 @@ static int journal_file_setup_data_hash_table(JournalFile *f) {
         if (r < 0)
                 return r;
 
-        memset(o->hash_table.items, 0, s);
+        memzero(o->hash_table.items, s);
 
         f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
         f->header->data_hash_table_size = htole64(s);
@@ -592,7 +581,7 @@ static int journal_file_setup_field_hash_table(JournalFile *f) {
         if (r < 0)
                 return r;
 
-        memset(o->hash_table.items, 0, s);
+        memzero(o->hash_table.items, s);
 
         f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
         f->header->field_hash_table_size = htole64(s);
@@ -1013,10 +1002,13 @@ static int journal_file_append_data(
         if (r < 0)
                 return r;
 
-        eq = memchr(data, '=', size);
+        if (!data)
+                eq = NULL;
+        else
+                eq = memchr(data, '=', size);
         if (eq && eq > data) {
+                Object *fo = NULL;
                 uint64_t fp;
-                Object *fo;
 
                 /* Create field object ... */
                 r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, &fp);
@@ -1362,10 +1354,11 @@ int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const st
 }
 
 typedef struct ChainCacheItem {
-        uint64_t first; /* the array at the begin of the chain */
+        uint64_t first; /* the array at the beginning of the chain */
         uint64_t array; /* the cached array */
         uint64_t begin; /* the first item in the cached array */
         uint64_t total; /* the total number of items in all arrays before this one in the chain */
+        uint64_t last_index; /* the last index we looked at, to optimize locality when bisecting */
 } ChainCacheItem;
 
 static void chain_cache_put(
@@ -1374,7 +1367,8 @@ static void chain_cache_put(
                 uint64_t first,
                 uint64_t array,
                 uint64_t begin,
-                uint64_t total) {
+                uint64_t total,
+                uint64_t last_index) {
 
         if (!ci) {
                 /* If the chain item to cache for this chain is the
@@ -1402,12 +1396,14 @@ static void chain_cache_put(
         ci->array = array;
         ci->begin = begin;
         ci->total = total;
+        ci->last_index = last_index;
 }
 
-static int generic_array_get(JournalFile *f,
-                             uint64_t first,
-                             uint64_t i,
-                             Object **ret, uint64_t *offset) {
+static int generic_array_get(
+                JournalFile *f,
+                uint64_t first,
+                uint64_t i,
+                Object **ret, uint64_t *offset) {
 
         Object *o;
         uint64_t p = 0, a, t = 0;
@@ -1448,7 +1444,7 @@ static int generic_array_get(JournalFile *f,
 
 found:
         /* Let's cache this item for the next invocation */
-        chain_cache_put(f->chain_cache, ci, first, a, o->entry_array.items[0], t);
+        chain_cache_put(f->chain_cache, ci, first, a, le64toh(o->entry_array.items[0]), t, i);
 
         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
         if (r < 0)
@@ -1463,11 +1459,12 @@ found:
         return 1;
 }
 
-static int generic_array_get_plus_one(JournalFile *f,
-                                      uint64_t extra,
-                                      uint64_t first,
-                                      uint64_t i,
-                                      Object **ret, uint64_t *offset) {
+static int generic_array_get_plus_one(
+                JournalFile *f,
+                uint64_t extra,
+                uint64_t first,
+                uint64_t i,
+                Object **ret, uint64_t *offset) {
 
         Object *o;
 
@@ -1498,17 +1495,18 @@ enum {
         TEST_RIGHT
 };
 
-static int generic_array_bisect(JournalFile *f,
-                                uint64_t first,
-                                uint64_t n,
-                                uint64_t needle,
-                                int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
-                                direction_t direction,
-                                Object **ret,
-                                uint64_t *offset,
-                                uint64_t *idx) {
-
-        uint64_t a, p, t = 0, i = 0, last_p = 0;
+static int generic_array_bisect(
+                JournalFile *f,
+                uint64_t first,
+                uint64_t n,
+                uint64_t needle,
+                int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
+                direction_t direction,
+                Object **ret,
+                uint64_t *offset,
+                uint64_t *idx) {
+
+        uint64_t a, p, t = 0, i = 0, last_p = 0, last_index = (uint64_t) -1;
         bool subtract_one = false;
         Object *o, *array = NULL;
         int r;
@@ -1533,7 +1531,7 @@ static int generic_array_bisect(JournalFile *f,
                         return r;
 
                 if (r == TEST_LEFT) {
-                        /* OK, what we are looking for is right of th
+                        /* OK, what we are looking for is right of the
                          * begin of this EntryArray, so let's jump
                          * straight to previously cached array in the
                          * chain */
@@ -1541,6 +1539,7 @@ static int generic_array_bisect(JournalFile *f,
                         a = ci->array;
                         n -= ci->total;
                         t = ci->total;
+                        last_index = ci->last_index;
                 }
         }
 
@@ -1571,6 +1570,58 @@ static int generic_array_bisect(JournalFile *f,
                 if (r == TEST_RIGHT) {
                         left = 0;
                         right -= 1;
+
+                        if (last_index != (uint64_t) -1) {
+                                assert(last_index <= right);
+
+                                /* If we cached the last index we
+                                 * looked at, let's try not to jump
+                                 * too wildly around and see if we can
+                                 * limit the range to look at early to
+                                 * the immediate neighbors of the last
+                                 * index we looked at. */
+
+                                if (last_index > 0) {
+                                        uint64_t x = last_index - 1;
+
+                                        p = le64toh(array->entry_array.items[x]);
+                                        if (p <= 0)
+                                                return -EBADMSG;
+
+                                        r = test_object(f, p, needle);
+                                        if (r < 0)
+                                                return r;
+
+                                        if (r == TEST_FOUND)
+                                                r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
+
+                                        if (r == TEST_RIGHT)
+                                                right = x;
+                                        else
+                                                left = x + 1;
+                                }
+
+                                if (last_index < right) {
+                                        uint64_t y = last_index + 1;
+
+                                        p = le64toh(array->entry_array.items[y]);
+                                        if (p <= 0)
+                                                return -EBADMSG;
+
+                                        r = test_object(f, p, needle);
+                                        if (r < 0)
+                                                return r;
+
+                                        if (r == TEST_FOUND)
+                                                r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
+
+                                        if (r == TEST_RIGHT)
+                                                right = y;
+                                        else
+                                                left = y + 1;
+                                }
+                        }
+
                         for (;;) {
                                 if (left == right) {
                                         if (direction == DIRECTION_UP)
@@ -1581,8 +1632,8 @@ static int generic_array_bisect(JournalFile *f,
                                 }
 
                                 assert(left < right);
-
                                 i = (left + right) / 2;
+
                                 p = le64toh(array->entry_array.items[i]);
                                 if (p <= 0)
                                         return -EBADMSG;
@@ -1615,6 +1666,7 @@ static int generic_array_bisect(JournalFile *f,
 
                 n -= k;
                 t += k;
+                last_index = (uint64_t) -1;
                 a = le64toh(array->entry_array.next_entry_array_offset);
         }
 
@@ -1625,7 +1677,7 @@ found:
                 return 0;
 
         /* Let's cache this item for the next invocation */
-        chain_cache_put(f->chain_cache, ci, first, a, array->entry_array.items[0], t);
+        chain_cache_put(f->chain_cache, ci, first, a, le64toh(array->entry_array.items[0]), t, subtract_one ? (i > 0 ? i-1 : (uint64_t) -1) : i);
 
         if (subtract_one && i == 0)
                 p = last_p;
@@ -1650,16 +1702,18 @@ found:
         return 1;
 }
 
-static int generic_array_bisect_plus_one(JournalFile *f,
-                                         uint64_t extra,
-                                         uint64_t first,
-                                         uint64_t n,
-                                         uint64_t needle,
-                                         int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
-                                         direction_t direction,
-                                         Object **ret,
-                                         uint64_t *offset,
-                                         uint64_t *idx) {
+
+static int generic_array_bisect_plus_one(
+                JournalFile *f,
+                uint64_t extra,
+                uint64_t first,
+                uint64_t n,
+                uint64_t needle,
+                int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
+                direction_t direction,
+                Object **ret,
+                uint64_t *offset,
+                uint64_t *idx) {
 
         int r;
         bool step_back = false;
@@ -1886,7 +1940,7 @@ int journal_file_next_entry(
                 direction_t direction,
                 Object **ret, uint64_t *offset) {
 
-        uint64_t i, n;
+        uint64_t i, n, ofs;
         int r;
 
         assert(f);
@@ -1927,10 +1981,24 @@ int journal_file_next_entry(
         }
 
         /* And jump to it */
-        return generic_array_get(f,
-                                 le64toh(f->header->entry_array_offset),
-                                 i,
-                                 ret, offset);
+        r = generic_array_get(f,
+                              le64toh(f->header->entry_array_offset),
+                              i,
+                              ret, &ofs);
+        if (r <= 0)
+                return r;
+
+        if (p > 0 &&
+            (direction == DIRECTION_DOWN ? ofs <= p : ofs >= p)) {
+                log_debug("%s: entry array corrupted at entry %"PRIu64,
+                          f->path, i);
+                return -EBADMSG;
+        }
+
+        if (offset)
+                *offset = ofs;
+
+        return 1;
 }
 
 int journal_file_skip_entry(
@@ -2151,8 +2219,6 @@ int journal_file_move_to_entry_by_monotonic_for_data(
 
                 z = q;
         }
-
-        return 0;
 }
 
 int journal_file_move_to_entry_by_seqnum_for_data(
@@ -2442,7 +2508,6 @@ int journal_file_open(
         }
 
         if (f->last_stat.st_size == 0 && f->writable) {
-#ifdef HAVE_XATTR
                 uint64_t crtime;
 
                 /* Let's attach the creation time to the journal file,
@@ -2457,7 +2522,6 @@ int journal_file_open(
 
                 crtime = htole64((uint64_t) now(CLOCK_REALTIME));
                 fsetxattr(f->fd, "user.crtime_usec", &crtime, sizeof(crtime), XATTR_CREATE);
-#endif
 
 #ifdef HAVE_GCRYPT
                 /* Try to load the FSPRG state, and if we can't, then
@@ -2634,10 +2698,10 @@ int journal_file_open_reliably(
         /* The file is corrupted. Rotate it away and try it again (but only once) */
 
         l = strlen(fname);
-        if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
+        if (asprintf(&p, "%.*s@%016llx-%016" PRIx64 ".journal~",
                      (int) l - 8, fname,
                      (unsigned long long) now(CLOCK_REALTIME),
-                     random_ull()) < 0)
+                     random_u64()) < 0)
                 return -ENOMEM;
 
         r = rename(fname, p);
@@ -2668,12 +2732,9 @@ int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint6
         ts.monotonic = le64toh(o->entry.monotonic);
         ts.realtime = le64toh(o->entry.realtime);
 
-        if (to->tail_entry_monotonic_valid &&
-            ts.monotonic < le64toh(to->header->tail_entry_monotonic))
-                return -EINVAL;
-
         n = journal_file_entry_n_items(o);
-        items = alloca(sizeof(EntryItem) * n);
+        /* alloca() can't take 0, hence let's allocate at least one */
+        items = alloca(sizeof(EntryItem) * MAX(1u, n));
 
         for (i = 0; i < n; i++) {
                 uint64_t l, h;