chiark / gitweb /
journal: add superficial structure verifier
author Lennart Poettering <lennart@poettering.net>
Wed, 15 Aug 2012 23:20:32 +0000 (01:20 +0200)
committer Lennart Poettering <lennart@poettering.net>
Thu, 16 Aug 2012 15:10:57 +0000 (17:10 +0200)
src/journal/journal-file.c
src/journal/journald.c
src/journal/mmap-cache.c
src/journal/mmap-cache.h

index 709e15e..7beedb4 100644 (file)
@@ -299,6 +299,8 @@ static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size)
         if (r != 0)
                 return -r;
 
+        mmap_cache_close_fd_range(f->mmap, f->fd, old_size);
+
         if (fstat(f->fd, &f->last_stat) < 0)
                 return -errno;
 
@@ -2335,18 +2337,86 @@ static void flush_progress(void) {
         fflush(stdout);
 }
 
+/* Writes the raw bytes of the 64 bit value p to fd with a single
+ * write() call.
+ *
+ * Returns 0 if all sizeof(p) bytes were written, -errno if write()
+ * failed, and -EIO if fewer bytes than requested were written. */
+static int write_uint64(int fd, uint64_t p) {
+        ssize_t written = write(fd, &p, sizeof(p));
+
+        if (written < 0)
+                return -errno;
+
+        /* A short write is reported as an error rather than retried */
+        return written == sizeof(p) ? 0 : -EIO;
+}
+
+/* Checks whether the value p occurs among the first n uint64_t values
+ * stored in fd. The values are read through the mmap cache m (using
+ * context 0) and are looked up by bisection, hence they must be stored
+ * in ascending order.
+ *
+ * Returns 1 if p was found, 0 if it was not, and a negative value if
+ * mmap_cache_get() failed. Note that a negative result is non-zero, so
+ * callers that only test for "!= 0" have to check for errors first. */
+static int contains_uint64(MMapCache *m, int fd, uint64_t n, uint64_t p) {
+        uint64_t a, b;
+        int r;
+
+        assert(m);
+        assert(fd >= 0);
+
+        /* Bisection over the half-open index range [a, b) */
+
+        a = 0; b = n;
+        while (a < b) {
+                uint64_t c, *z;
+
+                /* Midpoint, written so that it cannot overflow */
+                c = a + (b - a) / 2;
+
+                r = mmap_cache_get(m, fd, PROT_READ, 0, c * sizeof(uint64_t), sizeof(uint64_t), (void **) &z);
+                if (r < 0)
+                        return r;
+
+                if (*z == p)
+                        return 1;
+
+                /* Element c is ruled out at this point, so it must be
+                 * excluded from both halves. Continuing with a = c
+                 * would never terminate once b == a + 1, because the
+                 * midpoint is then a again. */
+                if (p < *z)
+                        b = c;
+                else
+                        a = c + 1;
+        }
+
+        return 0;
+}
+
 int journal_file_verify(JournalFile *f, const char *key) {
         int r;
         Object *o;
-        uint64_t p = 0, q = 0, e;
+        uint64_t p = 0;
         uint64_t tag_seqnum = 0, entry_seqnum = 0, entry_monotonic = 0, entry_realtime = 0;
         sd_id128_t entry_boot_id;
         bool entry_seqnum_set = false, entry_monotonic_set = false, entry_realtime_set = false, found_main_entry_array = false;
-        uint64_t n_weird = 0, n_objects = 0, n_entries = 0, n_data = 0, n_fields = 0, n_data_hash_tables = 0, n_field_hash_tables = 0;
+        uint64_t n_weird = 0, n_objects = 0, n_entries = 0, n_data = 0, n_fields = 0, n_data_hash_tables = 0, n_field_hash_tables = 0, n_entry_arrays = 0;
         usec_t last_usec = 0;
+        int data_fd = -1, entry_fd = -1, entry_array_fd = -1;
+        char data_path[] = "/var/tmp/journal-data-XXXXXX",
+                entry_path[] = "/var/tmp/journal-entry-XXXXXX",
+                entry_array_path[] = "/var/tmp/journal-entry-array-XXXXXX";
 
         assert(f);
 
+        data_fd = mkostemp(data_path, O_CLOEXEC);
+        if (data_fd < 0) {
+                log_error("Failed to create data file: %m");
+                goto fail;
+        }
+        unlink(data_path);
+
+        entry_fd = mkostemp(entry_path, O_CLOEXEC);
+        if (entry_fd < 0) {
+                log_error("Failed to create entry file: %m");
+                goto fail;
+        }
+        unlink(entry_path);
+
+        entry_array_fd = mkostemp(entry_array_path, O_CLOEXEC);
+        if (entry_array_fd < 0) {
+                log_error("Failed to create entry array file: %m");
+                goto fail;
+        }
+        unlink(entry_array_path);
+
         /* First iteration: we go through all objects, verify the
          * superficial structure, headers, hashes. */
 
@@ -2358,7 +2428,7 @@ int journal_file_verify(JournalFile *f, const char *key) {
 
         p = le64toh(f->header->header_size);
         while (p != 0) {
-                draw_progress((65535ULL * p / le64toh(f->header->tail_object_offset)), &last_usec);
+                draw_progress((0x7FFF * p) / le64toh(f->header->tail_object_offset), &last_usec);
 
                 r = journal_file_move_to_object(f, -1, p, &o);
                 if (r < 0) {
@@ -2416,6 +2486,10 @@ int journal_file_verify(JournalFile *f, const char *key) {
 
                 } else if (o->object.type == OBJECT_ENTRY) {
 
+                        r = write_uint64(entry_fd, p);
+                        if (r < 0)
+                                goto fail;
+
                         if (!entry_seqnum_set &&
                             le64toh(o->entry.seqnum) != le64toh(f->header->head_entry_seqnum)) {
                                 log_error("Head entry sequence number incorrect");
@@ -2458,6 +2532,10 @@ int journal_file_verify(JournalFile *f, const char *key) {
                         n_entries ++;
                 } else if (o->object.type == OBJECT_ENTRY_ARRAY) {
 
+                        r = write_uint64(entry_array_fd, p);
+                        if (r < 0)
+                                goto fail;
+
                         if (p == le64toh(f->header->entry_array_offset)) {
                                 if (found_main_entry_array) {
                                         log_error("More than one main entry array at %llu", (unsigned long long) p);
@@ -2468,9 +2546,17 @@ int journal_file_verify(JournalFile *f, const char *key) {
                                 found_main_entry_array = true;
                         }
 
-                } else if (o->object.type == OBJECT_DATA)
+                        n_entry_arrays++;
+
+                } else if (o->object.type == OBJECT_DATA) {
+
+                        r = write_uint64(data_fd, p);
+                        if (r < 0)
+                                goto fail;
+
                         n_data++;
-                else if (o->object.type == OBJECT_FIELD)
+
+                } else if (o->object.type == OBJECT_FIELD)
                         n_fields++;
                 else if (o->object.type == OBJECT_DATA_HASH_TABLE) {
                         n_data_hash_tables++;
@@ -2502,14 +2588,8 @@ int journal_file_verify(JournalFile *f, const char *key) {
                                 r = -EBADMSG;
                                 goto fail;
                         }
-                }
-
-                if (o->object.type >= _OBJECT_TYPE_MAX)
+                } else if (o->object.type >= _OBJECT_TYPE_MAX)
                         n_weird ++;
-                else {
-                        /* Write address to file... */
-
-                }
 
                 if (p == le64toh(f->header->tail_object_offset))
                         p = 0;
@@ -2592,36 +2672,86 @@ int journal_file_verify(JournalFile *f, const char *key) {
         /* Second iteration: we go through all objects again, this
          * time verify all pointers. */
 
-        /* q = le64toh(f->header->header_size); */
-        /* while (q != 0) { */
-        /*         r = journal_file_move_to_object(f, -1, q, &o); */
-        /*         if (r < 0) { */
-        /*                 log_error("Invalid object at %llu", (unsigned long long) q); */
-        /*                 goto fail; */
-        /*         } */
+        p = le64toh(f->header->header_size);
+        while (p != 0) {
+                draw_progress(0x8000 + (0x7FFF * p) / le64toh(f->header->tail_object_offset), &last_usec);
+
+                r = journal_file_move_to_object(f, -1, p, &o);
+                if (r < 0) {
+                        log_error("Invalid object at %llu", (unsigned long long) p);
+                        goto fail;
+                }
+
+                if (o->object.type == OBJECT_ENTRY_ARRAY) {
+                        uint64_t i = 0, n;
+
+                        if (le64toh(o->entry_array.next_entry_array_offset) != 0 &&
+                            !contains_uint64(f->mmap, entry_array_fd, n_entry_arrays, le64toh(o->entry_array.next_entry_array_offset))) {
+                                log_error("Entry array chains up to invalid next array at %llu", (unsigned long long) p);
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                        n = journal_file_entry_array_n_items(o);
+                        for (i = 0; i < n; i++) {
+                                if (le64toh(o->entry_array.items[i]) != 0 &&
+                                    !contains_uint64(f->mmap, entry_fd, n_entries, le64toh(o->entry_array.items[i]))) {
+
+                                        log_error("Entry array points to invalid next array at %llu", (unsigned long long) p);
+                                        r = -EBADMSG;
+                                        goto fail;
+                                }
+                        }
+
+                }
 
-        /*         if (q == le64toh(f->header->tail_object_offset)) */
-        /*                 q = 0; */
-        /*         else */
-        /*                 q = q + ALIGN64(le64toh(o->object.size)); */
-        /* } */
+                r = journal_file_move_to_object(f, -1, p, &o);
+                if (r < 0) {
+                        log_error("Invalid object at %llu", (unsigned long long) p);
+                        goto fail;
+                }
+
+                if (p == le64toh(f->header->tail_object_offset))
+                        p = 0;
+                else
+                        p = p + ALIGN64(le64toh(o->object.size));
+        }
 
         flush_progress();
 
+        mmap_cache_close_fd(f->mmap, data_fd);
+        mmap_cache_close_fd(f->mmap, entry_fd);
+        mmap_cache_close_fd(f->mmap, entry_array_fd);
+
+        close_nointr_nofail(data_fd);
+        close_nointr_nofail(entry_fd);
+        close_nointr_nofail(entry_array_fd);
+
         return 0;
 
 fail:
-        e = p <= 0 ? q :
-        q <= 0 ? p :
-        MIN(p, q);
-
         flush_progress();
 
         log_error("File corruption detected at %s:%llu (of %llu, %llu%%).",
                   f->path,
-                  (unsigned long long) e,
+                  (unsigned long long) p,
                   (unsigned long long) f->last_stat.st_size,
-                  (unsigned long long) (100 * e / f->last_stat.st_size));
+                  (unsigned long long) (100 * p / f->last_stat.st_size));
+
+        if (data_fd >= 0) {
+                mmap_cache_close_fd(f->mmap, data_fd);
+                close_nointr_nofail(data_fd);
+        }
+
+        if (entry_fd >= 0) {
+                mmap_cache_close_fd(f->mmap, entry_fd);
+                close_nointr_nofail(entry_fd);
+        }
+
+        if (entry_array_fd >= 0) {
+                mmap_cache_close_fd(f->mmap, entry_array_fd);
+                close_nointr_nofail(entry_array_fd);
+        }
 
         return r;
 }
index 145663b..384ed90 100644 (file)
@@ -2944,6 +2944,7 @@ int main(int argc, char *argv[]) {
 
         log_set_target(LOG_TARGET_SAFE);
         log_set_facility(LOG_SYSLOG);
+        log_set_max_level(LOG_DEBUG);
         log_parse_environment();
         log_open();
 
index 77710ff..9782139 100644 (file)
@@ -68,6 +68,8 @@ struct MMapCache {
         FileDescriptor *by_fd;
 };
 
+static int mmap_cache_peek_fd_index(MMapCache *m, int fd, unsigned *fd_index);
+
 static void mmap_cache_window_unmap(MMapCache *m, unsigned w) {
         Window *v;
 
@@ -89,6 +91,13 @@ static void mmap_cache_window_add_lru(MMapCache *m, unsigned w) {
         assert(w < m->n_windows);
 
         v = m->windows + w;
+        assert(v->n_ref == 0);
+
+        if (m->lru_last != (unsigned) -1) {
+                assert(m->windows[m->lru_last].lru_next == (unsigned) -1);
+                m->windows[m->lru_last].lru_next = w;
+        }
+
         v->lru_prev = m->lru_last;
         v->lru_next = (unsigned) -1;
 
@@ -105,15 +114,21 @@ static void mmap_cache_window_remove_lru(MMapCache *m, unsigned w) {
 
         v = m->windows + w;
 
-        if (v->lru_prev == (unsigned) -1)
+        if (v->lru_prev == (unsigned) -1) {
+                assert(m->lru_first == w);
                 m->lru_first = v->lru_next;
-        else
+        } else {
+                assert(m->windows[v->lru_prev].lru_next == w);
                 m->windows[v->lru_prev].lru_next = v->lru_next;
+        }
 
-        if (v->lru_next == (unsigned) -1)
+        if (v->lru_next == (unsigned) -1) {
+                assert(m->lru_last == w);
                 m->lru_last = v->lru_prev;
-        else
+        } else {
+                assert(m->windows[v->lru_next].lru_prev == w);
                 m->windows[v->lru_next].lru_prev = v->lru_prev;
+        }
 }
 
 static void mmap_cache_fd_add(MMapCache *m, unsigned fd_index, unsigned w) {
@@ -123,6 +138,13 @@ static void mmap_cache_fd_add(MMapCache *m, unsigned fd_index, unsigned w) {
         assert(fd_index < m->n_fds);
 
         v = m->windows + w;
+        assert(m->by_fd[fd_index].fd == v->fd);
+
+        if (m->by_fd[fd_index].windows != (unsigned) -1) {
+                assert(m->windows[m->by_fd[fd_index].windows].by_fd_prev == (unsigned) -1);
+                m->windows[m->by_fd[fd_index].windows].by_fd_prev = w;
+        }
+
         v->by_fd_next = m->by_fd[fd_index].windows;
         v->by_fd_prev = (unsigned) -1;
 
@@ -136,13 +158,22 @@ static void mmap_cache_fd_remove(MMapCache *m, unsigned fd_index, unsigned w) {
         assert(fd_index < m->n_fds);
 
         v = m->windows + w;
-        if (v->by_fd_prev == (unsigned) -1)
+        assert(m->by_fd[fd_index].fd == v->fd);
+        assert(v->by_fd_next == (unsigned) -1 || m->windows[v->by_fd_next].fd == v->fd);
+        assert(v->by_fd_prev == (unsigned) -1 || m->windows[v->by_fd_prev].fd == v->fd);
+
+        if (v->by_fd_prev == (unsigned) -1) {
+                assert(m->by_fd[fd_index].windows == w);
                 m->by_fd[fd_index].windows = v->by_fd_next;
-        else
+        } else {
+                assert(m->windows[v->by_fd_prev].by_fd_next == w);
                 m->windows[v->by_fd_prev].by_fd_next = v->by_fd_next;
+        }
 
-        if (v->by_fd_next != (unsigned) -1)
+        if (v->by_fd_next != (unsigned) -1) {
+                assert(m->windows[v->by_fd_next].by_fd_prev == w);
                 m->windows[v->by_fd_next].by_fd_prev = v->by_fd_prev;
+        }
 }
 
 static void mmap_cache_context_unset(MMapCache *m, unsigned c) {
@@ -182,6 +213,7 @@ static void mmap_cache_context_set(MMapCache *m, unsigned c, unsigned w) {
 
         v = m->windows + w;
         v->n_ref ++;
+
         if (v->n_ref == 1)
                 mmap_cache_window_remove_lru(m, w);
 }
@@ -264,6 +296,9 @@ MMapCache* mmap_cache_unref(MMapCache *m) {
 }
 
 static int mmap_cache_allocate_window(MMapCache *m, unsigned *w) {
+        Window *v;
+        unsigned fd_index;
+
         assert(m);
         assert(w);
 
@@ -276,7 +311,16 @@ static int mmap_cache_allocate_window(MMapCache *m, unsigned *w) {
                 return -E2BIG;
 
         *w = m->lru_first;
+        v = m->windows + *w;
+        assert(v->n_ref == 0);
+
         mmap_cache_window_unmap(m, *w);
+
+        if (v->fd >= 0) {
+                assert_se(mmap_cache_peek_fd_index(m, v->fd, &fd_index) > 0);
+                mmap_cache_fd_remove(m, fd_index, *w);
+        }
+
         mmap_cache_window_remove_lru(m, *w);
 
         return 0;
@@ -370,8 +414,7 @@ static int mmap_cache_put(
         v->size = wsize;
 
         v->n_ref = 0;
-        v->lru_prev = v->lru_next = (unsigned) -1;
-
+        mmap_cache_window_add_lru(m, w);
         mmap_cache_fd_add(m, fd_index, w);
         mmap_cache_context_set(m, context, w);
 
@@ -390,28 +433,48 @@ static int fd_cmp(const void *_a, const void *_b) {
         return 0;
 }
 
+/* Looks up fd in the m->by_fd table (searched with bsearch(), ordered
+ * by fd_cmp) without ever adding a new entry.
+ *
+ * Returns 1 and stores the table index in *fd_index if the fd is
+ * known, and 0 (leaving *fd_index untouched) if it is not. */
+static int mmap_cache_peek_fd_index(MMapCache *m, int fd, unsigned *fd_index) {
+        FileDescriptor *found;
+        unsigned i;
+
+        assert(m);
+        assert(fd >= 0);
+        assert(fd_index);
+
+        /* Consistency check: the first window chained to each table
+         * entry, if there is one, has to belong to that entry's fd */
+        for (i = 0; i < m->n_fds; i++)
+                assert(m->by_fd[i].windows == (unsigned) -1 ||
+                       m->windows[m->by_fd[i].windows].fd == m->by_fd[i].fd);
+
+        found = bsearch(&fd, m->by_fd, m->n_fds, sizeof(m->by_fd[0]), fd_cmp);
+        if (found) {
+                *fd_index = (unsigned) (found - m->by_fd);
+                return 1;
+        }
+
+        return 0;
+}
+
 static int mmap_cache_get_fd_index(MMapCache *m, int fd, unsigned *fd_index) {
         FileDescriptor *j;
+        int r;
 
         assert(m);
         assert(fd >= 0);
         assert(fd_index);
 
-        j = bsearch(&fd, m->by_fd, m->n_fds, sizeof(m->by_fd[0]), fd_cmp);
-        if (!j) {
-                if (m->n_fds >= m->fds_max)
-                        return -E2BIG;
+        r = mmap_cache_peek_fd_index(m, fd, fd_index);
+        if (r != 0)
+                return r;
 
-                j = m->by_fd + m->n_fds ++;
-                j->fd = fd;
-                j->windows = (unsigned) -1;
+        if (m->n_fds >= m->fds_max)
+                return -E2BIG;
 
-                qsort(m->by_fd, m->n_fds, sizeof(m->by_fd[0]), fd_cmp);
-                j = bsearch(&fd, m->by_fd, m->n_fds, sizeof(m->by_fd[0]), fd_cmp);
-        }
+        j = m->by_fd + m->n_fds ++;
+        j->fd = fd;
+        j->windows = (unsigned) -1;
 
-        *fd_index = (unsigned) (j - m->by_fd);
-        return 0;
+        qsort(m->by_fd, m->n_fds, sizeof(FileDescriptor), fd_cmp);
+
+        return mmap_cache_peek_fd_index(m, fd, fd_index);
 }
 
 static bool mmap_cache_test_window(
@@ -466,6 +529,7 @@ static int mmap_cache_current(
 
 static int mmap_cache_find(
                 MMapCache *m,
+                int fd,
                 unsigned fd_index,
                 unsigned context,
                 uint64_t offset,
@@ -476,6 +540,7 @@ static int mmap_cache_find(
         unsigned w;
 
         assert(m);
+        assert(fd >= 0);
         assert(fd_index < m->n_fds);
         assert(context < m->contexts_max);
         assert(size > 0);
@@ -483,10 +548,13 @@ static int mmap_cache_find(
 
         w = m->by_fd[fd_index].windows;
         while (w != (unsigned) -1) {
+                v = m->windows + w;
+                assert(v->fd == fd);
+
                 if (mmap_cache_test_window(m, w, offset, size))
                         break;
 
-                w = m->windows[w].by_fd_next;
+                w = v->by_fd_next;
         }
 
         if (w == (unsigned) -1)
@@ -494,7 +562,6 @@ static int mmap_cache_find(
 
         mmap_cache_context_set(m, context, w);
 
-        v = m->windows + w;
         *ret = (uint8_t*) v->ptr + (offset - v->offset);
         return 1;
 }
@@ -523,13 +590,17 @@ int mmap_cache_get(
         if (r != 0)
                 return r;
 
+        /* Hmm, drop the reference to the current one, since it wasn't
+         * good enough */
+        mmap_cache_context_unset(m, context);
+
         /* OK, let's find the chain for this FD */
         r = mmap_cache_get_fd_index(m, fd, &fd_index);
         if (r < 0)
                 return r;
 
         /* And let's look through the available mmaps */
-        r = mmap_cache_find(m, fd_index, context, offset, size, ret);
+        r = mmap_cache_find(m, fd, fd_index, context, offset, size, ret);
         if (r != 0)
                 return r;
 
@@ -538,16 +609,15 @@ int mmap_cache_get(
 }
 
 void mmap_cache_close_fd(MMapCache *m, int fd) {
-        FileDescriptor *j;
         unsigned fd_index, c, w;
+        int r;
 
         assert(m);
         assert(fd > 0);
 
-        j = bsearch(&fd, m->by_fd, m->n_fds, sizeof(m->by_fd[0]), fd_cmp);
-        if (!j)
+        r = mmap_cache_peek_fd_index(m, fd, &fd_index);
+        if (r <= 0)
                 return;
-        fd_index = (unsigned) (j - m->by_fd);
 
         for (c = 0; c < m->contexts_max; c++) {
                 w = m->by_context[c];
@@ -560,9 +630,14 @@ void mmap_cache_close_fd(MMapCache *m, int fd) {
 
         w = m->by_fd[fd_index].windows;
         while (w != (unsigned) -1) {
+                Window *v;
+
+                v = m->windows + w;
+                assert(v->fd == fd);
 
-                mmap_cache_fd_remove(m, fd_index, w);
                 mmap_cache_window_unmap(m, w);
+                mmap_cache_fd_remove(m, fd_index, w);
+                v->fd = -1;
 
                 w = m->by_fd[fd_index].windows;
         }
@@ -571,6 +646,51 @@ void mmap_cache_close_fd(MMapCache *m, int fd) {
         m->n_fds --;
 }
 
+/* Drops all windows of fd that include space right of the offset p.
+ * This is useful to ensure that after the file size is extended we
+ * drop our mappings of the end and create them anew, since otherwise
+ * it is undefined whether the mapping will continue to work as
+ * intended.
+ *
+ * Does nothing if fd is not known to the cache. */
+void mmap_cache_close_fd_range(MMapCache *m, int fd, uint64_t p) {
+        unsigned fd_index, context, w;
+
+        assert(m);
+        assert(fd > 0);
+
+        if (mmap_cache_peek_fd_index(m, fd, &fd_index) <= 0)
+                return;
+
+        /* First, release every context that currently points at a
+         * window of this fd, regardless of the window's position */
+        for (context = 0; context < m->contexts_max; context++) {
+                w = m->by_context[context];
+                if (w == (unsigned) -1)
+                        continue;
+
+                if (m->windows[w].fd == fd)
+                        mmap_cache_context_unset(m, context);
+        }
+
+        /* Then, walk the per-fd window list and drop what overlaps */
+        w = m->by_fd[fd_index].windows;
+        while (w != (unsigned) -1) {
+                Window *v = m->windows + w;
+
+                assert(v->fd == fd);
+                assert(v->by_fd_next == (unsigned) -1 ||
+                       m->windows[v->by_fd_next].fd == fd);
+
+                if (v->offset + v->size <= p) {
+                        /* Ends at or before p, keep it and move on */
+                        w = v->by_fd_next;
+                        continue;
+                }
+
+                mmap_cache_window_unmap(m, w);
+                mmap_cache_fd_remove(m, fd_index, w);
+                v->fd = -1;
+
+                /* The list was just modified, start over at its head */
+                w = m->by_fd[fd_index].windows;
+        }
+}
+
 void mmap_cache_close_context(MMapCache *m, unsigned context) {
         mmap_cache_context_unset(m, context);
 }
index 0a88fc5..984b759 100644 (file)
@@ -31,4 +31,5 @@ MMapCache* mmap_cache_unref(MMapCache *m);
 
 int mmap_cache_get(MMapCache *m, int fd, int prot, unsigned context, uint64_t offset, uint64_t size, void **ret);
 void mmap_cache_close_fd(MMapCache *m, int fd);
+void mmap_cache_close_fd_range(MMapCache *m, int fd, uint64_t range);
 void mmap_cache_close_context(MMapCache *m, unsigned context);