X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?a=blobdiff_plain;f=src%2Fjournal%2Fjournal-file.c;h=8c17620ed339bf9576bf498e2d6ad02018983df8;hb=54ecda32c60c6f2548f74703bfd324694393edaa;hp=190bfb996b3a36feb890d94635606464f1a6d964;hpb=cf244689e9d1ab50082c9ddd0f3c4d1eb982badc;p=elogind.git diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 190bfb996..8c17620ed 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -37,7 +37,26 @@ #define DEFAULT_WINDOW_SIZE (128ULL*1024ULL*1024ULL) -#define COMPRESSION_SIZE_THRESHOLD (64ULL) +#define COMPRESSION_SIZE_THRESHOLD (512ULL) + +/* This is the minimum journal file size */ +#define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL) + +/* These are the lower and upper bounds if we deduce the max_use value + * from the file system size */ +#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL) /* 1 MiB */ +#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */ + +/* This is the upper bound if we deduce max_size from max_use */ +#define DEFAULT_MAX_SIZE_UPPER (16ULL*1024ULL*1024ULL) /* 16 MiB */ + +/* This is the upper bound if we deduce the keep_free value from the + * file system size */ +#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */ + +/* This is the keep_free value when we can't determine the file + * system size */ +#define DEFAULT_KEEP_FREE (1024ULL*1024ULL) /* 1 MiB */ static const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' }; @@ -119,6 +138,9 @@ static int journal_file_refresh_header(JournalFile *f) { f->header->boot_id = boot_id; f->header->state = STATE_ONLINE; + + __sync_synchronize(); + return 0; } @@ -216,7 +238,7 @@ static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) if (fstat(f->fd, &f->last_stat) < 0) return -errno; - f->header->arena_size = new_size - htole64(f->header->arena_offset); + f->header->arena_size = htole64(new_size - le64toh(f->header->arena_offset)); return 0; } @@ 
-264,7 +286,7 @@ static int journal_file_map( } static int journal_file_move_to(JournalFile *f, int wt, uint64_t offset, uint64_t size, void **ret) { - void *p; + void *p = NULL; uint64_t delta; int r; Window *w; @@ -274,6 +296,15 @@ static int journal_file_move_to(JournalFile *f, int wt, uint64_t offset, uint64_ assert(wt >= 0); assert(wt < _WINDOW_MAX); + if (offset + size > (uint64_t) f->last_stat.st_size) { + /* Hmm, out of range? Let's refresh the fstat() data + * first, before we trust that check. */ + + if (fstat(f->fd, &f->last_stat) < 0 || + offset + size > (uint64_t) f->last_stat.st_size) + return -EADDRNOTAVAIL; + } + w = f->windows + wt; if (_likely_(w->ptr && @@ -299,21 +330,18 @@ static int journal_file_move_to(JournalFile *f, int wt, uint64_t offset, uint64_ * the window space before and half behind the * requested mapping */ - delta = PAGE_ALIGN((DEFAULT_WINDOW_SIZE - size) / 2); + delta = (DEFAULT_WINDOW_SIZE - size) / 2; - if (offset < delta) + if (delta > offset) delta = offset; offset -= delta; - size += (DEFAULT_WINDOW_SIZE - delta); + size = DEFAULT_WINDOW_SIZE; } else delta = 0; - if (offset > (uint64_t) f->last_stat.st_size) - return -EADDRNOTAVAIL; - if (offset + size > (uint64_t) f->last_stat.st_size) - size = PAGE_ALIGN((uint64_t) f->last_stat.st_size - offset); + size = (uint64_t) f->last_stat.st_size - offset; if (size <= 0) return -EADDRNOTAVAIL; @@ -553,6 +581,8 @@ static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, ui assert(offset > 0); assert(o->object.type == OBJECT_DATA); + /* This might alter the window we are looking at */ + o->data.next_hash_offset = o->data.next_field_offset = 0; o->data.entry_offset = o->data.entry_array_offset = 0; o->data.n_entries = 0; @@ -563,18 +593,14 @@ static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, ui /* Only entry in the hash table is easy */ f->data_hash_table[h].head_hash_offset = htole64(offset); } else { - /* Temporarily move back to the 
previous data object, - * to patch in pointer */ + /* Move back to the previous data object, to patch in + * pointer */ r = journal_file_move_to_object(f, OBJECT_DATA, p, &o); if (r < 0) return r; o->data.next_hash_offset = htole64(offset); - - r = journal_file_move_to_object(f, OBJECT_DATA, offset, &o); - if (r < 0) - return r; } f->data_hash_table[h].tail_hash_offset = htole64(offset); @@ -586,6 +612,7 @@ int journal_file_find_data_object_with_hash( JournalFile *f, const void *data, uint64_t size, uint64_t hash, Object **ret, uint64_t *offset) { + uint64_t p, osize, h; int r; @@ -674,7 +701,11 @@ int journal_file_find_data_object( ret, offset); } -static int journal_file_append_data(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) { +static int journal_file_append_data( + JournalFile *f, + const void *data, uint64_t size, + Object **ret, uint64_t *offset) { + uint64_t hash, p; uint64_t osize; Object *o; @@ -732,6 +763,12 @@ static int journal_file_append_data(JournalFile *f, const void *data, uint64_t s if (r < 0) return r; + /* The linking might have altered the window, so let's + * refresh our pointer */ + r = journal_file_move_to_object(f, OBJECT_DATA, p, &o); + if (r < 0) + return r; + if (ret) *ret = o; @@ -743,14 +780,14 @@ static int journal_file_append_data(JournalFile *f, const void *data, uint64_t s uint64_t journal_file_entry_n_items(Object *o) { assert(o); - assert(o->object.type == htole64(OBJECT_ENTRY)); + assert(o->object.type == OBJECT_ENTRY); return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem); } static uint64_t journal_file_entry_array_n_items(Object *o) { assert(o); - assert(o->object.type == htole64(OBJECT_ENTRY_ARRAY)); + assert(o->object.type == OBJECT_ENTRY_ARRAY); return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t); } @@ -805,7 +842,7 @@ static int link_entry_into_array(JournalFile *f, o->entry_array.items[i] = htole64(p); if (ap == 0) - 
*first = q; + *first = htole64(q); else { r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o); if (r < 0) @@ -838,7 +875,7 @@ static int link_entry_into_array_plus_one(JournalFile *f, else { uint64_t i; - i = le64toh(*idx) - 1; + i = htole64(le64toh(*idx) - 1); r = link_entry_into_array(f, first, &i, p); if (r < 0) return r; @@ -879,6 +916,8 @@ static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) { assert(offset > 0); assert(o->object.type == OBJECT_ENTRY); + __sync_synchronize(); + /* Link up the entry itself */ r = link_entry_into_array(f, &f->header->entry_array_offset, @@ -887,7 +926,7 @@ static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) { if (r < 0) return r; - log_error("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); + /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */ if (f->header->head_entry_realtime == 0) f->header->head_entry_realtime = o->entry.realtime; @@ -986,9 +1025,6 @@ int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const st ts->monotonic < le64toh(f->header->tail_entry_monotonic)) return -EINVAL; - if (ts->realtime < le64toh(f->header->tail_entry_realtime)) - return -EINVAL; - items = alloca(sizeof(EntryItem) * n_iovec); for (i = 0; i < n_iovec; i++) { @@ -1017,7 +1053,7 @@ static int generic_array_get(JournalFile *f, Object **ret, uint64_t *offset) { Object *o; - uint64_t p, a; + uint64_t p = 0, a; int r; assert(f); @@ -1701,10 +1737,6 @@ int journal_file_open( f->writable = (flags & O_ACCMODE) != O_RDONLY; f->prot = prot_from_flags(flags); - f->metrics.max_size = DEFAULT_MAX_SIZE; - f->metrics.min_size = DEFAULT_MIN_SIZE; - f->metrics.keep_free = DEFAULT_KEEP_FREE; - f->path = strdup(fname); if (!f->path) { r = -ENOMEM; @@ -1877,7 +1909,7 @@ int journal_directory_vacuum(const char *directory, uint64_t max_use, 
uint64_t m assert(directory); if (max_use <= 0) - max_use = DEFAULT_MAX_USE; + return 0; d = opendir(directory); if (!d) @@ -2076,3 +2108,78 @@ int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint6 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset); } + +void journal_default_metrics(JournalMetrics *m, int fd) { + uint64_t fs_size = 0; + struct statvfs ss; + char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX]; + + assert(m); + assert(fd >= 0); + + if (fstatvfs(fd, &ss) >= 0) + fs_size = ss.f_frsize * ss.f_blocks; + + if (m->max_use == (uint64_t) -1) { + + if (fs_size > 0) { + m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */ + + if (m->max_use > DEFAULT_MAX_USE_UPPER) + m->max_use = DEFAULT_MAX_USE_UPPER; + + if (m->max_use < DEFAULT_MAX_USE_LOWER) + m->max_use = DEFAULT_MAX_USE_LOWER; + } else + m->max_use = DEFAULT_MAX_USE_LOWER; + } else { + m->max_use = PAGE_ALIGN(m->max_use); + + if (m->max_use < JOURNAL_FILE_SIZE_MIN*2) + m->max_use = JOURNAL_FILE_SIZE_MIN*2; + } + + if (m->max_size == (uint64_t) -1) { + m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */ + + if (m->max_size > DEFAULT_MAX_SIZE_UPPER) + m->max_size = DEFAULT_MAX_SIZE_UPPER; + } else + m->max_size = PAGE_ALIGN(m->max_size); + + if (m->max_size < JOURNAL_FILE_SIZE_MIN) + m->max_size = JOURNAL_FILE_SIZE_MIN; + + if (m->max_size*2 > m->max_use) + m->max_use = m->max_size*2; + + if (m->min_size == (uint64_t) -1) + m->min_size = JOURNAL_FILE_SIZE_MIN; + else { + m->min_size = PAGE_ALIGN(m->min_size); + + if (m->min_size < JOURNAL_FILE_SIZE_MIN) + m->min_size = JOURNAL_FILE_SIZE_MIN; + + if (m->min_size > m->max_size) + m->max_size = m->min_size; + } + + if (m->keep_free == (uint64_t) -1) { + + if (fs_size > 0) { + m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */ + + if (m->keep_free > DEFAULT_KEEP_FREE_UPPER) + m->keep_free = DEFAULT_KEEP_FREE_UPPER; + + 
} else + m->keep_free = DEFAULT_KEEP_FREE; + } + + log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s", + format_bytes(a, sizeof(a), m->max_use), + format_bytes(b, sizeof(b), m->max_size), + format_bytes(c, sizeof(c), m->min_size), + format_bytes(d, sizeof(d), m->keep_free)); +}