X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=elogind.git;a=blobdiff_plain;f=src%2Fjournal%2Fjournal-file.c;h=718dc5d6eaa13f73e77ce8bb5f2106d9a1aa19ed;hp=a110a0090f33c217ca6e55354475adbcd3893733;hb=64825d3c589cd8742887f30acde8c57eceac2001;hpb=162566a4a12c35e1e86e35ced1748354f7ec935e

diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c
index a110a0090..718dc5d6e 100644
--- a/src/journal/journal-file.c
+++ b/src/journal/journal-file.c
@@ -32,8 +32,8 @@
 #include "lookup3.h"
 #include "compress.h"
 
-#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*16ULL)
-#define DEFAULT_FIELD_HASH_TABLE_SIZE (2047ULL*16ULL)
+#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
+#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
 
 #define DEFAULT_WINDOW_SIZE (8ULL*1024ULL*1024ULL)
@@ -58,17 +58,24 @@
  * size */
 #define DEFAULT_KEEP_FREE (1024ULL*1024ULL) /* 1 MB */
 
-static const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' };
+/* n_data was the first entry we added after the initial file format design */
+#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
 
 #define ALIGN64(x) (((x) + 7ULL) & ~7ULL)
 
+#define JOURNAL_HEADER_CONTAINS(h, field) \
+        (le64toh((h)->header_size) >= offsetof(Header, field) + sizeof((h)->field))
+
+static const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' };
+
 void journal_file_close(JournalFile *f) {
         int t;
 
         assert(f);
 
         if (f->header) {
-                if (f->writable)
+                /* Mark the file offline. Don't override the archived state if it already is set */
+                if (f->writable && f->header->state == STATE_ONLINE)
                         f->header->state = STATE_OFFLINE;
 
                 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
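
The HEADER_SIZE_MIN / JOURNAL_HEADER_CONTAINS pair above is the diff's
compatibility mechanism: the on-disk header records the header size it was
written with, and a field is treated as present only when the recorded size
covers it, so old and new implementations can share files. A minimal,
self-contained sketch of the same pattern (ExampleHeader and its fields are
hypothetical, and the le64toh() byte-swapping is omitted for brevity):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct ExampleHeader {
            uint64_t header_size;   /* header size at the time the file was written */
            uint64_t old_field;     /* part of the original format */
            uint64_t new_field;     /* added in a later format revision */
    } ExampleHeader;

    #define EXAMPLE_HEADER_CONTAINS(h, field) \
            ((h)->header_size >= offsetof(ExampleHeader, field) + sizeof((h)->field))

    int main(void) {
            /* Simulate a header written by an implementation that predates new_field */
            ExampleHeader h = { .header_size = offsetof(ExampleHeader, new_field) };

            printf("old_field present: %d\n", EXAMPLE_HEADER_CONTAINS(&h, old_field)); /* 1 */
            printf("new_field present: %d\n", EXAMPLE_HEADER_CONTAINS(&h, new_field)); /* 0 */
            return 0;
    }

This is also why journal_file_verify_header() below only rejects headers
smaller than HEADER_SIZE_MIN instead of requiring an exact size match.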
@@ -107,7 +114,7 @@ static int journal_file_init_header(JournalFile *f, JournalFile *template) {
 
         if (template) {
                 h.seqnum_id = template->header->seqnum_id;
-                h.seqnum = template->header->seqnum;
+                h.tail_seqnum = template->header->tail_seqnum;
         } else
                 h.seqnum_id = h.file_id;
@@ -161,7 +168,8 @@ static int journal_file_verify_header(JournalFile *f) {
                 return -EPROTONOSUPPORT;
 #endif
 
-        if (f->header->header_size != htole64(ALIGN64(sizeof(*(f->header)))))
+        /* The first addition was n_data, so check that we are at least this large */
+        if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
                 return -EBADMSG;
 
         if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
@@ -181,13 +189,15 @@ static int journal_file_verify_header(JournalFile *f) {
 
                 state = f->header->state;
 
-                if (state == STATE_ONLINE)
-                        log_debug("Journal file %s is already online. Assuming unclean closing. Ignoring.", f->path);
-                /* FIXME: immediately rotate */
-                else if (state == STATE_ARCHIVED)
+                if (state == STATE_ONLINE) {
+                        log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
+                        return -EBUSY;
+                } else if (state == STATE_ARCHIVED)
                         return -ESHUTDOWN;
-                else if (state != STATE_OFFLINE)
-                        log_debug("Journal file %s has unknown state %u. Ignoring.", f->path, state);
+                else if (state != STATE_OFFLINE) {
+                        log_debug("Journal file %s has unknown state %u.", f->path, state);
+                        return -EBUSY;
+                }
         }
 
         return 0;
@@ -427,7 +437,7 @@ static uint64_t journal_file_seqnum(JournalFile *f, uint64_t *seqnum) {
 
         assert(f);
 
-        r = le64toh(f->header->seqnum) + 1;
+        r = le64toh(f->header->tail_seqnum) + 1;
 
         if (seqnum) {
                 /* If an external seqnum counter was passed, we update
@@ -440,10 +450,10 @@ static uint64_t journal_file_seqnum(JournalFile *f, uint64_t *seqnum) {
                 *seqnum = r;
         }
 
-        f->header->seqnum = htole64(r);
+        f->header->tail_seqnum = htole64(r);
 
-        if (f->header->first_seqnum == 0)
-                f->header->first_seqnum = htole64(r);
+        if (f->header->head_seqnum == 0)
+                f->header->head_seqnum = htole64(r);
 
         return r;
 }
@@ -500,7 +510,17 @@ static int journal_file_setup_data_hash_table(JournalFile *f) {
 
         assert(f);
 
-        s = DEFAULT_DATA_HASH_TABLE_SIZE;
+        /* We estimate that we need 1 hash table entry per 768 bytes
+           of journal file and we want to make sure we never get
+           beyond 75% fill level. Calculate the hash table size for
+           the maximum file size based on these metrics. */
+
+        s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
+        if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
+                s = DEFAULT_DATA_HASH_TABLE_SIZE;
+
+        log_info("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
+
         r = journal_file_append_object(f,
                                        OBJECT_DATA_HASH_TABLE,
                                        offsetof(Object, hash_table.items) + s,
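
The sizing expression above folds two rules into one line of integer math:
reserve one hash-table entry per 768 bytes of the file's maximum size, then
scale by 4/3 so the table stays below a 75% fill level, i.e.
entries = (max_size / 768) * 4 / 3, with the multiplication done first so
integer truncation happens as late as possible. A standalone sketch of the
arithmetic; the 128 MiB max_size is a made-up figure (the real value comes
from JournalMetrics), and HashItem is re-declared here only so the example
compiles on its own:

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for the journal's HashItem: two 64-bit offsets, 16 bytes */
    typedef struct HashItem {
            uint64_t head_hash_offset;
            uint64_t tail_hash_offset;
    } HashItem;

    int main(void) {
            uint64_t max_size = 128ULL * 1024ULL * 1024ULL;  /* hypothetical metrics.max_size */

            /* Same expression as the patch: multiply by 4 before dividing
             * by 768 and 3 to keep integer precision */
            uint64_t s = (max_size * 4 / 768 / 3) * sizeof(HashItem);

            printf("entries: %llu, table size: %llu bytes\n",
                   (unsigned long long) (s / sizeof(HashItem)),
                   (unsigned long long) s);
            /* entries: 233016, table size: 3728256 bytes (~3.6 MiB) */
            return 0;
    }

If the computed size falls below DEFAULT_DATA_HASH_TABLE_SIZE (2047 entries),
the patch clamps it up to that floor.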
@@ -614,6 +634,9 @@ static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, ui
 
         f->data_hash_table[h].tail_hash_offset = htole64(offset);
 
+        if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
+                f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
+
         return 0;
 }
@@ -765,7 +788,7 @@ static int journal_file_append_data(
         }
 #endif
 
-        if (!compressed)
+        if (!compressed && size > 0)
                 memcpy(o->data.payload, data, size);
 
         r = journal_file_link_data(f, o, p, hash);
@@ -1034,7 +1057,8 @@ int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const st
             ts->monotonic < le64toh(f->header->tail_entry_monotonic))
                 return -EINVAL;
 
-        items = alloca(sizeof(EntryItem) * n_iovec);
+        /* alloca() can't take 0, hence let's allocate at least one */
+        items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
 
         for (i = 0; i < n_iovec; i++) {
                 uint64_t p;
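
Two of the hunks above guard degenerate zero-size cases: memcpy() is skipped
when size == 0 (the data pointer may then be NULL, and memcpy from a NULL
pointer is undefined behavior even for zero bytes), and alloca() is given
MAX(1, n_iovec) since a zero-size alloca is not well defined. A hedged,
self-contained sketch of the same pattern outside the journal code
(copy_to_stack is a hypothetical helper, not part of the patch):

    #include <alloca.h>
    #include <stdio.h>
    #include <string.h>

    #define MAX(a, b) ((a) > (b) ? (a) : (b))

    /* Copy n bytes into a stack buffer and return how many were copied.
     * The guards mirror the journal fixes: never memcpy from a possibly
     * NULL pointer with size 0, never call alloca(0). */
    static size_t copy_to_stack(const void *data, size_t n) {
            char *buf = alloca(MAX(1, n));  /* at least one byte, as in the patch */

            if (n > 0)                      /* skip memcpy entirely for empty input */
                    memcpy(buf, data, n);

            return n;
    }

    int main(void) {
            printf("%zu\n", copy_to_stack("abc", 3));  /* 3 */
            printf("%zu\n", copy_to_stack(NULL, 0));   /* 0, and no undefined behavior */
            return 0;
    }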
@@ -1809,27 +1833,13 @@ int journal_file_move_to_entry_by_realtime_for_data(
 }
 
 void journal_file_dump(JournalFile *f) {
-        char a[33], b[33], c[33];
         Object *o;
         int r;
         uint64_t p;
 
         assert(f);
 
-        printf("File Path: %s\n"
-               "File ID: %s\n"
-               "Machine ID: %s\n"
-               "Boot ID: %s\n"
-               "Arena size: %llu\n"
-               "Objects: %lu\n"
-               "Entries: %lu\n",
-               f->path,
-               sd_id128_to_string(f->header->file_id, a),
-               sd_id128_to_string(f->header->machine_id, b),
-               sd_id128_to_string(f->header->boot_id, c),
-               (unsigned long long) le64toh(f->header->arena_size),
-               (unsigned long) le64toh(f->header->n_objects),
-               (unsigned long) le64toh(f->header->n_entries));
+        journal_file_print_header(f);
 
         p = le64toh(f->header->header_size);
         while (p != 0) {
@@ -1885,10 +1895,73 @@ fail:
         log_error("File corrupt");
 }
 
+void journal_file_print_header(JournalFile *f) {
+        char a[33], b[33], c[33], d[33];
+        char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
+
+        assert(f);
+
+        printf("File Path: %s\n"
+               "File ID: %s\n"
+               "Machine ID: %s\n"
+               "Boot ID: %s\n"
+               "Sequential Number ID: %s\n"
+               "State: %s\n"
+               "Compatible Flags:%s%s\n"
+               "Incompatible Flags:%s%s\n"
+               "Header size: %llu\n"
+               "Arena size: %llu\n"
+               "Data Hash Table Size: %llu\n"
+               "Field Hash Table Size: %llu\n"
+               "Objects: %llu\n"
+               "Entry Objects: %llu\n"
+               "Rotate Suggested: %s\n"
+               "Head Sequential Number: %llu\n"
+               "Tail Sequential Number: %llu\n"
+               "Head Realtime Timestamp: %s\n"
+               "Tail Realtime Timestamp: %s\n",
+               f->path,
+               sd_id128_to_string(f->header->file_id, a),
+               sd_id128_to_string(f->header->machine_id, b),
+               sd_id128_to_string(f->header->boot_id, c),
+               sd_id128_to_string(f->header->seqnum_id, d),
+               f->header->state == STATE_OFFLINE ? "offline" :
+               f->header->state == STATE_ONLINE ? "online" :
+               f->header->state == STATE_ARCHIVED ? "archived" : "unknown",
+               (f->header->compatible_flags & HEADER_COMPATIBLE_SIGNED) ? " SIGNED" : "",
+               (f->header->compatible_flags & ~HEADER_COMPATIBLE_SIGNED) ? " ???" : "",
+               (f->header->incompatible_flags & HEADER_INCOMPATIBLE_COMPRESSED) ? " COMPRESSED" : "",
+               (f->header->incompatible_flags & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
+               (unsigned long long) le64toh(f->header->header_size),
+               (unsigned long long) le64toh(f->header->arena_size),
+               (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
+               (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
+               (unsigned long long) le64toh(f->header->n_objects),
+               (unsigned long long) le64toh(f->header->n_entries),
+               yes_no(journal_file_rotate_suggested(f)),
+               (unsigned long long) le64toh(f->header->head_seqnum),
+               (unsigned long long) le64toh(f->header->tail_seqnum),
+               format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
+               format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)));
+
+        if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
+                printf("Data Objects: %llu\n"
+                       "Data Hash Table Fill: %.1f%%\n",
+                       (unsigned long long) le64toh(f->header->n_data),
+                       100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
+
+        if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
+                printf("Field Objects: %llu\n"
+                       "Field Hash Table Fill: %.1f%%\n",
+                       (unsigned long long) le64toh(f->header->n_fields),
+                       100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
+}
+
 int journal_file_open(
                 const char *fname,
                 int flags,
                 mode_t mode,
+                JournalMetrics *metrics,
                 JournalFile *template,
                 JournalFile **ret) {
@@ -1915,10 +1988,8 @@ int journal_file_open(
         f->writable = (flags & O_ACCMODE) != O_RDONLY;
         f->prot = prot_from_flags(flags);
 
-        if (template) {
-                f->metrics = template->metrics;
+        if (template)
                 f->compress = template->compress;
-        }
 
         f->path = strdup(fname);
         if (!f->path) {
@@ -1950,7 +2021,7 @@ int journal_file_open(
                 }
         }
 
-        if (f->last_stat.st_size < (off_t) sizeof(Header)) {
+        if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
                 r = -EIO;
                 goto fail;
         }
@@ -1969,6 +2040,12 @@ int journal_file_open(
         }
 
         if (f->writable) {
+                if (metrics) {
+                        journal_default_metrics(metrics, f->fd);
+                        f->metrics = *metrics;
+                } else if (template)
+                        f->metrics = template->metrics;
+
                 r = journal_file_refresh_header(f);
                 if (r < 0)
                         goto fail;
@@ -2032,7 +2109,7 @@ int journal_file_rotate(JournalFile **f) {
         sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
         snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
                  "-%016llx-%016llx.journal",
                 (unsigned long long) le64toh((*f)->header->tail_seqnum),
                 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
 
         r = rename(old_file->path, p);
@@ -2043,7 +2120,7 @@
 
         old_file->header->state = STATE_ARCHIVED;
 
-        r = journal_file_open(old_file->path, old_file->flags, old_file->mode, old_file, &new_file);
+        r = journal_file_open(old_file->path, old_file->flags, old_file->mode, NULL, old_file, &new_file);
         journal_file_close(old_file);
 
         *f = new_file;
@@ -2054,6 +2131,7 @@ int journal_file_open_reliably(
                 const char *fname,
                 int flags,
                 mode_t mode,
+                JournalMetrics *metrics,
                 JournalFile *template,
                 JournalFile **ret) {
@@ -2061,11 +2139,13 @@ int journal_file_open_reliably(
         size_t l;
         char *p;
 
-        r = journal_file_open(fname, flags, mode, template, ret);
+        r = journal_file_open(fname, flags, mode, metrics, template, ret);
         if (r != -EBADMSG && /* corrupted */
             r != -ENODATA && /* truncated */
             r != -EHOSTDOWN && /* other machine */
-            r != -EPROTONOSUPPORT) /* incompatible feature */
+            r != -EPROTONOSUPPORT && /* incompatible feature */
+            r != -EBUSY && /* unclean shutdown */
+            r != -ESHUTDOWN /* already archived */)
                 return r;
 
         if ((flags & O_ACCMODE) == O_RDONLY)
@@ -2088,9 +2168,9 @@ int journal_file_open_reliably(
         if (r < 0)
                 return -errno;
 
-        log_warning("File %s corrupted, renaming and replacing.", fname);
+        log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
 
-        return journal_file_open(fname, flags, mode, template, ret);
+        return journal_file_open(fname, flags, mode, metrics, template, ret);
 }
 
 struct vacuum_info {
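
The widened error list above makes -EBUSY (unclean shutdown) and -ESHUTDOWN
(already archived) recoverable in the same way as corruption: rename the bad
file aside and open a fresh one. A condensed sketch of that open-rename-retry
shape; file_open() and file_open_reliably() are hypothetical stand-ins with
fewer parameters than the real functions, the error whitelist is shortened,
and the archive-style rename (whose name construction is not shown in this
diff) is reduced to a log line:

    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>

    /* Pretend the file on disk was left online by an unclean shutdown */
    static int file_open(const char *fname, int flags) {
            (void) fname;
            (void) flags;
            return -EBUSY;
    }

    static int file_open_reliably(const char *fname, int flags) {
            int r;

            r = file_open(fname, flags);
            if (r != -EBADMSG &&      /* corrupted */
                r != -ENODATA &&      /* truncated */
                r != -EBUSY &&        /* unclean shutdown */
                r != -ESHUTDOWN)      /* already archived */
                    return r;         /* success, or an error replacing the file can't fix */

            if ((flags & O_ACCMODE) == O_RDONLY)
                    return r;         /* read-only callers must not replace anything */

            /* Here the real code renames fname out of the way, then retries once */
            fprintf(stderr, "File %s corrupted or uncleanly shut down, renaming and replacing.\n", fname);
            return file_open(fname, flags);
    }

    int main(void) {
            return file_open_reliably("system.journal", O_RDWR) == -EBUSY ? 0 : 1;
    }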
@@ -2257,7 +2337,8 @@ int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t m
                 n_list ++;
         }
 
-        qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
+        if (n_list > 0)
+                qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
 
         for(i = 0; i < n_list; i++) {
                 struct statvfs ss;
@@ -2510,3 +2591,44 @@ int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, u
 
         return 1;
 }
+
+bool journal_file_rotate_suggested(JournalFile *f) {
+        assert(f);
+
+        /* If we gained new header fields we gained new features,
+         * hence suggest a rotation */
+        if (le64toh(f->header->header_size) < sizeof(Header)) {
+                log_debug("%s uses an outdated header, suggesting rotation.", f->path);
+                return true;
+        }
+
+        /* Let's check if the hash tables grew over a certain fill
+         * level (75%, borrowing this value from Java's hash table
+         * implementation), and if so suggest a rotation. To calculate
+         * the fill level we need the n_data field, which only exists
+         * in newer versions. */
+
+        if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
+                if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
+                        log_debug("Data hash table of %s has a fill level at %.1f%% (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
+                                  f->path,
+                                  100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
+                                  (unsigned long long) le64toh(f->header->n_data),
+                                  (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
+                                  (unsigned long long) (f->last_stat.st_size),
+                                  (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
+                        return true;
+                }
+
+        if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
+                if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
+                        log_debug("Field hash table of %s has a fill level at %.1f%% (%llu of %llu items), suggesting rotation.",
+                                  f->path,
+                                  100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
+                                  (unsigned long long) le64toh(f->header->n_fields),
+                                  (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
+                        return true;
+                }
+
+        return false;
+}
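
Both fill-level tests above rely on the same integer trick: for unsigned
counts n (used entries) and m (total entries), n * 4 > m * 3 is exactly
"n/m exceeds 75%" with no division, rounding or floating point. A small
self-contained check of that equivalence, reusing the 233016-entry table
from the 128 MiB example earlier (a hypothetical figure):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* The comparison journal_file_rotate_suggested() uses for both tables */
    static bool over_three_quarters(uint64_t n, uint64_t m) {
            return n * 4ULL > m * 3ULL;
    }

    int main(void) {
            /* 75% of 233016 is exactly 174762 */
            printf("%d\n", over_three_quarters(174762, 233016)); /* 0: exactly at 75% */
            printf("%d\n", over_three_quarters(174763, 233016)); /* 1: over the threshold */
            return 0;
    }

The header_size < sizeof(Header) test at the top of the function is the flip
side of JOURNAL_HEADER_CONTAINS: a file written with a smaller header predates
some feature the current code supports, so rotating to a fresh file upgrades it.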