X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=elogind.git;a=blobdiff_plain;f=src%2Fjournal%2Fjournal-file.c;h=5557028147eb66fdbe6084a77de88cfd2ad47208;hp=37e2e37eb126c786d0239a04b5f1ed6be89d3535;hb=de7b95cdc3228131498021c2fdcf6647004c3920;hpb=cec736d21ff86c4ac81b4d306ddba2120333818c diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 37e2e37eb..555702814 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -35,6 +35,8 @@ #define DEFAULT_ARENA_MIN_SIZE (256ULL*1024ULL) #define DEFAULT_ARENA_KEEP_FREE (1ULL*1024ULL*1024ULL) +#define DEFAULT_MAX_USE (16ULL*1024ULL*1024ULL*16ULL) + #define DEFAULT_HASH_TABLE_SIZE (2047ULL*16ULL) #define DEFAULT_BISECT_TABLE_SIZE ((DEFAULT_ARENA_MAX_SIZE/(64ULL*1024ULL))*8ULL) @@ -47,11 +49,12 @@ static const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' }; void journal_file_close(JournalFile *f) { assert(f); - if (f->fd >= 0) - close_nointr_nofail(f->fd); + if (f->header) { + if (f->writable && f->header->state == htole32(STATE_ONLINE)) + f->header->state = htole32(STATE_OFFLINE); - if (f->header) munmap(f->header, PAGE_ALIGN(sizeof(Header))); + } if (f->hash_table_window) munmap(f->hash_table_window, f->hash_table_window_size); @@ -62,11 +65,14 @@ void journal_file_close(JournalFile *f) { if (f->window) munmap(f->window, f->window_size); + if (f->fd >= 0) + close_nointr_nofail(f->fd); + free(f->path); free(f); } -static int journal_file_init_header(JournalFile *f) { +static int journal_file_init_header(JournalFile *f, JournalFile *template) { Header h; ssize_t k; int r; @@ -84,7 +90,11 @@ static int journal_file_init_header(JournalFile *f) { if (r < 0) return r; - h.seqnum_id = h.file_id; + if (template) { + h.seqnum_id = template->header->seqnum_id; + h.seqnum = template->header->seqnum; + } else + h.seqnum_id = h.file_id; k = pwrite(f->fd, &h, sizeof(h), 0); if (k < 0) @@ -162,7 +172,7 @@ static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) new_size = PAGE_ALIGN(offset + size); /* We assume that this file is not sparse, and we know that - * for sure, since we alway call posix_fallocate() + * for sure, since we always call posix_fallocate() * ourselves */ old_size = @@ -195,7 +205,7 @@ static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) if (asize > le64toh(f->header->arena_max_size)) return -E2BIG; - if (posix_fallocate(f->fd, 0, new_size) < 0) + if (posix_fallocate(f->fd, old_size, new_size - old_size) < 0) return -errno; if (fstat(f->fd, &f->last_stat) < 0) @@ -354,12 +364,24 @@ int journal_file_move_to_object(JournalFile *f, uint64_t offset, int type, Objec return 0; } -static uint64_t journal_file_seqnum(JournalFile *f) { +static uint64_t journal_file_seqnum(JournalFile *f, uint64_t *seqnum) { uint64_t r; assert(f); r = le64toh(f->header->seqnum) + 1; + + if (seqnum) { + /* If an external seqno counter was passed, we update + * both the local and the external one, and set it to + * the maximum of both */ + + if (*seqnum + 1 > r) + r = *seqnum + 1; + + *seqnum = r; + } + f->header->seqnum = htole64(r); return r; @@ -674,9 +696,10 @@ static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) { o->entry.prev_entry_offset = f->header->tail_entry_offset; o->entry.next_entry_offset = 0; - if (p == 0) + if (p == 0) { f->header->head_entry_offset = htole64(offset); - else { + f->header->head_entry_realtime = o->entry.realtime; + } else { /* Temporarily move back to the previous entry, to * patch in pointer */ @@ -692,6 +715,7 @@ static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) { } f->header->tail_entry_offset = htole64(offset); + f->header->tail_entry_realtime = o->entry.realtime; /* Link up the items */ n = journal_file_entry_n_items(o); @@ -721,6 +745,7 @@ static int journal_file_append_entry_internal( const dual_timestamp *ts, uint64_t xor_hash, const EntryItem items[], unsigned n_items, + uint64_t *seqno, Object **ret, uint64_t *offset) { uint64_t np; uint64_t osize; @@ -737,10 +762,10 @@ static int journal_file_append_entry_internal( return r; o->object.type = htole64(OBJECT_ENTRY); - o->entry.seqnum = htole64(journal_file_seqnum(f)); + o->entry.seqnum = htole64(journal_file_seqnum(f, seqno)); memcpy(o->entry.items, items, n_items * sizeof(EntryItem)); - o->entry.realtime = ts ? htole64(ts->realtime) : 0; - o->entry.monotonic = ts ? htole64(ts->monotonic) : 0; + o->entry.realtime = htole64(ts ? ts->realtime : now(CLOCK_REALTIME)); + o->entry.monotonic = htole64(ts ? ts->monotonic : now(CLOCK_MONOTONIC)); o->entry.xor_hash = htole64(xor_hash); o->entry.boot_id = f->header->boot_id; @@ -757,7 +782,7 @@ static int journal_file_append_entry_internal( return 0; } -int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, Object **ret, uint64_t *offset) { +int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqno, Object **ret, uint64_t *offset) { unsigned i; EntryItem *items; int r; @@ -780,9 +805,10 @@ int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const st xor_hash ^= le64toh(o->data.hash); items[i].object_offset = htole64(p); + items[i].hash = o->data.hash; } - r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, ret, offset); + r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqno, ret, offset); finish: free(items); @@ -1060,7 +1086,10 @@ void journal_file_dump(JournalFile *f) { break; case OBJECT_ENTRY: - printf("Type: OBJECT_ENTRY %llu\n", (unsigned long long) le64toh(o->entry.seqnum)); + printf("Type: OBJECT_ENTRY %llu %llu %llu\n", + (unsigned long long) le64toh(o->entry.seqnum), + (unsigned long long) le64toh(o->entry.monotonic), + (unsigned long long) le64toh(o->entry.realtime)); break; case OBJECT_HASH_TABLE: @@ -1087,6 +1116,7 @@ int journal_file_open( const char *fname, int flags, mode_t mode, + JournalFile *template, JournalFile **ret) { JournalFile *f; @@ -1103,21 +1133,24 @@ int journal_file_open( if (!f) return -ENOMEM; + f->fd = -1; + f->flags = flags; + f->mode = mode; f->writable = (flags & O_ACCMODE) != O_RDONLY; f->prot = prot_from_flags(flags); - f->fd = open(fname, flags|O_CLOEXEC, mode); - if (f->fd < 0) { - r = -errno; - goto fail; - } - f->path = strdup(fname); if (!f->path) { r = -ENOMEM; goto fail; } + f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode); + if (f->fd < 0) { + r = -errno; + goto fail; + } + if (fstat(f->fd, &f->last_stat) < 0) { r = -errno; goto fail; @@ -1126,7 +1159,7 @@ int journal_file_open( if (f->last_stat.st_size == 0 && f->writable) { newly_created = true; - r = journal_file_init_header(f); + r = journal_file_init_header(f, template); if (r < 0) goto fail; @@ -1189,3 +1222,206 @@ fail: return r; } + +int journal_file_rotate(JournalFile **f) { + char *p; + size_t l; + JournalFile *old_file, *new_file = NULL; + int r; + + assert(f); + assert(*f); + + old_file = *f; + + if (!old_file->writable) + return -EINVAL; + + if (!endswith(old_file->path, ".journal")) + return -EINVAL; + + l = strlen(old_file->path); + + p = new(char, l + 1 + 16 + 1 + 32 + 1 + 16 + 1); + if (!p) + return -ENOMEM; + + memcpy(p, old_file->path, l - 8); + p[l-8] = '@'; + sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1); + snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1, + "-%016llx-%016llx.journal", + (unsigned long long) le64toh((*f)->header->seqnum), + (unsigned long long) le64toh((*f)->header->tail_entry_realtime)); + + r = rename(old_file->path, p); + free(p); + + if (r < 0) + return -errno; + + old_file->header->state = le32toh(STATE_ARCHIVED); + + r = journal_file_open(old_file->path, old_file->flags, old_file->mode, old_file, &new_file); + journal_file_close(old_file); + + *f = new_file; + return r; +} + +struct vacuum_info { + off_t usage; + char *filename; + + uint64_t realtime; + sd_id128_t seqnum_id; + uint64_t seqnum; +}; + +static int vacuum_compare(const void *_a, const void *_b) { + const struct vacuum_info *a, *b; + + a = _a; + b = _b; + + if (sd_id128_equal(a->seqnum_id, b->seqnum_id)) { + if (a->seqnum < b->seqnum) + return -1; + else if (a->seqnum > b->seqnum) + return 1; + else + return 0; + } + + if (a->realtime < b->realtime) + return -1; + else if (a->realtime > b->realtime) + return 1; + else + return memcmp(&a->seqnum_id, &b->seqnum_id, 16); +} + +int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free) { + DIR *d; + int r = 0; + struct vacuum_info *list = NULL; + unsigned n_list = 0, n_allocated = 0, i; + uint64_t sum = 0; + + assert(directory); + + if (max_use <= 0) + max_use = DEFAULT_MAX_USE; + + d = opendir(directory); + if (!d) + return -errno; + + for (;;) { + int k; + struct dirent buf, *de; + size_t q; + struct stat st; + char *p; + unsigned long long seqnum, realtime; + sd_id128_t seqnum_id; + + k = readdir_r(d, &buf, &de); + if (k != 0) { + r = -k; + goto finish; + } + + if (!de) + break; + + if (!dirent_is_file_with_suffix(de, ".journal")) + continue; + + q = strlen(de->d_name); + + if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8) + continue; + + if (de->d_name[q-8-16-1] != '-' || + de->d_name[q-8-16-1-16-1] != '-' || + de->d_name[q-8-16-1-16-1-32-1] != '@') + continue; + + if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) + continue; + + if (!S_ISREG(st.st_mode)) + continue; + + p = strdup(de->d_name); + if (!p) { + r = -ENOMEM; + goto finish; + } + + de->d_name[q-8-16-1-16-1] = 0; + if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) { + free(p); + continue; + } + + if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) { + free(p); + continue; + } + + if (n_list >= n_allocated) { + struct vacuum_info *j; + + n_allocated = MAX(n_allocated * 2U, 8U); + j = realloc(list, n_allocated * sizeof(struct vacuum_info)); + if (!j) { + free(p); + r = -ENOMEM; + goto finish; + } + + list = j; + } + + list[n_list].filename = p; + list[n_list].usage = (uint64_t) st.st_blksize * (uint64_t) st.st_blocks; + list[n_list].seqnum = seqnum; + list[n_list].realtime = realtime; + list[n_list].seqnum_id = seqnum_id; + + sum += list[n_list].usage; + + n_list ++; + } + + qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare); + + for(i = 0; i < n_list; i++) { + struct statvfs ss; + + if (fstatvfs(dirfd(d), &ss) < 0) { + r = -errno; + goto finish; + } + + if (sum <= max_use && + (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free) + break; + + if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) { + log_debug("Deleted archived journal %s/%s.", directory, list[i].filename); + sum -= list[i].usage; + } else if (errno != ENOENT) + log_warning("Failed to delete %s/%s: %m", directory, list[i].filename); + } + +finish: + for (i = 0; i < n_list; i++) + free(list[i].filename); + + free(list); + + return r; +}