#include <sys/statvfs.h>
#include <fcntl.h>
#include <stddef.h>
+#include <linux/fs.h>
+#include "btrfs-util.h"
#include "journal-def.h"
#include "journal-file.h"
#include "journal-authenticate.h"
/* How much to increase the journal file size at once each time we allocate something new. */
#define FILE_SIZE_INCREASE (8ULL*1024ULL*1024ULL) /* 8MB */
+/* Reread fstat() of the file for detecting deletions at least this often */
+#define LAST_STAT_REFRESH_USEC (5*USEC_PER_SEC)
+
/* The mmap context to use for the header we pick as one above the last defined typed */
#define CONTEXT_HEADER _OBJECT_TYPE_MAX
if (f->mmap && f->fd >= 0)
mmap_cache_close_fd(f->mmap, f->fd);
+ if (f->fd >= 0 && f->defrag_on_close) {
+
+ /* Be friendly to btrfs: turn COW back on again now,
+ * and defragment the file. We won't write to the file
+ * ever again, hence remove all fragmentation, and
+ * reenable all the good bits COW usually provides
+ * (such as data checksumming). */
+
+ (void) chattr_fd(f->fd, false, FS_NOCOW_FL);
+ (void) btrfs_defrag_fd(f->fd);
+ }
+
safe_close(f->fd);
free(f->path);
} else if (state == STATE_ARCHIVED)
return -ESHUTDOWN;
else if (state != STATE_OFFLINE) {
- log_debug("Journal file %s has unknown state %u.", f->path, state);
+ log_debug("Journal file %s has unknown state %i.", f->path, state);
return -EBUSY;
}
}
return 0;
}
+static int journal_file_fstat(JournalFile *f) {
+ assert(f);
+ assert(f->fd >= 0);
+
+ if (fstat(f->fd, &f->last_stat) < 0)
+ return -errno;
+
+ f->last_stat_usec = now(CLOCK_MONOTONIC);
+
+ /* Refuse appending to files that are already deleted */
+ if (f->last_stat.st_nlink <= 0)
+ return -EIDRM;
+
+ return 0;
+}
+
static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
uint64_t old_size, new_size;
int r;
if (new_size < le64toh(f->header->header_size))
new_size = le64toh(f->header->header_size);
- if (new_size <= old_size)
- return 0;
+ if (new_size <= old_size) {
+
+ /* We already pre-allocated enough space, but before
+ * we write to it, let's check with fstat() if the
+ * file got deleted, in order make sure we don't throw
+ * away the data immediately. Don't check fstat() for
+ * all writes though, but only once ever 10s. */
+
+ if (f->last_stat_usec + LAST_STAT_REFRESH_USEC > now(CLOCK_MONOTONIC))
+ return 0;
+
+ return journal_file_fstat(f);
+ }
+
+ /* Allocate more space. */
if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
return -E2BIG;
if (r != 0)
return -r;
- if (fstat(f->fd, &f->last_stat) < 0)
- return -errno;
-
f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
- return 0;
+ return journal_file_fstat(f);
}
static unsigned type_to_context(ObjectType type) {
}
static int journal_file_move_to(JournalFile *f, ObjectType type, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
+ int r;
+
assert(f);
assert(ret);
/* Hmm, out of range? Let's refresh the fstat() data
* first, before we trust that check. */
- if (fstat(f->fd, &f->last_stat) < 0 ||
- offset + size > (uint64_t) f->last_stat.st_size)
+ r = journal_file_fstat(f);
+ if (r < 0)
+ return r;
+
+ if (offset + size > (uint64_t) f->last_stat.st_size)
return -EADDRNOTAVAIL;
}
break;
default:
- printf("Type: unknown (%u)\n", o->object.type);
+ printf("Type: unknown (%i)\n", o->object.type);
break;
}
goto fail;
}
- if (fstat(f->fd, &f->last_stat) < 0) {
- r = -errno;
+ r = journal_file_fstat(f);
+ if (r < 0)
goto fail;
- }
if (f->last_stat.st_size == 0 && f->writable) {
+
+ /* Before we write anything, turn off COW logic. Given
+ * our write pattern that is quite unfriendly to COW
+ * file systems this should greatly improve
+ * performance on COW file systems, such as btrfs, at
+ * the expense of data integrity features (which
+ * shouldn't be too bad, given that we do our own
+ * checksumming). */
+ r = chattr_fd(f->fd, true, FS_NOCOW_FL);
+ if (r < 0)
+ log_warning_errno(errno, "Failed to set file attributes: %m");
+
/* Let's attach the creation time to the journal file,
* so that the vacuuming code knows the age of this
* file even if the file might end up corrupted one
* attributes are not supported we'll just skip this,
* and rely solely on mtime/atime/ctime of the file. */
- fd_setcrtime(f->fd, now(CLOCK_REALTIME));
+ fd_setcrtime(f->fd, 0);
#ifdef HAVE_GCRYPT
/* Try to load the FSPRG state, and if we can't, then
if (r < 0)
goto fail;
- if (fstat(f->fd, &f->last_stat) < 0) {
- r = -errno;
+ r = journal_file_fstat(f);
+ if (r < 0)
goto fail;
- }
newly_created = true;
}
if (r < 0)
return -ENOMEM;
+ /* Try to rename the file to the archived version. If the file
+ * already was deleted, we'll get ENOENT, let's ignore that
+ * case. */
r = rename(old_file->path, p);
- if (r < 0)
+ if (r < 0 && errno != ENOENT)
return -errno;
old_file->header->state = STATE_ARCHIVED;
+ /* Currently, btrfs is not very good with out write patterns
+ * and fragments heavily. Let's defrag our journal files when
+ * we archive them */
+ old_file->defrag_on_close = true;
+
r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
journal_file_close(old_file);
r != -EPROTONOSUPPORT && /* incompatible feature */
r != -EBUSY && /* unclean shutdown */
r != -ESHUTDOWN && /* already archived */
- r != -EIO /* IO error, including SIGBUS on mmap */)
+ r != -EIO && /* IO error, including SIGBUS on mmap */
+ r != -EIDRM /* File has been deleted */)
return r;
if ((flags & O_ACCMODE) == O_RDONLY)
if (r < 0)
return -errno;
+ /* btrfs doesn't cope well with our write pattern and
+ * fragments heavily. Let's defrag all files we rotate */
+
+ (void) chattr_path(p, false, FS_NOCOW_FL);
+ (void) btrfs_defrag(p);
+
log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
return journal_file_open(fname, flags, mode, compress, seal,