chiark / gitweb /
journal: implement time-based rotation/vacuuming
authorLennart Poettering <lennart@poettering.net>
Tue, 16 Oct 2012 20:58:07 +0000 (22:58 +0200)
committerLennart Poettering <lennart@poettering.net>
Tue, 16 Oct 2012 20:58:07 +0000 (22:58 +0200)
This also enables time-based rotation (but not vacuuming) after 1month,
so that no more than one month of journal is lost at a time per vacuuming.

12 files changed:
README
configure.ac
man/journald.conf.xml
src/journal/journal-file.c
src/journal/journal-file.h
src/journal/journal-vacuum.c
src/journal/journal-vacuum.h
src/journal/journald-gperf.gperf
src/journal/journald.c
src/journal/journald.conf
src/journal/journald.h
src/journal/test-journal.c

diff --git a/README b/README
index 25dadde0560c067fe059f051758c894260cda3e0..d947f27e28773c67bc4f16fe38943b6823ac81cb 100644 (file)
--- a/README
+++ b/README
@@ -47,6 +47,7 @@ REQUIREMENTS:
         libgcrypt (optional)
         libaudit (optional)
         libacl (optional)
+        libattr (optional)
         libselinux (optional)
         liblzma (optional)
         tcpwrappers (optional)
index 1bb657d157ac744d5d41c898656a1a3208cba64b..62e83beb1f334554655368ef7213b1c3a4f4a1e6 100644 (file)
@@ -1,4 +1,4 @@
-
+#
 #  This file is part of systemd.
 #
 #  Copyright 2010-2012 Lennart Poettering
@@ -340,6 +340,44 @@ fi
 AC_SUBST(ACL_LIBS)
 AM_CONDITIONAL([HAVE_ACL], [test "x$have_acl" != xno])
 
+# ------------------------------------------------------------------------------
+AC_ARG_ENABLE([xattr],
+        AS_HELP_STRING([--disable-xattr],[Disable optional XATTR support]),
+                [case "${enableval}" in
+                        yes) have_xattr=yes ;;
+                        no) have_xattr=no ;;
+                        *) AC_MSG_ERROR(bad value ${enableval} for --disable-xattr) ;;
+                esac],
+                [have_xattr=auto])
+
+if test "x${have_xattr}" != xno ; then
+        AC_CHECK_HEADERS(
+                [attr/xattr.h],
+                [have_xattr=yes],
+                [if test "x$have_xattr" = xyes ; then
+                        AC_MSG_ERROR([*** XATTR headers not found.])
+                fi])
+
+        AC_CHECK_LIB(
+                [attr],
+                [fsetxattr],
+                [have_xattr=yes],
+                [if test "x$have_xattr" = xyes ; then
+                        AC_MSG_ERROR([*** libattr not found.])
+                fi])
+
+        if test "x$have_xattr" = xyes ; then
+                XATTR_LIBS="-lattr"
+                AC_DEFINE(HAVE_XATTR, 1, [XATTR available])
+        else
+                have_xattr=no
+        fi
+else
+        XATTR_LIBS=
+fi
+AC_SUBST(XATTR_LIBS)
+AM_CONDITIONAL([HAVE_XATTR], [test "x$have_xattr" != xno])
+
 # ------------------------------------------------------------------------------
 AC_ARG_ENABLE([gcrypt],
         AS_HELP_STRING([--disable-gcrypt],[Disable optional GCRYPT support]),
@@ -823,6 +861,7 @@ AC_MSG_RESULT([
         SELinux:                 ${have_selinux}
         XZ:                      ${have_xz}
         ACL:                     ${have_acl}
+        XATTR:                   ${have_xattr}
         GCRYPT:                  ${have_gcrypt}
         QRENCODE:                ${have_qrencode}
         MICROHTTPD:              ${have_microhttpd}
index b06a23d80e39309f28b86bc3c1bf64ac3164f2c9..66189bd92d70285bccaec0ad02ce7d75cfaa2ae7 100644 (file)
                                 time.</para></listitem>
                         </varlistentry>
 
+                        <varlistentry>
+                                <term><varname>MaxFileSec=</varname></term>
+
+                                <listitem><para>The maximum time to
+                                store entries in a single journal
+                                file, before rotating to the next
+                                one. Normally time-based rotation
+                                should not be required as size-based
+                                rotation with options such as
+                                <varname>SystemMaxFileSize=</varname>
+                                should be sufficient to ensure that
+                                journal files don't grow without
+                                bounds. However, to ensure that not
+                                too much data is lost at once when old
+                                journal files are deleted it might
+                                make sense to change this value from
+                                the default of one month. Set to 0 to
+                                turn off this feature. This setting
+                                takes time values which may be
+                                suffixed with the units year, month,
+                                week, day, h, m to override the
+                                default time unit of
+                                seconds.</para></listitem>
+                        </varlistentry>
+
+                        <varlistentry>
+                                <term><varname>MaxRetentionSec=</varname></term>
+
+                                <listitem><para>The maximum time to
+                                store journal entries for. This
+                                controls whether journal files
+                                containing entries older than the
+                                specified time span are
+                                deleted. Normally time-based deletion
+                                of old journal files should not be
+                                required as size-based deletion with
+                                options such as
+                                <varname>SystemMaxUse=</varname>
+                                should be sufficient to ensure that
+                                journal files don't grow without
+                                bounds. However, to enforce data
+                                retention policies it might make sense
+                                to change this value from the
+                                default of 0 (which turns off this
+                                feature). This setting also takes
+                                time values which may be suffixed with
+                                the units year, month, week, day, h, m
+                                to override the default time unit of
+                                seconds. </para></listitem>
+                        </varlistentry>
+
                         <varlistentry>
                                 <term><varname>ForwardToSyslog=</varname></term>
                                 <term><varname>ForwardToKMsg=</varname></term>
index ba04d1667b11dbda2f1eac5163252eff15773efb..ae01e5df5020f00cbbed8859baba7a218ce738aa 100644 (file)
 #include <fcntl.h>
 #include <stddef.h>
 
+#ifdef HAVE_XATTR
+#include <attr/xattr.h>
+#endif
+
 #include "journal-def.h"
 #include "journal-file.h"
 #include "journal-authenticate.h"
@@ -1978,7 +1982,7 @@ void journal_file_print_header(JournalFile *f) {
                (unsigned long long) le64toh(f->header->arena_size),
                (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
                (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
-               yes_no(journal_file_rotate_suggested(f)),
+               yes_no(journal_file_rotate_suggested(f, 0)),
                (unsigned long long) le64toh(f->header->head_entry_seqnum),
                (unsigned long long) le64toh(f->header->tail_entry_seqnum),
                format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
@@ -2080,7 +2084,22 @@ int journal_file_open(
         }
 
         if (f->last_stat.st_size == 0 && f->writable) {
-                newly_created = true;
+#ifdef HAVE_XATTR
+                uint64_t crtime;
+
+                /* Let's attach the creation time to the journal file,
+                 * so that the vacuuming code knows the age of this
+                 * file even if the file might end up corrupted one
+                 * day... Ideally we'd just use the creation time many
+                 * file systems maintain for each file, but there is
+                 * currently no usable API to query this, hence let's
+                 * emulate this via extended attributes. If extended
+                 * attributes are not supported we'll just skip this,
+                 * and rely solely on mtime/atime/ctime of the file.*/
+
+                crtime = htole64((uint64_t) now(CLOCK_REALTIME));
+                fsetxattr(f->fd, "user.crtime_usec", &crtime, sizeof(crtime), XATTR_CREATE);
+#endif
 
 #ifdef HAVE_GCRYPT
                 /* Try to load the FSPRG state, and if we can't, then
@@ -2100,6 +2119,8 @@ int journal_file_open(
                         r = -errno;
                         goto fail;
                 }
+
+                newly_created = true;
         }
 
         if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
@@ -2207,8 +2228,8 @@ int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
         sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
         snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
                  "-%016llx-%016llx.journal",
-                 (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
-                 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
+                 (unsigned long long) le64toh((*f)->header->head_entry_seqnum),
+                 (unsigned long long) le64toh((*f)->header->head_entry_realtime));
 
         r = rename(old_file->path, p);
         free(p);
@@ -2501,7 +2522,7 @@ int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, u
         return 1;
 }
 
-bool journal_file_rotate_suggested(JournalFile *f) {
+bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec) {
         assert(f);
 
         /* If we gained new header fields we gained new features,
@@ -2539,5 +2560,15 @@ bool journal_file_rotate_suggested(JournalFile *f) {
                         return true;
                 }
 
+        if (max_file_usec > 0) {
+                usec_t t, h;
+
+                h = le64toh(f->header->head_entry_realtime);
+                t = now(CLOCK_REALTIME);
+
+                if (h > 0 && t > h + max_file_usec)
+                        return true;
+        }
+
         return false;
 }
index 5b1530e7a7ae4119bc6c08140035bc41e6f3448c..f52ee8c538459fce23924c6ed4006cb463a0c0f2 100644 (file)
@@ -184,4 +184,4 @@ void journal_default_metrics(JournalMetrics *m, int fd);
 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to);
 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot, usec_t *from, usec_t *to);
 
-bool journal_file_rotate_suggested(JournalFile *f);
+bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec);
index ac16bdfcfd4890fbe603ebe38d572ec9ca4ba03c..22c9cfcd526a02e78574e304ff20f8a95a0fcf1e 100644 (file)
 #include <sys/statvfs.h>
 #include <unistd.h>
 
+#ifdef HAVE_XATTR
+#include <attr/xattr.h>
+#endif
+
 #include "journal-def.h"
 #include "journal-file.h"
 #include "journal-vacuum.h"
@@ -68,18 +72,89 @@ static int vacuum_compare(const void *_a, const void *_b) {
                 return strcmp(a->filename, b->filename);
 }
 
-int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free) {
+static void patch_realtime(
+                const char *dir,
+                const char *fn,
+                const struct stat *st,
+                unsigned long long *realtime) {
+
+        usec_t x;
+
+#ifdef HAVE_XATTR
+        uint64_t crtime;
+        _cleanup_free_ const char *path = NULL;
+#endif
+
+        /* The timestamp was determined by the file name, but let's
+         * see if the file might actually be older than the file name
+         * suggested...
+         *
+         * On entry *realtime holds the caller's current estimate. It
+         * is only ever lowered, never raised: each candidate below
+         * replaces it if the candidate is non-zero, is not
+         * (usec_t) -1 and is smaller than the current value. The
+         * candidates are the ctime, atime and mtime found in st
+         * (converted with timespec_load()) and, when built with
+         * HAVE_XATTR, the user.crtime_usec extended attribute of the
+         * file dir/fn. */
+
+        assert(dir);
+        assert(fn);
+        assert(st);
+        assert(realtime);
+
+        x = timespec_load(&st->st_ctim);
+        if (x > 0 && x != (usec_t) -1 && x < *realtime)
+                *realtime = x;
+
+        x = timespec_load(&st->st_atim);
+        if (x > 0 && x != (usec_t) -1 && x < *realtime)
+                *realtime = x;
+
+        x = timespec_load(&st->st_mtim);
+        if (x > 0 && x != (usec_t) -1 && x < *realtime)
+                *realtime = x;
+
+#ifdef HAVE_XATTR
+        /* Let's read the original creation time, if possible. Ideally
+         * we'd just query the creation time the FS might provide, but
+         * unfortunately there's currently no sane API to query
+         * it. Hence let's implement this manually... */
+
+        /* Unfortunately there is no fgetxattrat(), so we need to
+         * go via path here. :-( If the path cannot be allocated we
+         * silently keep the value derived from the stat data. */
+
+        path = strjoin(dir, "/", fn, NULL);
+        if (!path)
+                return;
+
+        if (getxattr(path, "user.crtime_usec", &crtime, sizeof(crtime)) == sizeof(crtime)) {
+                crtime = le64toh(crtime);
+
+                if (crtime > 0 && crtime != (uint64_t) -1 && crtime < *realtime)
+                        *realtime = crtime;
+        }
+#endif
+}
+
+int journal_directory_vacuum(
+                const char *directory,
+                uint64_t max_use,
+                uint64_t min_free,
+                usec_t max_retention_usec,
+                usec_t *oldest_usec) {
+
         DIR *d;
         int r = 0;
         struct vacuum_info *list = NULL;
         unsigned n_list = 0, n_allocated = 0, i;
         uint64_t sum = 0;
+        usec_t retention_limit = 0;
 
         assert(directory);
 
-        if (max_use <= 0)
+        if (max_use <= 0 && min_free <= 0 && max_retention_usec <= 0)
                 return 0;
 
+        if (max_retention_usec > 0) {
+                retention_limit = now(CLOCK_REALTIME);
+                if (retention_limit > max_retention_usec)
+                        retention_limit -= max_retention_usec;
+                else
+                        max_retention_usec = retention_limit = 0;
+        }
+
         d = opendir(directory);
         if (!d)
                 return -errno;
@@ -170,6 +245,8 @@ int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t m
                 } else
                         continue;
 
+                patch_realtime(directory, de->d_name, &st, &realtime);
+
                 if (n_list >= n_allocated) {
                         struct vacuum_info *j;
 
@@ -199,7 +276,7 @@ int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t m
         if (n_list > 0)
                 qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
 
-        for(i = 0; i < n_list; i++) {
+        for (i = 0; i < n_list; i++) {
                 struct statvfs ss;
 
                 if (fstatvfs(dirfd(d), &ss) < 0) {
@@ -207,8 +284,9 @@ int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t m
                         goto finish;
                 }
 
-                if (sum <= max_use &&
-                    (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free)
+                if ((max_retention_usec <= 0 || list[i].realtime >= retention_limit) &&
+                    (max_use <= 0 || sum <= max_use) &&
+                    (min_free <= 0 || (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free))
                         break;
 
                 if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) {
@@ -218,6 +296,9 @@ int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t m
                         log_warning("Failed to delete %s/%s: %m", directory, list[i].filename);
         }
 
+        if (oldest_usec && i < n_list && (*oldest_usec == 0 || list[i].realtime < *oldest_usec))
+                *oldest_usec = list[i].realtime;
+
 finish:
         for (i = 0; i < n_list; i++)
                 free(list[i].filename);
index 9841d72de89aed944d75cbd3ea4ded63ee6dfd59..f5e3e5291fab5ff720eaa457696a94fbb560b464 100644 (file)
@@ -23,4 +23,4 @@
 
 #include <inttypes.h>
 
-int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free);
+int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free, usec_t max_retention_usec, usec_t *oldest_usec);
index 4c021edb5256c8875ec8daa4c36cfcd3c2c95e73..1635e1cfc8e48378e52c95d41da25bb99bd7c63f 100644 (file)
@@ -28,6 +28,8 @@ Journal.RuntimeMaxUse,      config_parse_bytes_off, 0, offsetof(Server, runtime_
 Journal.RuntimeMaxFileSize, config_parse_bytes_off, 0, offsetof(Server, runtime_metrics.max_size)
 Journal.RuntimeMinFileSize, config_parse_bytes_off, 0, offsetof(Server, runtime_metrics.min_size)
 Journal.RuntimeKeepFree,    config_parse_bytes_off, 0, offsetof(Server, runtime_metrics.keep_free)
+Journal.MaxRetentionSec,    config_parse_usec,      0, offsetof(Server, max_retention_usec)
+Journal.MaxFileSec,         config_parse_usec,      0, offsetof(Server, max_file_usec)
 Journal.ForwardToSyslog,    config_parse_bool,      0, offsetof(Server, forward_to_syslog)
 Journal.ForwardToKMsg,      config_parse_bool,      0, offsetof(Server, forward_to_kmsg)
 Journal.ForwardToConsole,   config_parse_bool,      0, offsetof(Server, forward_to_console)
index 4dcf7d32c2327f5810ee0b8af266f7f70c8e5f4a..f56e8224289955f4591798a994899839c901c8db 100644 (file)
@@ -361,6 +361,8 @@ static void server_vacuum(Server *s) {
 
         log_debug("Vacuuming...");
 
+        s->oldest_file_usec = 0;
+
         r = sd_id128_get_machine(&machine);
         if (r < 0) {
                 log_error("Failed to get machine ID: %s", strerror(-r));
@@ -376,7 +378,7 @@ static void server_vacuum(Server *s) {
                         return;
                 }
 
-                r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
+                r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
                 if (r < 0 && r != -ENOENT)
                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
                 free(p);
@@ -389,7 +391,7 @@ static void server_vacuum(Server *s) {
                         return;
                 }
 
-                r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
+                r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
                 if (r < 0 && r != -ENOENT)
                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
                 free(p);
@@ -482,7 +484,7 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned
         if (!f)
                 return;
 
-        if (journal_file_rotate_suggested(f)) {
+        if (journal_file_rotate_suggested(f, s->max_file_usec)) {
                 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
                 server_rotate(s);
                 server_vacuum(s);
@@ -1543,24 +1545,38 @@ int main(int argc, char *argv[]) {
 
         for (;;) {
                 struct epoll_event event;
-                int t;
+                int t = -1;
+                usec_t n;
 
-#ifdef HAVE_GCRYPT
-                usec_t u;
+                n = now(CLOCK_REALTIME);
 
-                if (server.system_journal &&
-                    journal_file_next_evolve_usec(server.system_journal, &u)) {
-                        usec_t n;
+                if (server.max_retention_usec > 0 && server.oldest_file_usec > 0) {
 
-                        n = now(CLOCK_REALTIME);
+                        /* The retention time is reached, so let's vacuum! */
+                        if (server.oldest_file_usec + server.max_retention_usec < n) {
+                                log_info("Retention time reached.");
+                                server_rotate(&server);
+                                server_vacuum(&server);
+                                continue;
+                        }
 
-                        if (n >= u)
-                                t = 0;
-                        else
-                                t = (int) ((u - n + USEC_PER_MSEC - 1) / USEC_PER_MSEC);
-                } else
+                        /* Calculate when to rotate the next time */
+                        t = (int) ((server.oldest_file_usec + server.max_retention_usec - n + USEC_PER_MSEC - 1) / USEC_PER_MSEC);
+                        log_info("Sleeping for %i ms", t);
+                }
+
+#ifdef HAVE_GCRYPT
+                if (server.system_journal) {
+                        usec_t u;
+
+                        if (journal_file_next_evolve_usec(server.system_journal, &u)) {
+                                if (n >= u)
+                                        t = 0;
+                                else
+                                        t = MIN(t, (int) ((u - n + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
+                        }
+                }
 #endif
-                        t = -1;
 
                 r = epoll_wait(server.epoll_fd, &event, 1, t);
                 if (r < 0) {
index e5f3b76a68fc5794e52ba465e4d8b907e2bdcff6..49648332c7bfa7908293c5543d0e437e519d676c 100644 (file)
@@ -22,6 +22,8 @@
 #RuntimeKeepFree=
 #RuntimeMaxFileSize=
 #RuntimeMinFileSize=
+#MaxRetentionSec=
+#MaxFileSec=1month
 #ForwardToSyslog=yes
 #ForwardToKMsg=no
 #ForwardToConsole=no
index 25e7ec3b11fe5e76e26d2f588d4925f7f3a9e587..e3ef3b529d5385460790abcac2c2a943c92b1f89 100644 (file)
@@ -91,6 +91,10 @@ typedef struct Server {
 
         uint64_t var_available_timestamp;
 
+        usec_t max_retention_usec;
+        usec_t max_file_usec;
+        usec_t oldest_file_usec;
+
         gid_t file_gid;
         bool file_gid_valid;
 
index 22735001001320999a8bfdb34b1accaa542535bb..2fd49c17fc4cf04db61531f771e4518867e5b689 100644 (file)
@@ -119,7 +119,7 @@ int main(int argc, char *argv[]) {
 
         journal_file_close(f);
 
-        journal_directory_vacuum(".", 3000000, 0);
+        journal_directory_vacuum(".", 3000000, 0, 0, NULL);
 
         log_error("Exiting...");