chiark / gitweb /
journald: do not free space when disk space runs low
authorZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Wed, 13 Nov 2013 05:42:22 +0000 (00:42 -0500)
committerZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Sat, 11 Jan 2014 21:54:59 +0000 (16:54 -0500)
Before, journald would remove journal files until both MaxUse= and
KeepFree= settings would be satisfied. The first one depends (if set
automatically) on the size of the file system and is constant.  But
the second one depends on current use of the file system, and a spike
in disk usage would cause journald to delete journal files, trying to
reach usage which would leave 15% of the disk free. This behaviour is
surprising for the user who doesn't expect his logs to be purged when
disk usage goes above 85%, which on a large disk could be some
gigabytes from being full. In addition attempting to keep 15% free
provides an attack vector where filling the disk sufficiently disposes
of almost all logs.

Instead, obey KeepFree= only as a limit on adding additional files.
When replacing old files with new, ignore KeepFree=. This means that
if journal disk usage reached some high point that at some later point
start to violate the KeepFree= constraint, journald will not add files
to go above this point, but it will stay (slightly) below it. When
journald is restarted, it forgets the previous maximum usage value,
and sets the limit based on the current usage, so if disk remains to
be filled, journald might use one journal-file-size less on each
restart, if restarts happen just after rotation. This seems like a
reasonable compromise between implementation complexity and robustness.

man/journald.conf.xml
src/journal/journal-file.h
src/journal/journal-vacuum.c
src/journal/journal-vacuum.h
src/journal/journald-server.c
src/journal/test-journal-interleaving.c
src/journal/test-journal.c
src/shared/macro.h

index b362c5de2c402ed02e4d454f2ea6aeb3b848d3fc..e0796e1cce4841363664b4e7a2a2b893f58847ec 100644 (file)
                                 <para><varname>SystemMaxUse=</varname>
                                 and <varname>RuntimeMaxUse=</varname>
                                 control how much disk space the
-                                journal may use up at
-                                maximum. Defaults to 10% of the size
-                                of the respective file
-                                system. <varname>SystemKeepFree=</varname>
-                                and
+                                journal may use up at maximum.
+                                <varname>SystemKeepFree=</varname> and
                                 <varname>RuntimeKeepFree=</varname>
                                 control how much disk space
-                                systemd-journald shall always leave
-                                free for other uses. Defaults to 15%
-                                of the size of the respective file
-                                system. systemd-journald will respect
-                                both limits, i.e. use the smaller of
-                                the two values.
-                                <varname>SystemMaxFileSize=</varname>
+                                systemd-journald shall leave free for
+                                other uses.
+                                <command>systemd-journald</command>
+                                will respect both limits and use the
+                                smaller of the two values.</para>
+
+                                <para>The first pair defaults to 10%
+                                and the second to 15% of the size of
+                                the respective file system. If the
+                                file system is nearly full and either
+                                <varname>SystemKeepFree=</varname> or
+                                <varname>RuntimeKeepFree=</varname> is
+                                violated when systemd-journald is
+                                started, the value will be raised to
+                                percentage that is actually free. This
+                                means that if before there was enough
+                                free space and journal files were
+                                created, and subsequently something
+                                else causes the file system to fill
+                                up, journald will stop using more
+                                space, but it'll will not removing
+                                existing files to go reduce footprint
+                                either.</para>
+
+                                <para><varname>SystemMaxFileSize=</varname>
                                 and
                                 <varname>RuntimeMaxFileSize=</varname>
                                 control how large individual journal
index 885b3b1df489c5fedfae12c2c51c551375393d1f..50dbe29cc16d422f7a22f4943599d876b7e77b02 100644 (file)
@@ -37,6 +37,7 @@
 
 typedef struct JournalMetrics {
         uint64_t max_use;
+        uint64_t use;
         uint64_t max_size;
         uint64_t min_size;
         uint64_t keep_free;
index b2b47d69b2af815972133869c5618fffe165dc9c..c92c198245536c2a09a69063d0049a32c4d2d7c7 100644 (file)
@@ -150,7 +150,6 @@ static int journal_file_empty(int dir_fd, const char *name) {
 int journal_directory_vacuum(
                 const char *directory,
                 uint64_t max_use,
-                uint64_t min_free,
                 usec_t max_retention_usec,
                 usec_t *oldest_usec) {
 
@@ -164,7 +163,7 @@ int journal_directory_vacuum(
 
         assert(directory);
 
-        if (max_use <= 0 && min_free <= 0 && max_retention_usec <= 0)
+        if (max_use <= 0 && max_retention_usec <= 0)
                 return 0;
 
         if (max_retention_usec > 0) {
@@ -309,8 +308,7 @@ int journal_directory_vacuum(
                 }
 
                 if ((max_retention_usec <= 0 || list[i].realtime >= retention_limit) &&
-                    (max_use <= 0 || sum <= max_use) &&
-                    (min_free <= 0 || (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free))
+                    (max_use <= 0 || sum <= max_use))
                         break;
 
                 if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) {
index f5e3e5291fab5ff720eaa457696a94fbb560b464..bc30c3a1408e4edcb799286a76ce3e3f4c03d063 100644 (file)
@@ -23,4 +23,4 @@
 
 #include <inttypes.h>
 
-int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free, usec_t max_retention_usec, usec_t *oldest_usec);
+int journal_directory_vacuum(const char *directory, uint64_t max_use, usec_t max_retention_usec, usec_t *oldest_usec);
index a3bacdab66a68ec930d6903f9ec0166872d79664..d3a1c574bd6d3ed3aa3273ed92c8e7a3a2a6f5f0 100644 (file)
@@ -158,9 +158,18 @@ static uint64_t available_space(Server *s, bool verbose) {
         }
 
         ss_avail = ss.f_bsize * ss.f_bavail;
-        avail = ss_avail > m->keep_free ? ss_avail - m->keep_free : 0;
 
-        s->cached_available_space = MIN(m->max_use, avail) > sum ? MIN(m->max_use, avail) - sum : 0;
+        /* If we reached a high mark, we will always allow this much
+         * again, unless usage goes above max_use. This watermark
+         * value is cached so that we don't give up space on pressure,
+         * but hover below the maximum usage. */
+
+        if (m->use < sum)
+                m->use = sum;
+
+        avail = LESS_BY(ss_avail, m->keep_free);
+
+        s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
         s->cached_available_space_timestamp = ts;
 
         if (verbose) {
@@ -168,13 +177,14 @@ static uint64_t available_space(Server *s, bool verbose) {
                         fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
 
                 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
-                                      "%s journal is using %s (max %s, leaving %s of free %s, current limit %s).",
+                                      "%s journal is using %s (max allowed %s, "
+                                      "trying to leave %s free of %s available → current limit %s).",
                                       s->system_journal ? "Permanent" : "Runtime",
                                       format_bytes(fb1, sizeof(fb1), sum),
                                       format_bytes(fb2, sizeof(fb2), m->max_use),
                                       format_bytes(fb3, sizeof(fb3), m->keep_free),
                                       format_bytes(fb4, sizeof(fb4), ss_avail),
-                                      format_bytes(fb5, sizeof(fb5), MIN(m->max_use, avail)));
+                                      format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
         }
 
         return s->cached_available_space;
@@ -379,7 +389,7 @@ void server_vacuum(Server *s) {
         if (s->system_journal) {
                 char *p = strappenda("/var/log/journal/", ids);
 
-                r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
+                r = journal_directory_vacuum(p, s->system_metrics.max_use, s->max_retention_usec, &s->oldest_file_usec);
                 if (r < 0 && r != -ENOENT)
                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
         }
@@ -387,7 +397,7 @@ void server_vacuum(Server *s) {
         if (s->runtime_journal) {
                 char *p = strappenda("/run/log/journal/", ids);
 
-                r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
+                r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->max_retention_usec, &s->oldest_file_usec);
                 if (r < 0 && r != -ENOENT)
                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
         }
index 5b747147827e2cb0643e30451478802b617250dd..5c9604472e7f4dad54a6c73e6760548027e08b78 100644 (file)
@@ -189,7 +189,7 @@ static void test_skip(void (*setup)(void)) {
         if (arg_keep)
                 log_info("Not removing %s", t);
         else {
-                journal_directory_vacuum(".", 3000000, 0, 0, NULL);
+                journal_directory_vacuum(".", 3000000, 0, NULL);
 
                 assert_se(rm_rf_dangerous(t, false, true, false) >= 0);
         }
@@ -274,7 +274,7 @@ static void test_sequence_numbers(void) {
         if (arg_keep)
                 log_info("Not removing %s", t);
         else {
-                journal_directory_vacuum(".", 3000000, 0, 0, NULL);
+                journal_directory_vacuum(".", 3000000, 0, NULL);
 
                 assert_se(rm_rf_dangerous(t, false, true, false) >= 0);
         }
index 189fe07b485d5ca44757af455e057b3f30c33d04..85b4cf71acf5915da2d8b22963faad4c9a0f7c21 100644 (file)
@@ -126,7 +126,7 @@ static void test_non_empty(void) {
         if (arg_keep)
                 log_info("Not removing %s", t);
         else {
-                journal_directory_vacuum(".", 3000000, 0, 0, NULL);
+                journal_directory_vacuum(".", 3000000, 0, NULL);
 
                 assert_se(rm_rf_dangerous(t, false, true, false) >= 0);
         }
@@ -165,7 +165,7 @@ static void test_empty(void) {
         if (arg_keep)
                 log_info("Not removing %s", t);
         else {
-                journal_directory_vacuum(".", 3000000, 0, 0, NULL);
+                journal_directory_vacuum(".", 3000000, 0, NULL);
 
                 assert_se(rm_rf_dangerous(t, false, true, false) >= 0);
         }
index 79bee0396c3a52c4bed07f28f1878915bad8b282..dfbc20142f753172ee8708f5e5aa94eca27fa66b 100644 (file)
@@ -117,6 +117,13 @@ static inline size_t ALIGN_TO(size_t l, size_t ali) {
                         _a < _b ? _a : _b;      \
                 })
 
+#define LESS_BY(A,B)                            \
+        __extension__ ({                        \
+                        typeof(A) _A = (A);     \
+                        typeof(B) _B = (B);     \
+                        _A > _B ? _A - _B : 0;  \
+                })
+
 #ifndef CLAMP
 #define CLAMP(x, low, high)                                             \
         __extension__ ({                                                \