1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
43 #include "conf-parser.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-server.h"
48 #include "journald-rate-limit.h"
49 #include "journald-kmsg.h"
50 #include "journald-syslog.h"
51 #include "journald-stream.h"
52 #include "journald-console.h"
53 #include "journald-native.h"
57 #include <acl/libacl.h>
62 #include <selinux/selinux.h>
/* Limit on the number of per-user journal files held open in
 * s->user_journals; find_journal() closes entries once it is reached. */
65 #define USER_JOURNALS_MAX 1024
/* Defaults that server_init() stores in the Server object before the
 * configuration file and kernel command line are parsed. */
67 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
68 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
69 #define DEFAULT_RATE_LIMIT_BURST 1000
/* Age limit that available_space() applies to its cached result. */
71 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Textual names of the Storage enum values, indexed by the enum value. */
73 static const char* const storage_table[] = {
74 [STORAGE_AUTO] = "auto",
75 [STORAGE_VOLATILE] = "volatile",
76 [STORAGE_PERSISTENT] = "persistent",
77 [STORAGE_NONE] = "none"
/* Instantiate the string-table lookup and the config_parse_storage() parser
 * for Storage. NOTE(review): the string argument is presumably the message
 * reported for an unparsable value - confirm against the macro definition. */
80 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
81 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Textual names of the SplitMode enum values, indexed by the enum value. */
83 static const char* const split_mode_table[] = {
84 [SPLIT_NONE] = "none",
86 [SPLIT_LOGIN] = "login"
/* Instantiate the string-table lookup and the config_parse_split_mode()
 * parser for SplitMode. NOTE(review): the string argument is presumably the
 * message reported for an unparsable value - confirm against the macro. */
89 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
90 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Estimates how many more bytes the journal files in the active journal
 * directory may occupy. The result is stored in s->cached_available_space
 * together with the monotonic timestamp of the computation, and returned. */
92 static uint64_t available_space(Server *s, bool verbose) {
94 _cleanup_free_ char *p = NULL;
97 uint64_t sum = 0, ss_avail = 0, avail = 0;
99 _cleanup_closedir_ DIR *d = NULL;
104 ts = now(CLOCK_MONOTONIC);
/* Cache check: the age of the cached value is compared against
 * RECHECK_AVAILABLE_SPACE_USEC before the cached value is returned. */
106 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
108 return s->cached_available_space;
110 r = sd_id128_get_machine(&machine);
/* Choose directory prefix and metrics: the /var/log/journal/ prefix with
 * s->system_metrics goes with s->system_journal, the /run/log/journal/
 * prefix with s->runtime_metrics is the alternative. */
114 if (s->system_journal) {
115 f = "/var/log/journal/";
116 m = &s->system_metrics;
118 f = "/run/log/journal/";
119 m = &s->runtime_metrics;
/* The directory examined is the prefix followed by the machine ID string. */
124 p = strappend(f, sd_id128_to_string(machine, ids));
/* File system statistics are taken from the opened directory itself. */
132 if (fstatvfs(dirfd(d), &ss) < 0)
138 union dirent_storage buf;
140 r = readdir_r(d, &buf.de, &de);
/* Filter on the entry name (".journal" or ".journal~" suffix) and on the
 * file type (regular files; symlinks are not followed by fstatat()). */
147 if (!endswith(de->d_name, ".journal") &&
148 !endswith(de->d_name, ".journal~"))
151 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
154 if (!S_ISREG(st.st_mode))
/* Accumulate usage as st_blocks multiplied by 512 bytes. */
157 sum += (uint64_t) st.st_blocks * 512UL;
/* Free bytes reported for the file system, then what is left of them once
 * m->keep_free is set aside (clamped at zero).
 * NOTE(review): POSIX expresses f_bavail in units of f_frsize; f_bsize is
 * used here - confirm both agree on the file systems that matter. */
160 ss_avail = ss.f_bsize * ss.f_bavail;
161 avail = ss_avail > m->keep_free ? ss_avail - m->keep_free : 0;
/* The effective limit is the smaller of m->max_use and avail; the bytes
 * already used are subtracted from it, again clamped at zero. */
163 s->cached_available_space = MIN(m->max_use, avail) > sum ? MIN(m->max_use, avail) - sum : 0;
164 s->cached_available_space_timestamp = ts;
/* Usage summary sent through server_driver_message().
 * NOTE(review): presumably emitted only when `verbose` is set - confirm. */
167 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
168 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
170 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
171 "%s journal is using %s (max %s, leaving %s of free %s, current limit %s).",
172 s->system_journal ? "Permanent" : "Runtime",
173 format_bytes(fb1, sizeof(fb1), sum),
174 format_bytes(fb2, sizeof(fb2), m->max_use),
175 format_bytes(fb3, sizeof(fb3), m->keep_free),
176 format_bytes(fb4, sizeof(fb4), ss_avail),
177 format_bytes(fb5, sizeof(fb5), MIN(m->max_use, avail)));
180 return s->cached_available_space;
/* Resolves the "systemd-journal" group into s->file_gid.
 * s->file_gid_valid records that the resolution has been attempted, and is
 * set even when the lookup failed (see the comment at the end). */
183 static void server_read_file_gid(Server *s) {
184 const char *g = "systemd-journal";
/* Checked before any lookup is made. */
189 if (s->file_gid_valid)
192 r = get_group_creds(&g, &s->file_gid);
194 log_warning("Failed to resolve '%s' group: %s", g, strerror(-r));
196 /* if we couldn't read the gid, then it will be 0, but that's
197 * fine and we shouldn't try to resolve the group again, so
198 * let's just pretend it worked right-away. */
199 s->file_gid_valid = true;
/* Adjusts access rights of journal file f.
 * First fchmod_and_fchown() is called on f->fd with mode 0640, uid 0 and
 * s->file_gid; then the ACL of the file is patched so that `uid` has an
 * entry with ACL_READ. Every failure is logged as a warning mentioning
 * f->path ("ignoring"). */
202 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
207 acl_permset_t permset;
/* s->file_gid is used just below, so have it resolved first. */
212 server_read_file_gid(s);
214 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
216 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
/* Fetch the ACL currently attached to the file descriptor. */
222 acl = acl_get_fd(f->fd);
224 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Search the ACL for an entry belonging to uid; the calls that follow
 * create an ACL_USER entry and qualify it with uid. */
228 r = acl_find_uid(acl, uid, &entry);
231 if (acl_create_entry(&acl, &entry) < 0 ||
232 acl_set_tag_type(entry, ACL_USER) < 0 ||
233 acl_set_qualifier(entry, &uid) < 0) {
234 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
239 /* We do not recalculate the mask unconditionally here,
240 * so that the fchmod() mask above stays intact. */
241 if (acl_get_permset(entry, &permset) < 0 ||
242 acl_add_perm(permset, ACL_READ) < 0 ||
243 calc_acl_mask_if_needed(&acl) < 0) {
244 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Store the modified ACL back on the file. */
248 if (acl_set_fd(f->fd, acl) < 0)
249 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Selects the journal file that entries attributed to `uid` are written to.
 * Per-user files are kept in the s->user_journals hashmap keyed by uid; the
 * error paths visible below fall back to s->system_journal. */
256 static JournalFile* find_journal(Server *s, uid_t uid) {
257 _cleanup_free_ char *p = NULL;
264 /* We split up user logs only on /var, not on /run. If the
265 * runtime file is open, we write to it exclusively, in order
266 * to guarantee proper order as soon as we flush /run to
267 * /var and close the runtime file. */
269 if (s->runtime_journal)
270 return s->runtime_journal;
273 return s->system_journal;
275 r = sd_id128_get_machine(&machine);
277 return s->system_journal;
/* Look up an already open per-user journal for this uid. */
279 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
/* File name pattern: /var/log/journal/<machine id>/user-<uid>.journal */
283 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
284 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
285 return s->system_journal;
/* Make room in the map before another file is opened. */
287 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
288 /* Too many open? Then let's close one */
289 f = hashmap_steal_first(s->user_journals);
291 journal_file_close(f);
/* Open the per-user file read-write, creating it if needed (O_CREAT),
 * with the same compress/seal settings and metrics as the system journal. */
294 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
296 return s->system_journal;
/* Grant `uid` access to its file, then register the file in the map;
 * if registering fails the file is closed again. */
298 server_fix_perms(s, f, uid);
300 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
302 journal_file_close(f);
303 return s->system_journal;
/* Rotates the runtime journal, the system journal and every per-user
 * journal via journal_file_rotate(). Failures are logged with log_error();
 * server_fix_perms() is applied to the rotated files. */
309 void server_rotate(Server *s) {
315 log_debug("Rotating...");
/* Runtime journal: rotated with sealing disabled (last argument false). */
317 if (s->runtime_journal) {
318 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
/* The pointer may have been cleared by the rotation attempt, which is why
 * two different error messages exist. */
320 if (s->runtime_journal)
321 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
323 log_error("Failed to create new runtime journal: %s", strerror(-r));
325 server_fix_perms(s, s->runtime_journal, 0);
/* System journal: same procedure, using the configured seal setting. */
328 if (s->system_journal) {
329 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
331 if (s->system_journal)
332 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
334 log_error("Failed to create new system journal: %s", strerror(-r));
337 server_fix_perms(s, s->system_journal, 0);
/* Per-user journals: k is the map key (the uid), f the journal file.
 * The rotated file replaces the old map value under the same key. */
340 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
341 r = journal_file_rotate(&f, s->compress, s->seal);
344 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
346 log_error("Failed to create user journal: %s", strerror(-r));
348 hashmap_replace(s->user_journals, k, f);
349 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Syncs the system journal and all per-user journals by calling
 * journal_file_set_offline() on each, then disarms the sync timer and
 * clears s->sync_scheduled. Errors are logged, not propagated (void). */
354 void server_sync(Server *s) {
/* An all-zero itimerspec; passing it to timerfd_settime() disarms the timer. */
360 static const struct itimerspec sync_timer_disable = {};
362 if (s->system_journal) {
363 r = journal_file_set_offline(s->system_journal);
365 log_error("Failed to sync system journal: %s", strerror(-r));
368 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
369 r = journal_file_set_offline(f);
371 log_error("Failed to sync user journal: %s", strerror(-r));
374 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
376 log_error("Failed to disable max timer: %m");
/* A later server_schedule_sync() call may arm the timer again. */
378 s->sync_scheduled = false;
/* Runs journal_directory_vacuum() on the per-machine journal directories
 * under /var/log/journal/ and /run/log/journal/, using the limits from the
 * matching metrics and s->max_retention_usec. s->oldest_file_usec is reset
 * first and passed to the vacuum calls as an output argument. */
381 void server_vacuum(Server *s) {
386 log_debug("Vacuuming...");
388 s->oldest_file_usec = 0;
390 r = sd_id128_get_machine(&machine);
392 log_error("Failed to get machine ID: %s", strerror(-r));
/* ids receives the machine ID in string form; it names the subdirectory. */
396 sd_id128_to_string(machine, ids);
398 if (s->system_journal) {
399 char *p = strappenda("/var/log/journal/", ids);
401 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
/* -ENOENT is not reported. */
402 if (r < 0 && r != -ENOENT)
403 log_error("Failed to vacuum %s: %s", p, strerror(-r));
406 if (s->runtime_journal) {
407 char *p = strappenda("/run/log/journal/", ids);
409 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
410 if (r < 0 && r != -ENOENT)
411 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* Zeroing the timestamp invalidates the value cached by available_space(). */
414 s->cached_available_space_timestamp = 0;
417 bool shall_try_append_again(JournalFile *f, int r) {
419 /* -E2BIG Hit configured limit
421 -EDQUOT Quota limit hit
423 -EHOSTDOWN Other machine
424 -EBUSY Unclean shutdown
425 -EPROTONOSUPPORT Unsupported feature
428 -ESHUTDOWN Already archived */
430 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
431 log_debug("%s: Allocation limit reached, rotating.", f->path);
432 else if (r == -EHOSTDOWN)
433 log_info("%s: Journal file from other machine, rotating.", f->path);
434 else if (r == -EBUSY)
435 log_info("%s: Unclean shutdown, rotating.", f->path);
436 else if (r == -EPROTONOSUPPORT)
437 log_info("%s: Unsupported feature, rotating.", f->path);
438 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
439 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Appends the entry made of iovec[0..n) to the journal that find_journal()
 * selects for `uid`. If the append fails with an error that
 * shall_try_append_again() accepts, the write is attempted a second time;
 * both failure paths log the item count and the summed iov_len of the entry. */
446 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
448 bool vacuumed = false;
455 f = find_journal(s, uid);
/* The file is asked whether it wants to be rotated (s->max_file_usec is
 * passed along); the journal is looked up again afterwards. */
459 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
460 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
465 f = find_journal(s, uid);
/* First write attempt; s->seqnum is handed to the journal file by pointer. */
470 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
472 server_schedule_sync(s);
/* No retry when vacuuming was already done or the error is not one that
 * shall_try_append_again() treats as recoverable. */
476 if (vacuumed || !shall_try_append_again(f, r)) {
479 for (i = 0; i < n; i++)
480 size += iovec[i].iov_len;
/* NOTE(review): n is unsigned but is formatted with %d here and below. */
482 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
/* Second attempt, on a freshly looked-up journal. */
489 f = find_journal(s, uid);
493 log_debug("Retrying write.");
494 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
498 for (i = 0; i < n; i++)
499 size += iovec[i].iov_len;
501 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
/* Adds metadata fields to an entry and writes it out.
 * iovec holds n filled slots out of a capacity of m. The fields appended
 * here describe the sending process (from ucred), optionally a second
 * "object" process (object_pid), the source timestamp (tv), boot ID,
 * machine ID and host name. The entry is finally handed to
 * write_to_journal() together with the uid that selects the target journal. */
505 static void dispatch_message_real(
507 struct iovec *iovec, unsigned n, unsigned m,
510 const char *label, size_t label_len,
/* Scratch buffers for the numeric fields: each has room for its field
 * prefix plus DECIMAL_STR_MAX() of the value type; the two ID buffers are
 * pre-filled with their prefix and have 32 further characters of room. */
514 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
515 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
516 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
517 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
518 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
519 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
520 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=",
521 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
522 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
523 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
/* realuid and owner feed the journal selection at the end of the function. */
531 uid_t realuid = 0, owner = 0, journal_uid;
532 bool owner_valid = false;
534 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
535 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
536 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
537 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
/* The caller must have left room for all fields that may be appended. */
546 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
/* --- Fields taken directly from the sender credentials. --- */
549 realuid = ucred->uid;
551 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
552 IOVEC_SET_STRING(iovec[n++], pid);
554 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
555 IOVEC_SET_STRING(iovec[n++], uid);
557 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
558 IOVEC_SET_STRING(iovec[n++], gid);
/* --- _COMM=, _EXE=, _CMDLINE= and _CAP_EFFECTIVE= come from the
 * get_process_*() helpers called with the sender PID. --- */
560 r = get_process_comm(ucred->pid, &t);
562 x = strappenda("_COMM=", t);
564 IOVEC_SET_STRING(iovec[n++], x);
567 r = get_process_exe(ucred->pid, &t);
569 x = strappenda("_EXE=", t);
571 IOVEC_SET_STRING(iovec[n++], x);
574 r = get_process_cmdline(ucred->pid, 0, false, &t);
576 x = strappenda("_CMDLINE=", t);
578 IOVEC_SET_STRING(iovec[n++], x);
581 r = get_process_capeff(ucred->pid, &t);
583 x = strappenda("_CAP_EFFECTIVE=", t);
585 IOVEC_SET_STRING(iovec[n++], x);
/* --- Audit session and audit login UID of the sender. --- */
589 r = audit_session_from_pid(ucred->pid, &audit);
591 sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
592 IOVEC_SET_STRING(iovec[n++], audit_session);
595 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
597 sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
598 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* --- Fields derived from the cgroup path c of the sender: the path
 * itself, then session, owner UID and unit looked up from that path. --- */
602 r = cg_pid_get_path_shifted(ucred->pid, NULL, &c);
604 char *session = NULL;
606 x = strappenda("_SYSTEMD_CGROUP=", c);
607 IOVEC_SET_STRING(iovec[n++], x);
609 r = cg_path_get_session(c, &t);
611 session = strappenda("_SYSTEMD_SESSION=", t);
613 IOVEC_SET_STRING(iovec[n++], session);
616 if (cg_path_get_owner_uid(c, &owner) >= 0) {
619 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
620 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* Unit name: taken from the cgroup path as system unit, else as user unit,
 * else from the unit_id argument when one was supplied. */
623 if (cg_path_get_unit(c, &t) >= 0) {
624 x = strappenda("_SYSTEMD_UNIT=", t);
626 } else if (cg_path_get_user_unit(c, &t) >= 0) {
627 x = strappenda("_SYSTEMD_USER_UNIT=", t);
629 } else if (unit_id) {
631 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
633 x = strappenda("_SYSTEMD_UNIT=", unit_id);
638 IOVEC_SET_STRING(iovec[n++], x);
/* --- SELinux context: built from the label/label_len arguments (copied
 * and NUL-terminated on the stack), or queried with getpidcon(). --- */
645 x = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);
647 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
648 IOVEC_SET_STRING(iovec[n++], x);
650 security_context_t con;
652 if (getpidcon(ucred->pid, &con) >= 0) {
653 x = strappenda("_SELINUX_CONTEXT=", con);
656 IOVEC_SET_STRING(iovec[n++], x);
/* --- OBJECT_* fields: the same kind of data, gathered for object_pid. --- */
664 r = get_process_uid(object_pid, &object_uid);
666 sprintf(o_uid, "OBJECT_UID=%lu", (unsigned long) object_uid);
667 IOVEC_SET_STRING(iovec[n++], o_uid);
670 r = get_process_gid(object_pid, &object_gid);
672 sprintf(o_gid, "OBJECT_GID=%lu", (unsigned long) object_gid);
673 IOVEC_SET_STRING(iovec[n++], o_gid);
676 r = get_process_comm(object_pid, &t);
678 x = strappenda("OBJECT_COMM=", t);
680 IOVEC_SET_STRING(iovec[n++], x);
683 r = get_process_exe(object_pid, &t);
685 x = strappenda("OBJECT_EXE=", t);
687 IOVEC_SET_STRING(iovec[n++], x);
690 r = get_process_cmdline(object_pid, 0, false, &t);
692 x = strappenda("OBJECT_CMDLINE=", t);
694 IOVEC_SET_STRING(iovec[n++], x);
698 r = audit_session_from_pid(object_pid, &audit);
700 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%lu", (unsigned long) audit);
701 IOVEC_SET_STRING(iovec[n++], o_audit_session);
704 r = audit_loginuid_from_pid(object_pid, &loginuid);
706 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
707 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
711 r = cg_pid_get_path_shifted(object_pid, NULL, &c);
713 x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
714 IOVEC_SET_STRING(iovec[n++], x);
716 r = cg_path_get_session(c, &t);
718 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
720 IOVEC_SET_STRING(iovec[n++], x);
/* NOTE(review): this writes the same `owner` variable that the
 * journal selection below reads - confirm that is intended. */
723 if (cg_path_get_owner_uid(c, &owner) >= 0) {
724 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
725 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
728 if (cg_path_get_unit(c, &t) >= 0) {
729 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
731 } else if (cg_path_get_user_unit(c, &t) >= 0) {
732 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
738 IOVEC_SET_STRING(iovec[n++], x);
/* --- Source timestamp: tv converted with timeval_load(). --- */
746 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
747 IOVEC_SET_STRING(iovec[n++], source_time);
750 /* Note that strictly speaking storing the boot id here is
751 * redundant since the entry includes this in-line
752 * anyway. However, we need this indexed, too. */
753 r = sd_id128_get_boot(&id);
/* The ID string is written right behind the prefix already in the buffer. */
755 sd_id128_to_string(id, boot_id + strlen("_BOOT_ID="));
756 IOVEC_SET_STRING(iovec[n++], boot_id);
759 r = sd_id128_get_machine(&id);
761 sd_id128_to_string(id, machine_id + strlen("_MACHINE_ID="));
762 IOVEC_SET_STRING(iovec[n++], machine_id);
/* --- Host name. --- */
765 t = gethostname_malloc();
767 x = strappenda("_HOSTNAME=", t);
769 IOVEC_SET_STRING(iovec[n++], x);
/* --- Pick the uid whose journal receives the entry, depending on
 * s->split_mode. --- */
774 if (s->split_mode == SPLIT_UID && realuid > 0)
775 /* Split up strictly by any UID */
776 journal_uid = realuid;
777 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
778 /* Split up by login UIDs, this avoids creation of
779 * individual journals for system UIDs. We do this
780 * only if the realuid is not root, in order not to
781 * accidentally leak privileged information to the
782 * user that is logged by a privileged process that is
783 * part of an unprivileged session.*/
788 write_to_journal(s, journal_uid, iovec, n);
/* Emits a message generated by journald itself.
 * The entry consists of PRIORITY=6, _TRANSPORT=driver, a MESSAGE= field
 * formatted printf-style from `format` and the variadic arguments, and a
 * message ID field unless message_id equals SD_ID128_NULL. It is dispatched
 * with the PID/UID/GID of the calling process as credentials. */
791 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
/* NOTE(review): presumably sized for an 11-character "MESSAGE_ID=" prefix,
 * a 32-character ID and the terminating NUL - confirm against MESSAGE_ID(). */
792 char mid[11 + 32 + 1];
793 char buffer[16 + LINE_MAX + 1];
/* Room for the metadata added by dispatch_message_real() plus the (at most)
 * four fields set here. */
794 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
797 struct ucred ucred = {};
802 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
803 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The 8-byte "MESSAGE=" prefix is copied first; the formatted text follows
 * at offset 8, limited to the remaining buffer size. */
805 memcpy(buffer, "MESSAGE=", 8);
806 va_start(ap, format);
807 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
809 char_array_0(buffer);
810 IOVEC_SET_STRING(iovec[n++], buffer);
812 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
813 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
815 IOVEC_SET_STRING(iovec[n++], mid);
/* journald is the sender, so its own credentials are used. */
818 ucred.pid = getpid();
819 ucred.uid = getuid();
820 ucred.gid = getgid();
822 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, 0);
/* Front end for messages received from clients. Checks the priority against
 * s->max_level_store and the storage mode, consults the rate limiter using
 * a shortened cgroup path of the sender, and forwards the entry to
 * dispatch_message_real(). */
825 void server_dispatch_message(
827 struct iovec *iovec, unsigned n, unsigned m,
830 const char *label, size_t label_len,
836 _cleanup_free_ char *path = NULL;
/* A NULL iovec is acceptable only for an empty entry. */
840 assert(iovec || n == 0);
845 if (LOG_PRI(priority) > s->max_level_store)
848 /* Stop early in case the information will not be stored
850 if (s->storage == STORAGE_NONE)
856 r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
860 /* example: /user/lennart/3/foobar
861 * /system/dbus.service/foobar
863 * So let's cut of everything past the third /, since that is
864 * where user directories start */
866 c = strchr(path, '/');
868 c = strchr(c+1, '/');
870 c = strchr(c+1, '/');
/* The rate limiter is keyed by the path; it also receives the priority
 * (masked with LOG_PRIMASK) and the currently available journal space. */
876 rl = journal_rate_limit_test(s->rate_limit, path,
877 priority & LOG_PRIMASK, available_space(s, false));
882 /* Write a suppression message if we suppressed something */
884 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
885 "Suppressed %u messages from %s", rl - 1, path);
/* Hand the entry on for metadata enrichment and writing. */
888 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, object_pid);
/* Opens the persistent system journal and/or the runtime journal, as far as
 * they are not open yet and s->storage permits, fixes their permissions and
 * finally calls available_space() in verbose mode. */
892 static int system_journal_open(Server *s) {
898 r = sd_id128_get_machine(&machine);
902 sd_id128_to_string(machine, ids);
/* Persistent journal: attempted only in persistent/auto storage mode and
 * only once the flag file /run/systemd/journal/flushed is present. */
904 if (!s->system_journal &&
905 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
906 access("/run/systemd/journal/flushed", F_OK) >= 0) {
908 /* If in auto mode: first try to create the machine
909 * path, but not the prefix.
911 * If in persistent mode: create /var/log/journal and
912 * the machine path */
914 if (s->storage == STORAGE_PERSISTENT)
915 (void) mkdir("/var/log/journal/", 0755);
917 fn = strappenda("/var/log/journal/", ids);
918 (void) mkdir(fn, 0755);
920 fn = strappenda(fn, "/system.journal");
921 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
924 server_fix_perms(s, s->system_journal, 0);
/* -ENOENT and -EROFS are not worth a warning. */
926 if (r != -ENOENT && r != -EROFS)
927 log_warning("Failed to open system journal: %s", strerror(-r));
/* Runtime journal: considered in every storage mode except "none". */
933 if (!s->runtime_journal &&
934 (s->storage != STORAGE_NONE)) {
936 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
940 if (s->system_journal) {
942 /* Try to open the runtime journal, but only
943 * if it already exists, so that we can flush
944 * it into the system journal */
946 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
951 log_warning("Failed to open runtime journal: %s", strerror(-r));
958 /* OK, we really need the runtime journal, so create
959 * it if necessary. */
961 (void) mkdir_parents(fn, 0755);
962 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
966 log_error("Failed to open runtime journal: %s", strerror(-r));
971 if (s->runtime_journal)
972 server_fix_perms(s, s->runtime_journal, 0);
/* Recompute the available space in verbose mode. */
975 available_space(s, true);
/* Copies the entries of the runtime journal into the system journal, then
 * closes the runtime journal and removes /run/log/journal.
 * Applies only to auto/persistent storage and only while a runtime journal
 * is open; the system journal is opened on demand. */
980 int server_flush_to_var(Server *s) {
983 sd_journal *j = NULL;
/* Preconditions for flushing. */
987 if (s->storage != STORAGE_AUTO &&
988 s->storage != STORAGE_PERSISTENT)
991 if (!s->runtime_journal)
994 system_journal_open(s);
996 if (!s->system_journal)
999 log_debug("Flushing to /var...");
1001 r = sd_id128_get_machine(&machine);
1003 log_error("Failed to get machine id: %s", strerror(-r));
/* Read the runtime journal through the public reader API. */
1007 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1009 log_error("Failed to read runtime journal: %s", strerror(-r));
/* A threshold of 0 is set so field data is not limited in size. */
1013 sd_journal_set_data_threshold(j, 0);
1015 SD_JOURNAL_FOREACH(j) {
/* f is the file the reader is positioned in; the entry object at its
 * current offset is the one to copy. */
1019 f = j->current_file;
1020 assert(f && f->current_offset > 0);
1022 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1024 log_error("Can't read entry: %s", strerror(-r));
1028 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
/* On failure, shall_try_append_again() decides whether a retry makes sense. */
1032 if (!shall_try_append_again(s->system_journal, r)) {
1033 log_error("Can't write entry: %s", strerror(-r));
1040 if (!s->system_journal) {
1041 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1046 log_debug("Retrying write.");
1047 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1049 log_error("Can't write entry: %s", strerror(-r));
/* Finish up: post-change handling on the system journal, close and forget
 * the runtime journal, delete its directory tree, close the reader. */
1055 journal_file_post_change(s->system_journal);
1057 journal_file_close(s->runtime_journal);
1058 s->runtime_journal = NULL;
1061 rm_rf("/run/log/journal", false, true, false);
1063 sd_journal_close(j);
/* Handles one epoll event. The event is routed by comparing ev->data.fd
 * with the descriptors stored in the Server object: signal fd, sync timer
 * fd, /dev/kmsg fd, native and syslog sockets, and the stdout listening
 * socket. Anything else is taken to be a stdout stream (ev->data.ptr). */
1068 int process_event(Server *s, struct epoll_event *ev) {
/* --- Signals delivered through the signalfd. --- */
1072 if (ev->data.fd == s->signal_fd) {
1073 struct signalfd_siginfo sfsi;
1076 if (ev->events != EPOLLIN) {
1077 log_error("Got invalid event from epoll.");
1081 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1082 if (n != sizeof(sfsi)) {
1087 if (errno == EINTR || errno == EAGAIN)
/* SIGUSR1: create the "flushed" flag file and move the runtime journal
 * contents to /var. */
1093 if (sfsi.ssi_signo == SIGUSR1) {
1094 touch("/run/systemd/journal/flushed");
1095 server_flush_to_var(s);
1100 if (sfsi.ssi_signo == SIGUSR2) {
1106 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
/* --- Sync timer: the timer fd is read into t. --- */
1110 } else if (ev->data.fd == s->sync_timer_fd) {
1114 log_debug("Got sync request from epoll.");
1116 r = read(ev->data.fd, (void *)&t, sizeof(t));
/* --- Kernel messages from /dev/kmsg. --- */
1123 } else if (ev->data.fd == s->dev_kmsg_fd) {
1126 if (ev->events != EPOLLIN) {
1127 log_error("Got invalid event from epoll.");
1131 r = server_read_dev_kmsg(s);
/* --- Datagrams on the native or the syslog socket. --- */
1137 } else if (ev->data.fd == s->native_fd ||
1138 ev->data.fd == s->syslog_fd) {
1140 if (ev->events != EPOLLIN) {
1141 log_error("Got invalid event from epoll.");
1146 struct msghdr msghdr;
1148 struct ucred *ucred = NULL;
1149 struct timeval *tv = NULL;
1150 struct cmsghdr *cmsg;
1152 size_t label_len = 0;
1154 struct cmsghdr cmsghdr;
1156 /* We use NAME_MAX space for the
1157 * SELinux label here. The kernel
1158 * currently enforces no limit, but
1159 * according to suggestions from the
1160 * SELinux people this will change and
1161 * it will probably be identical to
1162 * NAME_MAX. For now we use that, but
1163 * this should be updated one day when
1164 * the final limit is known.*/
1165 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1166 CMSG_SPACE(sizeof(struct timeval)) +
1167 CMSG_SPACE(sizeof(int)) + /* fd */
1168 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* SIOCINQ reports into v how much data is waiting on the socket. */
1175 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1176 log_error("SIOCINQ failed: %m");
/* Grow the receive buffer when it is smaller than v: the new size is the
 * larger of v plus LINE_MAX and twice the current size; one extra byte is
 * allocated beyond l. */
1180 if (s->buffer_size < (size_t) v) {
1184 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1185 b = realloc(s->buffer, l+1);
1188 log_error("Couldn't increase buffer.");
/* Single-element scatter list pointing at the receive buffer, plus the
 * control buffer for ancillary data. */
1197 iovec.iov_base = s->buffer;
1198 iovec.iov_len = s->buffer_size;
1202 msghdr.msg_iov = &iovec;
1203 msghdr.msg_iovlen = 1;
1204 msghdr.msg_control = &control;
1205 msghdr.msg_controllen = sizeof(control);
/* Non-blocking receive; passed file descriptors get close-on-exec set. */
1207 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1210 if (errno == EINTR || errno == EAGAIN)
1213 log_error("recvmsg() failed: %m");
/* Walk the ancillary data: sender credentials, security label, receive
 * timestamp and passed file descriptors. Credentials and timestamp are
 * accepted only when the control message has exactly the expected length. */
1217 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1219 if (cmsg->cmsg_level == SOL_SOCKET &&
1220 cmsg->cmsg_type == SCM_CREDENTIALS &&
1221 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1222 ucred = (struct ucred*) CMSG_DATA(cmsg);
1223 else if (cmsg->cmsg_level == SOL_SOCKET &&
1224 cmsg->cmsg_type == SCM_SECURITY) {
1225 label = (char*) CMSG_DATA(cmsg);
1226 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1227 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1228 cmsg->cmsg_type == SO_TIMESTAMP &&
1229 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1230 tv = (struct timeval*) CMSG_DATA(cmsg);
1231 else if (cmsg->cmsg_level == SOL_SOCKET &&
1232 cmsg->cmsg_type == SCM_RIGHTS) {
1233 fds = (int*) CMSG_DATA(cmsg);
1234 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* Syslog socket: payload without file descriptors is searched for a
 * newline, then passed on with surrounding whitespace stripped; file
 * descriptors are not accepted on this socket. */
1238 if (ev->data.fd == s->syslog_fd) {
1241 if (n > 0 && n_fds == 0) {
1242 e = memchr(s->buffer, '\n', n);
1248 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1249 } else if (n_fds > 0)
1250 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native socket: either inline payload without fds, or an empty datagram
 * carrying exactly one fd that holds the message. */
1253 if (n > 0 && n_fds == 0)
1254 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1255 else if (n == 0 && n_fds == 1)
1256 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1258 log_warning("Got too many file descriptors via native socket. Ignoring.");
/* All received descriptors are closed here. */
1261 close_many(fds, n_fds);
/* --- New connection on the stdout stream socket. --- */
1266 } else if (ev->data.fd == s->stdout_fd) {
1268 if (ev->events != EPOLLIN) {
1269 log_error("Got invalid event from epoll.");
1273 stdout_stream_new(s);
/* --- Existing stdout stream. --- */
1277 StdoutStream *stream;
/* Any event bit other than EPOLLIN and EPOLLHUP is rejected. */
1279 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1280 log_error("Got invalid event from epoll.");
1284 /* If it is none of the well-known fds, it must be an
1285 * stdout stream fd. Note that this is a bit ugly here
1286 * (since we rely that none of the well-known fds
1287 * could be interpreted as pointer), but nonetheless
1288 * safe, since the well-known fds would never get an
1289 * fd > 4096, i.e. beyond the first memory page */
1291 stream = ev->data.ptr;
/* A result <= 0 from stdout_stream_process() releases the stream. */
1293 if (stdout_stream_process(stream) <= 0)
1294 stdout_stream_free(stream);
1299 log_error("Unknown event.");
/* Blocks SIGINT, SIGTERM, SIGUSR1 and SIGUSR2 for normal delivery, creates a
 * signalfd for them in s->signal_fd and registers it with the epoll
 * instance s->epoll_fd. */
1303 static int open_signalfd(Server *s) {
1305 struct epoll_event ev;
/* SIG_SETMASK replaces the whole signal mask of the process with `mask`. */
1309 assert_se(sigemptyset(&mask) == 0);
1310 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1311 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
/* The descriptor is non-blocking and close-on-exec. */
1313 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1314 if (s->signal_fd < 0) {
1315 log_error("signalfd(): %m");
/* process_event() recognises this source by ev.data.fd. */
1320 ev.events = EPOLLIN;
1321 ev.data.fd = s->signal_fd;
1323 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1324 log_error("epoll_ctl(): %m");
/* Applies journald switches given on the kernel command line.
 * /proc/cmdline is read and split into words; the three
 * systemd.journald.forward_to_*= switches are parsed as booleans into the
 * corresponding Server fields. Other words starting with "systemd.journald"
 * produce a warning. */
1331 static int server_parse_proc_cmdline(Server *s) {
1332 _cleanup_free_ char *line = NULL;
/* Checked first: whether we are running inside a container. */
1337 if (detect_container(NULL) > 0)
1340 r = read_one_line_file("/proc/cmdline", &line);
1342 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1346 FOREACH_WORD_QUOTED(w, l, line, state) {
1347 _cleanup_free_ char *word;
/* Work on a NUL-terminated copy of the current word. */
1349 word = strndup(w, l);
/* The numeric offsets below (35, 33, 36) equal the lengths of the
 * respective option prefixes, so word + offset is the value part. */
1353 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1354 r = parse_boolean(word + 35);
1356 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1358 s->forward_to_syslog = r;
1359 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1360 r = parse_boolean(word + 33);
1362 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1364 s->forward_to_kmsg = r;
1365 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1366 r = parse_boolean(word + 36);
1368 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1370 s->forward_to_console = r;
1371 } else if (startswith(word, "systemd.journald"))
1372 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Loads /etc/systemd/journald.conf into the Server object.
 * The file is parsed with config_parse() for the "Journal" section, using
 * the gperf-generated lookup table journald_gperf_lookup; `s` is passed as
 * user data. Open and parse failures are logged as warnings. */
1378 static int server_parse_config_file(Server *s) {
1379 static const char fn[] = "/etc/systemd/journald.conf";
1380 _cleanup_fclose_ FILE *f = NULL;
/* "re": read-only, with close-on-exec set on the descriptor. */
1385 f = fopen(fn, "re");
/* A missing file (ENOENT) is distinguished from other open errors. */
1387 if (errno == ENOENT)
1390 log_warning("Failed to open configuration file %s: %m", fn);
1394 r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1395 (void*) journald_gperf_lookup, false, false, s);
1397 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Creates the timerfd used for delayed journal syncing (CLOCK_MONOTONIC,
 * close-on-exec), stores it in s->sync_timer_fd and adds it to the epoll
 * instance for EPOLLIN. */
1402 static int server_open_sync_timer(Server *s) {
1404 struct epoll_event ev;
1408 s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1409 if (s->sync_timer_fd < 0)
/* process_event() recognises this source by ev.data.fd. */
1413 ev.events = EPOLLIN;
1414 ev.data.fd = s->sync_timer_fd;
1416 r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
/* NOTE(review): the message says "idle timer" although the descriptor
 * added is the sync timer. */
1418 log_error("Failed to add idle timer fd to epoll object: %m");
/* Arms the sync timer so that it expires s->sync_interval_usec from now
 * and marks the sync as scheduled. s->sync_scheduled is checked first, and
 * the timer is touched only for a non-zero interval. */
1425 int server_schedule_sync(Server *s) {
1430 if (s->sync_scheduled)
1433 if (s->sync_interval_usec) {
/* Only it_value is filled in; it_interval stays zero, i.e. a one-shot timer. */
1434 struct itimerspec sync_timer_enable = {};
1436 timespec_store(&sync_timer_enable.it_value, s->sync_interval_usec);
/* Flags 0: the expiry time is relative to now. */
1438 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1443 s->sync_scheduled = true;
/* Initialises a Server object: sets defaults, reads the configuration file
 * and kernel command line, creates the epoll instance, adopts sockets
 * passed in by the service manager, opens the remaining sockets and
 * auxiliary descriptors, and finally opens the journals. */
1448 int server_init(Server *s) {
/* All descriptors start out as -1, i.e. "not open". */
1454 s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1455 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
/* Built-in defaults; the configuration parsed below may override them. */
1459 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1460 s->sync_scheduled = false;
1462 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1463 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1465 s->forward_to_syslog = true;
1467 s->max_level_store = LOG_DEBUG;
1468 s->max_level_syslog = LOG_DEBUG;
1469 s->max_level_kmsg = LOG_NOTICE;
1470 s->max_level_console = LOG_INFO;
/* Every byte of both metrics structures is set to 0xFF.
 * NOTE(review): presumably the all-ones value marks a field as "unset" -
 * confirm against the journal-file code. */
1472 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1473 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1475 server_parse_config_file(s);
1476 server_parse_proc_cmdline(s);
/* If exactly one of interval and burst is zero, both are forced to zero. */
1477 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1478 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1479 (long long unsigned) s->rate_limit_interval,
1480 s->rate_limit_burst);
1481 s->rate_limit_interval = s->rate_limit_burst = 0;
1484 mkdir_p("/run/systemd/journal", 0755);
/* Map from uid to the open per-user journal file. */
1486 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1487 if (!s->user_journals)
1490 s->mmap = mmap_cache_new();
1494 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1495 if (s->epoll_fd < 0) {
1496 log_error("Failed to create epoll object: %m");
/* Sockets handed over by the service manager are identified by type and
 * path; more than one socket of a kind is an error, as is an unknown one. */
1500 n = sd_listen_fds(true);
1502 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1506 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1508 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1510 if (s->native_fd >= 0) {
1511 log_error("Too many native sockets passed.");
1517 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1519 if (s->stdout_fd >= 0) {
1520 log_error("Too many stdout sockets passed.");
1526 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1528 if (s->syslog_fd >= 0) {
1529 log_error("Too many /dev/log sockets passed.");
1536 log_error("Unknown socket passed.");
/* Open the individual input sources and helper descriptors. */
1541 r = server_open_syslog_socket(s);
1545 r = server_open_native_socket(s);
1549 r = server_open_stdout_socket(s);
1553 r = server_open_dev_kmsg(s);
1557 r = server_open_kernel_seqnum(s);
1561 r = server_open_sync_timer(s);
1565 r = open_signalfd(s);
1569 s->udev = udev_new();
/* The rate limiter is created from the (possibly adjusted) settings. */
1573 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1574 s->rate_limit_burst);
1578 r = system_journal_open(s);
/* Calls journal_file_maybe_append_tag() with the current CLOCK_REALTIME
 * time on the system journal (if open) and on every per-user journal. */
1585 void server_maybe_append_tags(Server *s) {
/* One timestamp is taken and shared by all files. */
1591 n = now(CLOCK_REALTIME);
1593 if (s->system_journal)
1594 journal_file_maybe_append_tag(s->system_journal, n);
1596 HASHMAP_FOREACH(f, s->user_journals, i)
1597 journal_file_maybe_append_tag(f, n);
/* Releases what a Server object holds: stdout streams, journal files, the
 * per-user journal map, all open file descriptors, the rate limiter, the
 * kernel sequence number mapping, the mmap cache and the udev context. */
1601 void server_done(Server *s) {
/* The loop re-reads the list head each time and ends once it is NULL. */
1605 while (s->stdout_streams)
1606 stdout_stream_free(s->stdout_streams);
1608 if (s->system_journal)
1609 journal_file_close(s->system_journal);
1611 if (s->runtime_journal)
1612 journal_file_close(s->runtime_journal);
/* Empty the map entry by entry, closing each per-user journal, then free
 * the map itself. */
1614 while ((f = hashmap_steal_first(s->user_journals)))
1615 journal_file_close(f);
1617 hashmap_free(s->user_journals);
/* Descriptors are closed only when valid (>= 0); server_init() starts
 * them out as -1. */
1619 if (s->epoll_fd >= 0)
1620 close_nointr_nofail(s->epoll_fd);
1622 if (s->signal_fd >= 0)
1623 close_nointr_nofail(s->signal_fd);
1625 if (s->syslog_fd >= 0)
1626 close_nointr_nofail(s->syslog_fd);
1628 if (s->native_fd >= 0)
1629 close_nointr_nofail(s->native_fd);
1631 if (s->stdout_fd >= 0)
1632 close_nointr_nofail(s->stdout_fd);
1634 if (s->dev_kmsg_fd >= 0)
1635 close_nointr_nofail(s->dev_kmsg_fd);
1637 if (s->sync_timer_fd >= 0)
1638 close_nointr_nofail(s->sync_timer_fd);
1641 journal_rate_limit_free(s->rate_limit);
/* The kernel sequence number is a mapping of sizeof(uint64_t) bytes. */
1643 if (s->kernel_seqnum)
1644 munmap(s->kernel_seqnum, sizeof(uint64_t));
1650 mmap_cache_unref(s->mmap);
1653 udev_unref(s->udev);