1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
43 #include "conf-parser.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-server.h"
48 #include "journald-rate-limit.h"
49 #include "journald-kmsg.h"
50 #include "journald-syslog.h"
51 #include "journald-stream.h"
52 #include "journald-console.h"
53 #include "journald-native.h"
57 #include <acl/libacl.h>
62 #include <selinux/selinux.h>
/* Tunables of the journal server: the cap on simultaneously open per-user
 * journals (see find_journal()), the defaults installed by server_init()
 * for the sync interval and the rate limiter, and the time base used by
 * available_space() when deciding whether its cached result may be reused. */
65 #define USER_JOURNALS_MAX 1024
67 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
68 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
69 #define DEFAULT_RATE_LIMIT_BURST 1000
71 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Names of the Storage enum values. The two macros that follow presumably
 * generate the string<->enum lookup helpers and the configuration parser
 * callback config_parse_storage() -- their expansion is not part of this
 * file. */
73 static const char* const storage_table[] = {
74 [STORAGE_AUTO] = "auto",
75 [STORAGE_VOLATILE] = "volatile",
76 [STORAGE_PERSISTENT] = "persistent",
77 [STORAGE_NONE] = "none"
80 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
81 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Names of the SplitMode enum values, with the same pair of generated
 * helpers as for the storage table.
 * NOTE(review): no entry for SPLIT_UID is visible here although
 * dispatch_message_real() tests for SPLIT_UID -- confirm the table is
 * complete. */
83 static const char* const split_mode_table[] = {
84 [SPLIT_NONE] = "none",
86 [SPLIT_LOGIN] = "login"
89 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
90 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Computes how many more bytes the journal may occupy, with caching.
 *
 * A previously computed value is returned while the cache timestamp plus
 * RECHECK_AVAILABLE_SPACE_USEC is still ahead of the monotonic clock (the
 * remainder of that condition is not visible in this listing). Otherwise
 * the journal directory of this machine is scanned --
 * /var/log/journal/<machine-id> with the system metrics when the system
 * journal is open, /run/log/journal/<machine-id> with the runtime metrics
 * otherwise. The disk usage (st_blocks * 512) of every regular file whose
 * name ends in ".journal" or ".journal~" is summed up, and the result is
 *
 *     min(max_use, max(fs_free - keep_free, 0)) - sum, clamped at 0
 *
 * where fs_free is f_bsize * f_bavail from fstatvfs(). The result and the
 * timestamp are stored in s for later calls.
 *
 * The usage summary sent through server_driver_message() is presumably
 * emitted only when verbose is set; the guarding condition is not visible
 * here.
 *
 * Returns the (possibly cached) number of bytes still available. */
92 static uint64_t available_space(Server *s, bool verbose) {
94 _cleanup_free_ char *p = NULL;
97 uint64_t sum = 0, ss_avail = 0, avail = 0;
99 _cleanup_closedir_ DIR *d = NULL;
104 ts = now(CLOCK_MONOTONIC);
106 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
108 return s->cached_available_space;
110 r = sd_id128_get_machine(&machine);
114 if (s->system_journal) {
115 f = "/var/log/journal/";
116 m = &s->system_metrics;
118 f = "/run/log/journal/";
119 m = &s->runtime_metrics;
124 p = strappend(f, sd_id128_to_string(machine, ids));
132 if (fstatvfs(dirfd(d), &ss) < 0)
138 union dirent_storage buf;
140 r = readdir_r(d, &buf.de, &de);
147 if (!endswith(de->d_name, ".journal") &&
148 !endswith(de->d_name, ".journal~"))
151 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
154 if (!S_ISREG(st.st_mode))
157 sum += (uint64_t) st.st_blocks * 512UL;
160 ss_avail = ss.f_bsize * ss.f_bavail;
161 avail = ss_avail > m->keep_free ? ss_avail - m->keep_free : 0;
163 s->cached_available_space = MIN(m->max_use, avail) > sum ? MIN(m->max_use, avail) - sum : 0;
164 s->cached_available_space_timestamp = ts;
167 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
168 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
170 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
171 "%s journal is using %s (max %s, leaving %s of free %s, current limit %s).",
172 s->system_journal ? "Permanent" : "Runtime",
173 format_bytes(fb1, sizeof(fb1), sum),
174 format_bytes(fb2, sizeof(fb2), m->max_use),
175 format_bytes(fb3, sizeof(fb3), m->keep_free),
176 format_bytes(fb4, sizeof(fb4), ss_avail),
177 format_bytes(fb5, sizeof(fb5), MIN(m->max_use, avail)));
180 return s->cached_available_space;
/* Resolves the GID of the "systemd-journal" group into s->file_gid.
 *
 * Nothing is done when s->file_gid_valid is already set. The lookup result
 * is marked valid unconditionally afterwards, so the group is resolved at
 * most once; a warning is logged for a failed lookup (the check guarding
 * the warning is not visible in this listing). */
183 static void server_read_file_gid(Server *s) {
184 const char *g = "systemd-journal";
189 if (s->file_gid_valid)
192 r = get_group_creds(&g, &s->file_gid);
194 log_warning("Failed to resolve '%s' group: %s", g, strerror(-r));
196 /* if we couldn't read the gid, then it will be 0, but that's
197 * fine and we shouldn't try to resolve the group again, so
198 * let's just pretend it worked right-away. */
199 s->file_gid_valid = true;
/* Adjusts ownership, access mode and ACL of the journal file f.
 *
 * The file is set to mode 0640 with owner 0 and group s->file_gid (resolved
 * through server_read_file_gid()). Afterwards the file's ACL is read, an
 * entry for uid is looked up -- and, presumably only when none exists,
 * created as an ACL_USER entry -- read permission is added to it, and the
 * ACL is written back to the file.
 *
 * Every failure is logged as a warning and otherwise ignored.
 * NOTE(review): the conditions between the individual steps (for instance
 * whether the ACL part is skipped for certain uid values) are not visible
 * in this listing. */
202 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
207 acl_permset_t permset;
212 server_read_file_gid(s);
214 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
216 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
222 acl = acl_get_fd(f->fd);
224 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
228 r = acl_find_uid(acl, uid, &entry);
231 if (acl_create_entry(&acl, &entry) < 0 ||
232 acl_set_tag_type(entry, ACL_USER) < 0 ||
233 acl_set_qualifier(entry, &uid) < 0) {
234 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
239 /* We do not recalculate the mask unconditionally here,
240 * so that the fchmod() mask above stays intact. */
241 if (acl_get_permset(entry, &permset) < 0 ||
242 acl_add_perm(permset, ACL_READ) < 0 ||
243 calc_acl_mask_if_needed(&acl) < 0) {
244 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
248 if (acl_set_fd(f->fd, acl) < 0)
249 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Selects the journal file that entries attributed to uid are written to.
 *
 * While the runtime journal is open it is returned exclusively. Otherwise
 * the per-user journal
 * /var/log/journal/<machine-id>/user-<uid>.journal is used: it is looked up
 * in s->user_journals and, when not cached, opened with
 * journal_file_open_reliably(), passed through server_fix_perms() and
 * inserted into the hashmap. Before opening, cached files are closed until
 * fewer than USER_JOURNALS_MAX remain.
 *
 * All visible error paths fall back to s->system_journal.
 * NOTE(review): the conditions guarding several of the early returns
 * (e.g. which uid values go straight to the system journal) are not
 * visible in this listing. */
256 static JournalFile* find_journal(Server *s, uid_t uid) {
257 _cleanup_free_ char *p = NULL;
264 /* We split up user logs only on /var, not on /run. If the
265 * runtime file is open, we write to it exclusively, in order
266 * to guarantee proper order as soon as we flush /run to
267 * /var and close the runtime file. */
269 if (s->runtime_journal)
270 return s->runtime_journal;
273 return s->system_journal;
275 r = sd_id128_get_machine(&machine);
277 return s->system_journal;
279 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
283 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
284 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
285 return s->system_journal;
287 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
288 /* Too many open? Then let's close one */
289 f = hashmap_steal_first(s->user_journals);
291 journal_file_close(f);
294 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
296 return s->system_journal;
298 server_fix_perms(s, f, uid);
300 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
302 journal_file_close(f);
303 return s->system_journal;
/* Rotates every open journal file through journal_file_rotate().
 *
 * Order: the runtime journal (sealing disabled), the system journal, then
 * each per-user journal in s->user_journals (both with s->seal). A failed
 * rotation is logged -- with the file path if the file pointer is still
 * set, otherwise as a failure to create the new file. server_fix_perms()
 * is applied to rotated files, and a rotated user journal replaces its old
 * hashmap entry under the same key.
 * NOTE(review): the if/else structure tying the log calls and the
 * fix-up calls to the rotation result is not visible in this listing. */
309 void server_rotate(Server *s) {
315 log_debug("Rotating...");
317 if (s->runtime_journal) {
318 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
320 if (s->runtime_journal)
321 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
323 log_error("Failed to create new runtime journal: %s", strerror(-r));
325 server_fix_perms(s, s->runtime_journal, 0);
328 if (s->system_journal) {
329 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
331 if (s->system_journal)
332 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
334 log_error("Failed to create new system journal: %s", strerror(-r));
337 server_fix_perms(s, s->system_journal, 0);
340 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
341 r = journal_file_rotate(&f, s->compress, s->seal);
344 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
346 log_error("Failed to create user journal: %s", strerror(-r));
348 hashmap_replace(s->user_journals, k, f);
349 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Syncs the persistent journals to disk and cancels the pending sync timer.
 *
 * journal_file_set_offline() is called on the system journal (if open) and
 * on every per-user journal. Then the timer on s->sync_timer_fd is set to
 * an all-zero itimerspec, which disarms it, and s->sync_scheduled is
 * cleared. Failures are logged (the checks guarding the log calls are not
 * visible in this listing). */
354 void server_sync(Server *s) {
360 static const struct itimerspec sync_timer_disable = {};
362 if (s->system_journal) {
363 r = journal_file_set_offline(s->system_journal);
365 log_error("Failed to sync system journal: %s", strerror(-r));
368 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
369 r = journal_file_set_offline(f);
371 log_error("Failed to sync user journal: %s", strerror(-r));
374 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
376 log_error("Failed to disable max timer: %m");
378 s->sync_scheduled = false;
/* Deletes old journal files so that the configured limits are honoured.
 *
 * s->oldest_file_usec is reset, then journal_directory_vacuum() is run on
 * /var/log/journal/<machine-id> when the system journal is open and on
 * /run/log/journal/<machine-id> when the runtime journal is open, each with
 * the matching max_use/keep_free metrics and s->max_retention_usec. Errors
 * other than -ENOENT are logged.
 *
 * Finally the available-space cache timestamp is zeroed, presumably so that
 * the next available_space() call recomputes the value. */
381 void server_vacuum(Server *s) {
386 log_debug("Vacuuming...");
388 s->oldest_file_usec = 0;
390 r = sd_id128_get_machine(&machine);
392 log_error("Failed to get machine ID: %s", strerror(-r));
396 sd_id128_to_string(machine, ids);
398 if (s->system_journal) {
399 char *p = strappenda("/var/log/journal/", ids);
401 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
402 if (r < 0 && r != -ENOENT)
403 log_error("Failed to vacuum %s: %s", p, strerror(-r));
406 if (s->runtime_journal) {
407 char *p = strappenda("/run/log/journal/", ids);
409 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
410 if (r < 0 && r != -ENOENT)
411 log_error("Failed to vacuum %s: %s", p, strerror(-r));
414 s->cached_available_space_timestamp = 0;
417 bool shall_try_append_again(JournalFile *f, int r) {
419 /* -E2BIG Hit configured limit
421 -EDQUOT Quota limit hit
423 -EHOSTDOWN Other machine
424 -EBUSY Unclean shutdown
425 -EPROTONOSUPPORT Unsupported feature
428 -ESHUTDOWN Already archived */
430 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
431 log_debug("%s: Allocation limit reached, rotating.", f->path);
432 else if (r == -EHOSTDOWN)
433 log_info("%s: Journal file from other machine, rotating.", f->path);
434 else if (r == -EBUSY)
435 log_info("%s: Unclean shutdown, rotating.", f->path);
436 else if (r == -EPROTONOSUPPORT)
437 log_info("%s: Unsupported feature, rotating.", f->path);
438 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
439 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Appends one entry, consisting of n iovec fields, to the journal chosen
 * for uid.
 *
 * The target file comes from find_journal(). When
 * journal_file_rotate_suggested() fires for it, the journal is looked up
 * again (the rotation/vacuum calls in between are not visible in this
 * listing). After the append server_schedule_sync() is called, presumably
 * only on success. If the append failed and either a vacuum already
 * happened (vacuumed) or shall_try_append_again() declines, the entry is
 * dropped with an error message; otherwise the journal is looked up once
 * more and the write retried, a second failure being logged as well.
 *
 * NOTE(review): both log_error() calls print the unsigned n with "%d". */
446 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
448 bool vacuumed = false;
455 f = find_journal(s, uid);
459 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
460 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
465 f = find_journal(s, uid);
470 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
472 server_schedule_sync(s);
476 if (vacuumed || !shall_try_append_again(f, r)) {
479 for (i = 0; i < n; i++)
480 size += iovec[i].iov_len;
482 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
489 f = find_journal(s, uid);
493 log_debug("Retrying write.");
494 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
498 for (i = 0; i < n; i++)
499 size += iovec[i].iov_len;
501 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
/* Augments an entry with metadata fields gathered by the server itself and
 * hands it to write_to_journal().
 *
 * iovec holds n caller-supplied fields and has room for m in total; the
 * assertion checks that N_IOVEC_META_FIELDS (plus N_IOVEC_OBJECT_FIELDS
 * when object_pid is set) more entries still fit. Fields appended here, as
 * far as visible:
 *   - for the sender (ucred): _PID, _UID, _GID, _COMM, _EXE, _CMDLINE,
 *     _CAP_EFFECTIVE, _AUDIT_SESSION, _AUDIT_LOGINUID, _SYSTEMD_CGROUP,
 *     _SYSTEMD_SESSION, _SYSTEMD_OWNER_UID, _SYSTEMD_UNIT and
 *     _SYSTEMD_USER_UNIT (with unit_id as fallback value), and
 *     _SELINUX_CONTEXT taken from label or from getpidcon();
 *   - for object_pid: the corresponding OBJECT_* fields;
 *   - _SOURCE_REALTIME_TIMESTAMP from tv, _BOOT_ID, _MACHINE_ID, _HOSTNAME.
 *
 * The journal the entry goes to is then selected by s->split_mode:
 * SPLIT_UID uses the sender's real UID when it is non-zero; SPLIT_LOGIN
 * applies when the real UID is non-zero and a non-zero session owner UID
 * was found (the assignments made in that and in the default case are not
 * visible in this listing).
 *
 * NOTE(review): most guards (checks on ucred/tv/object_pid, on the r
 * results, free() calls and conditional-compilation blocks) are missing
 * from this listing; do not read the visible statements as unconditional. */
505 static void dispatch_message_real(
507 struct iovec *iovec, unsigned n, unsigned m,
510 const char *label, size_t label_len,
514 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
515 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
516 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
517 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
518 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
519 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
520 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=",
521 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
522 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
523 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
531 uid_t realuid = 0, owner = 0, journal_uid;
532 bool owner_valid = false;
534 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
535 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
536 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
537 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
546 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
549 realuid = ucred->uid;
551 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
552 IOVEC_SET_STRING(iovec[n++], pid);
554 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
555 IOVEC_SET_STRING(iovec[n++], uid);
557 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
558 IOVEC_SET_STRING(iovec[n++], gid);
560 r = get_process_comm(ucred->pid, &t);
562 x = strappenda("_COMM=", t);
564 IOVEC_SET_STRING(iovec[n++], x);
567 r = get_process_exe(ucred->pid, &t);
569 x = strappenda("_EXE=", t);
571 IOVEC_SET_STRING(iovec[n++], x);
574 r = get_process_cmdline(ucred->pid, 0, false, &t);
576 x = strappenda("_CMDLINE=", t);
578 IOVEC_SET_STRING(iovec[n++], x);
581 r = get_process_capeff(ucred->pid, &t);
583 x = strappenda("_CAP_EFFECTIVE=", t);
585 IOVEC_SET_STRING(iovec[n++], x);
589 r = audit_session_from_pid(ucred->pid, &audit);
591 sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
592 IOVEC_SET_STRING(iovec[n++], audit_session);
595 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
597 sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
598 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
602 r = cg_pid_get_path_shifted(ucred->pid, NULL, &c);
604 char *session = NULL;
606 x = strappenda("_SYSTEMD_CGROUP=", c);
607 IOVEC_SET_STRING(iovec[n++], x);
609 r = cg_path_get_session(c, &t);
611 session = strappenda("_SYSTEMD_SESSION=", t);
613 IOVEC_SET_STRING(iovec[n++], session);
616 if (cg_path_get_owner_uid(c, &owner) >= 0) {
619 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
620 IOVEC_SET_STRING(iovec[n++], owner_uid);
623 if (cg_path_get_unit(c, &t) >= 0) {
624 x = strappenda("_SYSTEMD_UNIT=", t);
626 IOVEC_SET_STRING(iovec[n++], x);
627 } else if (unit_id && !session) {
628 x = strappenda("_SYSTEMD_UNIT=", unit_id);
629 IOVEC_SET_STRING(iovec[n++], x);
632 if (cg_path_get_user_unit(c, &t) >= 0) {
633 x = strappenda("_SYSTEMD_USER_UNIT=", t);
635 IOVEC_SET_STRING(iovec[n++], x);
636 } else if (unit_id && session) {
637 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
638 IOVEC_SET_STRING(iovec[n++], x);
646 x = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);
648 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
649 IOVEC_SET_STRING(iovec[n++], x);
651 security_context_t con;
653 if (getpidcon(ucred->pid, &con) >= 0) {
654 x = strappenda("_SELINUX_CONTEXT=", con);
657 IOVEC_SET_STRING(iovec[n++], x);
665 r = get_process_uid(object_pid, &object_uid);
667 sprintf(o_uid, "OBJECT_UID=%lu", (unsigned long) object_uid);
668 IOVEC_SET_STRING(iovec[n++], o_uid);
671 r = get_process_gid(object_pid, &object_gid);
673 sprintf(o_gid, "OBJECT_GID=%lu", (unsigned long) object_gid);
674 IOVEC_SET_STRING(iovec[n++], o_gid);
677 r = get_process_comm(object_pid, &t);
679 x = strappenda("OBJECT_COMM=", t);
681 IOVEC_SET_STRING(iovec[n++], x);
684 r = get_process_exe(object_pid, &t);
686 x = strappenda("OBJECT_EXE=", t);
688 IOVEC_SET_STRING(iovec[n++], x);
691 r = get_process_cmdline(object_pid, 0, false, &t);
693 x = strappenda("OBJECT_CMDLINE=", t);
695 IOVEC_SET_STRING(iovec[n++], x);
699 r = audit_session_from_pid(object_pid, &audit);
701 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%lu", (unsigned long) audit);
702 IOVEC_SET_STRING(iovec[n++], o_audit_session);
705 r = audit_loginuid_from_pid(object_pid, &loginuid);
707 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
708 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
712 r = cg_pid_get_path_shifted(object_pid, NULL, &c);
714 x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
715 IOVEC_SET_STRING(iovec[n++], x);
717 r = cg_path_get_session(c, &t);
719 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
721 IOVEC_SET_STRING(iovec[n++], x);
724 if (cg_path_get_owner_uid(c, &owner) >= 0) {
725 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
726 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
729 if (cg_path_get_unit(c, &t) >= 0) {
730 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
732 IOVEC_SET_STRING(iovec[n++], x);
735 if (cg_path_get_user_unit(c, &t) >= 0) {
736 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
738 IOVEC_SET_STRING(iovec[n++], x);
747 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
748 IOVEC_SET_STRING(iovec[n++], source_time);
751 /* Note that strictly speaking storing the boot id here is
752 * redundant since the entry includes this in-line
753 * anyway. However, we need this indexed, too. */
754 r = sd_id128_get_boot(&id);
756 sd_id128_to_string(id, boot_id + strlen("_BOOT_ID="));
757 IOVEC_SET_STRING(iovec[n++], boot_id);
760 r = sd_id128_get_machine(&id);
762 sd_id128_to_string(id, machine_id + strlen("_MACHINE_ID="));
763 IOVEC_SET_STRING(iovec[n++], machine_id);
766 t = gethostname_malloc();
768 x = strappenda("_HOSTNAME=", t);
770 IOVEC_SET_STRING(iovec[n++], x);
775 if (s->split_mode == SPLIT_UID && realuid > 0)
776 /* Split up strictly by any UID */
777 journal_uid = realuid;
778 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
779 /* Split up by login UIDs, this avoids creation of
780 * individual journals for system UIDs. We do this
781 * only if the realuid is not root, in order not to
782 * accidentally leak privileged information to the
783 * user that is logged by a privileged process that is
784 * part of an unprivileged session.*/
789 write_to_journal(s, journal_uid, iovec, n);
/* Writes a message generated by the journal server itself.
 *
 * s:          server to log through.
 * message_id: 128-bit message ID; a message ID field is added unless it
 *             equals SD_ID128_NULL.
 * format/...: printf-style message text.
 *
 * The entry consists of PRIORITY=6, _TRANSPORT=driver and MESSAGE=<text>;
 * the text is formatted with vsnprintf() into a fixed-size local buffer and
 * therefore truncated when longer. The entry is dispatched through
 * dispatch_message_real() with the server's own pid, uid and gid as sender
 * credentials and without timestamp, label, unit id or object pid. */
792 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
793 char mid[11 + 32 + 1];
794 char buffer[16 + LINE_MAX + 1];
795 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
798 struct ucred ucred = {};
803 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
804 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
806 memcpy(buffer, "MESSAGE=", 8);
807 va_start(ap, format);
808 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
810 char_array_0(buffer);
811 IOVEC_SET_STRING(iovec[n++], buffer);
813 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
814 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
816 IOVEC_SET_STRING(iovec[n++], mid);
819 ucred.pid = getpid();
820 ucred.uid = getuid();
821 ucred.gid = getgid();
823 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, 0);
/* Entry point for storing a received message: applies the level filter and
 * rate limiting, then forwards to dispatch_message_real().
 *
 * Messages whose priority level is above s->max_level_store are filtered
 * out, and nothing is stored at all when s->storage is STORAGE_NONE (the
 * statements executed in those two cases are not visible in this listing).
 * The sender's cgroup path, cut at its third '/', serves as the key for
 * journal_rate_limit_test(), which also receives the priority and the
 * current available_space(). When the rate limiter reports suppressed
 * messages, a "Suppressed ..." driver message is written before the entry
 * itself is dispatched.
 * NOTE(review): the checks on the rate limiter's result are not visible
 * here. */
826 void server_dispatch_message(
828 struct iovec *iovec, unsigned n, unsigned m,
831 const char *label, size_t label_len,
837 _cleanup_free_ char *path = NULL;
841 assert(iovec || n == 0);
846 if (LOG_PRI(priority) > s->max_level_store)
849 /* Stop early in case the information will not be stored
851 if (s->storage == STORAGE_NONE)
857 r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
861 /* example: /user/lennart/3/foobar
862 * /system/dbus.service/foobar
864 * So let's cut off everything past the third /, since that is
865 * where user directories start */
867 c = strchr(path, '/');
869 c = strchr(c+1, '/');
871 c = strchr(c+1, '/');
877 rl = journal_rate_limit_test(s->rate_limit, path,
878 priority & LOG_PRIMASK, available_space(s, false));
883 /* Write a suppression message if we suppressed something */
885 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
886 "Suppressed %u messages from %s", rl - 1, path);
889 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, object_pid);
/* Opens the system and/or runtime journal as required by s->storage.
 *
 * System journal: when none is open yet, storage is persistent or auto and
 * the flag file /run/systemd/journal/flushed exists,
 * /var/log/journal/<machine-id>/system.journal is opened (created if
 * needed). /var/log/journal itself is only created in persistent mode.
 * Open failures other than -ENOENT and -EROFS are logged as warnings.
 *
 * Runtime journal: when none is open yet and storage is not "none",
 * /run/log/journal/<machine-id>/system.journal is considered. With a system
 * journal open it is only opened if it already exists, so that it can be
 * flushed; otherwise it is created together with its parent directories.
 *
 * server_fix_perms() is applied to the opened files and available_space()
 * is called verbosely at the end. The return value is not visible in this
 * listing. */
893 static int system_journal_open(Server *s) {
899 r = sd_id128_get_machine(&machine);
903 sd_id128_to_string(machine, ids);
905 if (!s->system_journal &&
906 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
907 access("/run/systemd/journal/flushed", F_OK) >= 0) {
909 /* If in auto mode: first try to create the machine
910 * path, but not the prefix.
912 * If in persistent mode: create /var/log/journal and
913 * the machine path */
915 if (s->storage == STORAGE_PERSISTENT)
916 (void) mkdir("/var/log/journal/", 0755);
918 fn = strappenda("/var/log/journal/", ids);
919 (void) mkdir(fn, 0755);
921 fn = strappenda(fn, "/system.journal");
922 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
925 server_fix_perms(s, s->system_journal, 0);
927 if (r != -ENOENT && r != -EROFS)
928 log_warning("Failed to open system journal: %s", strerror(-r));
934 if (!s->runtime_journal &&
935 (s->storage != STORAGE_NONE)) {
937 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
941 if (s->system_journal) {
943 /* Try to open the runtime journal, but only
944 * if it already exists, so that we can flush
945 * it into the system journal */
947 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
952 log_warning("Failed to open runtime journal: %s", strerror(-r));
959 /* OK, we really need the runtime journal, so create
960 * it if necessary. */
962 (void) mkdir_parents(fn, 0755);
963 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
967 log_error("Failed to open runtime journal: %s", strerror(-r));
972 if (s->runtime_journal)
973 server_fix_perms(s, s->runtime_journal, 0);
976 available_space(s, true);
/* Copies the contents of the runtime journal into the system journal and
 * then disposes of the runtime journal.
 *
 * The function only proceeds when storage is auto or persistent, a runtime
 * journal is open, and a system journal is available after calling
 * system_journal_open(). Entries are iterated through an sd_journal handle
 * opened with SD_JOURNAL_RUNTIME_ONLY (data threshold disabled) and copied
 * with journal_file_copy_entry(). When a copy fails with an error accepted
 * by shall_try_append_again(), the copy is retried once; the steps taken
 * between the two attempts are not visible in this listing, but the
 * visible check shows the system journal may be gone afterwards, in which
 * case flushing is abandoned with a notice.
 *
 * Afterwards journal_file_post_change() is called on the system journal,
 * the runtime journal is closed, /run/log/journal is removed and the
 * sd_journal handle is closed.
 * NOTE(review): which of these final steps also run on the error paths,
 * and what is returned, cannot be determined from this listing. */
981 int server_flush_to_var(Server *s) {
984 sd_journal *j = NULL;
988 if (s->storage != STORAGE_AUTO &&
989 s->storage != STORAGE_PERSISTENT)
992 if (!s->runtime_journal)
995 system_journal_open(s);
997 if (!s->system_journal)
1000 log_debug("Flushing to /var...");
1002 r = sd_id128_get_machine(&machine);
1004 log_error("Failed to get machine id: %s", strerror(-r));
1008 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1010 log_error("Failed to read runtime journal: %s", strerror(-r));
1014 sd_journal_set_data_threshold(j, 0);
1016 SD_JOURNAL_FOREACH(j) {
1020 f = j->current_file;
1021 assert(f && f->current_offset > 0);
1023 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1025 log_error("Can't read entry: %s", strerror(-r));
1029 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1033 if (!shall_try_append_again(s->system_journal, r)) {
1034 log_error("Can't write entry: %s", strerror(-r));
1041 if (!s->system_journal) {
1042 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1047 log_debug("Retrying write.");
1048 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1050 log_error("Can't write entry: %s", strerror(-r));
1056 journal_file_post_change(s->system_journal);
1058 journal_file_close(s->runtime_journal);
1059 s->runtime_journal = NULL;
1062 rm_rf("/run/log/journal", false, true, false);
1064 sd_journal_close(j);
/* Handles one epoll event by dispatching on the file descriptor it carries:
 *
 *   - s->signal_fd: reads one signalfd_siginfo. SIGUSR1 touches
 *     /run/systemd/journal/flushed and calls server_flush_to_var();
 *     SIGUSR2 is handled by statements not visible in this listing; the
 *     remaining signals are logged with their name.
 *   - s->sync_timer_fd: reads from the timer fd ("sync request").
 *   - s->dev_kmsg_fd: calls server_read_dev_kmsg().
 *   - s->native_fd / s->syslog_fd: queries the pending size with SIOCINQ,
 *     grows s->buffer if needed, receives with recvmsg() and walks the
 *     control messages for sender credentials (SCM_CREDENTIALS), security
 *     label (SCM_SECURITY), timestamp (SO_TIMESTAMP) and passed file
 *     descriptors (SCM_RIGHTS). Syslog payloads go to
 *     server_process_syslog_message(); native payloads go to
 *     server_process_native_message(), or -- when there is no payload and
 *     exactly one fd -- to server_process_native_file(). Received fds are
 *     closed afterwards.
 *   - s->stdout_fd: creates a new stream via stdout_stream_new().
 *   - anything else: ev->data.ptr is taken to be a StdoutStream, which is
 *     processed and freed when stdout_stream_process() returns <= 0.
 *
 * Event masks other than EPOLLIN (for streams: anything beyond
 * EPOLLIN|EPOLLHUP) are reported as invalid.
 * NOTE(review): return values and loop structure are not visible in this
 * listing. */
1069 int process_event(Server *s, struct epoll_event *ev) {
1073 if (ev->data.fd == s->signal_fd) {
1074 struct signalfd_siginfo sfsi;
1077 if (ev->events != EPOLLIN) {
1078 log_error("Got invalid event from epoll.");
1082 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1083 if (n != sizeof(sfsi)) {
1088 if (errno == EINTR || errno == EAGAIN)
1094 if (sfsi.ssi_signo == SIGUSR1) {
1095 touch("/run/systemd/journal/flushed");
1096 server_flush_to_var(s);
1101 if (sfsi.ssi_signo == SIGUSR2) {
1107 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1111 } else if (ev->data.fd == s->sync_timer_fd) {
1115 log_debug("Got sync request from epoll.");
1117 r = read(ev->data.fd, (void *)&t, sizeof(t));
1124 } else if (ev->data.fd == s->dev_kmsg_fd) {
1127 if (ev->events != EPOLLIN) {
1128 log_error("Got invalid event from epoll.");
1132 r = server_read_dev_kmsg(s);
1138 } else if (ev->data.fd == s->native_fd ||
1139 ev->data.fd == s->syslog_fd) {
1141 if (ev->events != EPOLLIN) {
1142 log_error("Got invalid event from epoll.");
1147 struct msghdr msghdr;
1149 struct ucred *ucred = NULL;
1150 struct timeval *tv = NULL;
1151 struct cmsghdr *cmsg;
1153 size_t label_len = 0;
1155 struct cmsghdr cmsghdr;
1157 /* We use NAME_MAX space for the
1158 * SELinux label here. The kernel
1159 * currently enforces no limit, but
1160 * according to suggestions from the
1161 * SELinux people this will change and
1162 * it will probably be identical to
1163 * NAME_MAX. For now we use that, but
1164 * this should be updated one day when
1165 * the final limit is known.*/
1166 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1167 CMSG_SPACE(sizeof(struct timeval)) +
1168 CMSG_SPACE(sizeof(int)) + /* fd */
1169 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1176 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1177 log_error("SIOCINQ failed: %m");
1181 if (s->buffer_size < (size_t) v) {
1185 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1186 b = realloc(s->buffer, l+1);
1189 log_error("Couldn't increase buffer.");
1198 iovec.iov_base = s->buffer;
1199 iovec.iov_len = s->buffer_size;
1203 msghdr.msg_iov = &iovec;
1204 msghdr.msg_iovlen = 1;
1205 msghdr.msg_control = &control;
1206 msghdr.msg_controllen = sizeof(control);
1208 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1211 if (errno == EINTR || errno == EAGAIN)
1214 log_error("recvmsg() failed: %m");
1218 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1220 if (cmsg->cmsg_level == SOL_SOCKET &&
1221 cmsg->cmsg_type == SCM_CREDENTIALS &&
1222 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1223 ucred = (struct ucred*) CMSG_DATA(cmsg);
1224 else if (cmsg->cmsg_level == SOL_SOCKET &&
1225 cmsg->cmsg_type == SCM_SECURITY) {
1226 label = (char*) CMSG_DATA(cmsg);
1227 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1228 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1229 cmsg->cmsg_type == SO_TIMESTAMP &&
1230 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1231 tv = (struct timeval*) CMSG_DATA(cmsg);
1232 else if (cmsg->cmsg_level == SOL_SOCKET &&
1233 cmsg->cmsg_type == SCM_RIGHTS) {
1234 fds = (int*) CMSG_DATA(cmsg);
1235 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1239 if (ev->data.fd == s->syslog_fd) {
1242 if (n > 0 && n_fds == 0) {
1243 e = memchr(s->buffer, '\n', n);
1249 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1250 } else if (n_fds > 0)
1251 log_warning("Got file descriptors via syslog socket. Ignoring.");
1254 if (n > 0 && n_fds == 0)
1255 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1256 else if (n == 0 && n_fds == 1)
1257 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1259 log_warning("Got too many file descriptors via native socket. Ignoring.");
1262 close_many(fds, n_fds);
1267 } else if (ev->data.fd == s->stdout_fd) {
1269 if (ev->events != EPOLLIN) {
1270 log_error("Got invalid event from epoll.");
1274 stdout_stream_new(s);
1278 StdoutStream *stream;
1280 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1281 log_error("Got invalid event from epoll.");
1285 /* If it is none of the well-known fds, it must be an
1286 * stdout stream fd. Note that this is a bit ugly here
1287 * (since we rely that none of the well-known fds
1288 * could be interpreted as pointer), but nonetheless
1289 * safe, since the well-known fds would never get an
1290 * fd > 4096, i.e. beyond the first memory page */
1292 stream = ev->data.ptr;
1294 if (stdout_stream_process(stream) <= 0)
1295 stdout_stream_free(stream);
1300 log_error("Unknown event.");
/* Sets up signal delivery through a file descriptor.
 *
 * The process signal mask is replaced by one blocking exactly SIGINT,
 * SIGTERM, SIGUSR1 and SIGUSR2; a non-blocking, close-on-exec signalfd for
 * that set is stored in s->signal_fd and registered for EPOLLIN on
 * s->epoll_fd. Failures of signalfd() and epoll_ctl() are logged; the
 * values returned are not visible in this listing. */
1304 static int open_signalfd(Server *s) {
1306 struct epoll_event ev;
1310 assert_se(sigemptyset(&mask) == 0);
1311 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1312 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1314 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1315 if (s->signal_fd < 0) {
1316 log_error("signalfd(): %m");
1321 ev.events = EPOLLIN;
1322 ev.data.fd = s->signal_fd;
1324 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1325 log_error("epoll_ctl(): %m");
/* Applies journald switches given on the kernel command line.
 *
 * A container check is made first (the statement executed when
 * detect_container() reports a container is not visible in this listing).
 * /proc/cmdline is read and split into quoted words; the options
 *   systemd.journald.forward_to_syslog=
 *   systemd.journald.forward_to_kmsg=
 *   systemd.journald.forward_to_console=
 * are parsed as booleans and stored in the matching Server field. The
 * numeric offsets 35, 33 and 36 are the lengths of these prefixes.
 * Unparsable values and any other word starting with "systemd.journald"
 * only produce a warning, as does a failure to read /proc/cmdline. */
1332 static int server_parse_proc_cmdline(Server *s) {
1333 _cleanup_free_ char *line = NULL;
1338 if (detect_container(NULL) > 0)
1341 r = read_one_line_file("/proc/cmdline", &line);
1343 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1347 FOREACH_WORD_QUOTED(w, l, line, state) {
1348 _cleanup_free_ char *word;
1350 word = strndup(w, l);
1354 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1355 r = parse_boolean(word + 35);
1357 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1359 s->forward_to_syslog = r;
1360 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1361 r = parse_boolean(word + 33);
1363 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1365 s->forward_to_kmsg = r;
1366 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1367 r = parse_boolean(word + 36);
1369 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1371 s->forward_to_console = r;
1372 } else if (startswith(word, "systemd.journald"))
1373 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Loads /etc/systemd/journald.conf into s.
 *
 * The file is parsed by config_parse() for the section "Journal", using
 * journald_gperf_lookup to resolve the settings. A missing file (ENOENT) is
 * distinguished from other open errors -- the statement executed in that
 * case is not visible in this listing; other open failures and parse
 * failures are logged as warnings. */
1379 static int server_parse_config_file(Server *s) {
1380 static const char fn[] = "/etc/systemd/journald.conf";
1381 _cleanup_fclose_ FILE *f = NULL;
1386 f = fopen(fn, "re");
1388 if (errno == ENOENT)
1391 log_warning("Failed to open configuration file %s: %m", fn);
1395 r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1396 (void*) journald_gperf_lookup, false, false, s);
1398 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Creates the timer used for deferred syncing: a close-on-exec
 * CLOCK_MONOTONIC timerfd stored in s->sync_timer_fd and registered for
 * EPOLLIN on s->epoll_fd. A failure to register it is logged; the returned
 * values are not visible in this listing. */
1403 static int server_open_sync_timer(Server *s) {
1405 struct epoll_event ev;
1409 s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1410 if (s->sync_timer_fd < 0)
1414 ev.events = EPOLLIN;
1415 ev.data.fd = s->sync_timer_fd;
1417 r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1419 log_error("Failed to add idle timer fd to epoll object: %m");
/* Arms the sync timer unless a sync is already scheduled.
 *
 * With a non-zero s->sync_interval_usec, a one-shot timer of that length
 * (only it_value is set, the interval stays zero) is programmed on
 * s->sync_timer_fd and s->sync_scheduled is set. What happens for a zero
 * interval, and the return values, are not visible in this listing. */
1426 int server_schedule_sync(Server *s) {
1431 if (s->sync_scheduled)
1434 if (s->sync_interval_usec) {
1435 struct itimerspec sync_timer_enable = {};
1437 timespec_store(&sync_timer_enable.it_value, s->sync_interval_usec);
1439 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1444 s->sync_scheduled = true;
/* Initializes the server state and opens every resource it needs.
 *
 * Steps, in order:
 *   1. All file descriptors are set to -1 and defaults are installed (sync
 *      interval, rate limit, syslog forwarding, maximum log levels); both
 *      metrics structures are filled with 0xFF bytes.
 *   2. The configuration file and then the kernel command line are applied.
 *      If exactly one of rate-limit interval and burst is zero afterwards,
 *      both are reset to 0.
 *   3. /run/systemd/journal, the user-journal hashmap, the mmap cache and
 *      the epoll fd are created.
 *   4. Sockets passed in via sd_listen_fds() are classified as native,
 *      stdout or /dev/log socket; a second socket of the same kind or an
 *      unknown one is an error.
 *   5. The syslog, native and stdout sockets, /dev/kmsg, the kernel sequence
 *      number, the sync timer and the signalfd are opened, followed by the
 *      udev context, the rate limiter and finally the journals via
 *      system_journal_open().
 * NOTE(review): the error checks after the individual steps and the
 * returned values are not visible in this listing. */
1449 int server_init(Server *s) {
1455 s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1456 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1460 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1461 s->sync_scheduled = false;
1463 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1464 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1466 s->forward_to_syslog = true;
1468 s->max_level_store = LOG_DEBUG;
1469 s->max_level_syslog = LOG_DEBUG;
1470 s->max_level_kmsg = LOG_NOTICE;
1471 s->max_level_console = LOG_INFO;
1473 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1474 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1476 server_parse_config_file(s);
1477 server_parse_proc_cmdline(s);
1478 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1479 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1480 (long long unsigned) s->rate_limit_interval,
1481 s->rate_limit_burst);
1482 s->rate_limit_interval = s->rate_limit_burst = 0;
1485 mkdir_p("/run/systemd/journal", 0755);
1487 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1488 if (!s->user_journals)
1491 s->mmap = mmap_cache_new();
1495 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1496 if (s->epoll_fd < 0) {
1497 log_error("Failed to create epoll object: %m");
1501 n = sd_listen_fds(true);
1503 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1507 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1509 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1511 if (s->native_fd >= 0) {
1512 log_error("Too many native sockets passed.");
1518 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1520 if (s->stdout_fd >= 0) {
1521 log_error("Too many stdout sockets passed.");
1527 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1529 if (s->syslog_fd >= 0) {
1530 log_error("Too many /dev/log sockets passed.");
1537 log_error("Unknown socket passed.");
1542 r = server_open_syslog_socket(s);
1546 r = server_open_native_socket(s);
1550 r = server_open_stdout_socket(s);
1554 r = server_open_dev_kmsg(s);
1558 r = server_open_kernel_seqnum(s);
1562 r = server_open_sync_timer(s);
1566 r = open_signalfd(s);
1570 s->udev = udev_new();
1574 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1575 s->rate_limit_burst);
1579 r = system_journal_open(s);
/* Calls journal_file_maybe_append_tag() with the current CLOCK_REALTIME
 * time on the system journal (when open) and on every journal in
 * s->user_journals. */
1586 void server_maybe_append_tags(Server *s) {
1592 n = now(CLOCK_REALTIME);
1594 if (s->system_journal)
1595 journal_file_maybe_append_tag(s->system_journal, n);
1597 HASHMAP_FOREACH(f, s->user_journals, i)
1598 journal_file_maybe_append_tag(f, n);
/* Releases everything held by the server.
 *
 * All stdout streams are freed; the system, runtime and per-user journals
 * are closed and the user-journal hashmap freed; every file descriptor
 * that is >= 0 is closed; the rate limiter is freed; s->kernel_seqnum is
 * unmapped when set; and the references to the mmap cache and the udev
 * context are dropped.
 * NOTE(review): the end of this function lies beyond the visible listing. */
1602 void server_done(Server *s) {
1606 while (s->stdout_streams)
1607 stdout_stream_free(s->stdout_streams);
1609 if (s->system_journal)
1610 journal_file_close(s->system_journal);
1612 if (s->runtime_journal)
1613 journal_file_close(s->runtime_journal);
1615 while ((f = hashmap_steal_first(s->user_journals)))
1616 journal_file_close(f);
1618 hashmap_free(s->user_journals);
1620 if (s->epoll_fd >= 0)
1621 close_nointr_nofail(s->epoll_fd);
1623 if (s->signal_fd >= 0)
1624 close_nointr_nofail(s->signal_fd);
1626 if (s->syslog_fd >= 0)
1627 close_nointr_nofail(s->syslog_fd);
1629 if (s->native_fd >= 0)
1630 close_nointr_nofail(s->native_fd);
1632 if (s->stdout_fd >= 0)
1633 close_nointr_nofail(s->stdout_fd);
1635 if (s->dev_kmsg_fd >= 0)
1636 close_nointr_nofail(s->dev_kmsg_fd);
1638 if (s->sync_timer_fd >= 0)
1639 close_nointr_nofail(s->sync_timer_fd);
1642 journal_rate_limit_free(s->rate_limit);
1644 if (s->kernel_seqnum)
1645 munmap(s->kernel_seqnum, sizeof(uint64_t));
1651 mmap_cache_unref(s->mmap);
1654 udev_unref(s->udev);