1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
43 #include "conf-parser.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-server.h"
48 #include "journald-rate-limit.h"
49 #include "journald-kmsg.h"
50 #include "journald-syslog.h"
51 #include "journald-stream.h"
52 #include "journald-console.h"
53 #include "journald-native.h"
57 #include <acl/libacl.h>
62 #include <selinux/selinux.h>
/* Upper bound on the number of per-user journal files kept open at the
 * same time; find_journal() closes entries while the map has reached
 * this size. */
65 #define USER_JOURNALS_MAX 1024
/* Default value of Server.sync_interval_usec, i.e. the delay used when
 * server_schedule_sync() arms the sync timer. */
67 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
/* Default rate-limit interval and burst; server_init() stores them in
 * the Server and later hands them to journal_rate_limit_new(). */
68 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
69 #define DEFAULT_RATE_LIMIT_BURST 1000
/* available_space() returns its cached result until this much monotonic
 * time has passed since the last computation. */
71 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* String names for the Storage enum values, indexed by enum value. */
73 static const char* const storage_table[] = {
74 [STORAGE_AUTO] = "auto",
75 [STORAGE_VOLATILE] = "volatile",
76 [STORAGE_PERSISTENT] = "persistent",
77 [STORAGE_NONE] = "none"
/* Generate the string<->enum lookup helpers and the configuration-file
 * parser callback (config_parse_storage) for Storage. */
80 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
81 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* String names for the SplitMode enum values, indexed by enum value. */
83 static const char* const split_mode_table[] = {
84 [SPLIT_NONE] = "none",
86 [SPLIT_LOGIN] = "login"
/* Same pair of generated helpers for SplitMode (config_parse_split_mode). */
89 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
90 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Estimate how many more bytes the journal may occupy.
 *
 * Two limits are computed for the active journal directory:
 *   - max_use minus the space already taken by *.journal / *.journal~ files
 *   - free space on the file system minus keep_free
 * The two are compared at the end (presumably the smaller one is returned)
 * and the result is cached in the Server together with a monotonic
 * timestamp, so that the directory is not rescanned on every call. */
92 static uint64_t available_space(Server *s) {
94 _cleanup_free_ char *p = NULL;
98 uint64_t sum = 0, avail = 0, ss_avail = 0;
100 _cleanup_closedir_ DIR *d = NULL;
104 ts = now(CLOCK_MONOTONIC);
/* Serve the cached value while it is younger than the recheck interval. */
106 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
107 return s->cached_available_space;
109 r = sd_id128_get_machine(&machine);
/* Pick directory prefix and metrics: persistent ones if the system
 * journal is open, the runtime ones otherwise. */
113 if (s->system_journal) {
114 f = "/var/log/journal/";
115 m = &s->system_metrics;
117 f = "/run/log/journal/";
118 m = &s->runtime_metrics;
/* Journal directory is <prefix><machine-id>. */
123 p = strappend(f, sd_id128_to_string(machine, ids));
131 if (fstatvfs(dirfd(d), &ss) < 0)
137 union dirent_storage buf;
139 r = readdir_r(d, &buf.de, &de);
/* Only journal files (active or archived/corrupted with '~' suffix) count. */
146 if (!endswith(de->d_name, ".journal") &&
147 !endswith(de->d_name, ".journal~"))
150 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
153 if (!S_ISREG(st.st_mode))
/* st_blocks is counted in 512-byte units, so this is actual disk usage. */
156 sum += (uint64_t) st.st_blocks * 512UL;
/* Room left below the configured maximum, clamped at zero. */
159 avail = sum >= m->max_use ? 0 : m->max_use - sum;
/* NOTE(review): POSIX expresses f_bavail in f_frsize units; f_bsize is
 * used here -- confirm this is intended. */
161 ss_avail = ss.f_bsize * ss.f_bavail;
/* Free space on the file system after reserving keep_free, clamped at zero. */
163 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
165 if (ss_avail < avail)
/* Remember the result for subsequent calls. */
168 s->cached_available_space = avail;
169 s->cached_available_space_timestamp = ts;
/* Resolve the "systemd-journal" group into s->file_gid, at most once per
 * Server. The result is marked valid even on failure (see below). */
174 static void server_read_file_gid(Server *s) {
175 const char *g = "systemd-journal";
/* Already resolved (or already attempted) earlier. */
180 if (s->file_gid_valid)
183 r = get_group_creds(&g, &s->file_gid);
185 log_warning("Failed to resolve '%s' group: %s", g, strerror(-r));
187 /* if we couldn't read the gid, then it will be 0, but that's
188 * fine and we shouldn't try to resolve the group again, so
189 * let's just pretend it worked right-away. */
190 s->file_gid_valid = true;
/* Fix ownership, mode and ACL of a journal file.
 *
 * The file is set to mode 0640, owner 0 and the journal group. An ACL
 * entry granting read access to `uid` is then looked up or created.
 * All failures are logged as warnings and otherwise ignored.
 *
 * s:   server, provides the (lazily resolved) journal group id
 * f:   journal file whose fd is adjusted
 * uid: user to receive read access through the ACL; callers pass 0 for
 *      the system and runtime journals */
193 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
198 acl_permset_t permset;
203 server_read_file_gid(s);
205 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
207 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
213 acl = acl_get_fd(f->fd);
215 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Reuse an existing ACL_USER entry for uid if there is one... */
219 r = acl_find_uid(acl, uid, &entry);
/* ...and otherwise create a new one qualified with uid. */
222 if (acl_create_entry(&acl, &entry) < 0 ||
223 acl_set_tag_type(entry, ACL_USER) < 0 ||
224 acl_set_qualifier(entry, &uid) < 0) {
225 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
230 /* We do not recalculate the mask unconditionally here,
231 * so that the fchmod() mask above stays intact. */
232 if (acl_get_permset(entry, &permset) < 0 ||
233 acl_add_perm(permset, ACL_READ) < 0 ||
234 calc_acl_mask_if_needed(&acl) < 0) {
235 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Write the modified ACL back to the file. */
239 if (acl_set_fd(f->fd, acl) < 0)
240 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Return the journal file an entry for `uid` should be written to.
 *
 * The runtime journal wins whenever it is open. Otherwise the per-user
 * journal /var/log/journal/<machine-id>/user-<uid>.journal is looked up
 * in s->user_journals and opened on demand; every failure along the way
 * falls back to the system journal. */
247 static JournalFile* find_journal(Server *s, uid_t uid) {
248 _cleanup_free_ char *p = NULL;
255 /* We split up user logs only on /var, not on /run. If the
256 * runtime file is open, we write to it exclusively, in order
257 * to guarantee proper order as soon as we flush /run to
258 * /var and close the runtime file. */
260 if (s->runtime_journal)
261 return s->runtime_journal;
264 return s->system_journal;
266 r = sd_id128_get_machine(&machine);
268 return s->system_journal;
/* Already open for this user? The uid is used directly as the hash key. */
270 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
274 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
275 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
276 return s->system_journal;
/* Make room for the new file by closing whichever entries the map
 * hands out first. */
278 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
279 /* Too many open? Then let's close one */
280 f = hashmap_steal_first(s->user_journals);
282 journal_file_close(f);
285 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
287 return s->system_journal;
/* Give the owning user read access to the freshly opened file. */
289 server_fix_perms(s, f, uid);
291 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
/* Could not register the file: close it again and use the system journal. */
293 journal_file_close(f);
294 return s->system_journal;
/* Rotate every open journal: the runtime journal, the system journal and
 * all per-user journals. Errors are logged; permissions are re-applied
 * to the files via server_fix_perms(). */
300 void server_rotate(Server *s) {
306 log_debug("Rotating...");
308 if (s->runtime_journal) {
/* The runtime journal is rotated with the `seal` argument hard-coded
 * to false. */
309 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
/* The pointer may or may not survive a failed rotation; pick the log
 * message accordingly. */
311 if (s->runtime_journal)
312 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
314 log_error("Failed to create new runtime journal: %s", strerror(-r));
316 server_fix_perms(s, s->runtime_journal, 0);
319 if (s->system_journal) {
320 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
322 if (s->system_journal)
323 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
325 log_error("Failed to create new system journal: %s", strerror(-r));
328 server_fix_perms(s, s->system_journal, 0);
/* Per-user journals: the key k is the uid the file belongs to. */
331 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
332 r = journal_file_rotate(&f, s->compress, s->seal);
335 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
337 log_error("Failed to create user journal: %s", strerror(-r));
/* Rotation replaced f with a new object; update the map entry. */
339 hashmap_replace(s->user_journals, k, f);
340 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Set the system journal and all user journals offline (which, per the
 * log messages, is how they are synced), then disarm the sync timer and
 * clear the "sync scheduled" flag. Errors are logged only. */
345 void server_sync(Server *s) {
/* An all-zero itimerspec disarms the timer when passed to timerfd_settime(). */
351 static const struct itimerspec sync_timer_disable = {};
353 if (s->system_journal) {
354 r = journal_file_set_offline(s->system_journal);
356 log_error("Failed to sync system journal: %s", strerror(-r));
359 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
360 r = journal_file_set_offline(f);
362 log_error("Failed to sync user journal: %s", strerror(-r));
365 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
367 log_error("Failed to disable max timer: %m");
/* Allow server_schedule_sync() to arm the timer again. */
369 s->sync_scheduled = false;
/* Vacuum the journal directories of this machine so that they respect
 * max_use, keep_free and the maximum retention time. A missing directory
 * (-ENOENT) is not reported as an error. */
372 void server_vacuum(Server *s) {
377 log_debug("Vacuuming...");
/* Reset before vacuuming; journal_directory_vacuum() receives a pointer
 * to this field (presumably to record the oldest remaining file). */
379 s->oldest_file_usec = 0;
381 r = sd_id128_get_machine(&machine);
383 log_error("Failed to get machine ID: %s", strerror(-r));
387 sd_id128_to_string(machine, ids);
389 if (s->system_journal) {
390 char *p = strappenda("/var/log/journal/", ids);
392 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
393 if (r < 0 && r != -ENOENT)
394 log_error("Failed to vacuum %s: %s", p, strerror(-r));
397 if (s->runtime_journal) {
398 char *p = strappenda("/run/log/journal/", ids);
400 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
401 if (r < 0 && r != -ENOENT)
402 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* Disk usage changed: force available_space() to recompute next time. */
405 s->cached_available_space_timestamp = 0;
/* Classify the error of a failed journal write and log why the file is
 * going to be rotated.
 *
 * f: journal file the write failed on (only f->path is used, for logging)
 * r: negative errno-style code returned by the failed operation
 *
 * Callers (write_to_journal(), server_flush_to_var()) give up when this
 * returns false and otherwise rotate/vacuum and retry the write. */
408 bool shall_try_append_again(JournalFile *f, int r) {
410 /* -E2BIG Hit configured limit
412 -EDQUOT Quota limit hit
414 -EHOSTDOWN Other machine
415 -EBUSY Unclean shutdown
416 -EPROTONOSUPPORT Unsupported feature
419 -ESHUTDOWN Already archived */
421 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
422 log_debug("%s: Allocation limit reached, rotating.", f->path);
423 else if (r == -EHOSTDOWN)
424 log_info("%s: Journal file from other machine, rotating.", f->path);
425 else if (r == -EBUSY)
426 log_info("%s: Unclean shutdown, rotating.", f->path);
427 else if (r == -EPROTONOSUPPORT)
428 log_info("%s: Unsupported feature, rotating.", f->path);
/* r is a negative errno value, so ESHUTDOWN has to be negated like all
 * the other codes; the positive constant could never match. */
429 else if (r == -EBADMSG || r == -ENODATA || r == -ESHUTDOWN)
430 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Append one entry, given as n iovec fields, to the journal selected for
 * `uid`.
 *
 * The file is rotated up front if journal_file_rotate_suggested() says
 * so. If the append fails with an error that
 * shall_try_append_again() considers recoverable, the journal is looked
 * up again and the write retried once; otherwise the entry is dropped
 * with an error message. */
437 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
439 bool vacuumed = false;
446 f = find_journal(s, uid);
450 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
451 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
/* Look the file up again, since the earlier pointer may be stale. */
456 f = find_journal(s, uid);
461 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
/* Make sure the data reaches disk eventually. */
463 server_schedule_sync(s);
/* Not recoverable, or we already vacuumed once: report the entry size
 * and give up. */
467 if (vacuumed || !shall_try_append_again(f, r)) {
470 for (i = 0; i < n; i++)
471 size += iovec[i].iov_len;
/* NOTE(review): n is unsigned but printed with %d. */
473 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
480 f = find_journal(s, uid);
484 log_debug("Retrying write.");
485 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
489 for (i = 0; i < n; i++)
490 size += iovec[i].iov_len;
492 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
/* Augment an entry with trusted metadata and write it to the journal.
 *
 * Fields describing the sending process (_PID=, _UID=, _COMM=, cgroup,
 * audit and SELinux data, ...) are appended to `iovec`, followed by
 * OBJECT_* fields for `object_pid`, the source timestamp, boot id,
 * machine id and host name. Finally the target journal uid is chosen
 * according to the split mode and the entry is passed to
 * write_to_journal().
 *
 * iovec/n/m:       field array, number of used slots, total capacity;
 *                  the assert below requires enough spare slots
 * label/label_len: SELinux label as raw bytes of the given length
 * object_pid:      if non-zero, OBJECT_* fields are reserved for it
 *
 * Most fields are best-effort: they are derived from lookups by pid
 * (get_process_*, audit_*, cg_*) that can fail. */
496 static void dispatch_message_real(
498 struct iovec *iovec, unsigned n, unsigned m,
501 const char *label, size_t label_len,
/* Fixed-size buffers for the numeric fields; each is sized for the field
 * name plus the widest decimal representation of the value type. */
505 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
506 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
507 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
508 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
509 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
510 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
511 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=",
512 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
513 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
514 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
522 uid_t realuid = 0, owner = 0, journal_uid;
523 bool owner_valid = false;
525 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
526 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
527 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
528 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
/* The caller must have left room for all fields added below. */
537 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
/* --- Fields describing the sender, taken from its socket credentials --- */
540 realuid = ucred->uid;
542 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
543 IOVEC_SET_STRING(iovec[n++], pid);
545 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
546 IOVEC_SET_STRING(iovec[n++], uid);
548 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
549 IOVEC_SET_STRING(iovec[n++], gid);
/* Process name, executable path and command line, looked up by pid. */
551 r = get_process_comm(ucred->pid, &t);
553 x = strappenda("_COMM=", t);
555 IOVEC_SET_STRING(iovec[n++], x);
558 r = get_process_exe(ucred->pid, &t);
560 x = strappenda("_EXE=", t);
562 IOVEC_SET_STRING(iovec[n++], x);
565 r = get_process_cmdline(ucred->pid, 0, false, &t);
567 x = strappenda("_CMDLINE=", t);
569 IOVEC_SET_STRING(iovec[n++], x);
/* Audit session and login uid. */
573 r = audit_session_from_pid(ucred->pid, &audit);
575 sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
576 IOVEC_SET_STRING(iovec[n++], audit_session);
579 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
581 sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
582 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* Control group of the sender and everything derived from its path:
 * session, owning user and unit. */
586 r = cg_pid_get_path_shifted(ucred->pid, NULL, &c);
588 char *session = NULL;
590 x = strappenda("_SYSTEMD_CGROUP=", c);
591 IOVEC_SET_STRING(iovec[n++], x);
593 r = cg_path_get_session(c, &t);
595 session = strappenda("_SYSTEMD_SESSION=", t);
597 IOVEC_SET_STRING(iovec[n++], session);
/* `owner` is also consulted further down when choosing the journal. */
600 if (cg_path_get_owner_uid(c, &owner) >= 0) {
603 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
604 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* Prefer the unit derived from the cgroup path; fall back to the unit_id
 * supplied by the caller. */
607 if (cg_path_get_unit(c, &t) >= 0) {
608 x = strappenda("_SYSTEMD_UNIT=", t);
610 } else if (cg_path_get_user_unit(c, &t) >= 0) {
611 x = strappenda("_SYSTEMD_USER_UNIT=", t);
613 } else if (unit_id) {
615 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
617 x = strappenda("_SYSTEMD_UNIT=", unit_id);
622 IOVEC_SET_STRING(iovec[n++], x);
/* SELinux context: built from `label`, which is copied by length and
 * NUL-terminated here, or queried from the pid via getpidcon(). */
629 x = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);
631 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
632 IOVEC_SET_STRING(iovec[n++], x);
634 security_context_t con;
636 if (getpidcon(ucred->pid, &con) >= 0) {
637 x = strappenda("_SELINUX_CONTEXT=", con);
640 IOVEC_SET_STRING(iovec[n++], x);
/* --- Fields describing the object process the message is about --- */
648 r = get_process_uid(object_pid, &object_uid);
650 sprintf(o_uid, "OBJECT_UID=%lu", (unsigned long) object_uid);
651 IOVEC_SET_STRING(iovec[n++], o_uid);
654 r = get_process_gid(object_pid, &object_gid);
656 sprintf(o_gid, "OBJECT_GID=%lu", (unsigned long) object_gid);
657 IOVEC_SET_STRING(iovec[n++], o_gid);
660 r = get_process_comm(object_pid, &t);
662 x = strappenda("OBJECT_COMM=", t);
664 IOVEC_SET_STRING(iovec[n++], x);
667 r = get_process_exe(object_pid, &t);
669 x = strappenda("OBJECT_EXE=", t);
671 IOVEC_SET_STRING(iovec[n++], x);
674 r = get_process_cmdline(object_pid, 0, false, &t);
676 x = strappenda("OBJECT_CMDLINE=", t);
678 IOVEC_SET_STRING(iovec[n++], x);
682 r = audit_session_from_pid(object_pid, &audit);
684 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%lu", (unsigned long) audit);
685 IOVEC_SET_STRING(iovec[n++], o_audit_session);
688 r = audit_loginuid_from_pid(object_pid, &loginuid);
690 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
691 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
695 r = cg_pid_get_path_shifted(object_pid, NULL, &c);
697 x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
698 IOVEC_SET_STRING(iovec[n++], x);
700 r = cg_path_get_session(c, &t);
702 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
704 IOVEC_SET_STRING(iovec[n++], x);
707 if (cg_path_get_owner_uid(c, &owner) >= 0) {
708 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
709 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
712 if (cg_path_get_unit(c, &t) >= 0) {
713 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
715 } else if (cg_path_get_user_unit(c, &t) >= 0) {
716 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
722 IOVEC_SET_STRING(iovec[n++], x);
/* --- Fields independent of any process --- */
730 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
731 IOVEC_SET_STRING(iovec[n++], source_time);
734 /* Note that strictly speaking storing the boot id here is
735 * redundant since the entry includes this in-line
736 * anyway. However, we need this indexed, too. */
737 r = sd_id128_get_boot(&id);
739 sd_id128_to_string(id, boot_id + strlen("_BOOT_ID="));
740 IOVEC_SET_STRING(iovec[n++], boot_id);
743 r = sd_id128_get_machine(&id);
745 sd_id128_to_string(id, machine_id + strlen("_MACHINE_ID="));
746 IOVEC_SET_STRING(iovec[n++], machine_id);
749 t = gethostname_malloc();
751 x = strappenda("_HOSTNAME=", t);
753 IOVEC_SET_STRING(iovec[n++], x);
/* --- Choose which journal (by uid) receives the entry --- */
758 if (s->split_mode == SPLIT_UID && realuid > 0)
759 /* Split up strictly by any UID */
760 journal_uid = realuid;
761 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
762 /* Split up by login UIDs, this avoids creation of
763 * individual journals for system UIDs. We do this
764 * only if the realuid is not root, in order not to
765 * accidentally leak privileged information to the
766 * user that is logged by a privileged process that is
767 * part of an unprivileged session.*/
772 write_to_journal(s, journal_uid, iovec, n);
/* Log a message generated by the journal daemon itself.
 *
 * The entry gets PRIORITY=6, _TRANSPORT=driver, the printf-style
 * formatted MESSAGE= text and, unless message_id is SD_ID128_NULL, a
 * message id field. It is dispatched with the daemon's own pid/uid/gid
 * as credentials.
 *
 * message_id:  128-bit id identifying the message type, or SD_ID128_NULL
 * format, ...: printf-style message text; output is truncated to the
 *              size of the local buffer */
775 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
776 char mid[11 + 32 + 1];
777 char buffer[16 + LINE_MAX + 1];
/* Room for the (up to four) fields set here plus the metadata fields
 * dispatch_message_real() appends. */
778 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
781 struct ucred ucred = {};
786 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
787 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The 8-byte "MESSAGE=" prefix is followed by the formatted text. */
789 memcpy(buffer, "MESSAGE=", 8);
790 va_start(ap, format);
791 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
793 char_array_0(buffer);
794 IOVEC_SET_STRING(iovec[n++], buffer);
796 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
797 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
799 IOVEC_SET_STRING(iovec[n++], mid);
/* The daemon is its own sender. */
802 ucred.pid = getpid();
803 ucred.uid = getuid();
804 ucred.gid = getgid();
806 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, 0);
/* Entry point for messages received from clients.
 *
 * Messages whose priority level is numerically above s->max_level_store
 * are filtered out. The sender's cgroup path (shortened, see below) is
 * used as the key for rate limiting; if earlier messages were
 * suppressed, a driver message reporting the count is logged. The
 * message is then handed to dispatch_message_real(). */
809 void server_dispatch_message(
811 struct iovec *iovec, unsigned n, unsigned m,
814 const char *label, size_t label_len,
820 _cleanup_free_ char *path = NULL;
824 assert(iovec || n == 0);
829 if (LOG_PRI(priority) > s->max_level_store)
835 r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
839 /* example: /user/lennart/3/foobar
840 * /system/dbus.service/foobar
842 * So let's cut off everything past the third /, since that is
843 * where user directories start */
845 c = strchr(path, '/');
847 c = strchr(c+1, '/');
849 c = strchr(c+1, '/');
/* The remaining space is passed along to the rate limiter. */
855 rl = journal_rate_limit_test(s->rate_limit, path,
856 priority & LOG_PRIMASK, available_space(s));
861 /* Write a suppression message if we suppressed something */
863 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
864 "Suppressed %u messages from %s", rl - 1, path);
867 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, object_pid);
/* Open the journal files this server writes to, as far as they are not
 * open yet.
 *
 * The persistent system journal below /var/log/journal/<machine-id>/ is
 * only opened in persistent or auto storage mode and only once the
 * /run/systemd/journal/flushed flag file exists. The runtime journal
 * below /run/log/journal/<machine-id>/ is handled for every storage mode
 * except "none": if the system journal is open it is merely opened when
 * it already exists (so it can be flushed), otherwise it is created.
 *
 * For every journal a driver message announces the size limit, plus a
 * second one if the currently available space is smaller than that. */
871 static int system_journal_open(Server *s) {
877 r = sd_id128_get_machine(&machine);
881 sd_id128_to_string(machine, ids);
883 if (!s->system_journal &&
884 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
885 access("/run/systemd/journal/flushed", F_OK) >= 0) {
887 /* If in auto mode: first try to create the machine
888 * path, but not the prefix.
890 * If in persistent mode: create /var/log/journal and
891 * the machine path */
893 if (s->storage == STORAGE_PERSISTENT)
894 (void) mkdir("/var/log/journal/", 0755);
896 fn = strappenda("/var/log/journal/", ids);
897 (void) mkdir(fn, 0755);
899 fn = strappenda(fn, "/system.journal");
900 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
903 char fb[FORMAT_BYTES_MAX];
906 server_fix_perms(s, s->system_journal, 0);
908 server_driver_message(s, SD_ID128_NULL, "Allowing system journal files to grow to %s.",
909 format_bytes(fb, sizeof(fb), s->system_metrics.max_use));
911 avail = available_space(s);
913 if (s->system_metrics.max_use > avail)
914 server_driver_message(s, SD_ID128_NULL, "Journal size currently limited to %s due to SystemKeepFree.",
915 format_bytes(fb, sizeof(fb), avail));
/* A missing directory or a read-only file system is expected and not
 * worth a warning. */
919 if (r != -ENOENT && r != -EROFS)
920 log_warning("Failed to open system journal: %s", strerror(-r));
926 if (!s->runtime_journal &&
927 (s->storage != STORAGE_NONE)) {
929 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
933 if (s->system_journal) {
935 /* Try to open the runtime journal, but only
936 * if it already exists, so that we can flush
937 * it into the system journal */
939 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
944 log_warning("Failed to open runtime journal: %s", strerror(-r));
951 /* OK, we really need the runtime journal, so create
952 * it if necessary. */
954 (void) mkdir_parents(fn, 0755);
955 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
959 log_error("Failed to open runtime journal: %s", strerror(-r));
964 if (s->runtime_journal) {
965 char fb[FORMAT_BYTES_MAX];
968 server_fix_perms(s, s->runtime_journal, 0);
969 server_driver_message(s, SD_ID128_NULL, "Allowing runtime journal files to grow to %s.",
970 format_bytes(fb, sizeof(fb), s->runtime_metrics.max_use));
972 avail = available_space(s);
/* Compare against the runtime limit announced just above, not the
 * system journal's limit. */
974 if (s->runtime_metrics.max_use > avail)
975 server_driver_message(s, SD_ID128_NULL, "Journal size currently limited to %s due to RuntimeKeepFree.",
976 format_bytes(fb, sizeof(fb), avail));
/* Copy all entries of the runtime journal (/run) into the persistent
 * system journal (/var), then close the runtime journal and remove
 * /run/log/journal.
 *
 * Only applies to auto or persistent storage mode, and only if a runtime
 * journal is open and the system journal can be opened. If copying an
 * entry fails with an error shall_try_append_again() considers
 * recoverable, the copy is retried once. */
983 int server_flush_to_var(Server *s) {
986 sd_journal *j = NULL;
990 if (s->storage != STORAGE_AUTO &&
991 s->storage != STORAGE_PERSISTENT)
994 if (!s->runtime_journal)
/* Try to bring up the system journal now; /var may have become usable. */
997 system_journal_open(s);
999 if (!s->system_journal)
1002 log_debug("Flushing to /var...");
1004 r = sd_id128_get_machine(&machine);
1006 log_error("Failed to get machine id: %s", strerror(-r));
/* Read the runtime journal through the regular client API. */
1010 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1012 log_error("Failed to read runtime journal: %s", strerror(-r));
/* Threshold 0: presumably disables truncation of large fields -- confirm
 * against the sd_journal_set_data_threshold() documentation. */
1016 sd_journal_set_data_threshold(j, 0);
1018 SD_JOURNAL_FOREACH(j) {
/* Access the entry object of the file the iterator currently points at. */
1022 f = j->current_file;
1023 assert(f && f->current_offset > 0);
1025 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1027 log_error("Can't read entry: %s", strerror(-r));
1031 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1035 if (!shall_try_append_again(s->system_journal, r)) {
1036 log_error("Can't write entry: %s", strerror(-r));
/* The system journal pointer can be NULL at this point if rotating it
 * failed (see the message below). */
1043 if (!s->system_journal) {
1044 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1049 log_debug("Retrying write.");
1050 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1052 log_error("Can't write entry: %s", strerror(-r));
1058 journal_file_post_change(s->system_journal);
/* The runtime journal is no longer needed; drop it and its directory. */
1060 journal_file_close(s->runtime_journal);
1061 s->runtime_journal = NULL;
1064 rm_rf("/run/log/journal", false, true, false);
1066 sd_journal_close(j);
/* Handle one epoll event.
 *
 * The event is routed by comparing ev->data.fd against the server's
 * well-known file descriptors:
 *   - signal fd:            SIGUSR1 triggers a flush to /var; SIGUSR2 and
 *                           other signals are handled separately
 *   - sync timer fd:        the timer expiration is read off the fd
 *   - /dev/kmsg fd:         kernel messages are read
 *   - native / syslog fd:   datagrams are received together with their
 *                           control data and passed to the matching parser
 *   - stdout listening fd:  a new stream connection is set up
 * Anything else is treated as a pointer to a StdoutStream. */
1071 int process_event(Server *s, struct epoll_event *ev) {
1075 if (ev->data.fd == s->signal_fd) {
1076 struct signalfd_siginfo sfsi;
1079 if (ev->events != EPOLLIN) {
1080 log_error("Got invalid event from epoll.");
1084 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1085 if (n != sizeof(sfsi)) {
/* Transient conditions on the signal fd are not an error. */
1090 if (errno == EINTR || errno == EAGAIN)
/* SIGUSR1: create the "flushed" flag file (checked by
 * system_journal_open()) and move the runtime journal to /var. */
1096 if (sfsi.ssi_signo == SIGUSR1) {
1097 touch("/run/systemd/journal/flushed");
1098 server_flush_to_var(s);
1103 if (sfsi.ssi_signo == SIGUSR2) {
1109 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1113 } else if (ev->data.fd == s->sync_timer_fd) {
1117 log_debug("Got sync request from epoll.");
/* Consume the expiration counter so the timer fd stops being readable. */
1119 r = read(ev->data.fd, (void *)&t, sizeof(t));
1126 } else if (ev->data.fd == s->dev_kmsg_fd) {
1129 if (ev->events != EPOLLIN) {
1130 log_error("Got invalid event from epoll.");
1134 r = server_read_dev_kmsg(s);
1140 } else if (ev->data.fd == s->native_fd ||
1141 ev->data.fd == s->syslog_fd) {
1143 if (ev->events != EPOLLIN) {
1144 log_error("Got invalid event from epoll.");
1149 struct msghdr msghdr;
1151 struct ucred *ucred = NULL;
1152 struct timeval *tv = NULL;
1153 struct cmsghdr *cmsg;
1155 size_t label_len = 0;
1157 struct cmsghdr cmsghdr;
1159 /* We use NAME_MAX space for the
1160 * SELinux label here. The kernel
1161 * currently enforces no limit, but
1162 * according to suggestions from the
1163 * SELinux people this will change and
1164 * it will probably be identical to
1165 * NAME_MAX. For now we use that, but
1166 * this should be updated one day when
1167 * the final limit is known.*/
1168 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1169 CMSG_SPACE(sizeof(struct timeval)) +
1170 CMSG_SPACE(sizeof(int)) + /* fd */
1171 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask the socket how many bytes are pending in its receive queue. */
1178 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1179 log_error("SIOCINQ failed: %m");
/* Grow the receive buffer if it is smaller than that: at least double
 * it, and leave LINE_MAX bytes of headroom. One extra byte is
 * allocated beyond the computed size. */
1183 if (s->buffer_size < (size_t) v) {
1187 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1188 b = realloc(s->buffer, l+1);
1191 log_error("Couldn't increase buffer.");
1200 iovec.iov_base = s->buffer;
1201 iovec.iov_len = s->buffer_size;
1205 msghdr.msg_iov = &iovec;
1206 msghdr.msg_iovlen = 1;
1207 msghdr.msg_control = &control;
1208 msghdr.msg_controllen = sizeof(control);
1210 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1213 if (errno == EINTR || errno == EAGAIN)
1216 log_error("recvmsg() failed: %m");
/* Pick up sender credentials, SELinux label, receive timestamp and
 * passed file descriptors from the control messages. Credentials and
 * timestamp are only accepted if the payload has the exact expected
 * size. */
1220 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1222 if (cmsg->cmsg_level == SOL_SOCKET &&
1223 cmsg->cmsg_type == SCM_CREDENTIALS &&
1224 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1225 ucred = (struct ucred*) CMSG_DATA(cmsg);
1226 else if (cmsg->cmsg_level == SOL_SOCKET &&
1227 cmsg->cmsg_type == SCM_SECURITY) {
1228 label = (char*) CMSG_DATA(cmsg);
1229 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1230 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1231 cmsg->cmsg_type == SO_TIMESTAMP &&
1232 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1233 tv = (struct timeval*) CMSG_DATA(cmsg);
1234 else if (cmsg->cmsg_level == SOL_SOCKET &&
1235 cmsg->cmsg_type == SCM_RIGHTS) {
1236 fds = (int*) CMSG_DATA(cmsg);
1237 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* Syslog socket: plain text only, file descriptors are not accepted. */
1241 if (ev->data.fd == s->syslog_fd) {
1244 if (n > 0 && n_fds == 0) {
1245 e = memchr(s->buffer, '\n', n);
1251 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1252 } else if (n_fds > 0)
1253 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native socket: either an inline payload, or an empty datagram
 * carrying exactly one file descriptor that holds the message. */
1256 if (n > 0 && n_fds == 0)
1257 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1258 else if (n == 0 && n_fds == 1)
1259 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1261 log_warning("Got too many file descriptors via native socket. Ignoring.");
/* Received descriptors are always closed here. */
1264 close_many(fds, n_fds);
1269 } else if (ev->data.fd == s->stdout_fd) {
1271 if (ev->events != EPOLLIN) {
1272 log_error("Got invalid event from epoll.");
1276 stdout_stream_new(s);
1280 StdoutStream *stream;
/* Reject events carrying any bit other than EPOLLIN and EPOLLHUP. */
1282 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1283 log_error("Got invalid event from epoll.");
1287 /* If it is none of the well-known fds, it must be an
1288 * stdout stream fd. Note that this is a bit ugly here
1289 * (since we rely that none of the well-known fds
1290 * could be interpreted as pointer), but nonetheless
1291 * safe, since the well-known fds would never get an
1292 * fd > 4096, i.e. beyond the first memory page */
1294 stream = ev->data.ptr;
/* A non-positive result ends the stream and releases it. */
1296 if (stdout_stream_process(stream) <= 0)
1297 stdout_stream_free(stream);
1302 log_error("Unknown event.");
/* Block SIGINT, SIGTERM, SIGUSR1 and SIGUSR2 for normal delivery, create
 * a signalfd for them (stored in s->signal_fd) and register it with the
 * server's epoll instance. */
1306 static int open_signalfd(Server *s) {
1308 struct epoll_event ev;
1312 assert_se(sigemptyset(&mask) == 0);
1313 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
/* SIG_SETMASK replaces the whole signal mask with exactly these signals. */
1314 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1316 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1317 if (s->signal_fd < 0) {
1318 log_error("signalfd(): %m");
/* process_event() recognizes the source by comparing data.fd. */
1323 ev.events = EPOLLIN;
1324 ev.data.fd = s->signal_fd;
1326 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1327 log_error("epoll_ctl(): %m");
/* Apply journald settings given on the kernel command line.
 *
 * Recognized switches (boolean values):
 *   systemd.journald.forward_to_syslog=
 *   systemd.journald.forward_to_kmsg=
 *   systemd.journald.forward_to_console=
 * Values that fail to parse and unknown systemd.journald* words only
 * produce a warning. The container check below precedes reading the
 * file (presumably to skip the kernel command line inside containers). */
1334 static int server_parse_proc_cmdline(Server *s) {
1335 _cleanup_free_ char *line = NULL;
1340 if (detect_container(NULL) > 0)
1343 r = read_one_line_file("/proc/cmdline", &line);
1345 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1349 FOREACH_WORD_QUOTED(w, l, line, state) {
1350 _cleanup_free_ char *word;
/* Work on a NUL-terminated copy of the current word. */
1352 word = strndup(w, l);
/* The numeric offsets below equal the length of the matched prefix, so
 * word + N points at the value after the '='. */
1356 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1357 r = parse_boolean(word + 35);
1359 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1361 s->forward_to_syslog = r;
1362 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1363 r = parse_boolean(word + 33);
1365 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1367 s->forward_to_kmsg = r;
1368 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1369 r = parse_boolean(word + 36);
1371 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1373 s->forward_to_console = r;
1374 } else if (startswith(word, "systemd.journald"))
1375 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Read /etc/systemd/journald.conf and apply its [Journal] section to the
 * server via the gperf-generated lookup table. A missing file is handled
 * separately from other open errors; other problems are logged as
 * warnings. */
1381 static int server_parse_config_file(Server *s) {
1382 static const char fn[] = "/etc/systemd/journald.conf";
1383 _cleanup_fclose_ FILE *f = NULL;
/* "e" requests O_CLOEXEC on the underlying descriptor (glibc extension). */
1388 f = fopen(fn, "re");
1390 if (errno == ENOENT)
1393 log_warning("Failed to open configuration file %s: %m", fn);
1397 r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1398 (void*) journald_gperf_lookup, false, false, s);
1400 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Create the monotonic timer fd used for deferred syncing (stored in
 * s->sync_timer_fd) and register it with the server's epoll instance.
 * The timer is created disarmed; server_schedule_sync() arms it. */
1405 static int server_open_sync_timer(Server *s) {
1407 struct epoll_event ev;
1411 s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1412 if (s->sync_timer_fd < 0)
/* process_event() recognizes the source by comparing data.fd. */
1416 ev.events = EPOLLIN;
1417 ev.data.fd = s->sync_timer_fd;
1419 r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1421 log_error("Failed to add idle timer fd to epoll object: %m");
/* Arm the sync timer so that it fires once, s->sync_interval_usec from
 * now, unless a sync is already scheduled. With a zero interval the
 * timer is not armed. */
1428 int server_schedule_sync(Server *s) {
/* One pending timer is enough; server_sync() clears this flag again. */
1433 if (s->sync_scheduled)
1436 if (s->sync_interval_usec) {
/* it_interval stays zero: a one-shot timer, relative to now (flags 0). */
1437 struct itimerspec sync_timer_enable = {};
1439 timespec_store(&sync_timer_enable.it_value, s->sync_interval_usec);
1441 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1446 s->sync_scheduled = true;
/* Initialize a Server object.
 *
 * Sets built-in defaults, applies the configuration file and the kernel
 * command line, creates the user-journal map, mmap cache and epoll
 * instance, takes over sockets passed in by the service manager, opens
 * the remaining input sources (sockets, /dev/kmsg, sync timer,
 * signalfd), creates the udev context and the rate limiter, and finally
 * opens the journal files. */
1451 int server_init(Server *s) {
/* Mark every file descriptor as "not open". */
1457 s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1458 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1462 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1463 s->sync_scheduled = false;
1465 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1466 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1468 s->forward_to_syslog = true;
/* Default maximum log levels per destination. */
1470 s->max_level_store = LOG_DEBUG;
1471 s->max_level_syslog = LOG_DEBUG;
1472 s->max_level_kmsg = LOG_NOTICE;
1473 s->max_level_console = LOG_INFO;
/* Fill the metrics with all-ones bytes; presumably this marks every
 * value as "unset" so that defaults are chosen later -- TODO confirm
 * against the journal-file metrics code. */
1475 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1476 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* The kernel command line is parsed after the configuration file, so its
 * assignments are applied last. */
1478 server_parse_config_file(s);
1479 server_parse_proc_cmdline(s);
/* Rate limiting needs both an interval and a burst; if exactly one of
 * them is zero, disable it completely. */
1480 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1481 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1482 (long long unsigned) s->rate_limit_interval,
1483 s->rate_limit_burst);
1484 s->rate_limit_interval = s->rate_limit_burst = 0;
1487 mkdir_p("/run/systemd/journal", 0755);
1489 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1490 if (!s->user_journals)
1493 s->mmap = mmap_cache_new();
1497 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1498 if (s->epoll_fd < 0) {
1499 log_error("Failed to create epoll object: %m");
/* Sockets handed over via socket activation: each is identified by type
 * and path, and at most one socket of each kind is accepted. */
1503 n = sd_listen_fds(true);
1505 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1509 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1511 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1513 if (s->native_fd >= 0) {
1514 log_error("Too many native sockets passed.");
1520 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1522 if (s->stdout_fd >= 0) {
1523 log_error("Too many stdout sockets passed.");
1529 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1531 if (s->syslog_fd >= 0) {
1532 log_error("Too many /dev/log sockets passed.");
1539 log_error("Unknown socket passed.");
/* Set up the individual input sources. */
1544 r = server_open_syslog_socket(s);
1548 r = server_open_native_socket(s);
1552 r = server_open_stdout_socket(s);
1556 r = server_open_dev_kmsg(s);
1560 r = server_open_kernel_seqnum(s);
1564 r = server_open_sync_timer(s);
1568 r = open_signalfd(s);
1572 s->udev = udev_new();
1576 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1577 s->rate_limit_burst);
1581 r = system_journal_open(s);
/* Give the system journal and every user journal the opportunity to
 * append a tag, using one common realtime timestamp for all files. The
 * runtime journal is not included. */
1588 void server_maybe_append_tags(Server *s) {
1594 n = now(CLOCK_REALTIME);
1596 if (s->system_journal)
1597 journal_file_maybe_append_tag(s->system_journal, n);
1599 HASHMAP_FOREACH(f, s->user_journals, i)
1600 journal_file_maybe_append_tag(f, n);
/* Release everything owned by the Server: stdout streams, journal files,
 * the user-journal map, all open file descriptors, the rate limiter, the
 * kernel sequence number mapping, the mmap cache and the udev context. */
1604 void server_done(Server *s) {
/* The loop re-reads the list head each time, so stdout_stream_free() is
 * expected to unlink the stream from s->stdout_streams. */
1608 while (s->stdout_streams)
1609 stdout_stream_free(s->stdout_streams);
1611 if (s->system_journal)
1612 journal_file_close(s->system_journal);
1614 if (s->runtime_journal)
1615 journal_file_close(s->runtime_journal);
/* Close every per-user journal before freeing the (then empty) map. */
1617 while ((f = hashmap_steal_first(s->user_journals)))
1618 journal_file_close(f);
1620 hashmap_free(s->user_journals);
/* File descriptors are -1 while not open (see server_init()). */
1622 if (s->epoll_fd >= 0)
1623 close_nointr_nofail(s->epoll_fd);
1625 if (s->signal_fd >= 0)
1626 close_nointr_nofail(s->signal_fd);
1628 if (s->syslog_fd >= 0)
1629 close_nointr_nofail(s->syslog_fd);
1631 if (s->native_fd >= 0)
1632 close_nointr_nofail(s->native_fd);
1634 if (s->stdout_fd >= 0)
1635 close_nointr_nofail(s->stdout_fd);
1637 if (s->dev_kmsg_fd >= 0)
1638 close_nointr_nofail(s->dev_kmsg_fd);
1640 if (s->sync_timer_fd >= 0)
1641 close_nointr_nofail(s->sync_timer_fd);
1644 journal_rate_limit_free(s->rate_limit);
/* The kernel sequence number is a memory mapping of a single uint64_t. */
1646 if (s->kernel_seqnum)
1647 munmap(s->kernel_seqnum, sizeof(uint64_t));
1653 mmap_cache_unref(s->mmap);
1656 udev_unref(s->udev);