1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
43 #include "conf-parser.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-server.h"
48 #include "journald-rate-limit.h"
49 #include "journald-kmsg.h"
50 #include "journald-syslog.h"
51 #include "journald-stream.h"
52 #include "journald-console.h"
53 #include "journald-native.h"
57 #include <acl/libacl.h>
62 #include <selinux/selinux.h>
/* Upper bound on the number of entries kept in s->user_journals;
 * find_journal() closes entries while the map has reached this size. */
65 #define USER_JOURNALS_MAX 1024
/* Defaults that server_init() stores in the Server before the
 * configuration file and kernel command line are parsed. */
67 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
68 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
69 #define DEFAULT_RATE_LIMIT_BURST 1000
/* How long available_space() trusts its cached result. */
71 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Names for the Storage enum values, indexed by enum value. The two
 * DEFINE_* macros below are handed this table's prefix ("storage");
 * presumably they generate the string<->enum helpers and the
 * config_parse_storage() callback -- confirm against the macro
 * definitions, which are not in this file. */
73 static const char* const storage_table[] = {
74 [STORAGE_AUTO] = "auto",
75 [STORAGE_VOLATILE] = "volatile",
76 [STORAGE_PERSISTENT] = "persistent",
77 [STORAGE_NONE] = "none"
80 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
81 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Names for the SplitMode enum values, indexed by enum value.
 * NOTE(review): the listing skips one embedded line between the "none"
 * and "login" entries; SPLIT_UID is used by dispatch_message_real(), so
 * its entry presumably belongs there -- confirm against the full file. */
83 static const char* const split_mode_table[] = {
84 [SPLIT_NONE] = "none",
86 [SPLIT_LOGIN] = "login"
89 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
90 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Returns how many more bytes the journal directory may grow by and
 * caches that value in s->cached_available_space, stamped with the
 * CLOCK_MONOTONIC time in s->cached_available_space_timestamp.
 *
 * The directory and the limits follow the open journal:
 * "/var/log/journal/" with s->system_metrics when s->system_journal is
 * set; the "/run/log/journal/" + s->runtime_metrics assignments are
 * presumably the else branch (the line between them is not part of
 * this listing).
 *
 * NOTE(review): the embedded line numbers skip, so several
 * declarations, error checks and closing braces are not visible here.
 * In particular the way `verbose` is used is not shown; the usage
 * message at the end is presumably guarded by it -- confirm. */
92 static uint64_t available_space(Server *s, bool verbose) {
94 _cleanup_free_ char *p = NULL;
97 uint64_t sum = 0, ss_avail = 0, avail = 0;
99 _cleanup_closedir_ DIR *d = NULL;
104 ts = now(CLOCK_MONOTONIC);
/* Serve the cached value while it is younger than
 * RECHECK_AVAILABLE_SPACE_USEC. The remainder of this condition sits
 * on a line that is missing from the listing. */
106 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
108 return s->cached_available_space;
110 r = sd_id128_get_machine(&machine);
114 if (s->system_journal) {
115 f = "/var/log/journal/";
116 m = &s->system_metrics;
118 f = "/run/log/journal/";
119 m = &s->runtime_metrics;
/* Directory to scan: the prefix chosen above followed by the machine
 * ID rendered as a string. */
124 p = strappend(f, sd_id128_to_string(machine, ids));
/* File system statistics are taken through the open directory d. */
132 if (fstatvfs(dirfd(d), &ss) < 0)
138 union dirent_storage buf;
140 r = readdir_r(d, &buf.de, &de);
/* Only entries named *.journal or *.journal~ are of interest. */
147 if (!endswith(de->d_name, ".journal") &&
148 !endswith(de->d_name, ".journal~"))
/* AT_SYMLINK_NOFOLLOW: look at the directory entry itself, then
 * require a regular file. */
151 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
154 if (!S_ISREG(st.st_mode))
/* Disk usage is accumulated from st_blocks, counted as 512-byte
 * units. */
157 sum += (uint64_t) st.st_blocks * 512UL;
/* ss_avail: bytes free on the file system (f_bsize * f_bavail).
 * avail:    the same minus the keep_free reserve, floored at 0. */
160 ss_avail = ss.f_bsize * ss.f_bavail;
161 avail = ss_avail > m->keep_free ? ss_avail - m->keep_free : 0;
/* Effective limit is min(max_use, avail); what is already used (sum)
 * is subtracted from it, again floored at 0. */
163 s->cached_available_space = MIN(m->max_use, avail) > sum ? MIN(m->max_use, avail) - sum : 0;
164 s->cached_available_space_timestamp = ts;
/* Report usage, limits and free space as a driver message; each
 * format_bytes() call needs its own scratch buffer because all five
 * results are consumed by the same call. */
167 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
168 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
170 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
171 "%s journal is using %s (max %s, leaving %s of free %s, current limit %s).",
172 s->system_journal ? "Permanent" : "Runtime",
173 format_bytes(fb1, sizeof(fb1), sum),
174 format_bytes(fb2, sizeof(fb2), m->max_use),
175 format_bytes(fb3, sizeof(fb3), m->keep_free),
176 format_bytes(fb4, sizeof(fb4), ss_avail),
177 format_bytes(fb5, sizeof(fb5), MIN(m->max_use, avail)));
180 return s->cached_available_space;
/* Best-effort permission fix-up for journal file f: sets the file mode
 * to 0640 and patches the file's POSIX ACL with an ACL_USER entry for
 * `uid` that carries ACL_READ. Every failure is only logged as a
 * warning ("ignoring"); nothing is reported to the caller.
 *
 * NOTE(review): the embedded line numbers skip, so the guards around
 * the individual steps (for example whether the ACL part is skipped
 * for some uid values, or whether the entry is created only when
 * acl_find_uid() finds none) are not visible in this listing. */
183 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
188 acl_permset_t permset;
193 r = fchmod(f->fd, 0640);
/* fchmod() reports failure as -1 with the cause in errno, so the
 * error text has to come from errno (%m, as every other warning in
 * this function does). strerror(-r) would always describe errno 1. */
195 log_warning("Failed to fix access mode on %s, ignoring: %m", f->path);
201 acl = acl_get_fd(f->fd);
203 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an existing entry for uid; the lines below add an
 * ACL_USER entry qualified with uid. */
207 r = acl_find_uid(acl, uid, &entry);
210 if (acl_create_entry(&acl, &entry) < 0 ||
211 acl_set_tag_type(entry, ACL_USER) < 0 ||
212 acl_set_qualifier(entry, &uid) < 0) {
213 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
218 /* We do not recalculate the mask unconditionally here,
219 * so that the fchmod() mask above stays intact. */
220 if (acl_get_permset(entry, &permset) < 0 ||
221 acl_add_perm(permset, ACL_READ) < 0 ||
222 calc_acl_mask_if_needed(&acl) < 0) {
223 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Write the patched ACL back to the file. */
227 if (acl_set_fd(f->fd, acl) < 0)
228 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Chooses the journal file an entry attributed to `uid` is written to.
 *
 * Return paths visible in this listing:
 *   - s->runtime_journal whenever it is open;
 *   - s->system_journal as the fallback when the machine ID cannot be
 *     read, the path cannot be formatted, the per-user file cannot be
 *     opened, or it cannot be inserted into s->user_journals.
 * Per-user files are named
 * /var/log/journal/<machine-id>/user-<uid>.journal and are cached in
 * the s->user_journals hashmap keyed by uid.
 *
 * NOTE(review): the embedded line numbers skip; the conditions guarding
 * several of the returns and the success return are not visible. */
235 static JournalFile* find_journal(Server *s, uid_t uid) {
236 _cleanup_free_ char *p = NULL;
243 /* We split up user logs only on /var, not on /run. If the
244 * runtime file is open, we write to it exclusively, in order
245 * to guarantee proper order as soon as we flush /run to
246 * /var and close the runtime file. */
248 if (s->runtime_journal)
249 return s->runtime_journal;
252 return s->system_journal;
254 r = sd_id128_get_machine(&machine);
256 return s->system_journal;
/* Look for an already open per-user journal first. */
258 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
262 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
263 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
264 return s->system_journal;
/* Keep the number of open per-user journals below
 * USER_JOURNALS_MAX by closing whichever entry the hashmap hands
 * out first. */
266 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
267 /* Too many open? Then let's close one */
268 f = hashmap_steal_first(s->user_journals);
270 journal_file_close(f);
/* Open (creating if needed) the per-user file with the system
 * metrics, then fix its mode/ACL for uid. */
273 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
275 return s->system_journal;
277 server_fix_perms(s, f, uid);
/* Remember the file; if that fails, close it again and fall back
 * to the system journal. */
279 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
281 journal_file_close(f);
282 return s->system_journal;
/* Rotates every open journal: the runtime journal (rotated with
 * sealing passed as false), the system journal and each per-user
 * journal in s->user_journals. After a rotation server_fix_perms() is
 * applied to the resulting file. Failures are logged; nothing is
 * returned.
 *
 * NOTE(review): the embedded line numbers skip; the `if (r < 0)` /
 * `else` lines that select between the log calls and the fix-up calls
 * are not visible in this listing. */
288 void server_rotate(Server *s) {
294 log_debug("Rotating...");
296 if (s->runtime_journal) {
297 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
/* journal_file_rotate() takes the pointer by address; the two
 * messages distinguish whether a file is still referenced
 * afterwards. */
299 if (s->runtime_journal)
300 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
302 log_error("Failed to create new runtime journal: %s", strerror(-r));
304 server_fix_perms(s, s->runtime_journal, 0);
307 if (s->system_journal) {
308 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
310 if (s->system_journal)
311 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
313 log_error("Failed to create new system journal: %s", strerror(-r));
316 server_fix_perms(s, s->system_journal, 0);
/* Per-user journals: k is the hashmap key (the uid), f the file. */
319 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
320 r = journal_file_rotate(&f, s->compress, s->seal);
323 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
/* No usable file left for this uid: drop the map entry. */
325 log_error("Failed to create user journal: %s", strerror(-r));
326 hashmap_remove(s->user_journals, k);
/* Store the (possibly new) file pointer under the same key and
 * fix its permissions for that uid. */
329 hashmap_replace(s->user_journals, k, f);
330 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Sets the system journal and every per-user journal offline via
 * journal_file_set_offline(), then disarms the sync timer and clears
 * s->sync_scheduled. Failures are logged only.
 *
 * NOTE(review): the embedded line numbers skip; the `if (r < 0)` lines
 * guarding the log calls are not visible in this listing. */
335 void server_sync(Server *s) {
/* An all-zero itimerspec passed to timerfd_settime() disarms the
 * timer. */
336 static const struct itimerspec sync_timer_disable = {};
342 if (s->system_journal) {
343 r = journal_file_set_offline(s->system_journal);
345 log_error("Failed to sync system journal: %s", strerror(-r));
348 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
349 r = journal_file_set_offline(f);
351 log_error("Failed to sync user journal: %s", strerror(-r));
354 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
356 log_error("Failed to disable max timer: %m");
/* Allow server_schedule_sync() to arm the timer again. */
358 s->sync_scheduled = false;
/* Runs journal_directory_vacuum() on the machine's directory under
 * /var/log/journal/ (when the system journal is open) and under
 * /run/log/journal/ (when the runtime journal is open), using the
 * matching metrics and s->max_retention_usec. s->oldest_file_usec is
 * reset to 0 first and passed to both calls by address. -ENOENT from
 * the vacuum call is not logged as an error.
 *
 * NOTE(review): the embedded line numbers skip; the guard and early
 * return after the machine ID lookup are not visible in this listing. */
361 void server_vacuum(Server *s) {
366 log_debug("Vacuuming...");
368 s->oldest_file_usec = 0;
370 r = sd_id128_get_machine(&machine);
372 log_error("Failed to get machine ID: %s", strerror(-r));
376 sd_id128_to_string(machine, ids);
378 if (s->system_journal) {
379 char *p = strappenda("/var/log/journal/", ids);
381 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
382 if (r < 0 && r != -ENOENT)
383 log_error("Failed to vacuum %s: %s", p, strerror(-r));
386 if (s->runtime_journal) {
387 char *p = strappenda("/run/log/journal/", ids);
389 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
390 if (r < 0 && r != -ENOENT)
391 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* Disk usage may have changed: a zero timestamp makes the next
 * available_space() call recompute instead of using its cache. */
394 s->cached_available_space_timestamp = 0;
/* Classifies the negative errno-style result `r` of a failed journal
 * write on file f and logs why the file is about to be rotated. The
 * callers in this file (write_to_journal(), server_flush_to_var())
 * give up on the entry when this returns false.
 *
 * Codes handled by the visible branches:
 *   -E2BIG, -EFBIG, -EDQUOT, -ENOSPC   allocation limit reached
 *   -EHOSTDOWN                         file from another machine
 *   -EBUSY                             unclean shutdown
 *   -EPROTONOSUPPORT                   unsupported feature
 *   -EBADMSG, -ENODATA, -ESHUTDOWN     file treated as corrupted
 *
 * NOTE(review): the embedded line numbers skip; the final else branch
 * and the return statements are not visible in this listing. */
397 bool shall_try_append_again(JournalFile *f, int r) {
399 /* -E2BIG Hit configured limit
401 -EDQUOT Quota limit hit
403 -EHOSTDOWN Other machine
404 -EBUSY Unclean shutdown
405 -EPROTONOSUPPORT Unsupported feature
408 -ESHUTDOWN Already archived */
410 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
411 log_debug("%s: Allocation limit reached, rotating.", f->path);
412 else if (r == -EHOSTDOWN)
413 log_info("%s: Journal file from other machine, rotating.", f->path);
414 else if (r == -EBUSY)
415 log_info("%s: Unclean shutdown, rotating.", f->path);
416 else if (r == -EPROTONOSUPPORT)
417 log_info("%s: Unsupported feature, rotating.", f->path);
/* r carries negated errno values, so the "already archived" case
 * must be compared against -ESHUTDOWN like every other code here;
 * the positive constant could never match a failure result. */
418 else if (r == -EBADMSG || r == -ENODATA || r == -ESHUTDOWN)
419 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Appends one entry (n iovec fields) to the journal selected by
 * find_journal(s, uid). On success a sync is scheduled according to
 * `priority`. On failure the entry is retried once if
 * shall_try_append_again() approves and no retry has happened yet;
 * otherwise the error is logged together with the number of fields and
 * the summed iov_len of all fields, and the entry is dropped.
 *
 * NOTE(review): the embedded line numbers skip; the rotate/vacuum calls
 * between the visible steps, the checks on f and r, and the place where
 * `vacuumed` is set are not visible in this listing. */
426 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
428 bool vacuumed = false;
435 f = find_journal(s, uid);
/* The file may ask to be rotated before anything is written; the
 * journal is looked up again afterwards. */
439 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
440 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
445 f = find_journal(s, uid);
450 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
452 server_schedule_sync(s, priority);
456 if (vacuumed || !shall_try_append_again(f, r)) {
459 for (i = 0; i < n; i++)
460 size += iovec[i].iov_len;
/* n is unsigned, so it is printed with %u. */
462 log_error("Failed to write entry (%u items, %zu bytes), ignoring: %s", n, size, strerror(-r));
469 f = find_journal(s, uid);
473 log_debug("Retrying write.");
474 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
478 for (i = 0; i < n; i++)
479 size += iovec[i].iov_len;
/* n is unsigned, so it is printed with %u. */
481 log_error("Failed to write entry (%u items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
483 server_schedule_sync(s, priority);
/* Appends the trusted metadata fields to an entry and hands it to
 * write_to_journal().
 *
 * The caller passes iovec[] with n fields already filled and room for
 * m entries in total; the assert below requires space for
 * N_IOVEC_META_FIELDS more (plus N_IOVEC_OBJECT_FIELDS when object_pid
 * is non-zero). Judging by the call in server_dispatch_message(), the
 * full parameter list is (s, iovec, n, m, ucred, tv, label, label_len,
 * unit_id, priority, object_pid); only part of the signature is
 * visible in this listing.
 *
 * Fields added, in order: sender credentials and process details
 * (_PID, _UID, _GID, _COMM, _EXE, _CMDLINE, _CAP_EFFECTIVE, audit and
 * cgroup-derived fields, _SELINUX_CONTEXT), the same kind of data for
 * object_pid under OBJECT_* names, then _SOURCE_REALTIME_TIMESTAMP,
 * _BOOT_ID, _MACHINE_ID and _HOSTNAME.
 *
 * NOTE(review): the embedded line numbers skip; the `if (ucred)`,
 * `if (r >= 0)`, `#ifdef` and free() lines that presumably surround
 * most steps are not visible, so each field should be read as "added
 * when the corresponding lookup succeeded". */
486 static void dispatch_message_real(
488 struct iovec *iovec, unsigned n, unsigned m,
491 const char *label, size_t label_len,
/* Fixed-size scratch buffers for the numeric fields; each is sized
 * as field prefix plus the maximum decimal width of its type. The
 * two ID buffers are pre-filled with their prefix and leave 32
 * characters for the ID string. */
496 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
497 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
498 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
499 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
500 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
501 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
502 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=",
503 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
504 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
505 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
512 uid_t realuid = 0, owner = 0, journal_uid;
513 bool owner_valid = false;
515 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
516 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
517 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
518 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
/* The caller must have reserved room for every field added below. */
527 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
/* --- Sender credentials taken from ucred --- */
530 realuid = ucred->uid;
532 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
533 IOVEC_SET_STRING(iovec[n++], pid);
535 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
536 IOVEC_SET_STRING(iovec[n++], uid);
538 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
539 IOVEC_SET_STRING(iovec[n++], gid);
/* --- Process details looked up by PID. The strings are built with
 * strappenda(); presumably that allocates on the stack so they stay
 * valid until the function returns -- confirm. */
541 r = get_process_comm(ucred->pid, &t);
543 x = strappenda("_COMM=", t);
545 IOVEC_SET_STRING(iovec[n++], x);
548 r = get_process_exe(ucred->pid, &t);
550 x = strappenda("_EXE=", t);
552 IOVEC_SET_STRING(iovec[n++], x);
555 r = get_process_cmdline(ucred->pid, 0, false, &t);
557 x = strappenda("_CMDLINE=", t);
559 IOVEC_SET_STRING(iovec[n++], x);
562 r = get_process_capeff(ucred->pid, &t);
564 x = strappenda("_CAP_EFFECTIVE=", t);
566 IOVEC_SET_STRING(iovec[n++], x);
/* --- Audit session and login uid of the sender --- */
570 r = audit_session_from_pid(ucred->pid, &audit);
572 sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
573 IOVEC_SET_STRING(iovec[n++], audit_session);
576 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
578 sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
579 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* --- Fields derived from the sender's cgroup path c --- */
583 r = cg_pid_get_path_shifted(ucred->pid, NULL, &c);
/* `session` doubles as a flag below: it decides whether a
 * caller-supplied unit_id is recorded as a system unit or as a
 * user unit. */
585 char *session = NULL;
587 x = strappenda("_SYSTEMD_CGROUP=", c);
588 IOVEC_SET_STRING(iovec[n++], x);
590 r = cg_path_get_session(c, &t);
592 session = strappenda("_SYSTEMD_SESSION=", t);
594 IOVEC_SET_STRING(iovec[n++], session);
/* `owner` is read again at the end when the journal uid is
 * chosen. */
597 if (cg_path_get_owner_uid(c, &owner) >= 0) {
600 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
601 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* Unit from the cgroup path, else the caller's unit_id when the
 * sender is not in a session. */
604 if (cg_path_get_unit(c, &t) >= 0) {
605 x = strappenda("_SYSTEMD_UNIT=", t);
607 IOVEC_SET_STRING(iovec[n++], x);
608 } else if (unit_id && !session) {
609 x = strappenda("_SYSTEMD_UNIT=", unit_id);
610 IOVEC_SET_STRING(iovec[n++], x);
/* User unit from the cgroup path, else the caller's unit_id when
 * the sender is in a session. */
613 if (cg_path_get_user_unit(c, &t) >= 0) {
614 x = strappenda("_SYSTEMD_USER_UNIT=", t);
616 IOVEC_SET_STRING(iovec[n++], x);
617 } else if (unit_id && session) {
618 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
619 IOVEC_SET_STRING(iovec[n++], x);
622 if (cg_path_get_slice(c, &t) >= 0) {
623 x = strappenda("_SYSTEMD_SLICE=", t);
625 IOVEC_SET_STRING(iovec[n++], x);
/* --- SELinux context. The label passed in is copied by length
 * (label_len bytes) and NUL-terminated explicitly; the getpidcon()
 * lookup below is presumably the fallback when no label was
 * passed -- the selecting condition is not visible. */
634 x = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);
636 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
637 IOVEC_SET_STRING(iovec[n++], x);
639 security_context_t con;
641 if (getpidcon(ucred->pid, &con) >= 0) {
642 x = strappenda("_SELINUX_CONTEXT=", con);
645 IOVEC_SET_STRING(iovec[n++], x);
/* --- The same kind of lookups for object_pid, stored under
 * OBJECT_* field names --- */
654 r = get_process_uid(object_pid, &object_uid);
656 sprintf(o_uid, "OBJECT_UID=%lu", (unsigned long) object_uid);
657 IOVEC_SET_STRING(iovec[n++], o_uid);
660 r = get_process_gid(object_pid, &object_gid);
662 sprintf(o_gid, "OBJECT_GID=%lu", (unsigned long) object_gid);
663 IOVEC_SET_STRING(iovec[n++], o_gid);
666 r = get_process_comm(object_pid, &t);
668 x = strappenda("OBJECT_COMM=", t);
670 IOVEC_SET_STRING(iovec[n++], x);
673 r = get_process_exe(object_pid, &t);
675 x = strappenda("OBJECT_EXE=", t);
677 IOVEC_SET_STRING(iovec[n++], x);
680 r = get_process_cmdline(object_pid, 0, false, &t);
682 x = strappenda("OBJECT_CMDLINE=", t);
684 IOVEC_SET_STRING(iovec[n++], x);
688 r = audit_session_from_pid(object_pid, &audit);
690 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%lu", (unsigned long) audit);
691 IOVEC_SET_STRING(iovec[n++], o_audit_session);
694 r = audit_loginuid_from_pid(object_pid, &loginuid);
696 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
697 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
701 r = cg_pid_get_path_shifted(object_pid, NULL, &c);
703 x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
704 IOVEC_SET_STRING(iovec[n++], x);
706 r = cg_path_get_session(c, &t);
708 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
710 IOVEC_SET_STRING(iovec[n++], x);
/* NOTE(review): this writes the object's owner uid into the same
 * `owner` variable that the journal uid selection at the end
 * reads -- confirm that is intended. */
713 if (cg_path_get_owner_uid(c, &owner) >= 0) {
714 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
715 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
718 if (cg_path_get_unit(c, &t) >= 0) {
719 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
721 IOVEC_SET_STRING(iovec[n++], x);
724 if (cg_path_get_user_unit(c, &t) >= 0) {
725 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
727 IOVEC_SET_STRING(iovec[n++], x);
/* --- Timestamp supplied with the message (tv) --- */
736 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
737 IOVEC_SET_STRING(iovec[n++], source_time);
740 /* Note that strictly speaking storing the boot id here is
741 * redundant since the entry includes this in-line
742 * anyway. However, we need this indexed, too. */
743 r = sd_id128_get_boot(&id);
/* The ID string is written directly behind the pre-filled
 * prefix. */
745 sd_id128_to_string(id, boot_id + strlen("_BOOT_ID="));
746 IOVEC_SET_STRING(iovec[n++], boot_id);
749 r = sd_id128_get_machine(&id);
751 sd_id128_to_string(id, machine_id + strlen("_MACHINE_ID="));
752 IOVEC_SET_STRING(iovec[n++], machine_id);
755 t = gethostname_malloc();
757 x = strappenda("_HOSTNAME=", t);
759 IOVEC_SET_STRING(iovec[n++], x);
/* --- Choose which journal (by uid) receives the entry. The
 * assignments for the SPLIT_LOGIN branch and the final else are on
 * lines missing from this listing. */
764 if (s->split_mode == SPLIT_UID && realuid > 0)
765 /* Split up strictly by any UID */
766 journal_uid = realuid;
767 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
768 /* Split up by login UIDs, this avoids creation of
769 * individual journals for system UIDs. We do this
770 * only if the realuid is not root, in order not to
771 * accidentally leak privileged information to the
772 * user that is logged by a privileged process that is
773 * part of an unprivileged session.*/
778 write_to_journal(s, journal_uid, iovec, n, priority);
/* Logs a message generated by journald itself. Builds an entry with
 * PRIORITY=6, _TRANSPORT=driver, MESSAGE=<printf-style text> and, when
 * message_id is not SD_ID128_NULL, a message-ID field, then dispatches
 * it with journald's own pid/uid/gid as credentials and LOG_INFO
 * priority.
 *
 * format and the variadic arguments are printf-style; the text is
 * truncated to what fits in `buffer`.
 *
 * NOTE(review): the embedded line numbers skip; the va_end() call and
 * the declarations of n and ap are not visible in this listing. */
781 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
782 char mid[11 + 32 + 1];
783 char buffer[16 + LINE_MAX + 1];
/* Four fields are set here; the rest of the array is reserved for
 * the metadata dispatch_message_real() appends. */
784 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
787 struct ucred ucred = {};
792 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
793 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The 8-byte "MESSAGE=" prefix is copied without its NUL; the
 * formatted text follows directly behind it. */
795 memcpy(buffer, "MESSAGE=", 8);
796 va_start(ap, format);
797 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
799 char_array_0(buffer);
800 IOVEC_SET_STRING(iovec[n++], buffer);
802 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
/* MESSAGE_ID() presumably expands to a format string plus its
 * arguments -- confirm against its definition. */
803 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
805 IOVEC_SET_STRING(iovec[n++], mid);
808 ucred.pid = getpid();
809 ucred.uid = getuid();
810 ucred.gid = getgid();
812 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
/* Entry point for messages received from clients: applies the
 * max_level_store and storage checks, rate-limits per cgroup path
 * prefix, emits a "Suppressed N messages" driver message when the rate
 * limiter reports earlier drops, and finally forwards the entry to
 * dispatch_message_real().
 *
 * iovec may be NULL only when n is 0 (see the assert).
 *
 * NOTE(review): the embedded line numbers skip; the returns after the
 * early checks, the handling of a missing ucred and of rl == 0, and
 * the truncation of `path` after the third '/' are not visible. The
 * comment that starts with "Stop early" has lost its closing line in
 * this listing, so as written it runs on into the following lines. */
815 void server_dispatch_message(
817 struct iovec *iovec, unsigned n, unsigned m,
820 const char *label, size_t label_len,
826 _cleanup_free_ char *path = NULL;
830 assert(iovec || n == 0);
835 if (LOG_PRI(priority) > s->max_level_store)
838 /* Stop early in case the information will not be stored
840 if (s->storage == STORAGE_NONE)
846 r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
850 /* example: /user/lennart/3/foobar
851 * /system/dbus.service/foobar
853 * So let's cut of everything past the third /, since that is
854 * where user directories start */
/* Walk to the third '/' of the cgroup path. */
856 c = strchr(path, '/');
858 c = strchr(c+1, '/');
860 c = strchr(c+1, '/');
/* The rate limiter is keyed by the path and also receives the
 * priority level and the space currently available. */
866 rl = journal_rate_limit_test(s->rate_limit, path,
867 priority & LOG_PRIMASK, available_space(s, false));
872 /* Write a suppression message if we suppressed something */
874 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
875 "Suppressed %u messages from %s", rl - 1, path);
878 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
/* Opens the journals the configured storage mode calls for.
 *
 * System journal (/var/log/journal/<machine-id>/system.journal): only
 * attempted when it is not open yet, storage is persistent or auto,
 * and the flag file /run/systemd/journal/flushed exists.
 *
 * Runtime journal (/run/log/journal/<machine-id>/system.journal): when
 * a system journal is open, an existing runtime file is opened without
 * O_CREAT so it can be flushed; otherwise it is created if necessary.
 *
 * Finishes with available_space(s, true).
 *
 * NOTE(review): the embedded line numbers skip; the result checks,
 * the return values and the freeing of the strjoin() result are not
 * visible in this listing. */
882 static int system_journal_open(Server *s) {
888 r = sd_id128_get_machine(&machine);
890 log_error("Failed to get machine id: %s", strerror(-r));
894 sd_id128_to_string(machine, ids);
896 if (!s->system_journal &&
897 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
898 access("/run/systemd/journal/flushed", F_OK) >= 0) {
900 /* If in auto mode: first try to create the machine
901 * path, but not the prefix.
903 * If in persistent mode: create /var/log/journal and
904 * the machine path */
906 if (s->storage == STORAGE_PERSISTENT)
907 (void) mkdir("/var/log/journal/", 0755);
909 fn = strappenda("/var/log/journal/", ids);
910 (void) mkdir(fn, 0755);
912 fn = strappenda(fn, "/system.journal");
913 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
916 server_fix_perms(s, s->system_journal, 0);
/* -ENOENT and -EROFS are not worth a warning. */
918 if (r != -ENOENT && r != -EROFS)
919 log_warning("Failed to open system journal: %s", strerror(-r));
925 if (!s->runtime_journal &&
926 (s->storage != STORAGE_NONE)) {
928 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
932 if (s->system_journal) {
934 /* Try to open the runtime journal, but only
935 * if it already exists, so that we can flush
936 * it into the system journal */
/* The runtime journal is opened with sealing passed as
 * false. */
938 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
943 log_warning("Failed to open runtime journal: %s", strerror(-r));
950 /* OK, we really need the runtime journal, so create
951 * it if necessary. */
953 (void) mkdir_parents(fn, 0755);
954 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
958 log_error("Failed to open runtime journal: %s", strerror(-r));
963 if (s->runtime_journal)
964 server_fix_perms(s, s->runtime_journal, 0);
/* verbose=true: refresh the space accounting for the journal that
 * is now open. */
967 available_space(s, true);
/* Copies every entry of the runtime journal into the system journal,
 * then closes the runtime journal and removes /run/log/journal.
 *
 * Only acts when storage is auto or persistent, a runtime journal is
 * open, and the system journal could be opened. A failed copy is
 * retried once when shall_try_append_again() approves.
 *
 * NOTE(review): the embedded line numbers skip; the early returns, the
 * rotate/vacuum calls between the first copy attempt and the retry,
 * the cleanup label and the returned value are not visible in this
 * listing. */
972 int server_flush_to_var(Server *s) {
975 sd_journal *j = NULL;
979 if (s->storage != STORAGE_AUTO &&
980 s->storage != STORAGE_PERSISTENT)
983 if (!s->runtime_journal)
/* Make sure the system journal is open before copying into it. */
986 system_journal_open(s);
988 if (!s->system_journal)
991 log_debug("Flushing to /var...");
993 r = sd_id128_get_machine(&machine);
/* Read the runtime journal through the public sd-journal API. */
997 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
999 log_error("Failed to read runtime journal: %s", strerror(-r));
/* A threshold of 0 is passed so that field data is presumably not
 * truncated while copying -- confirm against the sd-journal
 * documentation. */
1003 sd_journal_set_data_threshold(j, 0);
1005 SD_JOURNAL_FOREACH(j) {
/* Reach below the public API: copy the raw entry object at
 * the reader's current file offset. */
1009 f = j->current_file;
1010 assert(f && f->current_offset > 0);
1012 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1014 log_error("Can't read entry: %s", strerror(-r));
1018 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1022 if (!shall_try_append_again(s->system_journal, r)) {
1023 log_error("Can't write entry: %s", strerror(-r));
/* s->system_journal can be NULL at this point, which the
 * message attributes to a failed rotation. */
1030 if (!s->system_journal) {
1031 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1036 log_debug("Retrying write.");
1037 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1039 log_error("Can't write entry: %s", strerror(-r));
1045 journal_file_post_change(s->system_journal);
/* The runtime journal is no longer needed once flushed. */
1047 journal_file_close(s->runtime_journal);
1048 s->runtime_journal = NULL;
1051 rm_rf("/run/log/journal", false, true, false);
1053 sd_journal_close(j);
/* Handles one epoll event. The source is identified by comparing
 * ev->data.fd with the server's descriptors, in this order:
 *   signal_fd               signals read via signalfd
 *   sync_timer_fd           sync timer expiry
 *   dev_kmsg_fd             kernel messages
 *   native_fd / syslog_fd   datagrams, received with recvmsg()
 *   stdout_fd               new stdout stream connection
 *   anything else           ev->data.ptr is taken as a StdoutStream
 *
 * NOTE(review): the embedded line numbers skip; the return statements
 * (and so the meaning of the return value), the actions taken for
 * SIGUSR2 and for the sync timer, and several declarations are not
 * visible in this listing. */
1058 int process_event(Server *s, struct epoll_event *ev) {
1062 if (ev->data.fd == s->signal_fd) {
1063 struct signalfd_siginfo sfsi;
1066 if (ev->events != EPOLLIN) {
1067 log_error("Got invalid event from epoll for %s: %"PRIx32,
1068 "signal fd", ev->events);
/* One siginfo record per read; anything else is a short read
 * or an error. */
1072 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1073 if (n != sizeof(sfsi)) {
1078 if (errno == EINTR || errno == EAGAIN)
/* SIGUSR1: create the "flushed" flag file, then flush the
 * runtime journal to /var. */
1084 if (sfsi.ssi_signo == SIGUSR1) {
1085 log_info("Received request to flush runtime journal from PID %"PRIu32,
1087 touch("/run/systemd/journal/flushed");
1088 server_flush_to_var(s);
1093 if (sfsi.ssi_signo == SIGUSR2) {
1094 log_info("Received request to rotate journal from PID %"PRIu32,
1101 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1105 } else if (ev->data.fd == s->sync_timer_fd) {
1109 log_debug("Got sync request from epoll.");
/* Reading the timerfd consumes the expiration. */
1111 r = read(ev->data.fd, (void *)&t, sizeof(t));
1118 } else if (ev->data.fd == s->dev_kmsg_fd) {
/* EPOLLERR on this fd is reported as lost messages and does
 * not stop processing by itself. */
1121 if (ev->events & EPOLLERR)
1122 log_warning("/dev/kmsg buffer overrun, some messages lost.");
1124 if (!(ev->events & EPOLLIN)) {
1125 log_error("Got invalid event from epoll for %s: %"PRIx32,
1126 "/dev/kmsg", ev->events);
1130 r = server_read_dev_kmsg(s);
1136 } else if (ev->data.fd == s->native_fd ||
1137 ev->data.fd == s->syslog_fd) {
1139 if (ev->events != EPOLLIN) {
1140 log_error("Got invalid event from epoll for %s: %"PRIx32,
1141 ev->data.fd == s->native_fd ? "native fd" : "syslog fd",
1147 struct msghdr msghdr;
/* Ancillary data the sender may attach; each pointer stays
 * NULL (or the count 0) when the item is absent. */
1149 struct ucred *ucred = NULL;
1150 struct timeval *tv = NULL;
1151 struct cmsghdr *cmsg;
1153 size_t label_len = 0;
1155 struct cmsghdr cmsghdr;
1157 /* We use NAME_MAX space for the
1158 * SELinux label here. The kernel
1159 * currently enforces no limit, but
1160 * according to suggestions from the
1161 * SELinux people this will change and
1162 * it will probably be identical to
1163 * NAME_MAX. For now we use that, but
1164 * this should be updated one day when
1165 * the final limit is known.*/
1166 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1167 CMSG_SPACE(sizeof(struct timeval)) +
1168 CMSG_SPACE(sizeof(int)) + /* fd */
1169 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask how many bytes are pending (v) so the receive buffer
 * can be grown first when it is too small. */
1176 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1177 log_error("SIOCINQ failed: %m");
1181 if (s->buffer_size < (size_t) v) {
/* Grow to at least v + LINE_MAX, or double the size;
 * one extra byte is allocated beyond l. The realloc()
 * result goes into b so s->buffer is not lost on
 * failure. */
1185 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1186 b = realloc(s->buffer, l+1);
1189 log_error("Couldn't increase buffer.");
1198 iovec.iov_base = s->buffer;
1199 iovec.iov_len = s->buffer_size;
1203 msghdr.msg_iov = &iovec;
1204 msghdr.msg_iovlen = 1;
1205 msghdr.msg_control = &control;
1206 msghdr.msg_controllen = sizeof(control);
/* Non-blocking receive; passed descriptors are opened
 * close-on-exec. */
1208 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1211 if (errno == EINTR || errno == EAGAIN)
1214 log_error("recvmsg() failed: %m");
/* Pick credentials, security label, timestamp and passed
 * file descriptors out of the control messages. Credentials
 * and timestamp are only accepted with the exact expected
 * length. */
1218 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1220 if (cmsg->cmsg_level == SOL_SOCKET &&
1221 cmsg->cmsg_type == SCM_CREDENTIALS &&
1222 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1223 ucred = (struct ucred*) CMSG_DATA(cmsg);
1224 else if (cmsg->cmsg_level == SOL_SOCKET &&
1225 cmsg->cmsg_type == SCM_SECURITY) {
1226 label = (char*) CMSG_DATA(cmsg);
1227 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1228 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1229 cmsg->cmsg_type == SO_TIMESTAMP &&
1230 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1231 tv = (struct timeval*) CMSG_DATA(cmsg);
1232 else if (cmsg->cmsg_level == SOL_SOCKET &&
1233 cmsg->cmsg_type == SCM_RIGHTS) {
1234 fds = (int*) CMSG_DATA(cmsg);
1235 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* syslog socket: payload only; descriptors are refused. */
1239 if (ev->data.fd == s->syslog_fd) {
1240 if (n > 0 && n_fds == 0) {
1242 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1243 } else if (n_fds > 0)
1244 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* native socket: either an inline payload, or exactly one
 * descriptor with an empty payload. */
1247 if (n > 0 && n_fds == 0)
1248 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1249 else if (n == 0 && n_fds == 1)
1250 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1252 log_warning("Got too many file descriptors via native socket. Ignoring.");
/* Received descriptors are always closed here. */
1255 close_many(fds, n_fds);
1260 } else if (ev->data.fd == s->stdout_fd) {
1262 if (ev->events != EPOLLIN) {
1263 log_error("Got invalid event from epoll for %s: %"PRIx32,
1264 "stdout fd", ev->events);
1268 stdout_stream_new(s);
1272 StdoutStream *stream;
/* Only EPOLLIN and EPOLLHUP are acceptable here: OR-ing
 * them in must not leave any other bit set. */
1274 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1275 log_error("Got invalid event from epoll for %s: %"PRIx32,
1276 "stdout stream", ev->events);
1280 /* If it is none of the well-known fds, it must be an
1281 * stdout stream fd. Note that this is a bit ugly here
1282 * (since we rely that none of the well-known fds
1283 * could be interpreted as pointer), but nonetheless
1284 * safe, since the well-known fds would never get an
1285 * fd > 4096, i.e. beyond the first memory page */
1287 stream = ev->data.ptr;
/* A result <= 0 from processing ends the stream. */
1289 if (stdout_stream_process(stream) <= 0)
1290 stdout_stream_free(stream);
1295 log_error("Unknown event.");
/* Blocks SIGINT, SIGTERM, SIGUSR1 and SIGUSR2 for normal delivery,
 * creates a non-blocking, close-on-exec signalfd for them in
 * s->signal_fd and registers it with the server's epoll instance for
 * EPOLLIN.
 *
 * NOTE(review): the embedded line numbers skip; the return values and
 * the zero-initialisation of `ev` are not visible in this listing. */
1299 static int open_signalfd(Server *s) {
1301 struct epoll_event ev;
1305 assert_se(sigemptyset(&mask) == 0);
1306 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
/* SIG_SETMASK replaces the whole process signal mask with exactly
 * these four signals. */
1307 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1309 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1310 if (s->signal_fd < 0) {
1311 log_error("signalfd(): %m");
1316 ev.events = EPOLLIN;
1317 ev.data.fd = s->signal_fd;
1319 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1320 log_error("epoll_ctl(): %m");
/* Applies journald switches found on the kernel command line
 * (/proc/cmdline):
 *   systemd.journald.forward_to_syslog=<bool>
 *   systemd.journald.forward_to_kmsg=<bool>
 *   systemd.journald.forward_to_console=<bool>
 * Values that fail to parse, and other words starting with
 * "systemd.journald", are warned about and ignored.
 *
 * NOTE(review): the embedded line numbers skip; the early return taken
 * when running in a container, the `if (r < 0)` / `else` lines and the
 * return values are not visible in this listing. */
1327 static int server_parse_proc_cmdline(Server *s) {
1328 _cleanup_free_ char *line = NULL;
1333 if (detect_container(NULL) > 0)
1336 r = read_one_line_file("/proc/cmdline", &line);
1338 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1342 FOREACH_WORD_QUOTED(w, l, line, state) {
1343 _cleanup_free_ char *word;
/* Copy the current word (w, length l) into a NUL-terminated
 * string. */
1345 word = strndup(w, l);
/* The numeric offsets below (35, 33, 36) are the lengths of
 * the respective option prefixes including the '=', so
 * word + N is the value part. */
1349 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1350 r = parse_boolean(word + 35);
1352 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1354 s->forward_to_syslog = r;
1355 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1356 r = parse_boolean(word + 33);
1358 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1360 s->forward_to_kmsg = r;
1361 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1362 r = parse_boolean(word + 36);
1364 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1366 s->forward_to_console = r;
1367 } else if (startswith(word, "systemd.journald"))
1368 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Loads /etc/systemd/journald.conf into the Server via config_parse(),
 * using the "Journal" section and the gperf-generated lookup table.
 * A missing file (ENOENT) is treated differently from other open
 * failures, which are warned about.
 *
 * NOTE(review): the embedded line numbers skip; the return values and
 * the `if (!f)` / `if (r < 0)` lines are not visible in this listing. */
1374 static int server_parse_config_file(Server *s) {
1375 static const char fn[] = "/etc/systemd/journald.conf";
1376 _cleanup_fclose_ FILE *f = NULL;
/* "re": read-only; the 'e' flag is the glibc extension requesting
 * O_CLOEXEC. */
1381 f = fopen(fn, "re");
1383 if (errno == ENOENT)
1386 log_warning("Failed to open configuration file %s: %m", fn);
1390 r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1391 (void*) journald_gperf_lookup, false, false, s);
1393 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Creates the CLOCK_MONOTONIC timerfd used for deferred syncs
 * (s->sync_timer_fd, close-on-exec) and adds it to the server's epoll
 * instance for EPOLLIN. The timer is created disarmed here;
 * server_schedule_sync() arms it.
 *
 * NOTE(review): the embedded line numbers skip; the return values and
 * the zero-initialisation of `ev` are not visible in this listing. */
1398 static int server_open_sync_timer(Server *s) {
1400 struct epoll_event ev;
1404 s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1405 if (s->sync_timer_fd < 0)
1409 ev.events = EPOLLIN;
1410 ev.data.fd = s->sync_timer_fd;
1412 r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1414 log_error("Failed to add idle timer fd to epoll object: %m");
/* Requests that the journals be synced to disk. Messages of priority
 * LOG_CRIT or more severe take the immediate path; otherwise, unless a
 * sync is already scheduled, the sync timer is armed to fire once
 * after s->sync_interval_usec (a zero interval arms nothing) and
 * s->sync_scheduled is set.
 *
 * NOTE(review): the embedded line numbers skip; the call made on the
 * immediate path, the early returns and the return values are not
 * visible in this listing. */
1421 int server_schedule_sync(Server *s, int priority) {
1426 if (priority <= LOG_CRIT) {
1427 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1432 if (s->sync_scheduled)
1435 if (s->sync_interval_usec) {
/* Only it_value is filled in, so this is a one-shot timer. */
1436 struct itimerspec sync_timer_enable = {};
1438 timespec_store(&sync_timer_enable.it_value, s->sync_interval_usec);
1440 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1445 s->sync_scheduled = true;
/* Initialises a Server: sets defaults, reads the configuration file
 * and kernel command line, creates the runtime directory, the user
 * journal map, the mmap cache and the epoll instance, adopts sockets
 * passed by the service manager, opens the remaining sockets and
 * event sources, and finally opens the journals.
 *
 * NOTE(review): the embedded line numbers skip; the error checks and
 * return values after each step, and the assignments that adopt the
 * passed descriptors, are not visible in this listing. */
1450 int server_init(Server *s) {
/* All descriptors start as -1 so that server_done() only closes
 * those that were actually opened. */
1456 s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1457 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1461 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1462 s->sync_scheduled = false;
1464 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1465 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1467 s->forward_to_syslog = true;
1469 s->max_level_store = LOG_DEBUG;
1470 s->max_level_syslog = LOG_DEBUG;
1471 s->max_level_kmsg = LOG_NOTICE;
1472 s->max_level_console = LOG_INFO;
/* Every byte of both metrics structures is set to 0xFF;
 * presumably an all-ones field means "not configured" -- confirm
 * against the code consuming the metrics. */
1474 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1475 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* Configuration file first, kernel command line second, so the
 * command line takes precedence for the switches it sets. */
1477 server_parse_config_file(s);
1478 server_parse_proc_cmdline(s);
/* Rate limiting needs both values: if exactly one of interval
 * and burst is zero, both are reset to zero. */
1479 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1480 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1481 (long long unsigned) s->rate_limit_interval,
1482 s->rate_limit_burst);
1483 s->rate_limit_interval = s->rate_limit_burst = 0;
1486 mkdir_p("/run/systemd/journal", 0755);
1488 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1489 if (!s->user_journals)
1492 s->mmap = mmap_cache_new();
1496 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1497 if (s->epoll_fd < 0) {
1498 log_error("Failed to create epoll object: %m");
/* Descriptors handed over by the service manager; `true` asks
 * sd_listen_fds() to unset the related environment variables. */
1502 n = sd_listen_fds(true);
1504 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
/* Classify each passed descriptor by socket type and path; at
 * most one of each kind is accepted. */
1508 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1510 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1512 if (s->native_fd >= 0) {
1513 log_error("Too many native sockets passed.");
1519 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1521 if (s->stdout_fd >= 0) {
1522 log_error("Too many stdout sockets passed.");
1528 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1530 if (s->syslog_fd >= 0) {
1531 log_error("Too many /dev/log sockets passed.");
1538 log_error("Unknown socket passed.");
/* Open the sockets and the other event sources. */
1543 r = server_open_syslog_socket(s);
1547 r = server_open_native_socket(s);
1551 r = server_open_stdout_socket(s);
1555 r = server_open_dev_kmsg(s);
1559 r = server_open_kernel_seqnum(s);
1563 r = server_open_sync_timer(s);
1567 r = open_signalfd(s);
1571 s->udev = udev_new();
1575 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1576 s->rate_limit_burst);
1580 r = system_journal_open(s);
/* Gives the system journal and every per-user journal the chance to
 * append a tag, passing the current CLOCK_REALTIME time. Whether a tag
 * is actually written is decided by journal_file_maybe_append_tag().
 *
 * NOTE(review): the embedded line numbers skip; declarations and any
 * conditional compilation around this body are not visible in this
 * listing. */
1587 void server_maybe_append_tags(Server *s) {
1593 n = now(CLOCK_REALTIME);
1595 if (s->system_journal)
1596 journal_file_maybe_append_tag(s->system_journal, n);
1598 HASHMAP_FOREACH(f, s->user_journals, i)
1599 journal_file_maybe_append_tag(f, n);
/* Releases everything a Server owns: stdout streams, the system,
 * runtime and per-user journals, the user journal map, all file
 * descriptors that are open (>= 0), the rate limiter, the kernel
 * sequence number mapping, the mmap cache and the udev context.
 *
 * NOTE(review): the embedded line numbers skip; guards around some of
 * the final calls are not visible in this listing. */
1603 void server_done(Server *s) {
/* Freeing a stream presumably unlinks it from s->stdout_streams,
 * which is what lets this loop terminate -- confirm in
 * stdout_stream_free(). */
1607 while (s->stdout_streams)
1608 stdout_stream_free(s->stdout_streams);
1610 if (s->system_journal)
1611 journal_file_close(s->system_journal);
1613 if (s->runtime_journal)
1614 journal_file_close(s->runtime_journal);
/* Drain the map, closing each per-user journal, before freeing
 * the map itself. */
1616 while ((f = hashmap_steal_first(s->user_journals)))
1617 journal_file_close(f);
1619 hashmap_free(s->user_journals);
/* Descriptors that were never opened are still -1 and are
 * skipped. */
1621 if (s->epoll_fd >= 0)
1622 close_nointr_nofail(s->epoll_fd);
1624 if (s->signal_fd >= 0)
1625 close_nointr_nofail(s->signal_fd);
1627 if (s->syslog_fd >= 0)
1628 close_nointr_nofail(s->syslog_fd);
1630 if (s->native_fd >= 0)
1631 close_nointr_nofail(s->native_fd);
1633 if (s->stdout_fd >= 0)
1634 close_nointr_nofail(s->stdout_fd);
1636 if (s->dev_kmsg_fd >= 0)
1637 close_nointr_nofail(s->dev_kmsg_fd);
1639 if (s->sync_timer_fd >= 0)
1640 close_nointr_nofail(s->sync_timer_fd);
1643 journal_rate_limit_free(s->rate_limit);
/* The kernel sequence number is a mapping of one uint64_t. */
1645 if (s->kernel_seqnum)
1646 munmap(s->kernel_seqnum, sizeof(uint64_t));
1652 mmap_cache_unref(s->mmap);
1655 udev_unref(s->udev);