1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33 #include "sd-daemon.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
42 #include "conf-parser.h"
43 #include "selinux-util.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-rate-limit.h"
48 #include "journald-kmsg.h"
49 #include "journald-syslog.h"
50 #include "journald-stream.h"
51 #include "journald-console.h"
52 #include "journald-native.h"
53 #include "journald-server.h"
57 #include <acl/libacl.h>
62 #include <selinux/selinux.h>
/* Tunables of the journal server. */

/* Cap on the number of per-user journal files kept open at once;
 * find_journal() closes entries of s->user_journals while the map holds
 * at least this many. */
65 #define USER_JOURNALS_MAX 1024
/* Default stored into s->sync_interval_usec by server_init(). */
67 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
/* Defaults stored into s->rate_limit_interval and s->rate_limit_burst by
 * server_init(). */
68 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
69 #define DEFAULT_RATE_LIMIT_BURST 1000
/* Age limit that available_space() compares its cached timestamp against
 * before recomputing. */
71 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Configuration-file spelling of each Storage enum value. The two macros
 * below are instantiated on this table (presumably generating the
 * string/enum lookup helpers and the config_parse_storage parser --
 * confirm against their definitions). */
73 static const char* const storage_table[] = {
74 [STORAGE_AUTO] = "auto",
75 [STORAGE_VOLATILE] = "volatile",
76 [STORAGE_PERSISTENT] = "persistent",
77 [STORAGE_NONE] = "none"
80 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
81 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Configuration-file spelling of each SplitMode enum value, with the same
 * pair of macros instantiated on it as for the storage table (presumably
 * generating lookup helpers and the config_parse_split_mode parser --
 * confirm against their definitions). */
83 static const char* const split_mode_table[] = {
84 [SPLIT_NONE] = "none",
86 [SPLIT_LOGIN] = "login"
89 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
90 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Work out how many more bytes the active journal directory may consume.
 *
 * The result is kept in s->cached_available_space together with a
 * CLOCK_MONOTONIC timestamp, and the cached value is returned early while it
 * is younger than RECHECK_AVAILABLE_SPACE_USEC.
 * NOTE(review): presumably "verbose" bypasses that cache and enables the
 * usage message at the end -- confirm.
 *
 * The directory is /var/log/journal/MACHINE-ID with s->system_metrics when
 * the system journal is open, /run/log/journal/MACHINE-ID with
 * s->runtime_metrics otherwise. The disk usage (st_blocks * 512) of every
 * regular file ending in ".journal" or ".journal~" is summed, free space is
 * taken from fstatvfs(), and the new value is
 * MIN(max_use, free - keep_free) - sum, with both subtractions clamped at 0. */
92 static uint64_t available_space(Server *s, bool verbose) {
94 _cleanup_free_ char *p = NULL;
97 uint64_t sum = 0, ss_avail = 0, avail = 0;
99 _cleanup_closedir_ DIR *d = NULL;
104 ts = now(CLOCK_MONOTONIC);
106 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
108 return s->cached_available_space;
110 r = sd_id128_get_machine(&machine);
114 if (s->system_journal) {
115 f = "/var/log/journal/";
116 m = &s->system_metrics;
118 f = "/run/log/journal/";
119 m = &s->runtime_metrics;
124 p = strappend(f, sd_id128_to_string(machine, ids));
132 if (fstatvfs(dirfd(d), &ss) < 0)
138 union dirent_storage buf;
140 r = readdir_r(d, &buf.de, &de);
/* Only journal files (live or archived-as-corrupt) count towards usage. */
147 if (!endswith(de->d_name, ".journal") &&
148 !endswith(de->d_name, ".journal~"))
151 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
154 if (!S_ISREG(st.st_mode))
157 sum += (uint64_t) st.st_blocks * 512UL;
/* NOTE(review): POSIX specifies f_bavail in units of f_frsize, not f_bsize;
 * confirm this product is intended on file systems where the two differ. */
160 ss_avail = ss.f_bsize * ss.f_bavail;
161 avail = ss_avail > m->keep_free ? ss_avail - m->keep_free : 0;
163 s->cached_available_space = MIN(m->max_use, avail) > sum ? MIN(m->max_use, avail) - sum : 0;
164 s->cached_available_space_timestamp = ts;
167 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
168 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
170 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
171 "%s journal is using %s (max %s, leaving %s of free %s, current limit %s).",
172 s->system_journal ? "Permanent" : "Runtime",
173 format_bytes(fb1, sizeof(fb1), sum),
174 format_bytes(fb2, sizeof(fb2), m->max_use),
175 format_bytes(fb3, sizeof(fb3), m->keep_free),
176 format_bytes(fb4, sizeof(fb4), ss_avail),
177 format_bytes(fb5, sizeof(fb5), MIN(m->max_use, avail)));
180 return s->cached_available_space;
183 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
188 acl_permset_t permset;
193 r = fchmod(f->fd, 0640);
195 log_warning("Failed to fix access mode on %s, ignoring: %s", f->path, strerror(-r));
201 acl = acl_get_fd(f->fd);
203 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
207 r = acl_find_uid(acl, uid, &entry);
210 if (acl_create_entry(&acl, &entry) < 0 ||
211 acl_set_tag_type(entry, ACL_USER) < 0 ||
212 acl_set_qualifier(entry, &uid) < 0) {
213 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
218 /* We do not recalculate the mask unconditionally here,
219 * so that the fchmod() mask above stays intact. */
220 if (acl_get_permset(entry, &permset) < 0 ||
221 acl_add_perm(permset, ACL_READ) < 0 ||
222 calc_acl_mask_if_needed(&acl) < 0) {
223 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
227 if (acl_set_fd(f->fd, acl) < 0)
228 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Select the journal file that entries attributed to uid are written to.
 *
 * An open runtime journal is always used. Otherwise the per-user file
 * /var/log/journal/MACHINE-ID/user-UID.journal is looked up in
 * s->user_journals, or opened, given its permissions via server_fix_perms()
 * and added to that map. The paths visible here that cannot deliver a
 * per-user file return s->system_journal instead. */
235 static JournalFile* find_journal(Server *s, uid_t uid) {
236 _cleanup_free_ char *p = NULL;
243 /* We split up user logs only on /var, not on /run. If the
244 * runtime file is open, we write to it exclusively, in order
245 * to guarantee proper order as soon as we flush /run to
246 * /var and close the runtime file. */
248 if (s->runtime_journal)
249 return s->runtime_journal;
252 return s->system_journal;
254 r = sd_id128_get_machine(&machine);
256 return s->system_journal;
258 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
262 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
263 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
264 return s->system_journal;
/* Keep the number of open per-user files below USER_JOURNALS_MAX. */
266 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
267 /* Too many open? Then let's close one */
268 f = hashmap_steal_first(s->user_journals);
270 journal_file_close(f);
273 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
275 return s->system_journal;
277 server_fix_perms(s, f, uid);
279 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
/* The map did not take the file: close it again and fall back. */
281 journal_file_close(f);
282 return s->system_journal;
/* Rotate the runtime journal, the system journal and every per-user journal.
 *
 * Each file is rotated with journal_file_rotate() and has its permissions
 * re-applied through server_fix_perms(). Sealing is passed as false for the
 * runtime journal and as s->seal for the others. Failures are logged; a
 * per-user journal that could not be recreated is removed from
 * s->user_journals, a successfully rotated one replaces its map entry. */
288 void server_rotate(Server *s) {
294 log_debug("Rotating...");
296 if (s->runtime_journal) {
297 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
299 if (s->runtime_journal)
300 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
302 log_error("Failed to create new runtime journal: %s", strerror(-r));
304 server_fix_perms(s, s->runtime_journal, 0);
307 if (s->system_journal) {
308 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
310 if (s->system_journal)
311 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
313 log_error("Failed to create new system journal: %s", strerror(-r));
316 server_fix_perms(s, s->system_journal, 0);
319 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
320 r = journal_file_rotate(&f, s->compress, s->seal);
323 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
325 log_error("Failed to create user journal: %s", strerror(-r));
326 hashmap_remove(s->user_journals, k);
/* Rotation handed back a new file object: store it under the same key. */
329 hashmap_replace(s->user_journals, k, f);
330 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Sync the persistent journals and cancel any pending deferred sync.
 *
 * journal_file_set_offline() is called on the system journal and on every
 * per-user journal; afterwards the sync timer is disarmed by loading an
 * all-zero itimerspec and s->sync_scheduled is cleared. Errors are only
 * logged. */
335 void server_sync(Server *s) {
336 static const struct itimerspec sync_timer_disable = {};
342 if (s->system_journal) {
343 r = journal_file_set_offline(s->system_journal);
345 log_error("Failed to sync system journal: %s", strerror(-r));
348 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
349 r = journal_file_set_offline(f);
351 log_error("Failed to sync user journal: %s", strerror(-r));
354 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
356 log_error("Failed to disable max timer: %m");
358 s->sync_scheduled = false;
/* Remove old journal files so the configured limits are respected.
 *
 * journal_directory_vacuum() is run on /var/log/journal/MACHINE-ID when the
 * system journal is open and on /run/log/journal/MACHINE-ID when the runtime
 * journal is open, with the matching max_use / keep_free metrics and
 * s->max_retention_usec. -ENOENT from vacuuming is not reported.
 *
 * s->oldest_file_usec is reset first and handed to the vacuum calls as an
 * output; s->cached_available_space_timestamp is zeroed at the end. */
361 void server_vacuum(Server *s) {
366 log_debug("Vacuuming...");
368 s->oldest_file_usec = 0;
370 r = sd_id128_get_machine(&machine);
372 log_error("Failed to get machine ID: %s", strerror(-r));
376 sd_id128_to_string(machine, ids);
378 if (s->system_journal) {
379 char *p = strappenda("/var/log/journal/", ids);
381 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
382 if (r < 0 && r != -ENOENT)
383 log_error("Failed to vacuum %s: %s", p, strerror(-r));
386 if (s->runtime_journal) {
387 char *p = strappenda("/run/log/journal/", ids);
389 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
390 if (r < 0 && r != -ENOENT)
391 log_error("Failed to vacuum %s: %s", p, strerror(-r));
394 s->cached_available_space_timestamp = 0;
397 bool shall_try_append_again(JournalFile *f, int r) {
399 /* -E2BIG Hit configured limit
401 -EDQUOT Quota limit hit
403 -EHOSTDOWN Other machine
404 -EBUSY Unclean shutdown
405 -EPROTONOSUPPORT Unsupported feature
408 -ESHUTDOWN Already archived */
410 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
411 log_debug("%s: Allocation limit reached, rotating.", f->path);
412 else if (r == -EHOSTDOWN)
413 log_info("%s: Journal file from other machine, rotating.", f->path);
414 else if (r == -EBUSY)
415 log_info("%s: Unclean shutdown, rotating.", f->path);
416 else if (r == -EPROTONOSUPPORT)
417 log_info("%s: Unsupported feature, rotating.", f->path);
418 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
419 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Append one entry, given as n iovec fields, to the journal chosen for uid.
 *
 * The target file comes from find_journal() and is looked up again after
 * the journal_file_rotate_suggested() branch. If the first append fails with
 * an error that shall_try_append_again() accepts, and nothing was vacuumed
 * before, the file is looked up once more and the append is retried a single
 * time. server_schedule_sync(s, priority) accompanies each append attempt;
 * definitive failures are logged with the item count and total byte size and
 * are otherwise ignored. */
426 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
428 bool vacuumed = false;
435 f = find_journal(s, uid);
439 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
440 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
445 f = find_journal(s, uid);
450 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
452 server_schedule_sync(s, priority);
456 if (vacuumed || !shall_try_append_again(f, r)) {
459 for (i = 0; i < n; i++)
460 size += iovec[i].iov_len;
/* NOTE(review): n is unsigned, so %u would match the argument type
 * here and in the second message below. */
462 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
469 f = find_journal(s, uid);
473 log_debug("Retrying write.");
474 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
478 for (i = 0; i < n; i++)
479 size += iovec[i].iov_len;
481 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
483 server_schedule_sync(s, priority);
/* Append server-generated metadata fields to a message and store it.
 *
 * Starting at index n, the iovec array receives:
 *   - fields derived from ucred: _PID, _UID, _GID, _COMM, _EXE, _CMDLINE,
 *     _CAP_EFFECTIVE, _AUDIT_SESSION, _AUDIT_LOGINUID, the _SYSTEMD_* fields
 *     taken from the sender's cgroup path, and _SELINUX_CONTEXT (from the
 *     passed label or from getpidcon());
 *   - the OBJECT_* counterparts derived from object_pid;
 *   - _SOURCE_REALTIME_TIMESTAMP from tv;
 *   - _BOOT_ID, _MACHINE_ID and _HOSTNAME.
 *
 * The caller must leave room for these: the assert requires
 * n + N_IOVEC_META_FIELDS (+ N_IOVEC_OBJECT_FIELDS when object_pid is set)
 * to be at most m.
 *
 * Finally the destination journal UID is derived from s->split_mode, the
 * sender's UID and (for SPLIT_LOGIN) the session owner UID, and the entry is
 * passed to write_to_journal(). */
486 static void dispatch_message_real(
488 struct iovec *iovec, unsigned n, unsigned m,
491 const char *label, size_t label_len,
/* Fixed-size buffers for the numeric and ID fields, each sized from its
 * field prefix plus the widest value. */
496 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
497 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
498 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
499 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
500 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
501 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
502 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=",
503 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
504 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
505 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
512 uid_t realuid = 0, owner = 0, journal_uid;
513 bool owner_valid = false;
515 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
516 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
517 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
518 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
527 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
/* Fields describing the sending process. */
530 realuid = ucred->uid;
532 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
533 IOVEC_SET_STRING(iovec[n++], pid);
535 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
536 IOVEC_SET_STRING(iovec[n++], uid);
538 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
539 IOVEC_SET_STRING(iovec[n++], gid);
541 r = get_process_comm(ucred->pid, &t);
543 x = strappenda("_COMM=", t);
545 IOVEC_SET_STRING(iovec[n++], x);
548 r = get_process_exe(ucred->pid, &t);
550 x = strappenda("_EXE=", t);
552 IOVEC_SET_STRING(iovec[n++], x);
555 r = get_process_cmdline(ucred->pid, 0, false, &t);
557 x = strappenda("_CMDLINE=", t);
559 IOVEC_SET_STRING(iovec[n++], x);
562 r = get_process_capeff(ucred->pid, &t);
564 x = strappenda("_CAP_EFFECTIVE=", t);
566 IOVEC_SET_STRING(iovec[n++], x);
570 r = audit_session_from_pid(ucred->pid, &audit);
572 sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
573 IOVEC_SET_STRING(iovec[n++], audit_session);
576 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
578 sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
579 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* Fields derived from the sender's cgroup path. */
583 r = cg_pid_get_path_shifted(ucred->pid, NULL, &c);
585 char *session = NULL;
587 x = strappenda("_SYSTEMD_CGROUP=", c);
588 IOVEC_SET_STRING(iovec[n++], x);
590 r = cg_path_get_session(c, &t);
592 session = strappenda("_SYSTEMD_SESSION=", t);
594 IOVEC_SET_STRING(iovec[n++], session);
597 if (cg_path_get_owner_uid(c, &owner) >= 0) {
600 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
601 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* The caller-supplied unit_id is the fallback when the cgroup path yields
 * no unit: as system unit outside a session, as user unit inside one. */
604 if (cg_path_get_unit(c, &t) >= 0) {
605 x = strappenda("_SYSTEMD_UNIT=", t);
607 IOVEC_SET_STRING(iovec[n++], x);
608 } else if (unit_id && !session) {
609 x = strappenda("_SYSTEMD_UNIT=", unit_id);
610 IOVEC_SET_STRING(iovec[n++], x);
613 if (cg_path_get_user_unit(c, &t) >= 0) {
614 x = strappenda("_SYSTEMD_USER_UNIT=", t);
616 IOVEC_SET_STRING(iovec[n++], x);
617 } else if (unit_id && session) {
618 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
619 IOVEC_SET_STRING(iovec[n++], x);
622 if (cg_path_get_slice(c, &t) >= 0) {
623 x = strappenda("_SYSTEMD_SLICE=", t);
625 IOVEC_SET_STRING(iovec[n++], x);
629 } else if (unit_id) {
630 x = strappenda("_SYSTEMD_UNIT=", unit_id);
631 IOVEC_SET_STRING(iovec[n++], x);
/* SELinux context: the label passed in is copied with an explicit length and
 * terminated by hand; the other branch asks getpidcon() instead. */
637 x = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);
639 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
640 IOVEC_SET_STRING(iovec[n++], x);
642 security_context_t con;
644 if (getpidcon(ucred->pid, &con) >= 0) {
645 x = strappenda("_SELINUX_CONTEXT=", con);
648 IOVEC_SET_STRING(iovec[n++], x);
/* Fields describing the process identified by object_pid. */
657 r = get_process_uid(object_pid, &object_uid);
659 sprintf(o_uid, "OBJECT_UID=%lu", (unsigned long) object_uid);
660 IOVEC_SET_STRING(iovec[n++], o_uid);
663 r = get_process_gid(object_pid, &object_gid);
665 sprintf(o_gid, "OBJECT_GID=%lu", (unsigned long) object_gid);
666 IOVEC_SET_STRING(iovec[n++], o_gid);
669 r = get_process_comm(object_pid, &t);
671 x = strappenda("OBJECT_COMM=", t);
673 IOVEC_SET_STRING(iovec[n++], x);
676 r = get_process_exe(object_pid, &t);
678 x = strappenda("OBJECT_EXE=", t);
680 IOVEC_SET_STRING(iovec[n++], x);
683 r = get_process_cmdline(object_pid, 0, false, &t);
685 x = strappenda("OBJECT_CMDLINE=", t);
687 IOVEC_SET_STRING(iovec[n++], x);
691 r = audit_session_from_pid(object_pid, &audit);
693 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%lu", (unsigned long) audit);
694 IOVEC_SET_STRING(iovec[n++], o_audit_session);
697 r = audit_loginuid_from_pid(object_pid, &loginuid);
699 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
700 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
704 r = cg_pid_get_path_shifted(object_pid, NULL, &c);
706 x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
707 IOVEC_SET_STRING(iovec[n++], x);
709 r = cg_path_get_session(c, &t);
711 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
713 IOVEC_SET_STRING(iovec[n++], x);
716 if (cg_path_get_owner_uid(c, &owner) >= 0) {
717 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
718 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
721 if (cg_path_get_unit(c, &t) >= 0) {
722 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
724 IOVEC_SET_STRING(iovec[n++], x);
727 if (cg_path_get_user_unit(c, &t) >= 0) {
728 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
730 IOVEC_SET_STRING(iovec[n++], x);
/* Sender-independent fields: source timestamp, boot ID, machine ID, host name. */
739 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
740 IOVEC_SET_STRING(iovec[n++], source_time);
743 /* Note that strictly speaking storing the boot id here is
744 * redundant since the entry includes this in-line
745 * anyway. However, we need this indexed, too. */
746 r = sd_id128_get_boot(&id);
748 sd_id128_to_string(id, boot_id + strlen("_BOOT_ID="));
749 IOVEC_SET_STRING(iovec[n++], boot_id);
752 r = sd_id128_get_machine(&id);
754 sd_id128_to_string(id, machine_id + strlen("_MACHINE_ID="));
755 IOVEC_SET_STRING(iovec[n++], machine_id);
758 t = gethostname_malloc();
760 x = strappenda("_HOSTNAME=", t);
762 IOVEC_SET_STRING(iovec[n++], x);
/* Choose which journal (by UID) receives the entry. */
767 if (s->split_mode == SPLIT_UID && realuid > 0)
768 /* Split up strictly by any UID */
769 journal_uid = realuid;
770 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
771 /* Split up by login UIDs, this avoids creation of
772 * individual journals for system UIDs. We do this
773 * only if the realuid is not root, in order not to
774 * accidentally leak privileged information to the
775 * user that is logged by a privileged process that is
776 * part of an unprivileged session.*/
781 write_to_journal(s, journal_uid, iovec, n, priority);
/* Log a message that originates from the journal daemon itself.
 *
 * Builds an entry consisting of PRIORITY=6, _TRANSPORT=driver, a MESSAGE=
 * field formatted printf-style from format and the variadic arguments, and
 * (unless message_id equals SD_ID128_NULL) a message ID field. The entry is
 * dispatched at LOG_INFO with this process's own pid, uid and gid as
 * credentials.
 *
 * The message text is formatted with vsnprintf() into a fixed buffer of
 * 16 + LINE_MAX + 1 bytes, so longer text is truncated. */
784 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
785 char mid[11 + 32 + 1];
786 char buffer[16 + LINE_MAX + 1];
/* Room for the up to four fields set here plus the metadata fields that
 * dispatch_message_real() appends. */
787 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
790 struct ucred ucred = {};
795 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
796 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The 8 bytes copied are the "MESSAGE=" prefix without its NUL. */
798 memcpy(buffer, "MESSAGE=", 8);
799 va_start(ap, format);
800 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
802 char_array_0(buffer);
803 IOVEC_SET_STRING(iovec[n++], buffer);
805 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
806 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
808 IOVEC_SET_STRING(iovec[n++], mid);
811 ucred.pid = getpid();
812 ucred.uid = getuid();
813 ucred.gid = getgid();
815 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
/* Entry point for an incoming message: filter, rate-limit, then store.
 *
 * Two early checks come first: LOG_PRI(priority) > s->max_level_store and
 * s->storage == STORAGE_NONE (presumably both drop the message -- confirm).
 * The sender's cgroup path, shortened at its third slash, is the key handed
 * to journal_rate_limit_test() together with the masked priority and
 * available_space(). A "Suppressed ... messages" driver message reports
 * earlier suppression (rl - 1 messages) before the message itself goes to
 * dispatch_message_real() with all arguments passed through unchanged. */
818 void server_dispatch_message(
820 struct iovec *iovec, unsigned n, unsigned m,
823 const char *label, size_t label_len,
829 _cleanup_free_ char *path = NULL;
833 assert(iovec || n == 0);
838 if (LOG_PRI(priority) > s->max_level_store)
841 /* Stop early in case the information will not be stored
843 if (s->storage == STORAGE_NONE)
849 r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
853 /* example: /user/lennart/3/foobar
854 * /system/dbus.service/foobar
856 * So let's cut of everything past the third /, since that is
857 * where user directories start */
859 c = strchr(path, '/');
861 c = strchr(c+1, '/');
863 c = strchr(c+1, '/');
869 rl = journal_rate_limit_test(s->rate_limit, path,
870 priority & LOG_PRIMASK, available_space(s, false));
875 /* Write a suppression message if we suppressed something */
877 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
878 "Suppressed %u messages from %s", rl - 1, path);
881 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
/* Open the system and/or runtime journal file as permitted by s->storage.
 *
 * Persistent file (/var/log/journal/MACHINE-ID/system.journal): opened only
 * if not open yet, storage is "persistent" or "auto", and the flag file
 * /run/systemd/journal/flushed exists. -ENOENT and -EROFS from opening it
 * are not warned about.
 *
 * Runtime file (/run/log/journal/MACHINE-ID/system.journal): handled unless
 * storage is "none". With a system journal open it is opened without
 * O_CREAT, so only an already existing file is picked up for flushing;
 * otherwise it is created, parents included.
 *
 * Permissions are fixed via server_fix_perms() and the function ends by
 * calling available_space() in verbose mode. */
885 static int system_journal_open(Server *s) {
891 r = sd_id128_get_machine(&machine);
893 log_error("Failed to get machine id: %s", strerror(-r));
897 sd_id128_to_string(machine, ids);
899 if (!s->system_journal &&
900 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
901 access("/run/systemd/journal/flushed", F_OK) >= 0) {
903 /* If in auto mode: first try to create the machine
904 * path, but not the prefix.
906 * If in persistent mode: create /var/log/journal and
907 * the machine path */
909 if (s->storage == STORAGE_PERSISTENT)
910 (void) mkdir("/var/log/journal/", 0755);
912 fn = strappenda("/var/log/journal/", ids);
913 (void) mkdir(fn, 0755);
915 fn = strappenda(fn, "/system.journal");
916 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
919 server_fix_perms(s, s->system_journal, 0);
921 if (r != -ENOENT && r != -EROFS)
922 log_warning("Failed to open system journal: %s", strerror(-r));
928 if (!s->runtime_journal &&
929 (s->storage != STORAGE_NONE)) {
931 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
935 if (s->system_journal) {
937 /* Try to open the runtime journal, but only
938 * if it already exists, so that we can flush
939 * it into the system journal */
941 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
946 log_warning("Failed to open runtime journal: %s", strerror(-r));
953 /* OK, we really need the runtime journal, so create
954 * it if necessary. */
956 (void) mkdir_parents(fn, 0755);
957 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
961 log_error("Failed to open runtime journal: %s", strerror(-r));
966 if (s->runtime_journal)
967 server_fix_perms(s, s->runtime_journal, 0);
970 available_space(s, true);
/* Move the contents of the runtime journal into the system journal.
 *
 * Applies only in "auto" or "persistent" storage mode, with a runtime
 * journal open and after system_journal_open() has produced a system
 * journal. All runtime entries are iterated through an sd_journal handle
 * opened with SD_JOURNAL_RUNTIME_ONLY and copied with
 * journal_file_copy_entry(); a copy failing with an error accepted by
 * shall_try_append_again() is retried once.
 *
 * Afterwards the runtime journal is closed, /run/log/journal is removed,
 * and a driver message reports the elapsed time and the entry count. */
975 int server_flush_to_var(Server *s) {
977 sd_journal *j = NULL;
978 char ts[FORMAT_TIMESPAN_MAX];
985 if (s->storage != STORAGE_AUTO &&
986 s->storage != STORAGE_PERSISTENT)
989 if (!s->runtime_journal)
992 system_journal_open(s);
994 if (!s->system_journal)
997 log_debug("Flushing to /var...");
999 start = now(CLOCK_MONOTONIC);
1001 r = sd_id128_get_machine(&machine);
1005 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1007 log_error("Failed to read runtime journal: %s", strerror(-r));
/* A threshold of 0 is passed so field data is not capped while copying
 * (presumably 0 means "no limit" -- confirm against sd-journal docs). */
1011 sd_journal_set_data_threshold(j, 0);
1013 SD_JOURNAL_FOREACH(j) {
1017 f = j->current_file;
1018 assert(f && f->current_offset > 0);
1022 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1024 log_error("Can't read entry: %s", strerror(-r));
1028 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1032 if (!shall_try_append_again(s->system_journal, r)) {
1033 log_error("Can't write entry: %s", strerror(-r));
1040 if (!s->system_journal) {
1041 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1046 log_debug("Retrying write.");
1047 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1049 log_error("Can't write entry: %s", strerror(-r));
1055 journal_file_post_change(s->system_journal);
1057 journal_file_close(s->runtime_journal);
1058 s->runtime_journal = NULL;
1061 rm_rf("/run/log/journal", false, true, false);
1063 sd_journal_close(j);
1065 server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
/* Handle one epoll event, dispatching on the descriptor it refers to.
 *
 *  - signal fd: a signalfd_siginfo is read; SIGUSR1 creates the "flushed"
 *    flag file and flushes to /var, SIGUSR2 is a rotation request, any other
 *    signal is logged as received.
 *  - sync timer fd: the timer is read in response to a sync request.
 *  - kmsg fd: EPOLLERR is reported as a buffer overrun, then kernel messages
 *    are read through server_read_dev_kmsg().
 *  - native and syslog datagram sockets: one datagram is received into
 *    s->buffer together with its control messages (sender credentials,
 *    SELinux label, timestamp, passed file descriptors) and handed to the
 *    syslog or native processors; received descriptors are closed afterwards.
 *  - stdout listening socket: stdout_stream_new() sets up a new stream.
 *  - anything else: ev->data.ptr is taken to be a StdoutStream, which is
 *    freed when stdout_stream_process() returns <= 0.
 *
 * Unexpected event masks are logged as errors. */
1070 int process_event(Server *s, struct epoll_event *ev) {
1074 if (ev->data.fd == s->signal_fd) {
1075 struct signalfd_siginfo sfsi;
1078 if (ev->events != EPOLLIN) {
1079 log_error("Got invalid event from epoll for %s: %"PRIx32,
1080 "signal fd", ev->events);
1084 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1085 if (n != sizeof(sfsi)) {
1090 if (errno == EINTR || errno == EAGAIN)
1096 if (sfsi.ssi_signo == SIGUSR1) {
1097 log_info("Received request to flush runtime journal from PID %"PRIu32,
1099 touch("/run/systemd/journal/flushed");
1100 server_flush_to_var(s);
1105 if (sfsi.ssi_signo == SIGUSR2) {
1106 log_info("Received request to rotate journal from PID %"PRIu32,
1113 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1117 } else if (ev->data.fd == s->sync_timer_fd) {
1121 log_debug("Got sync request from epoll.");
1123 r = read(ev->data.fd, (void *)&t, sizeof(t));
1130 } else if (ev->data.fd == s->dev_kmsg_fd) {
1133 if (ev->events & EPOLLERR)
1134 log_warning("/dev/kmsg buffer overrun, some messages lost.");
1136 if (!(ev->events & EPOLLIN)) {
1137 log_error("Got invalid event from epoll for %s: %"PRIx32,
1138 "/dev/kmsg", ev->events);
1142 r = server_read_dev_kmsg(s);
1148 } else if (ev->data.fd == s->native_fd ||
1149 ev->data.fd == s->syslog_fd) {
1151 if (ev->events != EPOLLIN) {
1152 log_error("Got invalid event from epoll for %s: %"PRIx32,
1153 ev->data.fd == s->native_fd ? "native fd" : "syslog fd",
1159 struct ucred *ucred = NULL;
1160 struct timeval *tv = NULL;
1161 struct cmsghdr *cmsg;
1163 size_t label_len = 0;
1167 struct cmsghdr cmsghdr;
1169 /* We use NAME_MAX space for the
1170 * SELinux label here. The kernel
1171 * currently enforces no limit, but
1172 * according to suggestions from the
1173 * SELinux people this will change and
1174 * it will probably be identical to
1175 * NAME_MAX. For now we use that, but
1176 * this should be updated one day when
1177 * the final limit is known.*/
1178 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1179 CMSG_SPACE(sizeof(struct timeval)) +
1180 CMSG_SPACE(sizeof(int)) + /* fd */
1181 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1183 struct msghdr msghdr = {
1186 .msg_control = &control,
1187 .msg_controllen = sizeof(control),
/* Ask the socket for its pending byte count (SIOCINQ) and grow the
 * receive buffer to that size plus LINE_MAX. */
1195 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1196 log_error("SIOCINQ failed: %m");
1200 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, LINE_MAX + (size_t) v))
1203 iovec.iov_base = s->buffer;
1204 iovec.iov_len = s->buffer_size;
1206 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1208 if (errno == EINTR || errno == EAGAIN)
1211 log_error("recvmsg() failed: %m");
/* Pick credentials, SELinux label, timestamp and passed descriptors out of
 * the control messages; fixed-size payloads are accepted only with the exact
 * expected length. */
1215 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1217 if (cmsg->cmsg_level == SOL_SOCKET &&
1218 cmsg->cmsg_type == SCM_CREDENTIALS &&
1219 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1220 ucred = (struct ucred*) CMSG_DATA(cmsg);
1221 else if (cmsg->cmsg_level == SOL_SOCKET &&
1222 cmsg->cmsg_type == SCM_SECURITY) {
1223 label = (char*) CMSG_DATA(cmsg);
1224 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1225 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1226 cmsg->cmsg_type == SO_TIMESTAMP &&
1227 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1228 tv = (struct timeval*) CMSG_DATA(cmsg);
1229 else if (cmsg->cmsg_level == SOL_SOCKET &&
1230 cmsg->cmsg_type == SCM_RIGHTS) {
1231 fds = (int*) CMSG_DATA(cmsg);
1232 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* Syslog datagrams must not carry descriptors; native datagrams are either
 * payload only, or empty with exactly one descriptor. */
1236 if (ev->data.fd == s->syslog_fd) {
1237 if (n > 0 && n_fds == 0) {
1239 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1240 } else if (n_fds > 0)
1241 log_warning("Got file descriptors via syslog socket. Ignoring.");
1244 if (n > 0 && n_fds == 0)
1245 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1246 else if (n == 0 && n_fds == 1)
1247 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1249 log_warning("Got too many file descriptors via native socket. Ignoring.");
1252 close_many(fds, n_fds);
1257 } else if (ev->data.fd == s->stdout_fd) {
1259 if (ev->events != EPOLLIN) {
1260 log_error("Got invalid event from epoll for %s: %"PRIx32,
1261 "stdout fd", ev->events);
1265 stdout_stream_new(s);
1269 StdoutStream *stream;
/* Rejects any event bits other than EPOLLIN and EPOLLHUP. */
1271 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1272 log_error("Got invalid event from epoll for %s: %"PRIx32,
1273 "stdout stream", ev->events);
1277 /* If it is none of the well-known fds, it must be an
1278 * stdout stream fd. Note that this is a bit ugly here
1279 * (since we rely that none of the well-known fds
1280 * could be interpreted as pointer), but nonetheless
1281 * safe, since the well-known fds would never get an
1282 * fd > 4096, i.e. beyond the first memory page */
1284 stream = ev->data.ptr;
1286 if (stdout_stream_process(stream) <= 0)
1287 stdout_stream_free(stream);
1292 log_error("Unknown event.");
/* Route SIGINT, SIGTERM, SIGUSR1 and SIGUSR2 through a signalfd.
 *
 * The process signal mask is set to exactly these four signals, a
 * non-blocking close-on-exec signalfd for them is stored in s->signal_fd,
 * and that descriptor is added to s->epoll_fd for EPOLLIN. signalfd() and
 * epoll_ctl() failures are logged. */
1296 static int open_signalfd(Server *s) {
1298 struct epoll_event ev;
1302 assert_se(sigemptyset(&mask) == 0);
1303 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1304 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1306 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1307 if (s->signal_fd < 0) {
1308 log_error("signalfd(): %m");
1313 ev.events = EPOLLIN;
1314 ev.data.fd = s->signal_fd;
1316 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1317 log_error("epoll_ctl(): %m");
/* Apply journald switches found on the kernel command line.
 *
 * Each word of the command line is checked for the prefixes
 * systemd.journald.forward_to_syslog=, systemd.journald.forward_to_kmsg= and
 * systemd.journald.forward_to_console=; the remainder is parsed as a boolean
 * and stored in the matching Server field. Unparsable values and any other
 * word starting with "systemd.journald" only produce a warning. A failure to
 * read the command line is logged and ignored.
 *
 * The numeric offsets 35, 33 and 36 are the lengths of the three prefixes. */
1324 static int server_parse_proc_cmdline(Server *s) {
1325 _cleanup_free_ char *line = NULL;
1330 r = proc_cmdline(&line);
1332 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1336 FOREACH_WORD_QUOTED(w, l, line, state) {
1337 _cleanup_free_ char *word;
1339 word = strndup(w, l);
1343 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1344 r = parse_boolean(word + 35);
1346 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1348 s->forward_to_syslog = r;
1349 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1350 r = parse_boolean(word + 33);
1352 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1354 s->forward_to_kmsg = r;
1355 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1356 r = parse_boolean(word + 36);
1358 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1360 s->forward_to_console = r;
1361 } else if (startswith(word, "systemd.journald"))
1362 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Load the daemon configuration into the server object.
 *
 * Opens /etc/systemd/journald.conf (read-only, close-on-exec) and parses its
 * "Journal" section with config_parse(), resolving keys through
 * journald_gperf_lookup with s as the user data. ENOENT on open is handled
 * separately from other open errors, which are logged as warnings, as are
 * parse failures. */
1368 static int server_parse_config_file(Server *s) {
1369 static const char fn[] = "/etc/systemd/journald.conf";
1370 _cleanup_fclose_ FILE *f = NULL;
1375 f = fopen(fn, "re");
1377 if (errno == ENOENT)
1380 log_warning("Failed to open configuration file %s: %m", fn);
1384 r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1385 (void*) journald_gperf_lookup, false, false, s);
1387 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Create the timer used for deferred journal syncs.
 *
 * A close-on-exec CLOCK_MONOTONIC timerfd is stored in s->sync_timer_fd and
 * added to s->epoll_fd for EPOLLIN; a failure to add it is logged. */
1392 static int server_open_sync_timer(Server *s) {
1394 struct epoll_event ev;
1398 s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1399 if (s->sync_timer_fd < 0)
1403 ev.events = EPOLLIN;
1404 ev.data.fd = s->sync_timer_fd;
1406 r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1408 log_error("Failed to add idle timer fd to epoll object: %m");
/* Arrange for the journals to be synced to disk.
 *
 * Priorities up to LOG_CRIT (CRIT, ALERT, EMERG) take the immediate-sync
 * branch. Otherwise, unless s->sync_scheduled is already set, the sync timer
 * is armed as a one-shot for s->sync_interval_usec (when non-zero) and
 * s->sync_scheduled is set. */
1415 int server_schedule_sync(Server *s, int priority) {
1420 if (priority <= LOG_CRIT) {
1421 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1426 if (s->sync_scheduled)
1429 if (s->sync_interval_usec) {
/* Only it_value is filled in; it_interval stays zero, so the timer
 * does not repeat. */
1430 struct itimerspec sync_timer_enable = {};
1432 timespec_store(&sync_timer_enable.it_value, s->sync_interval_usec);
1434 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1439 s->sync_scheduled = true;
/* Initialise a Server object and bring up all of its inputs.
 *
 * In order: every descriptor field is set to -1; sync, rate-limit, forwarding
 * and max-level defaults are applied; both metrics structs are filled with
 * 0xFF bytes; the configuration file and the kernel command line are parsed;
 * the user-journal hashmap, mmap cache and epoll instance are created;
 * sockets passed in by the service manager are classified by path and type
 * (native, stdout, syslog), with duplicates and unknown sockets reported as
 * errors; the syslog, native, stdout, kmsg, kernel-seqnum, sync-timer and
 * signal descriptors are opened; udev and the rate limiter are set up; and
 * finally the journal files are opened via system_journal_open(). */
1444 int server_init(Server *s) {
1450 s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1451 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1455 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1456 s->sync_scheduled = false;
1458 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1459 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1461 s->forward_to_syslog = true;
1463 s->max_level_store = LOG_DEBUG;
1464 s->max_level_syslog = LOG_DEBUG;
1465 s->max_level_kmsg = LOG_NOTICE;
1466 s->max_level_console = LOG_INFO;
/* All-ones bytes in every metrics field (presumably the "not configured"
 * marker -- confirm against the journal-file metrics handling). */
1468 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1469 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1471 server_parse_config_file(s);
1472 server_parse_proc_cmdline(s);
/* Rate limiting needs both values: if exactly one of them is zero, both are
 * reset to zero. */
1473 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1474 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1475 (long long unsigned) s->rate_limit_interval,
1476 s->rate_limit_burst);
1477 s->rate_limit_interval = s->rate_limit_burst = 0;
1480 mkdir_p("/run/systemd/journal", 0755);
1482 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1483 if (!s->user_journals)
1486 s->mmap = mmap_cache_new();
1490 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1491 if (s->epoll_fd < 0) {
1492 log_error("Failed to create epoll object: %m");
1496 n = sd_listen_fds(true);
1498 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
/* Classify each passed-in socket by type and bound path. */
1502 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1504 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1506 if (s->native_fd >= 0) {
1507 log_error("Too many native sockets passed.");
1513 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1515 if (s->stdout_fd >= 0) {
1516 log_error("Too many stdout sockets passed.");
1522 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1524 if (s->syslog_fd >= 0) {
1525 log_error("Too many /dev/log sockets passed.");
1532 log_error("Unknown socket passed.");
1537 r = server_open_syslog_socket(s);
1541 r = server_open_native_socket(s);
1545 r = server_open_stdout_socket(s);
1549 r = server_open_dev_kmsg(s);
1553 r = server_open_kernel_seqnum(s);
1557 r = server_open_sync_timer(s);
1561 r = open_signalfd(s);
1565 s->udev = udev_new();
1569 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1570 s->rate_limit_burst);
1574 r = system_journal_open(s);
/* Give the system journal and every per-user journal the chance to append a
 * tag, by calling journal_file_maybe_append_tag() on each with the current
 * CLOCK_REALTIME time. The runtime journal is not touched here. */
1581 void server_maybe_append_tags(Server *s) {
1587 n = now(CLOCK_REALTIME);
1589 if (s->system_journal)
1590 journal_file_maybe_append_tag(s->system_journal, n);
1592 HASHMAP_FOREACH(f, s->user_journals, i)
1593 journal_file_maybe_append_tag(f, n);
/* Release everything a Server object holds.
 *
 * Frees all stdout streams, closes the system, runtime and per-user journal
 * files and frees the user-journal map, closes every descriptor that is
 * open (>= 0), frees the rate limiter, unmaps the kernel sequence number
 * page if mapped, and drops the mmap cache and udev references. */
1597 void server_done(Server *s) {
/* Loops until the list head is empty, so stdout_stream_free() is expected to
 * unlink the stream it frees. */
1601 while (s->stdout_streams)
1602 stdout_stream_free(s->stdout_streams);
1604 if (s->system_journal)
1605 journal_file_close(s->system_journal);
1607 if (s->runtime_journal)
1608 journal_file_close(s->runtime_journal);
1610 while ((f = hashmap_steal_first(s->user_journals)))
1611 journal_file_close(f);
1613 hashmap_free(s->user_journals);
1615 if (s->epoll_fd >= 0)
1616 close_nointr_nofail(s->epoll_fd);
1618 if (s->signal_fd >= 0)
1619 close_nointr_nofail(s->signal_fd);
1621 if (s->syslog_fd >= 0)
1622 close_nointr_nofail(s->syslog_fd);
1624 if (s->native_fd >= 0)
1625 close_nointr_nofail(s->native_fd);
1627 if (s->stdout_fd >= 0)
1628 close_nointr_nofail(s->stdout_fd);
1630 if (s->dev_kmsg_fd >= 0)
1631 close_nointr_nofail(s->dev_kmsg_fd);
1633 if (s->sync_timer_fd >= 0)
1634 close_nointr_nofail(s->sync_timer_fd);
1637 journal_rate_limit_free(s->rate_limit);
1639 if (s->kernel_seqnum)
1640 munmap(s->kernel_seqnum, sizeof(uint64_t));
1646 mmap_cache_unref(s->mmap);
1649 udev_unref(s->udev);