1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33 #include "sd-daemon.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
42 #include "conf-parser.h"
43 #include "selinux-util.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-rate-limit.h"
48 #include "journald-kmsg.h"
49 #include "journald-syslog.h"
50 #include "journald-stream.h"
51 #include "journald-console.h"
52 #include "journald-native.h"
53 #include "journald-audit.h"
54 #include "journald-server.h"
58 #include <acl/libacl.h>
63 #include <selinux/selinux.h>
66 #define USER_JOURNALS_MAX 1024
68 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
69 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
70 #define DEFAULT_RATE_LIMIT_BURST 1000
71 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
73 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
75 static const char* const storage_table[_STORAGE_MAX] = {
76 [STORAGE_AUTO] = "auto",
77 [STORAGE_VOLATILE] = "volatile",
78 [STORAGE_PERSISTENT] = "persistent",
79 [STORAGE_NONE] = "none"
82 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
83 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
85 static const char* const split_mode_table[_SPLIT_MAX] = {
86 [SPLIT_LOGIN] = "login",
88 [SPLIT_NONE] = "none",
91 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
92 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
94 static uint64_t available_space(Server *s, bool verbose) {
96 _cleanup_free_ char *p = NULL;
99 uint64_t sum = 0, ss_avail = 0, avail = 0;
101 _cleanup_closedir_ DIR *d = NULL;
106 ts = now(CLOCK_MONOTONIC);
108 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
110 return s->cached_available_space;
112 r = sd_id128_get_machine(&machine);
116 if (s->system_journal) {
117 f = "/var/log/journal/";
118 m = &s->system_metrics;
120 f = "/run/log/journal/";
121 m = &s->runtime_metrics;
126 p = strappend(f, sd_id128_to_string(machine, ids));
134 if (fstatvfs(dirfd(d), &ss) < 0)
143 if (!de && errno != 0)
149 if (!endswith(de->d_name, ".journal") &&
150 !endswith(de->d_name, ".journal~"))
153 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
156 if (!S_ISREG(st.st_mode))
159 sum += (uint64_t) st.st_blocks * 512UL;
162 ss_avail = ss.f_bsize * ss.f_bavail;
164 /* If we reached a high mark, we will always allow this much
165 * again, unless usage goes above max_use. This watermark
166 * value is cached so that we don't give up space on pressure,
167 * but hover below the maximum usage. */
172 avail = LESS_BY(ss_avail, m->keep_free);
174 s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
175 s->cached_available_space_timestamp = ts;
178 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
179 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
181 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
182 "%s journal is using %s (max allowed %s, "
183 "trying to leave %s free of %s available → current limit %s).",
184 s->system_journal ? "Permanent" : "Runtime",
185 format_bytes(fb1, sizeof(fb1), sum),
186 format_bytes(fb2, sizeof(fb2), m->max_use),
187 format_bytes(fb3, sizeof(fb3), m->keep_free),
188 format_bytes(fb4, sizeof(fb4), ss_avail),
189 format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
192 return s->cached_available_space;
195 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
200 acl_permset_t permset;
205 r = fchmod(f->fd, 0640);
207 log_warning_errno(r, "Failed to fix access mode on %s, ignoring: %m", f->path);
210 if (uid <= SYSTEM_UID_MAX)
213 acl = acl_get_fd(f->fd);
215 log_warning_errno(errno, "Failed to read ACL on %s, ignoring: %m", f->path);
219 r = acl_find_uid(acl, uid, &entry);
222 if (acl_create_entry(&acl, &entry) < 0 ||
223 acl_set_tag_type(entry, ACL_USER) < 0 ||
224 acl_set_qualifier(entry, &uid) < 0) {
225 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
230 /* We do not recalculate the mask unconditionally here,
231 * so that the fchmod() mask above stays intact. */
232 if (acl_get_permset(entry, &permset) < 0 ||
233 acl_add_perm(permset, ACL_READ) < 0 ||
234 calc_acl_mask_if_needed(&acl) < 0) {
235 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
239 if (acl_set_fd(f->fd, acl) < 0)
240 log_warning_errno(errno, "Failed to set ACL on %s, ignoring: %m", f->path);
247 static JournalFile* find_journal(Server *s, uid_t uid) {
248 _cleanup_free_ char *p = NULL;
255 /* We split up user logs only on /var, not on /run. If the
256 * runtime file is open, we write to it exclusively, in order
257 * to guarantee proper order as soon as we flush /run to
258 * /var and close the runtime file. */
260 if (s->runtime_journal)
261 return s->runtime_journal;
263 if (uid <= SYSTEM_UID_MAX)
264 return s->system_journal;
266 r = sd_id128_get_machine(&machine);
268 return s->system_journal;
270 f = ordered_hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
274 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
275 SD_ID128_FORMAT_VAL(machine), uid) < 0)
276 return s->system_journal;
278 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
279 /* Too many open? Then let's close one */
280 f = ordered_hashmap_steal_first(s->user_journals);
282 journal_file_close(f);
285 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
287 return s->system_journal;
289 server_fix_perms(s, f, uid);
291 r = ordered_hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
293 journal_file_close(f);
294 return s->system_journal;
300 static int do_rotate(
313 r = journal_file_rotate(f, s->compress, seal);
316 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
318 log_error_errno(r, "Failed to create new %s journal: %m", name);
320 server_fix_perms(s, *f, uid);
325 void server_rotate(Server *s) {
331 log_debug("Rotating...");
333 do_rotate(s, &s->runtime_journal, "runtime", false, 0);
334 do_rotate(s, &s->system_journal, "system", s->seal, 0);
336 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
337 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
339 ordered_hashmap_replace(s->user_journals, k, f);
341 /* Old file has been closed and deallocated */
342 ordered_hashmap_remove(s->user_journals, k);
346 void server_sync(Server *s) {
352 if (s->system_journal) {
353 r = journal_file_set_offline(s->system_journal);
355 log_error_errno(r, "Failed to sync system journal: %m");
358 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
359 r = journal_file_set_offline(f);
361 log_error_errno(r, "Failed to sync user journal: %m");
364 if (s->sync_event_source) {
365 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
367 log_error_errno(r, "Failed to disable sync timer source: %m");
370 s->sync_scheduled = false;
373 static void do_vacuum(
378 JournalMetrics *metrics) {
386 p = strappenda(path, id);
387 r = journal_directory_vacuum(p, metrics->max_use, s->max_retention_usec, &s->oldest_file_usec, false);
388 if (r < 0 && r != -ENOENT)
389 log_error_errno(r, "Failed to vacuum %s: %m", p);
392 void server_vacuum(Server *s) {
397 log_debug("Vacuuming...");
399 s->oldest_file_usec = 0;
401 r = sd_id128_get_machine(&machine);
403 log_error_errno(r, "Failed to get machine ID: %m");
406 sd_id128_to_string(machine, ids);
408 do_vacuum(s, ids, s->system_journal, "/var/log/journal/", &s->system_metrics);
409 do_vacuum(s, ids, s->runtime_journal, "/run/log/journal/", &s->runtime_metrics);
411 s->cached_available_space_timestamp = 0;
414 static void server_cache_machine_id(Server *s) {
420 r = sd_id128_get_machine(&id);
424 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
427 static void server_cache_boot_id(Server *s) {
433 r = sd_id128_get_boot(&id);
437 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
440 static void server_cache_hostname(Server *s) {
441 _cleanup_free_ char *t = NULL;
446 t = gethostname_malloc();
450 x = strappend("_HOSTNAME=", t);
454 free(s->hostname_field);
455 s->hostname_field = x;
458 static bool shall_try_append_again(JournalFile *f, int r) {
460 /* -E2BIG Hit configured limit
462 -EDQUOT Quota limit hit
464 -EIO I/O error of some kind (mmap)
465 -EHOSTDOWN Other machine
466 -EBUSY Unclean shutdown
467 -EPROTONOSUPPORT Unsupported feature
470 -ESHUTDOWN Already archived
471 -EIDRM Journal file has been deleted */
473 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
474 log_debug("%s: Allocation limit reached, rotating.", f->path);
475 else if (r == -EHOSTDOWN)
476 log_info("%s: Journal file from other machine, rotating.", f->path);
477 else if (r == -EBUSY)
478 log_info("%s: Unclean shutdown, rotating.", f->path);
479 else if (r == -EPROTONOSUPPORT)
480 log_info("%s: Unsupported feature, rotating.", f->path);
481 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
482 log_warning("%s: Journal file corrupted, rotating.", f->path);
484 log_warning("%s: IO error, rotating.", f->path);
485 else if (r == -EIDRM)
486 log_warning("%s: Journal file has been deleted, rotating.", f->path);
493 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
495 bool vacuumed = false;
502 f = find_journal(s, uid);
506 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
507 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
512 f = find_journal(s, uid);
517 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
519 server_schedule_sync(s, priority);
523 if (vacuumed || !shall_try_append_again(f, r)) {
526 for (i = 0; i < n; i++)
527 size += iovec[i].iov_len;
529 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, size);
536 f = find_journal(s, uid);
540 log_debug("Retrying write.");
541 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
545 for (i = 0; i < n; i++)
546 size += iovec[i].iov_len;
548 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, size);
550 server_schedule_sync(s, priority);
553 static void dispatch_message_real(
555 struct iovec *iovec, unsigned n, unsigned m,
556 const struct ucred *ucred,
557 const struct timeval *tv,
558 const char *label, size_t label_len,
563 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
564 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
565 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
566 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
567 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
568 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
569 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
570 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
576 uid_t realuid = 0, owner = 0, journal_uid;
577 bool owner_valid = false;
579 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
580 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
581 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
582 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
591 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
594 realuid = ucred->uid;
596 sprintf(pid, "_PID="PID_FMT, ucred->pid);
597 IOVEC_SET_STRING(iovec[n++], pid);
599 sprintf(uid, "_UID="UID_FMT, ucred->uid);
600 IOVEC_SET_STRING(iovec[n++], uid);
602 sprintf(gid, "_GID="GID_FMT, ucred->gid);
603 IOVEC_SET_STRING(iovec[n++], gid);
605 r = get_process_comm(ucred->pid, &t);
607 x = strappenda("_COMM=", t);
609 IOVEC_SET_STRING(iovec[n++], x);
612 r = get_process_exe(ucred->pid, &t);
614 x = strappenda("_EXE=", t);
616 IOVEC_SET_STRING(iovec[n++], x);
619 r = get_process_cmdline(ucred->pid, 0, false, &t);
621 x = strappenda("_CMDLINE=", t);
623 IOVEC_SET_STRING(iovec[n++], x);
626 r = get_process_capeff(ucred->pid, &t);
628 x = strappenda("_CAP_EFFECTIVE=", t);
630 IOVEC_SET_STRING(iovec[n++], x);
634 r = audit_session_from_pid(ucred->pid, &audit);
636 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
637 IOVEC_SET_STRING(iovec[n++], audit_session);
640 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
642 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
643 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
647 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
649 char *session = NULL;
651 x = strappenda("_SYSTEMD_CGROUP=", c);
652 IOVEC_SET_STRING(iovec[n++], x);
654 r = cg_path_get_session(c, &t);
656 session = strappenda("_SYSTEMD_SESSION=", t);
658 IOVEC_SET_STRING(iovec[n++], session);
661 if (cg_path_get_owner_uid(c, &owner) >= 0) {
664 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
665 IOVEC_SET_STRING(iovec[n++], owner_uid);
668 if (cg_path_get_unit(c, &t) >= 0) {
669 x = strappenda("_SYSTEMD_UNIT=", t);
671 IOVEC_SET_STRING(iovec[n++], x);
672 } else if (unit_id && !session) {
673 x = strappenda("_SYSTEMD_UNIT=", unit_id);
674 IOVEC_SET_STRING(iovec[n++], x);
677 if (cg_path_get_user_unit(c, &t) >= 0) {
678 x = strappenda("_SYSTEMD_USER_UNIT=", t);
680 IOVEC_SET_STRING(iovec[n++], x);
681 } else if (unit_id && session) {
682 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
683 IOVEC_SET_STRING(iovec[n++], x);
686 if (cg_path_get_slice(c, &t) >= 0) {
687 x = strappenda("_SYSTEMD_SLICE=", t);
689 IOVEC_SET_STRING(iovec[n++], x);
693 } else if (unit_id) {
694 x = strappenda("_SYSTEMD_UNIT=", unit_id);
695 IOVEC_SET_STRING(iovec[n++], x);
699 if (mac_selinux_use()) {
701 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
703 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
704 IOVEC_SET_STRING(iovec[n++], x);
706 security_context_t con;
708 if (getpidcon(ucred->pid, &con) >= 0) {
709 x = strappenda("_SELINUX_CONTEXT=", con);
712 IOVEC_SET_STRING(iovec[n++], x);
721 r = get_process_uid(object_pid, &object_uid);
723 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
724 IOVEC_SET_STRING(iovec[n++], o_uid);
727 r = get_process_gid(object_pid, &object_gid);
729 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
730 IOVEC_SET_STRING(iovec[n++], o_gid);
733 r = get_process_comm(object_pid, &t);
735 x = strappenda("OBJECT_COMM=", t);
737 IOVEC_SET_STRING(iovec[n++], x);
740 r = get_process_exe(object_pid, &t);
742 x = strappenda("OBJECT_EXE=", t);
744 IOVEC_SET_STRING(iovec[n++], x);
747 r = get_process_cmdline(object_pid, 0, false, &t);
749 x = strappenda("OBJECT_CMDLINE=", t);
751 IOVEC_SET_STRING(iovec[n++], x);
755 r = audit_session_from_pid(object_pid, &audit);
757 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
758 IOVEC_SET_STRING(iovec[n++], o_audit_session);
761 r = audit_loginuid_from_pid(object_pid, &loginuid);
763 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
764 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
768 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
770 x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
771 IOVEC_SET_STRING(iovec[n++], x);
773 r = cg_path_get_session(c, &t);
775 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
777 IOVEC_SET_STRING(iovec[n++], x);
780 if (cg_path_get_owner_uid(c, &owner) >= 0) {
781 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
782 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
785 if (cg_path_get_unit(c, &t) >= 0) {
786 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
788 IOVEC_SET_STRING(iovec[n++], x);
791 if (cg_path_get_user_unit(c, &t) >= 0) {
792 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
794 IOVEC_SET_STRING(iovec[n++], x);
803 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
804 IOVEC_SET_STRING(iovec[n++], source_time);
807 /* Note that strictly speaking storing the boot id here is
808 * redundant since the entry includes this in-line
809 * anyway. However, we need this indexed, too. */
810 if (!isempty(s->boot_id_field))
811 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
813 if (!isempty(s->machine_id_field))
814 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
816 if (!isempty(s->hostname_field))
817 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
821 if (s->split_mode == SPLIT_UID && realuid > 0)
822 /* Split up strictly by any UID */
823 journal_uid = realuid;
824 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
825 /* Split up by login UIDs. We do this only if the
826 * realuid is not root, in order not to accidentally
827 * leak privileged information to the user that is
828 * logged by a privileged process that is part of an
829 * unprivileged session. */
834 write_to_journal(s, journal_uid, iovec, n, priority);
837 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
838 char mid[11 + 32 + 1];
839 char buffer[16 + LINE_MAX + 1];
840 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
843 struct ucred ucred = {};
848 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
849 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
851 memcpy(buffer, "MESSAGE=", 8);
852 va_start(ap, format);
853 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
855 char_array_0(buffer);
856 IOVEC_SET_STRING(iovec[n++], buffer);
858 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
859 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
861 IOVEC_SET_STRING(iovec[n++], mid);
864 ucred.pid = getpid();
865 ucred.uid = getuid();
866 ucred.gid = getgid();
868 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
871 void server_dispatch_message(
873 struct iovec *iovec, unsigned n, unsigned m,
874 const struct ucred *ucred,
875 const struct timeval *tv,
876 const char *label, size_t label_len,
882 _cleanup_free_ char *path = NULL;
886 assert(iovec || n == 0);
891 if (LOG_PRI(priority) > s->max_level_store)
894 /* Stop early in case the information will not be stored
896 if (s->storage == STORAGE_NONE)
902 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
906 /* example: /user/lennart/3/foobar
907 * /system/dbus.service/foobar
909 * So let's cut off everything past the third /, since that is
910 * where user directories start */
912 c = strchr(path, '/');
914 c = strchr(c+1, '/');
916 c = strchr(c+1, '/');
922 rl = journal_rate_limit_test(s->rate_limit, path,
923 priority & LOG_PRIMASK, available_space(s, false));
928 /* Write a suppression message if we suppressed something */
930 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
931 "Suppressed %u messages from %s", rl - 1, path);
934 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
938 static int system_journal_open(Server *s, bool flush_requested) {
944 r = sd_id128_get_machine(&machine);
946 return log_error_errno(r, "Failed to get machine id: %m");
948 sd_id128_to_string(machine, ids);
950 if (!s->system_journal &&
951 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
953 || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
955 /* If in auto mode: first try to create the machine
956 * path, but not the prefix.
958 * If in persistent mode: create /var/log/journal and
959 * the machine path */
961 if (s->storage == STORAGE_PERSISTENT)
962 (void) mkdir("/var/log/journal/", 0755);
964 fn = strappenda("/var/log/journal/", ids);
965 (void) mkdir(fn, 0755);
967 fn = strappenda(fn, "/system.journal");
968 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
971 server_fix_perms(s, s->system_journal, 0);
973 if (r != -ENOENT && r != -EROFS)
974 log_warning_errno(r, "Failed to open system journal: %m");
980 if (!s->runtime_journal &&
981 (s->storage != STORAGE_NONE)) {
983 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
987 if (s->system_journal) {
989 /* Try to open the runtime journal, but only
990 * if it already exists, so that we can flush
991 * it into the system journal */
993 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
998 log_warning_errno(r, "Failed to open runtime journal: %m");
1005 /* OK, we really need the runtime journal, so create
1006 * it if necessary. */
1008 (void) mkdir("/run/log", 0755);
1009 (void) mkdir("/run/log/journal", 0755);
1010 (void) mkdir_parents(fn, 0750);
1012 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1016 return log_error_errno(r, "Failed to open runtime journal: %m");
1019 if (s->runtime_journal)
1020 server_fix_perms(s, s->runtime_journal, 0);
1023 available_space(s, true);
1028 int server_flush_to_var(Server *s) {
1030 sd_journal *j = NULL;
1031 char ts[FORMAT_TIMESPAN_MAX];
1038 if (s->storage != STORAGE_AUTO &&
1039 s->storage != STORAGE_PERSISTENT)
1042 if (!s->runtime_journal)
1045 system_journal_open(s, true);
1047 if (!s->system_journal)
1050 log_debug("Flushing to /var...");
1052 start = now(CLOCK_MONOTONIC);
1054 r = sd_id128_get_machine(&machine);
1058 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1060 return log_error_errno(r, "Failed to read runtime journal: %m");
1062 sd_journal_set_data_threshold(j, 0);
1064 SD_JOURNAL_FOREACH(j) {
1068 f = j->current_file;
1069 assert(f && f->current_offset > 0);
1073 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1075 log_error_errno(r, "Can't read entry: %m");
1079 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1083 if (!shall_try_append_again(s->system_journal, r)) {
1084 log_error_errno(r, "Can't write entry: %m");
1091 if (!s->system_journal) {
1092 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1097 log_debug("Retrying write.");
1098 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1100 log_error_errno(r, "Can't write entry: %m");
1106 journal_file_post_change(s->system_journal);
1108 journal_file_close(s->runtime_journal);
1109 s->runtime_journal = NULL;
1112 rm_rf("/run/log/journal", false, true, false);
1114 sd_journal_close(j);
1116 server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1121 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1122 Server *s = userdata;
1125 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1127 if (revents != EPOLLIN) {
1128 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1133 struct ucred *ucred = NULL;
1134 struct timeval *tv = NULL;
1135 struct cmsghdr *cmsg;
1137 size_t label_len = 0;
1141 struct cmsghdr cmsghdr;
1143 /* We use NAME_MAX space for the SELinux label
1144 * here. The kernel currently enforces no
1145 * limit, but according to suggestions from
1146 * the SELinux people this will change and it
1147 * will probably be identical to NAME_MAX. For
1148 * now we use that, but this should be updated
1149 * one day when the final limit is known. */
1150 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1151 CMSG_SPACE(sizeof(struct timeval)) +
1152 CMSG_SPACE(sizeof(int)) + /* fd */
1153 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1155 union sockaddr_union sa = {};
1156 struct msghdr msghdr = {
1159 .msg_control = &control,
1160 .msg_controllen = sizeof(control),
1162 .msg_namelen = sizeof(sa),
1171 /* Try to get the right size, if we can. (Not all
1172 * sockets support SIOCINQ, hence we just try, but
1173 * don't rely on it.) */
1174 (void) ioctl(fd, SIOCINQ, &v);
1176 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1177 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1179 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1181 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1184 iovec.iov_base = s->buffer;
1185 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1187 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1189 if (errno == EINTR || errno == EAGAIN)
1192 log_error_errno(errno, "recvmsg() failed: %m");
1196 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1198 if (cmsg->cmsg_level == SOL_SOCKET &&
1199 cmsg->cmsg_type == SCM_CREDENTIALS &&
1200 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1201 ucred = (struct ucred*) CMSG_DATA(cmsg);
1202 else if (cmsg->cmsg_level == SOL_SOCKET &&
1203 cmsg->cmsg_type == SCM_SECURITY) {
1204 label = (char*) CMSG_DATA(cmsg);
1205 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1206 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1207 cmsg->cmsg_type == SO_TIMESTAMP &&
1208 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1209 tv = (struct timeval*) CMSG_DATA(cmsg);
1210 else if (cmsg->cmsg_level == SOL_SOCKET &&
1211 cmsg->cmsg_type == SCM_RIGHTS) {
1212 fds = (int*) CMSG_DATA(cmsg);
1213 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1217 /* And a trailing NUL, just in case */
1220 if (fd == s->syslog_fd) {
1221 if (n > 0 && n_fds == 0)
1222 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1224 log_warning("Got file descriptors via syslog socket. Ignoring.");
1226 } else if (fd == s->native_fd) {
1227 if (n > 0 && n_fds == 0)
1228 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1229 else if (n == 0 && n_fds == 1)
1230 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1232 log_warning("Got too many file descriptors via native socket. Ignoring.");
1235 assert(fd == s->audit_fd);
1237 if (n > 0 && n_fds == 0)
1238 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1240 log_warning("Got file descriptors via audit socket. Ignoring.");
1243 close_many(fds, n_fds);
1247 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1248 Server *s = userdata;
1252 log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
1254 server_flush_to_var(s);
1258 touch("/run/systemd/journal/flushed");
1263 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1264 Server *s = userdata;
1268 log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
1275 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1276 Server *s = userdata;
1280 log_received_signal(LOG_INFO, si);
1282 sd_event_exit(s->event, 0);
1286 static int setup_signals(Server *s) {
1292 assert_se(sigemptyset(&mask) == 0);
1293 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1294 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1296 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1300 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1304 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1308 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1315 static int server_parse_proc_cmdline(Server *s) {
1316 _cleanup_free_ char *line = NULL;
1317 const char *w, *state;
1321 r = proc_cmdline(&line);
1323 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
1327 FOREACH_WORD_QUOTED(w, l, line, state) {
1328 _cleanup_free_ char *word;
1330 word = strndup(w, l);
1334 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1335 r = parse_boolean(word + 35);
1337 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1339 s->forward_to_syslog = r;
1340 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1341 r = parse_boolean(word + 33);
1343 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1345 s->forward_to_kmsg = r;
1346 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1347 r = parse_boolean(word + 36);
1349 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1351 s->forward_to_console = r;
1352 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1353 r = parse_boolean(word + 33);
1355 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1357 s->forward_to_wall = r;
1358 } else if (startswith(word, "systemd.journald"))
1359 log_warning("Invalid systemd.journald parameter. Ignoring.");
1361 /* do not warn about state here, since probably systemd already did */
1366 static int server_parse_config_file(Server *s) {
1369 return config_parse_many("/etc/systemd/journald.conf",
1370 CONF_DIRS_NULSTR("systemd/journald.conf"),
1372 config_item_perf_lookup, journald_gperf_lookup,
1376 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1377 Server *s = userdata;
1385 int server_schedule_sync(Server *s, int priority) {
1390 if (priority <= LOG_CRIT) {
1391 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1396 if (s->sync_scheduled)
1399 if (s->sync_interval_usec > 0) {
1402 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1406 when += s->sync_interval_usec;
1408 if (!s->sync_event_source) {
1409 r = sd_event_add_time(
1411 &s->sync_event_source,
1414 server_dispatch_sync, s);
1418 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1420 r = sd_event_source_set_time(s->sync_event_source, when);
1424 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1429 s->sync_scheduled = true;
1435 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1436 Server *s = userdata;
1440 server_cache_hostname(s);
1444 static int server_open_hostname(Server *s) {
1449 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1450 if (s->hostname_fd < 0)
1451 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1453 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1455 /* kernels prior to 3.2 don't support polling this file. Ignore
1458 log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1460 s->hostname_fd = safe_close(s->hostname_fd);
1464 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1467 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1469 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
/* Initialises a Server: defaults, configuration, event loop, sockets and
 * journals.
 *
 * Visible sequence:
 *  1. Mark every socket/file descriptor field as unset (-1) and install the
 *     compiled-in defaults (sync interval, rate limit, forwarding, maximum
 *     file age, maximum log levels per target).
 *  2. Fill both metrics structures with 0xFF bytes, then run the config file
 *     and kernel command line parsers.
 *  3. Normalise the rate limit: if exactly one of interval and burst is zero,
 *     both are forced to zero.
 *  4. Create /run/systemd/journal, the user journal hashmap, the mmap cache
 *     and the default event loop, and enable the event loop watchdog.
 *  5. Walk the descriptors reported by sd_listen_fds() and classify each one
 *     as native, stdout, syslog or audit socket; a second socket of a kind
 *     that is already set is reported as an error, and unknown descriptors
 *     are reported with a warning.
 *  6. Open, in this order: syslog socket, native socket, stdout socket,
 *     /dev/kmsg, audit, kernel sequence number, hostname watch; then set up
 *     signals.
 *  7. Create the udev context and the rate limiter, look up the cgroup root
 *     path, cache hostname, boot id and machine id, and finally call
 *     system_journal_open(s, false).
 *
 * NOTE(review): almost all error checks, the assignments of accepted
 * descriptors and the return statements are elided from this listing, so
 * failure semantics beyond the visible log_error_errno() returns cannot be
 * documented from here. */
1474 int server_init(Server *s) {
/* -1 marks each descriptor as not opened yet. */
1480 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = -1;
1484 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1485 s->sync_scheduled = false;
1487 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1488 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1490 s->forward_to_wall = true;
1492 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
/* Default severity ceiling for each output target. */
1494 s->max_level_store = LOG_DEBUG;
1495 s->max_level_syslog = LOG_DEBUG;
1496 s->max_level_kmsg = LOG_NOTICE;
1497 s->max_level_console = LOG_INFO;
1498 s->max_level_wall = LOG_EMERG;
/* Every byte of both metrics structs becomes 0xFF.
 * NOTE(review): presumably this marks all fields as unset; confirm against
 * the code that consumes these metrics. */
1500 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1501 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* Both parsers run after the defaults above were stored; presumably they
 * override them with configured values. */
1503 server_parse_config_file(s);
1504 server_parse_proc_cmdline(s);
/* !!x maps each value to 0 or 1, so the XOR is true when exactly one of the
 * two settings is zero. In that case both are reset to zero. */
1505 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1506 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1507 s->rate_limit_interval, s->rate_limit_burst);
1508 s->rate_limit_interval = s->rate_limit_burst = 0;
/* The result of mkdir_p() is not captured on this line. */
1511 mkdir_p("/run/systemd/journal", 0755);
1513 s->user_journals = ordered_hashmap_new(NULL);
1514 if (!s->user_journals)
1517 s->mmap = mmap_cache_new();
1521 r = sd_event_default(&s->event);
1523 return log_error_errno(r, "Failed to create event loop: %m");
1525 sd_event_set_watchdog(s->event, true);
/* Number of descriptors handed over by the service manager. */
1527 n = sd_listen_fds(true);
1529 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
/* Passed descriptors are numbered consecutively from SD_LISTEN_FDS_START. */
1531 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
/* Native protocol: datagram socket bound to /run/systemd/journal/socket. */
1533 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1535 if (s->native_fd >= 0) {
1536 log_error("Too many native sockets passed.");
/* Stdout streams: stream socket at /run/systemd/journal/stdout; the third
 * argument is 1 here, unlike the -1 used for the datagram sockets. */
1542 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1544 if (s->stdout_fd >= 0) {
1545 log_error("Too many stdout sockets passed.");
/* Syslog: datagram socket at either of the two accepted paths. */
1551 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1552 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1554 if (s->syslog_fd >= 0) {
1555 log_error("Too many /dev/log sockets passed.");
/* Audit: any raw netlink socket is taken to be the audit socket. */
1561 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1563 if (s->audit_fd >= 0) {
1564 log_error("Too many audit sockets passed.");
/* Anything else is reported; the comment that follows (cut off in this
 * listing) explains that such a descriptor is closed. */
1571 log_warning("Unknown socket passed as file descriptor %d, ignoring.", fd);
1573 /* Let's close the fd, better be safe than
1574 sorry. The fd might reference some resource
1575 that we really want to release if we don't
1582 r = server_open_syslog_socket(s);
1586 r = server_open_native_socket(s);
1590 r = server_open_stdout_socket(s);
1594 r = server_open_dev_kmsg(s);
1598 r = server_open_audit(s);
1602 r = server_open_kernel_seqnum(s);
1606 r = server_open_hostname(s);
1610 r = setup_signals(s);
1614 s->udev = udev_new();
1618 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1622 r = cg_get_root_path(&s->cgroup_root);
1626 server_cache_hostname(s);
1627 server_cache_boot_id(s);
1628 server_cache_machine_id(s);
1630 r = system_journal_open(s, false);
/* Offers every open persistent journal file the chance to append a tag,
 * using one shared CLOCK_REALTIME timestamp for all of them.
 *
 * The system journal is handled first (if it is open), followed by every
 * file stored in s->user_journals. s->runtime_journal is not touched in the
 * lines visible here.
 *
 * NOTE(review): local declarations and any surrounding preprocessor guards
 * are elided from this listing. */
1637 void server_maybe_append_tags(Server *s) {
/* Take the timestamp once so that all files see the same value. */
1643 n = now(CLOCK_REALTIME);
1645 if (s->system_journal)
1646 journal_file_maybe_append_tag(s->system_journal, n);
/* f iterates over the journal files in the hashmap, i is the iterator. */
1648 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1649 journal_file_maybe_append_tag(f, n);
/* Releases the resources held by a Server.
 *
 * Visible order of teardown:
 *  1. free all stdout streams;
 *  2. close the system journal and the runtime journal when open, then close
 *     every user journal and free the hashmap that held them;
 *  3. drop the references on all event sources and on the event loop;
 *  4. close the socket and file descriptors;
 *  5. free the rate limiter, unmap the kernel sequence number mapping,
 *     free the owned strings, and drop the mmap cache and udev references.
 *
 * NOTE(review): a few lines of the original body are elided from this
 * listing, so the list above may be incomplete. */
1653 void server_done(Server *s) {
/* The loop relies on stdout_stream_free() removing the freed stream from the
 * s->stdout_streams list; otherwise it would not terminate. */
1657 while (s->stdout_streams)
1658 stdout_stream_free(s->stdout_streams);
1660 if (s->system_journal)
1661 journal_file_close(s->system_journal);
1663 if (s->runtime_journal)
1664 journal_file_close(s->runtime_journal);
/* Take the entries out of the hashmap one at a time and close each file,
 * then free the emptied hashmap itself. */
1666 while ((f = ordered_hashmap_steal_first(s->user_journals)))
1667 journal_file_close(f);
1669 ordered_hashmap_free(s->user_journals);
/* Event sources are released before the event loop they belong to. */
1671 sd_event_source_unref(s->syslog_event_source);
1672 sd_event_source_unref(s->native_event_source);
1673 sd_event_source_unref(s->stdout_event_source);
1674 sd_event_source_unref(s->dev_kmsg_event_source);
1675 sd_event_source_unref(s->audit_event_source);
1676 sd_event_source_unref(s->sync_event_source);
1677 sd_event_source_unref(s->sigusr1_event_source);
1678 sd_event_source_unref(s->sigusr2_event_source);
1679 sd_event_source_unref(s->sigterm_event_source);
1680 sd_event_source_unref(s->sigint_event_source);
1681 sd_event_source_unref(s->hostname_event_source);
1682 sd_event_unref(s->event);
/* The descriptors are closed without a prior validity test here. */
1684 safe_close(s->syslog_fd);
1685 safe_close(s->native_fd);
1686 safe_close(s->stdout_fd);
1687 safe_close(s->dev_kmsg_fd);
1688 safe_close(s->audit_fd);
1689 safe_close(s->hostname_fd);
1692 journal_rate_limit_free(s->rate_limit);
/* The mapping that is removed is exactly one uint64_t in size. */
1694 if (s->kernel_seqnum)
1695 munmap(s->kernel_seqnum, sizeof(uint64_t));
1699 free(s->cgroup_root);
1700 free(s->hostname_field);
1703 mmap_cache_unref(s->mmap);
1706 udev_unref(s->udev);