1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33 #include "sd-daemon.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
42 #include "conf-parser.h"
43 #include "selinux-util.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-rate-limit.h"
48 #include "journald-kmsg.h"
49 #include "journald-syslog.h"
50 #include "journald-stream.h"
51 #include "journald-console.h"
52 #include "journald-native.h"
53 #include "journald-audit.h"
54 #include "journald-server.h"
58 #include <acl/libacl.h>
63 #include <selinux/selinux.h>
/* Cap on the number of per-user journal files kept open at once; find_journal()
 * closes entries from s->user_journals while this many are open. */
66 #define USER_JOURNALS_MAX 1024
/* Defaults that server_init() assigns before the configuration file and the
 * kernel command line are parsed. */
68 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
69 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
70 #define DEFAULT_RATE_LIMIT_BURST 1000
71 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
/* Maximum age of the value cached by available_space() before it is
 * recomputed. */
73 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Textual names of the Storage enum values. The two macro invocations below
 * presumably generate the string<->enum lookup helpers and the
 * config_parse_storage() parser from this table - confirm against the macro
 * definitions, which are not part of this file. */
75 static const char* const storage_table[_STORAGE_MAX] = {
76 [STORAGE_AUTO] = "auto",
77 [STORAGE_VOLATILE] = "volatile",
78 [STORAGE_PERSISTENT] = "persistent",
79 [STORAGE_NONE] = "none"
82 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
83 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Textual names of the SplitMode enum values, used the same way as
 * storage_table.
 * NOTE(review): no [SPLIT_UID] entry is visible in this listing although
 * SPLIT_UID is used by dispatch_message_real() - confirm the table is
 * complete in the real file. */
85 static const char* const split_mode_table[_SPLIT_MAX] = {
86 [SPLIT_LOGIN] = "login",
88 [SPLIT_NONE] = "none",
91 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
92 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/*
 * Computes how many more bytes the active journal may grow, stores the result
 * in s->cached_available_space (with a timestamp) and returns it.
 *
 * The directory inspected is /var/log/journal/<machine-id> with
 * s->system_metrics when the system journal is open; the /run/log/journal/
 * path with s->runtime_metrics is the alternative. The result is
 *     MIN(max_use, fs_available - keep_free) - bytes used by journal files
 * with both subtractions clamped through LESS_BY().
 *
 * The formatted usage message is sent through server_driver_message();
 * presumably only when verbose is set (the guarding line is not visible).
 *
 * NOTE(review): several declarations, error checks and early returns of this
 * function are absent from this listing.
 */
94 static uint64_t available_space(Server *s, bool verbose) {
96 _cleanup_free_ char *p = NULL;
99 uint64_t sum = 0, ss_avail = 0, avail = 0;
101 _cleanup_closedir_ DIR *d = NULL;
106 ts = now(CLOCK_MONOTONIC);
/* Serve the cached figure while it is younger than
 * RECHECK_AVAILABLE_SPACE_USEC. The rest of this condition is not visible. */
108 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
110 return s->cached_available_space;
112 r = sd_id128_get_machine(&machine);
/* Pick directory prefix and metrics matching the journal in use. */
116 if (s->system_journal) {
117 f = "/var/log/journal/";
118 m = &s->system_metrics;
120 f = "/run/log/journal/";
121 m = &s->runtime_metrics;
126 p = strappend(f, sd_id128_to_string(machine, ids));
134 if (fstatvfs(dirfd(d), &ss) < 0)
143 if (!de && errno != 0)
/* Only regular files named *.journal or *.journal~ count towards usage. */
149 if (!endswith(de->d_name, ".journal") &&
150 !endswith(de->d_name, ".journal~"))
153 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
156 if (!S_ISREG(st.st_mode))
/* Sums allocated blocks (st_blocks * 512), i.e. disk usage rather than the
 * apparent file size. */
159 sum += (uint64_t) st.st_blocks * 512UL;
/* NOTE(review): POSIX expresses f_bavail in units of f_frsize; f_bsize is
 * used here - confirm this is intended for the file systems in question. */
162 ss_avail = ss.f_bsize * ss.f_bavail;
164 /* If we reached a high mark, we will always allow this much
165 * again, unless usage goes above max_use. This watermark
166 * value is cached so that we don't give up space on pressure,
167 * but hover below the maximum usage. */
172 avail = LESS_BY(ss_avail, m->keep_free);
174 s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
175 s->cached_available_space_timestamp = ts;
178 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
179 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
/* The "current limit" reported is remaining space plus current usage. */
181 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
182 "%s journal is using %s (max allowed %s, "
183 "trying to leave %s free of %s available → current limit %s).",
184 s->system_journal ? "Permanent" : "Runtime",
185 format_bytes(fb1, sizeof(fb1), sum),
186 format_bytes(fb2, sizeof(fb2), m->max_use),
187 format_bytes(fb3, sizeof(fb3), m->keep_free),
188 format_bytes(fb4, sizeof(fb4), ss_avail),
189 format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
192 return s->cached_available_space;
/*
 * Sets the access mode of journal file f to 0640 and patches the file's ACL
 * so that the entry for uid carries ACL_READ. An ACL_USER entry for uid is
 * created when needed (the exact condition on acl_find_uid()'s result is not
 * visible). Every failure is logged as a warning and otherwise ignored.
 *
 * A check of uid against SYSTEM_UID_MAX precedes the ACL work; presumably the
 * ACL part is skipped for uid <= SYSTEM_UID_MAX (the statement following the
 * check is not visible).
 *
 * NOTE(review): declarations, several conditions, the early exits and any
 * ACL cleanup are absent from this listing.
 */
195 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
200 acl_permset_t permset;
205 r = fchmod(f->fd, 0640);
/* NOTE(review): r is fchmod()'s return value (-1 on failure), not an errno
 * code, so the %m text produced here presumably does not describe the real
 * error. The ACL branches below pass errno instead - confirm and align. */
207 log_warning_errno(r, "Failed to fix access mode on %s, ignoring: %m", f->path);
210 if (uid <= SYSTEM_UID_MAX)
213 acl = acl_get_fd(f->fd);
215 log_warning_errno(errno, "Failed to read ACL on %s, ignoring: %m", f->path);
219 r = acl_find_uid(acl, uid, &entry);
/* Create a new ACL_USER entry qualified with uid. */
222 if (acl_create_entry(&acl, &entry) < 0 ||
223 acl_set_tag_type(entry, ACL_USER) < 0 ||
224 acl_set_qualifier(entry, &uid) < 0) {
225 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
230 /* We do not recalculate the mask unconditionally here,
231 * so that the fchmod() mask above stays intact. */
232 if (acl_get_permset(entry, &permset) < 0 ||
233 acl_add_perm(permset, ACL_READ) < 0 ||
234 calc_acl_mask_if_needed(&acl) < 0) {
235 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
239 if (acl_set_fd(f->fd, acl) < 0)
240 log_warning_errno(errno, "Failed to set ACL on %s, ignoring: %m", f->path);
/*
 * Selects the journal file that entries for uid are written to.
 *
 * - the runtime journal, whenever it is open;
 * - the system journal for uid <= SYSTEM_UID_MAX, and as the fallback on the
 *   failure paths visible below (path formatting, open, hashmap insertion);
 * - otherwise the per-user file
 *   /var/log/journal/<machine-id>/user-<uid>.journal, looked up in
 *   s->user_journals and opened (created if necessary) on a miss.
 *
 * Before a new user journal is opened, entries are stolen from the front of
 * s->user_journals and closed while the map holds USER_JOURNALS_MAX or more.
 *
 * NOTE(review): the checks following several calls (for example the use of
 * the hashmap lookup result) are absent from this listing.
 */
247 static JournalFile* find_journal(Server *s, uid_t uid) {
248 _cleanup_free_ char *p = NULL;
255 /* We split up user logs only on /var, not on /run. If the
256 * runtime file is open, we write to it exclusively, in order
257 * to guarantee proper order as soon as we flush /run to
258 * /var and close the runtime file. */
260 if (s->runtime_journal)
261 return s->runtime_journal;
263 if (uid <= SYSTEM_UID_MAX)
264 return s->system_journal;
266 r = sd_id128_get_machine(&machine);
268 return s->system_journal;
270 f = ordered_hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
274 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
275 SD_ID128_FORMAT_VAL(machine), uid) < 0)
276 return s->system_journal;
278 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
279 /* Too many open? Then let's close one */
280 f = ordered_hashmap_steal_first(s->user_journals);
282 journal_file_close(f);
285 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
287 return s->system_journal;
/* Give the owning user read access to the freshly opened file. */
289 server_fix_perms(s, f, uid);
291 r = ordered_hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
293 journal_file_close(f);
294 return s->system_journal;
/*
 * Rotates one journal file through journal_file_rotate() and re-applies
 * permissions to the resulting file with server_fix_perms(). Two different
 * errors are logged: failure to rotate an existing path and failure to create
 * the new journal called name.
 *
 * NOTE(review): the parameter list and the conditions selecting between the
 * two log messages are absent from this listing; server_rotate() calls this
 * as do_rotate(s, &file, name, seal, uid).
 */
300 static int do_rotate(
313 r = journal_file_rotate(f, s->compress, seal);
316 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
318 log_error_errno(r, "Failed to create new %s journal: %m", name);
320 server_fix_perms(s, *f, uid);
/*
 * Rotates the runtime journal (never sealed), the system journal and every
 * open user journal (both using s->seal).
 *
 * For user journals the map entry is either replaced with the file returned
 * by do_rotate() or removed because the old file has been closed and
 * deallocated; the condition choosing between the two is not visible in this
 * listing.
 */
324 void server_rotate(Server *s) {
330 log_debug("Rotating...");
332 do_rotate(s, &s->runtime_journal, "runtime", false, 0);
333 do_rotate(s, &s->system_journal, "system", s->seal, 0);
/* The hashmap key is the uid the journal belongs to. */
335 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
336 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
338 ordered_hashmap_replace(s->user_journals, k, f);
340 /* Old file has been closed and deallocated */
341 ordered_hashmap_remove(s->user_journals, k);
/*
 * Puts the system journal (if open) and every user journal offline via
 * journal_file_set_offline(), then switches the sync timer source off and
 * clears s->sync_scheduled. Failures are logged; nothing is returned.
 * The runtime journal is not touched by the lines visible here.
 */
345 void server_sync(Server *s) {
351 if (s->system_journal) {
352 r = journal_file_set_offline(s->system_journal);
354 log_error_errno(r, "Failed to sync system journal: %m");
357 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
358 r = journal_file_set_offline(f);
360 log_error_errno(r, "Failed to sync user journal: %m");
/* The timer may not exist yet; server_schedule_sync() creates it lazily. */
363 if (s->sync_event_source) {
364 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
366 log_error_errno(r, "Failed to disable sync timer source: %m");
369 s->sync_scheduled = false;
/*
 * Vacuums the journal directory formed by appending id to path, limited by
 * metrics->max_use and s->max_retention_usec; s->oldest_file_usec is passed
 * by address to journal_directory_vacuum(). Errors other than -ENOENT are
 * logged.
 *
 * NOTE(review): most of the parameter list is absent from this listing;
 * server_vacuum() calls this as do_vacuum(s, ids, journal_file, path, metrics).
 */
372 static void do_vacuum(
377 JournalMetrics *metrics) {
385 p = strappenda(path, id);
386 r = journal_directory_vacuum(p, metrics->max_use, s->max_retention_usec, &s->oldest_file_usec, false);
387 if (r < 0 && r != -ENOENT)
388 log_error_errno(r, "Failed to vacuum %s: %m", p);
/*
 * Vacuums the persistent and the runtime journal directory of this machine.
 * Resets s->oldest_file_usec beforehand and invalidates the value cached by
 * available_space() afterwards, so that the next query re-scans the
 * directory.
 */
391 void server_vacuum(Server *s) {
396 log_debug("Vacuuming...");
398 s->oldest_file_usec = 0;
400 r = sd_id128_get_machine(&machine);
402 log_error_errno(r, "Failed to get machine ID: %m");
405 sd_id128_to_string(machine, ids);
407 do_vacuum(s, ids, s->system_journal, "/var/log/journal/", &s->system_metrics);
408 do_vacuum(s, ids, s->runtime_journal, "/run/log/journal/", &s->runtime_metrics);
/* A zero timestamp forces available_space() to recompute. */
410 s->cached_available_space_timestamp = 0;
/*
 * Pre-formats the "_MACHINE_ID=<id>" field into s->machine_id_field so that
 * dispatch_message_real() can attach it to every entry without formatting
 * it again. The handling of a failing sd_id128_get_machine() is not visible
 * in this listing.
 */
413 static void server_cache_machine_id(Server *s) {
419 r = sd_id128_get_machine(&id);
/* stpcpy() returns the end of the copied prefix; the id string is written
 * right behind it. */
423 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
/*
 * Pre-formats the "_BOOT_ID=<id>" field into s->boot_id_field, mirroring
 * server_cache_machine_id(). The handling of a failing sd_id128_get_boot()
 * is not visible in this listing.
 */
426 static void server_cache_boot_id(Server *s) {
432 r = sd_id128_get_boot(&id);
436 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
/*
 * Rebuilds the cached "_HOSTNAME=<name>" field: reads the current host name,
 * builds the new string and swaps it into s->hostname_field, freeing the old
 * one. Called again from dispatch_hostname_change(). The temporary host name
 * copy is released automatically through _cleanup_free_.
 */
439 static void server_cache_hostname(Server *s) {
440 _cleanup_free_ char *t = NULL;
445 t = gethostname_malloc();
449 x = strappend("_HOSTNAME=", t);
/* Replace the old field only once the new one exists. */
453 free(s->hostname_field);
454 s->hostname_field = x;
457 static bool shall_try_append_again(JournalFile *f, int r) {
459 /* -E2BIG Hit configured limit
461 -EDQUOT Quota limit hit
463 -EIO I/O error of some kind (mmap)
464 -EHOSTDOWN Other machine
465 -EBUSY Unclean shutdown
466 -EPROTONOSUPPORT Unsupported feature
469 -ESHUTDOWN Already archived */
471 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
472 log_debug("%s: Allocation limit reached, rotating.", f->path);
473 else if (r == -EHOSTDOWN)
474 log_info("%s: Journal file from other machine, rotating.", f->path);
475 else if (r == -EBUSY)
476 log_info("%s: Unclean shutdown, rotating.", f->path);
477 else if (r == -EPROTONOSUPPORT)
478 log_info("%s: Unsupported feature, rotating.", f->path);
479 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
480 log_warning("%s: Journal file corrupted, rotating.", f->path);
482 log_warning("%s: IO error, rotating.", f->path);
/*
 * Appends one entry (n iovec fields) to the journal chosen by find_journal()
 * for uid and schedules a sync according to priority.
 *
 * Visible flow: a rotation is triggered first when
 * journal_file_rotate_suggested() says so; the journal is then looked up
 * again and the entry appended. If the append fails with an error that
 * shall_try_append_again() accepts, and no vacuum has happened yet, the
 * journal is looked up once more and the write retried exactly once.
 * Unrecoverable failures are logged with item count and total byte size and
 * the entry is dropped.
 *
 * NOTE(review): the rotate/vacuum calls and the success checks between the
 * steps are absent from this listing.
 */
489 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
491 bool vacuumed = false;
498 f = find_journal(s, uid);
502 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
503 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
/* Look the file up again, it may have been replaced. */
508 f = find_journal(s, uid);
513 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
515 server_schedule_sync(s, priority);
/* Give up if a vacuum already happened or the error is not retryable. */
519 if (vacuumed || !shall_try_append_again(f, r)) {
522 for (i = 0; i < n; i++)
523 size += iovec[i].iov_len;
525 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, size);
532 f = find_journal(s, uid);
536 log_debug("Retrying write.");
537 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
541 for (i = 0; i < n; i++)
542 size += iovec[i].iov_len;
544 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, size);
546 server_schedule_sync(s, priority);
/*
 * Completes an entry with metadata fields and hands it to write_to_journal().
 *
 * iovec holds n caller-supplied fields and has room for m entries in total;
 * the assertion below requires space for N_IOVEC_META_FIELDS more, plus
 * N_IOVEC_OBJECT_FIELDS when object_pid is set. Fields appended here:
 *   - sender credentials and process data taken from ucred (_PID, _UID, _GID,
 *     _COMM, _EXE, _CMDLINE, _CAP_EFFECTIVE, audit session/loginuid, cgroup
 *     derived _SYSTEMD_* fields, _SELINUX_CONTEXT);
 *   - the matching OBJECT_* fields describing object_pid;
 *   - _SOURCE_REALTIME_TIMESTAMP from tv, and the cached boot id, machine id
 *     and host name fields.
 * Finally the target journal uid is chosen from s->split_mode.
 *
 * Fixed-size buffers hold the numeric fields; string fields are built with
 * strappenda()/alloca(). NOTE(review): both presumably allocate on the stack
 * and therefore stay valid until this function returns - confirm against the
 * macro definition.
 *
 * NOTE(review): the guards around most lookups (success checks, the tests
 * for ucred/object_pid/tv being set, frees of temporaries, the assignments
 * to owner_valid and to journal_uid in the SPLIT_LOGIN/default cases) are
 * absent from this listing.
 */
549 static void dispatch_message_real(
551 struct iovec *iovec, unsigned n, unsigned m,
552 const struct ucred *ucred,
553 const struct timeval *tv,
554 const char *label, size_t label_len,
/* Each buffer is sized for its field prefix plus the widest decimal value. */
559 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
560 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
561 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
562 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
563 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
564 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
565 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
566 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
572 uid_t realuid = 0, owner = 0, journal_uid;
573 bool owner_valid = false;
575 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
576 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
577 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
578 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
587 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
/* --- Fields describing the sending process (from ucred) --- */
590 realuid = ucred->uid;
592 sprintf(pid, "_PID="PID_FMT, ucred->pid);
593 IOVEC_SET_STRING(iovec[n++], pid);
595 sprintf(uid, "_UID="UID_FMT, ucred->uid);
596 IOVEC_SET_STRING(iovec[n++], uid);
598 sprintf(gid, "_GID="GID_FMT, ucred->gid);
599 IOVEC_SET_STRING(iovec[n++], gid);
601 r = get_process_comm(ucred->pid, &t);
603 x = strappenda("_COMM=", t);
605 IOVEC_SET_STRING(iovec[n++], x);
608 r = get_process_exe(ucred->pid, &t);
610 x = strappenda("_EXE=", t);
612 IOVEC_SET_STRING(iovec[n++], x);
615 r = get_process_cmdline(ucred->pid, 0, false, &t);
617 x = strappenda("_CMDLINE=", t);
619 IOVEC_SET_STRING(iovec[n++], x);
622 r = get_process_capeff(ucred->pid, &t);
624 x = strappenda("_CAP_EFFECTIVE=", t);
626 IOVEC_SET_STRING(iovec[n++], x);
630 r = audit_session_from_pid(ucred->pid, &audit);
632 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
633 IOVEC_SET_STRING(iovec[n++], audit_session);
636 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
638 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
639 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* cgroup derived fields: session, owner uid, unit, user unit, slice. */
643 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
645 char *session = NULL;
647 x = strappenda("_SYSTEMD_CGROUP=", c);
648 IOVEC_SET_STRING(iovec[n++], x);
650 r = cg_path_get_session(c, &t);
652 session = strappenda("_SYSTEMD_SESSION=", t);
654 IOVEC_SET_STRING(iovec[n++], session);
657 if (cg_path_get_owner_uid(c, &owner) >= 0) {
660 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
661 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* When the cgroup path yields no unit, the caller-provided unit_id is used:
 * as system unit outside a session, as user unit inside one. */
664 if (cg_path_get_unit(c, &t) >= 0) {
665 x = strappenda("_SYSTEMD_UNIT=", t);
667 IOVEC_SET_STRING(iovec[n++], x);
668 } else if (unit_id && !session) {
669 x = strappenda("_SYSTEMD_UNIT=", unit_id);
670 IOVEC_SET_STRING(iovec[n++], x);
673 if (cg_path_get_user_unit(c, &t) >= 0) {
674 x = strappenda("_SYSTEMD_USER_UNIT=", t);
676 IOVEC_SET_STRING(iovec[n++], x);
677 } else if (unit_id && session) {
678 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
679 IOVEC_SET_STRING(iovec[n++], x);
682 if (cg_path_get_slice(c, &t) >= 0) {
683 x = strappenda("_SYSTEMD_SLICE=", t);
685 IOVEC_SET_STRING(iovec[n++], x);
/* No cgroup path available: fall back to unit_id alone. */
689 } else if (unit_id) {
690 x = strappenda("_SYSTEMD_UNIT=", unit_id);
691 IOVEC_SET_STRING(iovec[n++], x);
/* SELinux context: taken from the label passed in (it has an explicit length,
 * hence the manual termination) or queried with getpidcon(). */
695 if (mac_selinux_use()) {
697 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
699 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
700 IOVEC_SET_STRING(iovec[n++], x);
702 security_context_t con;
704 if (getpidcon(ucred->pid, &con) >= 0) {
705 x = strappenda("_SELINUX_CONTEXT=", con);
708 IOVEC_SET_STRING(iovec[n++], x);
/* --- OBJECT_* fields describing the process identified by object_pid --- */
717 r = get_process_uid(object_pid, &object_uid);
719 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
720 IOVEC_SET_STRING(iovec[n++], o_uid);
723 r = get_process_gid(object_pid, &object_gid);
725 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
726 IOVEC_SET_STRING(iovec[n++], o_gid);
729 r = get_process_comm(object_pid, &t);
731 x = strappenda("OBJECT_COMM=", t);
733 IOVEC_SET_STRING(iovec[n++], x);
736 r = get_process_exe(object_pid, &t);
738 x = strappenda("OBJECT_EXE=", t);
740 IOVEC_SET_STRING(iovec[n++], x);
743 r = get_process_cmdline(object_pid, 0, false, &t);
745 x = strappenda("OBJECT_CMDLINE=", t);
747 IOVEC_SET_STRING(iovec[n++], x);
751 r = audit_session_from_pid(object_pid, &audit);
753 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
754 IOVEC_SET_STRING(iovec[n++], o_audit_session);
757 r = audit_loginuid_from_pid(object_pid, &loginuid);
759 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
760 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
764 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
766 x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
767 IOVEC_SET_STRING(iovec[n++], x);
769 r = cg_path_get_session(c, &t);
771 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
773 IOVEC_SET_STRING(iovec[n++], x);
776 if (cg_path_get_owner_uid(c, &owner) >= 0) {
777 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
778 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
781 if (cg_path_get_unit(c, &t) >= 0) {
782 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
784 IOVEC_SET_STRING(iovec[n++], x);
787 if (cg_path_get_user_unit(c, &t) >= 0) {
788 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
790 IOVEC_SET_STRING(iovec[n++], x);
/* --- Timestamp and cached per-host fields --- */
799 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
800 IOVEC_SET_STRING(iovec[n++], source_time);
803 /* Note that strictly speaking storing the boot id here is
804 * redundant since the entry includes this in-line
805 * anyway. However, we need this indexed, too. */
806 if (!isempty(s->boot_id_field))
807 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
809 if (!isempty(s->machine_id_field))
810 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
812 if (!isempty(s->hostname_field))
813 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
/* --- Choose the journal (by uid) that receives the entry --- */
817 if (s->split_mode == SPLIT_UID && realuid > 0)
818 /* Split up strictly by any UID */
819 journal_uid = realuid;
820 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
821 /* Split up by login UIDs. We do this only if the
822 * realuid is not root, in order not to accidentally
823 * leak privileged information to the user that is
824 * logged by a privileged process that is part of an
825 * unprivileged session. */
830 write_to_journal(s, journal_uid, iovec, n, priority);
/*
 * Emits a message originating from journald itself.
 *
 * Builds an entry with PRIORITY=6, _TRANSPORT=driver, a printf-style
 * formatted MESSAGE= field and, unless message_id is SD_ID128_NULL, a
 * message id field. The entry is dispatched with journald's own
 * pid/uid/gid as credentials at LOG_INFO priority.
 *
 * The message text is formatted into a fixed buffer of 16 + LINE_MAX + 1
 * bytes; vsnprintf() truncates longer output and char_array_0() forces
 * termination. iovec reserves N_IOVEC_META_FIELDS extra slots for the
 * metadata added by dispatch_message_real().
 */
833 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
/* 11 + 32 + 1: presumably "MESSAGE_ID=" + 32 hex digits + NUL - confirm
 * against LOG_MESSAGE_ID(). */
834 char mid[11 + 32 + 1];
835 char buffer[16 + LINE_MAX + 1];
836 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
839 struct ucred ucred = {};
844 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
845 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The 8 byte prefix is copied without its NUL; the text follows directly. */
847 memcpy(buffer, "MESSAGE=", 8);
848 va_start(ap, format);
849 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
851 char_array_0(buffer);
852 IOVEC_SET_STRING(iovec[n++], buffer);
854 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
855 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
857 IOVEC_SET_STRING(iovec[n++], mid);
860 ucred.pid = getpid();
861 ucred.uid = getuid();
862 ucred.gid = getgid();
864 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
/*
 * Front end for storing an incoming message: filters by priority and storage
 * mode, applies per-cgroup rate limiting and then forwards the entry to
 * dispatch_message_real().
 *
 * Rate limiting is keyed on the sender's cgroup path, shortened at a fixed
 * number of '/' separators, and is given the currently available journal
 * space. The "Suppressed %u messages" notice reports rl - 1 messages; the
 * condition guarding it is not visible here.
 *
 * NOTE(review): parts of the parameter list, the early returns and the
 * result checks are absent from this listing, and the comment opened on the
 * "Stop early" line is not terminated in it.
 */
867 void server_dispatch_message(
869 struct iovec *iovec, unsigned n, unsigned m,
870 const struct ucred *ucred,
871 const struct timeval *tv,
872 const char *label, size_t label_len,
878 _cleanup_free_ char *path = NULL;
882 assert(iovec || n == 0);
/* Messages less important than max_level_store are filtered here. */
887 if (LOG_PRI(priority) > s->max_level_store)
890 /* Stop early in case the information will not be stored
892 if (s->storage == STORAGE_NONE)
898 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
902 /* example: /user/lennart/3/foobar
903 * /system/dbus.service/foobar
905 * So let's cut off everything past the third /, since that is
906 * where user directories start */
908 c = strchr(path, '/');
910 c = strchr(c+1, '/');
912 c = strchr(c+1, '/');
918 rl = journal_rate_limit_test(s->rate_limit, path,
919 priority & LOG_PRIMASK, available_space(s, false));
924 /* Write a suppression message if we suppressed something */
926 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
927 "Suppressed %u messages from %s", rl - 1, path);
930 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
/*
 * Opens the persistent system journal and/or the runtime journal, depending
 * on s->storage and on what is already open.
 *
 * System journal (/var/log/journal/<machine-id>/system.journal): attempted
 * only while it is not open yet, storage is persistent or auto, and a further
 * condition holds of which only the test for the flag file
 * /run/systemd/journal/flushed is visible (flush_requested is presumably the
 * other half - confirm). The /var/log/journal/ prefix is created only in
 * persistent mode. -ENOENT and -EROFS from the open are not warned about.
 *
 * Runtime journal (/run/log/journal/<machine-id>/system.journal): for any
 * storage mode except none. With a system journal open, the runtime file is
 * only opened if it already exists, so it can be flushed; otherwise it is
 * created along with its parent directories.
 *
 * Ends by refreshing and reporting the space figures via
 * available_space(s, true).
 *
 * NOTE(review): result checks, else branches and the return statements are
 * largely absent from this listing.
 */
934 static int system_journal_open(Server *s, bool flush_requested) {
940 r = sd_id128_get_machine(&machine);
942 return log_error_errno(r, "Failed to get machine id: %m");
944 sd_id128_to_string(machine, ids);
946 if (!s->system_journal &&
947 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
949 || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
951 /* If in auto mode: first try to create the machine
952 * path, but not the prefix.
954 * If in persistent mode: create /var/log/journal and
955 * the machine path */
957 if (s->storage == STORAGE_PERSISTENT)
958 (void) mkdir("/var/log/journal/", 0755);
960 fn = strappenda("/var/log/journal/", ids);
961 (void) mkdir(fn, 0755);
963 fn = strappenda(fn, "/system.journal");
964 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
967 server_fix_perms(s, s->system_journal, 0);
/* A missing directory or read-only file system is not worth a warning. */
969 if (r != -ENOENT && r != -EROFS)
970 log_warning_errno(r, "Failed to open system journal: %m");
976 if (!s->runtime_journal &&
977 (s->storage != STORAGE_NONE)) {
979 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
983 if (s->system_journal) {
985 /* Try to open the runtime journal, but only
986 * if it already exists, so that we can flush
987 * it into the system journal */
989 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
994 log_warning_errno(r, "Failed to open runtime journal: %m");
1001 /* OK, we really need the runtime journal, so create
1002 * it if necessary. */
1004 (void) mkdir("/run/log", 0755);
1005 (void) mkdir("/run/log/journal", 0755);
1006 (void) mkdir_parents(fn, 0750);
1008 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1012 return log_error_errno(r, "Failed to open runtime journal: %m");
1015 if (s->runtime_journal)
1016 server_fix_perms(s, s->runtime_journal, 0);
1019 available_space(s, true);
/*
 * Copies all entries of the runtime journal into the persistent system
 * journal, then closes the runtime journal and removes /run/log/journal.
 *
 * Preconditions checked up front: storage mode auto or persistent, an open
 * runtime journal, and a system journal that can be opened (attempted via
 * system_journal_open(s, true)).
 *
 * Entries are read with an sd_journal handle restricted to runtime files,
 * with the data threshold set to 0. When copying an entry fails with an
 * error accepted by shall_try_append_again(), the copy is retried once;
 * flushing is abandoned when the system journal is gone at that point.
 *
 * A driver message reporting the elapsed time and the entry count n is
 * emitted at the end.
 *
 * NOTE(review): the early returns, the rotate/vacuum calls before the retry,
 * the counting of n and the final return value are absent from this listing.
 */
1024 int server_flush_to_var(Server *s) {
1026 sd_journal *j = NULL;
1027 char ts[FORMAT_TIMESPAN_MAX];
1034 if (s->storage != STORAGE_AUTO &&
1035 s->storage != STORAGE_PERSISTENT)
1038 if (!s->runtime_journal)
1041 system_journal_open(s, true);
1043 if (!s->system_journal)
1046 log_debug("Flushing to /var...");
1048 start = now(CLOCK_MONOTONIC);
1050 r = sd_id128_get_machine(&machine);
1054 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1056 return log_error_errno(r, "Failed to read runtime journal: %m");
1058 sd_journal_set_data_threshold(j, 0);
1060 SD_JOURNAL_FOREACH(j) {
/* Work on the file and offset the iterator currently points at. */
1064 f = j->current_file;
1065 assert(f && f->current_offset > 0);
1069 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1071 log_error_errno(r, "Can't read entry: %m");
1075 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1079 if (!shall_try_append_again(s->system_journal, r)) {
1080 log_error_errno(r, "Can't write entry: %m");
1087 if (!s->system_journal) {
1088 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1093 log_debug("Retrying write.");
1094 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1096 log_error_errno(r, "Can't write entry: %m");
1102 journal_file_post_change(s->system_journal);
/* The runtime journal is no longer needed once its content is in /var. */
1104 journal_file_close(s->runtime_journal);
1105 s->runtime_journal = NULL;
1108 rm_rf("/run/log/journal", false, true, false);
1110 sd_journal_close(j);
1112 server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
/*
 * Event loop callback for the datagram sockets (native, syslog, audit).
 *
 * Receives one datagram into s->buffer, which is grown as needed, extracts
 * the ancillary data (sender credentials, SELinux label, receive timestamp,
 * passed file descriptors) and dispatches by socket:
 *   - syslog: text payload only; datagrams carrying fds are rejected;
 *   - native: either a payload without fds, or an empty payload with exactly
 *     one fd, which is processed as a file;
 *   - audit:  payload only, passed on with the sender address.
 * Received file descriptors are closed before returning.
 *
 * Any event mask other than exactly EPOLLIN is treated as an error.
 *
 * NOTE(review): the surrounding loop, several declarations, the error
 * returns and the line writing the trailing NUL are absent from this listing.
 */
1117 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1118 Server *s = userdata;
1121 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1123 if (revents != EPOLLIN) {
1124 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1129 struct ucred *ucred = NULL;
1130 struct timeval *tv = NULL;
1131 struct cmsghdr *cmsg;
1133 size_t label_len = 0;
1137 struct cmsghdr cmsghdr;
1139 /* We use NAME_MAX space for the SELinux label
1140 * here. The kernel currently enforces no
1141 * limit, but according to suggestions from
1142 * the SELinux people this will change and it
1143 * will probably be identical to NAME_MAX. For
1144 * now we use that, but this should be updated
1145 * one day when the final limit is known. */
1146 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1147 CMSG_SPACE(sizeof(struct timeval)) +
1148 CMSG_SPACE(sizeof(int)) + /* fd */
1149 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1151 union sockaddr_union sa = {};
1152 struct msghdr msghdr = {
1155 .msg_control = &control,
1156 .msg_controllen = sizeof(control),
1158 .msg_namelen = sizeof(sa),
1167 /* Try to get the right size, if we can. (Not all
1168 * sockets support SIOCINQ, hence we just try, but
1169 * don't rely on it.) */
1170 (void) ioctl(fd, SIOCINQ, &v);
1172 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1173 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1175 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1177 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1180 iovec.iov_base = s->buffer;
1181 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1183 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
/* EINTR and EAGAIN are not reported as errors. */
1185 if (errno == EINTR || errno == EAGAIN)
1188 log_error_errno(errno, "recvmsg() failed: %m");
/* Credentials and timestamp are accepted only with exactly the expected
 * length; label and fd array lengths are derived from cmsg_len. */
1192 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1194 if (cmsg->cmsg_level == SOL_SOCKET &&
1195 cmsg->cmsg_type == SCM_CREDENTIALS &&
1196 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1197 ucred = (struct ucred*) CMSG_DATA(cmsg);
1198 else if (cmsg->cmsg_level == SOL_SOCKET &&
1199 cmsg->cmsg_type == SCM_SECURITY) {
1200 label = (char*) CMSG_DATA(cmsg);
1201 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1202 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1203 cmsg->cmsg_type == SO_TIMESTAMP &&
1204 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1205 tv = (struct timeval*) CMSG_DATA(cmsg);
1206 else if (cmsg->cmsg_level == SOL_SOCKET &&
1207 cmsg->cmsg_type == SCM_RIGHTS) {
1208 fds = (int*) CMSG_DATA(cmsg);
1209 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1213 /* And a trailing NUL, just in case */
1216 if (fd == s->syslog_fd) {
1217 if (n > 0 && n_fds == 0)
1218 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1220 log_warning("Got file descriptors via syslog socket. Ignoring.");
1222 } else if (fd == s->native_fd) {
1223 if (n > 0 && n_fds == 0)
1224 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1225 else if (n == 0 && n_fds == 1)
1226 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1228 log_warning("Got too many file descriptors via native socket. Ignoring.");
1231 assert(fd == s->audit_fd);
1233 if (n > 0 && n_fds == 0)
1234 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1236 log_warning("Got file descriptors via audit socket. Ignoring.");
/* Passed descriptors are always closed here. */
1239 close_many(fds, n_fds);
/*
 * SIGUSR1 handler: flushes the runtime journal to /var and then creates the
 * flag file /run/systemd/journal/flushed, which system_journal_open() tests
 * for when deciding whether to open the system journal. Statements between
 * the flush and the touch are not visible in this listing.
 */
1243 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1244 Server *s = userdata;
1248 log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
1250 server_flush_to_var(s);
1254 touch("/run/systemd/journal/flushed");
/*
 * SIGUSR2 handler: logs the rotation request together with the sender PID.
 * NOTE(review): the statements performing the rotation are absent from this
 * listing.
 */
1259 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1260 Server *s = userdata;
1264 log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
/*
 * Handler registered for both SIGTERM and SIGINT by setup_signals(): logs
 * the received signal and asks the event loop to exit with code 0.
 */
1271 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1272 Server *s = userdata;
1276 log_received_signal(LOG_INFO, si);
1278 sd_event_exit(s->event, 0);
/*
 * Installs signal handling on the event loop. The process signal mask is
 * set to exactly SIGINT, SIGTERM, SIGUSR1 and SIGUSR2, and one signal event
 * source is registered for each of them:
 *   SIGUSR1 -> dispatch_sigusr1, SIGUSR2 -> dispatch_sigusr2,
 *   SIGTERM and SIGINT -> dispatch_sigterm.
 * The result checks after each registration are not visible in this listing.
 */
1282 static int setup_signals(Server *s) {
1288 assert_se(sigemptyset(&mask) == 0);
1289 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
/* SIG_SETMASK replaces the mask rather than adding to it. */
1290 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1292 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1296 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1300 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1304 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
/*
 * Applies journald switches given on the kernel command line. Recognised
 * keys, each taking a boolean:
 *   systemd.journald.forward_to_syslog=
 *   systemd.journald.forward_to_kmsg=
 *   systemd.journald.forward_to_console=
 *   systemd.journald.forward_to_wall=
 * Values that fail to parse and other words starting with
 * "systemd.journald" only produce a warning. A failure to read the command
 * line is logged as a warning as well.
 *
 * The numeric offsets added to word (35, 33, 36, 33) equal the length of the
 * respective key prefix, i.e. they skip to the value.
 */
1311 static int server_parse_proc_cmdline(Server *s) {
1312 _cleanup_free_ char *line = NULL;
1313 const char *w, *state;
1317 r = proc_cmdline(&line);
1319 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
1323 FOREACH_WORD_QUOTED(w, l, line, state) {
/* Assigned right below, before any exit path that is visible here. */
1324 _cleanup_free_ char *word;
1326 word = strndup(w, l);
1330 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1331 r = parse_boolean(word + 35);
1333 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1335 s->forward_to_syslog = r;
1336 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1337 r = parse_boolean(word + 33);
1339 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1341 s->forward_to_kmsg = r;
1342 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1343 r = parse_boolean(word + 36);
1345 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1347 s->forward_to_console = r;
1348 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1349 r = parse_boolean(word + 33);
1351 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1353 s->forward_to_wall = r;
1354 } else if (startswith(word, "systemd.journald"))
1355 log_warning("Invalid systemd.journald parameter. Ignoring.");
1357 /* do not warn about state here, since probably systemd already did */
/*
 * Loads the journald configuration through config_parse_many(), starting
 * from /etc/systemd/journald.conf and the configuration directories named
 * by CONF_DIRS_NULSTR("systemd/journald.conf"), resolving settings with the
 * gperf generated journald_gperf_lookup table. The remaining arguments of
 * the call are not visible in this listing.
 */
1362 static int server_parse_config_file(Server *s) {
1365 return config_parse_many("/etc/systemd/journald.conf",
1366 CONF_DIRS_NULSTR("systemd/journald.conf"),
1368 config_item_perf_lookup, journald_gperf_lookup,
/*
 * Timer callback installed by server_schedule_sync().
 * NOTE(review): apart from recovering the Server pointer from userdata, the
 * body is absent from this listing; it presumably calls server_sync() -
 * confirm.
 */
1372 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1373 Server *s = userdata;
/*
 * Arranges for the journals to be synced to disk.
 *
 * Priorities CRIT, ALERT and EMERG (priority <= LOG_CRIT) are handled
 * immediately. Otherwise, unless a sync is already scheduled, a one-shot
 * timer is armed s->sync_interval_usec after the current monotonic event
 * loop time and s->sync_scheduled is set. No timer is used when the
 * interval is 0.
 *
 * The timer source is created on first use with priority
 * SD_EVENT_PRIORITY_IMPORTANT. An existing source gets a new expiry time
 * and is re-enabled as one-shot.
 *
 * NOTE(review): the immediate sync call, the else branch separating
 * creation from re-arming, and the error returns are absent from this
 * listing.
 */
1381 int server_schedule_sync(Server *s, int priority) {
1386 if (priority <= LOG_CRIT) {
1387 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1392 if (s->sync_scheduled)
1395 if (s->sync_interval_usec > 0) {
1398 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1402 when += s->sync_interval_usec;
1404 if (!s->sync_event_source) {
1405 r = sd_event_add_time(
1407 &s->sync_event_source,
1410 server_dispatch_sync, s);
1414 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1416 r = sd_event_source_set_time(s->sync_event_source, when);
1420 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1425 s->sync_scheduled = true;
/*
 * I/O callback registered by server_open_hostname() on the
 * /proc/sys/kernel/hostname descriptor: refreshes the cached _HOSTNAME=
 * field.
 */
1431 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1432 Server *s = userdata;
1436 server_cache_hostname(s);
/*
 * Starts watching for host name changes: opens /proc/sys/kernel/hostname
 * and registers the descriptor with the event loop, using
 * dispatch_hostname_change() as callback at a priority 10 above
 * SD_EVENT_PRIORITY_IMPORTANT.
 *
 * One registration failure is tolerated: according to the comment in the
 * body, kernels prior to 3.2 cannot poll this file. In that case a warning
 * is logged and the descriptor closed again. Other failures are returned
 * as errors.
 *
 * NOTE(review): the conditions distinguishing these cases are absent from
 * this listing, and the comment inside the body is not terminated in it.
 */
1440 static int server_open_hostname(Server *s) {
1445 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1446 if (s->hostname_fd < 0)
1447 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1449 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1451 /* kernels prior to 3.2 don't support polling this file. Ignore
1454 log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1456 s->hostname_fd = safe_close(s->hostname_fd);
1460 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1463 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1465 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
/* Initializes a Server object and brings up everything it needs to run.
 *
 * Visible steps, in order:
 *  1. Mark all file descriptors as unset and install built-in defaults
 *     (sync interval, rate limit, forwarding, maximum levels, metrics).
 *  2. Apply the configuration file and then the kernel command line, and
 *     normalize the rate limit settings.
 *  3. Create /run/systemd/journal, the user journal map and the mmap cache.
 *  4. Acquire the default event loop and enable its watchdog support.
 *  5. Classify the file descriptors received through sd_listen_fds into the
 *     native, stdout, syslog and audit slots; unknown ones only get a
 *     warning (the existing comment says they are closed).
 *  6. Open the syslog, native and stdout sockets, /dev/kmsg, audit, the
 *     kernel sequence number and the hostname watch, then set up signals,
 *     udev, the rate limiter and the cgroup root path.
 *  7. Cache hostname, boot ID and machine ID, and open the system journal.
 *
 * NOTE(review): most error checks and all return statements are outside
 * this excerpt; presumably 0 on success and a negative errno-style value on
 * failure -- confirm against the full function. */
1470 int server_init(Server *s) {
/* -1 marks every fd as "not opened yet". */
1476 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = -1;
1480 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1481 s->sync_scheduled = false;
1483 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1484 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1486 s->forward_to_wall = true;
1488 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1490 s->max_level_store = LOG_DEBUG;
1491 s->max_level_syslog = LOG_DEBUG;
1492 s->max_level_kmsg = LOG_NOTICE;
1493 s->max_level_console = LOG_INFO;
1494 s->max_level_wall = LOG_EMERG;
/* Every byte of both metrics structs becomes 0xFF.
 * NOTE(review): presumably an all-ones field means "not configured" so a
 * default can be picked later -- confirm with the metrics consumers. */
1496 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1497 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* The configuration file is applied first, the kernel command line after. */
1499 server_parse_config_file(s);
1500 server_parse_proc_cmdline(s);
/* True when exactly one of interval and burst is zero: a half-configured
 * rate limit is switched off completely by zeroing both. */
1501 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1502 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1503 s->rate_limit_interval, s->rate_limit_burst);
1504 s->rate_limit_interval = s->rate_limit_burst = 0;
/* The return value of mkdir_p is not used on this line. */
1507 mkdir_p("/run/systemd/journal", 0755);
1509 s->user_journals = ordered_hashmap_new(NULL);
1510 if (!s->user_journals)
1513 s->mmap = mmap_cache_new();
1517 r = sd_event_default(&s->event);
1519 return log_error_errno(r, "Failed to create event loop: %m");
1521 sd_event_set_watchdog(s->event, true);
/* n is the number of passed fds; they are numbered consecutively starting
 * at SD_LISTEN_FDS_START, as the loop below relies on. */
1523 n = sd_listen_fds(true);
1525 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
/* Sort each passed fd into its slot by socket type and bound path. A slot
 * that is already filled makes a second fd of that kind an error. */
1527 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1529 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1531 if (s->native_fd >= 0) {
1532 log_error("Too many native sockets passed.");
1538 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1540 if (s->stdout_fd >= 0) {
1541 log_error("Too many stdout sockets passed.");
/* Two alternative paths are accepted for the syslog socket. */
1547 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1548 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1550 if (s->syslog_fd >= 0) {
1551 log_error("Too many /dev/log sockets passed.");
/* Any raw netlink socket is treated as the audit socket. */
1557 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1559 if (s->audit_fd >= 0) {
1560 log_error("Too many audit sockets passed.");
1567 log_warning("Unknown socket passed as file descriptor %d, ignoring.", fd);
1569 /* Let's close the fd, better be safe than
1570 sorry. The fd might reference some resource
1571 that we really want to release if we don't
1578 r = server_open_syslog_socket(s);
1582 r = server_open_native_socket(s);
1586 r = server_open_stdout_socket(s);
1590 r = server_open_dev_kmsg(s);
1594 r = server_open_audit(s);
1598 r = server_open_kernel_seqnum(s);
1602 r = server_open_hostname(s);
1606 r = setup_signals(s);
1610 s->udev = udev_new();
1614 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1618 r = cg_get_root_path(&s->cgroup_root);
1622 server_cache_hostname(s);
1623 server_cache_boot_id(s);
1624 server_cache_machine_id(s);
1626 r = system_journal_open(s, false);
/* Gives the open journal files a chance to append a tag: calls
 * journal_file_maybe_append_tag on the system journal (when one is open) and
 * on every file in s->user_journals. All calls share a single CLOCK_REALTIME
 * timestamp taken once up front. Among the visible lines the runtime journal
 * is not touched. */
1633 void server_maybe_append_tags(Server *s) {
/* One timestamp for all files, so they are judged against the same instant. */
1639 n = now(CLOCK_REALTIME);
1641 if (s->system_journal)
1642 journal_file_maybe_append_tag(s->system_journal, n);
1644 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1645 journal_file_maybe_append_tag(f, n);
/* Releases the resources held by a Server: stdout streams, the system,
 * runtime and per-user journal files, all event sources and the event loop,
 * the file descriptors, the rate limiter, the kernel sequence number
 * mapping, the cached strings, the mmap cache and the udev context.
 *
 * The Server structure itself is not freed in the lines visible here. */
1649 void server_done(Server *s) {
/* Always frees the current list head and loops until the list is empty.
 * NOTE(review): this relies on stdout_stream_free unlinking the stream from
 * s->stdout_streams -- confirm. */
1653 while (s->stdout_streams)
1654 stdout_stream_free(s->stdout_streams);
1656 if (s->system_journal)
1657 journal_file_close(s->system_journal);
1659 if (s->runtime_journal)
1660 journal_file_close(s->runtime_journal);
/* Take the files out of the map one at a time and close each of them; only
 * afterwards is the map itself freed. */
1662 while ((f = ordered_hashmap_steal_first(s->user_journals)))
1663 journal_file_close(f);
1665 ordered_hashmap_free(s->user_journals);
/* Drop the event sources first, then the event loop they belong to. */
1667 sd_event_source_unref(s->syslog_event_source);
1668 sd_event_source_unref(s->native_event_source);
1669 sd_event_source_unref(s->stdout_event_source);
1670 sd_event_source_unref(s->dev_kmsg_event_source);
1671 sd_event_source_unref(s->audit_event_source);
1672 sd_event_source_unref(s->sync_event_source);
1673 sd_event_source_unref(s->sigusr1_event_source);
1674 sd_event_source_unref(s->sigusr2_event_source);
1675 sd_event_source_unref(s->sigterm_event_source);
1676 sd_event_source_unref(s->sigint_event_source);
1677 sd_event_source_unref(s->hostname_event_source);
1678 sd_event_unref(s->event);
/* The fds are closed without a preceding check for -1 in the visible lines. */
1680 safe_close(s->syslog_fd);
1681 safe_close(s->native_fd);
1682 safe_close(s->stdout_fd);
1683 safe_close(s->dev_kmsg_fd);
1684 safe_close(s->audit_fd);
1685 safe_close(s->hostname_fd);
1688 journal_rate_limit_free(s->rate_limit);
/* The mapping is released with the size of a single uint64_t. */
1690 if (s->kernel_seqnum)
1691 munmap(s->kernel_seqnum, sizeof(uint64_t));
1695 free(s->cgroup_root);
1696 free(s->hostname_field);
1699 mmap_cache_unref(s->mmap);
1702 udev_unref(s->udev);