1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33 #include "sd-daemon.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
42 #include "conf-parser.h"
43 #include "selinux-util.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-rate-limit.h"
48 #include "journald-kmsg.h"
49 #include "journald-syslog.h"
50 #include "journald-stream.h"
51 #include "journald-console.h"
52 #include "journald-native.h"
53 #include "journald-audit.h"
54 #include "journald-server.h"
58 #include <acl/libacl.h>
63 #include <selinux/selinux.h>
66 #define USER_JOURNALS_MAX 1024
68 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
69 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
70 #define DEFAULT_RATE_LIMIT_BURST 1000
71 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
73 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
75 static const char* const storage_table[_STORAGE_MAX] = {
76 [STORAGE_AUTO] = "auto",
77 [STORAGE_VOLATILE] = "volatile",
78 [STORAGE_PERSISTENT] = "persistent",
79 [STORAGE_NONE] = "none"
82 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
83 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
85 static const char* const split_mode_table[_SPLIT_MAX] = {
86 [SPLIT_LOGIN] = "login",
88 [SPLIT_NONE] = "none",
91 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
92 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Computes how many more bytes the journal may use, caches the result in
 * s->cached_available_space (stamped with the current CLOCK_MONOTONIC
 * time) and returns it.
 *
 * Visible steps:
 *  - A cached value is returned while its timestamp plus
 *    RECHECK_AVAILABLE_SPACE_USEC is still in the future (the remainder of
 *    that condition is not visible in this listing).
 *  - The directory is "/var/log/journal/<machine-id>" with system_metrics
 *    when s->system_journal is set, and "/run/log/journal/<machine-id>"
 *    with runtime_metrics otherwise.
 *  - sum is the on-disk size (st_blocks * 512) of all regular files in that
 *    directory whose names end in ".journal" or ".journal~".
 *  - The result is MIN(max_use, ss_avail - keep_free) - sum, computed with
 *    LESS_BY (presumably a subtraction that saturates at 0 -- confirm
 *    against the macro definition).
 *  - A SD_MESSAGE_JOURNAL_USAGE driver message describing these numbers is
 *    emitted; presumably only when verbose is true (the guarding condition
 *    is not visible here).
 *
 * NOTE(review): ss_avail multiplies f_bavail by f_bsize. statvfs documents
 * its block counts in units of f_frsize (explicitly so for f_blocks);
 * confirm whether f_frsize is the right multiplier on filesystems where the
 * two differ. */
94 static uint64_t available_space(Server *s, bool verbose) {
96 _cleanup_free_ char *p = NULL;
99 uint64_t sum = 0, ss_avail = 0, avail = 0;
101 _cleanup_closedir_ DIR *d = NULL;
106 ts = now(CLOCK_MONOTONIC);
108 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
110 return s->cached_available_space;
112 r = sd_id128_get_machine(&machine);
116 if (s->system_journal) {
117 f = "/var/log/journal/";
118 m = &s->system_metrics;
120 f = "/run/log/journal/";
121 m = &s->runtime_metrics;
126 p = strappend(f, sd_id128_to_string(machine, ids));
134 if (fstatvfs(dirfd(d), &ss) < 0)
143 if (!de && errno != 0)
149 if (!endswith(de->d_name, ".journal") &&
150 !endswith(de->d_name, ".journal~"))
153 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
156 if (!S_ISREG(st.st_mode))
159 sum += (uint64_t) st.st_blocks * 512UL;
162 ss_avail = ss.f_bsize * ss.f_bavail;
164 /* If we reached a high mark, we will always allow this much
165 * again, unless usage goes above max_use. This watermark
166 * value is cached so that we don't give up space on pressure,
167 * but hover below the maximum usage. */
172 avail = LESS_BY(ss_avail, m->keep_free);
174 s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
175 s->cached_available_space_timestamp = ts;
178 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
179 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
181 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
182 "%s journal is using %s (max allowed %s, "
183 "trying to leave %s free of %s available → current limit %s).",
184 s->system_journal ? "Permanent" : "Runtime",
185 format_bytes(fb1, sizeof(fb1), sum),
186 format_bytes(fb2, sizeof(fb2), m->max_use),
187 format_bytes(fb3, sizeof(fb3), m->keep_free),
188 format_bytes(fb4, sizeof(fb4), ss_avail),
189 format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
192 return s->cached_available_space;
/* Best-effort permission fix-up for journal file f.
 *
 * First sets the file mode to 0640 via fchmod(). Then reads the file's ACL,
 * looks up the user entry for uid (creating an ACL_USER entry qualified
 * with uid if needed), adds ACL_READ to that entry and writes the ACL back
 * with acl_set_fd(). The ACL part is gated on the uid <= SYSTEM_UID_MAX
 * check; the statement that check guards is not visible in this listing
 * (presumably an early exit for system users -- confirm).
 *
 * Nothing is returned: every failure is logged as a warning ("ignoring")
 * and otherwise ignored. All libc/libacl calls here report failure through
 * errno, so errno is what is handed to log_warning_errno() for "%m". */
195 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
200 acl_permset_t permset;
205 r = fchmod(f->fd, 0640);
207 log_warning_errno(errno, "Failed to fix access mode on %s, ignoring: %m", f->path); /* fchmod() returns -1 and sets errno; logging r would always report error 1 */
210 if (uid <= SYSTEM_UID_MAX)
213 acl = acl_get_fd(f->fd);
215 log_warning_errno(errno, "Failed to read ACL on %s, ignoring: %m", f->path);
219 r = acl_find_uid(acl, uid, &entry);
222 if (acl_create_entry(&acl, &entry) < 0 ||
223 acl_set_tag_type(entry, ACL_USER) < 0 ||
224 acl_set_qualifier(entry, &uid) < 0) {
225 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
230 /* We do not recalculate the mask unconditionally here,
231 * so that the fchmod() mask above stays intact. */
232 if (acl_get_permset(entry, &permset) < 0 ||
233 acl_add_perm(permset, ACL_READ) < 0 ||
234 calc_acl_mask_if_needed(&acl) < 0) {
235 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
239 if (acl_set_fd(f->fd, acl) < 0)
240 log_warning_errno(errno, "Failed to set ACL on %s, ignoring: %m", f->path);
/* Picks the journal file that entries from uid should be written to.
 *
 *  - While a runtime journal is open, it is always returned (see the
 *    comment below for why).
 *  - For uid <= SYSTEM_UID_MAX the system journal is returned.
 *  - Otherwise the per-user journal is looked up in s->user_journals
 *    (keyed by uid); handling of a cache hit is not visible in this
 *    listing. On a miss the file
 *    "/var/log/journal/<machine-id>/user-<uid>.journal" is opened (created
 *    if necessary), its permissions are fixed up for uid, and it is added
 *    to the map.
 *  - Before opening, entries are evicted from the map and closed, in
 *    insertion order, until fewer than USER_JOURNALS_MAX remain.
 *
 * Each visible error path (machine ID lookup, path formatting, open, map
 * insertion) falls back to returning s->system_journal; a file that could
 * not be inserted into the map is closed first. */
247 static JournalFile* find_journal(Server *s, uid_t uid) {
248 _cleanup_free_ char *p = NULL;
255 /* We split up user logs only on /var, not on /run. If the
256 * runtime file is open, we write to it exclusively, in order
257 * to guarantee proper order as soon as we flush /run to
258 * /var and close the runtime file. */
260 if (s->runtime_journal)
261 return s->runtime_journal;
263 if (uid <= SYSTEM_UID_MAX)
264 return s->system_journal;
266 r = sd_id128_get_machine(&machine);
268 return s->system_journal;
270 f = ordered_hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
274 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
275 SD_ID128_FORMAT_VAL(machine), uid) < 0)
276 return s->system_journal;
278 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
279 /* Too many open? Then let's close one */
280 f = ordered_hashmap_steal_first(s->user_journals);
282 journal_file_close(f);
285 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
287 return s->system_journal;
289 server_fix_perms(s, f, uid);
291 r = ordered_hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
293 journal_file_close(f);
294 return s->system_journal;
300 static int do_rotate(
313 r = journal_file_rotate(f, s->compress, seal);
316 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
318 log_error_errno(r, "Failed to create new %s journal: %m", name);
320 server_fix_perms(s, *f, uid);
325 void server_rotate(Server *s) {
331 log_debug("Rotating...");
333 do_rotate(s, &s->runtime_journal, "runtime", false, 0);
334 do_rotate(s, &s->system_journal, "system", s->seal, 0);
336 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
337 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
339 ordered_hashmap_replace(s->user_journals, k, f);
341 /* Old file has been closed and deallocated */
342 ordered_hashmap_remove(s->user_journals, k);
346 void server_sync(Server *s) {
352 if (s->system_journal) {
353 r = journal_file_set_offline(s->system_journal);
355 log_error_errno(r, "Failed to sync system journal: %m");
358 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
359 r = journal_file_set_offline(f);
361 log_error_errno(r, "Failed to sync user journal: %m");
364 if (s->sync_event_source) {
365 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
367 log_error_errno(r, "Failed to disable sync timer source: %m");
370 s->sync_scheduled = false;
373 static void do_vacuum(
378 JournalMetrics *metrics) {
386 p = strappenda(path, id);
387 r = journal_directory_vacuum(p, metrics->max_use, s->max_retention_usec, &s->oldest_file_usec, false);
388 if (r < 0 && r != -ENOENT)
389 log_error_errno(r, "Failed to vacuum %s: %m", p);
392 void server_vacuum(Server *s) {
397 log_debug("Vacuuming...");
399 s->oldest_file_usec = 0;
401 r = sd_id128_get_machine(&machine);
403 log_error_errno(r, "Failed to get machine ID: %m");
406 sd_id128_to_string(machine, ids);
408 do_vacuum(s, ids, s->system_journal, "/var/log/journal/", &s->system_metrics);
409 do_vacuum(s, ids, s->runtime_journal, "/run/log/journal/", &s->runtime_metrics);
411 s->cached_available_space_timestamp = 0;
414 static void server_cache_machine_id(Server *s) {
420 r = sd_id128_get_machine(&id);
424 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
427 static void server_cache_boot_id(Server *s) {
433 r = sd_id128_get_boot(&id);
437 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
440 static void server_cache_hostname(Server *s) {
441 _cleanup_free_ char *t = NULL;
446 t = gethostname_malloc();
450 x = strappend("_HOSTNAME=", t);
454 free(s->hostname_field);
455 s->hostname_field = x;
/* Classifies the negative errno-style result r of a failed journal write
 * to f and logs why the file is going to be rotated. The error codes
 * handled are listed in the comment below; all of them are compared as
 * negative values.
 *
 * The return statements are not visible in this listing. Callers treat a
 * false result as "give up on this entry" and a true result as "rotate and
 * retry the write". */
458 static bool shall_try_append_again(JournalFile *f, int r) {
460 /* -E2BIG Hit configured limit
462 -EDQUOT Quota limit hit
464 -EIO I/O error of some kind (mmap)
465 -EHOSTDOWN Other machine
466 -EBUSY Unclean shutdown
467 -EPROTONOSUPPORT Unsupported feature
470 -ESHUTDOWN Already archived
471 -EIDRM Journal file has been deleted */
473 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
474 log_debug("%s: Allocation limit reached, rotating.", f->path);
475 else if (r == -EHOSTDOWN)
476 log_info("%s: Journal file from other machine, rotating.", f->path);
477 else if (r == -EBUSY)
478 log_info("%s: Unclean shutdown, rotating.", f->path);
479 else if (r == -EPROTONOSUPPORT)
480 log_info("%s: Unsupported feature, rotating.", f->path);
481 else if (r == -EBADMSG || r == -ENODATA || r == -ESHUTDOWN) /* r is a negative errno; the positive ESHUTDOWN could never match */
482 log_warning("%s: Journal file corrupted, rotating.", f->path);
484 log_warning("%s: IO error, rotating.", f->path);
485 else if (r == -EIDRM)
486 log_warning("%s: Journal file has been deleted, rotating.", f->path);
493 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
495 bool vacuumed = false;
502 f = find_journal(s, uid);
506 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
507 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
512 f = find_journal(s, uid);
517 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
519 server_schedule_sync(s, priority);
523 if (vacuumed || !shall_try_append_again(f, r)) {
524 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
531 f = find_journal(s, uid);
535 log_debug("Retrying write.");
536 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
538 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
540 server_schedule_sync(s, priority);
543 static void dispatch_message_real(
545 struct iovec *iovec, unsigned n, unsigned m,
546 const struct ucred *ucred,
547 const struct timeval *tv,
548 const char *label, size_t label_len,
553 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
554 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
555 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
556 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
557 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
558 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
559 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
560 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
566 uid_t realuid = 0, owner = 0, journal_uid;
567 bool owner_valid = false;
569 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
570 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
571 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
572 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
581 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
584 realuid = ucred->uid;
586 sprintf(pid, "_PID="PID_FMT, ucred->pid);
587 IOVEC_SET_STRING(iovec[n++], pid);
589 sprintf(uid, "_UID="UID_FMT, ucred->uid);
590 IOVEC_SET_STRING(iovec[n++], uid);
592 sprintf(gid, "_GID="GID_FMT, ucred->gid);
593 IOVEC_SET_STRING(iovec[n++], gid);
595 r = get_process_comm(ucred->pid, &t);
597 x = strappenda("_COMM=", t);
599 IOVEC_SET_STRING(iovec[n++], x);
602 r = get_process_exe(ucred->pid, &t);
604 x = strappenda("_EXE=", t);
606 IOVEC_SET_STRING(iovec[n++], x);
609 r = get_process_cmdline(ucred->pid, 0, false, &t);
611 x = strappenda("_CMDLINE=", t);
613 IOVEC_SET_STRING(iovec[n++], x);
616 r = get_process_capeff(ucred->pid, &t);
618 x = strappenda("_CAP_EFFECTIVE=", t);
620 IOVEC_SET_STRING(iovec[n++], x);
624 r = audit_session_from_pid(ucred->pid, &audit);
626 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
627 IOVEC_SET_STRING(iovec[n++], audit_session);
630 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
632 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
633 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
637 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
639 char *session = NULL;
641 x = strappenda("_SYSTEMD_CGROUP=", c);
642 IOVEC_SET_STRING(iovec[n++], x);
644 r = cg_path_get_session(c, &t);
646 session = strappenda("_SYSTEMD_SESSION=", t);
648 IOVEC_SET_STRING(iovec[n++], session);
651 if (cg_path_get_owner_uid(c, &owner) >= 0) {
654 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
655 IOVEC_SET_STRING(iovec[n++], owner_uid);
658 if (cg_path_get_unit(c, &t) >= 0) {
659 x = strappenda("_SYSTEMD_UNIT=", t);
661 IOVEC_SET_STRING(iovec[n++], x);
662 } else if (unit_id && !session) {
663 x = strappenda("_SYSTEMD_UNIT=", unit_id);
664 IOVEC_SET_STRING(iovec[n++], x);
667 if (cg_path_get_user_unit(c, &t) >= 0) {
668 x = strappenda("_SYSTEMD_USER_UNIT=", t);
670 IOVEC_SET_STRING(iovec[n++], x);
671 } else if (unit_id && session) {
672 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
673 IOVEC_SET_STRING(iovec[n++], x);
676 if (cg_path_get_slice(c, &t) >= 0) {
677 x = strappenda("_SYSTEMD_SLICE=", t);
679 IOVEC_SET_STRING(iovec[n++], x);
683 } else if (unit_id) {
684 x = strappenda("_SYSTEMD_UNIT=", unit_id);
685 IOVEC_SET_STRING(iovec[n++], x);
689 if (mac_selinux_use()) {
691 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
693 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
694 IOVEC_SET_STRING(iovec[n++], x);
696 security_context_t con;
698 if (getpidcon(ucred->pid, &con) >= 0) {
699 x = strappenda("_SELINUX_CONTEXT=", con);
702 IOVEC_SET_STRING(iovec[n++], x);
711 r = get_process_uid(object_pid, &object_uid);
713 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
714 IOVEC_SET_STRING(iovec[n++], o_uid);
717 r = get_process_gid(object_pid, &object_gid);
719 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
720 IOVEC_SET_STRING(iovec[n++], o_gid);
723 r = get_process_comm(object_pid, &t);
725 x = strappenda("OBJECT_COMM=", t);
727 IOVEC_SET_STRING(iovec[n++], x);
730 r = get_process_exe(object_pid, &t);
732 x = strappenda("OBJECT_EXE=", t);
734 IOVEC_SET_STRING(iovec[n++], x);
737 r = get_process_cmdline(object_pid, 0, false, &t);
739 x = strappenda("OBJECT_CMDLINE=", t);
741 IOVEC_SET_STRING(iovec[n++], x);
745 r = audit_session_from_pid(object_pid, &audit);
747 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
748 IOVEC_SET_STRING(iovec[n++], o_audit_session);
751 r = audit_loginuid_from_pid(object_pid, &loginuid);
753 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
754 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
758 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
760 x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
761 IOVEC_SET_STRING(iovec[n++], x);
763 r = cg_path_get_session(c, &t);
765 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
767 IOVEC_SET_STRING(iovec[n++], x);
770 if (cg_path_get_owner_uid(c, &owner) >= 0) {
771 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
772 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
775 if (cg_path_get_unit(c, &t) >= 0) {
776 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
778 IOVEC_SET_STRING(iovec[n++], x);
781 if (cg_path_get_user_unit(c, &t) >= 0) {
782 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
784 IOVEC_SET_STRING(iovec[n++], x);
793 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
794 IOVEC_SET_STRING(iovec[n++], source_time);
797 /* Note that strictly speaking storing the boot id here is
798 * redundant since the entry includes this in-line
799 * anyway. However, we need this indexed, too. */
800 if (!isempty(s->boot_id_field))
801 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
803 if (!isempty(s->machine_id_field))
804 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
806 if (!isempty(s->hostname_field))
807 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
811 if (s->split_mode == SPLIT_UID && realuid > 0)
812 /* Split up strictly by any UID */
813 journal_uid = realuid;
814 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
815 /* Split up by login UIDs. We do this only if the
816 * realuid is not root, in order not to accidentally
817 * leak privileged information to the user that is
818 * logged by a privileged process that is part of an
819 * unprivileged session. */
824 write_to_journal(s, journal_uid, iovec, n, priority);
827 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
828 char mid[11 + 32 + 1];
829 char buffer[16 + LINE_MAX + 1];
830 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
833 struct ucred ucred = {};
838 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
839 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
841 memcpy(buffer, "MESSAGE=", 8);
842 va_start(ap, format);
843 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
845 char_array_0(buffer);
846 IOVEC_SET_STRING(iovec[n++], buffer);
848 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
849 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
851 IOVEC_SET_STRING(iovec[n++], mid);
854 ucred.pid = getpid();
855 ucred.uid = getuid();
856 ucred.gid = getgid();
858 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
/* Entry point for storing one incoming log message.
 *
 * Visible steps: the message is checked against s->max_level_store and
 * against s->storage == STORAGE_NONE; the sender's cgroup path is looked up
 * and reduced to its leading components (see the example below) to form the
 * rate-limit key; journal_rate_limit_test() is consulted with that key, the
 * message's priority level and the currently available journal space; if
 * messages were suppressed, a SD_MESSAGE_JOURNAL_DROPPED driver message
 * reporting rl - 1 suppressed messages is written; finally the message is
 * handed to dispatch_message_real().
 *
 * The early exits and the exact conditions between these steps are not
 * visible in this listing. */
861 void server_dispatch_message(
863 struct iovec *iovec, unsigned n, unsigned m,
864 const struct ucred *ucred,
865 const struct timeval *tv,
866 const char *label, size_t label_len,
872 _cleanup_free_ char *path = NULL;
876 assert(iovec || n == 0);
881 if (LOG_PRI(priority) > s->max_level_store)
884 /* Stop early in case the information will not be stored
886 if (s->storage == STORAGE_NONE)
892 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
896 /* example: /user/lennart/3/foobar
897 * /system/dbus.service/foobar
899 * So let's cut off everything past the third /, since that is
900 * where user directories start */
902 c = strchr(path, '/');
904 c = strchr(c+1, '/');
906 c = strchr(c+1, '/');
912 rl = journal_rate_limit_test(s->rate_limit, path,
913 priority & LOG_PRIMASK, available_space(s, false));
918 /* Write a suppression message if we suppressed something */
920 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
921 "Suppressed %u messages from %s", rl - 1, path);
924 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
928 static int system_journal_open(Server *s, bool flush_requested) {
934 r = sd_id128_get_machine(&machine);
936 return log_error_errno(r, "Failed to get machine id: %m");
938 sd_id128_to_string(machine, ids);
940 if (!s->system_journal &&
941 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
943 || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
945 /* If in auto mode: first try to create the machine
946 * path, but not the prefix.
948 * If in persistent mode: create /var/log/journal and
949 * the machine path */
951 if (s->storage == STORAGE_PERSISTENT)
952 (void) mkdir("/var/log/journal/", 0755);
954 fn = strappenda("/var/log/journal/", ids);
955 (void) mkdir(fn, 0755);
957 fn = strappenda(fn, "/system.journal");
958 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
961 server_fix_perms(s, s->system_journal, 0);
963 if (r != -ENOENT && r != -EROFS)
964 log_warning_errno(r, "Failed to open system journal: %m");
970 if (!s->runtime_journal &&
971 (s->storage != STORAGE_NONE)) {
973 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
977 if (s->system_journal) {
979 /* Try to open the runtime journal, but only
980 * if it already exists, so that we can flush
981 * it into the system journal */
983 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
988 log_warning_errno(r, "Failed to open runtime journal: %m");
995 /* OK, we really need the runtime journal, so create
996 * it if necessary. */
998 (void) mkdir("/run/log", 0755);
999 (void) mkdir("/run/log/journal", 0755);
1000 (void) mkdir_parents(fn, 0750);
1002 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1006 return log_error_errno(r, "Failed to open runtime journal: %m");
1009 if (s->runtime_journal)
1010 server_fix_perms(s, s->runtime_journal, 0);
1013 available_space(s, true);
1018 int server_flush_to_var(Server *s) {
1020 sd_journal *j = NULL;
1021 char ts[FORMAT_TIMESPAN_MAX];
1028 if (s->storage != STORAGE_AUTO &&
1029 s->storage != STORAGE_PERSISTENT)
1032 if (!s->runtime_journal)
1035 system_journal_open(s, true);
1037 if (!s->system_journal)
1040 log_debug("Flushing to /var...");
1042 start = now(CLOCK_MONOTONIC);
1044 r = sd_id128_get_machine(&machine);
1048 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1050 return log_error_errno(r, "Failed to read runtime journal: %m");
1052 sd_journal_set_data_threshold(j, 0);
1054 SD_JOURNAL_FOREACH(j) {
1058 f = j->current_file;
1059 assert(f && f->current_offset > 0);
1063 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1065 log_error_errno(r, "Can't read entry: %m");
1069 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1073 if (!shall_try_append_again(s->system_journal, r)) {
1074 log_error_errno(r, "Can't write entry: %m");
1081 if (!s->system_journal) {
1082 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1087 log_debug("Retrying write.");
1088 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1090 log_error_errno(r, "Can't write entry: %m");
1096 journal_file_post_change(s->system_journal);
1098 journal_file_close(s->runtime_journal);
1099 s->runtime_journal = NULL;
1102 rm_rf("/run/log/journal", false, true, false);
1104 sd_journal_close(j);
1106 server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1111 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1112 Server *s = userdata;
1115 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1117 if (revents != EPOLLIN) {
1118 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1123 struct ucred *ucred = NULL;
1124 struct timeval *tv = NULL;
1125 struct cmsghdr *cmsg;
1127 size_t label_len = 0;
1131 struct cmsghdr cmsghdr;
1133 /* We use NAME_MAX space for the SELinux label
1134 * here. The kernel currently enforces no
1135 * limit, but according to suggestions from
1136 * the SELinux people this will change and it
1137 * will probably be identical to NAME_MAX. For
1138 * now we use that, but this should be updated
1139 * one day when the final limit is known. */
1140 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1141 CMSG_SPACE(sizeof(struct timeval)) +
1142 CMSG_SPACE(sizeof(int)) + /* fd */
1143 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1145 union sockaddr_union sa = {};
1146 struct msghdr msghdr = {
1149 .msg_control = &control,
1150 .msg_controllen = sizeof(control),
1152 .msg_namelen = sizeof(sa),
1161 /* Try to get the right size, if we can. (Not all
1162 * sockets support SIOCINQ, hence we just try, but
1163 * don't rely on it.) */
1164 (void) ioctl(fd, SIOCINQ, &v);
1166 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1167 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1169 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1171 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1174 iovec.iov_base = s->buffer;
1175 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1177 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1179 if (errno == EINTR || errno == EAGAIN)
1182 log_error_errno(errno, "recvmsg() failed: %m");
1186 log_error("Got EOF on socket.");
1190 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1192 if (cmsg->cmsg_level == SOL_SOCKET &&
1193 cmsg->cmsg_type == SCM_CREDENTIALS &&
1194 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1195 ucred = (struct ucred*) CMSG_DATA(cmsg);
1196 else if (cmsg->cmsg_level == SOL_SOCKET &&
1197 cmsg->cmsg_type == SCM_SECURITY) {
1198 label = (char*) CMSG_DATA(cmsg);
1199 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1200 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1201 cmsg->cmsg_type == SO_TIMESTAMP &&
1202 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1203 tv = (struct timeval*) CMSG_DATA(cmsg);
1204 else if (cmsg->cmsg_level == SOL_SOCKET &&
1205 cmsg->cmsg_type == SCM_RIGHTS) {
1206 fds = (int*) CMSG_DATA(cmsg);
1207 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1211 /* And a trailing NUL, just in case */
1214 if (fd == s->syslog_fd) {
1215 if (n > 0 && n_fds == 0)
1216 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1218 log_warning("Got file descriptors via syslog socket. Ignoring.");
1220 } else if (fd == s->native_fd) {
1221 if (n > 0 && n_fds == 0)
1222 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1223 else if (n == 0 && n_fds == 1)
1224 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1226 log_warning("Got too many file descriptors via native socket. Ignoring.");
1229 assert(fd == s->audit_fd);
1231 if (n > 0 && n_fds == 0)
1232 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1234 log_warning("Got file descriptors via audit socket. Ignoring.");
1237 close_many(fds, n_fds);
1241 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1242 Server *s = userdata;
1246 log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
1248 server_flush_to_var(s);
1252 touch("/run/systemd/journal/flushed");
1257 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1258 Server *s = userdata;
1262 log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
1269 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1270 Server *s = userdata;
1274 log_received_signal(LOG_INFO, si);
1276 sd_event_exit(s->event, 0);
1280 static int setup_signals(Server *s) {
1286 assert_se(sigemptyset(&mask) == 0);
1287 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1288 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1290 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1294 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1298 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1302 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
/* Applies journald switches given on the kernel command line.
 *
 * The command line is read with proc_cmdline(); a read failure is logged as
 * a warning and ignored. Each quoted word is copied and matched against
 * these prefixes, whose value is parsed with parse_boolean():
 *
 *   systemd.journald.forward_to_syslog=   -> s->forward_to_syslog
 *   systemd.journald.forward_to_kmsg=     -> s->forward_to_kmsg
 *   systemd.journald.forward_to_console=  -> s->forward_to_console
 *   systemd.journald.forward_to_wall=     -> s->forward_to_wall
 *
 * The numeric offsets added to word (35, 33, 36, 33) are the lengths of the
 * respective prefixes, i.e. they point at the value after the '='. A value
 * that fails to parse is reported with a warning. Any other word starting
 * with "systemd.journald" is reported as an invalid parameter. */
1309 static int server_parse_proc_cmdline(Server *s) {
1310 _cleanup_free_ char *line = NULL;
1311 const char *w, *state;
1315 r = proc_cmdline(&line);
1317 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
1321 FOREACH_WORD_QUOTED(w, l, line, state) {
1322 _cleanup_free_ char *word;
1324 word = strndup(w, l);
1328 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1329 r = parse_boolean(word + 35);
1331 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1333 s->forward_to_syslog = r;
1334 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1335 r = parse_boolean(word + 33);
1337 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1339 s->forward_to_kmsg = r;
1340 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1341 r = parse_boolean(word + 36);
1343 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1345 s->forward_to_console = r;
1346 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1347 r = parse_boolean(word + 33);
1349 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1351 s->forward_to_wall = r;
1352 } else if (startswith(word, "systemd.journald"))
1353 log_warning("Invalid systemd.journald parameter. Ignoring.");
1355 /* do not warn about state here, since probably systemd already did */
1360 static int server_parse_config_file(Server *s) {
1363 return config_parse_many("/etc/systemd/journald.conf",
1364 CONF_DIRS_NULSTR("systemd/journald.conf"),
1366 config_item_perf_lookup, journald_gperf_lookup,
1370 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1371 Server *s = userdata;
1379 int server_schedule_sync(Server *s, int priority) {
1384 if (priority <= LOG_CRIT) {
1385 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1390 if (s->sync_scheduled)
1393 if (s->sync_interval_usec > 0) {
1396 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1400 when += s->sync_interval_usec;
1402 if (!s->sync_event_source) {
1403 r = sd_event_add_time(
1405 &s->sync_event_source,
1408 server_dispatch_sync, s);
1412 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1414 r = sd_event_source_set_time(s->sync_event_source, when);
1418 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1423 s->sync_scheduled = true;
1429 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1430 Server *s = userdata;
1434 server_cache_hostname(s);
1438 static int server_open_hostname(Server *s) {
1443 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1444 if (s->hostname_fd < 0)
1445 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1447 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1449 /* kernels prior to 3.2 don't support polling this file. Ignore
1452 log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1454 s->hostname_fd = safe_close(s->hostname_fd);
1458 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1461 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1463 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
/* Initialise a Server: set defaults, parse the configuration, create the
 * event loop, take over the file descriptors passed in via the environment,
 * set up the individual log sources and finally open the system journal.
 *
 * NOTE(review): the return value is assumed to be 0 on success and a
 * negative errno-style code on failure, as suggested by the
 * log_error_errno() returns below; confirm. */
1468 int server_init(Server *s) {
/* Set of passed-in descriptors, freed automatically when going out of scope */
1469 _cleanup_fdset_free_ FDSet *fds = NULL;
/* Mark every descriptor as not open */
1475 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = -1;
/* Built-in defaults, assigned before the configuration file and the kernel
 * command line are parsed further down */
1479 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1480 s->sync_scheduled = false;
1482 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1483 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1485 s->forward_to_wall = true;
1487 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
/* Default maximum log level per output */
1489 s->max_level_store = LOG_DEBUG;
1490 s->max_level_syslog = LOG_DEBUG;
1491 s->max_level_kmsg = LOG_NOTICE;
1492 s->max_level_console = LOG_INFO;
1493 s->max_level_wall = LOG_EMERG;
/* Fill both metrics structs with 0xFF bytes.
 * NOTE(review): presumably an all-ones "not set" marker; confirm */
1495 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1496 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1498 server_parse_config_file(s);
1499 server_parse_proc_cmdline(s);
/* If exactly one of rate limit interval and burst is zero, force both to zero */
1500 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1501 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1502 s->rate_limit_interval, s->rate_limit_burst);
1503 s->rate_limit_interval = s->rate_limit_burst = 0;
/* Runtime directory that holds the journal sockets checked for below */
1506 mkdir_p("/run/systemd/journal", 0755);
1508 s->user_journals = ordered_hashmap_new(NULL);
1509 if (!s->user_journals)
1512 s->mmap = mmap_cache_new();
/* Use the default event loop and turn on its watchdog support */
1516 r = sd_event_default(&s->event);
1518 return log_error_errno(r, "Failed to create event loop: %m");
1520 sd_event_set_watchdog(s->event, true);
/* Pick up the file descriptors passed in via the environment */
1522 n = sd_listen_fds(true);
1524 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
/* Classify each passed descriptor by socket type and path; a second
 * descriptor of a kind that is already set is reported as an error */
1526 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
/* Native protocol: datagram socket at /run/systemd/journal/socket */
1528 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1530 if (s->native_fd >= 0) {
1531 log_error("Too many native sockets passed.");
/* Stdout streams: listening stream socket at /run/systemd/journal/stdout */
1537 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1539 if (s->stdout_fd >= 0) {
1540 log_error("Too many stdout sockets passed.");
/* Syslog: datagram socket at /dev/log or /run/systemd/journal/dev-log */
1546 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1547 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1549 if (s->syslog_fd >= 0) {
1550 log_error("Too many /dev/log sockets passed.");
/* Audit: raw netlink socket */
1556 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1558 if (s->audit_fd >= 0) {
1559 log_error("Too many audit sockets passed.");
/* NOTE(review): presumably descriptors not matched above are collected in
 * the fdset here; confirm */
1573 r = fdset_put(fds, fd);
/* The stdout socket setup also receives the collected descriptor set */
1579 r = server_open_stdout_socket(s, fds);
/* Anything still in the set is reported as unknown and the set is freed */
1583 if (fdset_size(fds) > 0) {
1584 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1585 fds = fdset_free(fds);
/* Remaining initialisation steps, in this order */
1588 r = server_open_syslog_socket(s);
1592 r = server_open_native_socket(s);
1596 r = server_open_dev_kmsg(s);
1600 r = server_open_audit(s);
1604 r = server_open_kernel_seqnum(s);
1608 r = server_open_hostname(s);
1612 r = setup_signals(s);
1616 s->udev = udev_new();
/* Rate limiter built from the (possibly zeroed) interval and burst */
1620 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1624 r = cg_get_root_path(&s->cgroup_root);
/* Cache hostname, boot ID and machine ID in the Server */
1628 server_cache_hostname(s);
1629 server_cache_boot_id(s);
1630 server_cache_machine_id(s);
1632 r = system_journal_open(s, false);
/* Call journal_file_maybe_append_tag() on the system journal (if open) and on
 * every user journal, all with the same CLOCK_REALTIME timestamp. */
1639 void server_maybe_append_tags(Server *s) {
/* Take the timestamp once so every file sees the same value */
1645 n = now(CLOCK_REALTIME);
1647 if (s->system_journal)
1648 journal_file_maybe_append_tag(s->system_journal, n);
1650 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1651 journal_file_maybe_append_tag(f, n);
/* Release the resources held by a Server: stdout streams, journal files,
 * event sources and the event loop, file descriptors, the rate limiter, the
 * kernel sequence number mapping, cached strings, the mmap cache and the
 * udev context. */
1655 void server_done(Server *s) {
/* NOTE(review): the loop relies on stdout_stream_free() removing the stream
 * from s->stdout_streams; confirm */
1659 while (s->stdout_streams)
1660 stdout_stream_free(s->stdout_streams);
/* Close the system, runtime and all per-user journal files */
1662 if (s->system_journal)
1663 journal_file_close(s->system_journal);
1665 if (s->runtime_journal)
1666 journal_file_close(s->runtime_journal);
/* Drain the map one entry at a time, closing each file, then free the map */
1668 while ((f = ordered_hashmap_steal_first(s->user_journals)))
1669 journal_file_close(f);
1671 ordered_hashmap_free(s->user_journals);
/* Drop the references to all event sources before the event loop itself */
1673 sd_event_source_unref(s->syslog_event_source);
1674 sd_event_source_unref(s->native_event_source);
1675 sd_event_source_unref(s->stdout_event_source);
1676 sd_event_source_unref(s->dev_kmsg_event_source);
1677 sd_event_source_unref(s->audit_event_source);
1678 sd_event_source_unref(s->sync_event_source);
1679 sd_event_source_unref(s->sigusr1_event_source);
1680 sd_event_source_unref(s->sigusr2_event_source);
1681 sd_event_source_unref(s->sigterm_event_source);
1682 sd_event_source_unref(s->sigint_event_source);
1683 sd_event_source_unref(s->hostname_event_source);
1684 sd_event_unref(s->event);
/* Close the descriptors that server_init() initialised to -1 */
1686 safe_close(s->syslog_fd);
1687 safe_close(s->native_fd);
1688 safe_close(s->stdout_fd);
1689 safe_close(s->dev_kmsg_fd);
1690 safe_close(s->audit_fd);
1691 safe_close(s->hostname_fd);
1694 journal_rate_limit_free(s->rate_limit);
/* The kernel sequence number is unmapped with the size of one uint64_t */
1696 if (s->kernel_seqnum)
1697 munmap(s->kernel_seqnum, sizeof(uint64_t));
1701 free(s->cgroup_root);
1702 free(s->hostname_field);
1705 mmap_cache_unref(s->mmap);
1708 udev_unref(s->udev);