1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33 #include "sd-daemon.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
42 #include "conf-parser.h"
43 #include "selinux-util.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-rate-limit.h"
48 #include "journald-kmsg.h"
49 #include "journald-syslog.h"
50 #include "journald-stream.h"
51 #include "journald-console.h"
52 #include "journald-native.h"
53 #include "journald-audit.h"
54 #include "journald-server.h"
58 #include <acl/libacl.h>
63 #include <selinux/selinux.h>
66 #define USER_JOURNALS_MAX 1024
68 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
69 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
70 #define DEFAULT_RATE_LIMIT_BURST 1000
71 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
73 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
75 static const char* const storage_table[_STORAGE_MAX] = {
76 [STORAGE_AUTO] = "auto",
77 [STORAGE_VOLATILE] = "volatile",
78 [STORAGE_PERSISTENT] = "persistent",
79 [STORAGE_NONE] = "none"
82 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
83 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
85 static const char* const split_mode_table[_SPLIT_MAX] = {
86 [SPLIT_LOGIN] = "login",
88 [SPLIT_NONE] = "none",
91 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
92 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
94 static uint64_t available_space(Server *s, bool verbose) {
96 _cleanup_free_ char *p = NULL;
99 uint64_t sum = 0, ss_avail = 0, avail = 0;
101 _cleanup_closedir_ DIR *d = NULL;
106 ts = now(CLOCK_MONOTONIC);
108 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
110 return s->cached_available_space;
112 r = sd_id128_get_machine(&machine);
116 if (s->system_journal) {
117 f = "/var/log/journal/";
118 m = &s->system_metrics;
120 f = "/run/log/journal/";
121 m = &s->runtime_metrics;
126 p = strappend(f, sd_id128_to_string(machine, ids));
134 if (fstatvfs(dirfd(d), &ss) < 0)
143 if (!de && errno != 0)
149 if (!endswith(de->d_name, ".journal") &&
150 !endswith(de->d_name, ".journal~"))
153 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
156 if (!S_ISREG(st.st_mode))
159 sum += (uint64_t) st.st_blocks * 512UL;
162 ss_avail = ss.f_bsize * ss.f_bavail;
164 /* If we reached a high mark, we will always allow this much
165 * again, unless usage goes above max_use. This watermark
166 * value is cached so that we don't give up space on pressure,
167 * but hover below the maximum usage. */
172 avail = LESS_BY(ss_avail, m->keep_free);
174 s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
175 s->cached_available_space_timestamp = ts;
178 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
179 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
181 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
182 "%s journal is using %s (max allowed %s, "
183 "trying to leave %s free of %s available → current limit %s).",
184 s->system_journal ? "Permanent" : "Runtime",
185 format_bytes(fb1, sizeof(fb1), sum),
186 format_bytes(fb2, sizeof(fb2), m->max_use),
187 format_bytes(fb3, sizeof(fb3), m->keep_free),
188 format_bytes(fb4, sizeof(fb4), ss_avail),
189 format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
192 return s->cached_available_space;
195 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
200 acl_permset_t permset;
205 r = fchmod(f->fd, 0640);
207 log_warning_errno(r, "Failed to fix access mode on %s, ignoring: %m", f->path);
210 if (uid <= SYSTEM_UID_MAX)
213 acl = acl_get_fd(f->fd);
215 log_warning_errno(errno, "Failed to read ACL on %s, ignoring: %m", f->path);
219 r = acl_find_uid(acl, uid, &entry);
222 if (acl_create_entry(&acl, &entry) < 0 ||
223 acl_set_tag_type(entry, ACL_USER) < 0 ||
224 acl_set_qualifier(entry, &uid) < 0) {
225 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
230 /* We do not recalculate the mask unconditionally here,
231 * so that the fchmod() mask above stays intact. */
232 if (acl_get_permset(entry, &permset) < 0 ||
233 acl_add_perm(permset, ACL_READ) < 0 ||
234 calc_acl_mask_if_needed(&acl) < 0) {
235 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
239 if (acl_set_fd(f->fd, acl) < 0)
240 log_warning_errno(errno, "Failed to set ACL on %s, ignoring: %m", f->path);
247 static JournalFile* find_journal(Server *s, uid_t uid) {
248 _cleanup_free_ char *p = NULL;
255 /* We split up user logs only on /var, not on /run. If the
256 * runtime file is open, we write to it exclusively, in order
257 * to guarantee proper order as soon as we flush /run to
258 * /var and close the runtime file. */
260 if (s->runtime_journal)
261 return s->runtime_journal;
263 if (uid <= SYSTEM_UID_MAX)
264 return s->system_journal;
266 r = sd_id128_get_machine(&machine);
268 return s->system_journal;
270 f = ordered_hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
274 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
275 SD_ID128_FORMAT_VAL(machine), uid) < 0)
276 return s->system_journal;
278 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
279 /* Too many open? Then let's close one */
280 f = ordered_hashmap_steal_first(s->user_journals);
282 journal_file_close(f);
285 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
287 return s->system_journal;
289 server_fix_perms(s, f, uid);
291 r = ordered_hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
293 journal_file_close(f);
294 return s->system_journal;
300 static int do_rotate(Server *s, JournalFile **f, const char* name,
301 bool seal, uint32_t uid) {
308 r = journal_file_rotate(f, s->compress, seal);
311 log_error_errno(r, "Failed to rotate %s: %m",
314 log_error_errno(r, "Failed to create new %s journal: %m",
317 server_fix_perms(s, *f, uid);
321 void server_rotate(Server *s) {
327 log_debug("Rotating...");
329 do_rotate(s, &s->runtime_journal, "runtime", false, 0);
330 do_rotate(s, &s->system_journal, "system", s->seal, 0);
332 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
333 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
335 ordered_hashmap_replace(s->user_journals, k, f);
337 /* Old file has been closed and deallocated */
338 ordered_hashmap_remove(s->user_journals, k);
342 void server_sync(Server *s) {
348 if (s->system_journal) {
349 r = journal_file_set_offline(s->system_journal);
351 log_error_errno(r, "Failed to sync system journal: %m");
354 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
355 r = journal_file_set_offline(f);
357 log_error_errno(r, "Failed to sync user journal: %m");
360 if (s->sync_event_source) {
361 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
363 log_error_errno(r, "Failed to disable sync timer source: %m");
366 s->sync_scheduled = false;
369 static void do_vacuum(Server *s, char *ids, JournalFile *f, const char* path,
370 JournalMetrics *metrics) {
377 p = strappenda(path, ids);
378 r = journal_directory_vacuum(p, metrics->max_use, s->max_retention_usec, &s->oldest_file_usec, false);
379 if (r < 0 && r != -ENOENT)
380 log_error_errno(r, "Failed to vacuum %s: %m", p);
383 void server_vacuum(Server *s) {
388 log_debug("Vacuuming...");
390 s->oldest_file_usec = 0;
392 r = sd_id128_get_machine(&machine);
394 log_error_errno(r, "Failed to get machine ID: %m");
397 sd_id128_to_string(machine, ids);
399 do_vacuum(s, ids, s->system_journal, "/var/log/journal/", &s->system_metrics);
400 do_vacuum(s, ids, s->runtime_journal, "/run/log/journal/", &s->runtime_metrics);
402 s->cached_available_space_timestamp = 0;
405 static void server_cache_machine_id(Server *s) {
411 r = sd_id128_get_machine(&id);
415 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
418 static void server_cache_boot_id(Server *s) {
424 r = sd_id128_get_boot(&id);
428 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
431 static void server_cache_hostname(Server *s) {
432 _cleanup_free_ char *t = NULL;
437 t = gethostname_malloc();
441 x = strappend("_HOSTNAME=", t);
445 free(s->hostname_field);
446 s->hostname_field = x;
449 bool shall_try_append_again(JournalFile *f, int r) {
451 /* -E2BIG Hit configured limit
453 -EDQUOT Quota limit hit
455 -EHOSTDOWN Other machine
456 -EBUSY Unclean shutdown
457 -EPROTONOSUPPORT Unsupported feature
460 -ESHUTDOWN Already archived */
462 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
463 log_debug("%s: Allocation limit reached, rotating.", f->path);
464 else if (r == -EHOSTDOWN)
465 log_info("%s: Journal file from other machine, rotating.", f->path);
466 else if (r == -EBUSY)
467 log_info("%s: Unclean shutdown, rotating.", f->path);
468 else if (r == -EPROTONOSUPPORT)
469 log_info("%s: Unsupported feature, rotating.", f->path);
470 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
471 log_warning("%s: Journal file corrupted, rotating.", f->path);
478 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
480 bool vacuumed = false;
487 f = find_journal(s, uid);
491 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
492 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
497 f = find_journal(s, uid);
502 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
504 server_schedule_sync(s, priority);
508 if (vacuumed || !shall_try_append_again(f, r)) {
511 for (i = 0; i < n; i++)
512 size += iovec[i].iov_len;
514 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, size);
521 f = find_journal(s, uid);
525 log_debug("Retrying write.");
526 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
530 for (i = 0; i < n; i++)
531 size += iovec[i].iov_len;
533 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, size);
535 server_schedule_sync(s, priority);
538 static void dispatch_message_real(
540 struct iovec *iovec, unsigned n, unsigned m,
541 const struct ucred *ucred,
542 const struct timeval *tv,
543 const char *label, size_t label_len,
548 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
549 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
550 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
551 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
552 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
553 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
554 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
555 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
561 uid_t realuid = 0, owner = 0, journal_uid;
562 bool owner_valid = false;
564 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
565 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
566 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
567 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
576 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
579 realuid = ucred->uid;
581 sprintf(pid, "_PID="PID_FMT, ucred->pid);
582 IOVEC_SET_STRING(iovec[n++], pid);
584 sprintf(uid, "_UID="UID_FMT, ucred->uid);
585 IOVEC_SET_STRING(iovec[n++], uid);
587 sprintf(gid, "_GID="GID_FMT, ucred->gid);
588 IOVEC_SET_STRING(iovec[n++], gid);
590 r = get_process_comm(ucred->pid, &t);
592 x = strappenda("_COMM=", t);
594 IOVEC_SET_STRING(iovec[n++], x);
597 r = get_process_exe(ucred->pid, &t);
599 x = strappenda("_EXE=", t);
601 IOVEC_SET_STRING(iovec[n++], x);
604 r = get_process_cmdline(ucred->pid, 0, false, &t);
606 x = strappenda("_CMDLINE=", t);
608 IOVEC_SET_STRING(iovec[n++], x);
611 r = get_process_capeff(ucred->pid, &t);
613 x = strappenda("_CAP_EFFECTIVE=", t);
615 IOVEC_SET_STRING(iovec[n++], x);
619 r = audit_session_from_pid(ucred->pid, &audit);
621 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
622 IOVEC_SET_STRING(iovec[n++], audit_session);
625 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
627 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
628 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
632 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
634 char *session = NULL;
636 x = strappenda("_SYSTEMD_CGROUP=", c);
637 IOVEC_SET_STRING(iovec[n++], x);
639 r = cg_path_get_session(c, &t);
641 session = strappenda("_SYSTEMD_SESSION=", t);
643 IOVEC_SET_STRING(iovec[n++], session);
646 if (cg_path_get_owner_uid(c, &owner) >= 0) {
649 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
650 IOVEC_SET_STRING(iovec[n++], owner_uid);
653 if (cg_path_get_unit(c, &t) >= 0) {
654 x = strappenda("_SYSTEMD_UNIT=", t);
656 IOVEC_SET_STRING(iovec[n++], x);
657 } else if (unit_id && !session) {
658 x = strappenda("_SYSTEMD_UNIT=", unit_id);
659 IOVEC_SET_STRING(iovec[n++], x);
662 if (cg_path_get_user_unit(c, &t) >= 0) {
663 x = strappenda("_SYSTEMD_USER_UNIT=", t);
665 IOVEC_SET_STRING(iovec[n++], x);
666 } else if (unit_id && session) {
667 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
668 IOVEC_SET_STRING(iovec[n++], x);
671 if (cg_path_get_slice(c, &t) >= 0) {
672 x = strappenda("_SYSTEMD_SLICE=", t);
674 IOVEC_SET_STRING(iovec[n++], x);
678 } else if (unit_id) {
679 x = strappenda("_SYSTEMD_UNIT=", unit_id);
680 IOVEC_SET_STRING(iovec[n++], x);
684 if (mac_selinux_use()) {
686 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
688 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
689 IOVEC_SET_STRING(iovec[n++], x);
691 security_context_t con;
693 if (getpidcon(ucred->pid, &con) >= 0) {
694 x = strappenda("_SELINUX_CONTEXT=", con);
697 IOVEC_SET_STRING(iovec[n++], x);
706 r = get_process_uid(object_pid, &object_uid);
708 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
709 IOVEC_SET_STRING(iovec[n++], o_uid);
712 r = get_process_gid(object_pid, &object_gid);
714 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
715 IOVEC_SET_STRING(iovec[n++], o_gid);
718 r = get_process_comm(object_pid, &t);
720 x = strappenda("OBJECT_COMM=", t);
722 IOVEC_SET_STRING(iovec[n++], x);
725 r = get_process_exe(object_pid, &t);
727 x = strappenda("OBJECT_EXE=", t);
729 IOVEC_SET_STRING(iovec[n++], x);
732 r = get_process_cmdline(object_pid, 0, false, &t);
734 x = strappenda("OBJECT_CMDLINE=", t);
736 IOVEC_SET_STRING(iovec[n++], x);
740 r = audit_session_from_pid(object_pid, &audit);
742 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
743 IOVEC_SET_STRING(iovec[n++], o_audit_session);
746 r = audit_loginuid_from_pid(object_pid, &loginuid);
748 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
749 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
753 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
755 x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
756 IOVEC_SET_STRING(iovec[n++], x);
758 r = cg_path_get_session(c, &t);
760 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
762 IOVEC_SET_STRING(iovec[n++], x);
765 if (cg_path_get_owner_uid(c, &owner) >= 0) {
766 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
767 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
770 if (cg_path_get_unit(c, &t) >= 0) {
771 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
773 IOVEC_SET_STRING(iovec[n++], x);
776 if (cg_path_get_user_unit(c, &t) >= 0) {
777 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
779 IOVEC_SET_STRING(iovec[n++], x);
788 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
789 IOVEC_SET_STRING(iovec[n++], source_time);
792 /* Note that strictly speaking storing the boot id here is
793 * redundant since the entry includes this in-line
794 * anyway. However, we need this indexed, too. */
795 if (!isempty(s->boot_id_field))
796 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
798 if (!isempty(s->machine_id_field))
799 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
801 if (!isempty(s->hostname_field))
802 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
806 if (s->split_mode == SPLIT_UID && realuid > 0)
807 /* Split up strictly by any UID */
808 journal_uid = realuid;
809 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
810 /* Split up by login UIDs. We do this only if the
811 * realuid is not root, in order not to accidentally
812 * leak privileged information to the user that is
813 * logged by a privileged process that is part of an
814 * unprivileged session. */
819 write_to_journal(s, journal_uid, iovec, n, priority);
822 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
823 char mid[11 + 32 + 1];
824 char buffer[16 + LINE_MAX + 1];
825 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
828 struct ucred ucred = {};
833 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
834 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
836 memcpy(buffer, "MESSAGE=", 8);
837 va_start(ap, format);
838 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
840 char_array_0(buffer);
841 IOVEC_SET_STRING(iovec[n++], buffer);
843 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
844 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
846 IOVEC_SET_STRING(iovec[n++], mid);
849 ucred.pid = getpid();
850 ucred.uid = getuid();
851 ucred.gid = getgid();
853 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
856 void server_dispatch_message(
858 struct iovec *iovec, unsigned n, unsigned m,
859 const struct ucred *ucred,
860 const struct timeval *tv,
861 const char *label, size_t label_len,
867 _cleanup_free_ char *path = NULL;
871 assert(iovec || n == 0);
876 if (LOG_PRI(priority) > s->max_level_store)
879 /* Stop early in case the information will not be stored
881 if (s->storage == STORAGE_NONE)
887 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
891 /* example: /user/lennart/3/foobar
892 * /system/dbus.service/foobar
 * So let's cut off everything past the third /, since that is
895 * where user directories start */
897 c = strchr(path, '/');
899 c = strchr(c+1, '/');
901 c = strchr(c+1, '/');
907 rl = journal_rate_limit_test(s->rate_limit, path,
908 priority & LOG_PRIMASK, available_space(s, false));
913 /* Write a suppression message if we suppressed something */
915 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
916 "Suppressed %u messages from %s", rl - 1, path);
919 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
923 static int system_journal_open(Server *s, bool flush_requested) {
929 r = sd_id128_get_machine(&machine);
931 return log_error_errno(r, "Failed to get machine id: %m");
933 sd_id128_to_string(machine, ids);
935 if (!s->system_journal &&
936 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
938 || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
940 /* If in auto mode: first try to create the machine
941 * path, but not the prefix.
943 * If in persistent mode: create /var/log/journal and
944 * the machine path */
946 if (s->storage == STORAGE_PERSISTENT)
947 (void) mkdir("/var/log/journal/", 0755);
949 fn = strappenda("/var/log/journal/", ids);
950 (void) mkdir(fn, 0755);
952 fn = strappenda(fn, "/system.journal");
953 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
956 server_fix_perms(s, s->system_journal, 0);
958 if (r != -ENOENT && r != -EROFS)
959 log_warning_errno(r, "Failed to open system journal: %m");
965 if (!s->runtime_journal &&
966 (s->storage != STORAGE_NONE)) {
968 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
972 if (s->system_journal) {
974 /* Try to open the runtime journal, but only
975 * if it already exists, so that we can flush
976 * it into the system journal */
978 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
983 log_warning_errno(r, "Failed to open runtime journal: %m");
990 /* OK, we really need the runtime journal, so create
991 * it if necessary. */
993 (void) mkdir("/run/log", 0755);
994 (void) mkdir("/run/log/journal", 0755);
995 (void) mkdir_parents(fn, 0750);
997 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1001 return log_error_errno(r, "Failed to open runtime journal: %m");
1004 if (s->runtime_journal)
1005 server_fix_perms(s, s->runtime_journal, 0);
1008 available_space(s, true);
1013 int server_flush_to_var(Server *s) {
1015 sd_journal *j = NULL;
1016 char ts[FORMAT_TIMESPAN_MAX];
1023 if (s->storage != STORAGE_AUTO &&
1024 s->storage != STORAGE_PERSISTENT)
1027 if (!s->runtime_journal)
1030 system_journal_open(s, true);
1032 if (!s->system_journal)
1035 log_debug("Flushing to /var...");
1037 start = now(CLOCK_MONOTONIC);
1039 r = sd_id128_get_machine(&machine);
1043 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1045 return log_error_errno(r, "Failed to read runtime journal: %m");
1047 sd_journal_set_data_threshold(j, 0);
1049 SD_JOURNAL_FOREACH(j) {
1053 f = j->current_file;
1054 assert(f && f->current_offset > 0);
1058 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1060 log_error_errno(r, "Can't read entry: %m");
1064 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1068 if (!shall_try_append_again(s->system_journal, r)) {
1069 log_error_errno(r, "Can't write entry: %m");
1076 if (!s->system_journal) {
1077 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1082 log_debug("Retrying write.");
1083 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1085 log_error_errno(r, "Can't write entry: %m");
1091 journal_file_post_change(s->system_journal);
1093 journal_file_close(s->runtime_journal);
1094 s->runtime_journal = NULL;
1097 rm_rf("/run/log/journal", false, true, false);
1099 sd_journal_close(j);
1101 server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1106 int process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1107 Server *s = userdata;
1110 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1112 if (revents != EPOLLIN) {
1113 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1118 struct ucred *ucred = NULL;
1119 struct timeval *tv = NULL;
1120 struct cmsghdr *cmsg;
1122 size_t label_len = 0;
1126 struct cmsghdr cmsghdr;
1128 /* We use NAME_MAX space for the SELinux label
1129 * here. The kernel currently enforces no
1130 * limit, but according to suggestions from
1131 * the SELinux people this will change and it
1132 * will probably be identical to NAME_MAX. For
1133 * now we use that, but this should be updated
1134 * one day when the final limit is known. */
1135 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1136 CMSG_SPACE(sizeof(struct timeval)) +
1137 CMSG_SPACE(sizeof(int)) + /* fd */
1138 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1140 union sockaddr_union sa = {};
1141 struct msghdr msghdr = {
1144 .msg_control = &control,
1145 .msg_controllen = sizeof(control),
1147 .msg_namelen = sizeof(sa),
1156 /* Try to get the right size, if we can. (Not all
1157 * sockets support SIOCINQ, hence we just try, but
 * don't rely on it.) */
1159 (void) ioctl(fd, SIOCINQ, &v);
1161 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1162 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1164 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1166 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1169 iovec.iov_base = s->buffer;
1170 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1172 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1174 if (errno == EINTR || errno == EAGAIN)
1177 log_error_errno(errno, "recvmsg() failed: %m");
1181 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1183 if (cmsg->cmsg_level == SOL_SOCKET &&
1184 cmsg->cmsg_type == SCM_CREDENTIALS &&
1185 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1186 ucred = (struct ucred*) CMSG_DATA(cmsg);
1187 else if (cmsg->cmsg_level == SOL_SOCKET &&
1188 cmsg->cmsg_type == SCM_SECURITY) {
1189 label = (char*) CMSG_DATA(cmsg);
1190 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1191 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1192 cmsg->cmsg_type == SO_TIMESTAMP &&
1193 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1194 tv = (struct timeval*) CMSG_DATA(cmsg);
1195 else if (cmsg->cmsg_level == SOL_SOCKET &&
1196 cmsg->cmsg_type == SCM_RIGHTS) {
1197 fds = (int*) CMSG_DATA(cmsg);
1198 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1202 /* And a trailing NUL, just in case */
1205 if (fd == s->syslog_fd) {
1206 if (n > 0 && n_fds == 0)
1207 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1209 log_warning("Got file descriptors via syslog socket. Ignoring.");
1211 } else if (fd == s->native_fd) {
1212 if (n > 0 && n_fds == 0)
1213 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1214 else if (n == 0 && n_fds == 1)
1215 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1217 log_warning("Got too many file descriptors via native socket. Ignoring.");
1220 assert(fd == s->audit_fd);
1222 if (n > 0 && n_fds == 0)
1223 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1225 log_warning("Got file descriptors via audit socket. Ignoring.");
1228 close_many(fds, n_fds);
1232 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1233 Server *s = userdata;
1237 log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
1239 server_flush_to_var(s);
1243 touch("/run/systemd/journal/flushed");
1248 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1249 Server *s = userdata;
1253 log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
1260 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1261 Server *s = userdata;
1265 log_received_signal(LOG_INFO, si);
1267 sd_event_exit(s->event, 0);
1271 static int setup_signals(Server *s) {
1277 assert_se(sigemptyset(&mask) == 0);
1278 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1279 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1281 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1285 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1289 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1293 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1300 static int server_parse_proc_cmdline(Server *s) {
1301 _cleanup_free_ char *line = NULL;
1302 const char *w, *state;
1306 r = proc_cmdline(&line);
1308 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
1312 FOREACH_WORD_QUOTED(w, l, line, state) {
1313 _cleanup_free_ char *word;
1315 word = strndup(w, l);
1319 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1320 r = parse_boolean(word + 35);
1322 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1324 s->forward_to_syslog = r;
1325 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1326 r = parse_boolean(word + 33);
1328 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1330 s->forward_to_kmsg = r;
1331 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1332 r = parse_boolean(word + 36);
1334 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1336 s->forward_to_console = r;
1337 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1338 r = parse_boolean(word + 33);
1340 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1342 s->forward_to_wall = r;
1343 } else if (startswith(word, "systemd.journald"))
1344 log_warning("Invalid systemd.journald parameter. Ignoring.");
1346 /* do not warn about state here, since probably systemd already did */
1351 static int server_parse_config_file(Server *s) {
1354 return config_parse_many("/etc/systemd/journald.conf",
1355 CONF_DIRS_NULSTR("systemd/journald.conf"),
1357 config_item_perf_lookup, journald_gperf_lookup,
1361 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1362 Server *s = userdata;
1370 int server_schedule_sync(Server *s, int priority) {
1375 if (priority <= LOG_CRIT) {
1376 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1381 if (s->sync_scheduled)
1384 if (s->sync_interval_usec > 0) {
1387 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1391 when += s->sync_interval_usec;
1393 if (!s->sync_event_source) {
1394 r = sd_event_add_time(
1396 &s->sync_event_source,
1399 server_dispatch_sync, s);
1403 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1405 r = sd_event_source_set_time(s->sync_event_source, when);
1409 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1414 s->sync_scheduled = true;
1420 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1421 Server *s = userdata;
1425 server_cache_hostname(s);
1429 static int server_open_hostname(Server *s) {
1434 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1435 if (s->hostname_fd < 0)
1436 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1438 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1440 /* kernels prior to 3.2 don't support polling this file. Ignore
1443 log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1445 s->hostname_fd = safe_close(s->hostname_fd);
1449 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1452 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1454 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
/* Initializes a Server: applies built-in defaults, reads configuration,
 * sets up the event loop, adopts passed-in sockets and opens every input
 * source.
 *
 * Order of the visible steps:
 *  1. Set all fds to -1 and fill in defaults (sync interval, rate limit,
 *     wall forwarding, maximum file age, per-target maximum log levels).
 *  2. Parse the configuration file and the kernel command line, which run
 *     after the defaults have been written.
 *  3. Zero both rate limit settings if exactly one of them is zero.
 *  4. Create /run/systemd/journal, the user-journal hashmap and the mmap
 *     cache.
 *  5. Acquire the default event loop and call sd_event_set_watchdog() on it.
 *  6. Walk the fds reported by sd_listen_fds() and classify each as the
 *     native, stdout, syslog or audit socket; a second fd of the same kind
 *     is reported as an error, an unrecognized fd only produces a warning.
 *  7. Call the server_open_*() helpers for the syslog, native and stdout
 *     sockets, /dev/kmsg, audit, the kernel sequence number and the
 *     hostname, then set up signal handling.
 *  8. Create the udev context and the rate limiter, look up the cgroup
 *     root path, cache hostname, boot ID and machine ID, and open the
 *     system journal via system_journal_open(s, false).
 *
 * NOTE(review): local declarations, most "if (r < 0)" checks, the
 * assignments that store accepted fds, and the return statements are not
 * visible in this excerpt; confirm error propagation against the full file. */
1459 int server_init(Server *s) {
/* -1 marks every descriptor as "not opened yet". */
1465 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = -1;
1469 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1470 s->sync_scheduled = false;
1472 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1473 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1475 s->forward_to_wall = true;
1477 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1479 s->max_level_store = LOG_DEBUG;
1480 s->max_level_syslog = LOG_DEBUG;
1481 s->max_level_kmsg = LOG_NOTICE;
1482 s->max_level_console = LOG_INFO;
1483 s->max_level_wall = LOG_EMERG;
/* Every byte of both metrics structures is set to 0xFF.
 * NOTE(review): presumably an "unset" marker for each field; confirm
 * against the metrics type. */
1485 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1486 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* Configuration sources are applied on top of the defaults above. */
1488 server_parse_config_file(s);
1489 server_parse_proc_cmdline(s);
/* The XOR of the two normalized values is true when exactly one of
 * interval and burst is zero; in that case both are zeroed. */
1490 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1491 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1492 s->rate_limit_interval, s->rate_limit_burst);
1493 s->rate_limit_interval = s->rate_limit_burst = 0;
1496 mkdir_p("/run/systemd/journal", 0755);
1498 s->user_journals = ordered_hashmap_new(NULL);
1499 if (!s->user_journals)
1502 s->mmap = mmap_cache_new();
1506 r = sd_event_default(&s->event);
1508 return log_error_errno(r, "Failed to create event loop: %m");
1510 sd_event_set_watchdog(s->event, true);
/* n is the number of descriptors handed over through the environment;
 * they occupy the range starting at SD_LISTEN_FDS_START. */
1512 n = sd_listen_fds(true);
1514 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1516 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
/* Datagram socket bound to the native protocol path. */
1518 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1520 if (s->native_fd >= 0) {
1521 log_error("Too many native sockets passed.");
/* Stream socket on the stdout path; the third argument (1) is the
 * "listening" parameter of sd_is_socket_unix(). */
1527 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1529 if (s->stdout_fd >= 0) {
1530 log_error("Too many stdout sockets passed.");
/* The syslog socket is accepted under either of two paths. */
1536 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1537 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1539 if (s->syslog_fd >= 0) {
1540 log_error("Too many /dev/log sockets passed.");
/* A raw netlink socket is treated as the audit socket. */
1546 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1548 if (s->audit_fd >= 0) {
1549 log_error("Too many audit sockets passed.");
1556 log_warning("Unknown socket passed as file descriptor %d, ignoring.", fd);
1558 /* Let's close the fd, better be safe than
1559 sorry. The fd might reference some resource
1560 that we really want to release if we don't
1567 r = server_open_syslog_socket(s);
1571 r = server_open_native_socket(s);
1575 r = server_open_stdout_socket(s);
1579 r = server_open_dev_kmsg(s);
1583 r = server_open_audit(s);
1587 r = server_open_kernel_seqnum(s);
1591 r = server_open_hostname(s);
1595 r = setup_signals(s);
1599 s->udev = udev_new();
1603 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1607 r = cg_get_root_path(&s->cgroup_root);
1611 server_cache_hostname(s);
1612 server_cache_boot_id(s);
1613 server_cache_machine_id(s);
1615 r = system_journal_open(s, false);
/* Gives every open journal file the chance to append a tag.
 *
 * The CLOCK_REALTIME timestamp is taken once and the same value is passed
 * to journal_file_maybe_append_tag() for the system journal (when one is
 * open) and for each file stored in s->user_journals.
 *
 * NOTE(review): local declarations and any conditional compilation around
 * the body are not visible in this excerpt. */
1622 void server_maybe_append_tags(Server *s) {
1628 n = now(CLOCK_REALTIME);
1630 if (s->system_journal)
1631 journal_file_maybe_append_tag(s->system_journal, n);
1633 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1634 journal_file_maybe_append_tag(f, n);
/* Releases the resources owned by a Server.
 *
 * Visible teardown order: stdout streams, journal files (system, runtime,
 * then every user journal), the user-journal hashmap, event sources and
 * the event loop reference, file descriptors, the rate limiter, the
 * kernel sequence number mapping, heap-allocated strings, the mmap cache
 * and the udev context.
 *
 * The visible code does not free the Server structure itself.
 * NOTE(review): some statements of this function are not visible in this
 * excerpt (declarations, possibly further frees and NULL guards); confirm
 * against the full file. */
1638 void server_done(Server *s) {
/* Repeatedly frees the list head until the list is empty.
 * NOTE(review): relies on stdout_stream_free() unlinking the stream
 * from s->stdout_streams; confirm. */
1642 while (s->stdout_streams)
1643 stdout_stream_free(s->stdout_streams);
1645 if (s->system_journal)
1646 journal_file_close(s->system_journal);
1648 if (s->runtime_journal)
1649 journal_file_close(s->runtime_journal);
/* Drain the hashmap entry by entry so each journal file is closed
 * before the container is freed. */
1651 while ((f = ordered_hashmap_steal_first(s->user_journals)))
1652 journal_file_close(f);
1654 ordered_hashmap_free(s->user_journals);
/* Drop the references on all event sources, then on the loop itself. */
1656 sd_event_source_unref(s->syslog_event_source);
1657 sd_event_source_unref(s->native_event_source);
1658 sd_event_source_unref(s->stdout_event_source);
1659 sd_event_source_unref(s->dev_kmsg_event_source);
1660 sd_event_source_unref(s->audit_event_source);
1661 sd_event_source_unref(s->sync_event_source);
1662 sd_event_source_unref(s->sigusr1_event_source);
1663 sd_event_source_unref(s->sigusr2_event_source);
1664 sd_event_source_unref(s->sigterm_event_source);
1665 sd_event_source_unref(s->sigint_event_source);
1666 sd_event_source_unref(s->hostname_event_source);
1667 sd_event_unref(s->event);
1669 safe_close(s->syslog_fd);
1670 safe_close(s->native_fd);
1671 safe_close(s->stdout_fd);
1672 safe_close(s->dev_kmsg_fd);
1673 safe_close(s->audit_fd);
1674 safe_close(s->hostname_fd);
1677 journal_rate_limit_free(s->rate_limit);
/* The mapping is released with a length of one uint64_t. */
1679 if (s->kernel_seqnum)
1680 munmap(s->kernel_seqnum, sizeof(uint64_t));
1684 free(s->cgroup_root);
1685 free(s->hostname_field);
1688 mmap_cache_unref(s->mmap);
1691 udev_unref(s->udev);