1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
30 #include "sd-journal.h"
31 #include "sd-messages.h"
32 #include "sd-daemon.h"
35 #include "journal-file.h"
36 #include "socket-util.h"
37 #include "cgroup-util.h"
39 #include "conf-parser.h"
40 #include "selinux-util.h"
41 #include "journal-internal.h"
42 #include "journal-vacuum.h"
43 #include "journal-authenticate.h"
44 #include "journald-rate-limit.h"
45 #include "journald-kmsg.h"
46 #include "journald-syslog.h"
47 #include "journald-stream.h"
48 #include "journald-native.h"
49 #include "journald-audit.h"
50 #include "journald-server.h"
54 #include <selinux/selinux.h>
/* Upper bound on the number of per-user journal files kept in
 * s->user_journals; find_journal() closes files taken from the front of
 * the map while the map holds at least this many entries. */
57 #define USER_JOURNALS_MAX 1024
/* Defaults that server_init() assigns before the configuration is parsed. */
59 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
60 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
61 #define DEFAULT_RATE_LIMIT_BURST 1000
62 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
/* Age limit that available_space() compares against the timestamp of its
 * cached result before returning the cached value. */
64 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Configuration-file spellings of the Storage enum values, indexed by the
 * enum. The two macros below are handed this table's base name ("storage")
 * to define the string lookup helpers and the config_parse_storage parser. */
66 static const char* const storage_table[_STORAGE_MAX] = {
67 [STORAGE_AUTO] = "auto",
68 [STORAGE_VOLATILE] = "volatile",
69 [STORAGE_PERSISTENT] = "persistent",
70 [STORAGE_NONE] = "none"
73 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
74 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Configuration-file spellings of the SplitMode enum values, indexed by the
 * enum. The two macros below are handed this table's base name
 * ("split_mode") to define the string lookup helpers and the
 * config_parse_split_mode parser. */
76 static const char* const split_mode_table[_SPLIT_MAX] = {
77 [SPLIT_LOGIN] = "login",
79 [SPLIT_NONE] = "none",
82 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
83 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Work out how many more bytes the journal may still consume and cache the
 * answer in s->cached_available_space (with its monotonic timestamp in
 * s->cached_available_space_timestamp).
 *
 * The directory and metrics examined are those of the system journal
 * (/var/log/journal/<machine-id>) when s->system_journal is set, otherwise
 * those of the runtime journal (/run/log/journal/<machine-id>).
 *
 * Returns the cached value; a still-fresh cached value may be returned
 * without rescanning the directory. The usage message at the end is sent via
 * server_driver_message(); NOTE(review): presumably only when `verbose` is
 * set -- confirm against the surrounding condition. */
85 static uint64_t available_space(Server *s, bool verbose) {
87 _cleanup_free_ char *p = NULL;
90 uint64_t sum = 0, ss_avail = 0, avail = 0;
92 _cleanup_closedir_ DIR *d = NULL;
97 ts = now(CLOCK_MONOTONIC);
/* Cached result is considered fresh for RECHECK_AVAILABLE_SPACE_USEC. */
99 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
101 return s->cached_available_space;
103 r = sd_id128_get_machine(&machine);
107 if (s->system_journal) {
108 f = "/var/log/journal/";
109 m = &s->system_metrics;
111 f = "/run/log/journal/";
112 m = &s->runtime_metrics;
/* p = journal directory for this machine: prefix + machine id string. */
117 p = strappend(f, sd_id128_to_string(machine, ids));
125 if (fstatvfs(dirfd(d), &ss) < 0)
134 if (!de && errno != 0)
/* Only regular files named *.journal or *.journal~ count towards usage. */
140 if (!endswith(de->d_name, ".journal") &&
141 !endswith(de->d_name, ".journal~"))
144 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
147 if (!S_ISREG(st.st_mode))
/* Disk usage is accumulated from st_blocks, counted in 512-byte units. */
150 sum += (uint64_t) st.st_blocks * 512UL;
/* Free bytes on the file system as reported by fstatvfs(). */
153 ss_avail = ss.f_bsize * ss.f_bavail;
155 /* If we reached a high mark, we will always allow this much
156 * again, unless usage goes above max_use. This watermark
157 * value is cached so that we don't give up space on pressure,
158 * but hover below the maximum usage. */
/* avail = free space minus the amount to keep free (LESS_BY, presumably
 * clamped at zero -- confirm). */
163 avail = LESS_BY(ss_avail, m->keep_free);
/* Limit is the smaller of max_use and avail; what is already used (sum) is
 * taken off to obtain the remaining budget. */
165 s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
166 s->cached_available_space_timestamp = ts;
169 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
170 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
172 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
173 "%s journal is using %s (max allowed %s, "
174 "trying to leave %s free of %s available → current limit %s).",
175 s->system_journal ? "Permanent" : "Runtime",
176 format_bytes(fb1, sizeof(fb1), sum),
177 format_bytes(fb2, sizeof(fb2), m->max_use),
178 format_bytes(fb3, sizeof(fb3), m->keep_free),
179 format_bytes(fb4, sizeof(fb4), ss_avail),
180 format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
183 return s->cached_available_space;
/* Adjust access rights on journal file f: set its mode to 0640 with fchmod()
 * and patch its ACL so that it contains an ACL_USER entry for `uid` carrying
 * ACL_READ. Every failure is logged as a warning ("..., ignoring") rather
 * than reported to the caller (the function returns void).
 *
 * The `uid <= SYSTEM_UID_MAX` check sits between the fchmod() step and the
 * ACL steps; NOTE(review): presumably it skips the ACL work for system
 * users -- confirm.
 *
 * NOTE(review): the fchmod() warning passes `r` (the return value of
 * fchmod()) as the error number, whereas the ACL warnings pass `errno`;
 * fchmod() reports its error through errno, so this looks inconsistent --
 * confirm. */
186 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
191 acl_permset_t permset;
196 r = fchmod(f->fd, 0640);
198 log_warning_errno(r, "Failed to fix access mode on %s, ignoring: %m", f->path);
201 if (uid <= SYSTEM_UID_MAX)
204 acl = acl_get_fd(f->fd);
206 log_warning_errno(errno, "Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an existing entry for this uid; a fresh ACL_USER entry qualified
 * with uid is created below. */
210 r = acl_find_uid(acl, uid, &entry);
213 if (acl_create_entry(&acl, &entry) < 0 ||
214 acl_set_tag_type(entry, ACL_USER) < 0 ||
215 acl_set_qualifier(entry, &uid) < 0) {
216 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
221 /* We do not recalculate the mask unconditionally here,
222 * so that the fchmod() mask above stays intact. */
223 if (acl_get_permset(entry, &permset) < 0 ||
224 acl_add_perm(permset, ACL_READ) < 0 ||
225 calc_acl_mask_if_needed(&acl) < 0) {
226 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
230 if (acl_set_fd(f->fd, acl) < 0)
231 log_warning_errno(errno, "Failed to set ACL on %s, ignoring: %m", f->path);
/* Select the journal file that an entry originating from `uid` is written to.
 *
 * Returns, in order of precedence:
 *   - s->runtime_journal while it is open;
 *   - s->system_journal for uid <= SYSTEM_UID_MAX;
 *   - the per-user journal registered under `uid` in s->user_journals, which
 *     is opened at /var/log/journal/<machine-id>/user-<uid>.journal and
 *     added to the map if needed;
 *   - s->system_journal as fallback on the failure paths below. */
238 static JournalFile* find_journal(Server *s, uid_t uid) {
239 _cleanup_free_ char *p = NULL;
246 /* We split up user logs only on /var, not on /run. If the
247 * runtime file is open, we write to it exclusively, in order
248 * to guarantee proper order as soon as we flush /run to
249 * /var and close the runtime file. */
251 if (s->runtime_journal)
252 return s->runtime_journal;
254 if (uid <= SYSTEM_UID_MAX)
255 return s->system_journal;
257 r = sd_id128_get_machine(&machine);
259 return s->system_journal;
/* Already open for this user? */
261 f = ordered_hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
265 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
266 SD_ID128_FORMAT_VAL(machine), uid) < 0)
267 return s->system_journal;
/* Keep the number of open user journals below USER_JOURNALS_MAX. */
269 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
270 /* Too many open? Then let's close one */
271 f = ordered_hashmap_steal_first(s->user_journals);
273 journal_file_close(f);
276 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
278 return s->system_journal;
280 server_fix_perms(s, f, uid);
/* Register the new file; the file is closed again on the fallback path. */
282 r = ordered_hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
284 journal_file_close(f);
285 return s->system_journal;
/* Rotate one journal file through journal_file_rotate(), passing the
 * server's compression setting and the caller's `seal` flag. Two distinct
 * error messages are logged on failure (one naming the file path, one naming
 * the journal kind `name`); server_fix_perms() is applied to *f for `uid`.
 * Callers in server_rotate() pass the address of the JournalFile pointer, so
 * *f may be changed by the rotation. */
291 static int do_rotate(
304 r = journal_file_rotate(f, s->compress, seal);
307 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
309 log_error_errno(r, "Failed to create new %s journal: %m", name);
311 server_fix_perms(s, *f, uid);
/* Rotate every journal the server has open: the runtime journal (never
 * sealed), the system journal, and each per-user journal in
 * s->user_journals. For user journals the uid is recovered from the map key;
 * depending on the outcome of do_rotate() the map entry is either replaced
 * with the new file or removed. */
316 void server_rotate(Server *s) {
322 log_debug("Rotating...");
324 do_rotate(s, &s->runtime_journal, "runtime", false, 0);
325 do_rotate(s, &s->system_journal, "system", s->seal, 0);
327 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
328 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
330 ordered_hashmap_replace(s->user_journals, k, f);
332 /* Old file has been closed and deallocated */
333 ordered_hashmap_remove(s->user_journals, k);
/* Sync the persistent journals: journal_file_set_offline() is called on the
 * system journal (if open) and on every user journal; failures are logged.
 * Afterwards the sync timer source, if one exists, is switched off and
 * s->sync_scheduled is cleared so that server_schedule_sync() can arm it
 * again. */
337 void server_sync(Server *s) {
343 if (s->system_journal) {
344 r = journal_file_set_offline(s->system_journal);
346 log_error_errno(r, "Failed to sync system journal: %m");
349 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
350 r = journal_file_set_offline(f);
352 log_error_errno(r, "Failed to sync user journal: %m");
355 if (s->sync_event_source) {
356 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
358 log_error_errno(r, "Failed to disable sync timer source: %m");
361 s->sync_scheduled = false;
/* Vacuum one journal directory. The directory name is `path` with `id`
 * appended; journal_directory_vacuum() is given metrics->max_use and the
 * server's max_retention_usec, and updates s->oldest_file_usec. Errors other
 * than -ENOENT are logged. */
364 static void do_vacuum(
369 JournalMetrics *metrics) {
377 p = strjoina(path, id);
378 r = journal_directory_vacuum(p, metrics->max_use, s->max_retention_usec, &s->oldest_file_usec, false);
379 if (r < 0 && r != -ENOENT)
380 log_error_errno(r, "Failed to vacuum %s: %m", p);
/* Vacuum both journal directories of this machine (/var/log/journal/<id> and
 * /run/log/journal/<id>) via do_vacuum(). s->oldest_file_usec is reset
 * beforehand, and the cached available-space timestamp is zeroed afterwards
 * so that available_space() recomputes on its next call. */
383 void server_vacuum(Server *s) {
388 log_debug("Vacuuming...");
390 s->oldest_file_usec = 0;
392 r = sd_id128_get_machine(&machine);
394 log_error_errno(r, "Failed to get machine ID: %m");
397 sd_id128_to_string(machine, ids);
399 do_vacuum(s, ids, s->system_journal, "/var/log/journal/", &s->system_metrics);
400 do_vacuum(s, ids, s->runtime_journal, "/run/log/journal/", &s->runtime_metrics);
402 s->cached_available_space_timestamp = 0;
/* Fill s->machine_id_field with "_MACHINE_ID=" followed by the string form
 * of the machine ID, so the field can be attached to entries without
 * re-formatting it each time (see dispatch_message_real()). */
405 static void server_cache_machine_id(Server *s) {
411 r = sd_id128_get_machine(&id);
/* stpcpy() returns the end of the copied prefix; the ID is written there. */
415 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
/* Fill s->boot_id_field with "_BOOT_ID=" followed by the string form of the
 * boot ID, so the field can be attached to entries without re-formatting it
 * each time (see dispatch_message_real()). */
418 static void server_cache_boot_id(Server *s) {
424 r = sd_id128_get_boot(&id);
/* stpcpy() returns the end of the copied prefix; the ID is written there. */
428 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
/* Rebuild s->hostname_field as "_HOSTNAME=<current hostname>". The hostname
 * comes from gethostname_malloc(); the previous field string is freed and
 * replaced by the newly allocated one. */
431 static void server_cache_hostname(Server *s) {
432 _cleanup_free_ char *t = NULL;
437 t = gethostname_malloc();
441 x = strappend("_HOSTNAME=", t);
445 free(s->hostname_field);
446 s->hostname_field = x;
449 static bool shall_try_append_again(JournalFile *f, int r) {
451 /* -E2BIG Hit configured limit
453 -EDQUOT Quota limit hit
455 -EIO I/O error of some kind (mmap)
456 -EHOSTDOWN Other machine
457 -EBUSY Unclean shutdown
458 -EPROTONOSUPPORT Unsupported feature
461 -ESHUTDOWN Already archived
462 -EIDRM Journal file has been deleted */
464 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
465 log_debug("%s: Allocation limit reached, rotating.", f->path);
466 else if (r == -EHOSTDOWN)
467 log_info("%s: Journal file from other machine, rotating.", f->path);
468 else if (r == -EBUSY)
469 log_info("%s: Unclean shutdown, rotating.", f->path);
470 else if (r == -EPROTONOSUPPORT)
471 log_info("%s: Unsupported feature, rotating.", f->path);
472 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
473 log_warning("%s: Journal file corrupted, rotating.", f->path);
475 log_warning("%s: IO error, rotating.", f->path);
476 else if (r == -EIDRM)
477 log_warning("%s: Journal file has been deleted, rotating.", f->path);
/* Append one entry (n iovec items) to the journal selected by find_journal()
 * for `uid`.
 *
 * Visible flow: the target file is looked up; if
 * journal_file_rotate_suggested() says so, the file is looked up again
 * (NOTE(review): presumably after a rotation -- confirm); the entry is
 * appended and a sync is scheduled according to `priority`. When the append
 * fails and either a vacuum was already done or shall_try_append_again()
 * declines, the failure is logged and the entry is dropped. Otherwise the
 * journal is looked up once more and the write is retried a single time. */
484 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
486 bool vacuumed = false;
493 f = find_journal(s, uid);
497 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
498 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
503 f = find_journal(s, uid);
508 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
510 server_schedule_sync(s, priority);
514 if (vacuumed || !shall_try_append_again(f, r)) {
515 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
522 f = find_journal(s, uid);
526 log_debug("Retrying write.");
527 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
529 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
531 server_schedule_sync(s, priority);
/* Complete an entry with server-side metadata fields and write it.
 *
 * iovec/n/m: the entry's fields; `n` slots are already filled by the caller
 *            and `m` is the array capacity. The assertion below requires
 *            room for N_IOVEC_META_FIELDS more entries, plus
 *            N_IOVEC_OBJECT_FIELDS when object_pid is set.
 * ucred:     credentials of the sender; source of _PID/_UID/_GID and of the
 *            pid used for the process, audit and cgroup lookups.
 * tv:        source timestamp, stored as _SOURCE_REALTIME_TIMESTAMP.
 * label/label_len: SELinux label received with the message, stored as
 *            _SELINUX_CONTEXT.
 * unit_id:   unit name used for _SYSTEMD_UNIT/_SYSTEMD_USER_UNIT when the
 *            cgroup path does not yield one.
 * object_pid: process the message is about; its properties are stored in
 *            OBJECT_* fields.
 *
 * Fixed-size fields are formatted into the local char arrays declared below,
 * variable-size ones are built with strjoina()/alloca(); the iovec entries
 * point at this storage. Finally the target journal uid is chosen from
 * s->split_mode and the entry is passed to write_to_journal(). */
534 static void dispatch_message_real(
536 struct iovec *iovec, unsigned n, unsigned m,
537 const struct ucred *ucred,
538 const struct timeval *tv,
539 const char *label, size_t label_len,
/* Each buffer is sized for its field prefix plus the longest decimal
 * rendering of the value type. */
544 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
545 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
546 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
547 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
548 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
549 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
550 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
551 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
557 uid_t realuid = 0, owner = 0, journal_uid;
558 bool owner_valid = false;
560 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
561 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
562 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
563 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
572 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
/* Fields describing the sending process, taken from its credentials. */
575 realuid = ucred->uid;
577 sprintf(pid, "_PID="PID_FMT, ucred->pid);
578 IOVEC_SET_STRING(iovec[n++], pid);
580 sprintf(uid, "_UID="UID_FMT, ucred->uid);
581 IOVEC_SET_STRING(iovec[n++], uid);
583 sprintf(gid, "_GID="GID_FMT, ucred->gid);
584 IOVEC_SET_STRING(iovec[n++], gid);
586 r = get_process_comm(ucred->pid, &t);
588 x = strjoina("_COMM=", t);
590 IOVEC_SET_STRING(iovec[n++], x);
593 r = get_process_exe(ucred->pid, &t);
595 x = strjoina("_EXE=", t);
597 IOVEC_SET_STRING(iovec[n++], x);
600 r = get_process_cmdline(ucred->pid, 0, false, &t);
602 x = strjoina("_CMDLINE=", t);
604 IOVEC_SET_STRING(iovec[n++], x);
607 r = get_process_capeff(ucred->pid, &t);
609 x = strjoina("_CAP_EFFECTIVE=", t);
611 IOVEC_SET_STRING(iovec[n++], x);
/* Audit session and login uid of the sender. */
615 r = audit_session_from_pid(ucred->pid, &audit);
617 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
618 IOVEC_SET_STRING(iovec[n++], audit_session);
621 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
623 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
624 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* Fields derived from the sender's cgroup path: session, owner uid, unit,
 * user unit and slice. */
628 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
630 char *session = NULL;
632 x = strjoina("_SYSTEMD_CGROUP=", c);
633 IOVEC_SET_STRING(iovec[n++], x);
635 r = cg_path_get_session(c, &t);
637 session = strjoina("_SYSTEMD_SESSION=", t);
639 IOVEC_SET_STRING(iovec[n++], session);
642 if (cg_path_get_owner_uid(c, &owner) >= 0) {
645 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
646 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* When the cgroup path names no unit, the caller-supplied unit_id is used:
 * as a system unit outside a session, as a user unit inside one. */
649 if (cg_path_get_unit(c, &t) >= 0) {
650 x = strjoina("_SYSTEMD_UNIT=", t);
652 IOVEC_SET_STRING(iovec[n++], x);
653 } else if (unit_id && !session) {
654 x = strjoina("_SYSTEMD_UNIT=", unit_id);
655 IOVEC_SET_STRING(iovec[n++], x);
658 if (cg_path_get_user_unit(c, &t) >= 0) {
659 x = strjoina("_SYSTEMD_USER_UNIT=", t);
661 IOVEC_SET_STRING(iovec[n++], x);
662 } else if (unit_id && session) {
663 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
664 IOVEC_SET_STRING(iovec[n++], x);
667 if (cg_path_get_slice(c, &t) >= 0) {
668 x = strjoina("_SYSTEMD_SLICE=", t);
670 IOVEC_SET_STRING(iovec[n++], x);
674 } else if (unit_id) {
675 x = strjoina("_SYSTEMD_UNIT=", unit_id);
676 IOVEC_SET_STRING(iovec[n++], x);
/* SELinux context: built from the label passed in by the caller, or queried
 * for the sender's pid with getpidcon(). */
680 if (mac_selinux_use()) {
682 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
/* The label comes with an explicit length, so it is copied with mempcpy()
 * and the terminating NUL is written by hand. */
684 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
685 IOVEC_SET_STRING(iovec[n++], x);
687 security_context_t con;
689 if (getpidcon(ucred->pid, &con) >= 0) {
690 x = strjoina("_SELINUX_CONTEXT=", con);
693 IOVEC_SET_STRING(iovec[n++], x);
/* OBJECT_* fields: the same kinds of properties, looked up for object_pid. */
702 r = get_process_uid(object_pid, &object_uid);
704 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
705 IOVEC_SET_STRING(iovec[n++], o_uid);
708 r = get_process_gid(object_pid, &object_gid);
710 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
711 IOVEC_SET_STRING(iovec[n++], o_gid);
714 r = get_process_comm(object_pid, &t);
716 x = strjoina("OBJECT_COMM=", t);
718 IOVEC_SET_STRING(iovec[n++], x);
721 r = get_process_exe(object_pid, &t);
723 x = strjoina("OBJECT_EXE=", t);
725 IOVEC_SET_STRING(iovec[n++], x);
728 r = get_process_cmdline(object_pid, 0, false, &t);
730 x = strjoina("OBJECT_CMDLINE=", t);
732 IOVEC_SET_STRING(iovec[n++], x);
736 r = audit_session_from_pid(object_pid, &audit);
738 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
739 IOVEC_SET_STRING(iovec[n++], o_audit_session);
742 r = audit_loginuid_from_pid(object_pid, &loginuid);
744 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
745 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
749 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
751 x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
752 IOVEC_SET_STRING(iovec[n++], x);
754 r = cg_path_get_session(c, &t);
756 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
758 IOVEC_SET_STRING(iovec[n++], x);
761 if (cg_path_get_owner_uid(c, &owner) >= 0) {
762 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
763 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
766 if (cg_path_get_unit(c, &t) >= 0) {
767 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
769 IOVEC_SET_STRING(iovec[n++], x);
772 if (cg_path_get_user_unit(c, &t) >= 0) {
773 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
775 IOVEC_SET_STRING(iovec[n++], x);
/* Timestamp supplied by the sender's side (tv), in the units returned by
 * timeval_load(). */
784 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
785 IOVEC_SET_STRING(iovec[n++], source_time);
788 /* Note that strictly speaking storing the boot id here is
789 * redundant since the entry includes this in-line
790 * anyway. However, we need this indexed, too. */
791 if (!isempty(s->boot_id_field))
792 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
794 if (!isempty(s->machine_id_field))
795 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
797 if (!isempty(s->hostname_field))
798 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
/* Choose which journal (by uid) receives the entry, per s->split_mode. */
802 if (s->split_mode == SPLIT_UID && realuid > 0)
803 /* Split up strictly by any UID */
804 journal_uid = realuid;
805 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
806 /* Split up by login UIDs. We do this only if the
807 * realuid is not root, in order not to accidentally
808 * leak privileged information to the user that is
809 * logged by a privileged process that is part of an
810 * unprivileged session. */
815 write_to_journal(s, journal_uid, iovec, n, priority);
/* Log a message that originates from the journal daemon itself.
 *
 * message_id: written as a MESSAGE_ID field unless it equals SD_ID128_NULL.
 * format/...: printf-style text of the MESSAGE= field; vsnprintf() bounds it
 *             to the space left in `buffer` (LINE_MAX-based), so longer text
 *             is truncated.
 *
 * The entry carries PRIORITY=6 and _TRANSPORT=driver, uses the daemon's own
 * pid/uid/gid as credentials, and is handed to dispatch_message_real() with
 * priority LOG_INFO, with no timestamp, label, unit or object pid. */
818 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
/* 11 = strlen("MESSAGE_ID="), 32 hex digits of the ID, 1 for the NUL. */
819 char mid[11 + 32 + 1];
820 char buffer[16 + LINE_MAX + 1];
/* Four fields are set here; the rest is reserved for the metadata that
 * dispatch_message_real() appends. */
821 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
824 struct ucred ucred = {};
829 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
830 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The 8-byte prefix is copied without NUL; vsnprintf() continues right
 * after it and terminates the string. */
832 memcpy(buffer, "MESSAGE=", 8);
833 va_start(ap, format);
834 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
836 IOVEC_SET_STRING(iovec[n++], buffer);
838 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
839 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
840 IOVEC_SET_STRING(iovec[n++], mid);
843 ucred.pid = getpid();
844 ucred.uid = getuid();
845 ucred.gid = getgid();
847 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
/* Entry point for a message received from a client: apply the storage
 * filters and rate limiting, then forward to dispatch_message_real().
 *
 * Checks made here: the message priority against s->max_level_store and the
 * storage mode against STORAGE_NONE. The rate limiter is keyed by the
 * sender's cgroup path (see the comment on its truncation below) and is also
 * given the priority and the result of available_space(). When the limiter
 * reports earlier suppression, a "Suppressed N messages" driver message is
 * emitted first.
 *
 * iovec may be NULL only when n is 0 (asserted). */
850 void server_dispatch_message(
852 struct iovec *iovec, unsigned n, unsigned m,
853 const struct ucred *ucred,
854 const struct timeval *tv,
855 const char *label, size_t label_len,
861 _cleanup_free_ char *path = NULL;
865 assert(iovec || n == 0);
870 if (LOG_PRI(priority) > s->max_level_store)
873 /* Stop early in case the information will not be stored
875 if (s->storage == STORAGE_NONE)
881 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
885 /* example: /user/lennart/3/foobar
886 * /system/dbus.service/foobar
888 * So let's cut off everything past the third /, since that is
889 * where user directories start */
891 c = strchr(path, '/');
893 c = strchr(c+1, '/');
895 c = strchr(c+1, '/');
901 rl = journal_rate_limit_test(s->rate_limit, path,
902 priority & LOG_PRIMASK, available_space(s, false));
907 /* Write a suppression message if we suppressed something */
909 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
910 "Suppressed %u messages from %s", rl - 1, path);
913 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
/* Open the journal files the server writes to, according to s->storage.
 *
 * System journal (/var/log/journal/<machine-id>/system.journal): attempted
 * when it is not open yet, storage is "persistent" or "auto", and a further
 * condition involving the flag file /run/systemd/journal/flushed holds
 * (NOTE(review): presumably `flush_requested` is the other alternative --
 * confirm). Open failures with -ENOENT or -EROFS are not logged.
 *
 * Runtime journal (/run/log/journal/<machine-id>/system.journal): handled
 * when it is not open yet and storage is not "none". If the system journal
 * is open, the runtime file is only opened when it already exists (so it can
 * be flushed); otherwise it is created, including its parent directories.
 *
 * Ends by refreshing and logging the space budget via
 * available_space(s, true). */
917 static int system_journal_open(Server *s, bool flush_requested) {
923 r = sd_id128_get_machine(&machine);
925 return log_error_errno(r, "Failed to get machine id: %m");
927 sd_id128_to_string(machine, ids);
929 if (!s->system_journal &&
930 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
932 || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
934 /* If in auto mode: first try to create the machine
935 * path, but not the prefix.
937 * If in persistent mode: create /var/log/journal and
938 * the machine path */
940 if (s->storage == STORAGE_PERSISTENT)
941 (void) mkdir("/var/log/journal/", 0755);
943 fn = strjoina("/var/log/journal/", ids);
944 (void) mkdir(fn, 0755);
946 fn = strjoina(fn, "/system.journal");
947 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
950 server_fix_perms(s, s->system_journal, 0);
952 if (r != -ENOENT && r != -EROFS)
953 log_warning_errno(r, "Failed to open system journal: %m");
959 if (!s->runtime_journal &&
960 (s->storage != STORAGE_NONE)) {
962 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
966 if (s->system_journal) {
968 /* Try to open the runtime journal, but only
969 * if it already exists, so that we can flush
970 * it into the system journal */
972 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
977 log_warning_errno(r, "Failed to open runtime journal: %m");
984 /* OK, we really need the runtime journal, so create
985 * it if necessary. */
987 (void) mkdir("/run/log", 0755);
988 (void) mkdir("/run/log/journal", 0755);
989 (void) mkdir_parents(fn, 0750);
991 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
995 return log_error_errno(r, "Failed to open runtime journal: %m");
998 if (s->runtime_journal)
999 server_fix_perms(s, s->runtime_journal, 0);
1002 available_space(s, true);
/* Copy the contents of the runtime journal into the system journal and then
 * dispose of the runtime journal.
 *
 * Preconditions checked first: storage is "auto" or "persistent", a runtime
 * journal is open, and -- after system_journal_open(s, true) -- a system
 * journal is open.
 *
 * Every entry of the runtime journals (opened read-side with
 * SD_JOURNAL_RUNTIME_ONLY) is copied with journal_file_copy_entry(). A
 * failed copy is retried once when shall_try_append_again() allows it; the
 * loop gives up with a notice if the system journal is no longer available
 * at that point. Afterwards the runtime journal file is closed,
 * /run/log/journal is removed, and a driver message reports the time spent
 * and the number of entries. */
1007 int server_flush_to_var(Server *s) {
1009 sd_journal *j = NULL;
1010 char ts[FORMAT_TIMESPAN_MAX];
1017 if (s->storage != STORAGE_AUTO &&
1018 s->storage != STORAGE_PERSISTENT)
1021 if (!s->runtime_journal)
1024 system_journal_open(s, true);
1026 if (!s->system_journal)
1029 log_debug("Flushing to /var...");
1031 start = now(CLOCK_MONOTONIC);
1033 r = sd_id128_get_machine(&machine);
1037 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1039 return log_error_errno(r, "Failed to read runtime journal: %m");
1041 sd_journal_set_data_threshold(j, 0);
1043 SD_JOURNAL_FOREACH(j) {
/* Work on the file and offset the reader is currently positioned at. */
1047 f = j->current_file;
1048 assert(f && f->current_offset > 0);
1052 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1054 log_error_errno(r, "Can't read entry: %m");
1058 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1062 if (!shall_try_append_again(s->system_journal, r)) {
1063 log_error_errno(r, "Can't write entry: %m");
1070 if (!s->system_journal) {
1071 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1076 log_debug("Retrying write.");
1077 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1079 log_error_errno(r, "Can't write entry: %m");
1085 journal_file_post_change(s->system_journal);
1087 journal_file_close(s->runtime_journal);
1088 s->runtime_journal = NULL;
1091 rm_rf("/run/log/journal", false, true, false);
1093 sd_journal_close(j);
1095 server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
/* Event-loop I/O callback for the datagram sockets (native, syslog and
 * audit; `fd` must be one of the three, as asserted). `userdata` is the
 * Server.
 *
 * Any event mask other than exactly EPOLLIN is reported as an error.
 * Otherwise datagrams are received with recvmsg() into s->buffer, which is
 * grown as needed. Ancillary data is scanned for sender credentials
 * (SCM_CREDENTIALS), an SELinux label (SCM_SECURITY), a receive timestamp
 * (SO_TIMESTAMP) and passed file descriptors (SCM_RIGHTS). The payload is
 * then handed to the syslog, native or audit handler depending on `fd`.
 * Combinations of payload and file descriptors that a socket does not accept
 * are logged and ignored; the native socket alone accepts a single
 * descriptor with an empty payload. Received descriptors are closed with
 * close_many() afterwards. */
1100 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1101 Server *s = userdata;
1104 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1106 if (revents != EPOLLIN) {
1107 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
/* These pointers refer into the control buffer of the current message. */
1112 struct ucred *ucred = NULL;
1113 struct timeval *tv = NULL;
1114 struct cmsghdr *cmsg;
1116 size_t label_len = 0;
1120 struct cmsghdr cmsghdr;
1122 /* We use NAME_MAX space for the SELinux label
1123 * here. The kernel currently enforces no
1124 * limit, but according to suggestions from
1125 * the SELinux people this will change and it
1126 * will probably be identical to NAME_MAX. For
1127 * now we use that, but this should be updated
1128 * one day when the final limit is known. */
1129 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1130 CMSG_SPACE(sizeof(struct timeval)) +
1131 CMSG_SPACE(sizeof(int)) + /* fd */
1132 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1134 union sockaddr_union sa = {};
1135 struct msghdr msghdr = {
1138 .msg_control = &control,
1139 .msg_controllen = sizeof(control),
1141 .msg_namelen = sizeof(sa),
1150 /* Try to get the right size, if we can. (Not all
1151 * sockets support SIOCINQ, hence we just try, but
1152 * don't rely on it.) */
1153 (void) ioctl(fd, SIOCINQ, &v);
1155 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1156 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1158 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1160 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1163 iovec.iov_base = s->buffer;
1164 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1166 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1168 if (errno == EINTR || errno == EAGAIN)
1171 log_error_errno(errno, "recvmsg() failed: %m");
/* Pick the control messages we understand; credentials and timestamp are
 * accepted only when their length matches the expected structure. */
1175 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1177 if (cmsg->cmsg_level == SOL_SOCKET &&
1178 cmsg->cmsg_type == SCM_CREDENTIALS &&
1179 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1180 ucred = (struct ucred*) CMSG_DATA(cmsg);
1181 else if (cmsg->cmsg_level == SOL_SOCKET &&
1182 cmsg->cmsg_type == SCM_SECURITY) {
1183 label = (char*) CMSG_DATA(cmsg);
1184 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1185 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1186 cmsg->cmsg_type == SO_TIMESTAMP &&
1187 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1188 tv = (struct timeval*) CMSG_DATA(cmsg);
1189 else if (cmsg->cmsg_level == SOL_SOCKET &&
1190 cmsg->cmsg_type == SCM_RIGHTS) {
1191 fds = (int*) CMSG_DATA(cmsg);
1192 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1196 /* And a trailing NUL, just in case */
/* Dispatch on the socket the datagram arrived on. */
1199 if (fd == s->syslog_fd) {
1200 if (n > 0 && n_fds == 0)
1201 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1203 log_warning("Got file descriptors via syslog socket. Ignoring.");
1205 } else if (fd == s->native_fd) {
1206 if (n > 0 && n_fds == 0)
1207 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1208 else if (n == 0 && n_fds == 1)
1209 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1211 log_warning("Got too many file descriptors via native socket. Ignoring.");
1214 assert(fd == s->audit_fd);
1216 if (n > 0 && n_fds == 0)
1217 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1219 log_warning("Got file descriptors via audit socket. Ignoring.");
1222 close_many(fds, n_fds);
/* SIGUSR1 handler (registered in setup_signals()): a request to flush the
 * runtime journal. Logs the requesting PID, runs server_flush_to_var(), and
 * touches the flag file /run/systemd/journal/flushed, whose existence
 * system_journal_open() tests with access(). `userdata` is the Server. */
1226 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1227 Server *s = userdata;
1231 log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
1233 server_flush_to_var(s);
1237 touch("/run/systemd/journal/flushed");
/* SIGUSR2 handler (registered in setup_signals()): a request to rotate the
 * journal. Logs the requesting PID taken from the signal info; `userdata` is
 * the Server. */
1242 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1243 Server *s = userdata;
1247 log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
/* Handler shared by SIGTERM and SIGINT (both registered with it in
 * setup_signals()): logs the received signal at LOG_INFO and asks the event
 * loop to exit with code 0. `userdata` is the Server. */
1254 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1255 Server *s = userdata;
1259 log_received_signal(LOG_INFO, si);
1261 sd_event_exit(s->event, 0);
/* Route the daemon's control signals through the event loop.
 *
 * The process signal mask is set (SIG_SETMASK) to exactly SIGINT, SIGTERM,
 * SIGUSR1 and SIGUSR2. One signal event source per signal is then added to
 * s->event: SIGUSR1 -> dispatch_sigusr1, SIGUSR2 -> dispatch_sigusr2,
 * SIGTERM and SIGINT -> dispatch_sigterm. The sources are stored in the
 * corresponding s->sig*_event_source members. */
1265 static int setup_signals(Server *s) {
1271 assert_se(sigemptyset(&mask) == 0);
1272 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1273 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1275 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1279 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1283 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1287 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
/* Apply journald switches given on the kernel command line.
 *
 * The command line is obtained with proc_cmdline() (a read failure is only
 * warned about) and split into quoted words. Recognised words:
 *   systemd.journald.forward_to_syslog=<bool>
 *   systemd.journald.forward_to_kmsg=<bool>
 *   systemd.journald.forward_to_console=<bool>
 *   systemd.journald.forward_to_wall=<bool>
 * Each sets the matching s->forward_to_* flag from parse_boolean(); a value
 * that does not parse is warned about. Any other word starting with
 * "systemd.journald" draws an "Invalid ... parameter" warning.
 *
 * The numeric offsets added to `word` (35, 33, 36, 33) are the lengths of
 * the respective option prefixes including the '='. */
1294 static int server_parse_proc_cmdline(Server *s) {
1295 _cleanup_free_ char *line = NULL;
1296 const char *w, *state;
1300 r = proc_cmdline(&line);
1302 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
1306 FOREACH_WORD_QUOTED(w, l, line, state) {
1307 _cleanup_free_ char *word;
/* NUL-terminated private copy of the current word. */
1309 word = strndup(w, l);
1313 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1314 r = parse_boolean(word + 35);
1316 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1318 s->forward_to_syslog = r;
1319 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1320 r = parse_boolean(word + 33);
1322 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1324 s->forward_to_kmsg = r;
1325 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1326 r = parse_boolean(word + 36);
1328 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1330 s->forward_to_console = r;
1331 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1332 r = parse_boolean(word + 33);
1334 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1336 s->forward_to_wall = r;
1337 } else if (startswith(word, "systemd.journald"))
1338 log_warning("Invalid systemd.journald parameter. Ignoring.");
1340 /* do not warn about state here, since probably systemd already did */
/* Load the daemon configuration: /etc/systemd/journald.conf together with
 * the "systemd/journald.conf" entries produced by CONF_DIRS_NULSTR(), parsed
 * by config_parse_many() with the gperf-generated lookup table
 * journald_gperf_lookup. Returns the result of config_parse_many(). */
1345 static int server_parse_config_file(Server *s) {
1348 return config_parse_many("/etc/systemd/journald.conf",
1349 CONF_DIRS_NULSTR("systemd/journald.conf"),
1351 config_item_perf_lookup, journald_gperf_lookup,
/* Timer callback of the sync event source that server_schedule_sync()
 * creates with sd_event_add_time(); `userdata` is the Server. */
1355 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1356 Server *s = userdata;
/* Arrange for the journals to be synced to disk after a write of the given
 * syslog `priority`.
 *
 * Priorities CRIT, ALERT and EMERG (numerically <= LOG_CRIT) are handled
 * immediately, as the comment below states. For other priorities nothing
 * more is needed when a sync is already scheduled. Otherwise, if
 * s->sync_interval_usec is non-zero, a one-shot timer is armed at "now +
 * interval" on CLOCK_MONOTONIC: the event source is created on first use
 * (with priority SD_EVENT_PRIORITY_IMPORTANT) and merely re-timed on later
 * uses. s->sync_scheduled records that a sync is pending; server_sync()
 * clears it. */
1364 int server_schedule_sync(Server *s, int priority) {
1369 if (priority <= LOG_CRIT) {
1370 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1375 if (s->sync_scheduled)
1378 if (s->sync_interval_usec > 0) {
1381 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1385 when += s->sync_interval_usec;
1387 if (!s->sync_event_source) {
1388 r = sd_event_add_time(
1390 &s->sync_event_source,
1393 server_dispatch_sync, s);
1397 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1399 r = sd_event_source_set_time(s->sync_event_source, when);
1403 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1408 s->sync_scheduled = true;
/* I/O callback for the hostname file descriptor registered in
 * server_open_hostname(): refreshes the cached _HOSTNAME= field via
 * server_cache_hostname(). `userdata` is the Server. */
1414 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1415 Server *s = userdata;
1419 server_cache_hostname(s);
/* Start watching /proc/sys/kernel/hostname for changes.
 *
 * The file is opened read-only and non-blocking into s->hostname_fd and
 * added to the event loop with dispatch_hostname_change() as callback. One
 * registration failure is tolerated (the existing comment attributes it to
 * kernels before 3.2, which cannot poll this file): it is logged as a
 * warning and the descriptor is closed again. Other failures are returned
 * as errors. On success the event source priority is set to
 * SD_EVENT_PRIORITY_IMPORTANT-10. */
1423 static int server_open_hostname(Server *s) {
1428 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1429 if (s->hostname_fd < 0)
1430 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1432 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1434 /* kernels prior to 3.2 don't support polling this file. Ignore
1437 log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1439 s->hostname_fd = safe_close(s->hostname_fd);
1443 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1446 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1448 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
/* Initializes a Server: fills in defaults, applies configuration, creates the
 * event loop, adopts sockets passed in by the service manager, opens the
 * remaining input sources and finally opens the system journal.
 *
 * s: the Server to set up. The visible lines only assign to its fields; how
 *    the structure is zeroed beforehand is not shown in this listing.
 *
 * Returns an int; the visible failure paths return the value produced by
 * log_error_errno().
 *
 * NOTE(review): the embedded line numbers skip throughout this function. Most
 * of the checks that follow an assignment to r, several branch bodies, the
 * final return and the closing brace are elided from this listing, so the
 * comments below describe only what the remaining lines show. */
1453 int server_init(Server *s) {
/* Set of passed-in descriptors that were not recognised; released
 * automatically by the cleanup attribute when the function is left. */
1454 _cleanup_fdset_free_ FDSet *fds = NULL;
/* Mark every descriptor as not yet open. */
1460 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = -1;
/* Built-in defaults; they are assigned before the configuration file and the
 * kernel command line are parsed further down. */
1464 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1465 s->sync_scheduled = false;
1467 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1468 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1470 s->forward_to_wall = true;
1472 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
/* Default maximum log level per destination. */
1474 s->max_level_store = LOG_DEBUG;
1475 s->max_level_syslog = LOG_DEBUG;
1476 s->max_level_kmsg = LOG_NOTICE;
1477 s->max_level_console = LOG_INFO;
1478 s->max_level_wall = LOG_EMERG;
/* Every byte of both metrics structures is set to 0xFF. NOTE(review):
 * presumably all-bits-set marks a field as not configured - confirm against
 * the code that consumes the metrics. */
1480 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1481 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1483 server_parse_config_file(s);
1484 server_parse_proc_cmdline(s);
/* True when exactly one of interval and burst is zero; in that case both are
 * forced to zero so the two settings never disagree. */
1485 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1486 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1487 s->rate_limit_interval, s->rate_limit_burst);
1488 s->rate_limit_interval = s->rate_limit_burst = 0;
/* The return value of mkdir_p() is not used on this line. */
1491 mkdir_p("/run/systemd/journal", 0755);
/* Container for the per-user journal files (see the teardown in server_done()). */
1493 s->user_journals = ordered_hashmap_new(NULL);
1494 if (!s->user_journals)
1497 s->mmap = mmap_cache_new();
1501 r = sd_event_default(&s->event);
1503 return log_error_errno(r, "Failed to create event loop: %m");
1505 sd_event_set_watchdog(s->event, true);
/* Number of descriptors handed over by the service manager. NOTE(review): the
 * true argument should ask sd_listen_fds() to unset the related environment
 * variables - confirm with the sd-daemon documentation. */
1507 n = sd_listen_fds(true);
1509 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
/* Classify each passed descriptor by socket type and bound path. A second
 * descriptor of a kind that is already set is reported as an error. */
1511 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
/* Datagram socket for the native journal protocol. */
1513 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1515 if (s->native_fd >= 0) {
1516 log_error("Too many native sockets passed.");
/* Stream socket for stdout connections; the third argument is 1 here and -1
 * in the other checks. NOTE(review): presumably 1 requires a listening
 * socket - confirm with the sd_is_socket_unix() documentation. */
1522 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1524 if (s->stdout_fd >= 0) {
1525 log_error("Too many stdout sockets passed.");
/* Syslog datagram socket, accepted under either of two paths. */
1531 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1532 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1534 if (s->syslog_fd >= 0) {
1535 log_error("Too many /dev/log sockets passed.");
/* Raw netlink socket, treated as the audit socket. */
1541 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1543 if (s->audit_fd >= 0) {
1544 log_error("Too many audit sockets passed.");
/* NOTE(review): presumably reached only for descriptors that matched none of
 * the checks above; the enclosing branch is elided from this listing. */
1558 r = fdset_put(fds, fd);
/* The collected set is handed to the stdout socket setup as well. */
1564 r = server_open_stdout_socket(s, fds);
/* Whatever is still in the set at this point is reported and dropped. */
1568 if (fdset_size(fds) > 0) {
1569 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1570 fds = fdset_free(fds);
/* Open the remaining input sources, one call each. */
1573 r = server_open_syslog_socket(s);
1577 r = server_open_native_socket(s);
1581 r = server_open_dev_kmsg(s);
1585 r = server_open_audit(s);
1589 r = server_open_kernel_seqnum(s);
1593 r = server_open_hostname(s);
1597 r = setup_signals(s);
1601 s->udev = udev_new();
/* The rate limiter is created from the interval and burst values that were
 * possibly normalised above. */
1605 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1609 r = cg_get_root_path(&s->cgroup_root);
/* Fill the caches for hostname, boot id and machine id. */
1613 server_cache_hostname(s);
1614 server_cache_boot_id(s);
1615 server_cache_machine_id(s);
/* Last visible step: open the system journal, passing false as the second
 * argument (its meaning is defined by system_journal_open(), not shown here). */
1617 r = system_journal_open(s, false);
/* Takes a single CLOCK_REALTIME timestamp and passes it to
 * journal_file_maybe_append_tag() for the system journal (when one is open)
 * and for every file stored in s->user_journals.
 *
 * NOTE(review): the embedded line numbers skip, so the local declarations,
 * any conditional compilation around the body and the closing brace are not
 * part of this listing. */
1624 void server_maybe_append_tags(Server *s) {
/* One timestamp is shared by all files handled below. */
1630 n = now(CLOCK_REALTIME);
1632 if (s->system_journal)
1633 journal_file_maybe_append_tag(s->system_journal, n);
1635 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1636 journal_file_maybe_append_tag(f, n);
/* Releases what a Server holds: stdout streams, the system, runtime and
 * per-user journal files, all event sources and the event loop, the input
 * descriptors, the rate limiter, the kernel sequence number mapping, cached
 * strings, the mmap cache and the udev context.
 *
 * The visible lines do not reset any of the released fields afterwards.
 *
 * NOTE(review): the embedded line numbers skip, so local declarations, some
 * guards around the calls below and the closing brace are not part of this
 * listing. */
1640 void server_done(Server *s) {
/* NOTE(review): this loop only terminates if stdout_stream_free() removes the
 * stream from s->stdout_streams - confirm in the stream code. */
1644 while (s->stdout_streams)
1645 stdout_stream_free(s->stdout_streams);
1647 if (s->system_journal)
1648 journal_file_close(s->system_journal);
1650 if (s->runtime_journal)
1651 journal_file_close(s->runtime_journal);
/* Take the per-user journal files out of the map one at a time and close
 * them; the container itself is freed right after the loop. */
1653 while ((f = ordered_hashmap_steal_first(s->user_journals)))
1654 journal_file_close(f);
1656 ordered_hashmap_free(s->user_journals);
/* Drop the references to all event sources first, then to the event loop. */
1658 sd_event_source_unref(s->syslog_event_source);
1659 sd_event_source_unref(s->native_event_source);
1660 sd_event_source_unref(s->stdout_event_source);
1661 sd_event_source_unref(s->dev_kmsg_event_source);
1662 sd_event_source_unref(s->audit_event_source);
1663 sd_event_source_unref(s->sync_event_source);
1664 sd_event_source_unref(s->sigusr1_event_source);
1665 sd_event_source_unref(s->sigusr2_event_source);
1666 sd_event_source_unref(s->sigterm_event_source);
1667 sd_event_source_unref(s->sigint_event_source);
1668 sd_event_source_unref(s->hostname_event_source);
1669 sd_event_unref(s->event);
/* Close the input descriptors; the values returned by safe_close() are not
 * stored back into the fields here. */
1671 safe_close(s->syslog_fd);
1672 safe_close(s->native_fd);
1673 safe_close(s->stdout_fd);
1674 safe_close(s->dev_kmsg_fd);
1675 safe_close(s->audit_fd);
1676 safe_close(s->hostname_fd);
1679 journal_rate_limit_free(s->rate_limit);
/* The mapping is released with a length of one uint64_t. NOTE(review):
 * presumably this matches the size used when it was mapped - confirm in
 * server_open_kernel_seqnum(). */
1681 if (s->kernel_seqnum)
1682 munmap(s->kernel_seqnum, sizeof(uint64_t));
1686 free(s->cgroup_root);
1687 free(s->hostname_field);
1690 mmap_cache_unref(s->mmap);
1693 udev_unref(s->udev);