1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33 #include "sd-daemon.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
42 #include "conf-parser.h"
43 #include "selinux-util.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-rate-limit.h"
48 #include "journald-kmsg.h"
49 #include "journald-syslog.h"
50 #include "journald-stream.h"
51 #include "journald-console.h"
52 #include "journald-native.h"
53 #include "journald-audit.h"
54 #include "journald-server.h"
58 #include <selinux/selinux.h>
/* Tunables used in this file: the cap on simultaneously open per-user
 * journals (see find_journal()), the built-in defaults that server_init()
 * assigns before configuration is parsed, and the lifetime of the value
 * cached by available_space(). */
61 #define USER_JOURNALS_MAX 1024
63 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
64 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
65 #define DEFAULT_RATE_LIMIT_BURST 1000
66 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
68 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Textual names for the Storage enum values. The two macros below are handed
 * this table's base name ("storage") and type; presumably they generate the
 * string<->enum helpers and the config_parse_storage() callback -- confirm
 * against the macro definitions. */
70 static const char* const storage_table[_STORAGE_MAX] = {
71 [STORAGE_AUTO] = "auto",
72 [STORAGE_VOLATILE] = "volatile",
73 [STORAGE_PERSISTENT] = "persistent",
74 [STORAGE_NONE] = "none"
77 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
78 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Textual names for the SplitMode enum values, wired up the same way as the
 * storage table: the macros below receive the base name "split_mode" and the
 * parser name config_parse_split_mode. */
80 static const char* const split_mode_table[_SPLIT_MAX] = {
81 [SPLIT_LOGIN] = "login",
83 [SPLIT_NONE] = "none",
86 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
87 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Computes how many more bytes the currently active journal directory may
 * grow by, stores the result in s->cached_available_space (together with a
 * monotonic timestamp) and returns it.
 *
 * The directory and metrics are the persistent ones when s->system_journal
 * is set, otherwise the runtime ones. Usage is the sum of allocated blocks
 * of all regular files named *.journal or *.journal~ in that directory.
 * The `verbose` parameter presumably gates the usage message emitted near
 * the end -- the guarding line is not visible here. */
89 static uint64_t available_space(Server *s, bool verbose) {
91 _cleanup_free_ char *p = NULL;
94 uint64_t sum = 0, ss_avail = 0, avail = 0;
96 _cleanup_closedir_ DIR *d = NULL;
/* A recent cached result is reused instead of rescanning the directory. */
101 ts = now(CLOCK_MONOTONIC);
103 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
105 return s->cached_available_space;
107 r = sd_id128_get_machine(&machine);
/* Pick the directory prefix and the matching limits. */
111 if (s->system_journal) {
112 f = "/var/log/journal/";
113 m = &s->system_metrics;
115 f = "/run/log/journal/";
116 m = &s->runtime_metrics;
/* The journal directory is <prefix><machine-id>. */
121 p = strappend(f, sd_id128_to_string(machine, ids));
129 if (fstatvfs(dirfd(d), &ss) < 0)
138 if (!de && errno != 0)
/* Only journal files (active and "~"-suffixed ones) count towards usage. */
144 if (!endswith(de->d_name, ".journal") &&
145 !endswith(de->d_name, ".journal~"))
148 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
151 if (!S_ISREG(st.st_mode))
/* st_blocks is counted in 512-byte units. */
154 sum += (uint64_t) st.st_blocks * 512UL;
/* NOTE(review): POSIX documents f_bavail in units of f_frsize, not f_bsize;
 * the two can differ on some file systems -- confirm this is intended. */
157 ss_avail = ss.f_bsize * ss.f_bavail;
159 /* If we reached a high mark, we will always allow this much
160 * again, unless usage goes above max_use. This watermark
161 * value is cached so that we don't give up space on pressure,
162 * but hover below the maximum usage. */
/* Free space we may touch after honouring keep_free, then the final limit:
 * the smaller of max_use and that figure, minus what is already used. */
167 avail = LESS_BY(ss_avail, m->keep_free);
169 s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
170 s->cached_available_space_timestamp = ts;
173 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
174 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
176 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
177 "%s journal is using %s (max allowed %s, "
178 "trying to leave %s free of %s available → current limit %s).",
179 s->system_journal ? "Permanent" : "Runtime",
180 format_bytes(fb1, sizeof(fb1), sum),
181 format_bytes(fb2, sizeof(fb2), m->max_use),
182 format_bytes(fb3, sizeof(fb3), m->keep_free),
183 format_bytes(fb4, sizeof(fb4), ss_avail),
184 format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
187 return s->cached_available_space;
190 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
195 acl_permset_t permset;
200 r = fchmod(f->fd, 0640);
202 log_warning_errno(r, "Failed to fix access mode on %s, ignoring: %m", f->path);
205 if (uid <= SYSTEM_UID_MAX)
208 acl = acl_get_fd(f->fd);
210 log_warning_errno(errno, "Failed to read ACL on %s, ignoring: %m", f->path);
214 r = acl_find_uid(acl, uid, &entry);
217 if (acl_create_entry(&acl, &entry) < 0 ||
218 acl_set_tag_type(entry, ACL_USER) < 0 ||
219 acl_set_qualifier(entry, &uid) < 0) {
220 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
225 /* We do not recalculate the mask unconditionally here,
226 * so that the fchmod() mask above stays intact. */
227 if (acl_get_permset(entry, &permset) < 0 ||
228 acl_add_perm(permset, ACL_READ) < 0 ||
229 calc_acl_mask_if_needed(&acl) < 0) {
230 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
234 if (acl_set_fd(f->fd, acl) < 0)
235 log_warning_errno(errno, "Failed to set ACL on %s, ignoring: %m", f->path);
/* Returns the journal file that entries for `uid` should be written to.
 *
 * Order of preference, as visible below: the runtime journal if it is open;
 * the system journal for system UIDs or whenever something fails; otherwise
 * the per-user journal, looked up in s->user_journals and opened (and
 * registered there) on demand. The returned file remains owned by the
 * server. */
242 static JournalFile* find_journal(Server *s, uid_t uid) {
243 _cleanup_free_ char *p = NULL;
250 /* We split up user logs only on /var, not on /run. If the
251 * runtime file is open, we write to it exclusively, in order
252 * to guarantee proper order as soon as we flush /run to
253 * /var and close the runtime file. */
255 if (s->runtime_journal)
256 return s->runtime_journal;
258 if (uid <= SYSTEM_UID_MAX)
259 return s->system_journal;
261 r = sd_id128_get_machine(&machine);
263 return s->system_journal;
/* Already open for this user? The map is keyed by the UID cast to a pointer. */
265 f = ordered_hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
269 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
270 SD_ID128_FORMAT_VAL(machine), uid) < 0)
271 return s->system_journal;
/* Keep the number of open user journals bounded by evicting the first
 * entry of the ordered map until there is room. */
273 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
274 /* Too many open? Then let's close one */
275 f = ordered_hashmap_steal_first(s->user_journals);
277 journal_file_close(f);
280 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
282 return s->system_journal;
/* Give the owning user read access before publishing the file in the map. */
284 server_fix_perms(s, f, uid);
286 r = ordered_hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
288 journal_file_close(f);
289 return s->system_journal;
/* Rotates the journal file referenced by `f` via journal_file_rotate(), logs
 * a failure (distinguishing "could not rotate existing file" from "could not
 * create new file"), and fixes up permissions of the resulting file for
 * `uid`. The parameter list is not visible here; server_rotate() calls it as
 * (s, &file, name, seal, uid). */
295 static int do_rotate(
308 r = journal_file_rotate(f, s->compress, seal);
311 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
313 log_error_errno(r, "Failed to create new %s journal: %m", name);
315 server_fix_perms(s, *f, uid);
/* Rotates every open journal: the runtime journal (never sealed), the system
 * journal, and each per-user journal registered in s->user_journals. */
320 void server_rotate(Server *s) {
326 log_debug("Rotating...");
328 do_rotate(s, &s->runtime_journal, "runtime", false, 0);
329 do_rotate(s, &s->system_journal, "system", s->seal, 0);
/* The map key is the UID; after rotation the map entry is either pointed at
 * the new file or dropped. */
331 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
332 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
334 ordered_hashmap_replace(s->user_journals, k, f);
336 /* Old file has been closed and deallocated */
337 ordered_hashmap_remove(s->user_journals, k);
/* Syncs the system journal and all user journals by setting them offline,
 * then disables the pending sync timer (if one exists) and clears
 * s->sync_scheduled. Failures are logged only. */
341 void server_sync(Server *s) {
347 if (s->system_journal) {
348 r = journal_file_set_offline(s->system_journal);
350 log_error_errno(r, "Failed to sync system journal: %m");
353 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
354 r = journal_file_set_offline(f);
356 log_error_errno(r, "Failed to sync user journal: %m");
/* The sync just happened, so a scheduled timer is no longer needed. */
359 if (s->sync_event_source) {
360 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
362 log_error_errno(r, "Failed to disable sync timer source: %m");
365 s->sync_scheduled = false;
/* Vacuums the journal directory <path><id> against metrics->max_use and the
 * server's retention limit, updating s->oldest_file_usec. A missing directory
 * (-ENOENT) is not reported. Only part of the parameter list is visible;
 * server_vacuum() calls it as (s, id, journal_file, path, metrics). */
368 static void do_vacuum(
373 JournalMetrics *metrics) {
381 p = strjoina(path, id);
382 r = journal_directory_vacuum(p, metrics->max_use, s->max_retention_usec, &s->oldest_file_usec, false);
383 if (r < 0 && r != -ENOENT)
384 log_error_errno(r, "Failed to vacuum %s: %m", p);
/* Vacuums both the persistent and the runtime journal directory of this
 * machine and invalidates the cached available-space figure. */
387 void server_vacuum(Server *s) {
392 log_debug("Vacuuming...");
394 s->oldest_file_usec = 0;
396 r = sd_id128_get_machine(&machine);
398 log_error_errno(r, "Failed to get machine ID: %m");
401 sd_id128_to_string(machine, ids);
403 do_vacuum(s, ids, s->system_journal, "/var/log/journal/", &s->system_metrics);
404 do_vacuum(s, ids, s->runtime_journal, "/run/log/journal/", &s->runtime_metrics);
/* Zeroing the timestamp forces available_space() to recompute next time. */
406 s->cached_available_space_timestamp = 0;
/* Formats "_MACHINE_ID=<id>" into s->machine_id_field: stpcpy() writes the
 * prefix and returns the position where the ID string is then appended. */
409 static void server_cache_machine_id(Server *s) {
415 r = sd_id128_get_machine(&id);
419 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
/* Formats "_BOOT_ID=<id>" into s->boot_id_field: stpcpy() writes the prefix
 * and returns the position where the ID string is then appended. */
422 static void server_cache_boot_id(Server *s) {
428 r = sd_id128_get_boot(&id);
432 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
/* Rebuilds s->hostname_field as "_HOSTNAME=<current hostname>". The
 * temporary hostname string is released automatically (_cleanup_free_);
 * the previous field value is freed before being replaced. */
435 static void server_cache_hostname(Server *s) {
436 _cleanup_free_ char *t = NULL;
441 t = gethostname_malloc();
445 x = strappend("_HOSTNAME=", t);
449 free(s->hostname_field);
450 s->hostname_field = x;
453 static bool shall_try_append_again(JournalFile *f, int r) {
455 /* -E2BIG Hit configured limit
457 -EDQUOT Quota limit hit
459 -EIO I/O error of some kind (mmap)
460 -EHOSTDOWN Other machine
461 -EBUSY Unclean shutdown
462 -EPROTONOSUPPORT Unsupported feature
465 -ESHUTDOWN Already archived
466 -EIDRM Journal file has been deleted */
468 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
469 log_debug("%s: Allocation limit reached, rotating.", f->path);
470 else if (r == -EHOSTDOWN)
471 log_info("%s: Journal file from other machine, rotating.", f->path);
472 else if (r == -EBUSY)
473 log_info("%s: Unclean shutdown, rotating.", f->path);
474 else if (r == -EPROTONOSUPPORT)
475 log_info("%s: Unsupported feature, rotating.", f->path);
476 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
477 log_warning("%s: Journal file corrupted, rotating.", f->path);
479 log_warning("%s: IO error, rotating.", f->path);
480 else if (r == -EIDRM)
481 log_warning("%s: Journal file has been deleted, rotating.", f->path);
/* Appends one entry (the `n` fields in `iovec`) to the journal selected for
 * `uid`, scheduling a sync on success.
 *
 * If the file suggests rotation up front, or the first append fails with an
 * error shall_try_append_again() considers curable, the journal is looked up
 * again and the append is retried once. `vacuumed` prevents a second retry.
 * Failures are logged and the entry is dropped. */
488 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
490 bool vacuumed = false;
497 f = find_journal(s, uid);
501 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
502 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
/* Re-resolve the target file after the rotation step. */
507 f = find_journal(s, uid);
512 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
514 server_schedule_sync(s, priority);
/* Give up if we already vacuumed once or the error is not recoverable. */
518 if (vacuumed || !shall_try_append_again(f, r)) {
519 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
526 f = find_journal(s, uid);
530 log_debug("Retrying write.");
531 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
533 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
535 server_schedule_sync(s, priority);
/* Augments a log entry with server-generated metadata fields and writes it.
 *
 * `iovec` holds `n` caller-supplied fields and has room for `m` elements in
 * total; the assertion below checks that the metadata fields (plus the
 * OBJECT_* fields when `object_pid` is set) fit. Fields are appended in
 * place: sender credentials and process details, audit data, cgroup-derived
 * unit/session data, the SELinux context, the same for the object process,
 * the source timestamp, and the cached boot/machine/host fields. Finally the
 * target journal UID is chosen from the split mode and the entry is handed
 * to write_to_journal(). */
538 static void dispatch_message_real(
540 struct iovec *iovec, unsigned n, unsigned m,
541 const struct ucred *ucred,
542 const struct timeval *tv,
543 const char *label, size_t label_len,
/* Fixed-size scratch buffers for the numeric fields; each is sized for its
 * prefix plus the longest decimal rendering of the value's type. */
548 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
549 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
550 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
551 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
552 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
553 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
554 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
555 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
561 uid_t realuid = 0, owner = 0, journal_uid;
562 bool owner_valid = false;
564 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
565 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
566 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
567 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
576 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
/* --- Fields derived from the sender's credentials --- */
579 realuid = ucred->uid;
581 sprintf(pid, "_PID="PID_FMT, ucred->pid);
582 IOVEC_SET_STRING(iovec[n++], pid);
584 sprintf(uid, "_UID="UID_FMT, ucred->uid);
585 IOVEC_SET_STRING(iovec[n++], uid);
587 sprintf(gid, "_GID="GID_FMT, ucred->gid);
588 IOVEC_SET_STRING(iovec[n++], gid);
590 r = get_process_comm(ucred->pid, &t);
592 x = strjoina("_COMM=", t);
594 IOVEC_SET_STRING(iovec[n++], x);
597 r = get_process_exe(ucred->pid, &t);
599 x = strjoina("_EXE=", t);
601 IOVEC_SET_STRING(iovec[n++], x);
604 r = get_process_cmdline(ucred->pid, 0, false, &t);
606 x = strjoina("_CMDLINE=", t);
608 IOVEC_SET_STRING(iovec[n++], x);
611 r = get_process_capeff(ucred->pid, &t);
613 x = strjoina("_CAP_EFFECTIVE=", t);
615 IOVEC_SET_STRING(iovec[n++], x);
/* --- Audit session and login UID of the sender --- */
619 r = audit_session_from_pid(ucred->pid, &audit);
621 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
622 IOVEC_SET_STRING(iovec[n++], audit_session);
625 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
627 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
628 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* --- Fields derived from the sender's cgroup path --- */
632 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
634 char *session = NULL;
636 x = strjoina("_SYSTEMD_CGROUP=", c);
637 IOVEC_SET_STRING(iovec[n++], x);
639 r = cg_path_get_session(c, &t);
641 session = strjoina("_SYSTEMD_SESSION=", t);
643 IOVEC_SET_STRING(iovec[n++], session);
646 if (cg_path_get_owner_uid(c, &owner) >= 0) {
649 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
650 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* The caller-supplied unit_id is only a fallback when the cgroup path does
 * not yield a unit; whether it is recorded as system or user unit depends
 * on whether a session was found. */
653 if (cg_path_get_unit(c, &t) >= 0) {
654 x = strjoina("_SYSTEMD_UNIT=", t);
656 IOVEC_SET_STRING(iovec[n++], x);
657 } else if (unit_id && !session) {
658 x = strjoina("_SYSTEMD_UNIT=", unit_id);
659 IOVEC_SET_STRING(iovec[n++], x);
662 if (cg_path_get_user_unit(c, &t) >= 0) {
663 x = strjoina("_SYSTEMD_USER_UNIT=", t);
665 IOVEC_SET_STRING(iovec[n++], x);
666 } else if (unit_id && session) {
667 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
668 IOVEC_SET_STRING(iovec[n++], x);
671 if (cg_path_get_slice(c, &t) >= 0) {
672 x = strjoina("_SYSTEMD_SLICE=", t);
674 IOVEC_SET_STRING(iovec[n++], x);
678 } else if (unit_id) {
679 x = strjoina("_SYSTEMD_UNIT=", unit_id);
680 IOVEC_SET_STRING(iovec[n++], x);
/* --- SELinux context: use the label passed in if any, otherwise query
 * the sender's context by PID --- */
684 if (mac_selinux_use()) {
686 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
/* label is not NUL-terminated (length given separately), so terminate
 * the copy explicitly. */
688 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
689 IOVEC_SET_STRING(iovec[n++], x);
691 security_context_t con;
693 if (getpidcon(ucred->pid, &con) >= 0) {
694 x = strjoina("_SELINUX_CONTEXT=", con);
697 IOVEC_SET_STRING(iovec[n++], x);
/* --- OBJECT_* fields describing the process object_pid refers to --- */
706 r = get_process_uid(object_pid, &object_uid);
708 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
709 IOVEC_SET_STRING(iovec[n++], o_uid);
712 r = get_process_gid(object_pid, &object_gid);
714 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
715 IOVEC_SET_STRING(iovec[n++], o_gid);
718 r = get_process_comm(object_pid, &t);
720 x = strjoina("OBJECT_COMM=", t);
722 IOVEC_SET_STRING(iovec[n++], x);
725 r = get_process_exe(object_pid, &t);
727 x = strjoina("OBJECT_EXE=", t);
729 IOVEC_SET_STRING(iovec[n++], x);
732 r = get_process_cmdline(object_pid, 0, false, &t);
734 x = strjoina("OBJECT_CMDLINE=", t);
736 IOVEC_SET_STRING(iovec[n++], x);
740 r = audit_session_from_pid(object_pid, &audit);
742 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
743 IOVEC_SET_STRING(iovec[n++], o_audit_session);
746 r = audit_loginuid_from_pid(object_pid, &loginuid);
748 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
749 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
753 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
755 x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
756 IOVEC_SET_STRING(iovec[n++], x);
758 r = cg_path_get_session(c, &t);
760 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
762 IOVEC_SET_STRING(iovec[n++], x);
765 if (cg_path_get_owner_uid(c, &owner) >= 0) {
766 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
767 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
770 if (cg_path_get_unit(c, &t) >= 0) {
771 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
773 IOVEC_SET_STRING(iovec[n++], x);
776 if (cg_path_get_user_unit(c, &t) >= 0) {
777 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
779 IOVEC_SET_STRING(iovec[n++], x);
/* --- Timestamp supplied by the sender's socket, and cached host fields --- */
788 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
789 IOVEC_SET_STRING(iovec[n++], source_time);
792 /* Note that strictly speaking storing the boot id here is
793 * redundant since the entry includes this in-line
794 * anyway. However, we need this indexed, too. */
795 if (!isempty(s->boot_id_field))
796 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
798 if (!isempty(s->machine_id_field))
799 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
801 if (!isempty(s->hostname_field))
802 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
/* --- Choose which journal (by UID) receives the entry --- */
806 if (s->split_mode == SPLIT_UID && realuid > 0)
807 /* Split up strictly by any UID */
808 journal_uid = realuid;
809 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
810 /* Split up by login UIDs. We do this only if the
811 * realuid is not root, in order not to accidentally
812 * leak privileged information to the user that is
813 * logged by a privileged process that is part of an
814 * unprivileged session. */
819 write_to_journal(s, journal_uid, iovec, n, priority);
/* Logs a message originating from journald itself.
 *
 * Builds an entry with PRIORITY=6, _TRANSPORT=driver, the printf-style
 * formatted MESSAGE= and, unless `message_id` is the null ID, a MESSAGE_ID=
 * field; then dispatches it with this process's own credentials at
 * LOG_INFO. The message is truncated to the size of the local buffer by
 * vsnprintf(). */
822 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
/* Presumably sized for "MESSAGE_ID=" (11 chars) + 32 hex digits + NUL. */
823 char mid[11 + 32 + 1];
824 char buffer[16 + LINE_MAX + 1];
/* Up to four fields are added here; the rest is headroom for the metadata
 * that dispatch_message_real() appends. */
825 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
828 struct ucred ucred = {};
833 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
834 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The formatted text is written right after the 8-byte "MESSAGE=" prefix. */
836 memcpy(buffer, "MESSAGE=", 8);
837 va_start(ap, format);
838 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
840 IOVEC_SET_STRING(iovec[n++], buffer);
842 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
843 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
844 IOVEC_SET_STRING(iovec[n++], mid);
847 ucred.pid = getpid();
848 ucred.uid = getuid();
849 ucred.gid = getgid();
851 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
/* Entry point for storing a received log message.
 *
 * Drops the message when its priority exceeds s->max_level_store or storage
 * is disabled, applies rate limiting keyed on a shortened cgroup path of the
 * sender, reports suppressed messages via server_driver_message(), and
 * finally forwards the entry to dispatch_message_real(). `iovec` may be NULL
 * only when `n` is 0; `m` is the capacity of `iovec`. */
854 void server_dispatch_message(
856 struct iovec *iovec, unsigned n, unsigned m,
857 const struct ucred *ucred,
858 const struct timeval *tv,
859 const char *label, size_t label_len,
865 _cleanup_free_ char *path = NULL;
869 assert(iovec || n == 0);
874 if (LOG_PRI(priority) > s->max_level_store)
877 /* Stop early in case the information will not be stored
879 if (s->storage == STORAGE_NONE)
885 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
889 /* example: /user/lennart/3/foobar
890 * /system/dbus.service/foobar
892 * So let's cut off everything past the third /, since that is
893 * where user directories start */
/* Walk to the third '/' in the path. */
895 c = strchr(path, '/');
897 c = strchr(c+1, '/');
899 c = strchr(c+1, '/');
/* The rate limiter is given the remaining free space as well; the return
 * value `rl` is used below as 1 + number of suppressed messages. */
905 rl = journal_rate_limit_test(s->rate_limit, path,
906 priority & LOG_PRIMASK, available_space(s, false));
911 /* Write a suppression message if we suppressed something */
913 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
914 "Suppressed %u messages from %s", rl - 1, path);
917 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
/* Opens the persistent system journal and/or the runtime journal according
 * to s->storage, creating directories as needed, and refreshes the cached
 * available-space figure (verbosely).
 *
 * The persistent journal is only attempted for STORAGE_PERSISTENT/AUTO and
 * when a flush was requested or the "flushed" flag file exists (part of that
 * condition is not visible here). The runtime journal is opened without
 * creating it when a system journal exists (so it can be flushed), and
 * created otherwise. */
921 static int system_journal_open(Server *s, bool flush_requested) {
927 r = sd_id128_get_machine(&machine);
929 return log_error_errno(r, "Failed to get machine id: %m");
931 sd_id128_to_string(machine, ids);
933 if (!s->system_journal &&
934 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
936 || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
938 /* If in auto mode: first try to create the machine
939 * path, but not the prefix.
941 * If in persistent mode: create /var/log/journal and
942 * the machine path */
944 if (s->storage == STORAGE_PERSISTENT)
945 (void) mkdir("/var/log/journal/", 0755);
947 fn = strjoina("/var/log/journal/", ids);
948 (void) mkdir(fn, 0755);
950 fn = strjoina(fn, "/system.journal");
951 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
954 server_fix_perms(s, s->system_journal, 0);
/* A missing directory or read-only file system is expected in some
 * setups and therefore not worth a warning. */
956 if (r != -ENOENT && r != -EROFS)
957 log_warning_errno(r, "Failed to open system journal: %m");
963 if (!s->runtime_journal &&
964 (s->storage != STORAGE_NONE)) {
966 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
970 if (s->system_journal) {
972 /* Try to open the runtime journal, but only
973 * if it already exists, so that we can flush
974 * it into the system journal */
976 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
981 log_warning_errno(r, "Failed to open runtime journal: %m");
988 /* OK, we really need the runtime journal, so create
989 * it if necessary. */
991 (void) mkdir("/run/log", 0755);
992 (void) mkdir("/run/log/journal", 0755);
993 (void) mkdir_parents(fn, 0750);
995 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
999 return log_error_errno(r, "Failed to open runtime journal: %m");
1002 if (s->runtime_journal)
1003 server_fix_perms(s, s->runtime_journal, 0);
/* Recompute and log the space budget for whichever journal is now active. */
1006 available_space(s, true);
/* Copies all entries of the runtime journal into the persistent system
 * journal, then closes the runtime journal and removes /run/log/journal.
 *
 * Nothing is done unless storage is AUTO or PERSISTENT, a runtime journal is
 * open, and the system journal can be opened. A failed copy is retried once
 * if shall_try_append_again() says so. The time spent and number of entries
 * are reported via server_driver_message(). */
1011 int server_flush_to_var(Server *s) {
1013 sd_journal *j = NULL;
1014 char ts[FORMAT_TIMESPAN_MAX];
1021 if (s->storage != STORAGE_AUTO &&
1022 s->storage != STORAGE_PERSISTENT)
1025 if (!s->runtime_journal)
/* Make sure the persistent journal is open (flush explicitly requested). */
1028 system_journal_open(s, true);
1030 if (!s->system_journal)
1033 log_debug("Flushing to /var...");
1035 start = now(CLOCK_MONOTONIC);
1037 r = sd_id128_get_machine(&machine);
/* Read back the runtime journal through the public reader API. */
1041 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1043 return log_error_errno(r, "Failed to read runtime journal: %m");
1045 sd_journal_set_data_threshold(j, 0);
1047 SD_JOURNAL_FOREACH(j) {
1051 f = j->current_file;
1052 assert(f && f->current_offset > 0);
1056 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1058 log_error_errno(r, "Can't read entry: %m");
1062 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1066 if (!shall_try_append_again(s->system_journal, r)) {
1067 log_error_errno(r, "Can't write entry: %m");
/* The system journal may be gone if the rotation step failed. */
1074 if (!s->system_journal) {
1075 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1080 log_debug("Retrying write.");
1081 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1083 log_error_errno(r, "Can't write entry: %m");
1089 journal_file_post_change(s->system_journal);
1091 journal_file_close(s->runtime_journal);
1092 s->runtime_journal = NULL;
1095 rm_rf("/run/log/journal", false, true, false);
1097 sd_journal_close(j);
1099 server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
/* Event-loop I/O callback for the native, syslog and audit datagram sockets.
 *
 * Receives one datagram into the server's growable buffer, extracts sender
 * credentials, SELinux label, timestamp and passed file descriptors from the
 * control messages, and hands the payload to the handler matching the
 * socket. Any received file descriptors are closed before returning.
 * `userdata` is the Server. */
1104 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1105 Server *s = userdata;
1108 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
/* Anything other than plain readability is treated as an error. */
1110 if (revents != EPOLLIN) {
1111 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1116 struct ucred *ucred = NULL;
1117 struct timeval *tv = NULL;
1118 struct cmsghdr *cmsg;
1120 size_t label_len = 0;
1124 struct cmsghdr cmsghdr;
1126 /* We use NAME_MAX space for the SELinux label
1127 * here. The kernel currently enforces no
1128 * limit, but according to suggestions from
1129 * the SELinux people this will change and it
1130 * will probably be identical to NAME_MAX. For
1131 * now we use that, but this should be updated
1132 * one day when the final limit is known. */
1133 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1134 CMSG_SPACE(sizeof(struct timeval)) +
1135 CMSG_SPACE(sizeof(int)) + /* fd */
1136 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1138 union sockaddr_union sa = {};
1139 struct msghdr msghdr = {
1142 .msg_control = &control,
1143 .msg_controllen = sizeof(control),
1145 .msg_namelen = sizeof(sa),
1154 /* Try to get the right size, if we can. (Not all
1155 * sockets support SIOCINQ, hence we just try, but
1156 * don't rely on it.) */
1157 (void) ioctl(fd, SIOCINQ, &v);
1159 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1160 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1162 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1164 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1167 iovec.iov_base = s->buffer;
1168 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1170 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
/* Transient conditions are not errors. */
1172 if (errno == EINTR || errno == EAGAIN)
1175 log_error_errno(errno, "recvmsg() failed: %m");
/* Pick the ancillary data we care about; credential and timestamp
 * messages are only accepted when they have exactly the expected size. */
1179 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1181 if (cmsg->cmsg_level == SOL_SOCKET &&
1182 cmsg->cmsg_type == SCM_CREDENTIALS &&
1183 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1184 ucred = (struct ucred*) CMSG_DATA(cmsg);
1185 else if (cmsg->cmsg_level == SOL_SOCKET &&
1186 cmsg->cmsg_type == SCM_SECURITY) {
1187 label = (char*) CMSG_DATA(cmsg);
1188 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1189 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1190 cmsg->cmsg_type == SO_TIMESTAMP &&
1191 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1192 tv = (struct timeval*) CMSG_DATA(cmsg);
1193 else if (cmsg->cmsg_level == SOL_SOCKET &&
1194 cmsg->cmsg_type == SCM_RIGHTS) {
1195 fds = (int*) CMSG_DATA(cmsg);
1196 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1200 /* And a trailing NUL, just in case */
/* Route the payload by socket. Only the native socket accepts a passed
 * file descriptor (exactly one, with an empty payload). */
1203 if (fd == s->syslog_fd) {
1204 if (n > 0 && n_fds == 0)
1205 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1207 log_warning("Got file descriptors via syslog socket. Ignoring.");
1209 } else if (fd == s->native_fd) {
1210 if (n > 0 && n_fds == 0)
1211 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1212 else if (n == 0 && n_fds == 1)
1213 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1215 log_warning("Got too many file descriptors via native socket. Ignoring.");
1218 assert(fd == s->audit_fd);
1220 if (n > 0 && n_fds == 0)
1221 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1223 log_warning("Got file descriptors via audit socket. Ignoring.");
/* Never keep descriptors that were passed to us. */
1226 close_many(fds, n_fds);
/* SIGUSR1 handler: flushes the runtime journal to /var and then creates the
 * flag file /run/systemd/journal/flushed, which system_journal_open() checks
 * for. `userdata` is the Server. */
1230 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1231 Server *s = userdata;
1235 log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
1237 server_flush_to_var(s);
1241 touch("/run/systemd/journal/flushed");
/* SIGUSR2 handler: logs the rotation request and the requesting PID. The
 * statements that act on the request are not visible here (presumably a
 * rotate followed by a vacuum -- confirm). `userdata` is the Server. */
1246 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1247 Server *s = userdata;
1251 log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
/* Handler for SIGTERM and SIGINT (see setup_signals()): logs the signal and
 * asks the event loop to exit with code 0. `userdata` is the Server. */
1258 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1259 Server *s = userdata;
1263 log_received_signal(LOG_INFO, si);
1265 sd_event_exit(s->event, 0);
/* Blocks SIGINT, SIGTERM, SIGUSR1 and SIGUSR2 for normal delivery and
 * registers an event-loop signal source for each of them. Note that
 * SIG_SETMASK replaces the process signal mask with exactly this set. */
1269 static int setup_signals(Server *s) {
1275 assert_se(sigemptyset(&mask) == 0);
1276 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1277 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1279 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1283 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
/* SIGTERM and SIGINT share the same handler. */
1287 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1291 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
/* Applies journald settings given on the kernel command line.
 *
 * Recognized switches are systemd.journald.forward_to_{syslog,kmsg,console,
 * wall}=<bool>; each sets the corresponding Server flag. Unparsable values
 * and unknown systemd.journald* words only produce a warning. Failure to
 * read the command line is logged and ignored. */
1298 static int server_parse_proc_cmdline(Server *s) {
1299 _cleanup_free_ char *line = NULL;
1300 const char *w, *state;
1304 r = proc_cmdline(&line);
1306 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
1310 FOREACH_WORD_QUOTED(w, l, line, state) {
/* NOTE(review): `word` is declared _cleanup_free_ without an initializer;
 * it is assigned on the next visible line, but initializing it to NULL
 * would be the safer convention -- confirm nothing can leave the scope
 * in between. */
1311 _cleanup_free_ char *word;
1313 word = strndup(w, l);
/* The numeric offsets below equal the length of the matched prefix, so
 * `word + N` points at the value after '='. */
1317 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1318 r = parse_boolean(word + 35);
1320 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1322 s->forward_to_syslog = r;
1323 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1324 r = parse_boolean(word + 33);
1326 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1328 s->forward_to_kmsg = r;
1329 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1330 r = parse_boolean(word + 36);
1332 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1334 s->forward_to_console = r;
1335 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1336 r = parse_boolean(word + 33);
1338 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1340 s->forward_to_wall = r;
1341 } else if (startswith(word, "systemd.journald"))
1342 log_warning("Invalid systemd.journald parameter. Ignoring.");
1344 /* do not warn about state here, since probably systemd already did */
/* Loads /etc/systemd/journald.conf plus the journald.conf drop-in
 * directories via config_parse_many(), resolving settings through the
 * gperf-generated journald_gperf_lookup table. Returns that call's result. */
1349 static int server_parse_config_file(Server *s) {
1352 return config_parse_many("/etc/systemd/journald.conf",
1353 CONF_DIRS_NULSTR("systemd/journald.conf"),
1355 config_item_perf_lookup, journald_gperf_lookup,
/* Timer callback installed by server_schedule_sync(); `userdata` is the
 * Server. The body is not visible here (presumably it calls server_sync()
 * -- confirm). */
1359 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1360 Server *s = userdata;
/* Arranges for the journals to be synced to disk.
 *
 * Messages of priority CRIT or more severe trigger an immediate sync.
 * Otherwise, unless a sync is already scheduled, a one-shot timer is armed
 * s->sync_interval_usec from now (the timer source is created on first use
 * and re-armed afterwards) and s->sync_scheduled is set. */
1368 int server_schedule_sync(Server *s, int priority) {
1373 if (priority <= LOG_CRIT) {
1374 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1379 if (s->sync_scheduled)
1382 if (s->sync_interval_usec > 0) {
1385 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1389 when += s->sync_interval_usec;
/* First use: create the timer source; later calls only move its deadline. */
1391 if (!s->sync_event_source) {
1392 r = sd_event_add_time(
1394 &s->sync_event_source,
1397 server_dispatch_sync, s);
1401 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1403 r = sd_event_source_set_time(s->sync_event_source, when);
1407 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1412 s->sync_scheduled = true;
/* I/O callback on the hostname file descriptor (registered by
 * server_open_hostname()): refreshes the cached _HOSTNAME= field.
 * `userdata` is the Server. */
1418 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1419 Server *s = userdata;
1423 server_cache_hostname(s);
/* Opens /proc/sys/kernel/hostname and registers it with the event loop so
 * that dispatch_hostname_change() runs on hostname changes. If registration
 * fails in the way described by the comment below, the fd is closed again
 * and the failure is only warned about; other failures are returned as
 * errors. The event source is given a priority of
 * SD_EVENT_PRIORITY_IMPORTANT-10. */
1427 static int server_open_hostname(Server *s) {
1432 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1433 if (s->hostname_fd < 0)
1434 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1436 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1438 /* kernels prior to 3.2 don't support polling this file. Ignore
1441 log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1443 s->hostname_fd = safe_close(s->hostname_fd);
1447 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1450 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1452 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1457 int server_init(Server *s) {
1458 _cleanup_fdset_free_ FDSet *fds = NULL;
1464 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = -1;
1468 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1469 s->sync_scheduled = false;
1471 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1472 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1474 s->forward_to_wall = true;
1476 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1478 s->max_level_store = LOG_DEBUG;
1479 s->max_level_syslog = LOG_DEBUG;
1480 s->max_level_kmsg = LOG_NOTICE;
1481 s->max_level_console = LOG_INFO;
1482 s->max_level_wall = LOG_EMERG;
1484 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1485 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1487 server_parse_config_file(s);
1488 server_parse_proc_cmdline(s);
1489 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1490 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1491 s->rate_limit_interval, s->rate_limit_burst);
1492 s->rate_limit_interval = s->rate_limit_burst = 0;
1495 mkdir_p("/run/systemd/journal", 0755);
1497 s->user_journals = ordered_hashmap_new(NULL);
1498 if (!s->user_journals)
1501 s->mmap = mmap_cache_new();
1505 r = sd_event_default(&s->event);
1507 return log_error_errno(r, "Failed to create event loop: %m");
1509 sd_event_set_watchdog(s->event, true);
1511 n = sd_listen_fds(true);
1513 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1515 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1517 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1519 if (s->native_fd >= 0) {
1520 log_error("Too many native sockets passed.");
1526 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1528 if (s->stdout_fd >= 0) {
1529 log_error("Too many stdout sockets passed.");
1535 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1536 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1538 if (s->syslog_fd >= 0) {
1539 log_error("Too many /dev/log sockets passed.");
1545 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1547 if (s->audit_fd >= 0) {
1548 log_error("Too many audit sockets passed.");
1562 r = fdset_put(fds, fd);
1568 r = server_open_stdout_socket(s, fds);
1572 if (fdset_size(fds) > 0) {
1573 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1574 fds = fdset_free(fds);
1577 r = server_open_syslog_socket(s);
1581 r = server_open_native_socket(s);
1585 r = server_open_dev_kmsg(s);
1589 r = server_open_audit(s);
1593 r = server_open_kernel_seqnum(s);
1597 r = server_open_hostname(s);
1601 r = setup_signals(s);
1605 s->udev = udev_new();
1609 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1613 r = cg_get_root_path(&s->cgroup_root);
1617 server_cache_hostname(s);
1618 server_cache_boot_id(s);
1619 server_cache_machine_id(s);
1621 r = system_journal_open(s, false);
1628 void server_maybe_append_tags(Server *s) {
1634 n = now(CLOCK_REALTIME);
1636 if (s->system_journal)
1637 journal_file_maybe_append_tag(s->system_journal, n);
1639 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1640 journal_file_maybe_append_tag(f, n);
1644 void server_done(Server *s) {
1648 while (s->stdout_streams)
1649 stdout_stream_free(s->stdout_streams);
1651 if (s->system_journal)
1652 journal_file_close(s->system_journal);
1654 if (s->runtime_journal)
1655 journal_file_close(s->runtime_journal);
1657 while ((f = ordered_hashmap_steal_first(s->user_journals)))
1658 journal_file_close(f);
1660 ordered_hashmap_free(s->user_journals);
1662 sd_event_source_unref(s->syslog_event_source);
1663 sd_event_source_unref(s->native_event_source);
1664 sd_event_source_unref(s->stdout_event_source);
1665 sd_event_source_unref(s->dev_kmsg_event_source);
1666 sd_event_source_unref(s->audit_event_source);
1667 sd_event_source_unref(s->sync_event_source);
1668 sd_event_source_unref(s->sigusr1_event_source);
1669 sd_event_source_unref(s->sigusr2_event_source);
1670 sd_event_source_unref(s->sigterm_event_source);
1671 sd_event_source_unref(s->sigint_event_source);
1672 sd_event_source_unref(s->hostname_event_source);
1673 sd_event_unref(s->event);
1675 safe_close(s->syslog_fd);
1676 safe_close(s->native_fd);
1677 safe_close(s->stdout_fd);
1678 safe_close(s->dev_kmsg_fd);
1679 safe_close(s->audit_fd);
1680 safe_close(s->hostname_fd);
1683 journal_rate_limit_free(s->rate_limit);
1685 if (s->kernel_seqnum)
1686 munmap(s->kernel_seqnum, sizeof(uint64_t));
1690 free(s->cgroup_root);
1691 free(s->hostname_field);
1694 mmap_cache_unref(s->mmap);
1697 udev_unref(s->udev);