1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33 #include "sd-daemon.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
42 #include "conf-parser.h"
43 #include "selinux-util.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-rate-limit.h"
48 #include "journald-kmsg.h"
49 #include "journald-syslog.h"
50 #include "journald-stream.h"
51 #include "journald-console.h"
52 #include "journald-native.h"
53 #include "journald-server.h"
57 #include <acl/libacl.h>
62 #include <selinux/selinux.h>
/* Cap on the number of per-user journal files kept open at once;
 * find_journal() closes entries while the map is at or above this size. */
65 #define USER_JOURNALS_MAX 1024
/* Defaults that server_init() assigns before the configuration is parsed. */
67 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
68 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
69 #define DEFAULT_RATE_LIMIT_BURST 1000
/* Age after which the value cached by available_space() is recomputed. */
71 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Textual name for each Storage enum value, indexed by the enum. */
73 static const char* const storage_table[_STORAGE_MAX] = {
74 [STORAGE_AUTO] = "auto",
75 [STORAGE_VOLATILE] = "volatile",
76 [STORAGE_PERSISTENT] = "persistent",
77 [STORAGE_NONE] = "none"
/* Macro-generated helpers built on the table above: a string lookup for
 * Storage and the config_parse_storage() callback (presumably used by the
 * configuration parser -- confirm against the gperf table). */
80 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
81 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Textual name for each SplitMode enum value, indexed by the enum. */
83 static const char* const split_mode_table[_SPLIT_MAX] = {
84 [SPLIT_LOGIN] = "login",
86 [SPLIT_NONE] = "none",
/* Macro-generated helpers built on the table above: a string lookup for
 * SplitMode and the config_parse_split_mode() callback. */
89 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
90 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Returns how many more bytes the journal may occupy and caches the result
 * in s->cached_available_space together with a CLOCK_MONOTONIC timestamp.
 *
 * The value is MIN(max_use, filesystem-available less keep_free), reduced
 * by the disk space already taken by journal files in this machine's
 * journal directory. A usage summary is emitted through
 * server_driver_message(); NOTE(review): it is presumably gated on
 * `verbose`, but the guard is not visible in this excerpt. */
92 static uint64_t available_space(Server *s, bool verbose) {
94 _cleanup_free_ char *p = NULL;
97 uint64_t sum = 0, ss_avail = 0, avail = 0;
99 _cleanup_closedir_ DIR *d = NULL;
104 ts = now(CLOCK_MONOTONIC);
/* Serve the cached figure while it is younger than
 * RECHECK_AVAILABLE_SPACE_USEC. NOTE(review): the remainder of this
 * condition is not visible in this excerpt. */
106 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
108 return s->cached_available_space;
110 r = sd_id128_get_machine(&machine);
/* Persistent directory and system metrics when the system journal is
 * open; the /run directory and runtime metrics are the alternative. */
114 if (s->system_journal) {
115 f = "/var/log/journal/";
116 m = &s->system_metrics;
118 f = "/run/log/journal/";
119 m = &s->runtime_metrics;
/* The directory examined is <prefix><machine-id>. */
124 p = strappend(f, sd_id128_to_string(machine, ids));
132 if (fstatvfs(dirfd(d), &ss) < 0)
141 if (!de && errno != 0)
/* Only *.journal and *.journal~ entries that are regular files count
 * towards the usage sum. */
147 if (!endswith(de->d_name, ".journal") &&
148 !endswith(de->d_name, ".journal~"))
151 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
154 if (!S_ISREG(st.st_mode))
/* Allocated size: st_blocks multiplied by 512. */
157 sum += (uint64_t) st.st_blocks * 512UL;
/* Bytes the file system reports as available. */
160 ss_avail = ss.f_bsize * ss.f_bavail;
162 /* If we reached a high mark, we will always allow this much
163 * again, unless usage goes above max_use. This watermark
164 * value is cached so that we don't give up space on pressure,
165 * but hover below the maximum usage. */
/* LESS_BY is used for both subtractions (presumably a subtraction that
 * clamps at zero -- confirm against its definition). */
170 avail = LESS_BY(ss_avail, m->keep_free);
172 s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
173 s->cached_available_space_timestamp = ts;
176 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
177 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
/* The reported "current limit" is remaining space plus what is already used. */
179 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
180 "%s journal is using %s (max allowed %s, "
181 "trying to leave %s free of %s available → current limit %s).",
182 s->system_journal ? "Permanent" : "Runtime",
183 format_bytes(fb1, sizeof(fb1), sum),
184 format_bytes(fb2, sizeof(fb2), m->max_use),
185 format_bytes(fb3, sizeof(fb3), m->keep_free),
186 format_bytes(fb4, sizeof(fb4), ss_avail),
187 format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
190 return s->cached_available_space;
/* Best-effort permission fix-up for journal file f: sets its mode to 0640
 * and patches its ACL so that the user `uid` has read access. Every failure
 * is logged as a warning and otherwise ignored; nothing is returned.
 * The parameter s is not referenced in the lines visible here. */
193 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
198 acl_permset_t permset;
/* NOTE(review): fchmod() returns -1 and reports the cause in errno, so
 * strerror(-r) in the warning below always prints the text for EPERM.
 * Logging with %m (as the ACL warnings do) would report the real error. */
203 r = fchmod(f->fd, 0640);
205 log_warning("Failed to fix access mode on %s, ignoring: %s", f->path, strerror(-r));
211 acl = acl_get_fd(f->fd);
213 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an existing ACL entry for uid; the lines below create an
 * ACL_USER entry qualified with uid. */
217 r = acl_find_uid(acl, uid, &entry);
220 if (acl_create_entry(&acl, &entry) < 0 ||
221 acl_set_tag_type(entry, ACL_USER) < 0 ||
222 acl_set_qualifier(entry, &uid) < 0) {
223 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
228 /* We do not recalculate the mask unconditionally here,
229 * so that the fchmod() mask above stays intact. */
230 if (acl_get_permset(entry, &permset) < 0 ||
231 acl_add_perm(permset, ACL_READ) < 0 ||
232 calc_acl_mask_if_needed(&acl) < 0) {
233 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Write the modified ACL back to the file. */
237 if (acl_set_fd(f->fd, acl) < 0)
238 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Picks the journal file that entries attributed to `uid` are written to.
 *
 * The runtime journal wins whenever it is open. Otherwise a per-user file
 * /var/log/journal/<machine-id>/user-<uid>.journal is looked up in, or
 * opened and added to, s->user_journals. Every failure path visible here
 * falls back to s->system_journal. */
245 static JournalFile* find_journal(Server *s, uid_t uid) {
246 _cleanup_free_ char *p = NULL;
253 /* We split up user logs only on /var, not on /run. If the
254 * runtime file is open, we write to it exclusively, in order
255 * to guarantee proper order as soon as we flush /run to
256 * /var and close the runtime file. */
258 if (s->runtime_journal)
259 return s->runtime_journal;
262 return s->system_journal;
264 r = sd_id128_get_machine(&machine);
266 return s->system_journal;
/* Already-open per-user journals are cached by uid. */
268 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
272 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
273 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
274 return s->system_journal;
/* Keep the number of open user journals below USER_JOURNALS_MAX by
 * closing whichever entry hashmap_steal_first() hands back. */
276 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
277 /* Too many open? Then let's close one */
278 f = hashmap_steal_first(s->user_journals);
280 journal_file_close(f);
283 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
285 return s->system_journal;
/* Give the owning user read access to the file. */
287 server_fix_perms(s, f, uid);
/* If the file cannot be registered in the map it is closed again and the
 * system journal is used instead. */
289 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
291 journal_file_close(f);
292 return s->system_journal;
/* Rotates every open journal: the runtime journal, the system journal and
 * each per-user journal. Failures are logged with log_error(); permissions
 * are re-applied to the files via server_fix_perms(). */
298 void server_rotate(Server *s) {
304 log_debug("Rotating...");
/* The runtime journal is rotated with sealing disabled (last argument). */
306 if (s->runtime_journal) {
307 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
/* journal_file_rotate() takes the address of the pointer; the two error
 * messages differ depending on whether it is still non-NULL afterwards. */
309 if (s->runtime_journal)
310 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
312 log_error("Failed to create new runtime journal: %s", strerror(-r));
314 server_fix_perms(s, s->runtime_journal, 0);
317 if (s->system_journal) {
318 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
320 if (s->system_journal)
321 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
323 log_error("Failed to create new system journal: %s", strerror(-r));
326 server_fix_perms(s, s->system_journal, 0);
/* Per-user journals: the map key is the uid. An entry is removed from the
 * map on the "Failed to create" path and replaced with the rotated file
 * otherwise. */
329 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
330 r = journal_file_rotate(&f, s->compress, s->seal);
333 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
335 log_error("Failed to create user journal: %s", strerror(-r));
336 hashmap_remove(s->user_journals, k);
339 hashmap_replace(s->user_journals, k, f);
340 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Calls journal_file_set_offline() on the system journal and on every
 * per-user journal, switches the sync timer source off and clears
 * s->sync_scheduled. Errors are logged and do not stop the remaining steps
 * shown here. */
345 void server_sync(Server *s) {
351 if (s->system_journal) {
352 r = journal_file_set_offline(s->system_journal);
354 log_error("Failed to sync system journal: %s", strerror(-r));
357 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
358 r = journal_file_set_offline(f);
360 log_error("Failed to sync user journal: %s", strerror(-r));
/* The timer source only exists once server_schedule_sync() has created it. */
363 if (s->sync_event_source) {
364 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
366 log_error("Failed to disable sync timer source: %s", strerror(-r));
369 s->sync_scheduled = false;
/* Vacuums this machine's journal directories under /var/log/journal and
 * /run/log/journal (each only when the corresponding journal is open),
 * passing the respective max_use limit and s->max_retention_usec.
 * s->oldest_file_usec is reset to 0 first and handed to
 * journal_directory_vacuum() as an output argument. */
372 void server_vacuum(Server *s) {
377 log_debug("Vacuuming...");
379 s->oldest_file_usec = 0;
381 r = sd_id128_get_machine(&machine);
383 log_error("Failed to get machine ID: %s", strerror(-r));
387 sd_id128_to_string(machine, ids);
389 if (s->system_journal) {
390 char *p = strappenda("/var/log/journal/", ids);
392 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->max_retention_usec, &s->oldest_file_usec);
/* A missing directory (-ENOENT) is not reported. */
393 if (r < 0 && r != -ENOENT)
394 log_error("Failed to vacuum %s: %s", p, strerror(-r));
397 if (s->runtime_journal) {
398 char *p = strappenda("/run/log/journal/", ids);
400 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->max_retention_usec, &s->oldest_file_usec);
401 if (r < 0 && r != -ENOENT)
402 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* Zero the cache timestamp so that available_space() recomputes on its
 * next call. */
405 s->cached_available_space_timestamp = 0;
/* Fills s->machine_id_field with "_MACHINE_ID=" followed by the string form
 * of the machine ID obtained from sd_id128_get_machine(). */
408 static void server_cache_machine_id(Server *s) {
414 r = sd_id128_get_machine(&id);
/* stpcpy() returns the end of the copied prefix, so the ID is written
 * directly after it. */
418 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
/* Fills s->boot_id_field with "_BOOT_ID=" followed by the string form of
 * the boot ID obtained from sd_id128_get_boot(). */
421 static void server_cache_boot_id(Server *s) {
427 r = sd_id128_get_boot(&id);
/* stpcpy() returns the end of the copied prefix, so the ID is written
 * directly after it. */
431 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
/* Rebuilds s->hostname_field as "_HOSTNAME=<current hostname>". The old
 * field is freed and replaced by the newly allocated string. */
434 static void server_cache_hostname(Server *s) {
/* t is released automatically when it goes out of scope. */
435 _cleanup_free_ char *t = NULL;
440 t = gethostname_malloc();
444 x = strappend("_HOSTNAME=", t);
448 free(s->hostname_field);
449 s->hostname_field = x;
452 bool shall_try_append_again(JournalFile *f, int r) {
454 /* -E2BIG Hit configured limit
456 -EDQUOT Quota limit hit
458 -EHOSTDOWN Other machine
459 -EBUSY Unclean shutdown
460 -EPROTONOSUPPORT Unsupported feature
463 -ESHUTDOWN Already archived */
465 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
466 log_debug("%s: Allocation limit reached, rotating.", f->path);
467 else if (r == -EHOSTDOWN)
468 log_info("%s: Journal file from other machine, rotating.", f->path);
469 else if (r == -EBUSY)
470 log_info("%s: Unclean shutdown, rotating.", f->path);
471 else if (r == -EPROTONOSUPPORT)
472 log_info("%s: Unsupported feature, rotating.", f->path);
473 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
474 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Appends one entry, given as n iovec items, to the journal selected for
 * `uid` by find_journal(). A sync is scheduled according to `priority`
 * after a write. If the first append fails with a code that
 * shall_try_append_again() considers recoverable, the journal is looked up
 * again and the append is retried once; other failures are logged and the
 * entry is dropped. */
481 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
/* Checked before retrying: no retry happens once this is true. */
483 bool vacuumed = false;
490 f = find_journal(s, uid);
/* Rotate pre-emptively when the file itself suggests it, then look the
 * journal up again. */
494 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
495 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
500 f = find_journal(s, uid);
505 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
507 server_schedule_sync(s, priority);
511 if (vacuumed || !shall_try_append_again(f, r)) {
/* Total payload size, for the error message only. */
514 for (i = 0; i < n; i++)
515 size += iovec[i].iov_len;
517 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
524 f = find_journal(s, uid);
528 log_debug("Retrying write.");
529 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
533 for (i = 0; i < n; i++)
534 size += iovec[i].iov_len;
536 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
538 server_schedule_sync(s, priority);
/* Appends metadata fields to the caller's iovec array and hands the entry
 * to write_to_journal().
 *
 * iovec holds n caller-supplied fields and has room for m entries in
 * total; the assert below checks that the metadata fields fit. Fields are
 * derived from the sender credentials (ucred), from the object process
 * (object_pid), from the timestamp tv, the security label and the cached
 * boot/machine/hostname fields. The target journal uid is chosen from
 * s->split_mode at the end. */
541 static void dispatch_message_real(
543 struct iovec *iovec, unsigned n, unsigned m,
546 const char *label, size_t label_len,
/* Fixed buffers for the numeric fields, sized as prefix plus the maximum
 * decimal width of the value type. */
551 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
552 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
553 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
554 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
555 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
556 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
557 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
558 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
564 uid_t realuid = 0, owner = 0, journal_uid;
565 bool owner_valid = false;
567 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
568 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
569 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
570 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
/* The object fields are only counted when object_pid is non-zero. */
579 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
/* --- Fields describing the sending process (from ucred) --- */
582 realuid = ucred->uid;
584 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
585 IOVEC_SET_STRING(iovec[n++], pid);
587 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
588 IOVEC_SET_STRING(iovec[n++], uid);
590 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
591 IOVEC_SET_STRING(iovec[n++], gid);
/* The string-valued fields below are composed with strappenda(). */
593 r = get_process_comm(ucred->pid, &t);
595 x = strappenda("_COMM=", t);
597 IOVEC_SET_STRING(iovec[n++], x);
600 r = get_process_exe(ucred->pid, &t);
602 x = strappenda("_EXE=", t);
604 IOVEC_SET_STRING(iovec[n++], x);
607 r = get_process_cmdline(ucred->pid, 0, false, &t);
609 x = strappenda("_CMDLINE=", t);
611 IOVEC_SET_STRING(iovec[n++], x);
614 r = get_process_capeff(ucred->pid, &t);
616 x = strappenda("_CAP_EFFECTIVE=", t);
618 IOVEC_SET_STRING(iovec[n++], x);
622 r = audit_session_from_pid(ucred->pid, &audit);
624 sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
625 IOVEC_SET_STRING(iovec[n++], audit_session);
628 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
630 sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
631 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* Cgroup-derived fields: cgroup path, session, owner uid, unit, user unit
 * and slice. */
635 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
637 char *session = NULL;
639 x = strappenda("_SYSTEMD_CGROUP=", c);
640 IOVEC_SET_STRING(iovec[n++], x);
642 r = cg_path_get_session(c, &t);
644 session = strappenda("_SYSTEMD_SESSION=", t);
646 IOVEC_SET_STRING(iovec[n++], session);
649 if (cg_path_get_owner_uid(c, &owner) >= 0) {
652 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
653 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* When the cgroup path yields no unit, the caller-provided unit_id is
 * used: as _SYSTEMD_UNIT without a session, as _SYSTEMD_USER_UNIT with
 * one. */
656 if (cg_path_get_unit(c, &t) >= 0) {
657 x = strappenda("_SYSTEMD_UNIT=", t);
659 IOVEC_SET_STRING(iovec[n++], x);
660 } else if (unit_id && !session) {
661 x = strappenda("_SYSTEMD_UNIT=", unit_id);
662 IOVEC_SET_STRING(iovec[n++], x);
665 if (cg_path_get_user_unit(c, &t) >= 0) {
666 x = strappenda("_SYSTEMD_USER_UNIT=", t);
668 IOVEC_SET_STRING(iovec[n++], x);
669 } else if (unit_id && session) {
670 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
671 IOVEC_SET_STRING(iovec[n++], x);
674 if (cg_path_get_slice(c, &t) >= 0) {
675 x = strappenda("_SYSTEMD_SLICE=", t);
677 IOVEC_SET_STRING(iovec[n++], x);
/* No cgroup path available: fall back to unit_id alone. */
681 } else if (unit_id) {
682 x = strappenda("_SYSTEMD_UNIT=", unit_id);
683 IOVEC_SET_STRING(iovec[n++], x);
/* SELinux context: built from the label passed in (label_len bytes,
 * terminated here), or queried with getpidcon() in the other branch. */
689 x = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);
691 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
692 IOVEC_SET_STRING(iovec[n++], x);
694 security_context_t con;
696 if (getpidcon(ucred->pid, &con) >= 0) {
697 x = strappenda("_SELINUX_CONTEXT=", con);
700 IOVEC_SET_STRING(iovec[n++], x);
/* --- OBJECT_* fields describing the process object_pid --- */
709 r = get_process_uid(object_pid, &object_uid);
711 sprintf(o_uid, "OBJECT_UID=%lu", (unsigned long) object_uid);
712 IOVEC_SET_STRING(iovec[n++], o_uid);
715 r = get_process_gid(object_pid, &object_gid);
717 sprintf(o_gid, "OBJECT_GID=%lu", (unsigned long) object_gid);
718 IOVEC_SET_STRING(iovec[n++], o_gid);
721 r = get_process_comm(object_pid, &t);
723 x = strappenda("OBJECT_COMM=", t);
725 IOVEC_SET_STRING(iovec[n++], x);
728 r = get_process_exe(object_pid, &t);
730 x = strappenda("OBJECT_EXE=", t);
732 IOVEC_SET_STRING(iovec[n++], x);
735 r = get_process_cmdline(object_pid, 0, false, &t);
737 x = strappenda("OBJECT_CMDLINE=", t);
739 IOVEC_SET_STRING(iovec[n++], x);
743 r = audit_session_from_pid(object_pid, &audit);
745 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%lu", (unsigned long) audit);
746 IOVEC_SET_STRING(iovec[n++], o_audit_session);
749 r = audit_loginuid_from_pid(object_pid, &loginuid);
751 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
752 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
756 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
758 x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
759 IOVEC_SET_STRING(iovec[n++], x);
761 r = cg_path_get_session(c, &t);
763 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
765 IOVEC_SET_STRING(iovec[n++], x);
768 if (cg_path_get_owner_uid(c, &owner) >= 0) {
769 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
770 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
773 if (cg_path_get_unit(c, &t) >= 0) {
774 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
776 IOVEC_SET_STRING(iovec[n++], x);
779 if (cg_path_get_user_unit(c, &t) >= 0) {
780 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
782 IOVEC_SET_STRING(iovec[n++], x);
/* --- Timestamp taken from tv, then the cached per-server fields --- */
791 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
792 IOVEC_SET_STRING(iovec[n++], source_time);
795 /* Note that strictly speaking storing the boot id here is
796 * redundant since the entry includes this in-line
797 * anyway. However, we need this indexed, too. */
798 if (!isempty(s->boot_id_field))
799 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
801 if (!isempty(s->machine_id_field))
802 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
804 if (!isempty(s->hostname_field))
805 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
/* --- Choose which journal (by uid) receives the entry --- */
809 if (s->split_mode == SPLIT_UID && realuid > 0)
810 /* Split up strictly by any UID */
811 journal_uid = realuid;
812 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
813 /* Split up by login UIDs, this avoids creation of
814 * individual journals for system UIDs. We do this
815 * only if the realuid is not root, in order not to
816 * accidentally leak privileged information to the
817 * user that is logged by a privileged process that is
818 * part of an unprivileged session.*/
823 write_to_journal(s, journal_uid, iovec, n, priority);
/* Logs a message originating from the journal daemon itself.
 *
 * The printf-style `format` and its arguments become the MESSAGE= field.
 * The entry also carries PRIORITY=6 and _TRANSPORT=driver, plus a message
 * ID field when message_id is not SD_ID128_NULL. It is dispatched with the
 * daemon's own pid/uid/gid as credentials, at LOG_INFO. */
826 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
827 char mid[11 + 32 + 1];
828 char buffer[16 + LINE_MAX + 1];
/* Room for the (up to) four fields set here plus the metadata fields that
 * dispatch_message_real() appends. */
829 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
832 struct ucred ucred = {};
837 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
838 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The formatted text is written after the 8-byte "MESSAGE=" prefix. */
840 memcpy(buffer, "MESSAGE=", 8);
841 va_start(ap, format);
842 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
844 char_array_0(buffer);
845 IOVEC_SET_STRING(iovec[n++], buffer);
847 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
848 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
850 IOVEC_SET_STRING(iovec[n++], mid);
853 ucred.pid = getpid();
854 ucred.uid = getuid();
855 ucred.gid = getgid();
857 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
/* Entry point for storing a received message: filters by priority and
 * storage mode, applies per-cgroup rate limiting and then forwards the
 * fields to dispatch_message_real(). When the rate limiter reports that
 * earlier messages were suppressed, a driver message saying so is logged. */
860 void server_dispatch_message(
862 struct iovec *iovec, unsigned n, unsigned m,
865 const char *label, size_t label_len,
871 _cleanup_free_ char *path = NULL;
875 assert(iovec || n == 0);
/* Messages less important than the configured store level are filtered. */
880 if (LOG_PRI(priority) > s->max_level_store)
883 /* Stop early in case the information will not be stored
885 if (s->storage == STORAGE_NONE)
891 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
895 /* example: /user/lennart/3/foobar
896 * /system/dbus.service/foobar
898 * So let's cut off everything past the third /, since that is
899 * where user directories start */
/* Locate the first three '/' characters in the cgroup path. */
901 c = strchr(path, '/');
903 c = strchr(c+1, '/');
905 c = strchr(c+1, '/');
/* The rate-limit test is keyed on the path and also receives the priority
 * level and the currently available journal space. */
911 rl = journal_rate_limit_test(s->rate_limit, path,
912 priority & LOG_PRIMASK, available_space(s, false));
917 /* Write a suppression message if we suppressed something */
919 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
920 "Suppressed %u messages from %s", rl - 1, path);
923 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
/* Opens the journals the server writes to, as far as they are not open yet.
 *
 * The persistent journal /var/log/journal/<machine-id>/system.journal is
 * opened when storage is "persistent" or "auto" and the flag file
 * /run/systemd/journal/flushed exists. The runtime journal
 * /run/log/journal/<machine-id>/system.journal is opened unless storage is
 * "none": only if it already exists when a system journal is open, created
 * if necessary otherwise. Ends by calling available_space() in verbose
 * mode. */
927 static int system_journal_open(Server *s) {
933 r = sd_id128_get_machine(&machine);
935 log_error("Failed to get machine id: %s", strerror(-r));
939 sd_id128_to_string(machine, ids);
941 if (!s->system_journal &&
942 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
943 access("/run/systemd/journal/flushed", F_OK) >= 0) {
945 /* If in auto mode: first try to create the machine
946 * path, but not the prefix.
948 * If in persistent mode: create /var/log/journal and
949 * the machine path */
/* mkdir() results are deliberately discarded. */
951 if (s->storage == STORAGE_PERSISTENT)
952 (void) mkdir("/var/log/journal/", 0755);
954 fn = strappenda("/var/log/journal/", ids);
955 (void) mkdir(fn, 0755);
957 fn = strappenda(fn, "/system.journal");
958 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
961 server_fix_perms(s, s->system_journal, 0);
/* -ENOENT and -EROFS are not warned about. */
963 if (r != -ENOENT && r != -EROFS)
964 log_warning("Failed to open system journal: %s", strerror(-r));
970 if (!s->runtime_journal &&
971 (s->storage != STORAGE_NONE)) {
973 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
977 if (s->system_journal) {
979 /* Try to open the runtime journal, but only
980 * if it already exists, so that we can flush
981 * it into the system journal */
/* No O_CREAT here, and sealing is disabled for the runtime journal. */
983 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
988 log_warning("Failed to open runtime journal: %s", strerror(-r));
995 /* OK, we really need the runtime journal, so create
996 * it if necessary. */
998 (void) mkdir_parents(fn, 0755);
999 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1003 log_error("Failed to open runtime journal: %s", strerror(-r));
1008 if (s->runtime_journal)
1009 server_fix_perms(s, s->runtime_journal, 0);
/* Refresh the space cache (verbose flag set). */
1012 available_space(s, true);
/* Copies all entries of the runtime journal into the system journal, then
 * closes the runtime journal and removes /run/log/journal.
 *
 * Only applies when storage is "auto" or "persistent", a runtime journal is
 * open, and the system journal is open after system_journal_open() has been
 * called. A write that fails with a code accepted by
 * shall_try_append_again() is retried once. The elapsed time and entry
 * count are reported through server_driver_message(). */
1017 int server_flush_to_var(Server *s) {
1019 sd_journal *j = NULL;
1020 char ts[FORMAT_TIMESPAN_MAX];
1027 if (s->storage != STORAGE_AUTO &&
1028 s->storage != STORAGE_PERSISTENT)
1031 if (!s->runtime_journal)
1034 system_journal_open(s);
1036 if (!s->system_journal)
1039 log_debug("Flushing to /var...");
1041 start = now(CLOCK_MONOTONIC);
1043 r = sd_id128_get_machine(&machine);
/* Read back only the runtime journal files. */
1047 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1049 log_error("Failed to read runtime journal: %s", strerror(-r));
/* Threshold 0 (presumably "no size limit on returned data" -- confirm
 * against the sd_journal_set_data_threshold() documentation). */
1053 sd_journal_set_data_threshold(j, 0);
1055 SD_JOURNAL_FOREACH(j) {
1059 f = j->current_file;
1060 assert(f && f->current_offset > 0);
1064 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1066 log_error("Can't read entry: %s", strerror(-r));
1070 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1074 if (!shall_try_append_again(s->system_journal, r)) {
1075 log_error("Can't write entry: %s", strerror(-r));
/* The system journal pointer is NULL here when rotating it failed (see
 * the notice below). */
1082 if (!s->system_journal) {
1083 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1088 log_debug("Retrying write.");
1089 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1091 log_error("Can't write entry: %s", strerror(-r));
1097 journal_file_post_change(s->system_journal);
1099 journal_file_close(s->runtime_journal);
1100 s->runtime_journal = NULL;
1103 rm_rf("/run/log/journal", false, true, false);
1105 sd_journal_close(j);
1107 server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
/* Event-loop callback for the native and syslog datagram sockets.
 *
 * Receives datagrams into s->buffer, extracts credentials, security label,
 * timestamp and passed file descriptors from the control messages, and
 * hands the payload to the syslog or native message handlers. Any file
 * descriptors received are closed at the end. userdata is the Server. */
1112 int process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1113 Server *s = userdata;
1116 assert(fd == s->native_fd || fd == s->syslog_fd);
/* Anything other than exactly EPOLLIN is treated as an error. */
1118 if (revents != EPOLLIN) {
1119 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1124 struct ucred *ucred = NULL;
1125 struct timeval *tv = NULL;
1126 struct cmsghdr *cmsg;
1128 size_t label_len = 0;
1132 struct cmsghdr cmsghdr;
1134 /* We use NAME_MAX space for the SELinux label
1135 * here. The kernel currently enforces no
1136 * limit, but according to suggestions from
1137 * the SELinux people this will change and it
1138 * will probably be identical to NAME_MAX. For
1139 * now we use that, but this should be updated
1140 * one day when the final limit is known.*/
1141 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1142 CMSG_SPACE(sizeof(struct timeval)) +
1143 CMSG_SPACE(sizeof(int)) + /* fd */
1144 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1146 struct msghdr msghdr = {
1149 .msg_control = &control,
1150 .msg_controllen = sizeof(control),
/* Ask the kernel for the pending byte count (SIOCINQ) and size the
 * receive buffer to LINE_MAX plus that value. */
1158 if (ioctl(fd, SIOCINQ, &v) < 0) {
1159 log_error("SIOCINQ failed: %m");
1163 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, LINE_MAX + (size_t) v))
1166 iovec.iov_base = s->buffer;
1167 iovec.iov_len = s->buffer_size;
1169 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
/* EINTR and EAGAIN are not reported as failures. */
1171 if (errno == EINTR || errno == EAGAIN)
1174 log_error("recvmsg() failed: %m");
/* Walk the control messages. Credentials and timestamp are accepted only
 * when the payload length matches the expected struct exactly. */
1178 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1180 if (cmsg->cmsg_level == SOL_SOCKET &&
1181 cmsg->cmsg_type == SCM_CREDENTIALS &&
1182 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1183 ucred = (struct ucred*) CMSG_DATA(cmsg);
1184 else if (cmsg->cmsg_level == SOL_SOCKET &&
1185 cmsg->cmsg_type == SCM_SECURITY) {
1186 label = (char*) CMSG_DATA(cmsg);
1187 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1188 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1189 cmsg->cmsg_type == SO_TIMESTAMP &&
1190 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1191 tv = (struct timeval*) CMSG_DATA(cmsg);
1192 else if (cmsg->cmsg_level == SOL_SOCKET &&
1193 cmsg->cmsg_type == SCM_RIGHTS) {
1194 fds = (int*) CMSG_DATA(cmsg);
1195 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* Syslog socket: text payload only; passed file descriptors are ignored
 * with a warning. */
1199 if (fd == s->syslog_fd) {
1200 if (n > 0 && n_fds == 0) {
1202 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1203 } else if (n_fds > 0)
1204 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native socket: either an inline payload, or an empty datagram carrying
 * exactly one file descriptor. */
1207 if (n > 0 && n_fds == 0)
1208 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1209 else if (n == 0 && n_fds == 1)
1210 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1212 log_warning("Got too many file descriptors via native socket. Ignoring.");
1215 close_many(fds, n_fds);
/* SIGUSR1 handler: logs the requesting PID, creates the flag file
 * /run/systemd/journal/flushed and flushes the runtime journal to /var.
 * userdata is the Server. */
1219 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1220 Server *s = userdata;
1224 log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
/* system_journal_open() checks for this file before opening the
 * persistent journal. */
1226 touch("/run/systemd/journal/flushed");
1227 server_flush_to_var(s);
/* SIGUSR2 handler: logs the PID that requested a journal rotation.
 * userdata is the Server. NOTE(review): the statements that act on the
 * request are not visible in this excerpt. */
1233 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1234 Server *s = userdata;
1238 log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
/* Handler registered for SIGTERM and SIGINT (see setup_signals()): logs the
 * received signal and asks the event loop to exit with code 0.
 * userdata is the Server. */
1245 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1246 Server *s = userdata;
1250 log_received_signal(LOG_INFO, si);
1252 sd_event_exit(s->event, 0);
/* Sets the process signal mask to SIGINT, SIGTERM, SIGUSR1 and SIGUSR2 and
 * registers an event source for each with the server's event loop.
 * SIGTERM and SIGINT share dispatch_sigterm(). */
1256 static int setup_signals(Server *s) {
1262 assert_se(sigemptyset(&mask) == 0);
1263 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
/* SIG_SETMASK replaces the current mask rather than adding to it. */
1264 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1266 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1270 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1274 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1278 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
/* Applies journald switches given on the kernel command line.
 *
 * Recognised words are systemd.journald.forward_to_syslog=,
 * systemd.journald.forward_to_kmsg= and
 * systemd.journald.forward_to_console=, each taking a boolean. Unparsable
 * values and other words starting with "systemd.journald" produce a
 * warning and are ignored. */
1285 static int server_parse_proc_cmdline(Server *s) {
1286 _cleanup_free_ char *line = NULL;
1291 r = proc_cmdline(&line);
1293 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1297 FOREACH_WORD_QUOTED(w, l, line, state) {
1298 _cleanup_free_ char *word;
1300 word = strndup(w, l);
/* The numeric offsets below (35, 33, 36) are the lengths of the
 * respective "systemd.journald....=" prefixes. */
1304 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1305 r = parse_boolean(word + 35);
1307 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1309 s->forward_to_syslog = r;
1310 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1311 r = parse_boolean(word + 33);
1313 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1315 s->forward_to_kmsg = r;
1316 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1317 r = parse_boolean(word + 36);
1319 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1321 s->forward_to_console = r;
1322 } else if (startswith(word, "systemd.journald"))
1323 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Parses the [Journal] section of /etc/systemd/journald.conf into the
 * Server object, using the gperf-generated lookup table. A missing file
 * (ENOENT) is handled separately from other open errors, which are logged
 * as warnings, as are parse failures. */
1329 static int server_parse_config_file(Server *s) {
1330 static const char fn[] = "/etc/systemd/journald.conf";
/* The stream is closed automatically when f goes out of scope. */
1331 _cleanup_fclose_ FILE *f = NULL;
1336 f = fopen(fn, "re");
1338 if (errno == ENOENT)
1341 log_warning("Failed to open configuration file %s: %m", fn);
1345 r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1346 (void*) journald_gperf_lookup, false, false, s);
1348 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Timer callback registered by server_schedule_sync(); userdata is the
 * Server. NOTE(review): the statements that perform the sync are not
 * visible in this excerpt. */
1353 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1354 Server *s = userdata;
/* Arranges for the journals to be synced after a write of the given
 * priority.
 *
 * Priorities CRIT, ALERT and EMERG are synced immediately. Otherwise,
 * unless a sync is already scheduled, a one-shot monotonic timer is armed
 * s->sync_interval_usec from now (when that interval is non-zero) and
 * s->sync_scheduled is set. */
1362 int server_schedule_sync(Server *s, int priority) {
1367 if (priority <= LOG_CRIT) {
1368 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1373 if (s->sync_scheduled)
1376 if (s->sync_interval_usec > 0) {
1379 r = sd_event_get_now_monotonic(s->event, &when);
1383 when += s->sync_interval_usec;
/* Create the timer source on first use; afterwards only its expiry time
 * is updated. */
1385 if (!s->sync_event_source) {
1386 r = sd_event_add_monotonic(s->event, &s->sync_event_source, when, 0, server_dispatch_sync, s);
1390 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1392 r = sd_event_source_set_time(s->sync_event_source, when);
1396 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1401 s->sync_scheduled = true;
/* I/O callback for the hostname file descriptor: refreshes the cached
 * _HOSTNAME= field. userdata is the Server. */
1407 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1408 Server *s = userdata;
1412 server_cache_hostname(s);
/* Opens /proc/sys/kernel/hostname and registers it with the event loop so
 * that dispatch_hostname_change() runs on changes. If registration fails
 * in the case the in-body comment describes (kernels before 3.2 cannot
 * poll this file), a warning is logged, the descriptor is closed and
 * s->hostname_fd is reset to -1; other registration failures are logged as
 * errors. The event source is given priority
 * SD_EVENT_PRIORITY_IMPORTANT-10. */
1416 static int server_open_hostname(Server *s) {
1421 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1422 if (s->hostname_fd < 0) {
1423 log_error("Failed to open /proc/sys/kernel/hostname: %m");
1427 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1429 /* kernels prior to 3.2 don't support polling this file. Ignore
1432 log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1434 close_nointr_nofail(s->hostname_fd);
1435 s->hostname_fd = -1;
1439 log_error("Failed to register hostname fd in event loop: %s", strerror(-r));
1443 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1445 log_error("Failed to adjust priority of host name event source: %s", strerror(-r));
/* Initialises the Server object: sets defaults, reads the configuration
 * file and kernel command line, creates the user-journal map, mmap cache
 * and event loop, adopts sockets passed in by the service manager, opens
 * the remaining sockets and sources, caches the hostname/boot/machine
 * fields and finally opens the journals. */
1452 int server_init(Server *s) {
/* All descriptors start out as "not open". */
1458 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->hostname_fd = -1;
1462 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1463 s->sync_scheduled = false;
1465 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1466 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1468 s->forward_to_syslog = true;
1470 s->max_level_store = LOG_DEBUG;
1471 s->max_level_syslog = LOG_DEBUG;
1472 s->max_level_kmsg = LOG_NOTICE;
1473 s->max_level_console = LOG_INFO;
/* Both metrics structs are filled with 0xFF bytes (presumably an "unset"
 * marker for every field -- confirm against the JournalMetrics users). */
1475 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1476 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* The kernel command line is parsed after the configuration file. */
1478 server_parse_config_file(s);
1479 server_parse_proc_cmdline(s);
/* If exactly one of interval and burst is zero, both are set to zero. */
1480 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1481 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1482 (long long unsigned) s->rate_limit_interval,
1483 s->rate_limit_burst);
1484 s->rate_limit_interval = s->rate_limit_burst = 0;
1487 mkdir_p("/run/systemd/journal", 0755);
1489 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1490 if (!s->user_journals)
1493 s->mmap = mmap_cache_new();
1497 r = sd_event_default(&s->event);
1499 log_error("Failed to create event loop: %s", strerror(-r));
1503 sd_event_set_watchdog(s->event, true);
/* Classify each passed-in descriptor by socket type and path; more than
 * one socket of a kind, or an unrecognised one, is logged as an error. */
1505 n = sd_listen_fds(true);
1507 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1511 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1513 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1515 if (s->native_fd >= 0) {
1516 log_error("Too many native sockets passed.");
1522 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1524 if (s->stdout_fd >= 0) {
1525 log_error("Too many stdout sockets passed.");
1531 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1533 if (s->syslog_fd >= 0) {
1534 log_error("Too many /dev/log sockets passed.");
1541 log_error("Unknown socket passed.");
/* Open the individual input sources and helpers. */
1546 r = server_open_syslog_socket(s);
1550 r = server_open_native_socket(s);
1554 r = server_open_stdout_socket(s);
1558 r = server_open_dev_kmsg(s);
1562 r = server_open_kernel_seqnum(s);
1566 r = server_open_hostname(s);
1570 r = setup_signals(s);
1574 s->udev = udev_new();
1578 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1582 r = cg_get_root_path(&s->cgroup_root);
/* Cache the fields that dispatch_message_real() attaches to every entry. */
1586 server_cache_hostname(s);
1587 server_cache_boot_id(s);
1588 server_cache_machine_id(s);
1590 r = system_journal_open(s);
/* Calls journal_file_maybe_append_tag() with the current CLOCK_REALTIME
 * timestamp on the system journal (if open) and on every per-user
 * journal. */
1597 void server_maybe_append_tags(Server *s) {
1603 n = now(CLOCK_REALTIME);
1605 if (s->system_journal)
1606 journal_file_maybe_append_tag(s->system_journal, n);
1608 HASHMAP_FOREACH(f, s->user_journals, i)
1609 journal_file_maybe_append_tag(f, n);
/* Releases everything the Server owns: stdout streams, the system, runtime
 * and per-user journals, all event sources and the event loop, the open
 * file descriptors, the rate limiter, the kernel sequence number mapping,
 * the cgroup root string, the mmap cache and the udev context. */
1613 void server_done(Server *s) {
/* Loops until the list head is NULL (stdout_stream_free() presumably
 * unlinks the stream it frees -- confirm). */
1617 while (s->stdout_streams)
1618 stdout_stream_free(s->stdout_streams);
1620 if (s->system_journal)
1621 journal_file_close(s->system_journal);
1623 if (s->runtime_journal)
1624 journal_file_close(s->runtime_journal);
/* Drain the map, closing each user journal, before freeing the map. */
1626 while ((f = hashmap_steal_first(s->user_journals)))
1627 journal_file_close(f);
1629 hashmap_free(s->user_journals);
1631 sd_event_source_unref(s->syslog_event_source);
1632 sd_event_source_unref(s->native_event_source);
1633 sd_event_source_unref(s->stdout_event_source);
1634 sd_event_source_unref(s->dev_kmsg_event_source);
1635 sd_event_source_unref(s->sync_event_source);
1636 sd_event_source_unref(s->sigusr1_event_source);
1637 sd_event_source_unref(s->sigusr2_event_source);
1638 sd_event_source_unref(s->sigterm_event_source);
1639 sd_event_source_unref(s->sigint_event_source);
1640 sd_event_source_unref(s->hostname_event_source);
1641 sd_event_unref(s->event);
/* Descriptors that were never opened hold -1 (set in server_init()). */
1643 if (s->syslog_fd >= 0)
1644 close_nointr_nofail(s->syslog_fd);
1646 if (s->native_fd >= 0)
1647 close_nointr_nofail(s->native_fd);
1649 if (s->stdout_fd >= 0)
1650 close_nointr_nofail(s->stdout_fd);
1652 if (s->dev_kmsg_fd >= 0)
1653 close_nointr_nofail(s->dev_kmsg_fd);
1655 if (s->hostname_fd >= 0)
1656 close_nointr_nofail(s->hostname_fd);
1659 journal_rate_limit_free(s->rate_limit);
/* The mapping is unmapped with a length of sizeof(uint64_t). */
1661 if (s->kernel_seqnum)
1662 munmap(s->kernel_seqnum, sizeof(uint64_t));
1666 free(s->cgroup_root);
1669 mmap_cache_unref(s->mmap);
1672 udev_unref(s->udev);