1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
43 #include "conf-parser.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-server.h"
48 #include "journald-rate-limit.h"
49 #include "journald-kmsg.h"
50 #include "journald-syslog.h"
51 #include "journald-stream.h"
52 #include "journald-console.h"
53 #include "journald-native.h"
57 #include <acl/libacl.h>
62 #include <selinux/selinux.h>
/* Cap on the number of entries kept in s->user_journals; find_journal()
 * closes entries while the map holds this many or more. */
65 #define USER_JOURNALS_MAX 1024
/* Defaults that server_init() assigns before the configuration is parsed. */
67 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
68 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
69 #define DEFAULT_RATE_LIMIT_BURST 200
/* How long available_space() keeps returning its cached result. */
71 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Textual names of the Storage enum values, indexed by enum value. */
73 static const char* const storage_table[] = {
74 [STORAGE_AUTO] = "auto",
75 [STORAGE_VOLATILE] = "volatile",
76 [STORAGE_PERSISTENT] = "persistent",
77 [STORAGE_NONE] = "none"
/* Presumably these macros generate the string<->enum helpers and the
 * config_parse_storage() callback from the table above - macro definitions
 * are not visible here, confirm against their header. */
80 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
81 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Textual names of the SplitMode enum values, indexed by enum value.
 * NOTE(review): dispatch_message_real() also uses SPLIT_UID; its entry is
 * not visible in this view of the table - confirm it is present. */
83 static const char* const split_mode_table[] = {
84 [SPLIT_NONE] = "none",
86 [SPLIT_LOGIN] = "login"
/* Presumably generates the string<->enum helpers and the
 * config_parse_split_mode() callback - confirm against the macro definitions. */
89 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
90 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Computes how many more bytes the journal may occupy in its active
 * directory and caches the answer in the Server object. The cached value is
 * returned as long as it is younger than RECHECK_AVAILABLE_SPACE_USEC. */
92 static uint64_t available_space(Server *s) {
94 _cleanup_free_ char *p = NULL;
98 uint64_t sum = 0, avail = 0, ss_avail = 0;
100 _cleanup_closedir_ DIR *d = NULL;
104 ts = now(CLOCK_MONOTONIC);
/* Fast path: the previous measurement is still fresh. */
106 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
107 return s->cached_available_space;
109 r = sd_id128_get_machine(&machine);
/* Measure /var with the system metrics when the system journal is open,
 * otherwise /run with the runtime metrics. */
113 if (s->system_journal) {
114 f = "/var/log/journal/";
115 m = &s->system_metrics;
117 f = "/run/log/journal/";
118 m = &s->runtime_metrics;
/* Directory to scan: <prefix><machine-id>. */
123 p = strappend(f, sd_id128_to_string(machine, ids));
131 if (fstatvfs(dirfd(d), &ss) < 0)
137 union dirent_storage buf;
139 r = readdir_r(d, &buf.de, &de);
/* Only entries named *.journal or *.journal~ that are regular files are
 * summed up. */
146 if (!endswith(de->d_name, ".journal") &&
147 !endswith(de->d_name, ".journal~"))
150 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
153 if (!S_ISREG(st.st_mode))
/* Disk usage is derived from the allocated block count times 512. */
156 sum += (uint64_t) st.st_blocks * 512UL;
/* Headroom left below the configured usage cap, clamped at zero. */
159 avail = sum >= m->max_use ? 0 : m->max_use - sum;
/* Free space on the file system minus the reserve to keep free, clamped
 * at zero.
 * NOTE(review): statvfs(3) describes the block counts in f_frsize units
 * while f_bsize is used here - confirm the two agree on the file systems
 * this runs on. */
161 ss_avail = ss.f_bsize * ss.f_bavail;
163 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
/* Presumably the smaller of the two limits is taken (the assignment under
 * this condition is not visible here). */
165 if (ss_avail < avail)
/* Remember the result together with the time it was measured. */
168 s->cached_available_space = avail;
169 s->cached_available_space_timestamp = ts;
/* Looks up the "systemd-journal" group and stores its GID in s->file_gid.
 * The lookup is attempted only once per Server: s->file_gid_valid is set
 * afterwards regardless of the outcome, and a failure is merely logged. */
174 static void server_read_file_gid(Server *s) {
175 const char *g = "systemd-journal";
/* Already resolved (or already tried) earlier. */
180 if (s->file_gid_valid)
183 r = get_group_creds(&g, &s->file_gid);
185 log_warning("Failed to resolve '%s' group: %s", g, strerror(-r));
187 /* if we couldn't read the gid, then it will be 0, but that's
188 * fine and we shouldn't try to resolve the group again, so
189 * let's just pretend it worked right-away. */
190 s->file_gid_valid = true;
/* Normalises access rights on journal file f: sets mode 0640 with owner
 * uid 0 and group s->file_gid, then edits the file's ACL so that it holds
 * an ACL_USER entry for uid with read permission.
 * Every failure is logged as a warning and otherwise ignored.
 * NOTE(review): any early exit for particular uid values is not visible in
 * this view - confirm before relying on ACL handling for uid 0. */
193 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
198 acl_permset_t permset;
/* Make sure s->file_gid has been resolved before it is used below. */
203 server_read_file_gid(s);
205 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
207 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
213 acl = acl_get_fd(f->fd);
215 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an existing entry for uid; the code below creates a new
 * ACL_USER entry qualified with uid. */
219 r = acl_find_uid(acl, uid, &entry);
222 if (acl_create_entry(&acl, &entry) < 0 ||
223 acl_set_tag_type(entry, ACL_USER) < 0 ||
224 acl_set_qualifier(entry, &uid) < 0) {
225 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
230 /* We do not recalculate the mask unconditionally here,
231 * so that the fchmod() mask above stays intact. */
232 if (acl_get_permset(entry, &permset) < 0 ||
233 acl_add_perm(permset, ACL_READ) < 0 ||
234 calc_acl_mask_if_needed(&acl) < 0) {
235 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Write the modified ACL back to the file. */
239 if (acl_set_fd(f->fd, acl) < 0)
240 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Selects the journal file that an entry attributed to uid is written to.
 * The runtime journal is used exclusively while it is open. Otherwise a
 * per-user file under /var/log/journal/<machine-id>/ is looked up in, or
 * opened and added to, s->user_journals. Whenever the per-user file cannot
 * be determined or opened, s->system_journal is returned instead. */
247 static JournalFile* find_journal(Server *s, uid_t uid) {
248 _cleanup_free_ char *p = NULL;
255 /* We split up user logs only on /var, not on /run. If the
256 * runtime file is open, we write to it exclusively, in order
257 * to guarantee proper order as soon as we flush /run to
258 * /var and close the runtime file. */
260 if (s->runtime_journal)
261 return s->runtime_journal;
264 return s->system_journal;
266 r = sd_id128_get_machine(&machine);
268 return s->system_journal;
/* Re-use an already open per-user journal if there is one. */
270 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
274 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
275 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
276 return s->system_journal;
/* Keep the number of open per-user files below USER_JOURNALS_MAX. */
278 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
279 /* Too many open? Then let's close one */
280 f = hashmap_steal_first(s->user_journals);
282 journal_file_close(f);
285 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
287 return s->system_journal;
/* Give the owning user read access to the freshly opened file. */
289 server_fix_perms(s, f, uid);
/* Register the file under its uid; if that fails, close it again and
 * fall back to the system journal. */
291 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
293 journal_file_close(f);
294 return s->system_journal;
/* Rotates the runtime journal, the system journal and every per-user
 * journal that is currently open. Failures are logged; after a rotation
 * server_fix_perms() is applied to the new file. */
300 void server_rotate(Server *s) {
306 log_debug("Rotating...");
/* Runtime journal: rotated with sealing disabled (last argument false). */
308 if (s->runtime_journal) {
309 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
/* The pointer may or may not still be set after a failed rotation, hence
 * the two different messages. */
311 if (s->runtime_journal)
312 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
314 log_error("Failed to create new runtime journal: %s", strerror(-r));
316 server_fix_perms(s, s->runtime_journal, 0);
/* System journal: same procedure, honouring the configured seal setting. */
319 if (s->system_journal) {
320 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
322 if (s->system_journal)
323 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
325 log_error("Failed to create new system journal: %s", strerror(-r));
328 server_fix_perms(s, s->system_journal, 0);
/* Per-user journals: the rotated file replaces the old map value under
 * the same key, and the key (the uid) is used to restore the ACL. */
331 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
332 r = journal_file_rotate(&f, s->compress, s->seal);
335 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
337 log_error("Failed to create user journal: %s", strerror(-r));
339 hashmap_replace(s->user_journals, k, f);
340 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Sets the system journal and all open per-user journals offline via
 * journal_file_set_offline(), then disarms the sync timer and clears
 * s->sync_scheduled. Errors are logged only. */
345 void server_sync(Server *s) {
/* An all-zero itimerspec disarms the timerfd. */
351 static const struct itimerspec sync_timer_disable = {};
353 if (s->system_journal) {
354 r = journal_file_set_offline(s->system_journal);
356 log_error("Failed to sync system journal: %s", strerror(-r));
359 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
360 r = journal_file_set_offline(f);
362 log_error("Failed to sync user journal: %s", strerror(-r));
365 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
367 log_error("Failed to disable max timer: %m");
/* Allow server_schedule_sync() to arm the timer again. */
369 s->sync_scheduled = false;
/* Runs journal_directory_vacuum() on this machine's journal directory
 * under /var (when the system journal is open) and under /run (when the
 * runtime journal is open), using the respective metrics and the shared
 * retention limit. s->oldest_file_usec is reset first and passed to the
 * vacuum calls to be filled in. */
372 void server_vacuum(Server *s) {
377 log_debug("Vacuuming...");
379 s->oldest_file_usec = 0;
381 r = sd_id128_get_machine(&machine);
383 log_error("Failed to get machine ID: %s", strerror(-r));
387 sd_id128_to_string(machine, ids);
389 if (s->system_journal) {
390 char *p = strappenda("/var/log/journal/", ids);
392 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
/* A missing directory is not worth reporting. */
393 if (r < 0 && r != -ENOENT)
394 log_error("Failed to vacuum %s: %s", p, strerror(-r));
397 if (s->runtime_journal) {
398 char *p = strappenda("/run/log/journal/", ids);
400 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
401 if (r < 0 && r != -ENOENT)
402 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* Zeroing the timestamp makes the next available_space() call recompute
 * instead of using its cache. */
405 s->cached_available_space_timestamp = 0;
408 bool shall_try_append_again(JournalFile *f, int r) {
410 /* -E2BIG Hit configured limit
412 -EDQUOT Quota limit hit
414 -EHOSTDOWN Other machine
415 -EBUSY Unclean shutdown
416 -EPROTONOSUPPORT Unsupported feature
419 -ESHUTDOWN Already archived */
421 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
422 log_debug("%s: Allocation limit reached, rotating.", f->path);
423 else if (r == -EHOSTDOWN)
424 log_info("%s: Journal file from other machine, rotating.", f->path);
425 else if (r == -EBUSY)
426 log_info("%s: Unclean shutdown, rotating.", f->path);
427 else if (r == -EPROTONOSUPPORT)
428 log_info("%s: Unsupported feature, rotating.", f->path);
429 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
430 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Appends one entry, given as n iovec fields, to the journal chosen by
 * find_journal() for uid. If the file suggests rotation, or the first
 * append fails with an error that shall_try_append_again() accepts, the
 * journal is looked up again and the write is retried once. A final failure
 * is logged and the entry is dropped. */
437 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
/* Tracks whether the rotate/vacuum step has already happened. */
439 bool vacuumed = false;
446 f = find_journal(s, uid);
450 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
451 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
/* Pick the journal again, since rotation replaces the file objects. */
456 f = find_journal(s, uid);
461 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
/* A write happened, so make sure a sync is scheduled. */
463 server_schedule_sync(s);
/* Give up if we already rotated once or the error is not curable. */
467 if (vacuumed || !shall_try_append_again(f, r)) {
468 log_error("Failed to write entry, ignoring: %s", strerror(-r));
475 f = find_journal(s, uid);
479 log_debug("Retrying write.");
480 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
482 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* Completes an entry with trusted, underscore-prefixed metadata fields and
 * hands it to write_to_journal().
 *
 * iovec/n/m: field array, number of fields already filled in, and total
 *            capacity; the caller must leave room for N_IOVEC_META_FIELDS.
 * ucred:     credentials of the sender; source of _PID/_UID/_GID and of
 *            the per-process lookups below.
 * tv:        timestamp stored as _SOURCE_REALTIME_TIMESTAMP.
 * label/label_len: SELinux label stored as _SELINUX_CONTEXT.
 * unit_id:   unit name used when the cgroup path yields no unit.
 * NOTE(review): the NULL guards around ucred, tv and label are not visible
 * in this view - callers such as server_driver_message() pass NULL for tv
 * and label, so confirm the guards exist. */
485 static void dispatch_message_real(
487 struct iovec *iovec, unsigned n, unsigned m,
490 const char *label, size_t label_len,
491 const char *unit_id) {
/* Fixed-size buffers for the numeric fields, each sized for its prefix
 * plus the longest decimal rendering of the value type. */
493 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
494 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
495 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
496 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
497 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
498 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
499 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=";
500 char *comm, *exe, *cmdline, *cgroup, *session, *unit, *hostname;
504 uid_t realuid = 0, owner = 0, journal_uid;
505 bool owner_valid = false;
507 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
508 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
/* The metadata appended below must fit into the caller's array. */
517 assert(n + N_IOVEC_META_FIELDS <= m);
520 realuid = ucred->uid;
/* Credentials as delivered with the message. */
522 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
523 IOVEC_SET_STRING(iovec[n++], pid);
525 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
526 IOVEC_SET_STRING(iovec[n++], uid);
528 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
529 IOVEC_SET_STRING(iovec[n++], gid);
/* Process details looked up by PID; the field strings are built with
 * strappenda(). */
531 r = get_process_comm(ucred->pid, &t);
533 comm = strappenda("_COMM=", t);
535 IOVEC_SET_STRING(iovec[n++], comm);
538 r = get_process_exe(ucred->pid, &t);
540 exe = strappenda("_EXE=", t);
542 IOVEC_SET_STRING(iovec[n++], exe);
545 r = get_process_cmdline(ucred->pid, 0, false, &t);
547 cmdline = strappenda("_CMDLINE=", t);
549 IOVEC_SET_STRING(iovec[n++], cmdline);
/* Audit session and login uid of the sending process. */
553 r = audit_session_from_pid(ucred->pid, &audit);
555 sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
556 IOVEC_SET_STRING(iovec[n++], audit_session);
559 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
561 sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
562 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* Everything derived from the cgroup path: the path itself, session,
 * owner uid and unit. */
566 r = cg_pid_get_path_shifted(ucred->pid, NULL, &c);
568 cgroup = strappenda("_SYSTEMD_CGROUP=", c);
569 IOVEC_SET_STRING(iovec[n++], cgroup);
571 r = cg_path_get_session(c, &t);
573 session = strappenda("_SYSTEMD_SESSION=", t);
575 IOVEC_SET_STRING(iovec[n++], session);
578 if (cg_path_get_owner_uid(c, &owner) >= 0) {
581 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
582 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* Unit: prefer a system unit, then a user unit from the cgroup path, and
 * finally the unit_id supplied by the caller. */
585 if (cg_path_get_unit(c, &t) >= 0) {
586 unit = strappenda("_SYSTEMD_UNIT=", t);
588 } else if (cg_path_get_user_unit(c, &t) >= 0) {
589 unit = strappenda("_SYSTEMD_USER_UNIT=", t);
591 } else if (unit_id) {
593 unit = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
595 unit = strappenda("_SYSTEMD_UNIT=", unit_id);
600 IOVEC_SET_STRING(iovec[n++], unit);
/* SELinux context: built from the label passed in, or obtained with
 * getpidcon() for the sender's PID. */
607 char *selinux_context = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);
/* The label is copied by length and NUL-terminated explicitly. */
609 *((char*) mempcpy(stpcpy(selinux_context, "_SELINUX_CONTEXT="), label, label_len)) = 0;
610 IOVEC_SET_STRING(iovec[n++], selinux_context);
612 security_context_t con;
614 if (getpidcon(ucred->pid, &con) >= 0) {
615 char *selinux_context = strappenda("_SELINUX_CONTEXT=", con);
618 IOVEC_SET_STRING(iovec[n++], selinux_context);
625 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
626 IOVEC_SET_STRING(iovec[n++], source_time);
629 /* Note that strictly speaking storing the boot id here is
630 * redundant since the entry includes this in-line
631 * anyway. However, we need this indexed, too. */
632 r = sd_id128_get_boot(&id);
/* The id string is written directly behind the pre-initialised prefix. */
634 sd_id128_to_string(id, boot_id + sizeof("_BOOT_ID=") - 1);
635 IOVEC_SET_STRING(iovec[n++], boot_id);
638 r = sd_id128_get_machine(&id);
640 sd_id128_to_string(id, machine_id + sizeof("_MACHINE_ID=") - 1);
641 IOVEC_SET_STRING(iovec[n++], machine_id);
644 t = gethostname_malloc();
646 hostname = strappenda("_HOSTNAME=", t);
648 IOVEC_SET_STRING(iovec[n++], hostname);
/* Choose which journal receives the entry, depending on the split mode. */
653 if (s->split_mode == SPLIT_UID && realuid > 0)
654 /* Split up strictly by any UID */
655 journal_uid = realuid;
656 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
657 /* Split up by login UIDs, this avoids creation of
658 * individual journals for system UIDs. We do this
659 * only if the realuid is not root, in order not to
660 * accidentally leak privileged information to the
661 * user that is logged by a privileged process that is
662 * part of an unprivileged session.*/
667 write_to_journal(s, journal_uid, iovec, n);
/* Logs a message generated by the journal daemon itself. The entry gets
 * PRIORITY=6, _TRANSPORT=driver, a printf-style formatted MESSAGE= and,
 * when message_id is not the null ID, a message ID field. It is dispatched
 * with the daemon's own PID/UID/GID as credentials. */
670 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
671 char mid[11 + 32 + 1];
672 char buffer[16 + LINE_MAX + 1];
/* Room for the up to four fields set here plus the metadata that
 * dispatch_message_real() appends. */
673 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
676 struct ucred ucred = {};
681 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
682 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* Format the text right behind the 8-byte "MESSAGE=" prefix. */
684 memcpy(buffer, "MESSAGE=", 8);
685 va_start(ap, format);
686 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
/* Presumably forces NUL termination of the array - confirm against the
 * char_array_0() definition. */
688 char_array_0(buffer);
689 IOVEC_SET_STRING(iovec[n++], buffer);
691 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
692 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
694 IOVEC_SET_STRING(iovec[n++], mid);
/* The daemon is the sender of this message. */
697 ucred.pid = getpid();
698 ucred.uid = getuid();
699 ucred.gid = getgid();
701 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Entry point for messages received from clients. Messages whose level is
 * above s->max_level_store are filtered out. The sender's cgroup path,
 * truncated as described below, is then used as the key for rate limiting,
 * and a "Suppressed ..." driver message is emitted when earlier messages
 * were dropped. Finally the message goes to dispatch_message_real(). */
704 void server_dispatch_message(
706 struct iovec *iovec, unsigned n, unsigned m,
709 const char *label, size_t label_len,
714 _cleanup_free_ char *path = NULL;
718 assert(iovec || n == 0);
723 if (LOG_PRI(priority) > s->max_level_store)
729 r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
733 /* example: /user/lennart/3/foobar
734 * /system/dbus.service/foobar
736 * So let's cut of everything past the third /, since that is
737 * where user directories start */
739 c = strchr(path, '/');
741 c = strchr(c+1, '/');
743 c = strchr(c+1, '/');
/* The current free space is passed along to the rate limiter. */
749 rl = journal_rate_limit_test(s->rate_limit, path,
750 priority & LOG_PRIMASK, available_space(s));
755 /* Write a suppression message if we suppressed something */
757 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
758 "Suppressed %u messages from %s", rl - 1, path);
761 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
765 static int system_journal_open(Server *s) {
772 avail = available_space(s);
774 r = sd_id128_get_machine(&machine);
778 sd_id128_to_string(machine, ids);
780 if (!s->system_journal &&
781 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
782 access("/run/systemd/journal/flushed", F_OK) >= 0) {
784 /* If in auto mode: first try to create the machine
785 * path, but not the prefix.
787 * If in persistent mode: create /var/log/journal and
788 * the machine path */
790 if (s->storage == STORAGE_PERSISTENT)
791 (void) mkdir("/var/log/journal/", 0755);
793 fn = strappend("/var/log/journal/", ids);
797 (void) mkdir(fn, 0755);
800 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
804 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
808 char fb[FORMAT_BYTES_MAX];
810 server_fix_perms(s, s->system_journal, 0);
811 server_driver_message(s, SD_ID128_NULL, "Allowing system journal files to grow to %s.",
812 format_bytes(fb, sizeof(fb), s->system_metrics.max_use));
814 if (s->system_metrics.max_use > avail)
815 server_driver_message(s, SD_ID128_NULL, "Journal size currently limited to %s due to SystemKeepFree.",
816 format_bytes(fb, sizeof(fb), avail));
820 if (r != -ENOENT && r != -EROFS)
821 log_warning("Failed to open system journal: %s", strerror(-r));
827 if (!s->runtime_journal &&
828 (s->storage != STORAGE_NONE)) {
830 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
834 if (s->system_journal) {
836 /* Try to open the runtime journal, but only
837 * if it already exists, so that we can flush
838 * it into the system journal */
840 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
845 log_warning("Failed to open runtime journal: %s", strerror(-r));
852 /* OK, we really need the runtime journal, so create
853 * it if necessary. */
855 (void) mkdir_parents(fn, 0755);
856 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
860 log_error("Failed to open runtime journal: %s", strerror(-r));
865 if (s->runtime_journal) {
866 char fb[FORMAT_BYTES_MAX];
868 server_fix_perms(s, s->runtime_journal, 0);
869 server_driver_message(s, SD_ID128_NULL, "Allowing runtime journal files to grow to %s.",
870 format_bytes(fb, sizeof(fb), s->runtime_metrics.max_use));
872 if (s->system_metrics.max_use > avail)
873 server_driver_message(s, SD_ID128_NULL, "Journal size currently limited to %s due to RuntimeKeepFree.",
874 format_bytes(fb, sizeof(fb), avail));
/* Copies every entry of the runtime journal into the system journal, then
 * closes the runtime journal and removes /run/log/journal. Only applies
 * when storage is "auto" or "persistent", a runtime journal is open, and
 * the system journal could be opened. A failed copy is retried once when
 * shall_try_append_again() classifies the error as curable. */
881 int server_flush_to_var(Server *s) {
884 sd_journal *j = NULL;
888 if (s->storage != STORAGE_AUTO &&
889 s->storage != STORAGE_PERSISTENT)
892 if (!s->runtime_journal)
/* Make sure the system journal is open before flushing into it. */
895 system_journal_open(s);
897 if (!s->system_journal)
900 log_debug("Flushing to /var...");
902 r = sd_id128_get_machine(&machine);
904 log_error("Failed to get machine id: %s", strerror(-r));
/* Read the runtime journal back through the public reader API. */
908 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
910 log_error("Failed to read runtime journal: %s", strerror(-r));
/* A threshold of 0 is passed so that field data is not truncated on read
 * - presumably 0 means "no limit"; confirm in the sd-journal docs. */
914 sd_journal_set_data_threshold(j, 0);
916 SD_JOURNAL_FOREACH(j) {
921 assert(f && f->current_offset > 0);
923 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
925 log_error("Can't read entry: %s", strerror(-r));
929 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
933 if (!shall_try_append_again(s->system_journal, r)) {
934 log_error("Can't write entry: %s", strerror(-r));
/* The system journal pointer may be unset at this point if the preceding
 * rotation failed. */
941 if (!s->system_journal) {
942 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
947 log_debug("Retrying write.");
948 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
950 log_error("Can't write entry: %s", strerror(-r));
956 journal_file_post_change(s->system_journal);
/* The runtime journal is no longer needed once its content is in /var. */
958 journal_file_close(s->runtime_journal);
959 s->runtime_journal = NULL;
962 rm_rf("/run/log/journal", false, true, false);
/* Handles one epoll event. The event is routed by file descriptor: signal
 * fd, sync timer fd, /dev/kmsg fd, the native and syslog datagram sockets,
 * and the stdout listening socket. Anything else is treated as an event on
 * a stdout stream, whose StdoutStream pointer is stored in the event data. */
969 int process_event(Server *s, struct epoll_event *ev) {
/* --- Signals --- */
973 if (ev->data.fd == s->signal_fd) {
974 struct signalfd_siginfo sfsi;
977 if (ev->events != EPOLLIN) {
978 log_error("Got invalid event from epoll.");
982 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
983 if (n != sizeof(sfsi)) {
988 if (errno == EINTR || errno == EAGAIN)
/* SIGUSR1: create the "flushed" flag file and flush /run to /var. */
994 if (sfsi.ssi_signo == SIGUSR1) {
995 touch("/run/systemd/journal/flushed");
996 server_flush_to_var(s);
1001 if (sfsi.ssi_signo == SIGUSR2) {
1007 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
/* --- Sync timer --- */
1011 } else if (ev->data.fd == s->sync_timer_fd) {
1015 log_debug("Got sync request from epoll.");
/* Reading the timerfd consumes the expiration. */
1017 r = read(ev->data.fd, (void *)&t, sizeof(t));
/* --- Kernel messages --- */
1024 } else if (ev->data.fd == s->dev_kmsg_fd) {
1027 if (ev->events != EPOLLIN) {
1028 log_error("Got invalid event from epoll.");
1032 r = server_read_dev_kmsg(s);
/* --- Native protocol and syslog datagrams --- */
1038 } else if (ev->data.fd == s->native_fd ||
1039 ev->data.fd == s->syslog_fd) {
1041 if (ev->events != EPOLLIN) {
1042 log_error("Got invalid event from epoll.");
1047 struct msghdr msghdr;
1049 struct ucred *ucred = NULL;
1050 struct timeval *tv = NULL;
1051 struct cmsghdr *cmsg;
1053 size_t label_len = 0;
1055 struct cmsghdr cmsghdr;
1057 /* We use NAME_MAX space for the
1058 * SELinux label here. The kernel
1059 * currently enforces no limit, but
1060 * according to suggestions from the
1061 * SELinux people this will change and
1062 * it will probably be identical to
1063 * NAME_MAX. For now we use that, but
1064 * this should be updated one day when
1065 * the final limit is known.*/
1066 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1067 CMSG_SPACE(sizeof(struct timeval)) +
1068 CMSG_SPACE(sizeof(int)) + /* fd */
1069 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask the socket how many bytes are pending so the receive buffer can be
 * sized before reading. */
1076 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1077 log_error("SIOCINQ failed: %m");
1081 if (s->buffer_size < (size_t) v) {
/* Grow to at least double the old size; one extra byte is allocated
 * beyond the recorded length. */
1085 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1086 b = realloc(s->buffer, l+1);
1089 log_error("Couldn't increase buffer.");
1098 iovec.iov_base = s->buffer;
1099 iovec.iov_len = s->buffer_size;
1103 msghdr.msg_iov = &iovec;
1104 msghdr.msg_iovlen = 1;
1105 msghdr.msg_control = &control;
1106 msghdr.msg_controllen = sizeof(control);
1108 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1111 if (errno == EINTR || errno == EAGAIN)
1114 log_error("recvmsg() failed: %m");
/* Pick sender credentials, SELinux label, timestamp and passed file
 * descriptors out of the control messages. Credentials and timestamp are
 * only accepted when their length matches exactly. */
1118 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1120 if (cmsg->cmsg_level == SOL_SOCKET &&
1121 cmsg->cmsg_type == SCM_CREDENTIALS &&
1122 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1123 ucred = (struct ucred*) CMSG_DATA(cmsg);
1124 else if (cmsg->cmsg_level == SOL_SOCKET &&
1125 cmsg->cmsg_type == SCM_SECURITY) {
1126 label = (char*) CMSG_DATA(cmsg);
1127 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1128 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1129 cmsg->cmsg_type == SO_TIMESTAMP &&
1130 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1131 tv = (struct timeval*) CMSG_DATA(cmsg);
1132 else if (cmsg->cmsg_level == SOL_SOCKET &&
1133 cmsg->cmsg_type == SCM_RIGHTS) {
1134 fds = (int*) CMSG_DATA(cmsg);
1135 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* Syslog socket: plain text only, file descriptors are not accepted. */
1139 if (ev->data.fd == s->syslog_fd) {
1142 if (n > 0 && n_fds == 0) {
1143 e = memchr(s->buffer, '\n', n);
1149 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1150 } else if (n_fds > 0)
1151 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native socket: either an inline payload, or an empty datagram carrying
 * exactly one file descriptor that holds the payload. */
1154 if (n > 0 && n_fds == 0)
1155 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1156 else if (n == 0 && n_fds == 1)
1157 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1159 log_warning("Got too many file descriptors via native socket. Ignoring.");
/* Received descriptors are closed here once the message is handled. */
1162 close_many(fds, n_fds);
/* --- New stdout stream connection --- */
1167 } else if (ev->data.fd == s->stdout_fd) {
1169 if (ev->events != EPOLLIN) {
1170 log_error("Got invalid event from epoll.");
1174 stdout_stream_new(s);
/* --- Data or hang-up on an existing stdout stream --- */
1178 StdoutStream *stream;
/* Only EPOLLIN and EPOLLHUP may be set. */
1180 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1181 log_error("Got invalid event from epoll.");
1185 /* If it is none of the well-known fds, it must be an
1186 * stdout stream fd. Note that this is a bit ugly here
1187 * (since we rely that none of the well-known fds
1188 * could be interpreted as pointer), but nonetheless
1189 * safe, since the well-known fds would never get an
1190 * fd > 4096, i.e. beyond the first memory page */
1192 stream = ev->data.ptr;
/* A result of zero or less frees the stream. */
1194 if (stdout_stream_process(stream) <= 0)
1195 stdout_stream_free(stream);
1200 log_error("Unknown event.");
/* Sets the process signal mask to SIGINT, SIGTERM, SIGUSR1 and SIGUSR2,
 * creates a non-blocking, close-on-exec signalfd for that set, and
 * registers it with the server's epoll instance for EPOLLIN. */
1204 static int open_signalfd(Server *s) {
1206 struct epoll_event ev;
1210 assert_se(sigemptyset(&mask) == 0);
1211 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
/* SIG_SETMASK replaces the whole mask with this set. */
1212 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1214 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1215 if (s->signal_fd < 0) {
1216 log_error("signalfd(): %m");
1221 ev.events = EPOLLIN;
1222 ev.data.fd = s->signal_fd;
1224 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1225 log_error("epoll_ctl(): %m");
/* Applies journald switches given on the kernel command line. Recognised
 * are systemd.journald.forward_to_syslog=, forward_to_kmsg= and
 * forward_to_console=, each taking a boolean. Unparsable values and other
 * words starting with "systemd.journald" only produce a warning. */
1232 static int server_parse_proc_cmdline(Server *s) {
1233 _cleanup_free_ char *line = NULL;
/* Checked before reading /proc/cmdline - presumably the command line is
 * skipped inside containers (the statement under this condition is not
 * visible here). */
1238 if (detect_container(NULL) > 0)
1241 r = read_one_line_file("/proc/cmdline", &line);
1243 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1247 FOREACH_WORD_QUOTED(w, l, line, state) {
1248 _cleanup_free_ char *word;
1250 word = strndup(w, l);
/* The numeric offsets below equal the length of the matched prefix, so
 * word + N points at the value behind the '='. */
1254 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1255 r = parse_boolean(word + 35);
1257 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1259 s->forward_to_syslog = r;
1260 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1261 r = parse_boolean(word + 33);
1263 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1265 s->forward_to_kmsg = r;
1266 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1267 r = parse_boolean(word + 36);
1269 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1271 s->forward_to_console = r;
1272 } else if (startswith(word, "systemd.journald"))
1273 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Loads /etc/systemd/journald.conf into the Server object, parsing the
 * "Journal" section with the gperf-generated lookup table. A parse failure
 * is logged as a warning. */
1279 static int server_parse_config_file(Server *s) {
1280 static const char fn[] = "/etc/systemd/journald.conf";
1281 _cleanup_fclose_ FILE *f = NULL;
/* The "e" mode flag opens the file close-on-exec. */
1286 f = fopen(fn, "re");
/* A missing file is handled separately from other open errors and is not
 * warned about. */
1288 if (errno == ENOENT)
1291 log_warning("Failed to open configuration file %s: %m", fn);
1295 r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1296 (void*) journald_gperf_lookup, false, false, s);
1298 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Creates the close-on-exec CLOCK_MONOTONIC timerfd used for delayed
 * journal syncs and adds it to the server's epoll instance for EPOLLIN.
 * The timer is not armed here; that is done by server_schedule_sync(). */
1303 static int server_open_sync_timer(Server *s) {
1305 struct epoll_event ev;
1309 s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1310 if (s->sync_timer_fd < 0)
1314 ev.events = EPOLLIN;
1315 ev.data.fd = s->sync_timer_fd;
1317 r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1319 log_error("Failed to add idle timer fd to epoll object: %m");
/* Arms the sync timer to fire once after s->sync_interval_usec, unless a
 * sync is already scheduled. With an interval of 0 the timer is not armed
 * here. On arming, s->sync_scheduled is set so repeated calls do nothing
 * until server_sync() clears it. */
1326 int server_schedule_sync(Server *s) {
1331 if (s->sync_scheduled)
1334 if (s->sync_interval_usec) {
/* Only it_value is filled in; it_interval stays zero, so the timer fires
 * once rather than periodically. */
1335 struct itimerspec sync_timer_enable = {};
1337 timespec_store(&sync_timer_enable.it_value, s->sync_interval_usec);
1339 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1344 s->sync_scheduled = true;
/* Initialises a Server: sets defaults, applies the configuration file and
 * the kernel command line, takes over sockets passed in by the service
 * manager, opens the remaining event sources, and finally opens the
 * journal files. */
1349 int server_init(Server *s) {
/* Mark every descriptor as "not open" so that server_done() can tell
 * which ones need closing. */
1355 s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1356 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1360 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1361 s->sync_scheduled = false;
1363 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1364 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1366 s->forward_to_syslog = true;
1368 s->max_level_store = LOG_DEBUG;
1369 s->max_level_syslog = LOG_DEBUG;
1370 s->max_level_kmsg = LOG_NOTICE;
1371 s->max_level_console = LOG_INFO;
/* Fill both metrics structures with 0xFF bytes - presumably an "unset"
 * marker resolved to real defaults elsewhere; confirm where the metrics
 * are consumed. */
1373 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1374 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* Configuration file first, then the kernel command line, so the latter
 * is applied last. */
1376 server_parse_config_file(s);
1377 server_parse_proc_cmdline(s);
/* If exactly one of interval and burst is zero, zero both. */
1378 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1379 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1380 (long long unsigned) s->rate_limit_interval,
1381 s->rate_limit_burst);
1382 s->rate_limit_interval = s->rate_limit_burst = 0;
1385 mkdir_p("/run/systemd/journal", 0755);
1387 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1388 if (!s->user_journals)
1391 s->mmap = mmap_cache_new();
1395 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1396 if (s->epoll_fd < 0) {
1397 log_error("Failed to create epoll object: %m");
/* Classify each inherited socket by type and bound path; a second socket
 * of the same kind, or an unrecognised one, is an error. */
1401 n = sd_listen_fds(true);
1403 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1407 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1409 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1411 if (s->native_fd >= 0) {
1412 log_error("Too many native sockets passed.");
1418 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1420 if (s->stdout_fd >= 0) {
1421 log_error("Too many stdout sockets passed.");
1427 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1429 if (s->syslog_fd >= 0) {
1430 log_error("Too many /dev/log sockets passed.");
1437 log_error("Unknown socket passed.");
/* Set up the individual event sources. */
1442 r = server_open_syslog_socket(s);
1446 r = server_open_native_socket(s);
1450 r = server_open_stdout_socket(s);
1454 r = server_open_dev_kmsg(s);
1458 r = server_open_kernel_seqnum(s);
1462 r = server_open_sync_timer(s);
1466 r = open_signalfd(s);
1470 s->udev = udev_new();
1474 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1475 s->rate_limit_burst);
/* Open the journals last, once everything else is in place. */
1479 r = system_journal_open(s);
/* Calls journal_file_maybe_append_tag() with the current realtime clock on
 * the system journal (if open) and on every open per-user journal. */
1486 void server_maybe_append_tags(Server *s) {
/* One timestamp is taken and shared by all files. */
1492 n = now(CLOCK_REALTIME);
1494 if (s->system_journal)
1495 journal_file_maybe_append_tag(s->system_journal, n);
1497 HASHMAP_FOREACH(f, s->user_journals, i)
1498 journal_file_maybe_append_tag(f, n);
/* Releases the resources held by a Server: stdout streams, all journal
 * files, the user journal map, every open file descriptor, the rate
 * limiter, the kernel sequence number mapping, the mmap cache and the udev
 * context. */
1502 void server_done(Server *s) {
/* The loop re-reads the list head each time - presumably
 * stdout_stream_free() unlinks the stream; confirm in its definition. */
1506 while (s->stdout_streams)
1507 stdout_stream_free(s->stdout_streams);
1509 if (s->system_journal)
1510 journal_file_close(s->system_journal);
1512 if (s->runtime_journal)
1513 journal_file_close(s->runtime_journal);
/* Drain the map, closing each per-user journal, before freeing it. */
1515 while ((f = hashmap_steal_first(s->user_journals)))
1516 journal_file_close(f);
1518 hashmap_free(s->user_journals);
/* Descriptors were initialised to -1 in server_init(), so only those that
 * were actually opened get closed. */
1520 if (s->epoll_fd >= 0)
1521 close_nointr_nofail(s->epoll_fd);
1523 if (s->signal_fd >= 0)
1524 close_nointr_nofail(s->signal_fd);
1526 if (s->syslog_fd >= 0)
1527 close_nointr_nofail(s->syslog_fd);
1529 if (s->native_fd >= 0)
1530 close_nointr_nofail(s->native_fd);
1532 if (s->stdout_fd >= 0)
1533 close_nointr_nofail(s->stdout_fd);
1535 if (s->dev_kmsg_fd >= 0)
1536 close_nointr_nofail(s->dev_kmsg_fd);
1538 if (s->sync_timer_fd >= 0)
1539 close_nointr_nofail(s->sync_timer_fd);
1542 journal_rate_limit_free(s->rate_limit);
/* The mapping is unmapped with the size of one uint64_t. */
1544 if (s->kernel_seqnum)
1545 munmap(s->kernel_seqnum, sizeof(uint64_t));
1551 mmap_cache_unref(s->mmap);
1554 udev_unref(s->udev);