1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
43 #include "conf-parser.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-server.h"
48 #include "journald-rate-limit.h"
49 #include "journald-kmsg.h"
50 #include "journald-syslog.h"
51 #include "journald-stream.h"
52 #include "journald-console.h"
53 #include "journald-native.h"
57 #include <acl/libacl.h>
62 #include <selinux/selinux.h>
/* Maximum number of per-user journal files kept open at the same time;
 * find_journal() closes open files while the count is at or above this. */
65 #define USER_JOURNALS_MAX 1024
/* Default value server_init() assigns to sync_interval_usec. */
67 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
/* Default rate limit interval and burst assigned by server_init(). */
68 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
69 #define DEFAULT_RATE_LIMIT_BURST 200
/* How long available_space() keeps returning its cached value. */
71 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Keyword for each Storage enum value. */
73 static const char* const storage_table[] = {
74 [STORAGE_AUTO] = "auto",
75 [STORAGE_VOLATILE] = "volatile",
76 [STORAGE_PERSISTENT] = "persistent",
77 [STORAGE_NONE] = "none"
/* Presumably expand to the string<->enum lookup helpers and to the
 * config_parse_storage() parser built on storage_table -- confirm against
 * the macro definitions. */
80 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
81 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
83 static const char* const split_mode_table[] = {
84 [SPLIT_NONE] = "none",
86 [SPLIT_LOGIN] = "login"
89 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
90 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
92 static uint64_t available_space(Server *s) {
94 _cleanup_free_ char *p = NULL;
98 uint64_t sum = 0, avail = 0, ss_avail = 0;
100 _cleanup_closedir_ DIR *d = NULL;
104 ts = now(CLOCK_MONOTONIC);
106 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
107 return s->cached_available_space;
109 r = sd_id128_get_machine(&machine);
113 if (s->system_journal) {
114 f = "/var/log/journal/";
115 m = &s->system_metrics;
117 f = "/run/log/journal/";
118 m = &s->runtime_metrics;
123 p = strappend(f, sd_id128_to_string(machine, ids));
131 if (fstatvfs(dirfd(d), &ss) < 0)
137 union dirent_storage buf;
139 r = readdir_r(d, &buf.de, &de);
146 if (!endswith(de->d_name, ".journal") &&
147 !endswith(de->d_name, ".journal~"))
150 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
153 if (!S_ISREG(st.st_mode))
156 sum += (uint64_t) st.st_blocks * 512UL;
159 avail = sum >= m->max_use ? 0 : m->max_use - sum;
161 ss_avail = ss.f_bsize * ss.f_bavail;
163 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
165 if (ss_avail < avail)
168 s->cached_available_space = avail;
169 s->cached_available_space_timestamp = ts;
174 static void server_read_file_gid(Server *s) {
175 const char *g = "systemd-journal";
180 if (s->file_gid_valid)
183 r = get_group_creds(&g, &s->file_gid);
185 log_warning("Failed to resolve '%s' group: %s", g, strerror(-r));
187 /* if we couldn't read the gid, then it will be 0, but that's
188 * fine and we shouldn't try to resolve the group again, so
189 * let's just pretend it worked right-away. */
190 s->file_gid_valid = true;
/* Fixes up the access rights of journal file f: applies mode 0640, owner 0
 * and the journal group via fchmod_and_fchown(), then adds an ACL_READ
 * entry for uid to the file's ACL. Every failure is only logged as a
 * warning. */
193 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
198 acl_permset_t permset;
/* Make sure s->file_gid has been resolved before it is used below. */
203 server_read_file_gid(s);
205 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
207 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
/* Fetch the file's current ACL so that an entry for uid can be patched in. */
213 acl = acl_get_fd(f->fd);
215 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an entry for uid; the calls below create an ACL_USER entry
 * qualified with uid. */
219 r = acl_find_uid(acl, uid, &entry);
222 if (acl_create_entry(&acl, &entry) < 0 ||
223 acl_set_tag_type(entry, ACL_USER) < 0 ||
224 acl_set_qualifier(entry, &uid) < 0) {
225 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
230 /* We do not recalculate the mask unconditionally here,
231 * so that the fchmod() mask above stays intact. */
232 if (acl_get_permset(entry, &permset) < 0 ||
233 acl_add_perm(permset, ACL_READ) < 0 ||
234 calc_acl_mask_if_needed(&acl) < 0) {
235 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Write the patched ACL back to the file. */
239 if (acl_set_fd(f->fd, acl) < 0)
240 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
247 static JournalFile* find_journal(Server *s, uid_t uid) {
248 _cleanup_free_ char *p = NULL;
255 /* We split up user logs only on /var, not on /run. If the
256 * runtime file is open, we write to it exclusively, in order
257 * to guarantee proper order as soon as we flush /run to
258 * /var and close the runtime file. */
260 if (s->runtime_journal)
261 return s->runtime_journal;
264 return s->system_journal;
266 r = sd_id128_get_machine(&machine);
268 return s->system_journal;
270 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
274 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
275 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
276 return s->system_journal;
278 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
279 /* Too many open? Then let's close one */
280 f = hashmap_steal_first(s->user_journals);
282 journal_file_close(f);
285 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
287 return s->system_journal;
289 server_fix_perms(s, f, uid);
291 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
293 journal_file_close(f);
294 return s->system_journal;
300 void server_rotate(Server *s) {
306 log_debug("Rotating...");
308 if (s->runtime_journal) {
309 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
311 if (s->runtime_journal)
312 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
314 log_error("Failed to create new runtime journal: %s", strerror(-r));
316 server_fix_perms(s, s->runtime_journal, 0);
319 if (s->system_journal) {
320 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
322 if (s->system_journal)
323 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
325 log_error("Failed to create new system journal: %s", strerror(-r));
328 server_fix_perms(s, s->system_journal, 0);
331 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
332 r = journal_file_rotate(&f, s->compress, s->seal);
335 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
337 log_error("Failed to create user journal: %s", strerror(-r));
339 hashmap_replace(s->user_journals, k, f);
340 server_fix_perms(s, f, PTR_TO_UINT32(k));
345 void server_sync(Server *s) {
351 static const struct itimerspec sync_timer_disable = {};
353 if (s->system_journal) {
354 r = journal_file_set_offline(s->system_journal);
356 log_error("Failed to sync system journal: %s", strerror(-r));
359 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
360 r = journal_file_set_offline(f);
362 log_error("Failed to sync user journal: %s", strerror(-r));
365 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
367 log_error("Failed to disable max timer: %m");
369 s->sync_scheduled = false;
372 void server_vacuum(Server *s) {
377 log_debug("Vacuuming...");
379 s->oldest_file_usec = 0;
381 r = sd_id128_get_machine(&machine);
383 log_error("Failed to get machine ID: %s", strerror(-r));
387 sd_id128_to_string(machine, ids);
389 if (s->system_journal) {
390 char *p = strappenda("/var/log/journal/", ids);
392 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
393 if (r < 0 && r != -ENOENT)
394 log_error("Failed to vacuum %s: %s", p, strerror(-r));
397 if (s->runtime_journal) {
398 char *p = strappenda("/run/log/journal/", ids);
400 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
401 if (r < 0 && r != -ENOENT)
402 log_error("Failed to vacuum %s: %s", p, strerror(-r));
405 s->cached_available_space_timestamp = 0;
408 bool shall_try_append_again(JournalFile *f, int r) {
410 /* -E2BIG Hit configured limit
412 -EDQUOT Quota limit hit
414 -EHOSTDOWN Other machine
415 -EBUSY Unclean shutdown
416 -EPROTONOSUPPORT Unsupported feature
419 -ESHUTDOWN Already archived */
421 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
422 log_debug("%s: Allocation limit reached, rotating.", f->path);
423 else if (r == -EHOSTDOWN)
424 log_info("%s: Journal file from other machine, rotating.", f->path);
425 else if (r == -EBUSY)
426 log_info("%s: Unclean shutdown, rotating.", f->path);
427 else if (r == -EPROTONOSUPPORT)
428 log_info("%s: Unsupported feature, rotating.", f->path);
429 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
430 log_warning("%s: Journal file corrupted, rotating.", f->path);
437 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
439 bool vacuumed = false;
446 f = find_journal(s, uid);
450 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
451 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
456 f = find_journal(s, uid);
461 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
463 server_schedule_sync(s);
467 if (vacuumed || !shall_try_append_again(f, r)) {
470 for (i = 0; i < n; i++)
471 size += iovec[i].iov_len;
473 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
480 f = find_journal(s, uid);
484 log_debug("Retrying write.");
485 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
489 for (i = 0; i < n; i++)
490 size += iovec[i].iov_len;
492 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
/* Appends the "_"-prefixed metadata fields describing the sending process
 * and this host to iovec (starting at index n) and passes the completed
 * entry to write_to_journal(). The array must have capacity m with room
 * for N_IOVEC_META_FIELDS additional items, which is asserted below. */
496 static void dispatch_message_real(
498 struct iovec *iovec, unsigned n, unsigned m,
501 const char *label, size_t label_len,
502 const char *unit_id) {
504 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
505 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
506 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
507 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
508 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
509 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
510 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=";
511 char *comm, *exe, *cmdline, *cgroup, *session, *unit, *hostname;
515 uid_t realuid = 0, owner = 0, journal_uid;
516 bool owner_valid = false;
518 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
519 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
528 assert(n + N_IOVEC_META_FIELDS <= m);
/* Credentials passed in by the caller: pid, uid and gid. */
531 realuid = ucred->uid;
533 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
534 IOVEC_SET_STRING(iovec[n++], pid);
536 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
537 IOVEC_SET_STRING(iovec[n++], uid);
539 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
540 IOVEC_SET_STRING(iovec[n++], gid);
/* Process name, executable and command line, looked up by pid. The
 * strappenda() results presumably live on this function's stack, which is
 * why everything is assembled in this one function -- confirm. */
542 r = get_process_comm(ucred->pid, &t);
544 comm = strappenda("_COMM=", t);
546 IOVEC_SET_STRING(iovec[n++], comm);
549 r = get_process_exe(ucred->pid, &t);
551 exe = strappenda("_EXE=", t);
553 IOVEC_SET_STRING(iovec[n++], exe);
556 r = get_process_cmdline(ucred->pid, 0, false, &t);
558 cmdline = strappenda("_CMDLINE=", t);
560 IOVEC_SET_STRING(iovec[n++], cmdline);
/* Audit session and audit login uid of the sending process. */
564 r = audit_session_from_pid(ucred->pid, &audit);
566 sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
567 IOVEC_SET_STRING(iovec[n++], audit_session);
570 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
572 sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
573 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* Control group path of the sender; session, owner uid and unit fields
 * are all derived from that path. */
577 r = cg_pid_get_path_shifted(ucred->pid, NULL, &c);
579 cgroup = strappenda("_SYSTEMD_CGROUP=", c);
580 IOVEC_SET_STRING(iovec[n++], cgroup);
582 r = cg_path_get_session(c, &t);
584 session = strappenda("_SYSTEMD_SESSION=", t);
586 IOVEC_SET_STRING(iovec[n++], session);
589 if (cg_path_get_owner_uid(c, &owner) >= 0) {
592 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
593 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* Unit name: taken from the cgroup path if possible, otherwise from the
 * unit_id passed in by the caller. */
596 if (cg_path_get_unit(c, &t) >= 0) {
597 unit = strappenda("_SYSTEMD_UNIT=", t);
599 } else if (cg_path_get_user_unit(c, &t) >= 0) {
600 unit = strappenda("_SYSTEMD_USER_UNIT=", t);
602 } else if (unit_id) {
604 unit = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
606 unit = strappenda("_SYSTEMD_UNIT=", unit_id);
611 IOVEC_SET_STRING(iovec[n++], unit);
/* SELinux context: built from the caller-supplied label (label_len bytes,
 * NUL-terminated here), or queried from the pid via getpidcon(). */
618 char *selinux_context = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);
620 *((char*) mempcpy(stpcpy(selinux_context, "_SELINUX_CONTEXT="), label, label_len)) = 0;
621 IOVEC_SET_STRING(iovec[n++], selinux_context);
623 security_context_t con;
625 if (getpidcon(ucred->pid, &con) >= 0) {
626 char *selinux_context = strappenda("_SELINUX_CONTEXT=", con);
629 IOVEC_SET_STRING(iovec[n++], selinux_context);
/* Timestamp taken from the tv argument. */
636 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
637 IOVEC_SET_STRING(iovec[n++], source_time);
640 /* Note that strictly speaking storing the boot id here is
641 * redundant since the entry includes this in-line
642 * anyway. However, we need this indexed, too. */
643 r = sd_id128_get_boot(&id);
645 sd_id128_to_string(id, boot_id + sizeof("_BOOT_ID=") - 1);
646 IOVEC_SET_STRING(iovec[n++], boot_id);
649 r = sd_id128_get_machine(&id);
651 sd_id128_to_string(id, machine_id + sizeof("_MACHINE_ID=") - 1);
652 IOVEC_SET_STRING(iovec[n++], machine_id);
655 t = gethostname_malloc();
657 hostname = strappenda("_HOSTNAME=", t);
659 IOVEC_SET_STRING(iovec[n++], hostname);
/* Select the uid whose journal file receives the entry, depending on the
 * configured split mode. */
664 if (s->split_mode == SPLIT_UID && realuid > 0)
665 /* Split up strictly by any UID */
666 journal_uid = realuid;
667 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
668 /* Split up by login UIDs, this avoids creation of
669 * individual journals for system UIDs. We do this
670 * only if the realuid is not root, in order not to
671 * accidentally leak privileged information to the
672 * user that is logged by a privileged process that is
673 * part of an unprivileged session.*/
678 write_to_journal(s, journal_uid, iovec, n);
681 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
682 char mid[11 + 32 + 1];
683 char buffer[16 + LINE_MAX + 1];
684 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
687 struct ucred ucred = {};
692 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
693 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
695 memcpy(buffer, "MESSAGE=", 8);
696 va_start(ap, format);
697 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
699 char_array_0(buffer);
700 IOVEC_SET_STRING(iovec[n++], buffer);
702 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
703 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
705 IOVEC_SET_STRING(iovec[n++], mid);
708 ucred.pid = getpid();
709 ucred.uid = getuid();
710 ucred.gid = getgid();
712 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
715 void server_dispatch_message(
717 struct iovec *iovec, unsigned n, unsigned m,
720 const char *label, size_t label_len,
725 _cleanup_free_ char *path = NULL;
729 assert(iovec || n == 0);
734 if (LOG_PRI(priority) > s->max_level_store)
740 r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
744 /* example: /user/lennart/3/foobar
745 * /system/dbus.service/foobar
747 * So let's cut of everything past the third /, since that is
748 * where user directories start */
750 c = strchr(path, '/');
752 c = strchr(c+1, '/');
754 c = strchr(c+1, '/');
760 rl = journal_rate_limit_test(s->rate_limit, path,
761 priority & LOG_PRIMASK, available_space(s));
766 /* Write a suppression message if we suppressed something */
768 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
769 "Suppressed %u messages from %s", rl - 1, path);
772 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
776 static int system_journal_open(Server *s) {
782 r = sd_id128_get_machine(&machine);
786 sd_id128_to_string(machine, ids);
788 if (!s->system_journal &&
789 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
790 access("/run/systemd/journal/flushed", F_OK) >= 0) {
792 /* If in auto mode: first try to create the machine
793 * path, but not the prefix.
795 * If in persistent mode: create /var/log/journal and
796 * the machine path */
798 if (s->storage == STORAGE_PERSISTENT)
799 (void) mkdir("/var/log/journal/", 0755);
801 fn = strappenda("/var/log/journal/", ids);
802 (void) mkdir(fn, 0755);
804 fn = strappenda(fn, "/system.journal");
805 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
808 char fb[FORMAT_BYTES_MAX];
811 server_fix_perms(s, s->system_journal, 0);
813 server_driver_message(s, SD_ID128_NULL, "Allowing system journal files to grow to %s.",
814 format_bytes(fb, sizeof(fb), s->system_metrics.max_use));
816 avail = available_space(s);
818 if (s->system_metrics.max_use > avail)
819 server_driver_message(s, SD_ID128_NULL, "Journal size currently limited to %s due to SystemKeepFree.",
820 format_bytes(fb, sizeof(fb), avail));
824 if (r != -ENOENT && r != -EROFS)
825 log_warning("Failed to open system journal: %s", strerror(-r));
831 if (!s->runtime_journal &&
832 (s->storage != STORAGE_NONE)) {
834 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
838 if (s->system_journal) {
840 /* Try to open the runtime journal, but only
841 * if it already exists, so that we can flush
842 * it into the system journal */
844 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
849 log_warning("Failed to open runtime journal: %s", strerror(-r));
856 /* OK, we really need the runtime journal, so create
857 * it if necessary. */
859 (void) mkdir_parents(fn, 0755);
860 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
864 log_error("Failed to open runtime journal: %s", strerror(-r));
869 if (s->runtime_journal) {
870 char fb[FORMAT_BYTES_MAX];
873 server_fix_perms(s, s->runtime_journal, 0);
874 server_driver_message(s, SD_ID128_NULL, "Allowing runtime journal files to grow to %s.",
875 format_bytes(fb, sizeof(fb), s->runtime_metrics.max_use));
877 avail = available_space(s);
879 if (s->system_metrics.max_use > avail)
880 server_driver_message(s, SD_ID128_NULL, "Journal size currently limited to %s due to RuntimeKeepFree.",
881 format_bytes(fb, sizeof(fb), avail));
/* Copies every entry of the runtime journal into the system journal, then
 * closes the runtime journal and removes /run/log/journal. The checks at
 * the top look at the storage mode, at whether a runtime journal is open
 * and, after calling system_journal_open(), at whether a system journal is
 * available. */
888 int server_flush_to_var(Server *s) {
891 sd_journal *j = NULL;
895 if (s->storage != STORAGE_AUTO &&
896 s->storage != STORAGE_PERSISTENT)
899 if (!s->runtime_journal)
/* Give the system journal a chance to be opened now. */
902 system_journal_open(s);
904 if (!s->system_journal)
907 log_debug("Flushing to /var...");
909 r = sd_id128_get_machine(&machine);
911 log_error("Failed to get machine id: %s", strerror(-r));
/* Read the runtime journal back through the client API. */
915 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
917 log_error("Failed to read runtime journal: %s", strerror(-r));
921 sd_journal_set_data_threshold(j, 0);
923 SD_JOURNAL_FOREACH(j) {
928 assert(f && f->current_offset > 0);
/* Locate the current entry object and copy it over. */
930 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
932 log_error("Can't read entry: %s", strerror(-r));
936 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
/* The copy failed: give up unless the error is one that rotation may fix. */
940 if (!shall_try_append_again(s->system_journal, r)) {
941 log_error("Can't write entry: %s", strerror(-r));
948 if (!s->system_journal) {
949 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
954 log_debug("Retrying write.");
955 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
957 log_error("Can't write entry: %s", strerror(-r));
/* NOTE(review): if the "!s->system_journal" failure path above ends up
 * here, the next call receives a NULL file -- confirm this is handled. */
963 journal_file_post_change(s->system_journal);
965 journal_file_close(s->runtime_journal);
966 s->runtime_journal = NULL;
969 rm_rf("/run/log/journal", false, true, false);
/* Handles one epoll event. The source is identified by comparing
 * ev->data.fd with the server's well-known fds (signalfd, sync timer,
 * /dev/kmsg, native and syslog datagram sockets, stdout listening socket);
 * anything else is taken to be a StdoutStream pointer stored in
 * ev->data.ptr. */
976 int process_event(Server *s, struct epoll_event *ev) {
/* --- signalfd --- */
980 if (ev->data.fd == s->signal_fd) {
981 struct signalfd_siginfo sfsi;
984 if (ev->events != EPOLLIN) {
985 log_error("Got invalid event from epoll.");
989 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
990 if (n != sizeof(sfsi)) {
995 if (errno == EINTR || errno == EAGAIN)
/* SIGUSR1: create the "flushed" flag file and flush the runtime journal
 * to /var. */
1001 if (sfsi.ssi_signo == SIGUSR1) {
1002 touch("/run/systemd/journal/flushed");
1003 server_flush_to_var(s);
1008 if (sfsi.ssi_signo == SIGUSR2) {
1014 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
/* --- sync timer --- */
1018 } else if (ev->data.fd == s->sync_timer_fd) {
1022 log_debug("Got sync request from epoll.");
/* Read from the timerfd into t. */
1024 r = read(ev->data.fd, (void *)&t, sizeof(t));
/* --- kernel log messages --- */
1031 } else if (ev->data.fd == s->dev_kmsg_fd) {
1034 if (ev->events != EPOLLIN) {
1035 log_error("Got invalid event from epoll.");
1039 r = server_read_dev_kmsg(s);
/* --- native and syslog datagram sockets --- */
1045 } else if (ev->data.fd == s->native_fd ||
1046 ev->data.fd == s->syslog_fd) {
1048 if (ev->events != EPOLLIN) {
1049 log_error("Got invalid event from epoll.");
1054 struct msghdr msghdr;
1056 struct ucred *ucred = NULL;
1057 struct timeval *tv = NULL;
1058 struct cmsghdr *cmsg;
1060 size_t label_len = 0;
1062 struct cmsghdr cmsghdr;
1064 /* We use NAME_MAX space for the
1065 * SELinux label here. The kernel
1066 * currently enforces no limit, but
1067 * according to suggestions from the
1068 * SELinux people this will change and
1069 * it will probably be identical to
1070 * NAME_MAX. For now we use that, but
1071 * this should be updated one day when
1072 * the final limit is known.*/
1073 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1074 CMSG_SPACE(sizeof(struct timeval)) +
1075 CMSG_SPACE(sizeof(int)) + /* fd */
1076 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask the socket how many bytes are queued (SIOCINQ) and grow the receive
 * buffer when that exceeds its current size. */
1083 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1084 log_error("SIOCINQ failed: %m");
1088 if (s->buffer_size < (size_t) v) {
1092 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
/* The realloc() result goes into a temporary first. */
1093 b = realloc(s->buffer, l+1);
1096 log_error("Couldn't increase buffer.");
1105 iovec.iov_base = s->buffer;
1106 iovec.iov_len = s->buffer_size;
1110 msghdr.msg_iov = &iovec;
1111 msghdr.msg_iovlen = 1;
1112 msghdr.msg_control = &control;
1113 msghdr.msg_controllen = sizeof(control);
1115 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1118 if (errno == EINTR || errno == EAGAIN)
1121 log_error("recvmsg() failed: %m");
/* Pick up the ancillary data: sender credentials, security label,
 * timestamp and passed file descriptors. */
1125 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1127 if (cmsg->cmsg_level == SOL_SOCKET &&
1128 cmsg->cmsg_type == SCM_CREDENTIALS &&
1129 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1130 ucred = (struct ucred*) CMSG_DATA(cmsg);
1131 else if (cmsg->cmsg_level == SOL_SOCKET &&
1132 cmsg->cmsg_type == SCM_SECURITY) {
1133 label = (char*) CMSG_DATA(cmsg);
1134 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1135 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1136 cmsg->cmsg_type == SO_TIMESTAMP &&
1137 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1138 tv = (struct timeval*) CMSG_DATA(cmsg);
1139 else if (cmsg->cmsg_level == SOL_SOCKET &&
1140 cmsg->cmsg_type == SCM_RIGHTS) {
1141 fds = (int*) CMSG_DATA(cmsg);
1142 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* Syslog socket: plain datagrams only, passed fds are rejected. */
1146 if (ev->data.fd == s->syslog_fd) {
1149 if (n > 0 && n_fds == 0) {
1150 e = memchr(s->buffer, '\n', n);
1156 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1157 } else if (n_fds > 0)
1158 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native socket: either an inline payload or exactly one fd with an empty
 * payload. */
1161 if (n > 0 && n_fds == 0)
1162 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1163 else if (n == 0 && n_fds == 1)
1164 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1166 log_warning("Got too many file descriptors via native socket. Ignoring.");
/* Close every fd that was received. */
1169 close_many(fds, n_fds);
/* --- stdout listening socket: set up a new stream --- */
1174 } else if (ev->data.fd == s->stdout_fd) {
1176 if (ev->events != EPOLLIN) {
1177 log_error("Got invalid event from epoll.");
1181 stdout_stream_new(s);
/* --- established stdout stream --- */
1185 StdoutStream *stream;
1187 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1188 log_error("Got invalid event from epoll.");
1192 /* If it is none of the well-known fds, it must be an
1193 * stdout stream fd. Note that this is a bit ugly here
1194 * (since we rely that none of the well-known fds
1195 * could be interpreted as pointer), but nonetheless
1196 * safe, since the well-known fds would never get an
1197 * fd > 4096, i.e. beyond the first memory page */
1199 stream = ev->data.ptr;
/* A result <= 0 from processing frees the stream. */
1201 if (stdout_stream_process(stream) <= 0)
1202 stdout_stream_free(stream);
1207 log_error("Unknown event.");
1211 static int open_signalfd(Server *s) {
1213 struct epoll_event ev;
1217 assert_se(sigemptyset(&mask) == 0);
1218 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1219 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1221 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1222 if (s->signal_fd < 0) {
1223 log_error("signalfd(): %m");
1228 ev.events = EPOLLIN;
1229 ev.data.fd = s->signal_fd;
1231 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1232 log_error("epoll_ctl(): %m");
1239 static int server_parse_proc_cmdline(Server *s) {
1240 _cleanup_free_ char *line = NULL;
1245 if (detect_container(NULL) > 0)
1248 r = read_one_line_file("/proc/cmdline", &line);
1250 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1254 FOREACH_WORD_QUOTED(w, l, line, state) {
1255 _cleanup_free_ char *word;
1257 word = strndup(w, l);
1261 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1262 r = parse_boolean(word + 35);
1264 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1266 s->forward_to_syslog = r;
1267 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1268 r = parse_boolean(word + 33);
1270 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1272 s->forward_to_kmsg = r;
1273 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1274 r = parse_boolean(word + 36);
1276 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1278 s->forward_to_console = r;
1279 } else if (startswith(word, "systemd.journald"))
1280 log_warning("Invalid systemd.journald parameter. Ignoring.");
1286 static int server_parse_config_file(Server *s) {
1287 static const char fn[] = "/etc/systemd/journald.conf";
1288 _cleanup_fclose_ FILE *f = NULL;
1293 f = fopen(fn, "re");
1295 if (errno == ENOENT)
1298 log_warning("Failed to open configuration file %s: %m", fn);
1302 r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1303 (void*) journald_gperf_lookup, false, false, s);
1305 log_warning("Failed to parse configuration file: %s", strerror(-r));
1310 static int server_open_sync_timer(Server *s) {
1312 struct epoll_event ev;
1316 s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1317 if (s->sync_timer_fd < 0)
1321 ev.events = EPOLLIN;
1322 ev.data.fd = s->sync_timer_fd;
1324 r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1326 log_error("Failed to add idle timer fd to epoll object: %m");
1333 int server_schedule_sync(Server *s) {
1338 if (s->sync_scheduled)
1341 if (s->sync_interval_usec) {
1342 struct itimerspec sync_timer_enable = {};
1344 timespec_store(&sync_timer_enable.it_value, s->sync_interval_usec);
1346 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1351 s->sync_scheduled = true;
/* Initializes the server object: sets defaults, reads the configuration
 * file and the kernel command line, takes over sockets passed in by the
 * service manager, opens the remaining input sources and finally opens the
 * journal files. */
1356 int server_init(Server *s) {
/* All fds start out as -1, i.e. "not open". */
1362 s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1363 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1367 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1368 s->sync_scheduled = false;
1370 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1371 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1373 s->forward_to_syslog = true;
1375 s->max_level_store = LOG_DEBUG;
1376 s->max_level_syslog = LOG_DEBUG;
1377 s->max_level_kmsg = LOG_NOTICE;
1378 s->max_level_console = LOG_INFO;
/* Fill both metrics structures with 0xFF bytes; presumably this marks
 * every field as "unset" -- confirm against the metrics handling code. */
1380 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1381 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* Settings from the configuration file are applied first, then those from
 * the kernel command line. */
1383 server_parse_config_file(s);
1384 server_parse_proc_cmdline(s);
/* If exactly one of interval and burst is zero, zero both. */
1385 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1386 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1387 (long long unsigned) s->rate_limit_interval,
1388 s->rate_limit_burst);
1389 s->rate_limit_interval = s->rate_limit_burst = 0;
1392 mkdir_p("/run/systemd/journal", 0755);
1394 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1395 if (!s->user_journals)
1398 s->mmap = mmap_cache_new();
1402 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1403 if (s->epoll_fd < 0) {
1404 log_error("Failed to create epoll object: %m");
/* Classify each passed-in fd by socket type and path; at most one socket
 * of each kind is accepted. */
1408 n = sd_listen_fds(true);
1410 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1414 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1416 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1418 if (s->native_fd >= 0) {
1419 log_error("Too many native sockets passed.");
1425 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1427 if (s->stdout_fd >= 0) {
1428 log_error("Too many stdout sockets passed.");
1434 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1436 if (s->syslog_fd >= 0) {
1437 log_error("Too many /dev/log sockets passed.");
1444 log_error("Unknown socket passed.");
/* Open the input sources and helper objects. */
1449 r = server_open_syslog_socket(s);
1453 r = server_open_native_socket(s);
1457 r = server_open_stdout_socket(s);
1461 r = server_open_dev_kmsg(s);
1465 r = server_open_kernel_seqnum(s);
1469 r = server_open_sync_timer(s);
1473 r = open_signalfd(s);
1477 s->udev = udev_new();
1481 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1482 s->rate_limit_burst);
/* Finally open the journal files themselves. */
1486 r = system_journal_open(s);
/* Calls journal_file_maybe_append_tag() with the current CLOCK_REALTIME
 * time on the system journal (if open) and on every open user journal. */
1493 void server_maybe_append_tags(Server *s) {
/* One timestamp is taken up front and shared by all files. */
1499 n = now(CLOCK_REALTIME);
1501 if (s->system_journal)
1502 journal_file_maybe_append_tag(s->system_journal, n);
1504 HASHMAP_FOREACH(f, s->user_journals, i)
1505 journal_file_maybe_append_tag(f, n);
/* Releases what the server holds: stdout streams, the system, runtime and
 * user journal files, the user-journal hashmap, all well-known fds, the
 * rate limiter, the kernel sequence number mapping, the mmap cache and the
 * udev context. */
1509 void server_done(Server *s) {
/* Free streams until the list head is NULL. */
1513 while (s->stdout_streams)
1514 stdout_stream_free(s->stdout_streams);
1516 if (s->system_journal)
1517 journal_file_close(s->system_journal);
1519 if (s->runtime_journal)
1520 journal_file_close(s->runtime_journal);
/* Drain the hashmap, closing every user journal, before freeing it. */
1522 while ((f = hashmap_steal_first(s->user_journals)))
1523 journal_file_close(f);
1525 hashmap_free(s->user_journals);
/* Close each fd that was opened; -1 marks "not open". */
1527 if (s->epoll_fd >= 0)
1528 close_nointr_nofail(s->epoll_fd);
1530 if (s->signal_fd >= 0)
1531 close_nointr_nofail(s->signal_fd);
1533 if (s->syslog_fd >= 0)
1534 close_nointr_nofail(s->syslog_fd);
1536 if (s->native_fd >= 0)
1537 close_nointr_nofail(s->native_fd);
1539 if (s->stdout_fd >= 0)
1540 close_nointr_nofail(s->stdout_fd);
1542 if (s->dev_kmsg_fd >= 0)
1543 close_nointr_nofail(s->dev_kmsg_fd);
1545 if (s->sync_timer_fd >= 0)
1546 close_nointr_nofail(s->sync_timer_fd);
1549 journal_rate_limit_free(s->rate_limit);
/* The kernel sequence number is an mmap()ed uint64_t. */
1551 if (s->kernel_seqnum)
1552 munmap(s->kernel_seqnum, sizeof(uint64_t));
1558 mmap_cache_unref(s->mmap);
1561 udev_unref(s->udev);