1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
43 #include "conf-parser.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-server.h"
48 #include "journald-rate-limit.h"
49 #include "journald-kmsg.h"
50 #include "journald-syslog.h"
51 #include "journald-stream.h"
52 #include "journald-console.h"
53 #include "journald-native.h"
54 #include "selinux-util.h"
58 #include <acl/libacl.h>
63 #include <selinux/selinux.h>
/* Tunables used further down in this file: the cap on simultaneously open
 * per-user journal files (see find_journal), the default sync interval, the
 * default rate-limit interval and burst (see server_init), and how long a
 * computed free-space figure is reused before it is recomputed (see
 * available_space). */
66 #define USER_JOURNALS_MAX 1024
68 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
69 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
70 #define DEFAULT_RATE_LIMIT_BURST 1000
72 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Textual names for the Storage enum values, indexed by enum value. */
74 static const char* const storage_table[] = {
75 [STORAGE_AUTO] = "auto",
76 [STORAGE_VOLATILE] = "volatile",
77 [STORAGE_PERSISTENT] = "persistent",
78 [STORAGE_NONE] = "none"
/* The two macros are invoked with the table's base name ("storage");
 * presumably they generate the string<->enum lookup helpers and the
 * config_parse_storage callback -- confirm against the macro definitions. */
81 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
82 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Textual names for the SplitMode enum values, indexed by enum value.
 * NOTE(review): only SPLIT_NONE and SPLIT_LOGIN entries are visible in this
 * listing, although SPLIT_UID is used in dispatch_message_real. */
84 static const char* const split_mode_table[] = {
85 [SPLIT_NONE] = "none",
87 [SPLIT_LOGIN] = "login"
/* Same pair of generator macros as for the storage table, here producing
 * (presumably) the split_mode lookups and config_parse_split_mode. */
90 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
91 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Work out how many more bytes the currently active journal directory may
 * grow by, store the result together with the time it was computed in the
 * Server, and return it.
 *
 * s:       server state; supplies the cached value and timestamp, the journal
 *          handles and the per-location metrics (max_use, keep_free).
 * verbose: NOTE(review): the statements that test this flag are not visible
 *          in this listing; presumably it gates the usage message emitted
 *          near the end -- confirm.
 *
 * Returns s->cached_available_space. */
93 static uint64_t available_space(Server *s, bool verbose) {
95 _cleanup_free_ char *p = NULL;
98 uint64_t sum = 0, ss_avail = 0, avail = 0;
100 _cleanup_closedir_ DIR *d = NULL;
105 ts = now(CLOCK_MONOTONIC);
/* Reuse the cached figure while it is younger than
 * RECHECK_AVAILABLE_SPACE_USEC. */
107 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
109 return s->cached_available_space;
111 r = sd_id128_get_machine(&machine);
/* Choose directory prefix and metrics: the /var location with
 * system_metrics when the system journal is open; the /run location with
 * runtime_metrics is the alternative (the else line is not visible here). */
115 if (s->system_journal) {
116 f = "/var/log/journal/";
117 m = &s->system_metrics;
119 f = "/run/log/journal/";
120 m = &s->runtime_metrics;
/* The directory that gets scanned is <prefix><machine id as string>. */
125 p = strappend(f, sd_id128_to_string(machine, ids));
/* File-system statistics of the directory; f_bsize and f_bavail are used
 * below to derive the free space. */
133 if (fstatvfs(dirfd(d), &ss) < 0)
139 union dirent_storage buf;
141 r = readdir_r(d, &buf.de, &de);
/* Only entries named *.journal or *.journal~ are considered ... */
148 if (!endswith(de->d_name, ".journal") &&
149 !endswith(de->d_name, ".journal~"))
152 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
/* ... and of those only regular files (symlinks are not followed). */
155 if (!S_ISREG(st.st_mode))
/* Accumulate disk usage; the code treats st_blocks as 512-byte units. */
158 sum += (uint64_t) st.st_blocks * 512UL;
/* Free bytes on the file system, then the same minus the keep_free
 * reserve, clamped at zero. */
161 ss_avail = ss.f_bsize * ss.f_bavail;
162 avail = ss_avail > m->keep_free ? ss_avail - m->keep_free : 0;
/* The budget is min(max_use, avail); what remains after subtracting the
 * current usage (clamped at zero) is cached with the current timestamp. */
164 s->cached_available_space = MIN(m->max_use, avail) > sum ? MIN(m->max_use, avail) - sum : 0;
165 s->cached_available_space_timestamp = ts;
/* Scratch buffers for format_bytes(), one per value in the message. */
168 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
169 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
171 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
172 "%s journal is using %s (max %s, leaving %s of free %s, current limit %s).",
173 s->system_journal ? "Permanent" : "Runtime",
174 format_bytes(fb1, sizeof(fb1), sum),
175 format_bytes(fb2, sizeof(fb2), m->max_use),
176 format_bytes(fb3, sizeof(fb3), m->keep_free),
177 format_bytes(fb4, sizeof(fb4), ss_avail),
178 format_bytes(fb5, sizeof(fb5), MIN(m->max_use, avail)));
181 return s->cached_available_space;
/* Normalise the permissions of a journal file: set its mode to 0640 and then
 * add ACL_READ for `uid` through a user ACL entry.
 *
 * s:   server state (not referenced in the visible lines).
 * f:   journal file; f->fd is operated on, f->path appears in log messages.
 * uid: user that is to be granted read access.
 *
 * Failures are reported with log_warning() using messages that say the error
 * is being ignored; nothing is returned to the caller. */
184 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
189 acl_permset_t permset;
/* NOTE(review): fchmod() reports failure as -1 with the reason in errno, so
 * strerror(-r) below would describe errno value 1 rather than the actual
 * error; "%m", as used for the ACL calls further down, looks like what was
 * intended -- confirm. */
194 r = fchmod(f->fd, 0640);
196 log_warning("Failed to fix access mode on %s, ignoring: %s", f->path, strerror(-r));
/* Fetch the file's current ACL so that it can be amended. */
202 acl = acl_get_fd(f->fd);
204 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an entry for uid; presumably a new ACL_USER entry qualified
 * with uid is created only when none was found (the guarding condition is
 * not visible in this listing). */
208 r = acl_find_uid(acl, uid, &entry);
211 if (acl_create_entry(&acl, &entry) < 0 ||
212 acl_set_tag_type(entry, ACL_USER) < 0 ||
213 acl_set_qualifier(entry, &uid) < 0) {
214 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
219 /* We do not recalculate the mask unconditionally here,
220 * so that the fchmod() mask above stays intact. */
221 if (acl_get_permset(entry, &permset) < 0 ||
222 acl_add_perm(permset, ACL_READ) < 0 ||
223 calc_acl_mask_if_needed(&acl) < 0) {
224 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Write the amended ACL back to the file. */
228 if (acl_set_fd(f->fd, acl) < 0)
229 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Select the journal file that an entry attributed to `uid` is written to.
 *
 * s:   server state holding the runtime/system journals and the hashmap of
 *      per-user journals.
 * uid: user the entry is attributed to.
 *
 * Returns the runtime journal whenever it is open; otherwise either the
 * per-user journal for uid (opening and registering it when necessary) or,
 * on the fall-back paths below, the system journal.
 * NOTE(review): the conditions guarding several of the early returns are not
 * visible in this listing. */
236 static JournalFile* find_journal(Server *s, uid_t uid) {
237 _cleanup_free_ char *p = NULL;
244 /* We split up user logs only on /var, not on /run. If the
245 * runtime file is open, we write to it exclusively, in order
246 * to guarantee proper order as soon as we flush /run to
247 * /var and close the runtime file. */
249 if (s->runtime_journal)
250 return s->runtime_journal;
253 return s->system_journal;
/* The machine ID is part of the per-user journal path built below. */
255 r = sd_id128_get_machine(&machine);
257 return s->system_journal;
/* Is a journal for this uid already registered? */
259 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
/* Path: /var/log/journal/<machine id>/user-<uid>.journal; an allocation
 * failure falls back to the system journal. */
263 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
264 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
265 return s->system_journal;
/* Keep the number of open user journals below USER_JOURNALS_MAX by
 * closing whichever entry hashmap_steal_first() hands back. */
267 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
268 /* Too many open? Then let's close one */
269 f = hashmap_steal_first(s->user_journals);
271 journal_file_close(f);
/* Open (creating if needed) the user journal with mode 0640 and the
 * system metrics. */
274 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
276 return s->system_journal;
/* Give the owning user read access to the file. */
278 server_fix_perms(s, f, uid);
/* Register the file under uid; the close + fall-back below is presumably
 * the failure path of hashmap_put() (its guard is not visible here). */
280 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
282 journal_file_close(f);
283 return s->system_journal;
/* Rotate every open journal file: the runtime journal, the system journal
 * and each per-user journal.
 *
 * s: server state owning the journals.
 *
 * Rotation errors are logged; the function returns nothing.
 * NOTE(review): the tests on `r` that select between the error messages and
 * the server_fix_perms() calls are not visible in this listing. */
289 void server_rotate(Server *s) {
295 log_debug("Rotating...");
/* Runtime journal: rotated without sealing (last argument is false). */
297 if (s->runtime_journal) {
298 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
/* The handle is passed by address, so it may have been replaced or
 * cleared; the message depends on whether a file is still referenced. */
300 if (s->runtime_journal)
301 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
303 log_error("Failed to create new runtime journal: %s", strerror(-r));
305 server_fix_perms(s, s->runtime_journal, 0);
/* System journal: same procedure, sealing as configured in s->seal. */
308 if (s->system_journal) {
309 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
311 if (s->system_journal)
312 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
314 log_error("Failed to create new system journal: %s", strerror(-r));
317 server_fix_perms(s, s->system_journal, 0);
/* Per-user journals: `k` is the hashmap key (the uid, see PTR_TO_UINT32
 * below) and `f` the file. The map entry is either removed or replaced
 * with the handle left in `f` by the rotation. */
320 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
321 r = journal_file_rotate(&f, s->compress, s->seal);
324 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
326 log_error("Failed to create user journal: %s", strerror(-r));
327 hashmap_remove(s->user_journals, k);
330 hashmap_replace(s->user_journals, k, f);
331 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Put the system journal and every per-user journal into the offline state
 * via journal_file_set_offline(), then stop the sync timer and clear the
 * "sync scheduled" flag.
 *
 * s: server state owning the journals and the sync timer fd.
 *
 * Errors are logged; nothing is returned. The runtime journal is not touched
 * in the visible lines. */
336 void server_sync(Server *s) {
/* All-zero timer specification; passing it to timerfd_settime() below
 * disarms the timer. */
337 static const struct itimerspec sync_timer_disable = {};
343 if (s->system_journal) {
344 r = journal_file_set_offline(s->system_journal);
346 log_error("Failed to sync system journal: %s", strerror(-r));
349 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
350 r = journal_file_set_offline(f);
352 log_error("Failed to sync user journal: %s", strerror(-r));
355 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
357 log_error("Failed to disable max timer: %m");
/* A later server_schedule_sync() call may arm the timer again. */
359 s->sync_scheduled = false;
/* Run journal_directory_vacuum() on the per-machine journal directories that
 * correspond to the currently open system and runtime journals.
 *
 * s: server state; supplies the metrics (max_use, keep_free), the retention
 *    limit, and receives the oldest-file timestamp.
 *
 * Errors other than -ENOENT are logged; nothing is returned. */
362 void server_vacuum(Server *s) {
367 log_debug("Vacuuming...");
/* Reset before the vacuum calls, which receive a pointer to this field. */
369 s->oldest_file_usec = 0;
371 r = sd_id128_get_machine(&machine);
373 log_error("Failed to get machine ID: %s", strerror(-r));
/* String form of the machine ID, used as the directory name below. */
377 sd_id128_to_string(machine, ids);
379 if (s->system_journal) {
380 char *p = strappenda("/var/log/journal/", ids);
382 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
383 if (r < 0 && r != -ENOENT)
384 log_error("Failed to vacuum %s: %s", p, strerror(-r));
387 if (s->runtime_journal) {
388 char *p = strappenda("/run/log/journal/", ids);
390 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
391 if (r < 0 && r != -ENOENT)
392 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* Zeroing the timestamp makes the cache-age test in available_space()
 * fail on its next call, so the figure is recomputed. */
395 s->cached_available_space_timestamp = 0;
398 bool shall_try_append_again(JournalFile *f, int r) {
400 /* -E2BIG Hit configured limit
402 -EDQUOT Quota limit hit
404 -EHOSTDOWN Other machine
405 -EBUSY Unclean shutdown
406 -EPROTONOSUPPORT Unsupported feature
409 -ESHUTDOWN Already archived */
411 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
412 log_debug("%s: Allocation limit reached, rotating.", f->path);
413 else if (r == -EHOSTDOWN)
414 log_info("%s: Journal file from other machine, rotating.", f->path);
415 else if (r == -EBUSY)
416 log_info("%s: Unclean shutdown, rotating.", f->path);
417 else if (r == -EPROTONOSUPPORT)
418 log_info("%s: Unsupported feature, rotating.", f->path);
419 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
420 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Append one entry to the journal selected for `uid`; after a failed append
 * the visible code looks the journal up again and retries once.
 *
 * s:        server state.
 * uid:      passed to find_journal() to select the target file.
 * iovec, n: the entry's fields and their count.
 * priority: forwarded to server_schedule_sync() after a successful write.
 *
 * Nothing is returned; failures are logged with the entry's item count and
 * total byte size.
 * NOTE(review): the rotate/vacuum calls and the tests on `r` between the
 * visible statements are missing from this listing. */
427 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
429 bool vacuumed = false;
436 f = find_journal(s, uid);
/* Rotate up front when the file itself suggests it; the journal is then
 * looked up again because the handle may have changed. */
440 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
441 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
446 f = find_journal(s, uid);
/* First attempt. */
451 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
453 server_schedule_sync(s, priority);
/* Give up if a vacuum already happened or the error is not one that
 * shall_try_append_again() considers retryable. */
457 if (vacuumed || !shall_try_append_again(f, r)) {
460 for (i = 0; i < n; i++)
461 size += iovec[i].iov_len;
463 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
/* Second attempt against a freshly looked-up journal. */
470 f = find_journal(s, uid);
474 log_debug("Retrying write.");
475 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
479 for (i = 0; i < n; i++)
480 size += iovec[i].iov_len;
482 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
484 server_schedule_sync(s, priority);
/* Extend an entry's field array with metadata fields and pass it to
 * write_to_journal().
 *
 * iovec, n, m: field array, number of fields already filled in, and array
 *              capacity; the assert below requires room for the metadata
 *              fields (plus the OBJECT_* fields when object_pid is set).
 * label, label_len: security label bytes, copied into _SELINUX_CONTEXT=.
 * Other parameters are not visible in this listing; the body additionally
 * uses s, ucred, tv, unit_id, priority and object_pid.
 *
 * NOTE(review): most `if (r >= 0)` guards, free() calls and #ifdef lines are
 * missing from this listing, so the exact conditions under which each field
 * is added cannot be read off here. */
487 static void dispatch_message_real(
489 struct iovec *iovec, unsigned n, unsigned m,
492 const char *label, size_t label_len,
/* Fixed-size buffers for numeric fields: field name plus the decimal
 * width of the respective type (or 32 characters for the ID fields). */
497 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
498 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
499 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
500 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
501 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
502 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
503 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=",
504 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
505 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
506 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
513 uid_t realuid = 0, owner = 0, journal_uid;
514 bool owner_valid = false;
516 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
517 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
518 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
519 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
528 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
/* --- Fields derived from the sender's credentials (ucred) --- */
531 realuid = ucred->uid;
533 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
534 IOVEC_SET_STRING(iovec[n++], pid);
536 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
537 IOVEC_SET_STRING(iovec[n++], uid);
539 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
540 IOVEC_SET_STRING(iovec[n++], gid);
/* Process name, executable, command line and effective capabilities of
 * the sending PID; each string is built with strappenda(). */
542 r = get_process_comm(ucred->pid, &t);
544 x = strappenda("_COMM=", t);
546 IOVEC_SET_STRING(iovec[n++], x);
549 r = get_process_exe(ucred->pid, &t);
551 x = strappenda("_EXE=", t);
553 IOVEC_SET_STRING(iovec[n++], x);
556 r = get_process_cmdline(ucred->pid, 0, false, &t);
558 x = strappenda("_CMDLINE=", t);
560 IOVEC_SET_STRING(iovec[n++], x);
563 r = get_process_capeff(ucred->pid, &t);
565 x = strappenda("_CAP_EFFECTIVE=", t);
567 IOVEC_SET_STRING(iovec[n++], x);
/* Audit session and login uid of the sending PID. */
571 r = audit_session_from_pid(ucred->pid, &audit);
573 sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
574 IOVEC_SET_STRING(iovec[n++], audit_session);
577 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
579 sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
580 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* Control-group path of the sender and the values derived from it:
 * session, owner uid, unit, user unit and slice. */
584 r = cg_pid_get_path_shifted(ucred->pid, NULL, &c);
586 char *session = NULL;
588 x = strappenda("_SYSTEMD_CGROUP=", c);
589 IOVEC_SET_STRING(iovec[n++], x);
591 r = cg_path_get_session(c, &t);
593 session = strappenda("_SYSTEMD_SESSION=", t);
595 IOVEC_SET_STRING(iovec[n++], session);
/* `owner` is consulted again at the end when the target journal is
 * chosen. */
598 if (cg_path_get_owner_uid(c, &owner) >= 0) {
601 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
602 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* When the cgroup path yields no unit, the caller-supplied unit_id is
 * used instead: as _SYSTEMD_UNIT when no session was found, as
 * _SYSTEMD_USER_UNIT when one was. */
605 if (cg_path_get_unit(c, &t) >= 0) {
606 x = strappenda("_SYSTEMD_UNIT=", t);
608 IOVEC_SET_STRING(iovec[n++], x);
609 } else if (unit_id && !session) {
610 x = strappenda("_SYSTEMD_UNIT=", unit_id);
611 IOVEC_SET_STRING(iovec[n++], x);
614 if (cg_path_get_user_unit(c, &t) >= 0) {
615 x = strappenda("_SYSTEMD_USER_UNIT=", t);
617 IOVEC_SET_STRING(iovec[n++], x);
618 } else if (unit_id && session) {
619 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
620 IOVEC_SET_STRING(iovec[n++], x);
623 if (cg_path_get_slice(c, &t) >= 0) {
624 x = strappenda("_SYSTEMD_SLICE=", t);
626 IOVEC_SET_STRING(iovec[n++], x);
/* Security context: built from the label bytes passed by the caller
 * (copied and NUL-terminated by hand since only label_len bytes are
 * copied), or obtained with getpidcon(); the condition selecting between
 * the two is not visible in this listing. */
635 x = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);
637 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
638 IOVEC_SET_STRING(iovec[n++], x);
640 security_context_t con;
642 if (getpidcon(ucred->pid, &con) >= 0) {
643 x = strappenda("_SELINUX_CONTEXT=", con);
646 IOVEC_SET_STRING(iovec[n++], x);
/* --- The same kinds of fields for object_pid, with an OBJECT_ prefix
 * instead of the leading underscore --- */
655 r = get_process_uid(object_pid, &object_uid);
657 sprintf(o_uid, "OBJECT_UID=%lu", (unsigned long) object_uid);
658 IOVEC_SET_STRING(iovec[n++], o_uid);
661 r = get_process_gid(object_pid, &object_gid);
663 sprintf(o_gid, "OBJECT_GID=%lu", (unsigned long) object_gid);
664 IOVEC_SET_STRING(iovec[n++], o_gid);
667 r = get_process_comm(object_pid, &t);
669 x = strappenda("OBJECT_COMM=", t);
671 IOVEC_SET_STRING(iovec[n++], x);
674 r = get_process_exe(object_pid, &t);
676 x = strappenda("OBJECT_EXE=", t);
678 IOVEC_SET_STRING(iovec[n++], x);
681 r = get_process_cmdline(object_pid, 0, false, &t);
683 x = strappenda("OBJECT_CMDLINE=", t);
685 IOVEC_SET_STRING(iovec[n++], x);
689 r = audit_session_from_pid(object_pid, &audit);
691 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%lu", (unsigned long) audit);
692 IOVEC_SET_STRING(iovec[n++], o_audit_session);
695 r = audit_loginuid_from_pid(object_pid, &loginuid);
697 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
698 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
702 r = cg_pid_get_path_shifted(object_pid, NULL, &c);
704 x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
705 IOVEC_SET_STRING(iovec[n++], x);
707 r = cg_path_get_session(c, &t);
709 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
711 IOVEC_SET_STRING(iovec[n++], x);
/* NOTE(review): this writes into the same `owner` variable that the
 * journal selection at the end reads -- confirm this is intended. */
714 if (cg_path_get_owner_uid(c, &owner) >= 0) {
715 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
716 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
719 if (cg_path_get_unit(c, &t) >= 0) {
720 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
722 IOVEC_SET_STRING(iovec[n++], x);
725 if (cg_path_get_user_unit(c, &t) >= 0) {
726 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
728 IOVEC_SET_STRING(iovec[n++], x);
/* --- Fields that do not depend on a PID: source timestamp (from tv),
 * boot ID, machine ID and host name --- */
737 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
738 IOVEC_SET_STRING(iovec[n++], source_time);
741 /* Note that strictly speaking storing the boot id here is
742 * redundant since the entry includes this in-line
743 * anyway. However, we need this indexed, too. */
744 r = sd_id128_get_boot(&id);
746 sd_id128_to_string(id, boot_id + strlen("_BOOT_ID="));
747 IOVEC_SET_STRING(iovec[n++], boot_id);
750 r = sd_id128_get_machine(&id);
752 sd_id128_to_string(id, machine_id + strlen("_MACHINE_ID="));
753 IOVEC_SET_STRING(iovec[n++], machine_id);
756 t = gethostname_malloc();
758 x = strappenda("_HOSTNAME=", t);
760 IOVEC_SET_STRING(iovec[n++], x);
/* --- Choose the uid whose journal receives the entry. The assignments
 * for the SPLIT_LOGIN branch and for the default case are not visible in
 * this listing. --- */
765 if (s->split_mode == SPLIT_UID && realuid > 0)
766 /* Split up strictly by any UID */
767 journal_uid = realuid;
768 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
769 /* Split up by login UIDs, this avoids creation of
770 * individual journals for system UIDs. We do this
771 * only if the realuid is not root, in order not to
772 * accidentally leak privileged information to the
773 * user that is logged by a privileged process that is
774 * part of an unprivileged session.*/
779 write_to_journal(s, journal_uid, iovec, n, priority);
/* Emit a journal entry that originates from the journal daemon itself.
 *
 * s:          server state.
 * message_id: added as a MESSAGE_ID field unless it equals SD_ID128_NULL.
 * format ...: printf-style format and arguments for the MESSAGE= text.
 *
 * The entry carries PRIORITY=6 and _TRANSPORT=driver, is attributed to the
 * daemon's own pid/uid/gid and is handed to dispatch_message_real() with
 * priority LOG_INFO. Nothing is returned. */
782 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
/* 11 + 32 + 1: presumably the length of "MESSAGE_ID=", 32 ID characters
 * and the terminating NUL -- confirm against MESSAGE_ID(). */
783 char mid[11 + 32 + 1];
784 char buffer[16 + LINE_MAX + 1];
/* Room for the metadata fields added by dispatch_message_real() plus the
 * (up to) four fields set here. */
785 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
788 struct ucred ucred = {};
793 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
794 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The formatted text is written directly behind the 8-byte "MESSAGE="
 * prefix. */
796 memcpy(buffer, "MESSAGE=", 8);
797 va_start(ap, format);
798 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
800 char_array_0(buffer);
801 IOVEC_SET_STRING(iovec[n++], buffer);
803 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
804 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
806 IOVEC_SET_STRING(iovec[n++], mid);
/* Credentials of this process stand in for a sender's. */
809 ucred.pid = getpid();
810 ucred.uid = getuid();
811 ucred.gid = getgid();
813 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
/* Entry point for messages received from clients: filter by priority and
 * storage mode, apply rate limiting keyed on a prefix of the sender's
 * control-group path, then hand the entry to dispatch_message_real().
 *
 * iovec, n, m:      field array, used count and capacity (iovec may be NULL
 *                   only when n is 0, see the assert).
 * label, label_len: security label, passed through unchanged.
 * Other parameters are not visible in this listing; the body additionally
 * uses s, ucred, tv, unit_id, priority and object_pid.
 *
 * When journal_rate_limit_test() reports suppressed messages, a
 * "Suppressed %u messages" driver message is written first.
 * NOTE(review): the early returns and the tests on `r` and `rl` are missing
 * from this listing. */
816 void server_dispatch_message(
818 struct iovec *iovec, unsigned n, unsigned m,
821 const char *label, size_t label_len,
827 _cleanup_free_ char *path = NULL;
831 assert(iovec || n == 0);
836 if (LOG_PRI(priority) > s->max_level_store)
839 /* Stop early in case the information will not be stored
841 if (s->storage == STORAGE_NONE)
847 r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
851 /* example: /user/lennart/3/foobar
852 * /system/dbus.service/foobar
854 * So let's cut of everything past the third /, since that is
855 * where user directories start */
857 c = strchr(path, '/');
859 c = strchr(c+1, '/');
861 c = strchr(c+1, '/');
/* The free-space figure is handed to the rate limiter alongside the
 * priority level bits. */
867 rl = journal_rate_limit_test(s->rate_limit, path,
868 priority & LOG_PRIMASK, available_space(s, false));
873 /* Write a suppression message if we suppressed something */
875 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
876 "Suppressed %u messages from %s", rl - 1, path);
879 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
/* Open the persistent system journal under /var/log/journal and/or the
 * runtime journal under /run/log/journal, depending on the storage mode and
 * on which of the two is already open.
 *
 * s: server state; s->system_journal and s->runtime_journal are filled in.
 *
 * Returns an int; the return statements are not visible in this listing.
 * NOTE(review): most tests on `r` are likewise missing here. */
883 static int system_journal_open(Server *s) {
889 r = sd_id128_get_machine(&machine);
891 log_error("Failed to get machine id: %s", strerror(-r));
895 sd_id128_to_string(machine, ids);
/* The persistent journal is attempted only in persistent/auto mode and
 * only once the flag file /run/systemd/journal/flushed exists. */
897 if (!s->system_journal &&
898 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
899 access("/run/systemd/journal/flushed", F_OK) >= 0) {
901 /* If in auto mode: first try to create the machine
902 * path, but not the prefix.
904 * If in persistent mode: create /var/log/journal and
905 * the machine path */
907 if (s->storage == STORAGE_PERSISTENT)
908 (void) mkdir("/var/log/journal/", 0755);
910 fn = strappenda("/var/log/journal/", ids);
911 (void) mkdir(fn, 0755);
913 fn = strappenda(fn, "/system.journal");
914 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
917 server_fix_perms(s, s->system_journal, 0);
/* -ENOENT and -EROFS are not reported. */
919 if (r != -ENOENT && r != -EROFS)
920 log_warning("Failed to open system journal: %s", strerror(-r));
/* The runtime journal is handled in every storage mode except "none". */
926 if (!s->runtime_journal &&
927 (s->storage != STORAGE_NONE)) {
929 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
933 if (s->system_journal) {
935 /* Try to open the runtime journal, but only
936 * if it already exists, so that we can flush
937 * it into the system journal */
939 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
944 log_warning("Failed to open runtime journal: %s", strerror(-r));
951 /* OK, we really need the runtime journal, so create
952 * it if necessary. */
954 (void) mkdir_parents(fn, 0755);
955 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
959 log_error("Failed to open runtime journal: %s", strerror(-r));
964 if (s->runtime_journal)
965 server_fix_perms(s, s->runtime_journal, 0);
/* Called with verbose set to true; the returned value is not used. */
968 available_space(s, true);
/* Copy every entry of the runtime journal into the system journal, then
 * close the runtime journal and remove /run/log/journal.
 *
 * s: server state.
 *
 * Returns an int; the return statements are not visible in this listing.
 * NOTE(review): the early returns, the tests on `r` and the rotate/vacuum
 * calls between the two copy attempts are missing here. */
973 int server_flush_to_var(Server *s) {
976 sd_journal *j = NULL;
/* Only relevant in auto or persistent mode and while a runtime journal
 * is open. */
980 if (s->storage != STORAGE_AUTO &&
981 s->storage != STORAGE_PERSISTENT)
984 if (!s->runtime_journal)
/* Make sure the system journal is open before copying into it. */
987 system_journal_open(s);
989 if (!s->system_journal)
992 log_debug("Flushing to /var...");
994 r = sd_id128_get_machine(&machine);
/* Read the runtime journal back through the client API. */
998 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1000 log_error("Failed to read runtime journal: %s", strerror(-r));
1004 sd_journal_set_data_threshold(j, 0);
1006 SD_JOURNAL_FOREACH(j) {
/* Locate the entry object the iterator currently points at. */
1010 f = j->current_file;
1011 assert(f && f->current_offset > 0);
1013 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1015 log_error("Can't read entry: %s", strerror(-r));
/* First copy attempt. */
1019 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1023 if (!shall_try_append_again(s->system_journal, r)) {
1024 log_error("Can't write entry: %s", strerror(-r));
/* The system journal handle can be NULL at this point; flushing is
 * abandoned in that case. */
1031 if (!s->system_journal) {
1032 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
/* Second copy attempt. */
1037 log_debug("Retrying write.");
1038 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1040 log_error("Can't write entry: %s", strerror(-r));
1046 journal_file_post_change(s->system_journal);
/* The runtime journal is no longer needed: close it, forget the handle
 * and remove its directory tree. */
1048 journal_file_close(s->runtime_journal);
1049 s->runtime_journal = NULL;
1052 rm_rf("/run/log/journal", false, true, false);
1054 sd_journal_close(j);
/* Handle one epoll event by comparing ev->data.fd against the server's
 * well-known file descriptors: signal fd, sync timer fd, /dev/kmsg fd,
 * native and syslog datagram sockets, and the stdout listening socket.
 * Anything else is treated as a stdout stream whose pointer is stored in
 * ev->data.ptr.
 *
 * s:  server state.
 * ev: the event delivered by epoll.
 *
 * Returns an int; the return statements are not visible in this listing.
 * NOTE(review): the calls made for SIGUSR2, for other signals and for the
 * sync timer are among the lines missing here. */
1059 int process_event(Server *s, struct epoll_event *ev) {
/* --- Signals --- */
1063 if (ev->data.fd == s->signal_fd) {
1064 struct signalfd_siginfo sfsi;
1067 if (ev->events != EPOLLIN) {
1068 log_error("Got invalid event from epoll for %s: %"PRIx32,
1069 "signal fd", ev->events);
/* Anything but a full signalfd_siginfo record is a failed read;
 * EINTR and EAGAIN get special treatment. */
1073 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1074 if (n != sizeof(sfsi)) {
1079 if (errno == EINTR || errno == EAGAIN)
/* SIGUSR1: create the "flushed" flag file (tested by
 * system_journal_open) and flush the runtime journal to /var. */
1085 if (sfsi.ssi_signo == SIGUSR1) {
1086 log_info("Received request to flush runtime journal from PID %"PRIu32,
1088 touch("/run/systemd/journal/flushed");
1089 server_flush_to_var(s);
1094 if (sfsi.ssi_signo == SIGUSR2) {
1095 log_info("Received request to rotate journal from PID %"PRIu32,
1102 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
/* --- Sync timer --- */
1106 } else if (ev->data.fd == s->sync_timer_fd) {
1110 log_debug("Got sync request from epoll.");
/* Read the timer fd's value into t. */
1112 r = read(ev->data.fd, (void *)&t, sizeof(t));
/* --- Kernel log --- */
1119 } else if (ev->data.fd == s->dev_kmsg_fd) {
1122 if (ev->events & EPOLLERR)
1123 log_warning("/dev/kmsg buffer overrun, some messages lost.");
1125 if (!(ev->events & EPOLLIN)) {
1126 log_error("Got invalid event from epoll for %s: %"PRIx32,
1127 "/dev/kmsg", ev->events);
1131 r = server_read_dev_kmsg(s);
/* --- Native and syslog datagram sockets share one receive path --- */
1137 } else if (ev->data.fd == s->native_fd ||
1138 ev->data.fd == s->syslog_fd) {
1140 if (ev->events != EPOLLIN) {
1141 log_error("Got invalid event from epoll for %s: %"PRIx32,
1142 ev->data.fd == s->native_fd ? "native fd" : "syslog fd",
1148 struct ucred *ucred = NULL;
1149 struct timeval *tv = NULL;
1150 struct cmsghdr *cmsg;
1152 size_t label_len = 0;
1156 struct cmsghdr cmsghdr;
1158 /* We use NAME_MAX space for the
1159 * SELinux label here. The kernel
1160 * currently enforces no limit, but
1161 * according to suggestions from the
1162 * SELinux people this will change and
1163 * it will probably be identical to
1164 * NAME_MAX. For now we use that, but
1165 * this should be updated one day when
1166 * the final limit is known.*/
1167 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1168 CMSG_SPACE(sizeof(struct timeval)) +
1169 CMSG_SPACE(sizeof(int)) + /* fd */
1170 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1172 struct msghdr msghdr = {
1175 .msg_control = &control,
1176 .msg_controllen = sizeof(control),
/* Ask the socket how many bytes are pending (SIOCINQ) and grow the
 * receive buffer to LINE_MAX plus that amount. */
1184 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1185 log_error("SIOCINQ failed: %m");
1189 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, LINE_MAX + (size_t) v))
1192 iovec.iov_base = s->buffer;
1193 iovec.iov_len = s->buffer_size;
1195 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1197 if (errno == EINTR || errno == EAGAIN)
1200 log_error("recvmsg() failed: %m");
/* Walk the control messages: sender credentials, security label,
 * receive timestamp and passed file descriptors. Credentials and
 * timestamp are accepted only when the length matches exactly. */
1204 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1206 if (cmsg->cmsg_level == SOL_SOCKET &&
1207 cmsg->cmsg_type == SCM_CREDENTIALS &&
1208 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1209 ucred = (struct ucred*) CMSG_DATA(cmsg);
1210 else if (cmsg->cmsg_level == SOL_SOCKET &&
1211 cmsg->cmsg_type == SCM_SECURITY) {
1212 label = (char*) CMSG_DATA(cmsg);
1213 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1214 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1215 cmsg->cmsg_type == SO_TIMESTAMP &&
1216 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1217 tv = (struct timeval*) CMSG_DATA(cmsg);
1218 else if (cmsg->cmsg_level == SOL_SOCKET &&
1219 cmsg->cmsg_type == SCM_RIGHTS) {
1220 fds = (int*) CMSG_DATA(cmsg);
1221 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* syslog socket: payload only; descriptors are not accepted. */
1225 if (ev->data.fd == s->syslog_fd) {
1226 if (n > 0 && n_fds == 0) {
1228 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1229 } else if (n_fds > 0)
1230 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* native socket: either an inline payload without descriptors, or
 * an empty datagram carrying exactly one descriptor. */
1233 if (n > 0 && n_fds == 0)
1234 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1235 else if (n == 0 && n_fds == 1)
1236 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1238 log_warning("Got too many file descriptors via native socket. Ignoring.");
/* Received descriptors are closed here in every case. */
1241 close_many(fds, n_fds);
/* --- New connection on the stdout listening socket --- */
1246 } else if (ev->data.fd == s->stdout_fd) {
1248 if (ev->events != EPOLLIN) {
1249 log_error("Got invalid event from epoll for %s: %"PRIx32,
1250 "stdout fd", ev->events);
1254 stdout_stream_new(s);
1258 StdoutStream *stream;
/* Only EPOLLIN and EPOLLHUP bits are acceptable for a stream. */
1260 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1261 log_error("Got invalid event from epoll for %s: %"PRIx32,
1262 "stdout stream", ev->events);
1266 /* If it is none of the well-known fds, it must be an
1267 * stdout stream fd. Note that this is a bit ugly here
1268 * (since we rely that none of the well-known fds
1269 * could be interpreted as pointer), but nonetheless
1270 * safe, since the well-known fds would never get an
1271 * fd > 4096, i.e. beyond the first memory page */
1273 stream = ev->data.ptr;
/* A result <= 0 from processing ends the stream. */
1275 if (stdout_stream_process(stream) <= 0)
1276 stdout_stream_free(stream);
1281 log_error("Unknown event.");
/* Block SIGINT, SIGTERM, SIGUSR1 and SIGUSR2 for normal delivery, create a
 * non-blocking, close-on-exec signalfd for them and register it with the
 * server's epoll instance for EPOLLIN.
 *
 * s: server state; s->signal_fd is set, s->epoll_fd must already be open.
 *
 * Returns an int; the return statements are not visible in this listing.
 * Failures of signalfd() and epoll_ctl() are logged. */
1285 static int open_signalfd(Server *s) {
1287 struct epoll_event ev;
/* SIG_SETMASK replaces the process signal mask with exactly this set. */
1291 assert_se(sigemptyset(&mask) == 0);
1292 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1293 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1295 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1296 if (s->signal_fd < 0) {
1297 log_error("signalfd(): %m");
/* process_event() recognises this source by comparing data.fd. */
1302 ev.events = EPOLLIN;
1303 ev.data.fd = s->signal_fd;
1305 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1306 log_error("epoll_ctl(): %m");
/* Apply journal-related switches found on the kernel command line
 * (/proc/cmdline) to the server configuration.
 *
 * Recognised words, each taking a boolean value:
 *   systemd.journald.forward_to_syslog=
 *   systemd.journald.forward_to_kmsg=
 *   systemd.journald.forward_to_console=
 * Any other word starting with "systemd.journald" triggers a warning.
 *
 * s: server state whose forward_to_* flags are updated.
 *
 * Returns an int; the return statements are not visible in this listing. */
1313 static int server_parse_proc_cmdline(Server *s) {
1314 _cleanup_free_ char *line = NULL;
/* The test is on running inside a container; the statement it guards is
 * not visible here (presumably an early return -- confirm). */
1319 if (detect_container(NULL) > 0)
1322 r = read_one_line_file("/proc/cmdline", &line);
1324 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1328 FOREACH_WORD_QUOTED(w, l, line, state) {
/* Private NUL-terminated copy of the current word. */
1329 _cleanup_free_ char *word;
1331 word = strndup(w, l);
/* The numeric offsets below (35, 33, 36) equal the lengths of the
 * respective prefix strings, so `word + N` is the value part. */
1335 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1336 r = parse_boolean(word + 35);
1338 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1340 s->forward_to_syslog = r;
1341 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1342 r = parse_boolean(word + 33);
1344 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1346 s->forward_to_kmsg = r;
1347 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1348 r = parse_boolean(word + 36);
1350 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1352 s->forward_to_console = r;
1353 } else if (startswith(word, "systemd.journald"))
1354 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Load /etc/systemd/journald.conf into the server configuration.
 *
 * s: server state, passed to config_parse() as the user data argument.
 *
 * Returns an int; the return statements are not visible in this listing.
 * A missing file (ENOENT) is handled separately from other open errors,
 * which are logged as warnings, as are parse failures. */
1360 static int server_parse_config_file(Server *s) {
1361 static const char fn[] = "/etc/systemd/journald.conf";
1362 _cleanup_fclose_ FILE *f = NULL;
/* "e" in the mode string requests close-on-exec (glibc extension). */
1367 f = fopen(fn, "re");
1369 if (errno == ENOENT)
1372 log_warning("Failed to open configuration file %s: %m", fn);
/* Parse with "Journal" given as the section list and
 * journald_gperf_lookup as the lookup table. */
1376 r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1377 (void*) journald_gperf_lookup, false, false, s);
1379 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Create the timerfd used for deferred journal syncs (CLOCK_MONOTONIC,
 * close-on-exec) and register it with the server's epoll instance for
 * EPOLLIN.
 *
 * s: server state; s->sync_timer_fd is set, s->epoll_fd must be open.
 *
 * Returns an int; the return statements are not visible in this listing. */
1384 static int server_open_sync_timer(Server *s) {
1386 struct epoll_event ev;
1390 s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1391 if (s->sync_timer_fd < 0)
/* process_event() recognises this source by comparing data.fd. */
1395 ev.events = EPOLLIN;
1396 ev.data.fd = s->sync_timer_fd;
1398 r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
/* NOTE(review): the message says "idle timer" although the fd being
 * added is the sync timer. */
1400 log_error("Failed to add idle timer fd to epoll object: %m");
/* Arrange for the journals to be synced after a message of the given
 * priority has been written.
 *
 * s:        server state (sync timer fd, interval, "scheduled" flag).
 * priority: syslog priority of the message just written.
 *
 * Returns an int; the return statements are not visible in this listing.
 * NOTE(review): the statements inside the first two branches are missing
 * here; per the existing comment the first one syncs immediately. */
1407 int server_schedule_sync(Server *s, int priority) {
1412 if (priority <= LOG_CRIT) {
1413 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
/* A sync is already pending. */
1418 if (s->sync_scheduled)
/* Arm the timer with the configured interval as its expiry value; the
 * interval member of the itimerspec stays zero. A sync interval of 0
 * skips this branch. */
1421 if (s->sync_interval_usec) {
1422 struct itimerspec sync_timer_enable = {};
1424 timespec_store(&sync_timer_enable.it_value, s->sync_interval_usec);
1426 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1431 s->sync_scheduled = true;
/* Initialise a Server: set defaults, read configuration, create the runtime
 * directory and core objects, adopt passed-in sockets, open the remaining
 * input sources and finally open the journal files.
 *
 * s: server state to initialise.
 *
 * Returns an int; the return statements are not visible in this listing.
 * NOTE(review): the error checks after most calls and the assignments that
 * adopt the passed-in sockets are missing here. */
1436 int server_init(Server *s) {
/* Mark every file descriptor as "not open". */
1442 s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1443 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
/* Defaults, which the configuration sources parsed below can change. */
1447 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1448 s->sync_scheduled = false;
1450 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1451 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1453 s->forward_to_syslog = true;
1455 s->max_level_store = LOG_DEBUG;
1456 s->max_level_syslog = LOG_DEBUG;
1457 s->max_level_kmsg = LOG_NOTICE;
1458 s->max_level_console = LOG_INFO;
/* Fill both metrics structs with 0xFF bytes; presumably all-ones marks a
 * value as "not configured" -- confirm against the journal-file code. */
1460 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1461 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* Configuration file first, then the kernel command line. */
1463 server_parse_config_file(s);
1464 server_parse_proc_cmdline(s);
/* If exactly one of interval and burst is zero, set both to zero. */
1465 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1466 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1467 (long long unsigned) s->rate_limit_interval,
1468 s->rate_limit_burst);
1469 s->rate_limit_interval = s->rate_limit_burst = 0;
1472 mkdir_p("/run/systemd/journal", 0755);
/* Core objects: per-user journal map, mmap cache, epoll instance. */
1474 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1475 if (!s->user_journals)
1478 s->mmap = mmap_cache_new();
1482 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1483 if (s->epoll_fd < 0) {
1484 log_error("Failed to create epoll object: %m");
/* Sockets handed over by the service manager. */
1488 n = sd_listen_fds(true);
1490 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
/* Classify each passed descriptor by socket type and path; a second
 * socket of the same kind, or one matching none of the three, is an
 * error. */
1494 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1496 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1498 if (s->native_fd >= 0) {
1499 log_error("Too many native sockets passed.");
1505 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1507 if (s->stdout_fd >= 0) {
1508 log_error("Too many stdout sockets passed.");
1514 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1516 if (s->syslog_fd >= 0) {
1517 log_error("Too many /dev/log sockets passed.");
1524 log_error("Unknown socket passed.");
/* Open the input sources and helper fds. */
1529 r = server_open_syslog_socket(s);
1533 r = server_open_native_socket(s);
1537 r = server_open_stdout_socket(s);
1541 r = server_open_dev_kmsg(s);
1545 r = server_open_kernel_seqnum(s);
1549 r = server_open_sync_timer(s);
1553 r = open_signalfd(s);
1557 s->udev = udev_new();
/* The rate limiter is created from the (possibly zeroed) settings. */
1561 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1562 s->rate_limit_burst);
/* Finally open the journal files themselves. */
1566 r = system_journal_open(s);
/* Call journal_file_maybe_append_tag() with the current CLOCK_REALTIME
 * timestamp on the system journal (when open) and on every per-user
 * journal. The runtime journal is not touched in the visible lines.
 *
 * s: server state owning the journals.
 *
 * Nothing is returned. */
1573 void server_maybe_append_tags(Server *s) {
/* One timestamp is taken and reused for all files. */
1579 n = now(CLOCK_REALTIME);
1581 if (s->system_journal)
1582 journal_file_maybe_append_tag(s->system_journal, n);
1584 HASHMAP_FOREACH(f, s->user_journals, i)
1585 journal_file_maybe_append_tag(f, n);
/* Release everything a Server owns: stdout streams, journal files, the
 * per-user journal map, all file descriptors, the rate limiter, the kernel
 * sequence-number mapping, the mmap cache and the udev context.
 *
 * s: server state to tear down.
 *
 * Nothing is returned. */
1589 void server_done(Server *s) {
/* Loop until s->stdout_streams is NULL; this relies on
 * stdout_stream_free() updating that pointer as streams are freed. */
1593 while (s->stdout_streams)
1594 stdout_stream_free(s->stdout_streams);
1596 if (s->system_journal)
1597 journal_file_close(s->system_journal);
1599 if (s->runtime_journal)
1600 journal_file_close(s->runtime_journal);
/* Drain the map, closing each user journal, before freeing the map. */
1602 while ((f = hashmap_steal_first(s->user_journals)))
1603 journal_file_close(f);
1605 hashmap_free(s->user_journals);
/* File descriptors use -1 for "not open" (see server_init), hence the
 * >= 0 checks. */
1607 if (s->epoll_fd >= 0)
1608 close_nointr_nofail(s->epoll_fd);
1610 if (s->signal_fd >= 0)
1611 close_nointr_nofail(s->signal_fd);
1613 if (s->syslog_fd >= 0)
1614 close_nointr_nofail(s->syslog_fd);
1616 if (s->native_fd >= 0)
1617 close_nointr_nofail(s->native_fd);
1619 if (s->stdout_fd >= 0)
1620 close_nointr_nofail(s->stdout_fd);
1622 if (s->dev_kmsg_fd >= 0)
1623 close_nointr_nofail(s->dev_kmsg_fd);
1625 if (s->sync_timer_fd >= 0)
1626 close_nointr_nofail(s->sync_timer_fd);
1629 journal_rate_limit_free(s->rate_limit);
/* The kernel sequence number is a mapping of a single uint64_t. */
1631 if (s->kernel_seqnum)
1632 munmap(s->kernel_seqnum, sizeof(uint64_t));
1638 mmap_cache_unref(s->mmap);
1641 udev_unref(s->udev);