1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
43 #include "conf-parser.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-server.h"
48 #include "journald-rate-limit.h"
49 #include "journald-kmsg.h"
50 #include "journald-syslog.h"
51 #include "journald-stream.h"
52 #include "journald-console.h"
53 #include "journald-native.h"
57 #include <acl/libacl.h>
62 #include <selinux/selinux.h>
/* Cap on the number of entries in s->user_journals: find_journal() closes
 * entries while the map size is at or above this value. */
65 #define USER_JOURNALS_MAX 1024
/* Defaults that server_init() stores into the Server before the config file
 * and the kernel command line are parsed. */
67 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
68 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
69 #define DEFAULT_RATE_LIMIT_BURST 1000
/* Maximum age of the value cached by available_space() before it is
 * recomputed. */
71 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* String names for the Storage enum values. The two macros below generate the
 * string<->enum lookup helpers and the config-file parser callback
 * config_parse_storage from this table. */
73 static const char* const storage_table[] = {
74 [STORAGE_AUTO] = "auto",
75 [STORAGE_VOLATILE] = "volatile",
76 [STORAGE_PERSISTENT] = "persistent",
77 [STORAGE_NONE] = "none"
80 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
81 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* String names for the SplitMode enum values, plus the generated lookup
 * helpers and the config-file parser callback config_parse_split_mode.
 * NOTE(review): at least one table entry is not visible in this listing. */
83 static const char* const split_mode_table[] = {
84 [SPLIT_NONE] = "none",
86 [SPLIT_LOGIN] = "login"
89 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
90 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Compute how many more bytes the journal may use and cache the result in
 * s->cached_available_space (stamped with the monotonic time of the check).
 *
 * The directory inspected is /var/log/journal/<machine-id> with
 * s->system_metrics when the system journal is open, otherwise
 * /run/log/journal/<machine-id> with s->runtime_metrics.
 *
 * Returns the cached or freshly computed value.
 *
 * NOTE(review): this listing omits some lines of the function (declarations,
 * error checks, closing braces); the comments describe the visible statements
 * only. */
92 static uint64_t available_space(Server *s, bool verbose) {
94 _cleanup_free_ char *p = NULL;
97 uint64_t sum = 0, ss_avail = 0, avail = 0;
99 _cleanup_closedir_ DIR *d = NULL;
104 ts = now(CLOCK_MONOTONIC);
/* Reuse the cached value while it is younger than RECHECK_AVAILABLE_SPACE_USEC. */
106 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
108 return s->cached_available_space;
110 r = sd_id128_get_machine(&machine);
114 if (s->system_journal) {
115 f = "/var/log/journal/";
116 m = &s->system_metrics;
118 f = "/run/log/journal/";
119 m = &s->runtime_metrics;
124 p = strappend(f, sd_id128_to_string(machine, ids));
132 if (fstatvfs(dirfd(d), &ss) < 0)
138 union dirent_storage buf;
140 r = readdir_r(d, &buf.de, &de);
/* Only regular files named *.journal or *.journal~ count towards usage. */
147 if (!endswith(de->d_name, ".journal") &&
148 !endswith(de->d_name, ".journal~"))
151 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
154 if (!S_ISREG(st.st_mode))
/* st_blocks is counted in 512-byte units. */
157 sum += (uint64_t) st.st_blocks * 512UL;
/* NOTE(review): POSIX expresses f_bavail in units of f_frsize; confirm that
 * multiplying by f_bsize is intended here. */
160 ss_avail = ss.f_bsize * ss.f_bavail;
/* Free space after reserving keep_free, clamped at zero. */
161 avail = ss_avail > m->keep_free ? ss_avail - m->keep_free : 0;
/* The effective limit is the smaller of max_use and avail; what remains after
 * subtracting current usage (clamped at zero) is the result. */
163 s->cached_available_space = MIN(m->max_use, avail) > sum ? MIN(m->max_use, avail) - sum : 0;
164 s->cached_available_space_timestamp = ts;
/* Report usage and limits through a driver message (presumably only when
 * verbose is set; the guarding condition is not visible here). */
167 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
168 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
170 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
171 "%s journal is using %s (max %s, leaving %s of free %s, current limit %s).",
172 s->system_journal ? "Permanent" : "Runtime",
173 format_bytes(fb1, sizeof(fb1), sum),
174 format_bytes(fb2, sizeof(fb2), m->max_use),
175 format_bytes(fb3, sizeof(fb3), m->keep_free),
176 format_bytes(fb4, sizeof(fb4), ss_avail),
177 format_bytes(fb5, sizeof(fb5), MIN(m->max_use, avail)));
180 return s->cached_available_space;
183 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
188 acl_permset_t permset;
193 r = fchmod(f->fd, 0640);
195 log_warning("Failed to fix access mode on %s, ignoring: %s", f->path, strerror(-r));
201 acl = acl_get_fd(f->fd);
203 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
207 r = acl_find_uid(acl, uid, &entry);
210 if (acl_create_entry(&acl, &entry) < 0 ||
211 acl_set_tag_type(entry, ACL_USER) < 0 ||
212 acl_set_qualifier(entry, &uid) < 0) {
213 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
218 /* We do not recalculate the mask unconditionally here,
219 * so that the fchmod() mask above stays intact. */
220 if (acl_get_permset(entry, &permset) < 0 ||
221 acl_add_perm(permset, ACL_READ) < 0 ||
222 calc_acl_mask_if_needed(&acl) < 0) {
223 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
227 if (acl_set_fd(f->fd, acl) < 0)
228 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Select the journal file that an entry logged by uid should be written to.
 *
 * The runtime journal is returned whenever it is open. Otherwise the visible
 * code looks up (or opens and registers in s->user_journals) the file
 * /var/log/journal/<machine-id>/user-<uid>.journal, and returns
 * s->system_journal on each visible fallback path.
 *
 * NOTE(review): this listing omits some lines of the function (declarations,
 * conditions, closing braces); the comments describe the visible statements
 * only. */
235 static JournalFile* find_journal(Server *s, uid_t uid) {
236 _cleanup_free_ char *p = NULL;
243 /* We split up user logs only on /var, not on /run. If the
244 * runtime file is open, we write to it exclusively, in order
245 * to guarantee proper order as soon as we flush /run to
246 * /var and close the runtime file. */
248 if (s->runtime_journal)
249 return s->runtime_journal;
252 return s->system_journal;
254 r = sd_id128_get_machine(&machine);
256 return s->system_journal;
/* Reuse an already open per-user journal when there is one. */
258 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
262 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
263 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
264 return s->system_journal;
/* Keep the number of open per-user journals below USER_JOURNALS_MAX. */
266 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
267 /* Too many open? Then let's close one */
268 f = hashmap_steal_first(s->user_journals);
270 journal_file_close(f);
273 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
275 return s->system_journal;
277 server_fix_perms(s, f, uid);
/* Register the new file; the visible failure path closes it again and falls
 * back to the system journal. */
279 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
281 journal_file_close(f);
282 return s->system_journal;
/* Rotate the runtime journal, the system journal and every per-user journal
 * that is currently open, then re-apply permissions on the resulting files.
 * Rotation failures are logged.
 *
 * NOTE(review): this listing omits some lines of the function (declarations,
 * error checks, closing braces); the comments describe the visible statements
 * only. */
288 void server_rotate(Server *s) {
294 log_debug("Rotating...");
/* The runtime journal is rotated with sealing disabled (last argument false). */
296 if (s->runtime_journal) {
297 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
299 if (s->runtime_journal)
300 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
302 log_error("Failed to create new runtime journal: %s", strerror(-r));
304 server_fix_perms(s, s->runtime_journal, 0);
307 if (s->system_journal) {
308 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
310 if (s->system_journal)
311 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
313 log_error("Failed to create new system journal: %s", strerror(-r));
316 server_fix_perms(s, s->system_journal, 0);
/* Per-user journals: the map key is the uid, and the map entry is replaced
 * with the file pointer produced by the rotation. */
319 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
320 r = journal_file_rotate(&f, s->compress, s->seal);
323 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
325 log_error("Failed to create user journal: %s", strerror(-r));
327 hashmap_replace(s->user_journals, k, f);
328 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Set the system journal and all open per-user journals offline via
 * journal_file_set_offline(), disarm the sync timer and clear
 * s->sync_scheduled. Failures are logged.
 *
 * NOTE(review): this listing omits some lines of the function (declarations,
 * error checks, closing braces); the comments describe the visible statements
 * only. */
333 void server_sync(Server *s) {
/* An all-zero itimerspec disarms a timerfd. */
334 static const struct itimerspec sync_timer_disable = {};
340 if (s->system_journal) {
341 r = journal_file_set_offline(s->system_journal);
343 log_error("Failed to sync system journal: %s", strerror(-r));
346 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
347 r = journal_file_set_offline(f);
349 log_error("Failed to sync user journal: %s", strerror(-r));
352 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
354 log_error("Failed to disable max timer: %m");
356 s->sync_scheduled = false;
/* Vacuum the journal directories of this machine: the persistent directory
 * when the system journal is open and the runtime directory when the runtime
 * journal is open. Resets s->oldest_file_usec beforehand and invalidates the
 * cached available-space value afterwards.
 *
 * NOTE(review): this listing omits some lines of the function (declarations,
 * error checks, closing braces); the comments describe the visible statements
 * only. */
359 void server_vacuum(Server *s) {
364 log_debug("Vacuuming...");
366 s->oldest_file_usec = 0;
368 r = sd_id128_get_machine(&machine);
370 log_error("Failed to get machine ID: %s", strerror(-r));
374 sd_id128_to_string(machine, ids);
376 if (s->system_journal) {
377 char *p = strappenda("/var/log/journal/", ids);
379 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
/* A missing directory (-ENOENT) is not reported as an error. */
380 if (r < 0 && r != -ENOENT)
381 log_error("Failed to vacuum %s: %s", p, strerror(-r));
384 if (s->runtime_journal) {
385 char *p = strappenda("/run/log/journal/", ids);
387 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
388 if (r < 0 && r != -ENOENT)
389 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* A zero timestamp forces available_space() to recompute on its next call. */
392 s->cached_available_space_timestamp = 0;
395 bool shall_try_append_again(JournalFile *f, int r) {
397 /* -E2BIG Hit configured limit
399 -EDQUOT Quota limit hit
401 -EHOSTDOWN Other machine
402 -EBUSY Unclean shutdown
403 -EPROTONOSUPPORT Unsupported feature
406 -ESHUTDOWN Already archived */
408 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
409 log_debug("%s: Allocation limit reached, rotating.", f->path);
410 else if (r == -EHOSTDOWN)
411 log_info("%s: Journal file from other machine, rotating.", f->path);
412 else if (r == -EBUSY)
413 log_info("%s: Unclean shutdown, rotating.", f->path);
414 else if (r == -EPROTONOSUPPORT)
415 log_info("%s: Unsupported feature, rotating.", f->path);
416 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
417 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Append one entry, given as n iovec items, to the journal selected for uid,
 * and schedule a sync according to priority.
 *
 * When the first append fails with an error that shall_try_append_again()
 * accepts, the journal is looked up again and the append is retried once.
 * Entries that cannot be written are logged and dropped.
 *
 * NOTE(review): this listing omits some lines of the function (declarations,
 * the rotate/vacuum calls, closing braces); the comments describe the visible
 * statements only. */
424 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
426 bool vacuumed = false;
433 f = find_journal(s, uid);
/* The file may ask to be rotated before anything is written; the journal is
 * looked up again afterwards. */
437 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
438 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
443 f = find_journal(s, uid);
448 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
450 server_schedule_sync(s, priority);
/* Give up when vacuuming was already done or the error is not retryable. */
454 if (vacuumed || !shall_try_append_again(f, r)) {
457 for (i = 0; i < n; i++)
458 size += iovec[i].iov_len;
460 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
467 f = find_journal(s, uid);
471 log_debug("Retrying write.");
472 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
476 for (i = 0; i < n; i++)
477 size += iovec[i].iov_len;
479 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
481 server_schedule_sync(s, priority);
/* Append trusted metadata fields to an entry and hand it to
 * write_to_journal().
 *
 * The caller passes n filled iovec items in an array with room for m items.
 * This function appends fields describing the sending process (taken from
 * ucred), optional OBJECT_* fields describing object_pid, the source
 * timestamp, boot id, machine id and host name, and then picks the uid whose
 * journal receives the entry according to s->split_mode.
 *
 * NOTE(review): this listing omits many lines of the function (parameters,
 * declarations, result checks, frees, #ifdefs, closing braces); the comments
 * describe the visible statements only. */
484 static void dispatch_message_real(
486 struct iovec *iovec, unsigned n, unsigned m,
489 const char *label, size_t label_len,
/* Fixed-size buffers for the numeric fields, each sized from its prefix
 * string plus the maximum decimal length of the value type. */
494 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
495 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
496 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
497 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
498 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
499 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
500 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=",
501 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
502 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
503 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
510 uid_t realuid = 0, owner = 0, journal_uid;
511 bool owner_valid = false;
513 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
514 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
515 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
516 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
/* The caller's array must have room for every field that may be appended. */
525 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
/* Credentials and process details of the sender. */
528 realuid = ucred->uid;
530 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
531 IOVEC_SET_STRING(iovec[n++], pid);
533 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
534 IOVEC_SET_STRING(iovec[n++], uid);
536 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
537 IOVEC_SET_STRING(iovec[n++], gid);
539 r = get_process_comm(ucred->pid, &t);
541 x = strappenda("_COMM=", t);
543 IOVEC_SET_STRING(iovec[n++], x);
546 r = get_process_exe(ucred->pid, &t);
548 x = strappenda("_EXE=", t);
550 IOVEC_SET_STRING(iovec[n++], x);
553 r = get_process_cmdline(ucred->pid, 0, false, &t);
555 x = strappenda("_CMDLINE=", t);
557 IOVEC_SET_STRING(iovec[n++], x);
560 r = get_process_capeff(ucred->pid, &t);
562 x = strappenda("_CAP_EFFECTIVE=", t);
564 IOVEC_SET_STRING(iovec[n++], x);
/* Audit session and login uid of the sender. */
568 r = audit_session_from_pid(ucred->pid, &audit);
570 sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
571 IOVEC_SET_STRING(iovec[n++], audit_session);
574 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
576 sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
577 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* Fields derived from the sender's cgroup path: session, owner uid, unit,
 * user unit and slice. */
581 r = cg_pid_get_path_shifted(ucred->pid, NULL, &c);
583 char *session = NULL;
585 x = strappenda("_SYSTEMD_CGROUP=", c);
586 IOVEC_SET_STRING(iovec[n++], x);
588 r = cg_path_get_session(c, &t);
590 session = strappenda("_SYSTEMD_SESSION=", t);
592 IOVEC_SET_STRING(iovec[n++], session);
595 if (cg_path_get_owner_uid(c, &owner) >= 0) {
598 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
599 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* When the cgroup path yields no unit, the caller-supplied unit_id is used:
 * as _SYSTEMD_UNIT without a session, as _SYSTEMD_USER_UNIT with one. */
602 if (cg_path_get_unit(c, &t) >= 0) {
603 x = strappenda("_SYSTEMD_UNIT=", t);
605 IOVEC_SET_STRING(iovec[n++], x);
606 } else if (unit_id && !session) {
607 x = strappenda("_SYSTEMD_UNIT=", unit_id);
608 IOVEC_SET_STRING(iovec[n++], x);
611 if (cg_path_get_user_unit(c, &t) >= 0) {
612 x = strappenda("_SYSTEMD_USER_UNIT=", t);
614 IOVEC_SET_STRING(iovec[n++], x);
615 } else if (unit_id && session) {
616 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
617 IOVEC_SET_STRING(iovec[n++], x);
620 if (cg_path_get_slice(c, &t) >= 0) {
621 x = strappenda("_SYSTEMD_SLICE=", t);
623 IOVEC_SET_STRING(iovec[n++], x);
/* SELinux context: copied from the label passed by the caller (label_len
 * bytes, NUL-terminated here), or queried with getpidcon() in the other
 * visible branch. */
631 x = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);
633 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
634 IOVEC_SET_STRING(iovec[n++], x);
636 security_context_t con;
638 if (getpidcon(ucred->pid, &con) >= 0) {
639 x = strappenda("_SELINUX_CONTEXT=", con);
642 IOVEC_SET_STRING(iovec[n++], x);
/* OBJECT_* fields: the same kind of details, looked up for object_pid. */
650 r = get_process_uid(object_pid, &object_uid);
652 sprintf(o_uid, "OBJECT_UID=%lu", (unsigned long) object_uid);
653 IOVEC_SET_STRING(iovec[n++], o_uid);
656 r = get_process_gid(object_pid, &object_gid);
658 sprintf(o_gid, "OBJECT_GID=%lu", (unsigned long) object_gid);
659 IOVEC_SET_STRING(iovec[n++], o_gid);
662 r = get_process_comm(object_pid, &t);
664 x = strappenda("OBJECT_COMM=", t);
666 IOVEC_SET_STRING(iovec[n++], x);
669 r = get_process_exe(object_pid, &t);
671 x = strappenda("OBJECT_EXE=", t);
673 IOVEC_SET_STRING(iovec[n++], x);
676 r = get_process_cmdline(object_pid, 0, false, &t);
678 x = strappenda("OBJECT_CMDLINE=", t);
680 IOVEC_SET_STRING(iovec[n++], x);
684 r = audit_session_from_pid(object_pid, &audit);
686 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%lu", (unsigned long) audit);
687 IOVEC_SET_STRING(iovec[n++], o_audit_session);
690 r = audit_loginuid_from_pid(object_pid, &loginuid);
692 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
693 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
697 r = cg_pid_get_path_shifted(object_pid, NULL, &c);
699 x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
700 IOVEC_SET_STRING(iovec[n++], x);
702 r = cg_path_get_session(c, &t);
704 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
706 IOVEC_SET_STRING(iovec[n++], x);
709 if (cg_path_get_owner_uid(c, &owner) >= 0) {
710 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
711 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
714 if (cg_path_get_unit(c, &t) >= 0) {
715 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
717 IOVEC_SET_STRING(iovec[n++], x);
720 if (cg_path_get_user_unit(c, &t) >= 0) {
721 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
723 IOVEC_SET_STRING(iovec[n++], x);
/* Timestamp supplied by the sender side (tv). */
732 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
733 IOVEC_SET_STRING(iovec[n++], source_time);
736 /* Note that strictly speaking storing the boot id here is
737 * redundant since the entry includes this in-line
738 * anyway. However, we need this indexed, too. */
739 r = sd_id128_get_boot(&id);
741 sd_id128_to_string(id, boot_id + strlen("_BOOT_ID="));
742 IOVEC_SET_STRING(iovec[n++], boot_id);
745 r = sd_id128_get_machine(&id);
747 sd_id128_to_string(id, machine_id + strlen("_MACHINE_ID="));
748 IOVEC_SET_STRING(iovec[n++], machine_id);
751 t = gethostname_malloc();
753 x = strappenda("_HOSTNAME=", t);
755 IOVEC_SET_STRING(iovec[n++], x);
/* Choose the uid whose journal receives the entry. */
760 if (s->split_mode == SPLIT_UID && realuid > 0)
761 /* Split up strictly by any UID */
762 journal_uid = realuid;
763 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
764 /* Split up by login UIDs, this avoids creation of
765 * individual journals for system UIDs. We do this
766 * only if the realuid is not root, in order not to
767 * accidentally leak privileged information to the
768 * user that is logged by a privileged process that is
769 * part of an unprivileged session.*/
774 write_to_journal(s, journal_uid, iovec, n, priority);
/* Log a message generated by journald itself.
 *
 * Builds an entry with PRIORITY=6, _TRANSPORT=driver, a printf-formatted
 * MESSAGE= field and, unless message_id is the null id, a message id field,
 * then dispatches it with the daemon's own pid/uid/gid as credentials.
 *
 * s:          server instance.
 * message_id: 128-bit message id, or SD_ID128_NULL for none.
 * format:     printf-style format for the message text, followed by its
 *             arguments.
 *
 * NOTE(review): this listing omits some lines of the function (declarations,
 * va_end, closing braces); the comments describe the visible statements
 * only. */
777 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
778 char mid[11 + 32 + 1];
779 char buffer[16 + LINE_MAX + 1];
/* Room for the four fields set here plus the metadata fields appended by
 * dispatch_message_real(). */
780 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
783 struct ucred ucred = {};
788 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
789 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The formatted text is written right after the 8-byte "MESSAGE=" prefix. */
791 memcpy(buffer, "MESSAGE=", 8);
792 va_start(ap, format);
793 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
795 char_array_0(buffer);
796 IOVEC_SET_STRING(iovec[n++], buffer);
798 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
799 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
801 IOVEC_SET_STRING(iovec[n++], mid);
804 ucred.pid = getpid();
805 ucred.uid = getuid();
806 ucred.gid = getgid();
808 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
/* Entry point for messages received from clients: applies the storage level
 * filter and per-cgroup rate limiting, then forwards the entry to
 * dispatch_message_real().
 *
 * iovec/n/m: entry fields, number of filled items and capacity of the array.
 * label/label_len: security label of the sender, passed through unchanged.
 *
 * NOTE(review): this listing omits some lines of the function (parameters,
 * declarations, early returns, closing braces); the comments describe the
 * visible statements only. */
811 void server_dispatch_message(
813 struct iovec *iovec, unsigned n, unsigned m,
816 const char *label, size_t label_len,
822 _cleanup_free_ char *path = NULL;
826 assert(iovec || n == 0);
831 if (LOG_PRI(priority) > s->max_level_store)
834 /* Stop early in case the information will not be stored
836 if (s->storage == STORAGE_NONE)
842 r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
846 /* example: /user/lennart/3/foobar
847 * /system/dbus.service/foobar
849 * So let's cut of everything past the third /, since that is
850 * where user directories start */
852 c = strchr(path, '/');
854 c = strchr(c+1, '/');
856 c = strchr(c+1, '/');
/* The rate limiter is keyed on the cgroup path and the priority, and is also
 * given the currently available journal space. */
862 rl = journal_rate_limit_test(s->rate_limit, path,
863 priority & LOG_PRIMASK, available_space(s, false));
868 /* Write a suppression message if we suppressed something */
870 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
871 "Suppressed %u messages from %s", rl - 1, path);
874 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
/* Open the journal files the server writes to.
 *
 * The persistent system journal
 * (/var/log/journal/<machine-id>/system.journal) is opened when storage is
 * "persistent" or "auto" and the flag file /run/systemd/journal/flushed
 * exists. The runtime journal (/run/log/journal/<machine-id>/system.journal)
 * is opened unless storage is "none": only if it already exists when the
 * system journal is open, otherwise it is created. Finishes by refreshing
 * the available-space cache with verbose reporting.
 *
 * NOTE(review): this listing omits some lines of the function (declarations,
 * result checks, the return statement, closing braces); the comments describe
 * the visible statements only. */
878 static int system_journal_open(Server *s) {
884 r = sd_id128_get_machine(&machine);
886 log_error("Failed to get machine id: %s", strerror(-r));
890 sd_id128_to_string(machine, ids);
892 if (!s->system_journal &&
893 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
894 access("/run/systemd/journal/flushed", F_OK) >= 0) {
896 /* If in auto mode: first try to create the machine
897 * path, but not the prefix.
899 * If in persistent mode: create /var/log/journal and
900 * the machine path */
902 if (s->storage == STORAGE_PERSISTENT)
903 (void) mkdir("/var/log/journal/", 0755);
905 fn = strappenda("/var/log/journal/", ids);
906 (void) mkdir(fn, 0755);
908 fn = strappenda(fn, "/system.journal");
909 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
912 server_fix_perms(s, s->system_journal, 0);
/* -ENOENT and -EROFS are not reported by the warning below. */
914 if (r != -ENOENT && r != -EROFS)
915 log_warning("Failed to open system journal: %s", strerror(-r));
921 if (!s->runtime_journal &&
922 (s->storage != STORAGE_NONE)) {
924 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
928 if (s->system_journal) {
930 /* Try to open the runtime journal, but only
931 * if it already exists, so that we can flush
932 * it into the system journal */
934 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
939 log_warning("Failed to open runtime journal: %s", strerror(-r));
946 /* OK, we really need the runtime journal, so create
947 * it if necessary. */
949 (void) mkdir_parents(fn, 0755);
950 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
954 log_error("Failed to open runtime journal: %s", strerror(-r));
959 if (s->runtime_journal)
960 server_fix_perms(s, s->runtime_journal, 0);
963 available_space(s, true);
/* Copy every entry of the runtime journal into the persistent system
 * journal, then close the runtime journal and remove /run/log/journal.
 *
 * Only acts when storage is "auto" or "persistent", a runtime journal is
 * open, and the system journal could be opened. A copy that fails with an
 * error accepted by shall_try_append_again() is retried once.
 *
 * NOTE(review): this listing omits some lines of the function (declarations,
 * result checks, rotate/vacuum calls, return statements, closing braces);
 * the comments describe the visible statements only. */
968 int server_flush_to_var(Server *s) {
971 sd_journal *j = NULL;
975 if (s->storage != STORAGE_AUTO &&
976 s->storage != STORAGE_PERSISTENT)
979 if (!s->runtime_journal)
982 system_journal_open(s);
984 if (!s->system_journal)
987 log_debug("Flushing to /var...");
989 r = sd_id128_get_machine(&machine);
/* Read back the runtime journal through the public reader API. */
993 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
995 log_error("Failed to read runtime journal: %s", strerror(-r));
999 sd_journal_set_data_threshold(j, 0);
1001 SD_JOURNAL_FOREACH(j) {
1005 f = j->current_file;
1006 assert(f && f->current_offset > 0);
1008 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1010 log_error("Can't read entry: %s", strerror(-r));
1014 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1018 if (!shall_try_append_again(s->system_journal, r)) {
1019 log_error("Can't write entry: %s", strerror(-r));
1026 if (!s->system_journal) {
1027 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1032 log_debug("Retrying write.");
1033 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1035 log_error("Can't write entry: %s", strerror(-r));
1041 journal_file_post_change(s->system_journal);
/* The runtime journal is no longer needed once its entries are copied. */
1043 journal_file_close(s->runtime_journal);
1044 s->runtime_journal = NULL;
1047 rm_rf("/run/log/journal", false, true, false);
1049 sd_journal_close(j);
/* Handle one epoll event, dispatching on the file descriptor stored in
 * ev->data:
 *   - s->signal_fd:      read a signalfd_siginfo; SIGUSR1 requests a flush
 *                        to /var, SIGUSR2 a rotation, anything else is
 *                        logged as "Received SIG...".
 *   - s->sync_timer_fd:  the sync timer fired.
 *   - s->dev_kmsg_fd:    kernel messages are available.
 *   - s->native_fd / s->syslog_fd: receive datagrams and hand them to the
 *                        native or syslog message processors.
 *   - s->stdout_fd:      accept a new stdout stream.
 *   - anything else:     ev->data.ptr is treated as a StdoutStream.
 *
 * NOTE(review): this listing omits many lines of the function (declarations,
 * return statements, loops, closing braces); the comments describe the
 * visible statements only. */
1054 int process_event(Server *s, struct epoll_event *ev) {
1058 if (ev->data.fd == s->signal_fd) {
1059 struct signalfd_siginfo sfsi;
1062 if (ev->events != EPOLLIN) {
1063 log_error("Got invalid event from epoll for %s: %"PRIx32,
1064 "signal fd", ev->events);
1068 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1069 if (n != sizeof(sfsi)) {
1074 if (errno == EINTR || errno == EAGAIN)
1080 if (sfsi.ssi_signo == SIGUSR1) {
1081 log_info("Received request to flush runtime journal from PID %"PRIu32,
/* Create the flag file that system_journal_open() checks before opening the
 * persistent journal, then flush. */
1083 touch("/run/systemd/journal/flushed");
1084 server_flush_to_var(s);
1089 if (sfsi.ssi_signo == SIGUSR2) {
1090 log_info("Received request to rotate journal from PID %"PRIu32,
1097 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1101 } else if (ev->data.fd == s->sync_timer_fd) {
1105 log_debug("Got sync request from epoll.");
1107 r = read(ev->data.fd, (void *)&t, sizeof(t));
1114 } else if (ev->data.fd == s->dev_kmsg_fd) {
1117 if (ev->events & EPOLLERR)
1118 log_warning("/dev/kmsg buffer overrun, some messages lost.");
1120 if (!(ev->events & EPOLLIN)) {
1121 log_error("Got invalid event from epoll for %s: %"PRIx32,
1122 "/dev/kmsg", ev->events);
1126 r = server_read_dev_kmsg(s);
1132 } else if (ev->data.fd == s->native_fd ||
1133 ev->data.fd == s->syslog_fd) {
1135 if (ev->events != EPOLLIN) {
1136 log_error("Got invalid event from epoll for %s: %"PRIx32,
1137 ev->data.fd == s->native_fd ? "native fd" : "syslog fd",
1143 struct msghdr msghdr;
1145 struct ucred *ucred = NULL;
1146 struct timeval *tv = NULL;
1147 struct cmsghdr *cmsg;
1149 size_t label_len = 0;
1151 struct cmsghdr cmsghdr;
1153 /* We use NAME_MAX space for the
1154 * SELinux label here. The kernel
1155 * currently enforces no limit, but
1156 * according to suggestions from the
1157 * SELinux people this will change and
1158 * it will probably be identical to
1159 * NAME_MAX. For now we use that, but
1160 * this should be updated one day when
1161 * the final limit is known.*/
1162 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1163 CMSG_SPACE(sizeof(struct timeval)) +
1164 CMSG_SPACE(sizeof(int)) + /* fd */
1165 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask the socket how many bytes are queued and grow the receive buffer to
 * fit when it is too small. */
1172 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1173 log_error("SIOCINQ failed: %m");
1177 if (s->buffer_size < (size_t) v) {
1181 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1182 b = realloc(s->buffer, l+1);
1185 log_error("Couldn't increase buffer.");
1194 iovec.iov_base = s->buffer;
1195 iovec.iov_len = s->buffer_size;
1199 msghdr.msg_iov = &iovec;
1200 msghdr.msg_iovlen = 1;
1201 msghdr.msg_control = &control;
1202 msghdr.msg_controllen = sizeof(control);
1204 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1207 if (errno == EINTR || errno == EAGAIN)
1210 log_error("recvmsg() failed: %m");
/* Pick sender credentials, security label, timestamp and passed file
 * descriptors out of the control messages. */
1214 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1216 if (cmsg->cmsg_level == SOL_SOCKET &&
1217 cmsg->cmsg_type == SCM_CREDENTIALS &&
1218 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1219 ucred = (struct ucred*) CMSG_DATA(cmsg);
1220 else if (cmsg->cmsg_level == SOL_SOCKET &&
1221 cmsg->cmsg_type == SCM_SECURITY) {
1222 label = (char*) CMSG_DATA(cmsg);
1223 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1224 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1225 cmsg->cmsg_type == SO_TIMESTAMP &&
1226 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1227 tv = (struct timeval*) CMSG_DATA(cmsg);
1228 else if (cmsg->cmsg_level == SOL_SOCKET &&
1229 cmsg->cmsg_type == SCM_RIGHTS) {
1230 fds = (int*) CMSG_DATA(cmsg);
1231 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* Syslog datagrams must not carry file descriptors. The native protocol
 * accepts either a payload without descriptors or an empty datagram with
 * exactly one descriptor. */
1235 if (ev->data.fd == s->syslog_fd) {
1236 if (n > 0 && n_fds == 0) {
1238 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1239 } else if (n_fds > 0)
1240 log_warning("Got file descriptors via syslog socket. Ignoring.");
1243 if (n > 0 && n_fds == 0)
1244 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1245 else if (n == 0 && n_fds == 1)
1246 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1248 log_warning("Got too many file descriptors via native socket. Ignoring.");
1251 close_many(fds, n_fds);
1256 } else if (ev->data.fd == s->stdout_fd) {
1258 if (ev->events != EPOLLIN) {
1259 log_error("Got invalid event from epoll for %s: %"PRIx32,
1260 "stdout fd", ev->events);
1264 stdout_stream_new(s);
1268 StdoutStream *stream;
/* Any event bit other than EPOLLIN and EPOLLHUP is rejected.
 * NOTE(review): the two log_error() calls below report the same condition
 * back to back; the second looks redundant - confirm. */
1270 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1271 log_error("Got invalid event from epoll for %s: %"PRIx32,
1272 "stdout stream", ev->events);
1273 log_error("Got invalid event from epoll.");
1277 /* If it is none of the well-known fds, it must be an
1278 * stdout stream fd. Note that this is a bit ugly here
1279 * (since we rely that none of the well-known fds
1280 * could be interpreted as pointer), but nonetheless
1281 * safe, since the well-known fds would never get an
1282 * fd > 4096, i.e. beyond the first memory page */
1284 stream = ev->data.ptr;
/* The stream is freed when processing returns zero or a negative value. */
1286 if (stdout_stream_process(stream) <= 0)
1287 stdout_stream_free(stream);
1292 log_error("Unknown event.");
/* Block SIGINT, SIGTERM, SIGUSR1 and SIGUSR2 for the process, create a
 * non-blocking, close-on-exec signalfd for them in s->signal_fd, and
 * register it with the server's epoll instance for EPOLLIN.
 *
 * NOTE(review): this listing omits some lines of the function (declarations,
 * return statements, closing braces); the comments describe the visible
 * statements only. */
1296 static int open_signalfd(Server *s) {
1298 struct epoll_event ev;
1302 assert_se(sigemptyset(&mask) == 0);
1303 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
/* SIG_SETMASK replaces the whole signal mask with exactly these signals. */
1304 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1306 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1307 if (s->signal_fd < 0) {
1308 log_error("signalfd(): %m");
1313 ev.events = EPOLLIN;
1314 ev.data.fd = s->signal_fd;
1316 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1317 log_error("epoll_ctl(): %m");
/* Apply journald switches found on the kernel command line (/proc/cmdline):
 *   systemd.journald.forward_to_syslog=<bool>
 *   systemd.journald.forward_to_kmsg=<bool>
 *   systemd.journald.forward_to_console=<bool>
 * Values that fail to parse, and any other word starting with
 * "systemd.journald", are reported with a warning.
 *
 * NOTE(review): this listing omits some lines of the function (declarations,
 * result checks, return statements, closing braces); the comments describe
 * the visible statements only. */
1324 static int server_parse_proc_cmdline(Server *s) {
1325 _cleanup_free_ char *line = NULL;
1330 if (detect_container(NULL) > 0)
1333 r = read_one_line_file("/proc/cmdline", &line);
1335 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1339 FOREACH_WORD_QUOTED(w, l, line, state) {
1340 _cleanup_free_ char *word;
1342 word = strndup(w, l);
/* The numeric offsets below (35, 33, 36) are the lengths of the matched
 * prefixes, so word + N points at the value after the '='. */
1346 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1347 r = parse_boolean(word + 35);
1349 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1351 s->forward_to_syslog = r;
1352 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1353 r = parse_boolean(word + 33);
1355 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1357 s->forward_to_kmsg = r;
1358 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1359 r = parse_boolean(word + 36);
1361 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1363 s->forward_to_console = r;
1364 } else if (startswith(word, "systemd.journald"))
1365 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Load /etc/systemd/journald.conf into the Server structure, parsing its
 * "Journal" section with the gperf-generated lookup table. Failures to open
 * or parse the file are logged as warnings.
 *
 * NOTE(review): this listing omits some lines of the function (declarations,
 * result checks, return statements, closing braces); the comments describe
 * the visible statements only. */
1371 static int server_parse_config_file(Server *s) {
1372 static const char fn[] = "/etc/systemd/journald.conf";
1373 _cleanup_fclose_ FILE *f = NULL;
/* "re": read-only with the close-on-exec flag. */
1378 f = fopen(fn, "re");
/* A missing file is handled separately from other open errors. */
1380 if (errno == ENOENT)
1383 log_warning("Failed to open configuration file %s: %m", fn);
1387 r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1388 (void*) journald_gperf_lookup, false, false, s);
1390 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Create the CLOCK_MONOTONIC timerfd used for scheduled syncs, store it in
 * s->sync_timer_fd and register it with the server's epoll instance for
 * EPOLLIN.
 *
 * NOTE(review): this listing omits some lines of the function (declarations,
 * return statements, closing braces); the comments describe the visible
 * statements only. */
1395 static int server_open_sync_timer(Server *s) {
1397 struct epoll_event ev;
1401 s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1402 if (s->sync_timer_fd < 0)
1406 ev.events = EPOLLIN;
1407 ev.data.fd = s->sync_timer_fd;
1409 r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1411 log_error("Failed to add idle timer fd to epoll object: %m");
/* Arrange for the journals to be synced after a write of the given priority.
 *
 * Priorities up to and including LOG_CRIT take the immediate-sync branch.
 * Otherwise, unless a sync is already scheduled, the sync timer is armed
 * with s->sync_interval_usec (when that interval is non-zero) and
 * s->sync_scheduled is set.
 *
 * NOTE(review): this listing omits some lines of the function (declarations,
 * the immediate sync call, return statements, closing braces); the comments
 * describe the visible statements only. */
1418 int server_schedule_sync(Server *s, int priority) {
1423 if (priority <= LOG_CRIT) {
1424 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1429 if (s->sync_scheduled)
1432 if (s->sync_interval_usec) {
/* Only it_value is set; it_interval stays zero, i.e. a one-shot timer. */
1433 struct itimerspec sync_timer_enable = {};
1435 timespec_store(&sync_timer_enable.it_value, s->sync_interval_usec);
1437 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1442 s->sync_scheduled = true;
/* Initialize a Server: set defaults, read the configuration file and kernel
 * command line, create the user-journal map, mmap cache and epoll instance,
 * adopt sockets passed in by the service manager, open the remaining
 * sockets, /dev/kmsg, the sync timer and the signalfd, set up udev and rate
 * limiting, and finally open the journal files.
 *
 * NOTE(review): this listing omits many lines of the function (declarations,
 * result checks, return statements, closing braces); the comments describe
 * the visible statements only. */
1447 int server_init(Server *s) {
/* Mark every descriptor as "not open" so server_done() can tell which ones
 * need closing. */
1453 s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1454 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1458 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1459 s->sync_scheduled = false;
1461 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1462 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1464 s->forward_to_syslog = true;
1466 s->max_level_store = LOG_DEBUG;
1467 s->max_level_syslog = LOG_DEBUG;
1468 s->max_level_kmsg = LOG_NOTICE;
1469 s->max_level_console = LOG_INFO;
/* Fill both metrics structs with 0xFF bytes (presumably the "unset" marker
 * for each field - confirm against the journal-file code). */
1471 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1472 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1474 server_parse_config_file(s);
1475 server_parse_proc_cmdline(s);
/* Rate limiting needs both values: if exactly one of interval and burst is
 * zero, both are reset to zero. */
1476 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1477 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1478 (long long unsigned) s->rate_limit_interval,
1479 s->rate_limit_burst);
1480 s->rate_limit_interval = s->rate_limit_burst = 0;
1483 mkdir_p("/run/systemd/journal", 0755);
1485 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1486 if (!s->user_journals)
1489 s->mmap = mmap_cache_new();
1493 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1494 if (s->epoll_fd < 0) {
1495 log_error("Failed to create epoll object: %m");
/* Classify each passed-in descriptor by socket type and path; at most one
 * socket of each kind is accepted. */
1499 n = sd_listen_fds(true);
1501 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1505 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1507 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1509 if (s->native_fd >= 0) {
1510 log_error("Too many native sockets passed.");
1516 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1518 if (s->stdout_fd >= 0) {
1519 log_error("Too many stdout sockets passed.");
1525 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1527 if (s->syslog_fd >= 0) {
1528 log_error("Too many /dev/log sockets passed.");
1535 log_error("Unknown socket passed.");
1540 r = server_open_syslog_socket(s);
1544 r = server_open_native_socket(s);
1548 r = server_open_stdout_socket(s);
1552 r = server_open_dev_kmsg(s);
1556 r = server_open_kernel_seqnum(s);
1560 r = server_open_sync_timer(s);
1564 r = open_signalfd(s);
1568 s->udev = udev_new();
1572 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1573 s->rate_limit_burst);
1577 r = system_journal_open(s);
/* Call journal_file_maybe_append_tag() with the current realtime clock value
 * on the system journal (when open) and on every open per-user journal.
 *
 * NOTE(review): this listing omits some lines of the function (declarations,
 * closing braces, possibly preprocessor guards); the comments describe the
 * visible statements only. */
1584 void server_maybe_append_tags(Server *s) {
1590 n = now(CLOCK_REALTIME);
1592 if (s->system_journal)
1593 journal_file_maybe_append_tag(s->system_journal, n);
1595 HASHMAP_FOREACH(f, s->user_journals, i)
1596 journal_file_maybe_append_tag(f, n);
/* Release everything the Server owns: stdout streams, the system, runtime
 * and per-user journal files, the user-journal map, every open file
 * descriptor, the rate limiter, the kernel sequence number mapping, the mmap
 * cache and the udev context.
 *
 * NOTE(review): this listing omits some lines of the function (declarations,
 * closing braces, possibly further cleanup); the comments describe the
 * visible statements only. */
1600 void server_done(Server *s) {
/* The loop repeats until s->stdout_streams is empty. */
1604 while (s->stdout_streams)
1605 stdout_stream_free(s->stdout_streams);
1607 if (s->system_journal)
1608 journal_file_close(s->system_journal);
1610 if (s->runtime_journal)
1611 journal_file_close(s->runtime_journal);
1613 while ((f = hashmap_steal_first(s->user_journals)))
1614 journal_file_close(f);
1616 hashmap_free(s->user_journals);
/* Descriptors still at -1 (the value set by server_init()) were never opened
 * and are skipped. */
1618 if (s->epoll_fd >= 0)
1619 close_nointr_nofail(s->epoll_fd);
1621 if (s->signal_fd >= 0)
1622 close_nointr_nofail(s->signal_fd);
1624 if (s->syslog_fd >= 0)
1625 close_nointr_nofail(s->syslog_fd);
1627 if (s->native_fd >= 0)
1628 close_nointr_nofail(s->native_fd);
1630 if (s->stdout_fd >= 0)
1631 close_nointr_nofail(s->stdout_fd);
1633 if (s->dev_kmsg_fd >= 0)
1634 close_nointr_nofail(s->dev_kmsg_fd);
1636 if (s->sync_timer_fd >= 0)
1637 close_nointr_nofail(s->sync_timer_fd);
1640 journal_rate_limit_free(s->rate_limit);
/* The mapping is unmapped with a length of one uint64_t. */
1642 if (s->kernel_seqnum)
1643 munmap(s->kernel_seqnum, sizeof(uint64_t));
1649 mmap_cache_unref(s->mmap);
1652 udev_unref(s->udev);