1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
43 #include "conf-parser.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-server.h"
48 #include "journald-rate-limit.h"
49 #include "journald-kmsg.h"
50 #include "journald-syslog.h"
51 #include "journald-stream.h"
52 #include "journald-console.h"
53 #include "journald-native.h"
57 #include <acl/libacl.h>
62 #include <selinux/selinux.h>
/* Cap on the number of per-user journal files kept open at once;
 * find_journal() closes entries while the hashmap is at this size. */
65 #define USER_JOURNALS_MAX 1024
/* Defaults that server_init() stores in the Server before the configuration
 * file and kernel command line are parsed. */
67 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
68 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
69 #define DEFAULT_RATE_LIMIT_BURST 1000
/* Age up to which available_space() reuses its cached result. */
71 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Textual names of the Storage enum values, indexed by enum value. The two
 * macros below are fed this table; presumably they generate the string/enum
 * conversion helpers and the config_parse_storage() callback (confirm against
 * the macro definitions). */
73 static const char* const storage_table[] = {
74 [STORAGE_AUTO] = "auto",
75 [STORAGE_VOLATILE] = "volatile",
76 [STORAGE_PERSISTENT] = "persistent",
77 [STORAGE_NONE] = "none"
80 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
81 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Textual names of the SplitMode enum values, indexed by enum value. The two
 * macros below are fed this table; presumably they generate the string/enum
 * conversion helpers and the config_parse_split_mode() callback (confirm
 * against the macro definitions). */
83 static const char* const split_mode_table[] = {
84 [SPLIT_NONE] = "none",
86 [SPLIT_LOGIN] = "login"
89 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
90 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Returns how many more bytes the active journal may consume and caches the
 * result, together with a timestamp, on the Server.
 *
 * The budget is MIN(max_use, bytes available on the file system minus
 * keep_free) less the on-disk size of the *.journal and *.journal~ files in
 * the machine's journal directory, clamped at 0. /var/log/journal/ is paired
 * with system_metrics (selected when the system journal is open) and
 * /run/log/journal/ with runtime_metrics.
 *
 * A journal-usage driver message is also produced here; NOTE(review):
 * presumably only when verbose is true - confirm the guarding condition. */
92 static uint64_t available_space(Server *s, bool verbose) {
94 _cleanup_free_ char *p = NULL;
97 uint64_t sum = 0, ss_avail = 0, avail = 0;
99 _cleanup_closedir_ DIR *d = NULL;
104 ts = now(CLOCK_MONOTONIC);
/* Fast path: hand back the cached value while it is still recent. */
106 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
108 return s->cached_available_space;
110 r = sd_id128_get_machine(&machine);
114 if (s->system_journal) {
115 f = "/var/log/journal/";
116 m = &s->system_metrics;
118 f = "/run/log/journal/";
119 m = &s->runtime_metrics;
/* The directory that is scanned is named after the machine ID. */
124 p = strappend(f, sd_id128_to_string(machine, ids));
132 if (fstatvfs(dirfd(d), &ss) < 0)
138 union dirent_storage buf;
140 r = readdir_r(d, &buf.de, &de);
/* Only regular files named *.journal or *.journal~ are inspected. */
147 if (!endswith(de->d_name, ".journal") &&
148 !endswith(de->d_name, ".journal~"))
151 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
154 if (!S_ISREG(st.st_mode))
/* st_blocks is converted to bytes using 512-byte units. */
157 sum += (uint64_t) st.st_blocks * 512UL;
/* Bytes currently available on the file system, as reported by fstatvfs(). */
160 ss_avail = ss.f_bsize * ss.f_bavail;
/* Leave keep_free untouched; clamp at 0 rather than letting the subtraction wrap. */
161 avail = ss_avail > m->keep_free ? ss_avail - m->keep_free : 0;
/* Remaining budget: effective limit minus what is already used, clamped at 0. */
163 s->cached_available_space = MIN(m->max_use, avail) > sum ? MIN(m->max_use, avail) - sum : 0;
164 s->cached_available_space_timestamp = ts;
/* Report usage and limits in human-readable form as a driver message. */
167 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
168 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
170 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
171 "%s journal is using %s (max %s, leaving %s of free %s, current limit %s).",
172 s->system_journal ? "Permanent" : "Runtime",
173 format_bytes(fb1, sizeof(fb1), sum),
174 format_bytes(fb2, sizeof(fb2), m->max_use),
175 format_bytes(fb3, sizeof(fb3), m->keep_free),
176 format_bytes(fb4, sizeof(fb4), ss_avail),
177 format_bytes(fb5, sizeof(fb5), MIN(m->max_use, avail)));
180 return s->cached_available_space;
/* Looks up the gid of the "systemd-journal" group and stores it in
 * s->file_gid. s->file_gid_valid is checked first and is set at the end even
 * when resolution fails (see the comment below), so the lookup is attempted
 * only once. */
183 static void server_read_file_gid(Server *s) {
184 const char *g = "systemd-journal";
189 if (s->file_gid_valid)
192 r = get_group_creds(&g, &s->file_gid);
194 log_warning("Failed to resolve '%s' group: %s", g, strerror(-r));
196 /* if we couldn't read the gid, then it will be 0, but that's
197 * fine and we shouldn't try to resolve the group again, so
198 * let's just pretend it worked right-away. */
199 s->file_gid_valid = true;
/* Fixes up ownership and access rights of journal file f.
 *
 * The file is set to mode 0640, owner 0 and group s->file_gid. In addition
 * the file's ACL is read, an entry for uid is looked up (the code can create
 * a new ACL_USER entry qualified with uid), ACL_READ is added to that entry
 * and the ACL is written back. Every failure is only logged as a warning. */
202 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
207 acl_permset_t permset;
/* Make sure s->file_gid has been resolved before it is used below. */
212 server_read_file_gid(s);
214 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
216 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
222 acl = acl_get_fd(f->fd);
224 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
228 r = acl_find_uid(acl, uid, &entry);
/* New entry: tag it as a per-user entry and bind it to uid. */
231 if (acl_create_entry(&acl, &entry) < 0 ||
232 acl_set_tag_type(entry, ACL_USER) < 0 ||
233 acl_set_qualifier(entry, &uid) < 0) {
234 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
239 /* We do not recalculate the mask unconditionally here,
240 * so that the fchmod() mask above stays intact. */
241 if (acl_get_permset(entry, &permset) < 0 ||
242 acl_add_perm(permset, ACL_READ) < 0 ||
243 calc_acl_mask_if_needed(&acl) < 0) {
244 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
248 if (acl_set_fd(f->fd, acl) < 0)
249 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Chooses the journal file that an entry logged by uid is written to.
 *
 * The runtime journal, when open, is returned directly (see the comment
 * below). Per-user files are named
 * /var/log/journal/<machine-id>/user-<uid>.journal and are kept in the
 * s->user_journals hashmap keyed by uid. The error paths visible here (machine
 * ID lookup, path formatting, opening, registering) return the system journal
 * instead. */
256 static JournalFile* find_journal(Server *s, uid_t uid) {
257 _cleanup_free_ char *p = NULL;
264 /* We split up user logs only on /var, not on /run. If the
265 * runtime file is open, we write to it exclusively, in order
266 * to guarantee proper order as soon as we flush /run to
267 * /var and close the runtime file. */
269 if (s->runtime_journal)
270 return s->runtime_journal;
273 return s->system_journal;
275 r = sd_id128_get_machine(&machine);
277 return s->system_journal;
/* Look for an already-open journal for this uid. */
279 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
283 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
284 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
285 return s->system_journal;
/* Make room first: close entries taken from the hashmap while it holds
 * USER_JOURNALS_MAX or more files. */
287 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
288 /* Too many open? Then let's close one */
289 f = hashmap_steal_first(s->user_journals);
291 journal_file_close(f);
294 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
296 return s->system_journal;
/* Grant uid read access to its own journal file. */
298 server_fix_perms(s, f, uid);
300 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
/* The file could not be registered: close it again and fall back. */
302 journal_file_close(f);
303 return s->system_journal;
/* Rotates the runtime journal, the system journal and every open per-user
 * journal via journal_file_rotate(), and calls server_fix_perms() on the
 * resulting files. The runtime journal is rotated with sealing off (false),
 * the others with s->seal. Failures are logged with log_error(). */
309 void server_rotate(Server *s) {
315 log_debug("Rotating...");
317 if (s->runtime_journal) {
318 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
/* The pointer is passed by address, so it is re-checked here to pick
 * the matching error message. */
320 if (s->runtime_journal)
321 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
323 log_error("Failed to create new runtime journal: %s", strerror(-r));
325 server_fix_perms(s, s->runtime_journal, 0);
328 if (s->system_journal) {
329 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
331 if (s->system_journal)
332 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
334 log_error("Failed to create new system journal: %s", strerror(-r));
337 server_fix_perms(s, s->system_journal, 0);
/* Per-user journals: the hashmap key k is the uid the file belongs to. */
340 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
341 r = journal_file_rotate(&f, s->compress, s->seal);
344 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
346 log_error("Failed to create user journal: %s", strerror(-r));
/* f was passed by address to journal_file_rotate(); store the current
 * pointer back under the same key. */
348 hashmap_replace(s->user_journals, k, f);
349 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Calls journal_file_set_offline() on the system journal and on every open
 * per-user journal, then reprograms the sync timer with an all-zero
 * itimerspec and clears s->sync_scheduled. Failures are logged. */
354 void server_sync(Server *s) {
/* All-zero timer specification; per timerfd_settime(2) a zero it_value
 * disarms the timer. */
360 static const struct itimerspec sync_timer_disable = {};
362 if (s->system_journal) {
363 r = journal_file_set_offline(s->system_journal);
365 log_error("Failed to sync system journal: %s", strerror(-r));
368 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
369 r = journal_file_set_offline(f);
371 log_error("Failed to sync user journal: %s", strerror(-r));
374 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
376 log_error("Failed to disable max timer: %m");
/* Allow server_schedule_sync() to arm the timer again. */
378 s->sync_scheduled = false;
/* Runs journal_directory_vacuum() on this machine's journal directories:
 * /var/log/journal/<machine-id> with the system metrics when the system
 * journal is open, and /run/log/journal/<machine-id> with the runtime metrics
 * when the runtime journal is open. s->max_retention_usec is passed to both
 * calls and s->oldest_file_usec is reset and handed in as output argument.
 * A result of -ENOENT is not logged as an error. */
381 void server_vacuum(Server *s) {
386 log_debug("Vacuuming...");
388 s->oldest_file_usec = 0;
390 r = sd_id128_get_machine(&machine);
392 log_error("Failed to get machine ID: %s", strerror(-r));
396 sd_id128_to_string(machine, ids);
398 if (s->system_journal) {
399 char *p = strappenda("/var/log/journal/", ids);
401 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
402 if (r < 0 && r != -ENOENT)
403 log_error("Failed to vacuum %s: %s", p, strerror(-r));
406 if (s->runtime_journal) {
407 char *p = strappenda("/run/log/journal/", ids);
409 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
410 if (r < 0 && r != -ENOENT)
411 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* Reset the timestamp that available_space() uses to decide whether its
 * cached value may still be returned. */
414 s->cached_available_space_timestamp = 0;
/* Classifies the negated-errno result r of a failed journal write on f and
 * logs why the file is going to be rotated. Callers in this file
 * (write_to_journal(), server_flush_to_var()) retry the write only when this
 * function returns true.
 *
 * Every code is compared in its negated form (-EFOO), matching the list in
 * the comment below; that includes -ESHUTDOWN for an already archived file. */
417 bool shall_try_append_again(JournalFile *f, int r) {
419 /* -E2BIG Hit configured limit
421 -EDQUOT Quota limit hit
423 -EHOSTDOWN Other machine
424 -EBUSY Unclean shutdown
425 -EPROTONOSUPPORT Unsupported feature
428 -ESHUTDOWN Already archived */
/* Size or quota limits: routine, so only a debug message. */
430 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
431 log_debug("%s: Allocation limit reached, rotating.", f->path);
432 else if (r == -EHOSTDOWN)
433 log_info("%s: Journal file from other machine, rotating.", f->path);
434 else if (r == -EBUSY)
435 log_info("%s: Unclean shutdown, rotating.", f->path);
436 else if (r == -EPROTONOSUPPORT)
437 log_info("%s: Unsupported feature, rotating.", f->path);
/* Error codes arrive negated, so the archived-file case has to be matched as
 * -ESHUTDOWN; a comparison with the positive constant would not match it. */
438 else if (r == -EBADMSG || r == -ENODATA || r == -ESHUTDOWN)
439 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Appends the entry made of the n fields in iovec to the journal selected
 * for uid.
 *
 * The target file comes from find_journal(); when
 * journal_file_rotate_suggested() fires, the file is looked up again
 * afterwards. server_schedule_sync() is called after the first append
 * attempt (on success, judging by the error handling that follows). If the
 * append failed and shall_try_append_again() allows it, the write is retried
 * once; a failure that remains is logged with the entry's item count and
 * total byte size.
 *
 * n is unsigned, so the item count is printed with %u. */
446 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
/* Tracks whether vacuuming has already been done for this entry. */
448 bool vacuumed = false;
455 f = find_journal(s, uid);
459 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
460 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
/* Look the file up again now that rotation was suggested. */
465 f = find_journal(s, uid);
470 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
472 server_schedule_sync(s);
/* Give up if vacuuming was already done or the error is not one that a
 * rotation can cure. */
476 if (vacuumed || !shall_try_append_again(f, r)) {
/* Total payload size, for the log message only. */
479 for (i = 0; i < n; i++)
480 size += iovec[i].iov_len;
482 log_error("Failed to write entry (%u items, %zu bytes), ignoring: %s", n, size, strerror(-r));
489 f = find_journal(s, uid);
493 log_debug("Retrying write.");
494 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
498 for (i = 0; i < n; i++)
499 size += iovec[i].iov_len;
501 log_error("Failed to write entry (%u items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
/* Appends metadata fields to the caller-supplied iovec array and hands the
 * finished entry to write_to_journal().
 *
 * iovec holds n fields on entry and has room for m; the assert below requires
 * space for N_IOVEC_META_FIELDS more (plus N_IOVEC_OBJECT_FIELDS when
 * object_pid is set). Fields are derived from the sender's ucred (_PID, _UID,
 * _GID, _COMM, _EXE, _CMDLINE, _CAP_EFFECTIVE, audit and cgroup data, SELinux
 * context), from object_pid (the OBJECT_* counterparts) and from the host
 * (_SOURCE_REALTIME_TIMESTAMP, _BOOT_ID, _MACHINE_ID, _HOSTNAME). The uid
 * passed to write_to_journal() is chosen according to s->split_mode. */
505 static void dispatch_message_real(
507 struct iovec *iovec, unsigned n, unsigned m,
510 const char *label, size_t label_len,
/* Fixed-size buffers for the formatted fields: each is sized from its field
 * prefix plus the longest decimal rendering of the value type (or 32
 * characters for the two ID fields). */
514 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
515 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
516 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
517 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
518 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
519 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
520 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=",
521 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
522 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
523 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
531 uid_t realuid = 0, owner = 0, journal_uid;
532 bool owner_valid = false;
534 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
535 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
536 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
537 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
/* The caller must have left enough free slots for everything added below. */
546 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
/* Credentials of the sending process, taken from ucred. */
549 realuid = ucred->uid;
551 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
552 IOVEC_SET_STRING(iovec[n++], pid);
554 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
555 IOVEC_SET_STRING(iovec[n++], uid);
557 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
558 IOVEC_SET_STRING(iovec[n++], gid);
/* Process details looked up by the sender's pid. */
560 r = get_process_comm(ucred->pid, &t);
562 x = strappenda("_COMM=", t);
564 IOVEC_SET_STRING(iovec[n++], x);
567 r = get_process_exe(ucred->pid, &t);
569 x = strappenda("_EXE=", t);
571 IOVEC_SET_STRING(iovec[n++], x);
574 r = get_process_cmdline(ucred->pid, 0, false, &t);
576 x = strappenda("_CMDLINE=", t);
578 IOVEC_SET_STRING(iovec[n++], x);
581 r = get_process_capeff(ucred->pid, &t);
583 x = strappenda("_CAP_EFFECTIVE=", t);
585 IOVEC_SET_STRING(iovec[n++], x);
/* Audit session and login uid of the sender. */
589 r = audit_session_from_pid(ucred->pid, &audit);
591 sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
592 IOVEC_SET_STRING(iovec[n++], audit_session);
595 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
597 sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
598 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* Fields derived from the sender's cgroup path c: the path itself, the
 * session, the owner uid and the unit. */
602 r = cg_pid_get_path_shifted(ucred->pid, NULL, &c);
604 char *session = NULL;
606 x = strappenda("_SYSTEMD_CGROUP=", c);
607 IOVEC_SET_STRING(iovec[n++], x);
609 r = cg_path_get_session(c, &t);
611 session = strappenda("_SYSTEMD_SESSION=", t);
613 IOVEC_SET_STRING(iovec[n++], session);
/* owner is consulted again further down when the journal uid is chosen. */
616 if (cg_path_get_owner_uid(c, &owner) >= 0) {
619 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
620 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* Unit name: from the cgroup path if it yields a system or user unit,
 * otherwise from the unit_id argument when one was given. */
623 if (cg_path_get_unit(c, &t) >= 0) {
624 x = strappenda("_SYSTEMD_UNIT=", t);
626 } else if (cg_path_get_user_unit(c, &t) >= 0) {
627 x = strappenda("_SYSTEMD_USER_UNIT=", t);
629 } else if (unit_id) {
631 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
633 x = strappenda("_SYSTEMD_UNIT=", unit_id);
638 IOVEC_SET_STRING(iovec[n++], x);
/* SELinux context: built either from the label/label_len arguments (copied
 * and NUL-terminated by hand, since only a length is given) or from
 * getpidcon() on the sender's pid. */
645 x = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);
647 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
648 IOVEC_SET_STRING(iovec[n++], x);
650 security_context_t con;
652 if (getpidcon(ucred->pid, &con) >= 0) {
653 x = strappenda("_SELINUX_CONTEXT=", con);
656 IOVEC_SET_STRING(iovec[n++], x);
/* The same kind of lookups for object_pid, stored under OBJECT_* names. */
664 r = get_process_uid(object_pid, &object_uid);
666 sprintf(o_uid, "OBJECT_UID=%lu", (unsigned long) object_uid);
667 IOVEC_SET_STRING(iovec[n++], o_uid);
670 r = get_process_gid(object_pid, &object_gid);
672 sprintf(o_gid, "OBJECT_GID=%lu", (unsigned long) object_gid);
673 IOVEC_SET_STRING(iovec[n++], o_gid);
676 r = get_process_comm(object_pid, &t);
678 x = strappenda("OBJECT_COMM=", t);
680 IOVEC_SET_STRING(iovec[n++], x);
683 r = get_process_exe(object_pid, &t);
685 x = strappenda("OBJECT_EXE=", t);
687 IOVEC_SET_STRING(iovec[n++], x);
690 r = get_process_cmdline(object_pid, 0, false, &t);
692 x = strappenda("OBJECT_CMDLINE=", t);
694 IOVEC_SET_STRING(iovec[n++], x);
698 r = audit_session_from_pid(object_pid, &audit);
700 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%lu", (unsigned long) audit);
701 IOVEC_SET_STRING(iovec[n++], o_audit_session);
704 r = audit_loginuid_from_pid(object_pid, &loginuid);
706 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
707 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
711 r = cg_pid_get_path_shifted(object_pid, NULL, &c);
713 x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
714 IOVEC_SET_STRING(iovec[n++], x);
716 r = cg_path_get_session(c, &t);
718 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
720 IOVEC_SET_STRING(iovec[n++], x);
723 if (cg_path_get_owner_uid(c, &owner) >= 0) {
724 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
725 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
728 if (cg_path_get_unit(c, &t) >= 0) {
729 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
731 } else if (cg_path_get_user_unit(c, &t) >= 0) {
732 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
738 IOVEC_SET_STRING(iovec[n++], x);
/* Timestamp taken from the tv argument. */
746 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
747 IOVEC_SET_STRING(iovec[n++], source_time);
750 /* Note that strictly speaking storing the boot id here is
751 * redundant since the entry includes this in-line
752 * anyway. However, we need this indexed, too. */
753 r = sd_id128_get_boot(&id);
/* The ID string is written right behind the prefix the buffer was
 * initialized with. */
755 sd_id128_to_string(id, boot_id + strlen("_BOOT_ID="));
756 IOVEC_SET_STRING(iovec[n++], boot_id);
759 r = sd_id128_get_machine(&id);
761 sd_id128_to_string(id, machine_id + strlen("_MACHINE_ID="));
762 IOVEC_SET_STRING(iovec[n++], machine_id);
765 t = gethostname_malloc();
767 x = strappenda("_HOSTNAME=", t);
769 IOVEC_SET_STRING(iovec[n++], x);
/* Select the uid whose journal receives the entry, depending on the
 * configured split mode. */
774 if (s->split_mode == SPLIT_UID && realuid > 0)
775 /* Split up strictly by any UID */
776 journal_uid = realuid;
777 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
778 /* Split up by login UIDs, this avoids creation of
779 * individual journals for system UIDs. We do this
780 * only if the realuid is not root, in order not to
781 * accidentally leak privileged information to the
782 * user that is logged by a privileged process that is
783 * part of an unprivileged session.*/
788 write_to_journal(s, journal_uid, iovec, n);
/* Logs a printf-style message on behalf of journald itself.
 *
 * The entry carries PRIORITY=6, _TRANSPORT=driver, MESSAGE=<formatted text>
 * and, when message_id is not SD_ID128_NULL, a message ID field. It is
 * dispatched through dispatch_message_real() with the pid, uid and gid of the
 * current process as credentials. */
791 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
792 char mid[11 + 32 + 1];
793 char buffer[16 + LINE_MAX + 1];
/* Room for the up to four fields set here plus the metadata fields that
 * dispatch_message_real() appends. */
794 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
797 struct ucred ucred = {};
802 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
803 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The 8-byte field prefix goes first; the formatted text follows it. */
805 memcpy(buffer, "MESSAGE=", 8);
806 va_start(ap, format);
807 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
809 char_array_0(buffer);
810 IOVEC_SET_STRING(iovec[n++], buffer);
812 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
813 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
815 IOVEC_SET_STRING(iovec[n++], mid);
/* journald is the sender of this entry. */
818 ucred.pid = getpid();
819 ucred.uid = getuid();
820 ucred.gid = getgid();
822 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, 0);
/* Entry point for storing an incoming message.
 *
 * The message priority is compared against s->max_level_store, the sender's
 * cgroup path is shortened (see the comment below) and used as the key for
 * journal_rate_limit_test(), which also receives the priority and the
 * currently available space. A "Suppressed ... messages" driver message can
 * be written before the entry itself is passed to dispatch_message_real(). */
825 void server_dispatch_message(
827 struct iovec *iovec, unsigned n, unsigned m,
830 const char *label, size_t label_len,
836 _cleanup_free_ char *path = NULL;
/* A NULL iovec is acceptable only together with a zero field count. */
840 assert(iovec || n == 0);
845 if (LOG_PRI(priority) > s->max_level_store)
851 r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
855 /* example: /user/lennart/3/foobar
856 * /system/dbus.service/foobar
858 * So let's cut of everything past the third /, since that is
859 * where user directories start */
/* Walk from one '/' to the next to locate the cut position. */
861 c = strchr(path, '/');
863 c = strchr(c+1, '/');
865 c = strchr(c+1, '/');
871 rl = journal_rate_limit_test(s->rate_limit, path,
872 priority & LOG_PRIMASK, available_space(s, false));
877 /* Write a suppression message if we suppressed something */
879 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
880 "Suppressed %u messages from %s", rl - 1, path);
883 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, object_pid);
/* Opens the journal files that fit the configured storage mode.
 *
 * The persistent system journal
 * (/var/log/journal/<machine-id>/system.journal) is only attempted for
 * STORAGE_PERSISTENT or STORAGE_AUTO, and only once
 * /run/systemd/journal/flushed exists. The runtime journal
 * (/run/log/journal/<machine-id>/system.journal) is handled for every mode
 * except STORAGE_NONE. Opened files get their permissions fixed, and the
 * function ends by recomputing the available space with verbose output. */
887 static int system_journal_open(Server *s) {
893 r = sd_id128_get_machine(&machine);
897 sd_id128_to_string(machine, ids);
899 if (!s->system_journal &&
900 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
901 access("/run/systemd/journal/flushed", F_OK) >= 0) {
903 /* If in auto mode: first try to create the machine
904 * path, but not the prefix.
906 * If in persistent mode: create /var/log/journal and
907 * the machine path */
909 if (s->storage == STORAGE_PERSISTENT)
910 (void) mkdir("/var/log/journal/", 0755);
/* mkdir() results are deliberately discarded; a missing directory shows up
 * when the file is opened below. */
912 fn = strappenda("/var/log/journal/", ids);
913 (void) mkdir(fn, 0755);
915 fn = strappenda(fn, "/system.journal");
916 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
919 server_fix_perms(s, s->system_journal, 0);
/* -ENOENT and -EROFS are not worth a warning here. */
921 if (r != -ENOENT && r != -EROFS)
922 log_warning("Failed to open system journal: %s", strerror(-r));
928 if (!s->runtime_journal &&
929 (s->storage != STORAGE_NONE)) {
931 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
935 if (s->system_journal) {
937 /* Try to open the runtime journal, but only
938 * if it already exists, so that we can flush
939 * it into the system journal */
941 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
946 log_warning("Failed to open runtime journal: %s", strerror(-r));
953 /* OK, we really need the runtime journal, so create
954 * it if necessary. */
956 (void) mkdir_parents(fn, 0755);
957 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
961 log_error("Failed to open runtime journal: %s", strerror(-r));
966 if (s->runtime_journal)
967 server_fix_perms(s, s->runtime_journal, 0);
/* Recompute the space budget; verbose is passed as true. */
970 available_space(s, true);
/* Copies the entries of the runtime journal into the system journal.
 *
 * The checks at the top look at the storage mode (STORAGE_AUTO or
 * STORAGE_PERSISTENT), at whether a runtime journal is open and at whether
 * system_journal_open() produced a system journal. Entries are read through
 * an sd_journal opened with SD_JOURNAL_RUNTIME_ONLY and copied one by one
 * with journal_file_copy_entry(); a failed copy is retried once when
 * shall_try_append_again() allows it. The tail of the function calls
 * journal_file_post_change() on the system journal, closes the runtime
 * journal, calls rm_rf() on /run/log/journal and closes the reader. */
975 int server_flush_to_var(Server *s) {
978 sd_journal *j = NULL;
982 if (s->storage != STORAGE_AUTO &&
983 s->storage != STORAGE_PERSISTENT)
986 if (!s->runtime_journal)
/* Try to bring up the system journal; the result is judged by the pointer
 * check that follows. */
989 system_journal_open(s);
991 if (!s->system_journal)
994 log_debug("Flushing to /var...");
996 r = sd_id128_get_machine(&machine);
998 log_error("Failed to get machine id: %s", strerror(-r));
1002 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1004 log_error("Failed to read runtime journal: %s", strerror(-r));
/* A data threshold of 0 is set on the reader before iterating. */
1008 sd_journal_set_data_threshold(j, 0);
1010 SD_JOURNAL_FOREACH(j) {
/* Copy straight from the file the reader is currently positioned in. */
1014 f = j->current_file;
1015 assert(f && f->current_offset > 0);
1017 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1019 log_error("Can't read entry: %s", strerror(-r));
1023 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1027 if (!shall_try_append_again(s->system_journal, r)) {
1028 log_error("Can't write entry: %s", strerror(-r));
1035 if (!s->system_journal) {
1036 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1041 log_debug("Retrying write.");
1042 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1044 log_error("Can't write entry: %s", strerror(-r));
1050 journal_file_post_change(s->system_journal);
1052 journal_file_close(s->runtime_journal);
/* Clear the pointer so nothing keeps using the closed file. */
1053 s->runtime_journal = NULL;
1056 rm_rf("/run/log/journal", false, true, false);
1058 sd_journal_close(j);
/* Handles one epoll event for the server.
 *
 * The event is routed by comparing ev->data.fd with the server's well-known
 * descriptors: the signalfd, the sync timer, /dev/kmsg, the native and syslog
 * datagram sockets and the stdout listening socket. Anything else is treated
 * as an stdout stream whose pointer is stored in ev->data.ptr. */
1063 int process_event(Server *s, struct epoll_event *ev) {
1067 if (ev->data.fd == s->signal_fd) {
1068 struct signalfd_siginfo sfsi;
1071 if (ev->events != EPOLLIN) {
1072 log_error("Got invalid event from epoll.");
/* A complete signalfd_siginfo record is expected from each read. */
1076 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1077 if (n != sizeof(sfsi)) {
1082 if (errno == EINTR || errno == EAGAIN)
/* SIGUSR1: create the "flushed" flag file and flush the runtime journal
 * to /var. */
1088 if (sfsi.ssi_signo == SIGUSR1) {
1089 touch("/run/systemd/journal/flushed");
1090 server_flush_to_var(s);
1095 if (sfsi.ssi_signo == SIGUSR2) {
1101 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1105 } else if (ev->data.fd == s->sync_timer_fd) {
1109 log_debug("Got sync request from epoll.");
/* Read the timerfd's value into t. */
1111 r = read(ev->data.fd, (void *)&t, sizeof(t));
1118 } else if (ev->data.fd == s->dev_kmsg_fd) {
1121 if (ev->events != EPOLLIN) {
1122 log_error("Got invalid event from epoll.");
1126 r = server_read_dev_kmsg(s);
1132 } else if (ev->data.fd == s->native_fd ||
1133 ev->data.fd == s->syslog_fd) {
1135 if (ev->events != EPOLLIN) {
1136 log_error("Got invalid event from epoll.");
1141 struct msghdr msghdr;
1143 struct ucred *ucred = NULL;
1144 struct timeval *tv = NULL;
1145 struct cmsghdr *cmsg;
1147 size_t label_len = 0;
/* Control-message buffer with room for credentials, a timestamp, one fd and
 * an SELinux label. */
1149 struct cmsghdr cmsghdr;
1151 /* We use NAME_MAX space for the
1152 * SELinux label here. The kernel
1153 * currently enforces no limit, but
1154 * according to suggestions from the
1155 * SELinux people this will change and
1156 * it will probably be identical to
1157 * NAME_MAX. For now we use that, but
1158 * this should be updated one day when
1159 * the final limit is known.*/
1160 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1161 CMSG_SPACE(sizeof(struct timeval)) +
1162 CMSG_SPACE(sizeof(int)) + /* fd */
1163 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Query the pending input size with SIOCINQ and grow the receive buffer
 * when it is too small. */
1170 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1171 log_error("SIOCINQ failed: %m");
1175 if (s->buffer_size < (size_t) v) {
/* At least double the old size; one byte more than l is allocated. */
1179 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1180 b = realloc(s->buffer, l+1);
1183 log_error("Couldn't increase buffer.");
1192 iovec.iov_base = s->buffer;
1193 iovec.iov_len = s->buffer_size;
1197 msghdr.msg_iov = &iovec;
1198 msghdr.msg_iovlen = 1;
1199 msghdr.msg_control = &control;
1200 msghdr.msg_controllen = sizeof(control);
1202 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1205 if (errno == EINTR || errno == EAGAIN)
1208 log_error("recvmsg() failed: %m");
/* Pick credentials, SELinux label, timestamp and passed file descriptors
 * out of the control messages. Credentials and timestamp are accepted only
 * when the payload length matches the expected structure size. */
1212 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1214 if (cmsg->cmsg_level == SOL_SOCKET &&
1215 cmsg->cmsg_type == SCM_CREDENTIALS &&
1216 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1217 ucred = (struct ucred*) CMSG_DATA(cmsg);
1218 else if (cmsg->cmsg_level == SOL_SOCKET &&
1219 cmsg->cmsg_type == SCM_SECURITY) {
1220 label = (char*) CMSG_DATA(cmsg);
1221 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1222 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1223 cmsg->cmsg_type == SO_TIMESTAMP &&
1224 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1225 tv = (struct timeval*) CMSG_DATA(cmsg);
1226 else if (cmsg->cmsg_level == SOL_SOCKET &&
1227 cmsg->cmsg_type == SCM_RIGHTS) {
1228 fds = (int*) CMSG_DATA(cmsg);
1229 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* syslog socket: plain data only; passed file descriptors are refused. */
1233 if (ev->data.fd == s->syslog_fd) {
1236 if (n > 0 && n_fds == 0) {
1237 e = memchr(s->buffer, '\n', n);
1243 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1244 } else if (n_fds > 0)
1245 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native socket: either inline data, or exactly one file descriptor with an
 * empty datagram. */
1248 if (n > 0 && n_fds == 0)
1249 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1250 else if (n == 0 && n_fds == 1)
1251 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1253 log_warning("Got too many file descriptors via native socket. Ignoring.");
/* Close the file descriptors that came with the message. */
1256 close_many(fds, n_fds);
1261 } else if (ev->data.fd == s->stdout_fd) {
1263 if (ev->events != EPOLLIN) {
1264 log_error("Got invalid event from epoll.");
1268 stdout_stream_new(s);
1272 StdoutStream *stream;
/* Only EPOLLIN and EPOLLHUP are acceptable on a stream. */
1274 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1275 log_error("Got invalid event from epoll.");
1279 /* If it is none of the well-known fds, it must be an
1280 * stdout stream fd. Note that this is a bit ugly here
1281 * (since we rely that none of the well-known fds
1282 * could be interpreted as pointer), but nonetheless
1283 * safe, since the well-known fds would never get an
1284 * fd > 4096, i.e. beyond the first memory page */
1286 stream = ev->data.ptr;
/* A result <= 0 from processing leads to the stream being freed. */
1288 if (stdout_stream_process(stream) <= 0)
1289 stdout_stream_free(stream);
1294 log_error("Unknown event.");
/* Sets the process signal mask to SIGINT, SIGTERM, SIGUSR1 and SIGUSR2,
 * creates a non-blocking, close-on-exec signalfd for that set and registers
 * it with the server's epoll instance for EPOLLIN. */
1298 static int open_signalfd(Server *s) {
1300 struct epoll_event ev;
1304 assert_se(sigemptyset(&mask) == 0);
1305 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
/* SIG_SETMASK replaces the current mask with exactly this set. */
1306 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1308 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1309 if (s->signal_fd < 0) {
1310 log_error("signalfd(): %m");
1315 ev.events = EPOLLIN;
1316 ev.data.fd = s->signal_fd;
1318 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1319 log_error("epoll_ctl(): %m");
/* Applies the boolean switches systemd.journald.forward_to_syslog=,
 * systemd.journald.forward_to_kmsg= and systemd.journald.forward_to_console=
 * found in /proc/cmdline to the Server. Any other word starting with
 * "systemd.journald" is reported as invalid.
 *
 * NOTE(review): the detect_container() check at the top presumably skips the
 * kernel command line inside containers - confirm. */
1326 static int server_parse_proc_cmdline(Server *s) {
1327 _cleanup_free_ char *line = NULL;
1332 if (detect_container(NULL) > 0)
1335 r = read_one_line_file("/proc/cmdline", &line);
1337 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1341 FOREACH_WORD_QUOTED(w, l, line, state) {
1342 _cleanup_free_ char *word;
/* Work on a NUL-terminated copy of the current word. */
1344 word = strndup(w, l);
/* The numeric offsets below equal the length of the matched prefix, so
 * word + N points at the value after '='. */
1348 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1349 r = parse_boolean(word + 35);
1351 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1353 s->forward_to_syslog = r;
1354 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1355 r = parse_boolean(word + 33);
1357 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1359 s->forward_to_kmsg = r;
1360 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1361 r = parse_boolean(word + 36);
1363 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1365 s->forward_to_console = r;
1366 } else if (startswith(word, "systemd.journald"))
1367 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Parses /etc/systemd/journald.conf, section "Journal", into the Server via
 * config_parse() with journald_gperf_lookup as the lookup table. An open
 * failure is checked for ENOENT separately; other open failures and parse
 * failures are logged as warnings. */
1373 static int server_parse_config_file(Server *s) {
1374 static const char fn[] = "/etc/systemd/journald.conf";
1375 _cleanup_fclose_ FILE *f = NULL;
/* "e" in the mode string is the glibc extension that opens the file with
 * O_CLOEXEC. */
1380 f = fopen(fn, "re");
1382 if (errno == ENOENT)
1385 log_warning("Failed to open configuration file %s: %m", fn);
1389 r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1390 (void*) journald_gperf_lookup, false, false, s);
1392 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Creates the close-on-exec CLOCK_MONOTONIC timerfd stored in
 * s->sync_timer_fd and registers it with the server's epoll instance for
 * EPOLLIN. */
1397 static int server_open_sync_timer(Server *s) {
1399 struct epoll_event ev;
1403 s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1404 if (s->sync_timer_fd < 0)
1408 ev.events = EPOLLIN;
1409 ev.data.fd = s->sync_timer_fd;
1411 r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1413 log_error("Failed to add idle timer fd to epoll object: %m");
/* Arms the sync timer so that it expires s->sync_interval_usec from now and
 * records this in s->sync_scheduled. s->sync_scheduled is tested first, and
 * the timer is only programmed when the interval is non-zero. */
1420 int server_schedule_sync(Server *s) {
1425 if (s->sync_scheduled)
1428 if (s->sync_interval_usec) {
/* Only it_value is filled in; it_interval stays zero, which makes this a
 * one-shot timer. */
1429 struct itimerspec sync_timer_enable = {};
1431 timespec_store(&sync_timer_enable.it_value, s->sync_interval_usec);
1433 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1438 s->sync_scheduled = true;
/* Initializes the Server.
 *
 * Sets defaults, reads the configuration file and the kernel command line,
 * creates /run/systemd/journal, the user-journal hashmap, the mmap cache and
 * the epoll instance, adopts sockets passed in via sd_listen_fds(), opens the
 * remaining sockets and sources, sets up udev and the rate limiter, and
 * finally opens the journal files. */
1443 int server_init(Server *s) {
/* Mark every descriptor as not yet opened; server_done() closes only those
 * that are >= 0. */
1449 s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1450 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1454 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1455 s->sync_scheduled = false;
1457 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1458 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1460 s->forward_to_syslog = true;
1462 s->max_level_store = LOG_DEBUG;
1463 s->max_level_syslog = LOG_DEBUG;
1464 s->max_level_kmsg = LOG_NOTICE;
1465 s->max_level_console = LOG_INFO;
/* Fill both metrics structures with 0xFF bytes. NOTE(review): presumably
 * the all-ones value stands for "not configured" - confirm against the users
 * of these fields. */
1467 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1468 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* Configuration file first, then the kernel command line on top. */
1470 server_parse_config_file(s);
1471 server_parse_proc_cmdline(s);
/* If exactly one of interval and burst is zero, zero both. */
1472 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1473 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1474 (long long unsigned) s->rate_limit_interval,
1475 s->rate_limit_burst);
1476 s->rate_limit_interval = s->rate_limit_burst = 0;
1479 mkdir_p("/run/systemd/journal", 0755);
1481 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1482 if (!s->user_journals)
1485 s->mmap = mmap_cache_new();
1489 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1490 if (s->epoll_fd < 0) {
1491 log_error("Failed to create epoll object: %m");
1495 n = sd_listen_fds(true);
1497 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
/* Classify each passed socket by type and bound path; a second socket of
 * the same kind is an error. */
1501 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1503 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1505 if (s->native_fd >= 0) {
1506 log_error("Too many native sockets passed.");
1512 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1514 if (s->stdout_fd >= 0) {
1515 log_error("Too many stdout sockets passed.");
1521 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1523 if (s->syslog_fd >= 0) {
1524 log_error("Too many /dev/log sockets passed.");
1531 log_error("Unknown socket passed.");
/* Run the open routines for the sockets and the other event sources. */
1536 r = server_open_syslog_socket(s);
1540 r = server_open_native_socket(s);
1544 r = server_open_stdout_socket(s);
1548 r = server_open_dev_kmsg(s);
1552 r = server_open_kernel_seqnum(s);
1556 r = server_open_sync_timer(s);
1560 r = open_signalfd(s);
1564 s->udev = udev_new();
1568 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1569 s->rate_limit_burst);
1573 r = system_journal_open(s);
/* Calls journal_file_maybe_append_tag() with the current CLOCK_REALTIME time
 * on the system journal, if open, and on every journal in s->user_journals. */
1580 void server_maybe_append_tags(Server *s) {
/* One timestamp is taken up front and shared by all files. */
1586 n = now(CLOCK_REALTIME);
1588 if (s->system_journal)
1589 journal_file_maybe_append_tag(s->system_journal, n);
1591 HASHMAP_FOREACH(f, s->user_journals, i)
1592 journal_file_maybe_append_tag(f, n);
/* Releases what the Server holds: all stdout streams, the system, runtime
 * and per-user journal files, the user-journal hashmap, every file
 * descriptor that was opened (>= 0), the rate limiter, the kernel sequence
 * number mapping, the mmap cache and the udev context. */
1596 void server_done(Server *s) {
/* Loops until the stream list is empty, freeing its head each time. */
1600 while (s->stdout_streams)
1601 stdout_stream_free(s->stdout_streams);
1603 if (s->system_journal)
1604 journal_file_close(s->system_journal);
1606 if (s->runtime_journal)
1607 journal_file_close(s->runtime_journal);
/* Empty the hashmap entry by entry, closing each file, before freeing it. */
1609 while ((f = hashmap_steal_first(s->user_journals)))
1610 journal_file_close(f);
1612 hashmap_free(s->user_journals);
1614 if (s->epoll_fd >= 0)
1615 close_nointr_nofail(s->epoll_fd);
1617 if (s->signal_fd >= 0)
1618 close_nointr_nofail(s->signal_fd);
1620 if (s->syslog_fd >= 0)
1621 close_nointr_nofail(s->syslog_fd);
1623 if (s->native_fd >= 0)
1624 close_nointr_nofail(s->native_fd);
1626 if (s->stdout_fd >= 0)
1627 close_nointr_nofail(s->stdout_fd);
1629 if (s->dev_kmsg_fd >= 0)
1630 close_nointr_nofail(s->dev_kmsg_fd);
1632 if (s->sync_timer_fd >= 0)
1633 close_nointr_nofail(s->sync_timer_fd);
1636 journal_rate_limit_free(s->rate_limit);
/* The mapping that is released is the size of one uint64_t. */
1638 if (s->kernel_seqnum)
1639 munmap(s->kernel_seqnum, sizeof(uint64_t));
1645 mmap_cache_unref(s->mmap);
1648 udev_unref(s->udev);