1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
43 #include "conf-parser.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-server.h"
48 #include "journald-rate-limit.h"
49 #include "journald-kmsg.h"
50 #include "journald-syslog.h"
51 #include "journald-stream.h"
52 #include "journald-console.h"
53 #include "journald-native.h"
57 #include <acl/libacl.h>
62 #include <selinux/selinux.h>
65 #define USER_JOURNALS_MAX 1024
67 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
68 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
69 #define DEFAULT_RATE_LIMIT_BURST 1000
71 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
73 static const char* const storage_table[] = {
74 [STORAGE_AUTO] = "auto",
75 [STORAGE_VOLATILE] = "volatile",
76 [STORAGE_PERSISTENT] = "persistent",
77 [STORAGE_NONE] = "none"
80 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
81 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
83 static const char* const split_mode_table[] = {
84 [SPLIT_NONE] = "none",
86 [SPLIT_LOGIN] = "login"
89 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
90 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
92 static uint64_t available_space(Server *s, bool verbose) {
94 _cleanup_free_ char *p = NULL;
97 uint64_t sum = 0, ss_avail = 0, avail = 0;
99 _cleanup_closedir_ DIR *d = NULL;
104 ts = now(CLOCK_MONOTONIC);
106 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
108 return s->cached_available_space;
110 r = sd_id128_get_machine(&machine);
114 if (s->system_journal) {
115 f = "/var/log/journal/";
116 m = &s->system_metrics;
118 f = "/run/log/journal/";
119 m = &s->runtime_metrics;
124 p = strappend(f, sd_id128_to_string(machine, ids));
132 if (fstatvfs(dirfd(d), &ss) < 0)
138 union dirent_storage buf;
140 r = readdir_r(d, &buf.de, &de);
147 if (!endswith(de->d_name, ".journal") &&
148 !endswith(de->d_name, ".journal~"))
151 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
154 if (!S_ISREG(st.st_mode))
157 sum += (uint64_t) st.st_blocks * 512UL;
160 ss_avail = ss.f_bsize * ss.f_bavail;
161 avail = ss_avail > m->keep_free ? ss_avail - m->keep_free : 0;
163 s->cached_available_space = MIN(m->max_use, avail) > sum ? MIN(m->max_use, avail) - sum : 0;
164 s->cached_available_space_timestamp = ts;
167 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
168 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
170 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
171 "%s journal is using %s (max %s, leaving %s of free %s, current limit %s).",
172 s->system_journal ? "Permanent" : "Runtime",
173 format_bytes(fb1, sizeof(fb1), sum),
174 format_bytes(fb2, sizeof(fb2), m->max_use),
175 format_bytes(fb3, sizeof(fb3), m->keep_free),
176 format_bytes(fb4, sizeof(fb4), ss_avail),
177 format_bytes(fb5, sizeof(fb5), MIN(m->max_use, avail)));
180 return s->cached_available_space;
183 static void server_read_file_gid(Server *s) {
184 const char *g = "systemd-journal";
189 if (s->file_gid_valid)
192 r = get_group_creds(&g, &s->file_gid);
194 log_warning("Failed to resolve '%s' group: %s", g, strerror(-r));
196 /* if we couldn't read the gid, then it will be 0, but that's
197 * fine and we shouldn't try to resolve the group again, so
198 * let's just pretend it worked right-away. */
199 s->file_gid_valid = true;
202 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
207 acl_permset_t permset;
212 server_read_file_gid(s);
214 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
216 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
222 acl = acl_get_fd(f->fd);
224 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
228 r = acl_find_uid(acl, uid, &entry);
231 if (acl_create_entry(&acl, &entry) < 0 ||
232 acl_set_tag_type(entry, ACL_USER) < 0 ||
233 acl_set_qualifier(entry, &uid) < 0) {
234 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
239 /* We do not recalculate the mask unconditionally here,
240 * so that the fchmod() mask above stays intact. */
241 if (acl_get_permset(entry, &permset) < 0 ||
242 acl_add_perm(permset, ACL_READ) < 0 ||
243 calc_acl_mask_if_needed(&acl) < 0) {
244 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
248 if (acl_set_fd(f->fd, acl) < 0)
249 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
256 static JournalFile* find_journal(Server *s, uid_t uid) {
257 _cleanup_free_ char *p = NULL;
264 /* We split up user logs only on /var, not on /run. If the
265 * runtime file is open, we write to it exclusively, in order
266 * to guarantee proper order as soon as we flush /run to
267 * /var and close the runtime file. */
269 if (s->runtime_journal)
270 return s->runtime_journal;
273 return s->system_journal;
275 r = sd_id128_get_machine(&machine);
277 return s->system_journal;
279 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
283 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
284 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
285 return s->system_journal;
287 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
288 /* Too many open? Then let's close one */
289 f = hashmap_steal_first(s->user_journals);
291 journal_file_close(f);
294 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
296 return s->system_journal;
298 server_fix_perms(s, f, uid);
300 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
302 journal_file_close(f);
303 return s->system_journal;
309 void server_rotate(Server *s) {
315 log_debug("Rotating...");
317 if (s->runtime_journal) {
318 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
320 if (s->runtime_journal)
321 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
323 log_error("Failed to create new runtime journal: %s", strerror(-r));
325 server_fix_perms(s, s->runtime_journal, 0);
328 if (s->system_journal) {
329 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
331 if (s->system_journal)
332 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
334 log_error("Failed to create new system journal: %s", strerror(-r));
337 server_fix_perms(s, s->system_journal, 0);
340 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
341 r = journal_file_rotate(&f, s->compress, s->seal);
344 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
346 log_error("Failed to create user journal: %s", strerror(-r));
348 hashmap_replace(s->user_journals, k, f);
349 server_fix_perms(s, f, PTR_TO_UINT32(k));
354 void server_sync(Server *s) {
360 static const struct itimerspec sync_timer_disable = {};
362 if (s->system_journal) {
363 r = journal_file_set_offline(s->system_journal);
365 log_error("Failed to sync system journal: %s", strerror(-r));
368 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
369 r = journal_file_set_offline(f);
371 log_error("Failed to sync user journal: %s", strerror(-r));
374 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
376 log_error("Failed to disable max timer: %m");
378 s->sync_scheduled = false;
381 void server_vacuum(Server *s) {
386 log_debug("Vacuuming...");
388 s->oldest_file_usec = 0;
390 r = sd_id128_get_machine(&machine);
392 log_error("Failed to get machine ID: %s", strerror(-r));
396 sd_id128_to_string(machine, ids);
398 if (s->system_journal) {
399 char *p = strappenda("/var/log/journal/", ids);
401 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
402 if (r < 0 && r != -ENOENT)
403 log_error("Failed to vacuum %s: %s", p, strerror(-r));
406 if (s->runtime_journal) {
407 char *p = strappenda("/run/log/journal/", ids);
409 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
410 if (r < 0 && r != -ENOENT)
411 log_error("Failed to vacuum %s: %s", p, strerror(-r));
414 s->cached_available_space_timestamp = 0;
417 bool shall_try_append_again(JournalFile *f, int r) {
419 /* -E2BIG Hit configured limit
421 -EDQUOT Quota limit hit
423 -EHOSTDOWN Other machine
424 -EBUSY Unclean shutdown
425 -EPROTONOSUPPORT Unsupported feature
428 -ESHUTDOWN Already archived */
430 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
431 log_debug("%s: Allocation limit reached, rotating.", f->path);
432 else if (r == -EHOSTDOWN)
433 log_info("%s: Journal file from other machine, rotating.", f->path);
434 else if (r == -EBUSY)
435 log_info("%s: Unclean shutdown, rotating.", f->path);
436 else if (r == -EPROTONOSUPPORT)
437 log_info("%s: Unsupported feature, rotating.", f->path);
438 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
439 log_warning("%s: Journal file corrupted, rotating.", f->path);
446 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
448 bool vacuumed = false;
455 f = find_journal(s, uid);
459 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
460 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
465 f = find_journal(s, uid);
470 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
472 server_schedule_sync(s);
476 if (vacuumed || !shall_try_append_again(f, r)) {
479 for (i = 0; i < n; i++)
480 size += iovec[i].iov_len;
482 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
489 f = find_journal(s, uid);
493 log_debug("Retrying write.");
494 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
498 for (i = 0; i < n; i++)
499 size += iovec[i].iov_len;
501 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
505 static void dispatch_message_real(
507 struct iovec *iovec, unsigned n, unsigned m,
510 const char *label, size_t label_len,
514 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
515 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
516 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
517 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
518 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
519 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
520 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=",
521 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
522 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
523 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
531 uid_t realuid = 0, owner = 0, journal_uid;
532 bool owner_valid = false;
534 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
535 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
536 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
537 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
546 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
549 realuid = ucred->uid;
551 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
552 IOVEC_SET_STRING(iovec[n++], pid);
554 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
555 IOVEC_SET_STRING(iovec[n++], uid);
557 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
558 IOVEC_SET_STRING(iovec[n++], gid);
560 r = get_process_comm(ucred->pid, &t);
562 x = strappenda("_COMM=", t);
564 IOVEC_SET_STRING(iovec[n++], x);
567 r = get_process_exe(ucred->pid, &t);
569 x = strappenda("_EXE=", t);
571 IOVEC_SET_STRING(iovec[n++], x);
574 r = get_process_cmdline(ucred->pid, 0, false, &t);
576 x = strappenda("_CMDLINE=", t);
578 IOVEC_SET_STRING(iovec[n++], x);
582 r = audit_session_from_pid(ucred->pid, &audit);
584 sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
585 IOVEC_SET_STRING(iovec[n++], audit_session);
588 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
590 sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
591 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
595 r = cg_pid_get_path_shifted(ucred->pid, NULL, &c);
597 char *session = NULL;
599 x = strappenda("_SYSTEMD_CGROUP=", c);
600 IOVEC_SET_STRING(iovec[n++], x);
602 r = cg_path_get_session(c, &t);
604 session = strappenda("_SYSTEMD_SESSION=", t);
606 IOVEC_SET_STRING(iovec[n++], session);
609 if (cg_path_get_owner_uid(c, &owner) >= 0) {
612 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
613 IOVEC_SET_STRING(iovec[n++], owner_uid);
616 if (cg_path_get_unit(c, &t) >= 0) {
617 x = strappenda("_SYSTEMD_UNIT=", t);
619 } else if (cg_path_get_user_unit(c, &t) >= 0) {
620 x = strappenda("_SYSTEMD_USER_UNIT=", t);
622 } else if (unit_id) {
624 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
626 x = strappenda("_SYSTEMD_UNIT=", unit_id);
631 IOVEC_SET_STRING(iovec[n++], x);
638 x = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);
640 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
641 IOVEC_SET_STRING(iovec[n++], x);
643 security_context_t con;
645 if (getpidcon(ucred->pid, &con) >= 0) {
646 x = strappenda("_SELINUX_CONTEXT=", con);
649 IOVEC_SET_STRING(iovec[n++], x);
657 r = get_process_uid(object_pid, &object_uid);
659 sprintf(o_uid, "OBJECT_UID=%lu", (unsigned long) object_uid);
660 IOVEC_SET_STRING(iovec[n++], o_uid);
663 r = get_process_gid(object_pid, &object_gid);
665 sprintf(o_gid, "OBJECT_GID=%lu", (unsigned long) object_gid);
666 IOVEC_SET_STRING(iovec[n++], o_gid);
669 r = get_process_comm(object_pid, &t);
671 x = strappenda("OBJECT_COMM=", t);
673 IOVEC_SET_STRING(iovec[n++], x);
676 r = get_process_exe(object_pid, &t);
678 x = strappenda("OBJECT_EXE=", t);
680 IOVEC_SET_STRING(iovec[n++], x);
683 r = get_process_cmdline(object_pid, 0, false, &t);
685 x = strappenda("OBJECT_CMDLINE=", t);
687 IOVEC_SET_STRING(iovec[n++], x);
691 r = audit_session_from_pid(object_pid, &audit);
693 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%lu", (unsigned long) audit);
694 IOVEC_SET_STRING(iovec[n++], o_audit_session);
697 r = audit_loginuid_from_pid(object_pid, &loginuid);
699 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
700 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
704 r = cg_pid_get_path_shifted(object_pid, NULL, &c);
706 x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
707 IOVEC_SET_STRING(iovec[n++], x);
709 r = cg_path_get_session(c, &t);
711 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
713 IOVEC_SET_STRING(iovec[n++], x);
716 if (cg_path_get_owner_uid(c, &owner) >= 0) {
717 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
718 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
721 if (cg_path_get_unit(c, &t) >= 0) {
722 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
724 } else if (cg_path_get_user_unit(c, &t) >= 0) {
725 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
731 IOVEC_SET_STRING(iovec[n++], x);
739 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
740 IOVEC_SET_STRING(iovec[n++], source_time);
743 /* Note that strictly speaking storing the boot id here is
744 * redundant since the entry includes this in-line
745 * anyway. However, we need this indexed, too. */
746 r = sd_id128_get_boot(&id);
748 sd_id128_to_string(id, boot_id + strlen("_BOOT_ID="));
749 IOVEC_SET_STRING(iovec[n++], boot_id);
752 r = sd_id128_get_machine(&id);
754 sd_id128_to_string(id, machine_id + strlen("_MACHINE_ID="));
755 IOVEC_SET_STRING(iovec[n++], machine_id);
758 t = gethostname_malloc();
760 x = strappenda("_HOSTNAME=", t);
762 IOVEC_SET_STRING(iovec[n++], x);
767 if (s->split_mode == SPLIT_UID && realuid > 0)
768 /* Split up strictly by any UID */
769 journal_uid = realuid;
770 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
771 /* Split up by login UIDs, this avoids creation of
772 * individual journals for system UIDs. We do this
773 * only if the realuid is not root, in order not to
774 * accidentally leak privileged information to the
775 * user that is logged by a privileged process that is
776 * part of an unprivileged session.*/
781 write_to_journal(s, journal_uid, iovec, n);
784 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
785 char mid[11 + 32 + 1];
786 char buffer[16 + LINE_MAX + 1];
787 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
790 struct ucred ucred = {};
795 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
796 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
798 memcpy(buffer, "MESSAGE=", 8);
799 va_start(ap, format);
800 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
802 char_array_0(buffer);
803 IOVEC_SET_STRING(iovec[n++], buffer);
805 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
806 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
808 IOVEC_SET_STRING(iovec[n++], mid);
811 ucred.pid = getpid();
812 ucred.uid = getuid();
813 ucred.gid = getgid();
815 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, 0);
818 void server_dispatch_message(
820 struct iovec *iovec, unsigned n, unsigned m,
823 const char *label, size_t label_len,
829 _cleanup_free_ char *path = NULL;
833 assert(iovec || n == 0);
838 if (LOG_PRI(priority) > s->max_level_store)
844 r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
848 /* example: /user/lennart/3/foobar
849 * /system/dbus.service/foobar
851 * So let's cut off everything past the third /, since that is
852 * where user directories start */
854 c = strchr(path, '/');
856 c = strchr(c+1, '/');
858 c = strchr(c+1, '/');
864 rl = journal_rate_limit_test(s->rate_limit, path,
865 priority & LOG_PRIMASK, available_space(s, false));
870 /* Write a suppression message if we suppressed something */
872 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
873 "Suppressed %u messages from %s", rl - 1, path);
876 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, object_pid);
880 static int system_journal_open(Server *s) {
886 r = sd_id128_get_machine(&machine);
890 sd_id128_to_string(machine, ids);
892 if (!s->system_journal &&
893 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
894 access("/run/systemd/journal/flushed", F_OK) >= 0) {
896 /* If in auto mode: first try to create the machine
897 * path, but not the prefix.
899 * If in persistent mode: create /var/log/journal and
900 * the machine path */
902 if (s->storage == STORAGE_PERSISTENT)
903 (void) mkdir("/var/log/journal/", 0755);
905 fn = strappenda("/var/log/journal/", ids);
906 (void) mkdir(fn, 0755);
908 fn = strappenda(fn, "/system.journal");
909 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
912 server_fix_perms(s, s->system_journal, 0);
914 if (r != -ENOENT && r != -EROFS)
915 log_warning("Failed to open system journal: %s", strerror(-r));
921 if (!s->runtime_journal &&
922 (s->storage != STORAGE_NONE)) {
924 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
928 if (s->system_journal) {
930 /* Try to open the runtime journal, but only
931 * if it already exists, so that we can flush
932 * it into the system journal */
934 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
939 log_warning("Failed to open runtime journal: %s", strerror(-r));
946 /* OK, we really need the runtime journal, so create
947 * it if necessary. */
949 (void) mkdir_parents(fn, 0755);
950 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
954 log_error("Failed to open runtime journal: %s", strerror(-r));
959 if (s->runtime_journal)
960 server_fix_perms(s, s->runtime_journal, 0);
963 available_space(s, true);
968 int server_flush_to_var(Server *s) {
971 sd_journal *j = NULL;
975 if (s->storage != STORAGE_AUTO &&
976 s->storage != STORAGE_PERSISTENT)
979 if (!s->runtime_journal)
982 system_journal_open(s);
984 if (!s->system_journal)
987 log_debug("Flushing to /var...");
989 r = sd_id128_get_machine(&machine);
991 log_error("Failed to get machine id: %s", strerror(-r));
995 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
997 log_error("Failed to read runtime journal: %s", strerror(-r));
1001 sd_journal_set_data_threshold(j, 0);
1003 SD_JOURNAL_FOREACH(j) {
1007 f = j->current_file;
1008 assert(f && f->current_offset > 0);
1010 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1012 log_error("Can't read entry: %s", strerror(-r));
1016 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1020 if (!shall_try_append_again(s->system_journal, r)) {
1021 log_error("Can't write entry: %s", strerror(-r));
1028 if (!s->system_journal) {
1029 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1034 log_debug("Retrying write.");
1035 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1037 log_error("Can't write entry: %s", strerror(-r));
1043 journal_file_post_change(s->system_journal);
1045 journal_file_close(s->runtime_journal);
1046 s->runtime_journal = NULL;
1049 rm_rf("/run/log/journal", false, true, false);
1051 sd_journal_close(j);
1056 int process_event(Server *s, struct epoll_event *ev) {
1060 if (ev->data.fd == s->signal_fd) {
1061 struct signalfd_siginfo sfsi;
1064 if (ev->events != EPOLLIN) {
1065 log_error("Got invalid event from epoll.");
1069 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1070 if (n != sizeof(sfsi)) {
1075 if (errno == EINTR || errno == EAGAIN)
1081 if (sfsi.ssi_signo == SIGUSR1) {
1082 touch("/run/systemd/journal/flushed");
1083 server_flush_to_var(s);
1088 if (sfsi.ssi_signo == SIGUSR2) {
1094 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1098 } else if (ev->data.fd == s->sync_timer_fd) {
1102 log_debug("Got sync request from epoll.");
1104 r = read(ev->data.fd, (void *)&t, sizeof(t));
1111 } else if (ev->data.fd == s->dev_kmsg_fd) {
1114 if (ev->events != EPOLLIN) {
1115 log_error("Got invalid event from epoll.");
1119 r = server_read_dev_kmsg(s);
1125 } else if (ev->data.fd == s->native_fd ||
1126 ev->data.fd == s->syslog_fd) {
1128 if (ev->events != EPOLLIN) {
1129 log_error("Got invalid event from epoll.");
1134 struct msghdr msghdr;
1136 struct ucred *ucred = NULL;
1137 struct timeval *tv = NULL;
1138 struct cmsghdr *cmsg;
1140 size_t label_len = 0;
1142 struct cmsghdr cmsghdr;
1144 /* We use NAME_MAX space for the
1145 * SELinux label here. The kernel
1146 * currently enforces no limit, but
1147 * according to suggestions from the
1148 * SELinux people this will change and
1149 * it will probably be identical to
1150 * NAME_MAX. For now we use that, but
1151 * this should be updated one day when
1152 * the final limit is known.*/
1153 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1154 CMSG_SPACE(sizeof(struct timeval)) +
1155 CMSG_SPACE(sizeof(int)) + /* fd */
1156 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1163 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1164 log_error("SIOCINQ failed: %m");
1168 if (s->buffer_size < (size_t) v) {
1172 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1173 b = realloc(s->buffer, l+1);
1176 log_error("Couldn't increase buffer.");
1185 iovec.iov_base = s->buffer;
1186 iovec.iov_len = s->buffer_size;
1190 msghdr.msg_iov = &iovec;
1191 msghdr.msg_iovlen = 1;
1192 msghdr.msg_control = &control;
1193 msghdr.msg_controllen = sizeof(control);
1195 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1198 if (errno == EINTR || errno == EAGAIN)
1201 log_error("recvmsg() failed: %m");
1205 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1207 if (cmsg->cmsg_level == SOL_SOCKET &&
1208 cmsg->cmsg_type == SCM_CREDENTIALS &&
1209 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1210 ucred = (struct ucred*) CMSG_DATA(cmsg);
1211 else if (cmsg->cmsg_level == SOL_SOCKET &&
1212 cmsg->cmsg_type == SCM_SECURITY) {
1213 label = (char*) CMSG_DATA(cmsg);
1214 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1215 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1216 cmsg->cmsg_type == SO_TIMESTAMP &&
1217 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1218 tv = (struct timeval*) CMSG_DATA(cmsg);
1219 else if (cmsg->cmsg_level == SOL_SOCKET &&
1220 cmsg->cmsg_type == SCM_RIGHTS) {
1221 fds = (int*) CMSG_DATA(cmsg);
1222 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1226 if (ev->data.fd == s->syslog_fd) {
1229 if (n > 0 && n_fds == 0) {
1230 e = memchr(s->buffer, '\n', n);
1236 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1237 } else if (n_fds > 0)
1238 log_warning("Got file descriptors via syslog socket. Ignoring.");
1241 if (n > 0 && n_fds == 0)
1242 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1243 else if (n == 0 && n_fds == 1)
1244 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1246 log_warning("Got too many file descriptors via native socket. Ignoring.");
1249 close_many(fds, n_fds);
1254 } else if (ev->data.fd == s->stdout_fd) {
1256 if (ev->events != EPOLLIN) {
1257 log_error("Got invalid event from epoll.");
1261 stdout_stream_new(s);
1265 StdoutStream *stream;
1267 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1268 log_error("Got invalid event from epoll.");
1272 /* If it is none of the well-known fds, it must be an
1273 * stdout stream fd. Note that this is a bit ugly here
1274 * (since we rely that none of the well-known fds
1275 * could be interpreted as pointer), but nonetheless
1276 * safe, since the well-known fds would never get an
1277 * fd > 4096, i.e. beyond the first memory page */
1279 stream = ev->data.ptr;
1281 if (stdout_stream_process(stream) <= 0)
1282 stdout_stream_free(stream);
1287 log_error("Unknown event.");
1291 static int open_signalfd(Server *s) {
1293 struct epoll_event ev;
1297 assert_se(sigemptyset(&mask) == 0);
1298 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1299 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1301 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1302 if (s->signal_fd < 0) {
1303 log_error("signalfd(): %m");
1308 ev.events = EPOLLIN;
1309 ev.data.fd = s->signal_fd;
1311 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1312 log_error("epoll_ctl(): %m");
1319 static int server_parse_proc_cmdline(Server *s) {
1320 _cleanup_free_ char *line = NULL;
1325 if (detect_container(NULL) > 0)
1328 r = read_one_line_file("/proc/cmdline", &line);
1330 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1334 FOREACH_WORD_QUOTED(w, l, line, state) {
1335 _cleanup_free_ char *word;
1337 word = strndup(w, l);
1341 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1342 r = parse_boolean(word + 35);
1344 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1346 s->forward_to_syslog = r;
1347 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1348 r = parse_boolean(word + 33);
1350 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1352 s->forward_to_kmsg = r;
1353 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1354 r = parse_boolean(word + 36);
1356 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1358 s->forward_to_console = r;
1359 } else if (startswith(word, "systemd.journald"))
1360 log_warning("Invalid systemd.journald parameter. Ignoring.");
1366 static int server_parse_config_file(Server *s) {
1367 static const char fn[] = "/etc/systemd/journald.conf";
1368 _cleanup_fclose_ FILE *f = NULL;
1373 f = fopen(fn, "re");
1375 if (errno == ENOENT)
1378 log_warning("Failed to open configuration file %s: %m", fn);
1382 r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1383 (void*) journald_gperf_lookup, false, false, s);
1385 log_warning("Failed to parse configuration file: %s", strerror(-r));
1390 static int server_open_sync_timer(Server *s) {
1392 struct epoll_event ev;
1396 s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1397 if (s->sync_timer_fd < 0)
1401 ev.events = EPOLLIN;
1402 ev.data.fd = s->sync_timer_fd;
1404 r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1406 log_error("Failed to add idle timer fd to epoll object: %m");
1413 int server_schedule_sync(Server *s) {
1418 if (s->sync_scheduled)
1421 if (s->sync_interval_usec) {
1422 struct itimerspec sync_timer_enable = {};
1424 timespec_store(&sync_timer_enable.it_value, s->sync_interval_usec);
1426 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1431 s->sync_scheduled = true;
1436 int server_init(Server *s) {
1442 s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1443 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1447 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1448 s->sync_scheduled = false;
1450 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1451 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1453 s->forward_to_syslog = true;
1455 s->max_level_store = LOG_DEBUG;
1456 s->max_level_syslog = LOG_DEBUG;
1457 s->max_level_kmsg = LOG_NOTICE;
1458 s->max_level_console = LOG_INFO;
1460 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1461 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1463 server_parse_config_file(s);
1464 server_parse_proc_cmdline(s);
1465 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1466 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1467 (long long unsigned) s->rate_limit_interval,
1468 s->rate_limit_burst);
1469 s->rate_limit_interval = s->rate_limit_burst = 0;
1472 mkdir_p("/run/systemd/journal", 0755);
1474 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1475 if (!s->user_journals)
1478 s->mmap = mmap_cache_new();
1482 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1483 if (s->epoll_fd < 0) {
1484 log_error("Failed to create epoll object: %m");
1488 n = sd_listen_fds(true);
1490 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1494 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1496 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1498 if (s->native_fd >= 0) {
1499 log_error("Too many native sockets passed.");
1505 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1507 if (s->stdout_fd >= 0) {
1508 log_error("Too many stdout sockets passed.");
1514 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1516 if (s->syslog_fd >= 0) {
1517 log_error("Too many /dev/log sockets passed.");
1524 log_error("Unknown socket passed.");
1529 r = server_open_syslog_socket(s);
1533 r = server_open_native_socket(s);
1537 r = server_open_stdout_socket(s);
1541 r = server_open_dev_kmsg(s);
1545 r = server_open_kernel_seqnum(s);
1549 r = server_open_sync_timer(s);
1553 r = open_signalfd(s);
1557 s->udev = udev_new();
1561 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1562 s->rate_limit_burst);
1566 r = system_journal_open(s);
1573 void server_maybe_append_tags(Server *s) {
1579 n = now(CLOCK_REALTIME);
1581 if (s->system_journal)
1582 journal_file_maybe_append_tag(s->system_journal, n);
1584 HASHMAP_FOREACH(f, s->user_journals, i)
1585 journal_file_maybe_append_tag(f, n);
1589 void server_done(Server *s) {
1593 while (s->stdout_streams)
1594 stdout_stream_free(s->stdout_streams);
1596 if (s->system_journal)
1597 journal_file_close(s->system_journal);
1599 if (s->runtime_journal)
1600 journal_file_close(s->runtime_journal);
1602 while ((f = hashmap_steal_first(s->user_journals)))
1603 journal_file_close(f);
1605 hashmap_free(s->user_journals);
1607 if (s->epoll_fd >= 0)
1608 close_nointr_nofail(s->epoll_fd);
1610 if (s->signal_fd >= 0)
1611 close_nointr_nofail(s->signal_fd);
1613 if (s->syslog_fd >= 0)
1614 close_nointr_nofail(s->syslog_fd);
1616 if (s->native_fd >= 0)
1617 close_nointr_nofail(s->native_fd);
1619 if (s->stdout_fd >= 0)
1620 close_nointr_nofail(s->stdout_fd);
1622 if (s->dev_kmsg_fd >= 0)
1623 close_nointr_nofail(s->dev_kmsg_fd);
1625 if (s->sync_timer_fd >= 0)
1626 close_nointr_nofail(s->sync_timer_fd);
1629 journal_rate_limit_free(s->rate_limit);
1631 if (s->kernel_seqnum)
1632 munmap(s->kernel_seqnum, sizeof(uint64_t));
1638 mmap_cache_unref(s->mmap);
1641 udev_unref(s->udev);