1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
43 #include "conf-parser.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-server.h"
48 #include "journald-rate-limit.h"
49 #include "journald-kmsg.h"
50 #include "journald-syslog.h"
51 #include "journald-stream.h"
52 #include "journald-console.h"
53 #include "journald-native.h"
57 #include <acl/libacl.h>
62 #include <selinux/selinux.h>
/* Upper bound on the number of per-user journal files kept open at the same
 * time; find_journal() closes entries once this many are open. */
65 #define USER_JOURNALS_MAX 1024
/* Defaults that server_init() assigns to the sync interval and the rate
 * limiter settings. */
67 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
68 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
69 #define DEFAULT_RATE_LIMIT_BURST 1000
/* Age limit that available_space() compares its cached timestamp against. */
71 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Textual names of the Storage enum values, indexed by enum value. The two
 * macros below consume this table; judging by their arguments they generate
 * the string lookup helpers and the config_parse_storage parser (the macro
 * definitions themselves are not part of this file). */
73 static const char* const storage_table[] = {
74 [STORAGE_AUTO] = "auto",
75 [STORAGE_VOLATILE] = "volatile",
76 [STORAGE_PERSISTENT] = "persistent",
77 [STORAGE_NONE] = "none"
80 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
81 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Textual names of the SplitMode enum values, indexed by enum value. The two
 * macros below consume this table; judging by their arguments they generate
 * the string lookup helpers and the config_parse_split_mode parser (the macro
 * definitions themselves are not part of this file). */
83 static const char* const split_mode_table[] = {
84 [SPLIT_NONE] = "none",
86 [SPLIT_LOGIN] = "login"
89 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
90 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Estimates how many more bytes the journal may still occupy on disk.
 *
 * The directory examined is <prefix><machine-id>, where the prefix is
 * /var/log/journal/ (limits taken from s->system_metrics) when
 * s->system_journal is set; /run/log/journal/ with s->runtime_metrics is the
 * alternative. The result is stored in s->cached_available_space together
 * with a CLOCK_MONOTONIC timestamp and is also the return value.
 *
 * NOTE(review): 'verbose' presumably forces a fresh computation and enables
 * the usage message sent at the end -- confirm against the full conditions. */
92 static uint64_t available_space(Server *s, bool verbose) {
94 _cleanup_free_ char *p = NULL;
97 uint64_t sum = 0, ss_avail = 0, avail = 0;
99 _cleanup_closedir_ DIR *d = NULL;
104 ts = now(CLOCK_MONOTONIC);
/* A cached figure younger than RECHECK_AVAILABLE_SPACE_USEC is reused. */
106 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
108 return s->cached_available_space;
110 r = sd_id128_get_machine(&machine);
114 if (s->system_journal) {
115 f = "/var/log/journal/";
116 m = &s->system_metrics;
118 f = "/run/log/journal/";
119 m = &s->runtime_metrics;
124 p = strappend(f, sd_id128_to_string(machine, ids));
/* File system statistics of the journal directory; f_bsize and f_bavail
 * are used further down. */
132 if (fstatvfs(dirfd(d), &ss) < 0)
138 union dirent_storage buf;
140 r = readdir_r(d, &buf.de, &de);
/* Only regular files with a .journal or .journal~ suffix are counted. */
147 if (!endswith(de->d_name, ".journal") &&
148 !endswith(de->d_name, ".journal~"))
151 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
154 if (!S_ISREG(st.st_mode))
/* Current usage in bytes, accumulated from st_blocks in units of 512. */
157 sum += (uint64_t) st.st_blocks * 512UL;
/* Free space beyond the keep_free reserve, then the headroom that is left
 * under MIN(max_use, avail) once current usage is subtracted; both values
 * are clamped at zero. */
160 ss_avail = ss.f_bsize * ss.f_bavail;
161 avail = ss_avail > m->keep_free ? ss_avail - m->keep_free : 0;
163 s->cached_available_space = MIN(m->max_use, avail) > sum ? MIN(m->max_use, avail) - sum : 0;
164 s->cached_available_space_timestamp = ts;
/* Usage report emitted as a driver message with human-readable sizes. */
167 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
168 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
170 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
171 "%s journal is using %s (max %s, leaving %s of free %s, current limit %s).",
172 s->system_journal ? "Permanent" : "Runtime",
173 format_bytes(fb1, sizeof(fb1), sum),
174 format_bytes(fb2, sizeof(fb2), m->max_use),
175 format_bytes(fb3, sizeof(fb3), m->keep_free),
176 format_bytes(fb4, sizeof(fb4), ss_avail),
177 format_bytes(fb5, sizeof(fb5), MIN(m->max_use, avail)));
180 return s->cached_available_space;
/* Resolves the "systemd-journal" group into s->file_gid.
 *
 * s->file_gid_valid is checked first and is set to true at the end whether
 * or not the lookup succeeded, so the resolution is attempted only once. A
 * failed lookup is logged as a warning. */
183 static void server_read_file_gid(Server *s) {
184 const char *g = "systemd-journal";
189 if (s->file_gid_valid)
192 r = get_group_creds(&g, &s->file_gid);
194 log_warning("Failed to resolve '%s' group: %s", g, strerror(-r));
196 /* if we couldn't read the gid, then it will be 0, but that's
197 * fine and we shouldn't try to resolve the group again, so
198 * let's just pretend it worked right-away. */
199 s->file_gid_valid = true;
/* Applies the standard ownership, mode and ACL to an open journal file.
 *
 * s:   server; supplies the journal group via s->file_gid.
 * f:   journal file; f->fd is modified, f->path is used in log messages.
 * uid: user that gets an ACL read entry on the file.
 *
 * The file is set to mode 0640, owner 0 and group s->file_gid. Afterwards the
 * ACL is read from the fd, a user entry for 'uid' is looked up (and created
 * if needed), ACL_READ is added to it and the ACL is written back. Every
 * failure is only logged as a warning; nothing is returned to the caller. */
202 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
207 acl_permset_t permset;
212 server_read_file_gid(s);
214 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
216 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
222 acl = acl_get_fd(f->fd);
224 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
228 r = acl_find_uid(acl, uid, &entry);
/* Create a fresh ACL_USER entry qualified with 'uid'. */
231 if (acl_create_entry(&acl, &entry) < 0 ||
232 acl_set_tag_type(entry, ACL_USER) < 0 ||
233 acl_set_qualifier(entry, &uid) < 0) {
234 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
239 /* We do not recalculate the mask unconditionally here,
240 * so that the fchmod() mask above stays intact. */
241 if (acl_get_permset(entry, &permset) < 0 ||
242 acl_add_perm(permset, ACL_READ) < 0 ||
243 calc_acl_mask_if_needed(&acl) < 0) {
244 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
248 if (acl_set_fd(f->fd, acl) < 0)
249 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Picks the journal file that entries of user 'uid' are written to.
 *
 * The runtime journal is returned whenever it is open. Otherwise the per-user
 * file /var/log/journal/<machine-id>/user-<uid>.journal is looked up in
 * s->user_journals and opened (created if missing) on a miss, after closing
 * cached files while USER_JOURNALS_MAX or more are open. The visible error
 * paths (machine id lookup, path allocation, open, hashmap insertion) fall
 * back to s->system_journal.
 *
 * The returned file stays owned by the server. */
256 static JournalFile* find_journal(Server *s, uid_t uid) {
257 _cleanup_free_ char *p = NULL;
264 /* We split up user logs only on /var, not on /run. If the
265 * runtime file is open, we write to it exclusively, in order
266 * to guarantee proper order as soon as we flush /run to
267 * /var and close the runtime file. */
269 if (s->runtime_journal)
270 return s->runtime_journal;
273 return s->system_journal;
275 r = sd_id128_get_machine(&machine);
277 return s->system_journal;
/* Already opened earlier? The hashmap is keyed by the UID. */
279 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
283 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
284 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
285 return s->system_journal;
287 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
288 /* Too many open? Then let's close one */
289 f = hashmap_steal_first(s->user_journals);
291 journal_file_close(f);
294 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
296 return s->system_journal;
/* Grant 'uid' read access to its own journal file. */
298 server_fix_perms(s, f, uid);
300 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
302 journal_file_close(f);
303 return s->system_journal;
/* Rotates every open journal file: the runtime journal, the system journal
 * and each per-user journal in s->user_journals.
 *
 * Failures are logged and not returned. The runtime journal is rotated with
 * sealing disabled; the others use s->seal. After rotating, permissions are
 * re-applied with server_fix_perms(), using UID 0 for the runtime and system
 * journals and the hashmap key for user journals. */
309 void server_rotate(Server *s) {
315 log_debug("Rotating...");
317 if (s->runtime_journal) {
318 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
/* The pointer may have been reset by the failed rotation, hence the two
 * different error messages. */
320 if (s->runtime_journal)
321 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
323 log_error("Failed to create new runtime journal: %s", strerror(-r));
325 server_fix_perms(s, s->runtime_journal, 0);
328 if (s->system_journal) {
329 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
331 if (s->system_journal)
332 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
334 log_error("Failed to create new system journal: %s", strerror(-r));
337 server_fix_perms(s, s->system_journal, 0);
340 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
341 r = journal_file_rotate(&f, s->compress, s->seal);
344 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
346 log_error("Failed to create user journal: %s", strerror(-r));
/* Store the new file object under the same UID key. */
348 hashmap_replace(s->user_journals, k, f);
349 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Syncs the system journal and all per-user journals by setting them
 * offline, then disarms the sync timer and clears s->sync_scheduled.
 *
 * Errors are logged and otherwise ignored. */
354 void server_sync(Server *s) {
/* An all-zero itimerspec disarms the timer when passed to timerfd_settime(). */
355 static const struct itimerspec sync_timer_disable = {};
361 if (s->system_journal) {
362 r = journal_file_set_offline(s->system_journal);
364 log_error("Failed to sync system journal: %s", strerror(-r));
367 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
368 r = journal_file_set_offline(f);
370 log_error("Failed to sync user journal: %s", strerror(-r));
373 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
375 log_error("Failed to disable max timer: %m");
377 s->sync_scheduled = false;
/* Vacuums the journal directories of this machine.
 *
 * /var/log/journal/<machine-id> is vacuumed when the system journal is open
 * and /run/log/journal/<machine-id> when the runtime journal is open, each
 * with the max_use/keep_free of the matching metrics and s->max_retention_usec.
 * s->oldest_file_usec is reset first and passed to the vacuum call as output.
 * A missing directory (-ENOENT) is not reported. At the end the cached
 * available-space timestamp is zeroed so that the next query recomputes. */
380 void server_vacuum(Server *s) {
385 log_debug("Vacuuming...");
387 s->oldest_file_usec = 0;
389 r = sd_id128_get_machine(&machine);
391 log_error("Failed to get machine ID: %s", strerror(-r));
395 sd_id128_to_string(machine, ids);
397 if (s->system_journal) {
398 char *p = strappenda("/var/log/journal/", ids);
400 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
401 if (r < 0 && r != -ENOENT)
402 log_error("Failed to vacuum %s: %s", p, strerror(-r));
405 if (s->runtime_journal) {
406 char *p = strappenda("/run/log/journal/", ids);
408 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
409 if (r < 0 && r != -ENOENT)
410 log_error("Failed to vacuum %s: %s", p, strerror(-r));
413 s->cached_available_space_timestamp = 0;
416 bool shall_try_append_again(JournalFile *f, int r) {
418 /* -E2BIG Hit configured limit
420 -EDQUOT Quota limit hit
422 -EHOSTDOWN Other machine
423 -EBUSY Unclean shutdown
424 -EPROTONOSUPPORT Unsupported feature
427 -ESHUTDOWN Already archived */
429 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
430 log_debug("%s: Allocation limit reached, rotating.", f->path);
431 else if (r == -EHOSTDOWN)
432 log_info("%s: Journal file from other machine, rotating.", f->path);
433 else if (r == -EBUSY)
434 log_info("%s: Unclean shutdown, rotating.", f->path);
435 else if (r == -EPROTONOSUPPORT)
436 log_info("%s: Unsupported feature, rotating.", f->path);
437 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
438 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Appends one entry, given as n iovec fields, to the journal selected for
 * 'uid' by find_journal().
 *
 * The file is rotated first when journal_file_rotate_suggested() says so. A
 * successful append schedules a sync according to 'priority'. On failure the
 * write is retried once if shall_try_append_again() accepts the error code
 * and no vacuuming has happened yet; otherwise the entry is dropped and an
 * error with the item count and total byte size is logged. */
445 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
447 bool vacuumed = false;
454 f = find_journal(s, uid);
458 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
459 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
/* Look the file up again, the previous pointer may be stale by now. */
464 f = find_journal(s, uid);
469 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
471 server_schedule_sync(s, priority);
475 if (vacuumed || !shall_try_append_again(f, r)) {
/* Total payload size, for the log message only. */
478 for (i = 0; i < n; i++)
479 size += iovec[i].iov_len;
481 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
488 f = find_journal(s, uid);
492 log_debug("Retrying write.");
493 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
497 for (i = 0; i < n; i++)
498 size += iovec[i].iov_len;
500 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
502 server_schedule_sync(s, priority);
/* Adds the trusted metadata fields to an entry and writes it to a journal.
 *
 * iovec holds n caller-supplied fields and has room for m elements in total;
 * the fields generated here are appended after the first n. Generated fields
 * cover the sender's credentials and process details (_PID, _UID, _GID,
 * _COMM, _EXE, _CMDLINE, _CAP_EFFECTIVE, audit data), its cgroup-derived
 * data (_SYSTEMD_*), the SELinux context, the same kind of information for
 * object_pid (OBJECT_*), the source timestamp, and _BOOT_ID, _MACHINE_ID
 * and _HOSTNAME. Finally the target journal UID is chosen according to
 * s->split_mode and the entry is handed to write_to_journal(). */
505 static void dispatch_message_real(
507 struct iovec *iovec, unsigned n, unsigned m,
510 const char *label, size_t label_len,
/* Fixed-size buffers for the numeric fields: field name plus the maximum
 * decimal width of the value type. */
515 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
516 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
517 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
518 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
519 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
520 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
521 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=",
522 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
523 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
524 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
531 uid_t realuid = 0, owner = 0, journal_uid;
532 bool owner_valid = false;
534 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
535 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
536 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
537 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
/* The caller must have reserved room for every field added below. */
546 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
/* Fields derived from the sender's credentials (ucred). */
549 realuid = ucred->uid;
551 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
552 IOVEC_SET_STRING(iovec[n++], pid);
554 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
555 IOVEC_SET_STRING(iovec[n++], uid);
557 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
558 IOVEC_SET_STRING(iovec[n++], gid);
560 r = get_process_comm(ucred->pid, &t);
562 x = strappenda("_COMM=", t);
564 IOVEC_SET_STRING(iovec[n++], x);
567 r = get_process_exe(ucred->pid, &t);
569 x = strappenda("_EXE=", t);
571 IOVEC_SET_STRING(iovec[n++], x);
574 r = get_process_cmdline(ucred->pid, 0, false, &t);
576 x = strappenda("_CMDLINE=", t);
578 IOVEC_SET_STRING(iovec[n++], x);
581 r = get_process_capeff(ucred->pid, &t);
583 x = strappenda("_CAP_EFFECTIVE=", t);
585 IOVEC_SET_STRING(iovec[n++], x);
589 r = audit_session_from_pid(ucred->pid, &audit);
591 sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
592 IOVEC_SET_STRING(iovec[n++], audit_session);
595 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
597 sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
598 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* Fields derived from the sender's cgroup path: session, owner UID, unit,
 * user unit and slice. */
602 r = cg_pid_get_path_shifted(ucred->pid, NULL, &c);
604 char *session = NULL;
606 x = strappenda("_SYSTEMD_CGROUP=", c);
607 IOVEC_SET_STRING(iovec[n++], x);
609 r = cg_path_get_session(c, &t);
611 session = strappenda("_SYSTEMD_SESSION=", t);
613 IOVEC_SET_STRING(iovec[n++], session);
616 if (cg_path_get_owner_uid(c, &owner) >= 0) {
619 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
620 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* When the cgroup path yields no unit, the caller-provided unit_id is used:
 * as system unit without a session, as user unit within a session. */
623 if (cg_path_get_unit(c, &t) >= 0) {
624 x = strappenda("_SYSTEMD_UNIT=", t);
626 IOVEC_SET_STRING(iovec[n++], x);
627 } else if (unit_id && !session) {
628 x = strappenda("_SYSTEMD_UNIT=", unit_id);
629 IOVEC_SET_STRING(iovec[n++], x);
632 if (cg_path_get_user_unit(c, &t) >= 0) {
633 x = strappenda("_SYSTEMD_USER_UNIT=", t);
635 IOVEC_SET_STRING(iovec[n++], x);
636 } else if (unit_id && session) {
637 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
638 IOVEC_SET_STRING(iovec[n++], x);
641 if (cg_path_get_slice(c, &t) >= 0) {
642 x = strappenda("_SYSTEMD_SLICE=", t);
644 IOVEC_SET_STRING(iovec[n++], x);
/* SELinux context: built from the label passed in (label_len bytes plus a
 * terminating NUL), or obtained from the sender's PID via getpidcon(). */
652 x = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);
654 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
655 IOVEC_SET_STRING(iovec[n++], x);
657 security_context_t con;
659 if (getpidcon(ucred->pid, &con) >= 0) {
660 x = strappenda("_SELINUX_CONTEXT=", con);
663 IOVEC_SET_STRING(iovec[n++], x);
/* The same kind of metadata for the process the message is about
 * (object_pid), stored under OBJECT_* field names. */
671 r = get_process_uid(object_pid, &object_uid);
673 sprintf(o_uid, "OBJECT_UID=%lu", (unsigned long) object_uid);
674 IOVEC_SET_STRING(iovec[n++], o_uid);
677 r = get_process_gid(object_pid, &object_gid);
679 sprintf(o_gid, "OBJECT_GID=%lu", (unsigned long) object_gid);
680 IOVEC_SET_STRING(iovec[n++], o_gid);
683 r = get_process_comm(object_pid, &t);
685 x = strappenda("OBJECT_COMM=", t);
687 IOVEC_SET_STRING(iovec[n++], x);
690 r = get_process_exe(object_pid, &t);
692 x = strappenda("OBJECT_EXE=", t);
694 IOVEC_SET_STRING(iovec[n++], x);
697 r = get_process_cmdline(object_pid, 0, false, &t);
699 x = strappenda("OBJECT_CMDLINE=", t);
701 IOVEC_SET_STRING(iovec[n++], x);
705 r = audit_session_from_pid(object_pid, &audit);
707 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%lu", (unsigned long) audit);
708 IOVEC_SET_STRING(iovec[n++], o_audit_session);
711 r = audit_loginuid_from_pid(object_pid, &loginuid);
713 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
714 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
718 r = cg_pid_get_path_shifted(object_pid, NULL, &c);
720 x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
721 IOVEC_SET_STRING(iovec[n++], x);
723 r = cg_path_get_session(c, &t);
725 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
727 IOVEC_SET_STRING(iovec[n++], x);
/* NOTE(review): this writes into 'owner', the same variable that holds the
 * sender's session owner and is read again by the SPLIT_LOGIN check further
 * down -- confirm that overwriting it here is intended. */
730 if (cg_path_get_owner_uid(c, &owner) >= 0) {
731 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
732 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
735 if (cg_path_get_unit(c, &t) >= 0) {
736 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
738 IOVEC_SET_STRING(iovec[n++], x);
741 if (cg_path_get_user_unit(c, &t) >= 0) {
742 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
744 IOVEC_SET_STRING(iovec[n++], x);
/* Timestamp supplied with the message (tv), formatted via timeval_load(). */
753 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
754 IOVEC_SET_STRING(iovec[n++], source_time);
757 /* Note that strictly speaking storing the boot id here is
758 * redundant since the entry includes this in-line
759 * anyway. However, we need this indexed, too. */
760 r = sd_id128_get_boot(&id);
762 sd_id128_to_string(id, boot_id + strlen("_BOOT_ID="));
763 IOVEC_SET_STRING(iovec[n++], boot_id);
766 r = sd_id128_get_machine(&id);
768 sd_id128_to_string(id, machine_id + strlen("_MACHINE_ID="));
769 IOVEC_SET_STRING(iovec[n++], machine_id);
772 t = gethostname_malloc();
774 x = strappenda("_HOSTNAME=", t);
776 IOVEC_SET_STRING(iovec[n++], x);
/* Choose the UID whose journal file receives the entry. */
781 if (s->split_mode == SPLIT_UID && realuid > 0)
782 /* Split up strictly by any UID */
783 journal_uid = realuid;
784 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
785 /* Split up by login UIDs, this avoids creation of
786 * individual journals for system UIDs. We do this
787 * only if the realuid is not root, in order not to
788 * accidentally leak privileged information to the
789 * user that is logged by a privileged process that is
790 * part of an unprivileged session.*/
795 write_to_journal(s, journal_uid, iovec, n, priority);
/* Logs a message that originates from the journal daemon itself.
 *
 * message_id: added as a MESSAGE_ID field unless it equals SD_ID128_NULL.
 * format/...: printf-style message text; it is written after the "MESSAGE="
 *             prefix into a buffer sized for LINE_MAX and truncated to fit.
 *
 * The entry carries PRIORITY=6 and _TRANSPORT=driver, uses the daemon's own
 * PID/UID/GID as credentials and is dispatched at LOG_INFO via
 * dispatch_message_real(). */
798 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
799 char mid[11 + 32 + 1];
800 char buffer[16 + LINE_MAX + 1];
/* Room for the metadata fields plus the up to four fields added here. */
801 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
804 struct ucred ucred = {};
809 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
810 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The 8 bytes of "MESSAGE=" are followed directly by the formatted text. */
812 memcpy(buffer, "MESSAGE=", 8);
813 va_start(ap, format);
814 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
816 char_array_0(buffer);
817 IOVEC_SET_STRING(iovec[n++], buffer);
819 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
820 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
822 IOVEC_SET_STRING(iovec[n++], mid);
825 ucred.pid = getpid();
826 ucred.uid = getuid();
827 ucred.gid = getgid();
829 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
/* Entry point for messages received from clients: filters, rate-limits and
 * finally forwards the entry to dispatch_message_real().
 *
 * iovec holds n fields and has room for m elements; iovec may only be NULL
 * when n is 0. The visible checks compare the message priority against
 * s->max_level_store and test for STORAGE_NONE. Rate limiting is keyed on
 * the sender's cgroup path, cut after the third slash, and also takes the
 * masked priority and the result of available_space() into account. When
 * earlier messages were suppressed, a SD_MESSAGE_JOURNAL_DROPPED driver
 * message reporting rl - 1 suppressed messages is logged. */
832 void server_dispatch_message(
834 struct iovec *iovec, unsigned n, unsigned m,
837 const char *label, size_t label_len,
843 _cleanup_free_ char *path = NULL;
847 assert(iovec || n == 0);
852 if (LOG_PRI(priority) > s->max_level_store)
855 /* Stop early in case the information will not be stored
857 if (s->storage == STORAGE_NONE)
863 r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
867 /* example: /user/lennart/3/foobar
868 * /system/dbus.service/foobar
870 * So let's cut of everything past the third /, since that is
871 * where user directories start */
873 c = strchr(path, '/');
875 c = strchr(c+1, '/');
877 c = strchr(c+1, '/');
883 rl = journal_rate_limit_test(s->rate_limit, path,
884 priority & LOG_PRIMASK, available_space(s, false));
889 /* Write a suppression message if we suppressed something */
891 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
892 "Suppressed %u messages from %s", rl - 1, path);
895 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
/* Opens the persistent and/or runtime system journal, as far as they are
 * not open yet and the storage mode allows it.
 *
 * The persistent journal /var/log/journal/<machine-id>/system.journal is
 * tried only in STORAGE_PERSISTENT or STORAGE_AUTO mode and only once the
 * flag file /run/systemd/journal/flushed exists. The runtime journal
 * /run/log/journal/<machine-id>/system.journal is handled for every mode
 * except STORAGE_NONE. Newly opened files get their permissions fixed, and
 * available_space() is called in verbose mode at the end. */
899 static int system_journal_open(Server *s) {
905 r = sd_id128_get_machine(&machine);
907 log_error("Failed to get machine id: %s", strerror(-r));
911 sd_id128_to_string(machine, ids);
913 if (!s->system_journal &&
914 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
915 access("/run/systemd/journal/flushed", F_OK) >= 0) {
917 /* If in auto mode: first try to create the machine
918 * path, but not the prefix.
920 * If in persistent mode: create /var/log/journal and
921 * the machine path */
923 if (s->storage == STORAGE_PERSISTENT)
924 (void) mkdir("/var/log/journal/", 0755);
926 fn = strappenda("/var/log/journal/", ids);
927 (void) mkdir(fn, 0755);
929 fn = strappenda(fn, "/system.journal");
930 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
933 server_fix_perms(s, s->system_journal, 0);
/* A missing directory or a read-only file system is not worth a warning. */
935 if (r != -ENOENT && r != -EROFS)
936 log_warning("Failed to open system journal: %s", strerror(-r));
942 if (!s->runtime_journal &&
943 (s->storage != STORAGE_NONE)) {
945 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
949 if (s->system_journal) {
951 /* Try to open the runtime journal, but only
952 * if it already exists, so that we can flush
953 * it into the system journal */
955 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
960 log_warning("Failed to open runtime journal: %s", strerror(-r));
967 /* OK, we really need the runtime journal, so create
968 * it if necessary. */
970 (void) mkdir_parents(fn, 0755);
971 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
975 log_error("Failed to open runtime journal: %s", strerror(-r));
980 if (s->runtime_journal)
981 server_fix_perms(s, s->runtime_journal, 0);
/* Recompute the available space and log current journal usage. */
984 available_space(s, true);
/* Copies all entries from the runtime journal in /run into the persistent
 * system journal in /var, then closes the runtime journal and removes
 * /run/log/journal.
 *
 * The visible guards check for STORAGE_AUTO/STORAGE_PERSISTENT, an open
 * runtime journal, and a system journal that could be opened. A failed copy
 * is retried once when shall_try_append_again() accepts the error code.
 * NOTE(review): the exact return values are set on lines outside this view;
 * presumably 0 or a negative errno-style code -- confirm. */
989 int server_flush_to_var(Server *s) {
992 sd_journal *j = NULL;
996 if (s->storage != STORAGE_AUTO &&
997 s->storage != STORAGE_PERSISTENT)
1000 if (!s->runtime_journal)
1003 system_journal_open(s);
1005 if (!s->system_journal)
1008 log_debug("Flushing to /var...");
1010 r = sd_id128_get_machine(&machine);
/* Read the runtime journal back through the public reader API. */
1014 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1016 log_error("Failed to read runtime journal: %s", strerror(-r));
1020 sd_journal_set_data_threshold(j, 0);
1022 SD_JOURNAL_FOREACH(j) {
1026 f = j->current_file;
1027 assert(f && f->current_offset > 0);
1029 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1031 log_error("Can't read entry: %s", strerror(-r));
1035 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1039 if (!shall_try_append_again(s->system_journal, r)) {
1040 log_error("Can't write entry: %s", strerror(-r));
1047 if (!s->system_journal) {
1048 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1053 log_debug("Retrying write.");
1054 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1056 log_error("Can't write entry: %s", strerror(-r));
1062 journal_file_post_change(s->system_journal);
/* The runtime journal is no longer needed once its content is in /var. */
1064 journal_file_close(s->runtime_journal);
1065 s->runtime_journal = NULL;
1068 rm_rf("/run/log/journal", false, true, false);
1070 sd_journal_close(j);
/* Handles a single epoll event by dispatching on the fd it refers to.
 *
 * Recognized sources are the signal fd, the sync timer fd, the /dev/kmsg fd,
 * the native and syslog datagram sockets, and the stdout listening socket.
 * Any other event is treated as belonging to an stdout stream whose object
 * pointer is stored in ev->data.ptr.
 * NOTE(review): the return values are set on lines outside this view --
 * confirm their meaning against the caller. */
1075 int process_event(Server *s, struct epoll_event *ev) {
/* Signals: SIGUSR1 requests a flush to /var, SIGUSR2 a rotation. */
1079 if (ev->data.fd == s->signal_fd) {
1080 struct signalfd_siginfo sfsi;
1083 if (ev->events != EPOLLIN) {
1084 log_error("Got invalid event from epoll.");
1088 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1089 if (n != sizeof(sfsi)) {
1094 if (errno == EINTR || errno == EAGAIN)
1100 if (sfsi.ssi_signo == SIGUSR1) {
1101 log_info("Received request to flush runtime journal from PID %"PRIu32,
1103 touch("/run/systemd/journal/flushed");
1104 server_flush_to_var(s);
1109 if (sfsi.ssi_signo == SIGUSR2) {
1110 log_info("Received request to rotate journal from PID %"PRIu32,
1117 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
/* Sync timer expired. */
1121 } else if (ev->data.fd == s->sync_timer_fd) {
1125 log_debug("Got sync request from epoll.");
1127 r = read(ev->data.fd, (void *)&t, sizeof(t));
/* Kernel log messages. */
1134 } else if (ev->data.fd == s->dev_kmsg_fd) {
1137 if (ev->events != EPOLLIN) {
1138 log_error("Got invalid event from epoll.");
1142 r = server_read_dev_kmsg(s);
/* Datagrams on the native or the syslog socket. */
1148 } else if (ev->data.fd == s->native_fd ||
1149 ev->data.fd == s->syslog_fd) {
1151 if (ev->events != EPOLLIN) {
1152 log_error("Got invalid event from epoll.");
1157 struct msghdr msghdr;
1159 struct ucred *ucred = NULL;
1160 struct timeval *tv = NULL;
1161 struct cmsghdr *cmsg;
1163 size_t label_len = 0;
1165 struct cmsghdr cmsghdr;
1167 /* We use NAME_MAX space for the
1168 * SELinux label here. The kernel
1169 * currently enforces no limit, but
1170 * according to suggestions from the
1171 * SELinux people this will change and
1172 * it will probably be identical to
1173 * NAME_MAX. For now we use that, but
1174 * this should be updated one day when
1175 * the final limit is known.*/
1176 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1177 CMSG_SPACE(sizeof(struct timeval)) +
1178 CMSG_SPACE(sizeof(int)) + /* fd */
1179 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask for the size of the pending datagram and grow the receive buffer if
 * it is too small. */
1186 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1187 log_error("SIOCINQ failed: %m");
1191 if (s->buffer_size < (size_t) v) {
1195 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1196 b = realloc(s->buffer, l+1);
1199 log_error("Couldn't increase buffer.");
1208 iovec.iov_base = s->buffer;
1209 iovec.iov_len = s->buffer_size;
1213 msghdr.msg_iov = &iovec;
1214 msghdr.msg_iovlen = 1;
1215 msghdr.msg_control = &control;
1216 msghdr.msg_controllen = sizeof(control);
1218 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1221 if (errno == EINTR || errno == EAGAIN)
1224 log_error("recvmsg() failed: %m");
/* Extract credentials, security label, timestamp and passed file
 * descriptors from the control messages. */
1228 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1230 if (cmsg->cmsg_level == SOL_SOCKET &&
1231 cmsg->cmsg_type == SCM_CREDENTIALS &&
1232 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1233 ucred = (struct ucred*) CMSG_DATA(cmsg);
1234 else if (cmsg->cmsg_level == SOL_SOCKET &&
1235 cmsg->cmsg_type == SCM_SECURITY) {
1236 label = (char*) CMSG_DATA(cmsg);
1237 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1238 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1239 cmsg->cmsg_type == SO_TIMESTAMP &&
1240 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1241 tv = (struct timeval*) CMSG_DATA(cmsg);
1242 else if (cmsg->cmsg_level == SOL_SOCKET &&
1243 cmsg->cmsg_type == SCM_RIGHTS) {
1244 fds = (int*) CMSG_DATA(cmsg);
1245 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* Syslog datagrams must not carry file descriptors; native datagrams carry
 * either payload data or exactly one file descriptor. */
1249 if (ev->data.fd == s->syslog_fd) {
1250 if (n > 0 && n_fds == 0) {
1252 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1253 } else if (n_fds > 0)
1254 log_warning("Got file descriptors via syslog socket. Ignoring.");
1257 if (n > 0 && n_fds == 0)
1258 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1259 else if (n == 0 && n_fds == 1)
1260 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1262 log_warning("Got too many file descriptors via native socket. Ignoring.");
1265 close_many(fds, n_fds);
/* New connection on the stdout listening socket. */
1270 } else if (ev->data.fd == s->stdout_fd) {
1272 if (ev->events != EPOLLIN) {
1273 log_error("Got invalid event from epoll.");
1277 stdout_stream_new(s);
1281 StdoutStream *stream;
/* Only EPOLLIN and EPOLLHUP are acceptable for stream fds. */
1283 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1284 log_error("Got invalid event from epoll.");
1288 /* If it is none of the well-known fds, it must be an
1289 * stdout stream fd. Note that this is a bit ugly here
1290 * (since we rely that none of the well-known fds
1291 * could be interpreted as pointer), but nonetheless
1292 * safe, since the well-known fds would never get an
1293 * fd > 4096, i.e. beyond the first memory page */
1295 stream = ev->data.ptr;
/* A result of zero or less from processing frees the stream. */
1297 if (stdout_stream_process(stream) <= 0)
1298 stdout_stream_free(stream);
1303 log_error("Unknown event.");
/* Sets the process signal mask to SIGINT, SIGTERM, SIGUSR1 and SIGUSR2,
 * creates a non-blocking, close-on-exec signalfd for these signals in
 * s->signal_fd and registers it for EPOLLIN on s->epoll_fd.
 *
 * Failures of signalfd() and epoll_ctl() are logged. */
1307 static int open_signalfd(Server *s) {
1309 struct epoll_event ev;
1313 assert_se(sigemptyset(&mask) == 0);
1314 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
/* SIG_SETMASK replaces the whole signal mask with 'mask'. */
1315 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1317 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1318 if (s->signal_fd < 0) {
1319 log_error("signalfd(): %m");
1324 ev.events = EPOLLIN;
1325 ev.data.fd = s->signal_fd;
1327 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1328 log_error("epoll_ctl(): %m");
/* Applies journald settings found on the kernel command line.
 *
 * /proc/cmdline is read and split into (possibly quoted) words. The options
 * systemd.journald.forward_to_syslog=, systemd.journald.forward_to_kmsg= and
 * systemd.journald.forward_to_console= are parsed as booleans and stored in
 * the matching Server fields; unparsable values and other words starting
 * with "systemd.journald" only produce a warning. A positive result from
 * detect_container() is checked before anything is read. */
1335 static int server_parse_proc_cmdline(Server *s) {
1336 _cleanup_free_ char *line = NULL;
1341 if (detect_container(NULL) > 0)
1344 r = read_one_line_file("/proc/cmdline", &line);
1346 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1350 FOREACH_WORD_QUOTED(w, l, line, state) {
1351 _cleanup_free_ char *word;
1353 word = strndup(w, l);
/* The numeric offsets below equal the length of the option prefix, i.e.
 * they point at the value after the '='. */
1357 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1358 r = parse_boolean(word + 35);
1360 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1362 s->forward_to_syslog = r;
1363 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1364 r = parse_boolean(word + 33);
1366 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1368 s->forward_to_kmsg = r;
1369 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1370 r = parse_boolean(word + 36);
1372 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1374 s->forward_to_console = r;
1375 } else if (startswith(word, "systemd.journald"))
1376 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Loads /etc/systemd/journald.conf into the Server object.
 *
 * The file is parsed with config_parse() for the "Journal" section, using
 * the gperf-generated lookup table journald_gperf_lookup. A missing file
 * (ENOENT) is handled separately from other open errors, which are logged
 * as a warning, as are parse failures. */
1382 static int server_parse_config_file(Server *s) {
1383 static const char fn[] = "/etc/systemd/journald.conf";
1384 _cleanup_fclose_ FILE *f = NULL;
/* "e" opens the file with the close-on-exec flag (glibc extension). */
1389 f = fopen(fn, "re");
1391 if (errno == ENOENT)
1394 log_warning("Failed to open configuration file %s: %m", fn);
1398 r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1399 (void*) journald_gperf_lookup, false, false, s);
1401 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Creates the CLOCK_MONOTONIC, close-on-exec timerfd used for scheduling
 * syncs, stores it in s->sync_timer_fd and registers it for EPOLLIN on
 * s->epoll_fd. A failing epoll_ctl() is logged. */
1406 static int server_open_sync_timer(Server *s) {
1408 struct epoll_event ev;
1412 s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1413 if (s->sync_timer_fd < 0)
1417 ev.events = EPOLLIN;
1418 ev.data.fd = s->sync_timer_fd;
1420 r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1422 log_error("Failed to add idle timer fd to epoll object: %m");
/* Arranges for the journal files to be synced after an entry of the given
 * priority was written.
 *
 * Priorities of LOG_CRIT and more severe take the immediate-sync branch.
 * Otherwise, unless a sync is already scheduled, the sync timer is armed
 * once with s->sync_interval_usec (if non-zero) and s->sync_scheduled is
 * set to true. */
1429 int server_schedule_sync(Server *s, int priority) {
1434 if (priority <= LOG_CRIT) {
1435 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1440 if (s->sync_scheduled)
1443 if (s->sync_interval_usec) {
/* Only it_value is filled in; it_interval stays zero, so the timer fires
 * a single time. */
1444 struct itimerspec sync_timer_enable = {};
1446 timespec_store(&sync_timer_enable.it_value, s->sync_interval_usec);
1448 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1453 s->sync_scheduled = true;
/* Initializes a Server object.
 *
 * All fds are first marked invalid (-1) and the built-in defaults are set.
 * Then the configuration file and the kernel command line are applied, the
 * user journal hashmap, mmap cache and epoll object are created, sockets
 * passed in by the service manager are adopted, the remaining sockets, kmsg,
 * timer and signal fds are opened, and finally the rate limiter is created
 * and the system journal opened. */
1458 int server_init(Server *s) {
1464 s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1465 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1469 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1470 s->sync_scheduled = false;
1472 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1473 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1475 s->forward_to_syslog = true;
1477 s->max_level_store = LOG_DEBUG;
1478 s->max_level_syslog = LOG_DEBUG;
1479 s->max_level_kmsg = LOG_NOTICE;
1480 s->max_level_console = LOG_INFO;
/* Every byte of both metrics structures is set to 0xFF.
 * NOTE(review): presumably this marks each limit as "not configured" --
 * confirm against the code that consumes the metrics. */
1482 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1483 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1485 server_parse_config_file(s);
1486 server_parse_proc_cmdline(s);
/* If exactly one of interval and burst is zero, both are reset to zero. */
1487 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1488 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1489 (long long unsigned) s->rate_limit_interval,
1490 s->rate_limit_burst);
1491 s->rate_limit_interval = s->rate_limit_burst = 0;
1494 mkdir_p("/run/systemd/journal", 0755);
1496 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1497 if (!s->user_journals)
1500 s->mmap = mmap_cache_new();
1504 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1505 if (s->epoll_fd < 0) {
1506 log_error("Failed to create epoll object: %m");
/* Adopt the sockets passed in via the sd_listen_fds() protocol; each kind
 * may be passed at most once. */
1510 n = sd_listen_fds(true);
1512 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1516 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1518 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1520 if (s->native_fd >= 0) {
1521 log_error("Too many native sockets passed.");
1527 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1529 if (s->stdout_fd >= 0) {
1530 log_error("Too many stdout sockets passed.");
1536 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1538 if (s->syslog_fd >= 0) {
1539 log_error("Too many /dev/log sockets passed.");
1546 log_error("Unknown socket passed.");
/* Open the individual event sources. */
1551 r = server_open_syslog_socket(s);
1555 r = server_open_native_socket(s);
1559 r = server_open_stdout_socket(s);
1563 r = server_open_dev_kmsg(s);
1567 r = server_open_kernel_seqnum(s);
1571 r = server_open_sync_timer(s);
1575 r = open_signalfd(s);
1579 s->udev = udev_new();
1583 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1584 s->rate_limit_burst);
1588 r = system_journal_open(s);
/* Calls journal_file_maybe_append_tag() with the current CLOCK_REALTIME
 * time on the system journal (if open) and on every per-user journal. */
1595 void server_maybe_append_tags(Server *s) {
1601 n = now(CLOCK_REALTIME);
1603 if (s->system_journal)
1604 journal_file_maybe_append_tag(s->system_journal, n);
1606 HASHMAP_FOREACH(f, s->user_journals, i)
1607 journal_file_maybe_append_tag(f, n);
/* Releases the resources held by a Server object: stdout streams, the
 * system, runtime and per-user journal files, the user journal hashmap, all
 * fds that were opened (value >= 0), the rate limiter, the kernel sequence
 * number mapping, the mmap cache and the udev context. */
1611 void server_done(Server *s) {
/* Each call frees one stream; the loop ends when s->stdout_streams is NULL. */
1615 while (s->stdout_streams)
1616 stdout_stream_free(s->stdout_streams);
1618 if (s->system_journal)
1619 journal_file_close(s->system_journal);
1621 if (s->runtime_journal)
1622 journal_file_close(s->runtime_journal);
1624 while ((f = hashmap_steal_first(s->user_journals)))
1625 journal_file_close(f);
1627 hashmap_free(s->user_journals);
1629 if (s->epoll_fd >= 0)
1630 close_nointr_nofail(s->epoll_fd);
1632 if (s->signal_fd >= 0)
1633 close_nointr_nofail(s->signal_fd);
1635 if (s->syslog_fd >= 0)
1636 close_nointr_nofail(s->syslog_fd);
1638 if (s->native_fd >= 0)
1639 close_nointr_nofail(s->native_fd);
1641 if (s->stdout_fd >= 0)
1642 close_nointr_nofail(s->stdout_fd);
1644 if (s->dev_kmsg_fd >= 0)
1645 close_nointr_nofail(s->dev_kmsg_fd);
1647 if (s->sync_timer_fd >= 0)
1648 close_nointr_nofail(s->sync_timer_fd);
1651 journal_rate_limit_free(s->rate_limit);
/* The mapping is unmapped with the size of a single uint64_t. */
1653 if (s->kernel_seqnum)
1654 munmap(s->kernel_seqnum, sizeof(uint64_t));
1660 mmap_cache_unref(s->mmap);
1663 udev_unref(s->udev);