1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33 #include "sd-daemon.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
42 #include "conf-parser.h"
43 #include "selinux-util.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-rate-limit.h"
48 #include "journald-kmsg.h"
49 #include "journald-syslog.h"
50 #include "journald-stream.h"
51 #include "journald-console.h"
52 #include "journald-native.h"
53 #include "journald-server.h"
57 #include <acl/libacl.h>
62 #include <selinux/selinux.h>
/* Cap on the number of per-user journal files kept in s->user_journals;
 * find_journal() closes entries while the map holds this many or more. */
65 #define USER_JOURNALS_MAX 1024
/* Defaults that server_init() stores into the Server before parsing configuration. */
67 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
68 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
69 #define DEFAULT_RATE_LIMIT_BURST 1000
/* Age threshold that available_space() compares its cached timestamp against. */
71 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Textual names of the Storage enum values, indexed by the enum itself. */
73 static const char* const storage_table[] = {
74 [STORAGE_AUTO] = "auto",
75 [STORAGE_VOLATILE] = "volatile",
76 [STORAGE_PERSISTENT] = "persistent",
77 [STORAGE_NONE] = "none"
/* The macros below derive helpers from the table; presumably string<->enum
 * lookups plus the config_parse_storage() parser named in the second one. */
80 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
81 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Textual names of the SplitMode enum values, indexed by the enum itself. */
83 static const char* const split_mode_table[] = {
84 [SPLIT_NONE] = "none",
86 [SPLIT_LOGIN] = "login"
/* The macros below derive helpers from the table; presumably string<->enum
 * lookups plus the config_parse_split_mode() parser named in the second one. */
89 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
90 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Computes how many more bytes the journal may grow by and caches the result
 * in s->cached_available_space together with a monotonic timestamp.
 *
 * The figure is MIN(max_use, free space minus keep_free) minus the space the
 * existing *.journal / *.journal~ files already occupy, clamped at zero.
 * Returns the (possibly cached) value. */
92 static uint64_t available_space(Server *s, bool verbose) {
94 _cleanup_free_ char *p = NULL;
97 uint64_t sum = 0, ss_avail = 0, avail = 0;
99 _cleanup_closedir_ DIR *d = NULL;
104 ts = now(CLOCK_MONOTONIC);
/* Reuse the cached figure while it is younger than RECHECK_AVAILABLE_SPACE_USEC. */
106 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
108 return s->cached_available_space;
110 r = sd_id128_get_machine(&machine);
/* Pick the directory prefix and the matching metrics: /var with
 * system_metrics when the system journal is open, /run with
 * runtime_metrics as the alternative. */
114 if (s->system_journal) {
115 f = "/var/log/journal/";
116 m = &s->system_metrics;
118 f = "/run/log/journal/";
119 m = &s->runtime_metrics;
/* The journal directory is the prefix followed by the machine ID string. */
124 p = strappend(f, sd_id128_to_string(machine, ids));
132 if (fstatvfs(dirfd(d), &ss) < 0)
141 if (!de && errno != 0)
/* Only regular files with a journal suffix count towards current usage. */
147 if (!endswith(de->d_name, ".journal") &&
148 !endswith(de->d_name, ".journal~"))
151 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
154 if (!S_ISREG(st.st_mode))
/* st_blocks is multiplied by 512 to get the bytes actually allocated. */
157 sum += (uint64_t) st.st_blocks * 512UL;
160 ss_avail = ss.f_bsize * ss.f_bavail;
/* Subtract the keep_free reserve, never going below zero. */
161 avail = ss_avail > m->keep_free ? ss_avail - m->keep_free : 0;
163 s->cached_available_space = MIN(m->max_use, avail) > sum ? MIN(m->max_use, avail) - sum : 0;
164 s->cached_available_space_timestamp = ts;
/* Usage report sent through the journal itself (presumably only when
 * verbose is set -- the guarding condition is not visible here). */
167 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
168 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
170 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
171 "%s journal is using %s (max %s, leaving %s of free %s, current limit %s).",
172 s->system_journal ? "Permanent" : "Runtime",
173 format_bytes(fb1, sizeof(fb1), sum),
174 format_bytes(fb2, sizeof(fb2), m->max_use),
175 format_bytes(fb3, sizeof(fb3), m->keep_free),
176 format_bytes(fb4, sizeof(fb4), ss_avail),
177 format_bytes(fb5, sizeof(fb5), MIN(m->max_use, avail)));
180 return s->cached_available_space;
/* Best-effort permission fix-up for a journal file.
 *
 * Sets the file mode to 0640 and then patches the file's ACL so that it
 * carries an ACL_USER entry for uid with ACL_READ permission. Nothing is
 * returned: each failure is reported with log_warning() and otherwise
 * ignored.
 *
 * s   - server instance (not referenced by the visible statements)
 * f   - journal file whose fd and path are used
 * uid - user that should be granted read access through the ACL */
183 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
188 acl_permset_t permset;
193 r = fchmod(f->fd, 0640);
/* fchmod() signals failure with -1 and leaves the reason in errno, so the
 * message must format errno (%m); strerror(-r) would always describe errno 1. */
195 log_warning("Failed to fix access mode on %s, ignoring: %m", f->path);
201 acl = acl_get_fd(f->fd);
203 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an existing entry for this uid before creating a new one. */
207 r = acl_find_uid(acl, uid, &entry);
210 if (acl_create_entry(&acl, &entry) < 0 ||
211 acl_set_tag_type(entry, ACL_USER) < 0 ||
212 acl_set_qualifier(entry, &uid) < 0) {
213 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
218 /* We do not recalculate the mask unconditionally here,
219 * so that the fchmod() mask above stays intact. */
220 if (acl_get_permset(entry, &permset) < 0 ||
221 acl_add_perm(permset, ACL_READ) < 0 ||
222 calc_acl_mask_if_needed(&acl) < 0) {
223 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Write the modified ACL back to the file. */
227 if (acl_set_fd(f->fd, acl) < 0)
228 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Selects the journal file an entry for the given uid should be written to.
 *
 * The runtime journal wins whenever it is open. Otherwise a per-user file
 * /var/log/journal/<machine-id>/user-<uid>.journal is looked up in, or opened
 * and added to, s->user_journals. The visible error paths all fall back to
 * returning s->system_journal. */
235 static JournalFile* find_journal(Server *s, uid_t uid) {
236 _cleanup_free_ char *p = NULL;
243 /* We split up user logs only on /var, not on /run. If the
244 * runtime file is open, we write to it exclusively, in order
245 * to guarantee proper order as soon as we flush /run to
246 * /var and close the runtime file. */
248 if (s->runtime_journal)
249 return s->runtime_journal;
252 return s->system_journal;
254 r = sd_id128_get_machine(&machine);
256 return s->system_journal;
/* Already-open user journals are cached in a hashmap keyed by uid. */
258 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
262 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
263 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
264 return s->system_journal;
266 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
267 /* Too many open? Then let's close one */
268 f = hashmap_steal_first(s->user_journals);
270 journal_file_close(f);
/* User journals are opened with the system metrics and the server's compress/seal settings. */
273 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
275 return s->system_journal;
/* Grant the owning user read access to the file just opened. */
277 server_fix_perms(s, f, uid);
279 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
/* If the file cannot be cached it is closed again and the system journal is used. */
281 journal_file_close(f);
282 return s->system_journal;
/* Rotates every open journal: the runtime journal, the system journal and
 * each per-user journal in s->user_journals. Failures are logged with
 * log_error(); server_fix_perms() is called on the files after rotation. */
288 void server_rotate(Server *s) {
294 log_debug("Rotating...");
296 if (s->runtime_journal) {
/* The runtime journal is rotated with sealing disabled (last argument false). */
297 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
/* The pointer may be NULL after a failed rotation, hence two messages. */
299 if (s->runtime_journal)
300 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
302 log_error("Failed to create new runtime journal: %s", strerror(-r));
304 server_fix_perms(s, s->runtime_journal, 0);
307 if (s->system_journal) {
308 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
310 if (s->system_journal)
311 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
313 log_error("Failed to create new system journal: %s", strerror(-r));
316 server_fix_perms(s, s->system_journal, 0);
/* Per-user journals: k is the hashmap key (the uid), f the journal file. */
319 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
320 r = journal_file_rotate(&f, s->compress, s->seal);
323 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
325 log_error("Failed to create user journal: %s", strerror(-r));
/* No replacement file exists, so the stale map entry is dropped. */
326 hashmap_remove(s->user_journals, k);
/* Store the file pointer produced by the rotation under the same key. */
329 hashmap_replace(s->user_journals, k, f);
330 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Syncs the system journal and all per-user journals by setting them
 * offline, disables the pending sync timer (if one exists) and clears
 * s->sync_scheduled. Errors are logged and otherwise ignored. */
335 void server_sync(Server *s) {
341 if (s->system_journal) {
342 r = journal_file_set_offline(s->system_journal);
344 log_error("Failed to sync system journal: %s", strerror(-r));
347 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
348 r = journal_file_set_offline(f);
350 log_error("Failed to sync user journal: %s", strerror(-r));
/* The timer source is switched off rather than released. */
353 if (s->sync_event_source) {
354 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
356 log_error("Failed to disable sync timer source: %s", strerror(-r));
359 s->sync_scheduled = false;
/* Vacuums the on-disk journal directories of this machine.
 *
 * /var/log/journal/<machine-id> is vacuumed when the system journal is open
 * and /run/log/journal/<machine-id> when the runtime journal is open, each
 * with its own max_use/keep_free metrics and the shared max_retention_usec.
 * s->oldest_file_usec is reset and passed to the vacuum calls, and the
 * cached available-space timestamp is cleared at the end. */
362 void server_vacuum(Server *s) {
367 log_debug("Vacuuming...");
369 s->oldest_file_usec = 0;
371 r = sd_id128_get_machine(&machine);
373 log_error("Failed to get machine ID: %s", strerror(-r));
377 sd_id128_to_string(machine, ids);
379 if (s->system_journal) {
380 char *p = strappenda("/var/log/journal/", ids);
382 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
/* A missing directory (-ENOENT) is not reported. */
383 if (r < 0 && r != -ENOENT)
384 log_error("Failed to vacuum %s: %s", p, strerror(-r));
387 if (s->runtime_journal) {
388 char *p = strappenda("/run/log/journal/", ids);
390 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
391 if (r < 0 && r != -ENOENT)
392 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* A zero timestamp makes the next available_space() call recompute its figure. */
395 s->cached_available_space_timestamp = 0;
/* Stores "_MACHINE_ID=<id>" in s->machine_id_field so the field can be
 * attached to entries without being rebuilt each time. */
398 static void server_cache_machine_id(Server *s) {
404 r = sd_id128_get_machine(&id);
/* stpcpy() returns the end of the copied prefix, so the ID string is written right after it. */
408 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
/* Stores "_BOOT_ID=<id>" in s->boot_id_field so the field can be attached
 * to entries without being rebuilt each time. */
411 static void server_cache_boot_id(Server *s) {
417 r = sd_id128_get_boot(&id);
/* stpcpy() returns the end of the copied prefix, so the ID string is written right after it. */
421 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
/* Rebuilds s->hostname_field as "_HOSTNAME=<current hostname>", releasing
 * the previously cached string. */
424 static void server_cache_hostname(Server *s) {
425 _cleanup_free_ char *t = NULL;
430 t = gethostname_malloc();
434 x = strappend("_HOSTNAME=", t);
/* Swap in the new field; the old allocation is freed first. */
438 free(s->hostname_field);
439 s->hostname_field = x;
/* Classifies the error code r returned by a failed journal write and logs
 * why the file f is about to be rotated.
 *
 * f - journal file the write was attempted on (its path is used in the logs)
 * r - negative errno-style code from the failed operation
 *
 * Error codes are negative throughout, so every comparison below must be
 * against a negated errno constant. */
442 bool shall_try_append_again(JournalFile *f, int r) {
444 /* -E2BIG Hit configured limit
446 -EDQUOT Quota limit hit
448 -EHOSTDOWN Other machine
449 -EBUSY Unclean shutdown
450 -EPROTONOSUPPORT Unsupported feature
453 -ESHUTDOWN Already archived */
455 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
456 log_debug("%s: Allocation limit reached, rotating.", f->path);
457 else if (r == -EHOSTDOWN)
458 log_info("%s: Journal file from other machine, rotating.", f->path);
459 else if (r == -EBUSY)
460 log_info("%s: Unclean shutdown, rotating.", f->path);
461 else if (r == -EPROTONOSUPPORT)
462 log_info("%s: Unsupported feature, rotating.", f->path);
/* -ESHUTDOWN needs the minus sign like its neighbours; a positive
 * ESHUTDOWN can never equal a negative error code. */
463 else if (r == -EBADMSG || r == -ENODATA || r == -ESHUTDOWN)
464 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Appends one entry, given as n iovec items, to the journal selected for uid.
 *
 * The target file comes from find_journal(). When rotation is suggested for
 * it, the file is looked up again before writing. A failed append is retried
 * once if shall_try_append_again() accepts the error and no vacuuming has
 * happened yet; otherwise the entry is dropped with an error message that
 * states its item count and total size. A sync is scheduled according to
 * priority next to each append.
 *
 * s        - server instance
 * uid      - uid used to pick the journal file
 * iovec, n - the entry's fields and their count
 * priority - syslog priority, forwarded to server_schedule_sync() */
471 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
473 bool vacuumed = false;
480 f = find_journal(s, uid);
484 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
485 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
490 f = find_journal(s, uid);
495 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
497 server_schedule_sync(s, priority);
501 if (vacuumed || !shall_try_append_again(f, r)) {
/* Total payload size, only needed for the error message. */
504 for (i = 0; i < n; i++)
505 size += iovec[i].iov_len;
/* n is unsigned, so it is printed with %u. */
507 log_error("Failed to write entry (%u items, %zu bytes), ignoring: %s", n, size, strerror(-r));
/* Look the journal up again before the retry. */
514 f = find_journal(s, uid);
518 log_debug("Retrying write.");
519 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
523 for (i = 0; i < n; i++)
524 size += iovec[i].iov_len;
526 log_error("Failed to write entry (%u items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
528 server_schedule_sync(s, priority);
/* Completes an entry with trusted metadata and writes it to a journal.
 *
 * The caller supplies iovec with n fields already filled and room for m in
 * total. This function appends, in order:
 *   - fields about the sending process taken from ucred (_PID, _UID, _GID,
 *     _COMM, _EXE, _CMDLINE, _CAP_EFFECTIVE, audit session/loginuid, the
 *     cgroup-derived _SYSTEMD_* fields and _SELINUX_CONTEXT),
 *   - the matching OBJECT_* fields about object_pid,
 *   - _SOURCE_REALTIME_TIMESTAMP from tv,
 *   - the cached boot ID, machine ID and hostname fields,
 * then picks the uid used for journal selection from s->split_mode and calls
 * write_to_journal().
 *
 * Strings built with strappenda()/alloca() live on this function's stack. */
531 static void dispatch_message_real(
533 struct iovec *iovec, unsigned n, unsigned m,
536 const char *label, size_t label_len,
/* Fixed-size buffers for the numeric fields, sized from the field name plus
 * the maximum decimal width of the value type. */
541 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
542 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
543 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
544 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
545 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
546 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
547 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
548 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
554 uid_t realuid = 0, owner = 0, journal_uid;
555 bool owner_valid = false;
557 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
558 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
559 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
560 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
/* The caller must have reserved room for all metadata fields added below. */
569 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
/* --- Fields describing the sending process (from ucred) --- */
572 realuid = ucred->uid;
574 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
575 IOVEC_SET_STRING(iovec[n++], pid);
577 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
578 IOVEC_SET_STRING(iovec[n++], uid);
580 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
581 IOVEC_SET_STRING(iovec[n++], gid);
583 r = get_process_comm(ucred->pid, &t);
585 x = strappenda("_COMM=", t);
587 IOVEC_SET_STRING(iovec[n++], x);
590 r = get_process_exe(ucred->pid, &t);
592 x = strappenda("_EXE=", t);
594 IOVEC_SET_STRING(iovec[n++], x);
597 r = get_process_cmdline(ucred->pid, 0, false, &t);
599 x = strappenda("_CMDLINE=", t);
601 IOVEC_SET_STRING(iovec[n++], x);
604 r = get_process_capeff(ucred->pid, &t);
606 x = strappenda("_CAP_EFFECTIVE=", t);
608 IOVEC_SET_STRING(iovec[n++], x);
612 r = audit_session_from_pid(ucred->pid, &audit);
614 sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
615 IOVEC_SET_STRING(iovec[n++], audit_session);
618 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
620 sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
621 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* Cgroup path of the sender, relative to s->cgroup_root; session, owner uid,
 * unit, user unit and slice are all derived from this path. */
625 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
627 char *session = NULL;
629 x = strappenda("_SYSTEMD_CGROUP=", c);
630 IOVEC_SET_STRING(iovec[n++], x);
632 r = cg_path_get_session(c, &t);
634 session = strappenda("_SYSTEMD_SESSION=", t);
636 IOVEC_SET_STRING(iovec[n++], session);
639 if (cg_path_get_owner_uid(c, &owner) >= 0) {
642 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
643 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* The caller-provided unit_id is only a fallback: it is used as the system
 * unit when no session was found and as the user unit when one was. */
646 if (cg_path_get_unit(c, &t) >= 0) {
647 x = strappenda("_SYSTEMD_UNIT=", t);
649 IOVEC_SET_STRING(iovec[n++], x);
650 } else if (unit_id && !session) {
651 x = strappenda("_SYSTEMD_UNIT=", unit_id);
652 IOVEC_SET_STRING(iovec[n++], x);
655 if (cg_path_get_user_unit(c, &t) >= 0) {
656 x = strappenda("_SYSTEMD_USER_UNIT=", t);
658 IOVEC_SET_STRING(iovec[n++], x);
659 } else if (unit_id && session) {
660 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
661 IOVEC_SET_STRING(iovec[n++], x);
664 if (cg_path_get_slice(c, &t) >= 0) {
665 x = strappenda("_SYSTEMD_SLICE=", t);
667 IOVEC_SET_STRING(iovec[n++], x);
671 } else if (unit_id) {
672 x = strappenda("_SYSTEMD_UNIT=", unit_id);
673 IOVEC_SET_STRING(iovec[n++], x);
/* SELinux context: built from the label passed in by the caller (which is
 * not NUL-terminated, hence the explicit length and terminator), or queried
 * for the sender's PID via getpidcon(). */
679 x = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);
681 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
682 IOVEC_SET_STRING(iovec[n++], x);
684 security_context_t con;
686 if (getpidcon(ucred->pid, &con) >= 0) {
687 x = strappenda("_SELINUX_CONTEXT=", con);
690 IOVEC_SET_STRING(iovec[n++], x);
/* --- OBJECT_* fields: the same kind of metadata for object_pid --- */
699 r = get_process_uid(object_pid, &object_uid);
701 sprintf(o_uid, "OBJECT_UID=%lu", (unsigned long) object_uid);
702 IOVEC_SET_STRING(iovec[n++], o_uid);
705 r = get_process_gid(object_pid, &object_gid);
707 sprintf(o_gid, "OBJECT_GID=%lu", (unsigned long) object_gid);
708 IOVEC_SET_STRING(iovec[n++], o_gid);
711 r = get_process_comm(object_pid, &t);
713 x = strappenda("OBJECT_COMM=", t);
715 IOVEC_SET_STRING(iovec[n++], x);
718 r = get_process_exe(object_pid, &t);
720 x = strappenda("OBJECT_EXE=", t);
722 IOVEC_SET_STRING(iovec[n++], x);
725 r = get_process_cmdline(object_pid, 0, false, &t);
727 x = strappenda("OBJECT_CMDLINE=", t);
729 IOVEC_SET_STRING(iovec[n++], x);
733 r = audit_session_from_pid(object_pid, &audit);
735 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%lu", (unsigned long) audit);
736 IOVEC_SET_STRING(iovec[n++], o_audit_session);
739 r = audit_loginuid_from_pid(object_pid, &loginuid);
741 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
742 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
746 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
748 x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
749 IOVEC_SET_STRING(iovec[n++], x);
751 r = cg_path_get_session(c, &t);
753 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
755 IOVEC_SET_STRING(iovec[n++], x);
758 if (cg_path_get_owner_uid(c, &owner) >= 0) {
759 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
760 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
763 if (cg_path_get_unit(c, &t) >= 0) {
764 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
766 IOVEC_SET_STRING(iovec[n++], x);
769 if (cg_path_get_user_unit(c, &t) >= 0) {
770 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
772 IOVEC_SET_STRING(iovec[n++], x);
/* --- Timestamp and per-host fields --- */
781 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
782 IOVEC_SET_STRING(iovec[n++], source_time);
785 /* Note that strictly speaking storing the boot id here is
786 * redundant since the entry includes this in-line
787 * anyway. However, we need this indexed, too. */
788 if (!isempty(s->boot_id_field))
789 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
791 if (!isempty(s->machine_id_field))
792 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
794 if (!isempty(s->hostname_field))
795 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
/* --- Choose which uid's journal receives the entry --- */
799 if (s->split_mode == SPLIT_UID && realuid > 0)
800 /* Split up strictly by any UID */
801 journal_uid = realuid;
802 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
803 /* Split up by login UIDs, this avoids creation of
804 * individual journals for system UIDs. We do this
805 * only if the realuid is not root, in order not to
806 * accidentally leak privileged information to the
807 * user that is logged by a privileged process that is
808 * part of an unprivileged session.*/
813 write_to_journal(s, journal_uid, iovec, n, priority);
/* Emits a message generated by the journal daemon itself.
 *
 * Builds an entry with PRIORITY=6, _TRANSPORT=driver and a printf-style
 * formatted MESSAGE=, adds a message ID field unless message_id is
 * SD_ID128_NULL, and dispatches it with the daemon's own pid/uid/gid as
 * credentials at LOG_INFO priority. */
816 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
817 char mid[11 + 32 + 1];
818 char buffer[16 + LINE_MAX + 1];
/* Room for the metadata dispatch_message_real() appends plus the four fields set here. */
819 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
822 struct ucred ucred = {};
827 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
828 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The formatted text is written directly behind the 8-byte "MESSAGE=" prefix. */
830 memcpy(buffer, "MESSAGE=", 8);
831 va_start(ap, format);
832 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
834 char_array_0(buffer);
835 IOVEC_SET_STRING(iovec[n++], buffer);
837 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
838 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
840 IOVEC_SET_STRING(iovec[n++], mid);
/* The daemon reports itself as the sender. */
843 ucred.pid = getpid();
844 ucred.uid = getuid();
845 ucred.gid = getgid();
847 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
/* Front end for storing a received message.
 *
 * Checks the priority against s->max_level_store and the storage mode
 * against STORAGE_NONE, applies rate limiting keyed on a shortened form of
 * the sender's cgroup path, reports suppressed messages through
 * server_driver_message(), and finally hands the entry to
 * dispatch_message_real() with all arguments passed through. */
850 void server_dispatch_message(
852 struct iovec *iovec, unsigned n, unsigned m,
855 const char *label, size_t label_len,
861 _cleanup_free_ char *path = NULL;
865 assert(iovec || n == 0);
870 if (LOG_PRI(priority) > s->max_level_store)
873 /* Stop early in case the information will not be stored
875 if (s->storage == STORAGE_NONE)
881 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
885 /* example: /user/lennart/3/foobar
886 * /system/dbus.service/foobar
888 * So let's cut of everything past the third /, since that is
889 * where user directories start */
/* Walk to the first, second and third '/' of the path in turn. */
891 c = strchr(path, '/');
893 c = strchr(c+1, '/');
895 c = strchr(c+1, '/');
/* The currently available space is passed along to the rate limit test. */
901 rl = journal_rate_limit_test(s->rate_limit, path,
902 priority & LOG_PRIMASK, available_space(s, false));
907 /* Write a suppression message if we suppressed something */
909 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
910 "Suppressed %u messages from %s", rl - 1, path);
913 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
/* Opens the persistent system journal and/or the volatile runtime journal
 * according to s->storage.
 *
 * The system journal under /var/log/journal/<machine-id>/ is only attempted
 * in persistent or auto mode and only once /run/systemd/journal/flushed
 * exists. The runtime journal under /run/log/journal/<machine-id>/ is handled
 * for every mode except STORAGE_NONE. Ends by refreshing and logging the
 * available-space figure. */
917 static int system_journal_open(Server *s) {
923 r = sd_id128_get_machine(&machine);
925 log_error("Failed to get machine id: %s", strerror(-r));
929 sd_id128_to_string(machine, ids);
931 if (!s->system_journal &&
932 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
933 access("/run/systemd/journal/flushed", F_OK) >= 0) {
935 /* If in auto mode: first try to create the machine
936 * path, but not the prefix.
938 * If in persistent mode: create /var/log/journal and
939 * the machine path */
941 if (s->storage == STORAGE_PERSISTENT)
942 (void) mkdir("/var/log/journal/", 0755);
944 fn = strappenda("/var/log/journal/", ids);
945 (void) mkdir(fn, 0755);
947 fn = strappenda(fn, "/system.journal");
948 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
951 server_fix_perms(s, s->system_journal, 0);
/* A missing directory or read-only file system is not worth a warning. */
953 if (r != -ENOENT && r != -EROFS)
954 log_warning("Failed to open system journal: %s", strerror(-r));
960 if (!s->runtime_journal &&
961 (s->storage != STORAGE_NONE)) {
963 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
967 if (s->system_journal) {
969 /* Try to open the runtime journal, but only
970 * if it already exists, so that we can flush
971 * it into the system journal */
/* Opened without O_CREAT and without sealing. */
973 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
978 log_warning("Failed to open runtime journal: %s", strerror(-r));
985 /* OK, we really need the runtime journal, so create
986 * it if necessary. */
988 (void) mkdir_parents(fn, 0755);
989 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
993 log_error("Failed to open runtime journal: %s", strerror(-r));
998 if (s->runtime_journal)
999 server_fix_perms(s, s->runtime_journal, 0);
/* verbose=true: recompute the space figure and emit the usage message. */
1002 available_space(s, true);
/* Copies all entries of the runtime journal into the system journal.
 *
 * Only acts in auto or persistent storage mode, when a runtime journal is
 * open and the system journal can be opened. Entries are read through an
 * sd_journal handle restricted to runtime files and copied one by one, with
 * a single retry per entry when shall_try_append_again() accepts the error.
 * The visible tail closes the runtime journal, removes /run/log/journal and
 * logs how long the flush took. */
1007 int server_flush_to_var(Server *s) {
1009 sd_journal *j = NULL;
1010 char ts[FORMAT_TIMESPAN_MAX];
1017 if (s->storage != STORAGE_AUTO &&
1018 s->storage != STORAGE_PERSISTENT)
1021 if (!s->runtime_journal)
1024 system_journal_open(s);
1026 if (!s->system_journal)
1029 log_debug("Flushing to /var...");
/* Remember the start time for the duration reported at the end. */
1031 start = now(CLOCK_MONOTONIC);
1033 r = sd_id128_get_machine(&machine);
1037 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1039 log_error("Failed to read runtime journal: %s", strerror(-r));
/* A threshold of 0 is set so field data is not cut off while copying
 * (presumably 0 means "no limit" -- confirm against sd-journal docs). */
1043 sd_journal_set_data_threshold(j, 0);
1045 SD_JOURNAL_FOREACH(j) {
1049 f = j->current_file;
1050 assert(f && f->current_offset > 0);
1054 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1056 log_error("Can't read entry: %s", strerror(-r));
1060 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1064 if (!shall_try_append_again(s->system_journal, r)) {
1065 log_error("Can't write entry: %s", strerror(-r));
/* The system journal may be gone if rotating it failed. */
1072 if (!s->system_journal) {
1073 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1078 log_debug("Retrying write.");
1079 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1081 log_error("Can't write entry: %s", strerror(-r));
1087 journal_file_post_change(s->system_journal);
1089 journal_file_close(s->runtime_journal);
1090 s->runtime_journal = NULL;
1093 rm_rf("/run/log/journal", false, true, false);
1095 sd_journal_close(j);
1097 server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
/* Event-loop callback for the native and syslog datagram sockets.
 *
 * Receives a datagram into s->buffer (grown to the pending size reported by
 * SIOCINQ plus LINE_MAX), extracts sender credentials, SELinux label,
 * receive timestamp and passed file descriptors from the control messages,
 * and forwards the payload to the syslog or native message handlers. Any
 * received file descriptors are closed with close_many() afterwards.
 *
 * userdata is the Server; fd must be its native or syslog socket. */
1102 int process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1103 Server *s = userdata;
1106 assert(fd == s->native_fd || fd == s->syslog_fd);
/* Anything other than plain readability is treated as an error. */
1108 if (revents != EPOLLIN) {
1109 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1114 struct ucred *ucred = NULL;
1115 struct timeval *tv = NULL;
1116 struct cmsghdr *cmsg;
1118 size_t label_len = 0;
1122 struct cmsghdr cmsghdr;
1124 /* We use NAME_MAX space for the
1125 * SELinux label here. The kernel
1126 * currently enforces no limit, but
1127 * according to suggestions from the
1128 * SELinux people this will change and
1129 * it will probably be identical to
1130 * NAME_MAX. For now we use that, but
1131 * this should be updated one day when
1132 * the final limit is known.*/
1133 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1134 CMSG_SPACE(sizeof(struct timeval)) +
1135 CMSG_SPACE(sizeof(int)) + /* fd */
1136 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1138 struct msghdr msghdr = {
1141 .msg_control = &control,
1142 .msg_controllen = sizeof(control),
/* Ask the kernel how many bytes are pending so the buffer can be sized. */
1150 if (ioctl(fd, SIOCINQ, &v) < 0) {
1151 log_error("SIOCINQ failed: %m");
1155 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, LINE_MAX + (size_t) v))
1158 iovec.iov_base = s->buffer;
1159 iovec.iov_len = s->buffer_size;
1161 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1163 if (errno == EINTR || errno == EAGAIN)
1166 log_error("recvmsg() failed: %m");
/* Pick out the ancillary data; credentials and timestamp are only accepted
 * when the control message has exactly the expected length. */
1170 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1172 if (cmsg->cmsg_level == SOL_SOCKET &&
1173 cmsg->cmsg_type == SCM_CREDENTIALS &&
1174 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1175 ucred = (struct ucred*) CMSG_DATA(cmsg);
1176 else if (cmsg->cmsg_level == SOL_SOCKET &&
1177 cmsg->cmsg_type == SCM_SECURITY) {
1178 label = (char*) CMSG_DATA(cmsg);
1179 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1180 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1181 cmsg->cmsg_type == SO_TIMESTAMP &&
1182 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1183 tv = (struct timeval*) CMSG_DATA(cmsg);
1184 else if (cmsg->cmsg_level == SOL_SOCKET &&
1185 cmsg->cmsg_type == SCM_RIGHTS) {
1186 fds = (int*) CMSG_DATA(cmsg);
1187 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* The syslog socket accepts payload only; file descriptors are rejected. */
1191 if (fd == s->syslog_fd) {
1192 if (n > 0 && n_fds == 0) {
1194 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1195 } else if (n_fds > 0)
1196 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* The native socket accepts either an inline payload or exactly one file descriptor. */
1199 if (n > 0 && n_fds == 0)
1200 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1201 else if (n == 0 && n_fds == 1)
1202 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1204 log_warning("Got too many file descriptors via native socket. Ignoring.");
1207 close_many(fds, n_fds);
/* SIGUSR1 handler: creates the /run/systemd/journal/flushed flag file and
 * flushes the runtime journal to /var. userdata is the Server. */
1213 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1214 Server *s = userdata;
1218 log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
/* system_journal_open() checks for this file before opening the journal under /var. */
1220 touch("/run/systemd/journal/flushed");
1221 server_flush_to_var(s);
/* SIGUSR2 handler: logs the rotation request together with the PID that sent
 * the signal. userdata is the Server. */
1227 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1228 Server *s = userdata;
1232 log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
/* Handler registered for both SIGTERM and SIGINT: logs the signal name and
 * asks the event loop to exit with code 0. userdata is the Server. */
1239 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1240 Server *s = userdata;
1244 log_info("Received SIG%s", signal_to_string(si->ssi_signo));
1246 sd_event_exit(s->event, 0);
/* Blocks SIGINT, SIGTERM, SIGUSR1 and SIGUSR2 for normal delivery and
 * registers an event source for each of them on s->event. */
1250 static int setup_signals(Server *s) {
1256 assert_se(sigemptyset(&mask) == 0);
1257 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
/* SIG_SETMASK replaces the whole signal mask with exactly these four signals. */
1258 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1260 r = sd_event_add_signal(s->event, SIGUSR1, dispatch_sigusr1, s, &s->sigusr1_event_source);
1264 r = sd_event_add_signal(s->event, SIGUSR2, dispatch_sigusr2, s, &s->sigusr2_event_source);
/* SIGTERM and SIGINT share one handler. */
1268 r = sd_event_add_signal(s->event, SIGTERM, dispatch_sigterm, s, &s->sigterm_event_source);
1272 r = sd_event_add_signal(s->event, SIGINT, dispatch_sigterm, s, &s->sigint_event_source);
/* Applies journald switches found on the kernel command line.
 *
 * Recognized words are systemd.journald.forward_to_syslog=,
 * systemd.journald.forward_to_kmsg= and systemd.journald.forward_to_console=,
 * each followed by a boolean. Unparsable values and other systemd.journald*
 * words only produce warnings. */
1279 static int server_parse_proc_cmdline(Server *s) {
1280 _cleanup_free_ char *line = NULL;
1285 r = proc_cmdline(&line);
1287 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1291 FOREACH_WORD_QUOTED(w, l, line, state) {
1292 _cleanup_free_ char *word;
/* Each word is copied into its own NUL-terminated string. */
1294 word = strndup(w, l);
/* The numeric offsets below equal the length of the matched prefix,
 * so word + N points at the value. */
1298 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1299 r = parse_boolean(word + 35);
1301 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1303 s->forward_to_syslog = r;
1304 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1305 r = parse_boolean(word + 33);
1307 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1309 s->forward_to_kmsg = r;
1310 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1311 r = parse_boolean(word + 36);
1313 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1315 s->forward_to_console = r;
1316 } else if (startswith(word, "systemd.journald"))
1317 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Loads /etc/systemd/journald.conf into the Server, reading the [Journal]
 * section through the gperf-generated lookup table. A missing file is
 * handled separately from other open errors; problems are logged as warnings. */
1323 static int server_parse_config_file(Server *s) {
1324 static const char fn[] = "/etc/systemd/journald.conf";
1325 _cleanup_fclose_ FILE *f = NULL;
/* "e" opens the file with close-on-exec set. */
1330 f = fopen(fn, "re");
1332 if (errno == ENOENT)
1335 log_warning("Failed to open configuration file %s: %m", fn);
1339 r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1340 (void*) journald_gperf_lookup, false, false, s);
1342 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Timer callback installed by server_schedule_sync(); userdata is the Server. */
1347 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1348 Server *s = userdata;
/* Arranges for the journals to be synced to disk.
 *
 * Entries of priority LOG_CRIT or more severe are handled immediately.
 * Otherwise, unless a sync is already scheduled, a one-shot monotonic timer
 * is armed s->sync_interval_usec from now; the timer source is created on
 * first use and re-armed afterwards. */
1356 int server_schedule_sync(Server *s, int priority) {
1361 if (priority <= LOG_CRIT) {
1362 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1367 if (s->sync_scheduled)
1370 if (s->sync_interval_usec > 0) {
1373 r = sd_event_get_now_monotonic(s->event, &when);
1377 when += s->sync_interval_usec;
/* First use: create the timer source and raise its priority. */
1379 if (!s->sync_event_source) {
1380 r = sd_event_add_monotonic(s->event, when, 0, server_dispatch_sync, s, &s->sync_event_source);
1384 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
/* Later uses: move the deadline and re-enable the source for one shot. */
1386 r = sd_event_source_set_time(s->sync_event_source, when);
1390 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1395 s->sync_scheduled = true;
/* I/O callback for the hostname fd: refreshes the cached _HOSTNAME= field.
 * userdata is the Server. */
1401 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1402 Server *s = userdata;
1406 server_cache_hostname(s);
/* Opens /proc/sys/kernel/hostname and registers it with the event loop so
 * that dispatch_hostname_change() runs when the fd signals an event. */
1410 static int server_open_hostname(Server *s) {
1415 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1416 if (s->hostname_fd < 0) {
1417 log_error("Failed to open /proc/sys/kernel/hostname: %m");
/* The source is added with an event mask of 0. */
1421 r = sd_event_add_io(s->event, s->hostname_fd, 0, dispatch_hostname_change, s, &s->hostname_event_source);
1423 log_error("Failed to register hostname fd in event loop: %s", strerror(-r));
/* Priority is set 10 below SD_EVENT_PRIORITY_IMPORTANT (presumably so that
 * hostname updates are dispatched ahead of message sources -- confirm). */
1427 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1429 log_error("Failed to adjust priority of host name event source: %s", strerror(-r));
/* Initializes a Server.
 *
 * Sets defaults, reads the configuration file and kernel command line,
 * creates the user-journal map, mmap cache and event loop, adopts sockets
 * passed in by the service manager, opens the remaining inputs (syslog,
 * native, stdout, /dev/kmsg, kernel seqnum, hostname), installs signal
 * handling, creates the udev context and rate limiter, caches the host
 * identity fields and finally opens the journal files. */
1436 int server_init(Server *s) {
/* All fds start out as -1, i.e. "not open". */
1442 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->hostname_fd = -1;
1446 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1447 s->sync_scheduled = false;
1449 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1450 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1452 s->forward_to_syslog = true;
1454 s->max_level_store = LOG_DEBUG;
1455 s->max_level_syslog = LOG_DEBUG;
1456 s->max_level_kmsg = LOG_NOTICE;
1457 s->max_level_console = LOG_INFO;
/* Every byte of the metrics is set to 0xFF (presumably an "unset" marker
 * that is resolved to real defaults elsewhere -- confirm). */
1459 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1460 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* Configuration file first, then the kernel command line. */
1462 server_parse_config_file(s);
1463 server_parse_proc_cmdline(s);
/* Rate limiting needs both values; if exactly one is zero, both are zeroed. */
1464 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1465 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1466 (long long unsigned) s->rate_limit_interval,
1467 s->rate_limit_burst);
1468 s->rate_limit_interval = s->rate_limit_burst = 0;
1471 mkdir_p("/run/systemd/journal", 0755);
1473 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1474 if (!s->user_journals)
1477 s->mmap = mmap_cache_new();
1481 r = sd_event_default(&s->event);
1483 log_error("Failed to create event loop: %s", strerror(-r));
1487 sd_event_set_watchdog(s->event, true);
/* Sockets handed over by the service manager are matched by type and path;
 * at most one of each kind is accepted. */
1489 n = sd_listen_fds(true);
1491 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1495 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1497 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1499 if (s->native_fd >= 0) {
1500 log_error("Too many native sockets passed.");
1506 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1508 if (s->stdout_fd >= 0) {
1509 log_error("Too many stdout sockets passed.");
1515 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1517 if (s->syslog_fd >= 0) {
1518 log_error("Too many /dev/log sockets passed.");
1525 log_error("Unknown socket passed.");
/* Open the individual input sources. */
1530 r = server_open_syslog_socket(s);
1534 r = server_open_native_socket(s);
1538 r = server_open_stdout_socket(s);
1542 r = server_open_dev_kmsg(s);
1546 r = server_open_kernel_seqnum(s);
1550 r = server_open_hostname(s);
1554 r = setup_signals(s);
1558 s->udev = udev_new();
1562 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1566 r = cg_get_root_path(&s->cgroup_root);
/* Pre-build the per-host fields attached to every entry. */
1570 server_cache_hostname(s);
1571 server_cache_boot_id(s);
1572 server_cache_machine_id(s);
1574 r = system_journal_open(s);
/* Gives the system journal and every per-user journal the chance to append
 * a tag, passing the current CLOCK_REALTIME time to each. */
1581 void server_maybe_append_tags(Server *s) {
1587 n = now(CLOCK_REALTIME);
1589 if (s->system_journal)
1590 journal_file_maybe_append_tag(s->system_journal, n);
1592 HASHMAP_FOREACH(f, s->user_journals, i)
1593 journal_file_maybe_append_tag(f, n);
/* Releases everything the Server owns: stdout streams, journal files, the
 * user-journal map, event sources and the event loop, open file
 * descriptors, the rate limiter, the kernel seqnum mapping, the cgroup root
 * string, the mmap cache and the udev context. */
1597 void server_done(Server *s) {
/* The loop relies on stdout_stream_free() removing the stream from the list head. */
1601 while (s->stdout_streams)
1602 stdout_stream_free(s->stdout_streams);
1604 if (s->system_journal)
1605 journal_file_close(s->system_journal);
1607 if (s->runtime_journal)
1608 journal_file_close(s->runtime_journal);
/* Drain the map entry by entry, closing each user journal. */
1610 while ((f = hashmap_steal_first(s->user_journals)))
1611 journal_file_close(f);
1613 hashmap_free(s->user_journals);
/* Event sources are dropped before the event loop itself. */
1615 sd_event_source_unref(s->syslog_event_source);
1616 sd_event_source_unref(s->native_event_source);
1617 sd_event_source_unref(s->stdout_event_source);
1618 sd_event_source_unref(s->dev_kmsg_event_source);
1619 sd_event_source_unref(s->sync_event_source);
1620 sd_event_source_unref(s->sigusr1_event_source);
1621 sd_event_source_unref(s->sigusr2_event_source);
1622 sd_event_source_unref(s->sigterm_event_source);
1623 sd_event_source_unref(s->sigint_event_source);
1624 sd_event_source_unref(s->hostname_event_source);
1625 sd_event_unref(s->event);
/* Only fds that were actually opened (>= 0) are closed. */
1627 if (s->syslog_fd >= 0)
1628 close_nointr_nofail(s->syslog_fd);
1630 if (s->native_fd >= 0)
1631 close_nointr_nofail(s->native_fd);
1633 if (s->stdout_fd >= 0)
1634 close_nointr_nofail(s->stdout_fd);
1636 if (s->dev_kmsg_fd >= 0)
1637 close_nointr_nofail(s->dev_kmsg_fd);
1639 if (s->hostname_fd >= 0)
1640 close_nointr_nofail(s->hostname_fd);
1643 journal_rate_limit_free(s->rate_limit);
/* The kernel sequence number is a single mapped uint64_t. */
1645 if (s->kernel_seqnum)
1646 munmap(s->kernel_seqnum, sizeof(uint64_t));
1650 free(s->cgroup_root);
1653 mmap_cache_unref(s->mmap);
1656 udev_unref(s->udev);