1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33 #include "sd-daemon.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
42 #include "conf-parser.h"
43 #include "selinux-util.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-rate-limit.h"
48 #include "journald-kmsg.h"
49 #include "journald-syslog.h"
50 #include "journald-stream.h"
51 #include "journald-console.h"
52 #include "journald-native.h"
53 #include "journald-server.h"
57 #include <acl/libacl.h>
62 #include <selinux/selinux.h>
/* Upper bound on the number of entries kept in s->user_journals;
 * find_journal() closes files until the hashmap is below this size. */
65 #define USER_JOURNALS_MAX 1024
/* Defaults that server_init() assigns before the configuration is parsed. */
67 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
68 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
69 #define DEFAULT_RATE_LIMIT_BURST 1000
/* How long the value cached by available_space() is considered fresh. */
71 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Spelling of each Storage enum value, indexed by the enum. */
73 static const char* const storage_table[_STORAGE_MAX] = {
74 [STORAGE_AUTO] = "auto",
75 [STORAGE_VOLATILE] = "volatile",
76 [STORAGE_PERSISTENT] = "persistent",
77 [STORAGE_NONE] = "none"
/* Expand the string-table lookup and config-parse macros for Storage;
 * the latter defines config_parse_storage. */
80 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
81 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Spelling of each SplitMode enum value, indexed by the enum.
 * NOTE(review): only two entries are visible here although SPLIT_UID is
 * used in dispatch_message_real() - confirm against the full file. */
83 static const char* const split_mode_table[_SPLIT_MAX] = {
84 [SPLIT_LOGIN] = "login",
86 [SPLIT_NONE] = "none",
/* Expand the string-table lookup and config-parse macros for SplitMode;
 * the latter defines config_parse_split_mode. */
89 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
90 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Computes how many more bytes the journal may occupy, stores the result in
 * s->cached_available_space (together with a monotonic timestamp) and
 * returns it. The directory and metrics used are those of the system
 * journal when s->system_journal is set, otherwise the runtime ones.
 * NOTE(review): the usage message at the end is presumably only emitted
 * when verbose is true - the guarding condition is not visible here. */
92 static uint64_t available_space(Server *s, bool verbose) {
94 _cleanup_free_ char *p = NULL;
97 uint64_t sum = 0, ss_avail = 0, avail = 0;
99 _cleanup_closedir_ DIR *d = NULL;
104 ts = now(CLOCK_MONOTONIC);
/* Serve the cached value while it is younger than
 * RECHECK_AVAILABLE_SPACE_USEC. */
106 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
108 return s->cached_available_space;
110 r = sd_id128_get_machine(&machine);
114 if (s->system_journal) {
115 f = "/var/log/journal/";
116 m = &s->system_metrics;
118 f = "/run/log/journal/";
119 m = &s->runtime_metrics;
/* The journal directory is <prefix><machine-id>. */
124 p = strappend(f, sd_id128_to_string(machine, ids));
132 if (fstatvfs(dirfd(d), &ss) < 0)
141 if (!de && errno != 0)
/* Only regular files named *.journal or *.journal~ count towards usage. */
147 if (!endswith(de->d_name, ".journal") &&
148 !endswith(de->d_name, ".journal~"))
151 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
154 if (!S_ISREG(st.st_mode))
/* st_blocks is counted in 512-byte units. */
157 sum += (uint64_t) st.st_blocks * 512UL;
160 ss_avail = ss.f_bsize * ss.f_bavail;
162 /* If we reached a high mark, we will always allow this much
163 * again, unless usage goes above max_use. This watermark
164 * value is cached so that we don't give up space on pressure,
165 * but hover below the maximum usage. */
/* Free space on the file system minus the amount we must keep free. */
170 avail = LESS_BY(ss_avail, m->keep_free);
/* Remaining budget: the smaller of max_use and avail, minus current usage. */
172 s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
173 s->cached_available_space_timestamp = ts;
176 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
177 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
179 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
180 "%s journal is using %s (max allowed %s, "
181 "trying to leave %s free of %s available → current limit %s).",
182 s->system_journal ? "Permanent" : "Runtime",
183 format_bytes(fb1, sizeof(fb1), sum),
184 format_bytes(fb2, sizeof(fb2), m->max_use),
185 format_bytes(fb3, sizeof(fb3), m->keep_free),
186 format_bytes(fb4, sizeof(fb4), ss_avail),
187 format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
190 return s->cached_available_space;
/* Sets the mode of journal file f to 0640 and patches its ACL so that it
 * contains an ACL_USER entry for uid with ACL_READ. Every failure is only
 * logged as a warning ("ignoring"); nothing is reported to the caller. */
193 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
198 acl_permset_t permset;
203 r = fchmod(f->fd, 0640);
205 log_warning("Failed to fix access mode on %s, ignoring: %s", f->path, strerror(-r));
211 acl = acl_get_fd(f->fd);
213 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an existing entry for uid; the lines below create one. */
217 r = acl_find_uid(acl, uid, &entry);
220 if (acl_create_entry(&acl, &entry) < 0 ||
221 acl_set_tag_type(entry, ACL_USER) < 0 ||
222 acl_set_qualifier(entry, &uid) < 0) {
223 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
228 /* We do not recalculate the mask unconditionally here,
229 * so that the fchmod() mask above stays intact. */
230 if (acl_get_permset(entry, &permset) < 0 ||
231 acl_add_perm(permset, ACL_READ) < 0 ||
232 calc_acl_mask_if_needed(&acl) < 0) {
233 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
237 if (acl_set_fd(f->fd, acl) < 0)
238 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Selects the journal file that entries attributed to uid are written to.
 * The runtime journal wins whenever it is open; otherwise a per-user file
 * /var/log/journal/<machine-id>/user-<uid>.journal is looked up in (or
 * opened and added to) s->user_journals. Every failure path visible here
 * falls back to s->system_journal. */
245 static JournalFile* find_journal(Server *s, uid_t uid) {
246 _cleanup_free_ char *p = NULL;
253 /* We split up user logs only on /var, not on /run. If the
254 * runtime file is open, we write to it exclusively, in order
255 * to guarantee proper order as soon as we flush /run to
256 * /var and close the runtime file. */
258 if (s->runtime_journal)
259 return s->runtime_journal;
262 return s->system_journal;
264 r = sd_id128_get_machine(&machine);
266 return s->system_journal;
/* Already open for this uid? */
268 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
272 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
273 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
274 return s->system_journal;
276 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
277 /* Too many open? Then let's close one */
278 f = hashmap_steal_first(s->user_journals);
280 journal_file_close(f);
283 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
285 return s->system_journal;
287 server_fix_perms(s, f, uid);
/* Remember the file; if that fails, close it again and fall back. */
289 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
291 journal_file_close(f);
292 return s->system_journal;
/* Rotates the journal file *f via journal_file_rotate() (which takes the
 * pointer by reference) and then re-applies permissions for uid with
 * server_fix_perms(). name is a label for the kind of journal, used in
 * the "Failed to create new %s journal" message. */
298 static int do_rotate(Server *s, JournalFile **f, const char* name,
299 bool seal, uint32_t uid) {
306 r = journal_file_rotate(f, s->compress, seal);
309 log_error("Failed to rotate %s: %s",
310 (*f)->path, strerror(-r));
312 log_error("Failed to create new %s journal: %s",
315 server_fix_perms(s, *f, uid);
/* Rotates the runtime journal, the system journal and every per-user
 * journal. The runtime journal is rotated without sealing; the others use
 * s->seal. For user journals the hashmap entry is either replaced with the
 * new file or removed. */
319 void server_rotate(Server *s) {
325 log_debug("Rotating...");
327 do_rotate(s, &s->runtime_journal, "runtime", false, 0);
328 do_rotate(s, &s->system_journal, "system", s->seal, 0);
/* The hashmap key is the uid the journal belongs to. */
330 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
331 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
333 hashmap_replace(s->user_journals, k, f);
335 /* Old file has been closed and deallocated */
336 hashmap_remove(s->user_journals, k);
/* Calls journal_file_set_offline() on the system journal and on every user
 * journal, disables the sync timer source if one exists and clears
 * s->sync_scheduled. Errors are logged only. */
340 void server_sync(Server *s) {
346 if (s->system_journal) {
347 r = journal_file_set_offline(s->system_journal);
349 log_error("Failed to sync system journal: %s", strerror(-r));
352 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
353 r = journal_file_set_offline(f);
355 log_error("Failed to sync user journal: %s", strerror(-r));
358 if (s->sync_event_source) {
359 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
361 log_error("Failed to disable sync timer source: %s", strerror(-r));
364 s->sync_scheduled = false;
/* Vacuums the journal directory <path><ids> with journal_directory_vacuum(),
 * using metrics->max_use and s->max_retention_usec as limits and updating
 * s->oldest_file_usec. A missing directory (-ENOENT) is not logged.
 * NOTE(review): parameter f is not referenced in the lines visible here. */
367 static void do_vacuum(Server *s, char *ids, JournalFile *f, const char* path,
368 JournalMetrics *metrics) {
375 p = strappenda(path, ids);
376 r = journal_directory_vacuum(p, metrics->max_use, s->max_retention_usec, &s->oldest_file_usec);
377 if (r < 0 && r != -ENOENT)
378 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* Vacuums both the persistent and the runtime journal directory of this
 * machine, then resets the timestamp of the cached available-space value so
 * that available_space() recomputes it. */
381 void server_vacuum(Server *s) {
386 log_debug("Vacuuming...");
388 s->oldest_file_usec = 0;
390 r = sd_id128_get_machine(&machine);
392 log_error("Failed to get machine ID: %s", strerror(-r));
395 sd_id128_to_string(machine, ids);
397 do_vacuum(s, ids, s->system_journal, "/var/log/journal/", &s->system_metrics);
398 do_vacuum(s, ids, s->runtime_journal, "/run/log/journal/", &s->runtime_metrics);
400 s->cached_available_space_timestamp = 0;
/* Fills s->machine_id_field with "_MACHINE_ID=" followed by the string form
 * of the machine ID. stpcpy() returns the end of the copied prefix, which
 * is where sd_id128_to_string() writes. */
403 static void server_cache_machine_id(Server *s) {
409 r = sd_id128_get_machine(&id);
413 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
/* Fills s->boot_id_field with "_BOOT_ID=" followed by the string form of
 * the boot ID. stpcpy() returns the end of the copied prefix, which is
 * where sd_id128_to_string() writes. */
416 static void server_cache_boot_id(Server *s) {
422 r = sd_id128_get_boot(&id);
426 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
/* Rebuilds s->hostname_field as "_HOSTNAME=<current hostname>". The
 * previous string is freed and replaced by the new one. */
429 static void server_cache_hostname(Server *s) {
430 _cleanup_free_ char *t = NULL;
435 t = gethostname_malloc();
439 x = strappend("_HOSTNAME=", t);
443 free(s->hostname_field);
444 s->hostname_field = x;
447 bool shall_try_append_again(JournalFile *f, int r) {
449 /* -E2BIG Hit configured limit
451 -EDQUOT Quota limit hit
453 -EHOSTDOWN Other machine
454 -EBUSY Unclean shutdown
455 -EPROTONOSUPPORT Unsupported feature
458 -ESHUTDOWN Already archived */
460 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
461 log_debug("%s: Allocation limit reached, rotating.", f->path);
462 else if (r == -EHOSTDOWN)
463 log_info("%s: Journal file from other machine, rotating.", f->path);
464 else if (r == -EBUSY)
465 log_info("%s: Unclean shutdown, rotating.", f->path);
466 else if (r == -EPROTONOSUPPORT)
467 log_info("%s: Unsupported feature, rotating.", f->path);
468 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
469 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Appends the entry described by iovec[0..n) to the journal selected by
 * find_journal(s, uid). If the first append fails with an error that
 * shall_try_append_again() accepts, the journal is looked up again and the
 * write is retried once; otherwise, and if the retry fails as well, the
 * entry is dropped with an error message. After a successful write a sync
 * is scheduled according to priority. */
476 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
478 bool vacuumed = false;
485 f = find_journal(s, uid);
489 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
490 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
495 f = find_journal(s, uid);
500 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
502 server_schedule_sync(s, priority);
/* Give up if we already vacuumed or the error is not one that
 * rotation can fix. */
506 if (vacuumed || !shall_try_append_again(f, r)) {
/* Sum up the payload size for the error message. */
509 for (i = 0; i < n; i++)
510 size += iovec[i].iov_len;
512 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
519 f = find_journal(s, uid);
523 log_debug("Retrying write.");
524 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
528 for (i = 0; i < n; i++)
529 size += iovec[i].iov_len;
531 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
533 server_schedule_sync(s, priority);
/* Appends metadata fields to the caller's iovec array and hands the result
 * to write_to_journal(). iovec holds n used slots out of m. Fields about
 * the sender are derived from ucred (_PID, _UID, _COMM, cgroup data, ...),
 * OBJECT_* fields from object_pid, and _SOURCE_REALTIME_TIMESTAMP from tv.
 * The cached boot ID, machine ID and hostname fields are added last. The
 * target journal is chosen from s->split_mode, the sender's uid and the
 * session owner uid. Buffers referenced by the iovec are stack-allocated
 * (fixed arrays or strappenda()/alloca()). */
536 static void dispatch_message_real(
538 struct iovec *iovec, unsigned n, unsigned m,
541 const char *label, size_t label_len,
/* Fixed-size buffers, sized for the field name plus the widest decimal
 * representation of the value's type. */
546 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
547 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
548 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
549 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
550 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
551 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
552 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
553 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
559 uid_t realuid = 0, owner = 0, journal_uid;
560 bool owner_valid = false;
562 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
563 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
564 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
565 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
/* The caller must have left room for all fields we may add. */
574 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
/* --- Fields taken directly from the sender's credentials --- */
577 realuid = ucred->uid;
579 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
580 IOVEC_SET_STRING(iovec[n++], pid);
582 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
583 IOVEC_SET_STRING(iovec[n++], uid);
585 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
586 IOVEC_SET_STRING(iovec[n++], gid);
/* --- Fields looked up from the sender's PID --- */
588 r = get_process_comm(ucred->pid, &t);
590 x = strappenda("_COMM=", t);
592 IOVEC_SET_STRING(iovec[n++], x);
595 r = get_process_exe(ucred->pid, &t);
597 x = strappenda("_EXE=", t);
599 IOVEC_SET_STRING(iovec[n++], x);
602 r = get_process_cmdline(ucred->pid, 0, false, &t);
604 x = strappenda("_CMDLINE=", t);
606 IOVEC_SET_STRING(iovec[n++], x);
609 r = get_process_capeff(ucred->pid, &t);
611 x = strappenda("_CAP_EFFECTIVE=", t);
613 IOVEC_SET_STRING(iovec[n++], x);
/* --- Audit session and login uid of the sender --- */
617 r = audit_session_from_pid(ucred->pid, &audit);
619 sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
620 IOVEC_SET_STRING(iovec[n++], audit_session);
623 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
625 sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
626 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* --- Fields derived from the sender's cgroup path --- */
630 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
632 char *session = NULL;
634 x = strappenda("_SYSTEMD_CGROUP=", c);
635 IOVEC_SET_STRING(iovec[n++], x);
637 r = cg_path_get_session(c, &t);
639 session = strappenda("_SYSTEMD_SESSION=", t);
641 IOVEC_SET_STRING(iovec[n++], session);
644 if (cg_path_get_owner_uid(c, &owner) >= 0) {
647 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
648 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* When the cgroup path yields no unit, fall back to the caller-supplied
 * unit_id: as a system unit outside a session, as a user unit inside one. */
651 if (cg_path_get_unit(c, &t) >= 0) {
652 x = strappenda("_SYSTEMD_UNIT=", t);
654 IOVEC_SET_STRING(iovec[n++], x);
655 } else if (unit_id && !session) {
656 x = strappenda("_SYSTEMD_UNIT=", unit_id);
657 IOVEC_SET_STRING(iovec[n++], x);
660 if (cg_path_get_user_unit(c, &t) >= 0) {
661 x = strappenda("_SYSTEMD_USER_UNIT=", t);
663 IOVEC_SET_STRING(iovec[n++], x);
664 } else if (unit_id && session) {
665 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
666 IOVEC_SET_STRING(iovec[n++], x);
669 if (cg_path_get_slice(c, &t) >= 0) {
670 x = strappenda("_SYSTEMD_SLICE=", t);
672 IOVEC_SET_STRING(iovec[n++], x);
/* No cgroup path available: still record the caller-supplied unit. */
676 } else if (unit_id) {
677 x = strappenda("_SYSTEMD_UNIT=", unit_id);
678 IOVEC_SET_STRING(iovec[n++], x);
/* --- SELinux context: label is copied by length and NUL-terminated
 * explicitly; the getpidcon() branch queries the sender's PID instead --- */
684 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
686 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
687 IOVEC_SET_STRING(iovec[n++], x);
689 security_context_t con;
691 if (getpidcon(ucred->pid, &con) >= 0) {
692 x = strappenda("_SELINUX_CONTEXT=", con);
695 IOVEC_SET_STRING(iovec[n++], x);
/* --- OBJECT_* fields: the same lookups, performed for object_pid --- */
704 r = get_process_uid(object_pid, &object_uid);
706 sprintf(o_uid, "OBJECT_UID=%lu", (unsigned long) object_uid);
707 IOVEC_SET_STRING(iovec[n++], o_uid);
710 r = get_process_gid(object_pid, &object_gid);
712 sprintf(o_gid, "OBJECT_GID=%lu", (unsigned long) object_gid);
713 IOVEC_SET_STRING(iovec[n++], o_gid);
716 r = get_process_comm(object_pid, &t);
718 x = strappenda("OBJECT_COMM=", t);
720 IOVEC_SET_STRING(iovec[n++], x);
723 r = get_process_exe(object_pid, &t);
725 x = strappenda("OBJECT_EXE=", t);
727 IOVEC_SET_STRING(iovec[n++], x);
730 r = get_process_cmdline(object_pid, 0, false, &t);
732 x = strappenda("OBJECT_CMDLINE=", t);
734 IOVEC_SET_STRING(iovec[n++], x);
738 r = audit_session_from_pid(object_pid, &audit);
740 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%lu", (unsigned long) audit);
741 IOVEC_SET_STRING(iovec[n++], o_audit_session);
744 r = audit_loginuid_from_pid(object_pid, &loginuid);
746 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
747 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
751 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
753 x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
754 IOVEC_SET_STRING(iovec[n++], x);
756 r = cg_path_get_session(c, &t);
758 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
760 IOVEC_SET_STRING(iovec[n++], x);
763 if (cg_path_get_owner_uid(c, &owner) >= 0) {
764 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
765 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
768 if (cg_path_get_unit(c, &t) >= 0) {
769 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
771 IOVEC_SET_STRING(iovec[n++], x);
774 if (cg_path_get_user_unit(c, &t) >= 0) {
775 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
777 IOVEC_SET_STRING(iovec[n++], x);
/* --- Sender-supplied timestamp --- */
786 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
787 IOVEC_SET_STRING(iovec[n++], source_time);
790 /* Note that strictly speaking storing the boot id here is
791 * redundant since the entry includes this in-line
792 * anyway. However, we need this indexed, too. */
793 if (!isempty(s->boot_id_field))
794 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
796 if (!isempty(s->machine_id_field))
797 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
799 if (!isempty(s->hostname_field))
800 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
/* --- Decide which journal (by uid) receives the entry --- */
804 if (s->split_mode == SPLIT_UID && realuid > 0)
805 /* Split up strictly by any UID */
806 journal_uid = realuid;
807 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
808 /* Split up by login UIDs, this avoids creation of
809 * individual journals for system UIDs. We do this
810 * only if the realuid is not root, in order not to
811 * accidentally leak privileged information to the
812 * user that is logged by a privileged process that is
813 * part of an unprivileged session.*/
818 write_to_journal(s, journal_uid, iovec, n, priority);
/* Logs a message originating from journald itself. Builds an entry with
 * PRIORITY=6, _TRANSPORT=driver, the printf-style formatted MESSAGE= and,
 * unless message_id is SD_ID128_NULL, a message ID field, then dispatches
 * it with journald's own pid/uid/gid as credentials. */
821 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
822 char mid[11 + 32 + 1];
823 char buffer[16 + LINE_MAX + 1];
/* Room for the metadata fields plus the up-to-four fields set below. */
824 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
827 struct ucred ucred = {};
832 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
833 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* "MESSAGE=" is 8 bytes; the formatted text follows it directly. */
835 memcpy(buffer, "MESSAGE=", 8);
836 va_start(ap, format);
837 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
839 char_array_0(buffer);
840 IOVEC_SET_STRING(iovec[n++], buffer);
842 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
843 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
845 IOVEC_SET_STRING(iovec[n++], mid);
848 ucred.pid = getpid();
849 ucred.uid = getuid();
850 ucred.gid = getgid();
852 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
/* Entry point for storing a received message. Checks the priority against
 * s->max_level_store and the storage mode, applies rate limiting keyed on
 * a prefix of the sender's cgroup path (with the available journal space
 * as an input), emits a "Suppressed ..." driver message when messages were
 * dropped, and finally forwards to dispatch_message_real(). */
855 void server_dispatch_message(
857 struct iovec *iovec, unsigned n, unsigned m,
860 const char *label, size_t label_len,
866 _cleanup_free_ char *path = NULL;
870 assert(iovec || n == 0);
875 if (LOG_PRI(priority) > s->max_level_store)
878 /* Stop early in case the information will not be stored
880 if (s->storage == STORAGE_NONE)
886 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
890 /* example: /user/lennart/3/foobar
891 * /system/dbus.service/foobar
893 * So let's cut off everything past the third /, since that is
894 * where user directories start */
896 c = strchr(path, '/');
898 c = strchr(c+1, '/');
900 c = strchr(c+1, '/');
/* rl is the rate limiter's verdict for this path and priority. */
906 rl = journal_rate_limit_test(s->rate_limit, path,
907 priority & LOG_PRIMASK, available_space(s, false));
912 /* Write a suppression message if we suppressed something */
914 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
915 "Suppressed %u messages from %s", rl - 1, path);
918 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
/* Opens the journals the server writes to. The persistent system journal
 * /var/log/journal/<machine-id>/system.journal is opened when none is open
 * yet, storage is persistent or auto, and /run/systemd/journal/flushed
 * exists. The runtime journal under /run/log/journal is opened unless
 * storage is none: only if it already exists when a system journal is
 * available, otherwise it is created. Ends by refreshing and reporting the
 * available space. */
922 static int system_journal_open(Server *s) {
928 r = sd_id128_get_machine(&machine);
930 log_error("Failed to get machine id: %s", strerror(-r));
934 sd_id128_to_string(machine, ids);
936 if (!s->system_journal &&
937 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
938 access("/run/systemd/journal/flushed", F_OK) >= 0) {
940 /* If in auto mode: first try to create the machine
941 * path, but not the prefix.
943 * If in persistent mode: create /var/log/journal and
944 * the machine path */
946 if (s->storage == STORAGE_PERSISTENT)
947 (void) mkdir("/var/log/journal/", 0755);
949 fn = strappenda("/var/log/journal/", ids);
950 (void) mkdir(fn, 0755);
952 fn = strappenda(fn, "/system.journal");
953 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
956 server_fix_perms(s, s->system_journal, 0);
/* -ENOENT and -EROFS are not reported. */
958 if (r != -ENOENT && r != -EROFS)
959 log_warning("Failed to open system journal: %s", strerror(-r));
965 if (!s->runtime_journal &&
966 (s->storage != STORAGE_NONE)) {
968 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
972 if (s->system_journal) {
974 /* Try to open the runtime journal, but only
975 * if it already exists, so that we can flush
976 * it into the system journal */
978 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
983 log_warning("Failed to open runtime journal: %s", strerror(-r));
990 /* OK, we really need the runtime journal, so create
991 * it if necessary. */
993 (void) mkdir_parents(fn, 0755);
994 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
998 log_error("Failed to open runtime journal: %s", strerror(-r));
1003 if (s->runtime_journal)
1004 server_fix_perms(s, s->runtime_journal, 0);
/* verbose=true: recompute and log the space summary. */
1007 available_space(s, true);
/* Copies all entries of the runtime journal into the persistent system
 * journal, then closes the runtime journal and removes /run/log/journal.
 * Applies only when storage is auto or persistent, a runtime journal is
 * open, and a system journal could be opened. A failed copy is retried
 * once when shall_try_append_again() accepts the error. The time spent and
 * the entry count are reported via server_driver_message(). */
1012 int server_flush_to_var(Server *s) {
1014 sd_journal *j = NULL;
1015 char ts[FORMAT_TIMESPAN_MAX];
1022 if (s->storage != STORAGE_AUTO &&
1023 s->storage != STORAGE_PERSISTENT)
1026 if (!s->runtime_journal)
1029 system_journal_open(s);
1031 if (!s->system_journal)
1034 log_debug("Flushing to /var...");
1036 start = now(CLOCK_MONOTONIC);
1038 r = sd_id128_get_machine(&machine);
/* Read back our own runtime journal through the client API. */
1042 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1044 log_error("Failed to read runtime journal: %s", strerror(-r));
1048 sd_journal_set_data_threshold(j, 0);
1050 SD_JOURNAL_FOREACH(j) {
1054 f = j->current_file;
1055 assert(f && f->current_offset > 0);
1059 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1061 log_error("Can't read entry: %s", strerror(-r));
1065 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1069 if (!shall_try_append_again(s->system_journal, r)) {
1070 log_error("Can't write entry: %s", strerror(-r));
1077 if (!s->system_journal) {
1078 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1083 log_debug("Retrying write.");
1084 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1086 log_error("Can't write entry: %s", strerror(-r));
1092 journal_file_post_change(s->system_journal);
1094 journal_file_close(s->runtime_journal);
1095 s->runtime_journal = NULL;
1098 rm_rf("/run/log/journal", false, true, false);
1100 sd_journal_close(j);
1102 server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
/* I/O event callback for the native and syslog datagram sockets. Receives
 * a datagram into s->buffer together with its ancillary data (sender
 * credentials, SELinux label, timestamp, passed file descriptors) and
 * forwards it to the syslog or native message handlers depending on which
 * fd fired. Received file descriptors are closed at the end. */
1107 int process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1108 Server *s = userdata;
1111 assert(fd == s->native_fd || fd == s->syslog_fd);
1113 if (revents != EPOLLIN) {
1114 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1119 struct ucred *ucred = NULL;
1120 struct timeval *tv = NULL;
1121 struct cmsghdr *cmsg;
1123 size_t label_len = 0;
1127 struct cmsghdr cmsghdr;
1129 /* We use NAME_MAX space for the SELinux label
1130 * here. The kernel currently enforces no
1131 * limit, but according to suggestions from
1132 * the SELinux people this will change and it
1133 * will probably be identical to NAME_MAX. For
1134 * now we use that, but this should be updated
1135 * one day when the final limit is known.*/
1136 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1137 CMSG_SPACE(sizeof(struct timeval)) +
1138 CMSG_SPACE(sizeof(int)) + /* fd */
1139 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1141 struct msghdr msghdr = {
1144 .msg_control = &control,
1145 .msg_controllen = sizeof(control),
/* Ask the socket how many bytes are pending so the buffer can be grown. */
1153 if (ioctl(fd, SIOCINQ, &v) < 0) {
1154 log_error("SIOCINQ failed: %m");
1158 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, LINE_MAX + (size_t) v))
1161 iovec.iov_base = s->buffer;
1162 iovec.iov_len = s->buffer_size;
1164 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1166 if (errno == EINTR || errno == EAGAIN)
1169 log_error("recvmsg() failed: %m");
/* Pick the ancillary messages we understand; credentials and timestamp are
 * only accepted when their length matches exactly. */
1173 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1175 if (cmsg->cmsg_level == SOL_SOCKET &&
1176 cmsg->cmsg_type == SCM_CREDENTIALS &&
1177 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1178 ucred = (struct ucred*) CMSG_DATA(cmsg);
1179 else if (cmsg->cmsg_level == SOL_SOCKET &&
1180 cmsg->cmsg_type == SCM_SECURITY) {
1181 label = (char*) CMSG_DATA(cmsg);
1182 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1183 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1184 cmsg->cmsg_type == SO_TIMESTAMP &&
1185 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1186 tv = (struct timeval*) CMSG_DATA(cmsg);
1187 else if (cmsg->cmsg_level == SOL_SOCKET &&
1188 cmsg->cmsg_type == SCM_RIGHTS) {
1189 fds = (int*) CMSG_DATA(cmsg);
1190 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* Syslog socket: payload only, file descriptors are not accepted. */
1194 if (fd == s->syslog_fd) {
1195 if (n > 0 && n_fds == 0) {
1197 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1198 } else if (n_fds > 0)
1199 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native socket: either an inline payload or exactly one fd carrying it. */
1202 if (n > 0 && n_fds == 0)
1203 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1204 else if (n == 0 && n_fds == 1)
1205 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1207 log_warning("Got too many file descriptors via native socket. Ignoring.");
1210 close_many(fds, n_fds);
/* SIGUSR1 handler (registered in setup_signals()): logs the request,
 * creates the /run/systemd/journal/flushed marker that
 * system_journal_open() checks for, and flushes the runtime journal to
 * /var. */
1214 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1215 Server *s = userdata;
1219 log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
1221 touch("/run/systemd/journal/flushed");
1222 server_flush_to_var(s);
/* SIGUSR2 handler (registered in setup_signals()): logs the rotation
 * request together with the sending PID. The calls that act on the request
 * are not visible in this excerpt. */
1228 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1229 Server *s = userdata;
1233 log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
/* Handler registered in setup_signals() for both SIGTERM and SIGINT: logs
 * the received signal and asks the event loop to exit with code 0. */
1240 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1241 Server *s = userdata;
1245 log_received_signal(LOG_INFO, si);
1247 sd_event_exit(s->event, 0);
/* Sets the process signal mask to exactly SIGINT, SIGTERM, SIGUSR1 and
 * SIGUSR2 (SIG_SETMASK replaces the previous mask) and registers an event
 * source for each: SIGUSR1 -> flush, SIGUSR2 -> rotate,
 * SIGTERM/SIGINT -> exit. */
1251 static int setup_signals(Server *s) {
1257 assert_se(sigemptyset(&mask) == 0);
1258 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1259 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1261 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1265 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1269 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1273 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
/* Applies systemd.journald.forward_to_{syslog,kmsg,console,wall}= switches
 * from the kernel command line to the matching Server flags. Values that
 * fail to parse and unknown systemd.journald* words are warned about and
 * ignored. The numeric offsets added to word (35, 33, 36, 33) are the
 * lengths of the respective "systemd.journald....=" prefixes, so word + N
 * points at the value. */
1280 static int server_parse_proc_cmdline(Server *s) {
1281 _cleanup_free_ char *line = NULL;
1286 r = proc_cmdline(&line);
1288 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1292 FOREACH_WORD_QUOTED(w, l, line, state) {
1293 _cleanup_free_ char *word;
/* Private NUL-terminated copy of the current word. */
1295 word = strndup(w, l);
1299 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1300 r = parse_boolean(word + 35);
1302 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1304 s->forward_to_syslog = r;
1305 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1306 r = parse_boolean(word + 33);
1308 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1310 s->forward_to_kmsg = r;
1311 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1312 r = parse_boolean(word + 36);
1314 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1316 s->forward_to_console = r;
1317 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1318 r = parse_boolean(word + 33);
1320 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1322 s->forward_to_wall = r;
1323 } else if (startswith(word, "systemd.journald"))
1324 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Parses the "Journal" section of /etc/systemd/journald.conf into the
 * Server object using the gperf-generated lookup table. A missing file
 * (ENOENT) is handled separately from other open failures, which are
 * warned about. */
1330 static int server_parse_config_file(Server *s) {
1331 static const char fn[] = "/etc/systemd/journald.conf";
1332 _cleanup_fclose_ FILE *f = NULL;
/* "e" in the mode string is the glibc extension for O_CLOEXEC. */
1337 f = fopen(fn, "re");
1339 if (errno == ENOENT)
1342 log_warning("Failed to open configuration file %s: %m", fn);
1346 r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1347 (void*) journald_gperf_lookup, false, false, s);
1349 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Timer callback that server_schedule_sync() registers on the event loop;
 * userdata is the Server. The body beyond the cast is not visible in this
 * excerpt. */
1354 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1355 Server *s = userdata;
/* Arranges for the journals to be synced to disk. Messages of priority
 * LOG_CRIT or more severe are handled immediately. Otherwise, unless a
 * sync is already scheduled, a one-shot monotonic timer is armed for
 * now + s->sync_interval_usec. The timer source is created on first use
 * and re-armed afterwards. Nothing is armed when the interval is 0. */
1363 int server_schedule_sync(Server *s, int priority) {
1368 if (priority <= LOG_CRIT) {
1369 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1374 if (s->sync_scheduled)
1377 if (s->sync_interval_usec > 0) {
1380 r = sd_event_get_now_monotonic(s->event, &when);
1384 when += s->sync_interval_usec;
1386 if (!s->sync_event_source) {
1387 r = sd_event_add_monotonic(s->event, &s->sync_event_source, when, 0, server_dispatch_sync, s);
1391 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
/* Existing source: move the deadline and enable it for one shot. */
1393 r = sd_event_source_set_time(s->sync_event_source, when);
1397 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1402 s->sync_scheduled = true;
/* I/O callback registered by server_open_hostname() on the
 * /proc/sys/kernel/hostname fd: refreshes the cached _HOSTNAME= field. */
1408 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1409 Server *s = userdata;
1413 server_cache_hostname(s);
/* Opens /proc/sys/kernel/hostname and registers it with the event loop so
 * that dispatch_hostname_change() runs on hostname changes. If the
 * registration fails in the way old kernels cause, the fd is closed again
 * and the failure is ignored with a warning; other failures are logged as
 * errors. The event source's priority is set to
 * SD_EVENT_PRIORITY_IMPORTANT-10. */
1417 static int server_open_hostname(Server *s) {
1422 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1423 if (s->hostname_fd < 0) {
1424 log_error("Failed to open /proc/sys/kernel/hostname: %m");
1428 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1430 /* kernels prior to 3.2 don't support polling this file. Ignore
1433 log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1435 close_nointr_nofail(s->hostname_fd);
1436 s->hostname_fd = -1;
1440 log_error("Failed to register hostname fd in event loop: %s", strerror(-r));
1444 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1446 log_error("Failed to adjust priority of host name event source: %s", strerror(-r));
/* Initializes a Server. Sets defaults, reads the configuration file and
 * kernel command line, creates the user-journal hashmap, mmap cache and
 * event loop, adopts sockets passed in by the service manager, opens the
 * remaining sockets and kernel interfaces, installs signal handlers,
 * caches the hostname/boot ID/machine ID fields, and finally opens the
 * journals. */
1453 int server_init(Server *s) {
/* -1 marks every fd as not yet open. */
1459 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->hostname_fd = -1;
1463 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1464 s->sync_scheduled = false;
1466 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1467 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1469 s->forward_to_syslog = true;
1470 s->forward_to_wall = true;
1472 s->max_level_store = LOG_DEBUG;
1473 s->max_level_syslog = LOG_DEBUG;
1474 s->max_level_kmsg = LOG_NOTICE;
1475 s->max_level_console = LOG_INFO;
1476 s->max_level_wall = LOG_EMERG;
/* Fill both metrics structs with 0xFF bytes.
 * NOTE(review): presumably an "unset" sentinel for each field - confirm. */
1478 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1479 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* The kernel command line is parsed after the config file, so its
 * assignments are applied last. */
1481 server_parse_config_file(s);
1482 server_parse_proc_cmdline(s);
/* If exactly one of interval/burst is zero, zero both. */
1483 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1484 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1485 (long long unsigned) s->rate_limit_interval,
1486 s->rate_limit_burst);
1487 s->rate_limit_interval = s->rate_limit_burst = 0;
1490 mkdir_p("/run/systemd/journal", 0755);
1492 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1493 if (!s->user_journals)
1496 s->mmap = mmap_cache_new();
1500 r = sd_event_default(&s->event);
1502 log_error("Failed to create event loop: %s", strerror(-r));
1506 sd_event_set_watchdog(s->event, true);
/* Adopt pre-opened sockets; at most one of each kind is accepted. */
1508 n = sd_listen_fds(true);
1510 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1514 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1516 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1518 if (s->native_fd >= 0) {
1519 log_error("Too many native sockets passed.");
1525 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1527 if (s->stdout_fd >= 0) {
1528 log_error("Too many stdout sockets passed.");
1534 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1536 if (s->syslog_fd >= 0) {
1537 log_error("Too many /dev/log sockets passed.");
1544 log_error("Unknown socket passed.");
/* Open the remaining input sources. */
1549 r = server_open_syslog_socket(s);
1553 r = server_open_native_socket(s);
1557 r = server_open_stdout_socket(s);
1561 r = server_open_dev_kmsg(s);
1565 r = server_open_kernel_seqnum(s);
1569 r = server_open_hostname(s);
1573 r = setup_signals(s);
1577 s->udev = udev_new();
1581 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1585 r = cg_get_root_path(&s->cgroup_root);
1589 server_cache_hostname(s);
1590 server_cache_boot_id(s);
1591 server_cache_machine_id(s);
1593 r = system_journal_open(s);
/* Calls journal_file_maybe_append_tag() with the current realtime clock on
 * the system journal (if open) and on every user journal. */
1600 void server_maybe_append_tags(Server *s) {
1606 n = now(CLOCK_REALTIME);
1608 if (s->system_journal)
1609 journal_file_maybe_append_tag(s->system_journal, n);
1611 HASHMAP_FOREACH(f, s->user_journals, i)
1612 journal_file_maybe_append_tag(f, n);
/* Releases everything owned by the Server: stdout streams, the system,
 * runtime and user journals, all event sources and the event loop, the open
 * file descriptors, the rate limiter, the kernel seqnum mapping, the
 * cgroup root string, the mmap cache and the udev context. */
1616 void server_done(Server *s) {
/* Loops until the stream list is empty.
 * NOTE(review): presumably stdout_stream_free() unlinks the stream from
 * s->stdout_streams - confirm. */
1620 while (s->stdout_streams)
1621 stdout_stream_free(s->stdout_streams);
1623 if (s->system_journal)
1624 journal_file_close(s->system_journal);
1626 if (s->runtime_journal)
1627 journal_file_close(s->runtime_journal);
/* Drain the hashmap, closing each user journal, before freeing it. */
1629 while ((f = hashmap_steal_first(s->user_journals)))
1630 journal_file_close(f);
1632 hashmap_free(s->user_journals);
1634 sd_event_source_unref(s->syslog_event_source);
1635 sd_event_source_unref(s->native_event_source);
1636 sd_event_source_unref(s->stdout_event_source);
1637 sd_event_source_unref(s->dev_kmsg_event_source);
1638 sd_event_source_unref(s->sync_event_source);
1639 sd_event_source_unref(s->sigusr1_event_source);
1640 sd_event_source_unref(s->sigusr2_event_source);
1641 sd_event_source_unref(s->sigterm_event_source);
1642 sd_event_source_unref(s->sigint_event_source);
1643 sd_event_source_unref(s->hostname_event_source);
1644 sd_event_unref(s->event);
/* Close only fds that were actually opened (>= 0). */
1646 if (s->syslog_fd >= 0)
1647 close_nointr_nofail(s->syslog_fd);
1649 if (s->native_fd >= 0)
1650 close_nointr_nofail(s->native_fd);
1652 if (s->stdout_fd >= 0)
1653 close_nointr_nofail(s->stdout_fd);
1655 if (s->dev_kmsg_fd >= 0)
1656 close_nointr_nofail(s->dev_kmsg_fd);
1658 if (s->hostname_fd >= 0)
1659 close_nointr_nofail(s->hostname_fd);
1662 journal_rate_limit_free(s->rate_limit);
/* The kernel sequence number is a single mmap()ed uint64_t. */
1664 if (s->kernel_seqnum)
1665 munmap(s->kernel_seqnum, sizeof(uint64_t));
1669 free(s->cgroup_root);
1672 mmap_cache_unref(s->mmap);
1675 udev_unref(s->udev);