1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33 #include "sd-daemon.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
42 #include "conf-parser.h"
43 #include "selinux-util.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-rate-limit.h"
48 #include "journald-kmsg.h"
49 #include "journald-syslog.h"
50 #include "journald-stream.h"
51 #include "journald-console.h"
52 #include "journald-native.h"
53 #include "journald-server.h"
57 #include <acl/libacl.h>
62 #include <selinux/selinux.h>
/* Cap on the number of per-user journal files held in s->user_journals;
 * find_journal() closes entries while the map size is at or above this. */
65 #define USER_JOURNALS_MAX 1024
/* Initial value of s->sync_interval_usec set by server_init(). */
67 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
/* Initial values of s->rate_limit_interval and s->rate_limit_burst set by server_init(). */
68 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
69 #define DEFAULT_RATE_LIMIT_BURST 1000
/* Maximum age of the figure cached by available_space() before it is recomputed. */
71 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* String names for the Storage enum values. The two macros after the table
 * presumably generate the string<->enum lookup helpers and the
 * config_parse_storage() configuration parser from it. */
73 static const char* const storage_table[_STORAGE_MAX] = {
74 [STORAGE_AUTO] = "auto",
75 [STORAGE_VOLATILE] = "volatile",
76 [STORAGE_PERSISTENT] = "persistent",
77 [STORAGE_NONE] = "none"
80 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
81 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* String names for the SplitMode enum values, plus the generated lookup and
 * config_parse_split_mode() helpers.
 * NOTE(review): dispatch_message_real() also tests SPLIT_UID, but no entry for
 * it is visible in this table as shown - confirm against the full file. */
83 static const char* const split_mode_table[_SPLIT_MAX] = {
84 [SPLIT_LOGIN] = "login",
86 [SPLIT_NONE] = "none",
89 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
90 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Computes how many more bytes the journal currently in use may grow by and
 * caches the result, together with a CLOCK_MONOTONIC timestamp, on the Server.
 * The figure is MIN(max_use, filesystem space minus keep_free) reduced by the
 * space journal files already occupy in the machine's journal directory.
 * Returns the (possibly cached) value in bytes. */
92 static uint64_t available_space(Server *s, bool verbose) {
94 _cleanup_free_ char *p = NULL;
97 uint64_t sum = 0, ss_avail = 0, avail = 0;
99 _cleanup_closedir_ DIR *d = NULL;
104 ts = now(CLOCK_MONOTONIC);
/* Serve the cached figure while it is younger than RECHECK_AVAILABLE_SPACE_USEC.
 * NOTE(review): the remainder of this condition is not visible here. */
106 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
108 return s->cached_available_space;
110 r = sd_id128_get_machine(&machine);
/* With the system journal open, measure /var/log/journal against the system
 * metrics; the /run/log/journal path and runtime metrics are the alternative. */
114 if (s->system_journal) {
115 f = "/var/log/journal/";
116 m = &s->system_metrics;
118 f = "/run/log/journal/";
119 m = &s->runtime_metrics;
/* Directory to scan: base path followed by the machine id string. */
124 p = strappend(f, sd_id128_to_string(machine, ids));
132 if (fstatvfs(dirfd(d), &ss) < 0)
141 if (!de && errno != 0)
/* Only files named *.journal or *.journal~ that are regular files count
 * towards current usage. */
147 if (!endswith(de->d_name, ".journal") &&
148 !endswith(de->d_name, ".journal~"))
151 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
154 if (!S_ISREG(st.st_mode))
/* st_blocks is converted to bytes using 512-byte units. */
157 sum += (uint64_t) st.st_blocks * 512UL;
/* Space available on the filesystem according to fstatvfs(). */
160 ss_avail = ss.f_bsize * ss.f_bavail;
162 /* If we reached a high mark, we will always allow this much
163 * again, unless usage goes above max_use. This watermark
164 * value is cached so that we don't give up space on pressure,
165 * but hover below the maximum usage. */
/* LESS_BY is presumably a subtraction that floors at zero - TODO confirm. */
170 avail = LESS_BY(ss_avail, m->keep_free);
172 s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
173 s->cached_available_space_timestamp = ts;
/* Usage report written to the journal itself; presumably only emitted when
 * 'verbose' is set (the guarding condition is not visible here). */
176 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
177 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
179 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
180 "%s journal is using %s (max allowed %s, "
181 "trying to leave %s free of %s available → current limit %s).",
182 s->system_journal ? "Permanent" : "Runtime",
183 format_bytes(fb1, sizeof(fb1), sum),
184 format_bytes(fb2, sizeof(fb2), m->max_use),
185 format_bytes(fb3, sizeof(fb3), m->keep_free),
186 format_bytes(fb4, sizeof(fb4), ss_avail),
187 format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
190 return s->cached_available_space;
/* Normalizes access to journal file 'f': sets its mode to 0640 with fchmod()
 * and patches the file's ACL so that it contains an ACL_USER entry for 'uid'
 * carrying ACL_READ. Every failure is logged as a warning and otherwise
 * ignored.
 * NOTE(review): the uid <= SYSTEM_UID_MAX test presumably skips the ACL part
 * for system users; its body is not visible here. */
193 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
198 acl_permset_t permset;
203 r = fchmod(f->fd, 0640);
/* fchmod() signals failure by returning -1 with the cause in errno, so
 * strerror(-r) would always describe errno 1 regardless of the real error.
 * Report errno through %m, as the other messages in this function do. */
205 log_warning("Failed to fix access mode on %s, ignoring: %m", f->path);
208 if (uid <= SYSTEM_UID_MAX)
211 acl = acl_get_fd(f->fd);
213 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an existing entry for this uid; the calls below create an
 * ACL_USER entry qualified with 'uid' (presumably only when none was found). */
217 r = acl_find_uid(acl, uid, &entry);
220 if (acl_create_entry(&acl, &entry) < 0 ||
221 acl_set_tag_type(entry, ACL_USER) < 0 ||
222 acl_set_qualifier(entry, &uid) < 0) {
223 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
228 /* We do not recalculate the mask unconditionally here,
229 * so that the fchmod() mask above stays intact. */
230 if (acl_get_permset(entry, &permset) < 0 ||
231 acl_add_perm(permset, ACL_READ) < 0 ||
232 calc_acl_mask_if_needed(&acl) < 0) {
233 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Write the patched ACL back to the file. */
237 if (acl_set_fd(f->fd, acl) < 0)
238 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Selects the journal file an entry attributed to 'uid' should be written to.
 * Returns the runtime journal whenever one is open, the system journal for
 * uids up to SYSTEM_UID_MAX, and otherwise a per-user file
 * /var/log/journal/<machine-id>/user-<uid>.journal that is opened on demand
 * and remembered in s->user_journals. The visible fallback paths all return
 * the system journal. */
245 static JournalFile* find_journal(Server *s, uid_t uid) {
246 _cleanup_free_ char *p = NULL;
253 /* We split up user logs only on /var, not on /run. If the
254 * runtime file is open, we write to it exclusively, in order
255 * to guarantee proper order as soon as we flush /run to
256 * /var and close the runtime file. */
258 if (s->runtime_journal)
259 return s->runtime_journal;
261 if (uid <= SYSTEM_UID_MAX)
262 return s->system_journal;
264 r = sd_id128_get_machine(&machine);
266 return s->system_journal;
/* Look for an already-open per-user file (presumably returned directly when
 * found; that branch is not visible here). */
268 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
272 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
273 SD_ID128_FORMAT_VAL(machine), uid) < 0)
274 return s->system_journal;
/* Keep the number of open per-user files below USER_JOURNALS_MAX. */
276 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
277 /* Too many open? Then let's close one */
278 f = hashmap_steal_first(s->user_journals);
280 journal_file_close(f);
/* Per-user files are opened with the system metrics and the server's
 * compress/seal settings. */
283 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
285 return s->system_journal;
287 server_fix_perms(s, f, uid);
/* Remember the file in the map; the close + system journal fallback below is
 * presumably the hashmap_put() failure path. */
289 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
291 journal_file_close(f);
292 return s->system_journal;
/* Rotates the journal file referenced by *f using journal_file_rotate(),
 * passing the server's compress setting and the given 'seal' flag, then
 * re-applies permissions for 'uid' on the resulting file. 'name' is a
 * human-readable label for the journal.
 * NOTE(review): the conditions selecting between the two error messages and
 * the return value are not visible here. */
298 static int do_rotate(Server *s, JournalFile **f, const char* name,
299 bool seal, uint32_t uid) {
306 r = journal_file_rotate(f, s->compress, seal);
309 log_error("Failed to rotate %s: %s",
310 (*f)->path, strerror(-r));
312 log_error("Failed to create new %s journal: %s",
315 server_fix_perms(s, *f, uid);
/* Rotates every journal the server has open: the runtime journal (never
 * sealed), the system journal, and each per-user journal in s->user_journals.
 * For user journals the map entry is either replaced with the file left in
 * 'f' after rotation or removed; which branch runs depends on conditions not
 * visible here. */
319 void server_rotate(Server *s) {
325 log_debug("Rotating...");
327 do_rotate(s, &s->runtime_journal, "runtime", false, 0);
328 do_rotate(s, &s->system_journal, "system", s->seal, 0);
/* The map key is the uid, recovered with PTR_TO_UINT32(). */
330 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
331 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
333 hashmap_replace(s->user_journals, k, f);
335 /* Old file has been closed and deallocated */
336 hashmap_remove(s->user_journals, k);
/* Calls journal_file_set_offline() on the system journal (if open) and on
 * every per-user journal, then switches the sync timer event source off and
 * clears s->sync_scheduled. Failures are logged. */
340 void server_sync(Server *s) {
346 if (s->system_journal) {
347 r = journal_file_set_offline(s->system_journal);
349 log_error("Failed to sync system journal: %s", strerror(-r));
352 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
353 r = journal_file_set_offline(f);
355 log_error("Failed to sync user journal: %s", strerror(-r));
/* The timer is armed again by server_schedule_sync(). */
358 if (s->sync_event_source) {
359 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
361 log_error("Failed to disable sync timer source: %s", strerror(-r));
364 s->sync_scheduled = false;
/* Vacuums the journal directory formed by 'path' followed by 'ids', bounded by
 * metrics->max_use and s->max_retention_usec; journal_directory_vacuum() also
 * receives &s->oldest_file_usec. Errors other than -ENOENT are logged.
 * NOTE(review): 'f' is not referenced in the visible lines. */
367 static void do_vacuum(Server *s, char *ids, JournalFile *f, const char* path,
368 JournalMetrics *metrics) {
375 p = strappenda(path, ids);
376 r = journal_directory_vacuum(p, metrics->max_use, s->max_retention_usec, &s->oldest_file_usec);
377 if (r < 0 && r != -ENOENT)
378 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* Vacuums both the persistent (/var/log/journal/) and the runtime
 * (/run/log/journal/) directory of this machine, each against its own
 * metrics. s->oldest_file_usec is reset to 0 beforehand. */
381 void server_vacuum(Server *s) {
386 log_debug("Vacuuming...");
388 s->oldest_file_usec = 0;
390 r = sd_id128_get_machine(&machine);
392 log_error("Failed to get machine ID: %s", strerror(-r));
395 sd_id128_to_string(machine, ids);
397 do_vacuum(s, ids, s->system_journal, "/var/log/journal/", &s->system_metrics);
398 do_vacuum(s, ids, s->runtime_journal, "/run/log/journal/", &s->runtime_metrics);
/* Zero the cache timestamp used by available_space(), presumably so that the
 * space figure is recomputed after files were deleted. */
400 s->cached_available_space_timestamp = 0;
/* Fills s->machine_id_field with "_MACHINE_ID=" followed by the machine id in
 * string form, ready to be attached to entries by dispatch_message_real(). */
403 static void server_cache_machine_id(Server *s) {
409 r = sd_id128_get_machine(&id);
/* stpcpy() returns the end of the copied prefix, so the id is formatted
 * directly behind it. */
413 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
/* Fills s->boot_id_field with "_BOOT_ID=" followed by the boot id in string
 * form, ready to be attached to entries by dispatch_message_real(). */
416 static void server_cache_boot_id(Server *s) {
422 r = sd_id128_get_boot(&id);
/* The id is formatted directly behind the prefix copied by stpcpy(). */
426 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
/* Rebuilds s->hostname_field as "_HOSTNAME=" followed by the current host
 * name. The previous field is freed before the newly allocated string takes
 * its place. */
429 static void server_cache_hostname(Server *s) {
430 _cleanup_free_ char *t = NULL;
435 t = gethostname_malloc();
439 x = strappend("_HOSTNAME=", t);
443 free(s->hostname_field);
444 s->hostname_field = x;
/* Classifies a failed journal write. 'r' is a negative errno-style code from
 * the journal file layer; every recognized code is logged against f->path
 * with a message saying the file will be rotated. Callers use the result to
 * decide between giving up and retrying the write.
 * NOTE(review): the return statements are not visible here. */
447 bool shall_try_append_again(JournalFile *f, int r) {
449 /* -E2BIG Hit configured limit
451 -EDQUOT Quota limit hit
453 -EHOSTDOWN Other machine
454 -EBUSY Unclean shutdown
455 -EPROTONOSUPPORT Unsupported feature
458 -ESHUTDOWN Already archived */
460 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
461 log_debug("%s: Allocation limit reached, rotating.", f->path);
462 else if (r == -EHOSTDOWN)
463 log_info("%s: Journal file from other machine, rotating.", f->path);
464 else if (r == -EBUSY)
465 log_info("%s: Unclean shutdown, rotating.", f->path);
466 else if (r == -EPROTONOSUPPORT)
467 log_info("%s: Unsupported feature, rotating.", f->path);
/* Every code in this chain is negated. A comparison against positive
 * ESHUTDOWN could never match a negative 'r', which would leave the
 * "already archived" case documented above unrecognized. */
468 else if (r == -EBADMSG || r == -ENODATA || r == -ESHUTDOWN)
469 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Appends one entry made of 'n' iovec fields to the journal selected for 'uid'
 * by find_journal(). Visible flow: if journal_file_rotate_suggested() fires,
 * the journal is looked up again; the append is attempted; on a failure that
 * shall_try_append_again() accepts (and unless 'vacuumed' is already set) the
 * journal is looked up once more and the append retried. A sync is scheduled
 * according to 'priority' after each append attempt (presumably only on
 * success). Failures are logged and the entry is dropped. */
476 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
478 bool vacuumed = false;
485 f = find_journal(s, uid);
489 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
490 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
495 f = find_journal(s, uid);
500 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
502 server_schedule_sync(s, priority);
/* Give up without retrying: sum the payload size only for the error message. */
506 if (vacuumed || !shall_try_append_again(f, r)) {
509 for (i = 0; i < n; i++)
510 size += iovec[i].iov_len;
512 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
/* Second and final attempt, against a freshly looked-up journal. */
519 f = find_journal(s, uid);
523 log_debug("Retrying write.");
524 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
528 for (i = 0; i < n; i++)
529 size += iovec[i].iov_len;
531 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
533 server_schedule_sync(s, priority);
/* Appends server-generated metadata fields to a message and passes it to
 * write_to_journal(). 'iovec' holds 'n' fields supplied by the caller and has
 * room for 'm' in total; new fields are added in place at iovec[n++].
 * Fields visible here: sender credentials and process details looked up by
 * pid, audit session/loginuid, cgroup-derived session/owner/unit/slice, the
 * SELinux context, an OBJECT_-prefixed set of the same kind for 'object_pid',
 * the source timestamp, and the cached boot id, machine id and hostname.
 * The uid whose journal receives the entry is then chosen from s->split_mode. */
536 static void dispatch_message_real(
538 struct iovec *iovec, unsigned n, unsigned m,
541 const char *label, size_t label_len,
/* Fixed-size buffers for the numeric fields: prefix length plus the widest
 * decimal rendering of the value's type. */
546 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
547 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
548 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
549 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
550 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
551 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
552 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
553 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
559 uid_t realuid = 0, owner = 0, journal_uid;
560 bool owner_valid = false;
562 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
563 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
564 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
565 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
/* The caller must have left room for all metadata fields, plus the object
 * fields when an object pid is given. */
574 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
/* --- Fields describing the sending process (from ucred) --- */
577 realuid = ucred->uid;
579 sprintf(pid, "_PID="PID_FMT, ucred->pid);
580 IOVEC_SET_STRING(iovec[n++], pid);
582 sprintf(uid, "_UID="UID_FMT, ucred->uid);
583 IOVEC_SET_STRING(iovec[n++], uid);
585 sprintf(gid, "_GID="GID_FMT, ucred->gid);
586 IOVEC_SET_STRING(iovec[n++], gid);
/* strappenda() results are used without being freed below; presumably
 * stack-allocated - TODO confirm. */
588 r = get_process_comm(ucred->pid, &t);
590 x = strappenda("_COMM=", t);
592 IOVEC_SET_STRING(iovec[n++], x);
595 r = get_process_exe(ucred->pid, &t);
597 x = strappenda("_EXE=", t);
599 IOVEC_SET_STRING(iovec[n++], x);
602 r = get_process_cmdline(ucred->pid, 0, false, &t);
604 x = strappenda("_CMDLINE=", t);
606 IOVEC_SET_STRING(iovec[n++], x);
609 r = get_process_capeff(ucred->pid, &t);
611 x = strappenda("_CAP_EFFECTIVE=", t);
613 IOVEC_SET_STRING(iovec[n++], x);
/* Audit session and login uid of the sender. */
617 r = audit_session_from_pid(ucred->pid, &audit);
619 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
620 IOVEC_SET_STRING(iovec[n++], audit_session);
623 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
625 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
626 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* Cgroup path of the sender, relative to s->cgroup_root; session, owner
 * uid, unit, user unit and slice are all derived from that path. */
630 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
632 char *session = NULL;
634 x = strappenda("_SYSTEMD_CGROUP=", c);
635 IOVEC_SET_STRING(iovec[n++], x);
637 r = cg_path_get_session(c, &t);
639 session = strappenda("_SYSTEMD_SESSION=", t);
641 IOVEC_SET_STRING(iovec[n++], session);
/* 'owner' is consulted again for the SPLIT_LOGIN decision near the end
 * (owner_valid is presumably set in this branch - not visible here). */
644 if (cg_path_get_owner_uid(c, &owner) >= 0) {
647 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
648 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* When the cgroup yields no unit, the caller-provided unit_id is used:
 * as the system unit outside a session, as the user unit inside one. */
651 if (cg_path_get_unit(c, &t) >= 0) {
652 x = strappenda("_SYSTEMD_UNIT=", t);
654 IOVEC_SET_STRING(iovec[n++], x);
655 } else if (unit_id && !session) {
656 x = strappenda("_SYSTEMD_UNIT=", unit_id);
657 IOVEC_SET_STRING(iovec[n++], x);
660 if (cg_path_get_user_unit(c, &t) >= 0) {
661 x = strappenda("_SYSTEMD_USER_UNIT=", t);
663 IOVEC_SET_STRING(iovec[n++], x);
664 } else if (unit_id && session) {
665 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
666 IOVEC_SET_STRING(iovec[n++], x);
669 if (cg_path_get_slice(c, &t) >= 0) {
670 x = strappenda("_SYSTEMD_SLICE=", t);
672 IOVEC_SET_STRING(iovec[n++], x);
/* No cgroup path available: fall back to the caller-provided unit_id. */
676 } else if (unit_id) {
677 x = strappenda("_SYSTEMD_UNIT=", unit_id);
678 IOVEC_SET_STRING(iovec[n++], x);
/* SELinux context: built from the supplied label (label_len bytes, NUL added
 * here) or, in the other visible branch, queried with getpidcon(). */
684 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
686 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
687 IOVEC_SET_STRING(iovec[n++], x);
689 security_context_t con;
691 if (getpidcon(ucred->pid, &con) >= 0) {
692 x = strappenda("_SELINUX_CONTEXT=", con);
695 IOVEC_SET_STRING(iovec[n++], x);
/* --- Fields describing the process the message is about (object_pid) --- */
704 r = get_process_uid(object_pid, &object_uid);
706 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
707 IOVEC_SET_STRING(iovec[n++], o_uid);
710 r = get_process_gid(object_pid, &object_gid);
712 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
713 IOVEC_SET_STRING(iovec[n++], o_gid);
716 r = get_process_comm(object_pid, &t);
718 x = strappenda("OBJECT_COMM=", t);
720 IOVEC_SET_STRING(iovec[n++], x);
723 r = get_process_exe(object_pid, &t);
725 x = strappenda("OBJECT_EXE=", t);
727 IOVEC_SET_STRING(iovec[n++], x);
730 r = get_process_cmdline(object_pid, 0, false, &t);
732 x = strappenda("OBJECT_CMDLINE=", t);
734 IOVEC_SET_STRING(iovec[n++], x);
738 r = audit_session_from_pid(object_pid, &audit);
740 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
741 IOVEC_SET_STRING(iovec[n++], o_audit_session);
744 r = audit_loginuid_from_pid(object_pid, &loginuid);
746 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
747 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
751 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
753 x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
754 IOVEC_SET_STRING(iovec[n++], x);
756 r = cg_path_get_session(c, &t);
758 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
760 IOVEC_SET_STRING(iovec[n++], x);
763 if (cg_path_get_owner_uid(c, &owner) >= 0) {
764 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
765 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
768 if (cg_path_get_unit(c, &t) >= 0) {
769 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
771 IOVEC_SET_STRING(iovec[n++], x);
774 if (cg_path_get_user_unit(c, &t) >= 0) {
775 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
777 IOVEC_SET_STRING(iovec[n++], x);
/* Timestamp supplied with the message ('tv'), rendered via timeval_load(). */
786 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
787 IOVEC_SET_STRING(iovec[n++], source_time);
790 /* Note that strictly speaking storing the boot id here is
791 * redundant since the entry includes this in-line
792 * anyway. However, we need this indexed, too. */
793 if (!isempty(s->boot_id_field))
794 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
796 if (!isempty(s->machine_id_field))
797 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
799 if (!isempty(s->hostname_field))
800 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
/* Choose the uid whose journal receives the entry (see find_journal()).
 * NOTE(review): the assignments for the SPLIT_LOGIN case and for the default
 * case are not visible here. */
804 if (s->split_mode == SPLIT_UID && realuid > 0)
805 /* Split up strictly by any UID */
806 journal_uid = realuid;
807 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
808 /* Split up by login UIDs. We do this only if the
809 * realuid is not root, in order not to accidentally
810 * leak privileged information to the user that is
811 * logged by a privileged process that is part of an
812 * unprivileged session.*/
817 write_to_journal(s, journal_uid, iovec, n, priority);
/* Logs a message originating from the journal daemon itself. Builds the
 * fields PRIORITY=6, _TRANSPORT=driver and MESSAGE=<printf-style text>, adds a
 * message id field when 'message_id' is not SD_ID128_NULL, and dispatches the
 * entry at LOG_INFO with this process's own pid/uid/gid as credentials. */
820 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
/* 'mid' is presumably sized for "MESSAGE_ID=" (11) + 32 hex digits + NUL. */
821 char mid[11 + 32 + 1];
822 char buffer[16 + LINE_MAX + 1];
/* Room for the (up to four) fields set here plus the metadata fields that
 * dispatch_message_real() appends. */
823 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
826 struct ucred ucred = {};
831 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
832 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The formatted text is written right behind the 8-byte "MESSAGE=" prefix;
 * vsnprintf() bounds it to the remaining buffer. */
834 memcpy(buffer, "MESSAGE=", 8);
835 va_start(ap, format);
836 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
838 char_array_0(buffer);
839 IOVEC_SET_STRING(iovec[n++], buffer);
841 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
842 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
844 IOVEC_SET_STRING(iovec[n++], mid);
847 ucred.pid = getpid();
848 ucred.uid = getuid();
849 ucred.gid = getgid();
851 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
/* Entry point for storing a received message. Visible steps: filter on
 * s->max_level_store and on STORAGE_NONE, apply rate limiting keyed on the
 * sender's cgroup path (shortened at its third '/') with the current
 * available_space() as input, report how many messages were suppressed, and
 * finally forward everything to dispatch_message_real().
 * NOTE(review): the early-return statements and the handling of the
 * rate-limit result 'rl' are only partly visible here. */
854 void server_dispatch_message(
856 struct iovec *iovec, unsigned n, unsigned m,
859 const char *label, size_t label_len,
865 _cleanup_free_ char *path = NULL;
869 assert(iovec || n == 0);
874 if (LOG_PRI(priority) > s->max_level_store)
877 /* Stop early in case the information will not be stored
879 if (s->storage == STORAGE_NONE)
885 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
889 /* example: /user/lennart/3/foobar
890 * /system/dbus.service/foobar
892 * So let's cut off everything past the third /, since that is
893 * where user directories start */
895 c = strchr(path, '/');
897 c = strchr(c+1, '/');
899 c = strchr(c+1, '/');
/* Only the priority level bits are passed to the rate limiter. */
905 rl = journal_rate_limit_test(s->rate_limit, path,
906 priority & LOG_PRIMASK, available_space(s, false));
911 /* Write a suppression message if we suppressed something */
913 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
914 "Suppressed %u messages from %s", rl - 1, path);
917 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
/* Opens the journals the server writes to, depending on s->storage.
 * The persistent file /var/log/journal/<machine-id>/system.journal is opened
 * only for STORAGE_PERSISTENT or STORAGE_AUTO and only once the flag file
 * /run/systemd/journal/flushed exists. The runtime file
 * /run/log/journal/<machine-id>/system.journal is handled for every mode but
 * STORAGE_NONE: opened only if it already exists when a system journal is
 * open, created otherwise. Ends by refreshing the available-space figure in
 * verbose mode. */
921 static int system_journal_open(Server *s) {
927 r = sd_id128_get_machine(&machine);
929 log_error("Failed to get machine id: %s", strerror(-r));
933 sd_id128_to_string(machine, ids);
935 if (!s->system_journal &&
936 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
937 access("/run/systemd/journal/flushed", F_OK) >= 0) {
939 /* If in auto mode: first try to create the machine
940 * path, but not the prefix.
942 * If in persistent mode: create /var/log/journal and
943 * the machine path */
945 if (s->storage == STORAGE_PERSISTENT)
946 (void) mkdir("/var/log/journal/", 0755);
948 fn = strappenda("/var/log/journal/", ids);
949 (void) mkdir(fn, 0755);
951 fn = strappenda(fn, "/system.journal");
952 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
955 server_fix_perms(s, s->system_journal, 0);
/* -ENOENT and -EROFS are not reported as warnings. */
957 if (r != -ENOENT && r != -EROFS)
958 log_warning("Failed to open system journal: %s", strerror(-r));
964 if (!s->runtime_journal &&
965 (s->storage != STORAGE_NONE)) {
967 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
971 if (s->system_journal) {
973 /* Try to open the runtime journal, but only
974 * if it already exists, so that we can flush
975 * it into the system journal */
/* No O_CREAT here, and sealing is disabled for the runtime journal. */
977 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
982 log_warning("Failed to open runtime journal: %s", strerror(-r));
989 /* OK, we really need the runtime journal, so create
990 * it if necessary. */
992 (void) mkdir("/run/log", 0755);
993 (void) mkdir("/run/log/journal", 0755);
994 (void) mkdir_parents(fn, 0750);
996 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1000 log_error("Failed to open runtime journal: %s", strerror(-r));
1005 if (s->runtime_journal)
1006 server_fix_perms(s, s->runtime_journal, 0);
/* verbose=true: also logs the usage report via server_driver_message(). */
1009 available_space(s, true);
/* Moves the contents of the runtime journal into the system journal.
 * Applies only for STORAGE_AUTO / STORAGE_PERSISTENT and when a runtime
 * journal is open. Every entry read through sd_journal (runtime files only)
 * is copied with journal_file_copy_entry(); a failed copy that
 * shall_try_append_again() accepts is retried once. Afterwards the runtime
 * journal is closed, /run/log/journal is removed and the elapsed time is
 * logged.
 * NOTE(review): early returns, the rotation between the two copy attempts and
 * the cleanup labels are not visible here. */
1014 int server_flush_to_var(Server *s) {
1016 sd_journal *j = NULL;
1017 char ts[FORMAT_TIMESPAN_MAX];
1024 if (s->storage != STORAGE_AUTO &&
1025 s->storage != STORAGE_PERSISTENT)
1028 if (!s->runtime_journal)
/* Make sure the persistent journal is open before copying into it. */
1031 system_journal_open(s);
1033 if (!s->system_journal)
1036 log_debug("Flushing to /var...");
1038 start = now(CLOCK_MONOTONIC);
1040 r = sd_id128_get_machine(&machine);
1044 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1046 log_error("Failed to read runtime journal: %s", strerror(-r));
/* A threshold of 0 is passed so entry data is presumably not truncated - TODO confirm. */
1050 sd_journal_set_data_threshold(j, 0);
1052 SD_JOURNAL_FOREACH(j) {
1056 f = j->current_file;
1057 assert(f && f->current_offset > 0);
1061 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1063 log_error("Can't read entry: %s", strerror(-r));
1067 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1071 if (!shall_try_append_again(s->system_journal, r)) {
1072 log_error("Can't write entry: %s", strerror(-r));
1079 if (!s->system_journal) {
1080 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1085 log_debug("Retrying write.");
1086 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1088 log_error("Can't write entry: %s", strerror(-r));
1094 journal_file_post_change(s->system_journal);
/* The runtime journal is no longer needed once its entries were copied. */
1096 journal_file_close(s->runtime_journal);
1097 s->runtime_journal = NULL;
1100 rm_rf("/run/log/journal", false, true, false);
1102 sd_journal_close(j);
1104 server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
/* Event-loop callback for the native and syslog datagram sockets.
 * Receives a datagram with recvmsg() into s->buffer, picks sender
 * credentials, SELinux label, timestamp and passed file descriptors out of
 * the control messages, and hands the payload to the syslog or native message
 * processor. Received file descriptors are closed at the end.
 * 'userdata' is the Server; 'fd' must be s->native_fd or s->syslog_fd. */
1109 int process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1110 Server *s = userdata;
1113 assert(fd == s->native_fd || fd == s->syslog_fd);
1115 if (revents != EPOLLIN) {
1116 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1121 struct ucred *ucred = NULL;
1122 struct timeval *tv = NULL;
1123 struct cmsghdr *cmsg;
1125 size_t label_len = 0;
/* Control-message buffer sized for credentials, timestamp, one fd and a label. */
1129 struct cmsghdr cmsghdr;
1131 /* We use NAME_MAX space for the SELinux label
1132 * here. The kernel currently enforces no
1133 * limit, but according to suggestions from
1134 * the SELinux people this will change and it
1135 * will probably be identical to NAME_MAX. For
1136 * now we use that, but this should be updated
1137 * one day when the final limit is known.*/
1138 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1139 CMSG_SPACE(sizeof(struct timeval)) +
1140 CMSG_SPACE(sizeof(int)) + /* fd */
1141 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1143 struct msghdr msghdr = {
1146 .msg_control = &control,
1147 .msg_controllen = sizeof(control),
/* SIOCINQ yields the amount of pending data in 'v'; the receive buffer is
 * grown to at least LINE_MAX + v bytes before reading. */
1155 if (ioctl(fd, SIOCINQ, &v) < 0) {
1156 log_error("SIOCINQ failed: %m");
1160 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, LINE_MAX + (size_t) v))
1163 iovec.iov_base = s->buffer;
1164 iovec.iov_len = s->buffer_size;
1166 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
/* EINTR and EAGAIN are handled separately from real receive errors. */
1168 if (errno == EINTR || errno == EAGAIN)
1171 log_error("recvmsg() failed: %m");
/* Credentials and timestamp are accepted only when the control message has
 * exactly the expected length; label and fds take their length from it. */
1175 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1177 if (cmsg->cmsg_level == SOL_SOCKET &&
1178 cmsg->cmsg_type == SCM_CREDENTIALS &&
1179 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1180 ucred = (struct ucred*) CMSG_DATA(cmsg);
1181 else if (cmsg->cmsg_level == SOL_SOCKET &&
1182 cmsg->cmsg_type == SCM_SECURITY) {
1183 label = (char*) CMSG_DATA(cmsg);
1184 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1185 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1186 cmsg->cmsg_type == SO_TIMESTAMP &&
1187 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1188 tv = (struct timeval*) CMSG_DATA(cmsg);
1189 else if (cmsg->cmsg_level == SOL_SOCKET &&
1190 cmsg->cmsg_type == SCM_RIGHTS) {
1191 fds = (int*) CMSG_DATA(cmsg);
1192 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* The syslog socket accepts plain payloads only; file descriptors are refused. */
1196 if (fd == s->syslog_fd) {
1197 if (n > 0 && n_fds == 0) {
1199 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1200 } else if (n_fds > 0)
1201 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* The native socket accepts either an inline payload or exactly one fd with
 * an empty payload. */
1204 if (n > 0 && n_fds == 0)
1205 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1206 else if (n == 0 && n_fds == 1)
1207 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1209 log_warning("Got too many file descriptors via native socket. Ignoring.");
1212 close_many(fds, n_fds);
/* SIGUSR1 handler (registered in setup_signals()): creates the flag file
 * /run/systemd/journal/flushed, which system_journal_open() checks before
 * opening the persistent journal, and then flushes the runtime journal to
 * /var. 'userdata' is the Server. */
1216 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1217 Server *s = userdata;
1221 log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
1223 touch("/run/systemd/journal/flushed");
1224 server_flush_to_var(s);
/* SIGUSR2 handler (registered in setup_signals()): logs that a rotation was
 * requested and by which PID. 'userdata' is the Server.
 * NOTE(review): the calls performing the rotation are not visible here. */
1230 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1231 Server *s = userdata;
1235 log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
/* Handler registered for both SIGTERM and SIGINT in setup_signals(): logs the
 * received signal and asks the event loop to exit with code 0.
 * 'userdata' is the Server. */
1242 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1243 Server *s = userdata;
1247 log_received_signal(LOG_INFO, si);
1249 sd_event_exit(s->event, 0);
/* Sets the process signal mask to exactly SIGINT, SIGTERM, SIGUSR1 and
 * SIGUSR2 and registers an sd-event signal source for each of them:
 * SIGUSR1 -> dispatch_sigusr1, SIGUSR2 -> dispatch_sigusr2,
 * SIGTERM and SIGINT -> dispatch_sigterm. */
1253 static int setup_signals(Server *s) {
1259 assert_se(sigemptyset(&mask) == 0);
1260 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
/* SIG_SETMASK replaces the whole mask rather than adding to it. */
1261 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1263 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1267 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1271 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1275 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
/* Applies journald switches given on the kernel command line. Each word is
 * checked for the boolean options
 *   systemd.journald.forward_to_syslog=, forward_to_kmsg=,
 *   forward_to_console=, forward_to_wall=
 * and the parsed value is stored in the matching Server field. Any other word
 * starting with "systemd.journald" only produces a warning. */
1282 static int server_parse_proc_cmdline(Server *s) {
1283 _cleanup_free_ char *line = NULL;
1288 r = proc_cmdline(&line);
1290 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1294 FOREACH_WORD_QUOTED(w, l, line, state) {
1295 _cleanup_free_ char *word;
1297 word = strndup(w, l);
/* The numeric offsets below (35, 33, 36, 33) are the lengths of the
 * respective option prefixes, i.e. they point at the value after '='. */
1301 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1302 r = parse_boolean(word + 35);
1304 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1306 s->forward_to_syslog = r;
1307 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1308 r = parse_boolean(word + 33);
1310 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1312 s->forward_to_kmsg = r;
1313 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1314 r = parse_boolean(word + 36);
1316 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1318 s->forward_to_console = r;
1319 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1320 r = parse_boolean(word + 33);
1322 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1324 s->forward_to_wall = r;
1325 } else if (startswith(word, "systemd.journald"))
1326 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Reads /etc/systemd/journald.conf and applies its "Journal" section to the
 * Server through the gperf-generated lookup table. A missing file (ENOENT) is
 * handled separately from other open failures, which are logged as warnings;
 * parse failures are logged as warnings too. */
1332 static int server_parse_config_file(Server *s) {
1333 static const char fn[] = "/etc/systemd/journald.conf";
1334 _cleanup_fclose_ FILE *f = NULL;
/* "re": read-only, with the 'e' (close-on-exec) flag. */
1339 f = fopen(fn, "re");
1341 if (errno == ENOENT)
1344 log_warning("Failed to open configuration file %s: %m", fn);
1348 r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1349 (void*) journald_gperf_lookup, false, false, s);
1351 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Timer callback installed by server_schedule_sync() through
 * sd_event_add_time(); 'userdata' is the Server.
 * NOTE(review): the rest of the body is not visible here. */
1356 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1357 Server *s = userdata;
/* Arranges for the journals to be synced after a write of the given
 * 'priority'. Messages at LOG_CRIT or more severe take the immediate-sync
 * branch. Otherwise, unless a sync is already scheduled, a one-shot timer is
 * armed for "event-loop now (CLOCK_MONOTONIC) + s->sync_interval_usec" -
 * created on first use, re-timed and re-enabled on later calls - and
 * s->sync_scheduled is set. An interval of 0 skips the timer setup. */
1365 int server_schedule_sync(Server *s, int priority) {
1370 if (priority <= LOG_CRIT) {
1371 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1376 if (s->sync_scheduled)
1379 if (s->sync_interval_usec > 0) {
1382 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1386 when += s->sync_interval_usec;
/* First use: create the timer source, with server_dispatch_sync() as callback. */
1388 if (!s->sync_event_source) {
1389 r = sd_event_add_time(
1391 &s->sync_event_source,
1394 server_dispatch_sync, s);
1398 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1400 r = sd_event_source_set_time(s->sync_event_source, when);
1404 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1409 s->sync_scheduled = true;
/* I/O callback registered by server_open_hostname() on the
 * /proc/sys/kernel/hostname descriptor: refreshes the cached _HOSTNAME=
 * field. 'userdata' is the Server. */
1415 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1416 Server *s = userdata;
1420 server_cache_hostname(s);
/* Opens /proc/sys/kernel/hostname and registers the descriptor with the event
 * loop so that dispatch_hostname_change() runs on hostname changes. One
 * registration failure is tolerated: a warning is logged and the descriptor
 * closed again (the exact condition is not visible here). Other failures are
 * logged as errors. On success the event source priority is set to
 * SD_EVENT_PRIORITY_IMPORTANT-10.
 * NOTE(review): the block comment starting with "kernels prior to 3.2" has
 * lost its closing line in this copy of the file - restore it from the
 * upstream source. */
1424 static int server_open_hostname(Server *s) {
1429 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1430 if (s->hostname_fd < 0) {
1431 log_error("Failed to open /proc/sys/kernel/hostname: %m");
1435 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1437 /* kernels prior to 3.2 don't support polling this file. Ignore
1440 log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1442 s->hostname_fd = safe_close(s->hostname_fd);
1446 log_error("Failed to register hostname fd in event loop: %s", strerror(-r));
1450 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1452 log_error("Failed to adjust priority of host name event source: %s", strerror(-r));
/* Initializes a Server: sets defaults, applies the configuration file and
 * kernel command line, creates /run/systemd/journal, allocates the
 * user-journal map, mmap cache and event loop, adopts sockets passed in by
 * the service manager, opens the remaining sockets and sources, sets up signal
 * handling, udev, rate limiting and the cgroup root, caches the
 * hostname/boot id/machine id fields and finally opens the journal files.
 * NOTE(review): the error-return statements between the steps are not
 * visible here. */
1459 int server_init(Server *s) {
/* -1 marks every descriptor as "not open yet". */
1465 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->hostname_fd = -1;
1469 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1470 s->sync_scheduled = false;
1472 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1473 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1475 s->forward_to_syslog = true;
1476 s->forward_to_wall = true;
1478 s->max_level_store = LOG_DEBUG;
1479 s->max_level_syslog = LOG_DEBUG;
1480 s->max_level_kmsg = LOG_NOTICE;
1481 s->max_level_console = LOG_INFO;
1482 s->max_level_wall = LOG_EMERG;
/* All-ones bytes in every metrics field; presumably the "not configured"
 * marker for these values - TODO confirm. */
1484 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1485 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* The kernel command line is parsed after the config file, so its settings
 * are applied last. */
1487 server_parse_config_file(s);
1488 server_parse_proc_cmdline(s);
/* If exactly one of interval/burst is zero, both are forced to zero. */
1489 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1490 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1491 (long long unsigned) s->rate_limit_interval,
1492 s->rate_limit_burst);
1493 s->rate_limit_interval = s->rate_limit_burst = 0;
1496 mkdir_p("/run/systemd/journal", 0755);
1498 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1499 if (!s->user_journals)
1502 s->mmap = mmap_cache_new();
1506 r = sd_event_default(&s->event);
1508 log_error("Failed to create event loop: %s", strerror(-r));
1512 sd_event_set_watchdog(s->event, true);
/* Classify each passed-in descriptor by socket type and path; at most one
 * socket of each kind is accepted. */
1514 n = sd_listen_fds(true);
1516 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1520 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1522 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1524 if (s->native_fd >= 0) {
1525 log_error("Too many native sockets passed.");
1531 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1533 if (s->stdout_fd >= 0) {
1534 log_error("Too many stdout sockets passed.");
1540 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1541 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1543 if (s->syslog_fd >= 0) {
1544 log_error("Too many /dev/log sockets passed.");
1551 log_error("Unknown socket passed.");
/* Open the individual input sources. */
1556 r = server_open_syslog_socket(s);
1560 r = server_open_native_socket(s);
1564 r = server_open_stdout_socket(s);
1568 r = server_open_dev_kmsg(s);
1572 r = server_open_kernel_seqnum(s);
1576 r = server_open_hostname(s);
1580 r = setup_signals(s);
1584 s->udev = udev_new();
1588 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1592 r = cg_get_root_path(&s->cgroup_root);
/* Pre-build the constant metadata fields attached to entries. */
1596 server_cache_hostname(s);
1597 server_cache_boot_id(s);
1598 server_cache_machine_id(s);
1600 r = system_journal_open(s);
/* Calls journal_file_maybe_append_tag() with the current CLOCK_REALTIME time
 * on the system journal (if open) and on every per-user journal. */
1607 void server_maybe_append_tags(Server *s) {
1613 n = now(CLOCK_REALTIME);
1615 if (s->system_journal)
1616 journal_file_maybe_append_tag(s->system_journal, n);
1618 HASHMAP_FOREACH(f, s->user_journals, i)
1619 journal_file_maybe_append_tag(f, n);
/* Releases what the Server holds: stdout streams, the system, runtime and
 * per-user journal files, the user-journal map, all event sources and the
 * event loop, the socket/kmsg/hostname descriptors, the rate limiter, the
 * kernel sequence number mapping, the cgroup root string, the mmap cache and
 * the udev context.
 * NOTE(review): the end of this function is not visible here. */
1623 void server_done(Server *s) {
/* Freeing a stream presumably unlinks it from s->stdout_streams, which is
 * what lets this loop terminate - TODO confirm. */
1627 while (s->stdout_streams)
1628 stdout_stream_free(s->stdout_streams);
1630 if (s->system_journal)
1631 journal_file_close(s->system_journal);
1633 if (s->runtime_journal)
1634 journal_file_close(s->runtime_journal);
/* Drain the map first so every per-user file is closed before the map
 * itself is freed. */
1636 while ((f = hashmap_steal_first(s->user_journals)))
1637 journal_file_close(f);
1639 hashmap_free(s->user_journals);
1641 sd_event_source_unref(s->syslog_event_source);
1642 sd_event_source_unref(s->native_event_source);
1643 sd_event_source_unref(s->stdout_event_source);
1644 sd_event_source_unref(s->dev_kmsg_event_source);
1645 sd_event_source_unref(s->sync_event_source);
1646 sd_event_source_unref(s->sigusr1_event_source);
1647 sd_event_source_unref(s->sigusr2_event_source);
1648 sd_event_source_unref(s->sigterm_event_source);
1649 sd_event_source_unref(s->sigint_event_source);
1650 sd_event_source_unref(s->hostname_event_source);
1651 sd_event_unref(s->event);
1653 safe_close(s->syslog_fd);
1654 safe_close(s->native_fd);
1655 safe_close(s->stdout_fd);
1656 safe_close(s->dev_kmsg_fd);
1657 safe_close(s->hostname_fd);
1660 journal_rate_limit_free(s->rate_limit);
/* The mapping is released with the size of a single uint64_t. */
1662 if (s->kernel_seqnum)
1663 munmap(s->kernel_seqnum, sizeof(uint64_t));
1667 free(s->cgroup_root);
1670 mmap_cache_unref(s->mmap);
1673 udev_unref(s->udev);