1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33 #include "sd-daemon.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
42 #include "conf-parser.h"
43 #include "selinux-util.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-rate-limit.h"
48 #include "journald-kmsg.h"
49 #include "journald-syslog.h"
50 #include "journald-stream.h"
51 #include "journald-console.h"
52 #include "journald-native.h"
53 #include "journald-server.h"
57 #include <acl/libacl.h>
62 #include <selinux/selinux.h>
/* Upper bound on the number of per-user journal files kept open at the
 * same time; find_journal() closes one before opening another once this
 * many are open. */
65 #define USER_JOURNALS_MAX 1024
/* Defaults that server_init() assigns to the corresponding Server fields. */
67 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
68 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
69 #define DEFAULT_RATE_LIMIT_BURST 1000
70 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
/* Age threshold that available_space() compares its cached timestamp against. */
72 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* String names of the Storage enum values, indexed by enum value. */
74 static const char* const storage_table[_STORAGE_MAX] = {
75 [STORAGE_AUTO] = "auto",
76 [STORAGE_VOLATILE] = "volatile",
77 [STORAGE_PERSISTENT] = "persistent",
78 [STORAGE_NONE] = "none"
/* Instantiate the string lookup helpers and the config-parser callback for
 * Storage (the macro expansions are defined elsewhere — presumably
 * storage_to_string()/storage_from_string(); confirm in the headers). */
81 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
82 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* String names of the SplitMode enum values, indexed by enum value. */
84 static const char* const split_mode_table[_SPLIT_MAX] = {
85 [SPLIT_LOGIN] = "login",
87 [SPLIT_NONE] = "none",
/* Instantiate the string lookup helpers and the config-parser callback for
 * SplitMode (macro expansions are defined elsewhere). */
90 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
91 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Computes how many more bytes the currently active journal directory may
 * grow by, stores the result in s->cached_available_space (together with
 * the time it was computed) and returns it. When a line guarded by
 * 'verbose' is reached, a SD_MESSAGE_JOURNAL_USAGE driver message with the
 * individual figures is emitted as well. */
93 static uint64_t available_space(Server *s, bool verbose) {
95 _cleanup_free_ char *p = NULL;
98 uint64_t sum = 0, ss_avail = 0, avail = 0;
100 _cleanup_closedir_ DIR *d = NULL;
105 ts = now(CLOCK_MONOTONIC);
/* A sufficiently recent cached value can be returned without rescanning. */
107 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
109 return s->cached_available_space;
111 r = sd_id128_get_machine(&machine);
/* Select directory prefix and metrics: the persistent location while the
 * system journal is open, the runtime location otherwise. */
115 if (s->system_journal) {
116 f = "/var/log/journal/";
117 m = &s->system_metrics;
119 f = "/run/log/journal/";
120 m = &s->runtime_metrics;
/* The directory to inspect is <prefix><machine-id>. */
125 p = strappend(f, sd_id128_to_string(machine, ids));
133 if (fstatvfs(dirfd(d), &ss) < 0)
/* Directory scan: entries are filtered by name suffix and file type before
 * their allocated size is added to 'sum'. */
142 if (!de && errno != 0)
148 if (!endswith(de->d_name, ".journal") &&
149 !endswith(de->d_name, ".journal~"))
152 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
155 if (!S_ISREG(st.st_mode))
/* st_blocks counts 512-byte units. */
158 sum += (uint64_t) st.st_blocks * 512UL;
161 ss_avail = ss.f_bsize * ss.f_bavail;
163 /* If we reached a high mark, we will always allow this much
164 * again, unless usage goes above max_use. This watermark
165 * value is cached so that we don't give up space on pressure,
166 * but hover below the maximum usage. */
/* Free space on the file system minus the amount that must be kept free
 * (LESS_BY is defined elsewhere; presumably a subtraction clamped at zero). */
171 avail = LESS_BY(ss_avail, m->keep_free);
/* The limit is the smaller of max_use and 'avail'; what is already used is
 * subtracted from it. */
173 s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
174 s->cached_available_space_timestamp = ts;
177 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
178 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
180 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
181 "%s journal is using %s (max allowed %s, "
182 "trying to leave %s free of %s available → current limit %s).",
183 s->system_journal ? "Permanent" : "Runtime",
184 format_bytes(fb1, sizeof(fb1), sum),
185 format_bytes(fb2, sizeof(fb2), m->max_use),
186 format_bytes(fb3, sizeof(fb3), m->keep_free),
187 format_bytes(fb4, sizeof(fb4), ss_avail),
188 format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
191 return s->cached_available_space;
194 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
199 acl_permset_t permset;
204 r = fchmod(f->fd, 0640);
206 log_warning("Failed to fix access mode on %s, ignoring: %s", f->path, strerror(-r));
209 if (uid <= SYSTEM_UID_MAX)
212 acl = acl_get_fd(f->fd);
214 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
218 r = acl_find_uid(acl, uid, &entry);
221 if (acl_create_entry(&acl, &entry) < 0 ||
222 acl_set_tag_type(entry, ACL_USER) < 0 ||
223 acl_set_qualifier(entry, &uid) < 0) {
224 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
229 /* We do not recalculate the mask unconditionally here,
230 * so that the fchmod() mask above stays intact. */
231 if (acl_get_permset(entry, &permset) < 0 ||
232 acl_add_perm(permset, ACL_READ) < 0 ||
233 calc_acl_mask_if_needed(&acl) < 0) {
234 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
238 if (acl_set_fd(f->fd, acl) < 0)
239 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
246 static JournalFile* find_journal(Server *s, uid_t uid) {
247 _cleanup_free_ char *p = NULL;
254 /* We split up user logs only on /var, not on /run. If the
255 * runtime file is open, we write to it exclusively, in order
256 * to guarantee proper order as soon as we flush /run to
257 * /var and close the runtime file. */
259 if (s->runtime_journal)
260 return s->runtime_journal;
262 if (uid <= SYSTEM_UID_MAX)
263 return s->system_journal;
265 r = sd_id128_get_machine(&machine);
267 return s->system_journal;
269 f = ordered_hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
273 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
274 SD_ID128_FORMAT_VAL(machine), uid) < 0)
275 return s->system_journal;
277 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
278 /* Too many open? Then let's close one */
279 f = ordered_hashmap_steal_first(s->user_journals);
281 journal_file_close(f);
284 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
286 return s->system_journal;
288 server_fix_perms(s, f, uid);
290 r = ordered_hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
292 journal_file_close(f);
293 return s->system_journal;
299 static int do_rotate(Server *s, JournalFile **f, const char* name,
300 bool seal, uint32_t uid) {
307 r = journal_file_rotate(f, s->compress, seal);
310 log_error("Failed to rotate %s: %s",
311 (*f)->path, strerror(-r));
313 log_error("Failed to create new %s journal: %s",
316 server_fix_perms(s, *f, uid);
320 void server_rotate(Server *s) {
326 log_debug("Rotating...");
328 do_rotate(s, &s->runtime_journal, "runtime", false, 0);
329 do_rotate(s, &s->system_journal, "system", s->seal, 0);
331 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
332 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
334 ordered_hashmap_replace(s->user_journals, k, f);
336 /* Old file has been closed and deallocated */
337 ordered_hashmap_remove(s->user_journals, k);
341 void server_sync(Server *s) {
347 if (s->system_journal) {
348 r = journal_file_set_offline(s->system_journal);
350 log_error("Failed to sync system journal: %s", strerror(-r));
353 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
354 r = journal_file_set_offline(f);
356 log_error("Failed to sync user journal: %s", strerror(-r));
359 if (s->sync_event_source) {
360 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
362 log_error("Failed to disable sync timer source: %s", strerror(-r));
365 s->sync_scheduled = false;
368 static void do_vacuum(Server *s, char *ids, JournalFile *f, const char* path,
369 JournalMetrics *metrics) {
376 p = strappenda(path, ids);
377 r = journal_directory_vacuum(p, metrics->max_use, s->max_retention_usec, &s->oldest_file_usec);
378 if (r < 0 && r != -ENOENT)
379 log_error("Failed to vacuum %s: %s", p, strerror(-r));
382 void server_vacuum(Server *s) {
387 log_debug("Vacuuming...");
389 s->oldest_file_usec = 0;
391 r = sd_id128_get_machine(&machine);
393 log_error("Failed to get machine ID: %s", strerror(-r));
396 sd_id128_to_string(machine, ids);
398 do_vacuum(s, ids, s->system_journal, "/var/log/journal/", &s->system_metrics);
399 do_vacuum(s, ids, s->runtime_journal, "/run/log/journal/", &s->runtime_metrics);
401 s->cached_available_space_timestamp = 0;
404 static void server_cache_machine_id(Server *s) {
410 r = sd_id128_get_machine(&id);
414 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
417 static void server_cache_boot_id(Server *s) {
423 r = sd_id128_get_boot(&id);
427 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
430 static void server_cache_hostname(Server *s) {
431 _cleanup_free_ char *t = NULL;
436 t = gethostname_malloc();
440 x = strappend("_HOSTNAME=", t);
444 free(s->hostname_field);
445 s->hostname_field = x;
448 bool shall_try_append_again(JournalFile *f, int r) {
450 /* -E2BIG Hit configured limit
452 -EDQUOT Quota limit hit
454 -EHOSTDOWN Other machine
455 -EBUSY Unclean shutdown
456 -EPROTONOSUPPORT Unsupported feature
459 -ESHUTDOWN Already archived */
461 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
462 log_debug("%s: Allocation limit reached, rotating.", f->path);
463 else if (r == -EHOSTDOWN)
464 log_info("%s: Journal file from other machine, rotating.", f->path);
465 else if (r == -EBUSY)
466 log_info("%s: Unclean shutdown, rotating.", f->path);
467 else if (r == -EPROTONOSUPPORT)
468 log_info("%s: Unsupported feature, rotating.", f->path);
469 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
470 log_warning("%s: Journal file corrupted, rotating.", f->path);
477 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
479 bool vacuumed = false;
486 f = find_journal(s, uid);
490 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
491 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
496 f = find_journal(s, uid);
501 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
503 server_schedule_sync(s, priority);
507 if (vacuumed || !shall_try_append_again(f, r)) {
510 for (i = 0; i < n; i++)
511 size += iovec[i].iov_len;
513 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
520 f = find_journal(s, uid);
524 log_debug("Retrying write.");
525 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
529 for (i = 0; i < n; i++)
530 size += iovec[i].iov_len;
532 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
534 server_schedule_sync(s, priority);
/* Completes an entry with trusted metadata fields and hands it to
 * write_to_journal().
 *
 * 'iovec' holds 'n' fields supplied by the caller and has room for 'm'
 * elements in total; the fields added here are appended after the first
 * 'n'. Fields describing the sending process are derived from 'ucred',
 * fields prefixed OBJECT_ from 'object_pid'. Finally the journal UID is
 * chosen according to s->split_mode. */
537 static void dispatch_message_real(
539 struct iovec *iovec, unsigned n, unsigned m,
542 const char *label, size_t label_len,
547 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
548 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
549 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
550 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
551 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
552 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
553 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
554 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
560 uid_t realuid = 0, owner = 0, journal_uid;
561 bool owner_valid = false;
563 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
564 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
565 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
566 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
/* The caller must have reserved enough room for all fields added below. */
575 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
/* Credentials and process attributes of the sender. */
578 realuid = ucred->uid;
580 sprintf(pid, "_PID="PID_FMT, ucred->pid);
581 IOVEC_SET_STRING(iovec[n++], pid);
583 sprintf(uid, "_UID="UID_FMT, ucred->uid);
584 IOVEC_SET_STRING(iovec[n++], uid);
586 sprintf(gid, "_GID="GID_FMT, ucred->gid);
587 IOVEC_SET_STRING(iovec[n++], gid);
589 r = get_process_comm(ucred->pid, &t);
591 x = strappenda("_COMM=", t);
593 IOVEC_SET_STRING(iovec[n++], x);
596 r = get_process_exe(ucred->pid, &t);
598 x = strappenda("_EXE=", t);
600 IOVEC_SET_STRING(iovec[n++], x);
603 r = get_process_cmdline(ucred->pid, 0, false, &t);
605 x = strappenda("_CMDLINE=", t);
607 IOVEC_SET_STRING(iovec[n++], x);
610 r = get_process_capeff(ucred->pid, &t);
612 x = strappenda("_CAP_EFFECTIVE=", t);
614 IOVEC_SET_STRING(iovec[n++], x);
/* Audit session and login UID of the sender. */
618 r = audit_session_from_pid(ucred->pid, &audit);
620 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
621 IOVEC_SET_STRING(iovec[n++], audit_session);
624 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
626 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
627 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* Fields derived from the sender's cgroup path: session, owner UID, unit,
 * user unit and slice. 'unit_id' serves as fallback for the unit fields. */
631 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
633 char *session = NULL;
635 x = strappenda("_SYSTEMD_CGROUP=", c);
636 IOVEC_SET_STRING(iovec[n++], x);
638 r = cg_path_get_session(c, &t);
640 session = strappenda("_SYSTEMD_SESSION=", t);
642 IOVEC_SET_STRING(iovec[n++], session);
645 if (cg_path_get_owner_uid(c, &owner) >= 0) {
648 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
649 IOVEC_SET_STRING(iovec[n++], owner_uid);
652 if (cg_path_get_unit(c, &t) >= 0) {
653 x = strappenda("_SYSTEMD_UNIT=", t);
655 IOVEC_SET_STRING(iovec[n++], x);
656 } else if (unit_id && !session) {
657 x = strappenda("_SYSTEMD_UNIT=", unit_id);
658 IOVEC_SET_STRING(iovec[n++], x);
661 if (cg_path_get_user_unit(c, &t) >= 0) {
662 x = strappenda("_SYSTEMD_USER_UNIT=", t);
664 IOVEC_SET_STRING(iovec[n++], x);
665 } else if (unit_id && session) {
666 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
667 IOVEC_SET_STRING(iovec[n++], x);
670 if (cg_path_get_slice(c, &t) >= 0) {
671 x = strappenda("_SYSTEMD_SLICE=", t);
673 IOVEC_SET_STRING(iovec[n++], x);
677 } else if (unit_id) {
678 x = strappenda("_SYSTEMD_UNIT=", unit_id);
679 IOVEC_SET_STRING(iovec[n++], x);
/* SELinux context: built from the label passed in by the caller
 * (label/label_len), or queried for the sender's PID via getpidcon(). */
683 if (mac_selinux_use()) {
685 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
687 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
688 IOVEC_SET_STRING(iovec[n++], x);
690 security_context_t con;
692 if (getpidcon(ucred->pid, &con) >= 0) {
693 x = strappenda("_SELINUX_CONTEXT=", con);
696 IOVEC_SET_STRING(iovec[n++], x);
/* The same kind of attributes for the process the message is about
 * (object_pid), using the OBJECT_ field prefix. */
705 r = get_process_uid(object_pid, &object_uid);
707 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
708 IOVEC_SET_STRING(iovec[n++], o_uid);
711 r = get_process_gid(object_pid, &object_gid);
713 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
714 IOVEC_SET_STRING(iovec[n++], o_gid);
717 r = get_process_comm(object_pid, &t);
719 x = strappenda("OBJECT_COMM=", t);
721 IOVEC_SET_STRING(iovec[n++], x);
724 r = get_process_exe(object_pid, &t);
726 x = strappenda("OBJECT_EXE=", t);
728 IOVEC_SET_STRING(iovec[n++], x);
731 r = get_process_cmdline(object_pid, 0, false, &t);
733 x = strappenda("OBJECT_CMDLINE=", t);
735 IOVEC_SET_STRING(iovec[n++], x);
739 r = audit_session_from_pid(object_pid, &audit);
741 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
742 IOVEC_SET_STRING(iovec[n++], o_audit_session);
745 r = audit_loginuid_from_pid(object_pid, &loginuid);
747 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
748 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
752 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
754 x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
755 IOVEC_SET_STRING(iovec[n++], x);
757 r = cg_path_get_session(c, &t);
759 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
761 IOVEC_SET_STRING(iovec[n++], x);
764 if (cg_path_get_owner_uid(c, &owner) >= 0) {
765 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
766 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
769 if (cg_path_get_unit(c, &t) >= 0) {
770 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
772 IOVEC_SET_STRING(iovec[n++], x);
775 if (cg_path_get_user_unit(c, &t) >= 0) {
776 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
778 IOVEC_SET_STRING(iovec[n++], x);
/* Timestamp taken from 'tv', stored in the unit timeval_load() returns. */
787 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
788 IOVEC_SET_STRING(iovec[n++], source_time);
791 /* Note that strictly speaking storing the boot id here is
792 * redundant since the entry includes this in-line
793 * anyway. However, we need this indexed, too. */
794 if (!isempty(s->boot_id_field))
795 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
797 if (!isempty(s->machine_id_field))
798 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
800 if (!isempty(s->hostname_field))
801 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
/* Choose the UID whose journal file receives the entry. */
805 if (s->split_mode == SPLIT_UID && realuid > 0)
806 /* Split up strictly by any UID */
807 journal_uid = realuid;
808 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
809 /* Split up by login UIDs. We do this only if the
810 * realuid is not root, in order not to accidentally
811 * leak privileged information to the user that is
812 * logged by a privileged process that is part of an
813 * unprivileged session.*/
818 write_to_journal(s, journal_uid, iovec, n, priority);
/* Generates a journal entry on behalf of journald itself.
 *
 * The printf-style 'format' and its arguments become the MESSAGE= field;
 * if 'message_id' is not the null ID, a message ID field is added as well.
 * The entry is tagged PRIORITY=6 and _TRANSPORT=driver and dispatched with
 * the daemon's own PID/UID/GID as credentials. */
821 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
822 char mid[11 + 32 + 1];
823 char buffer[16 + LINE_MAX + 1];
824 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
827 struct ucred ucred = {};
832 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
833 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The formatted text is written right behind the 8 byte "MESSAGE=" prefix. */
835 memcpy(buffer, "MESSAGE=", 8);
836 va_start(ap, format);
837 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
839 char_array_0(buffer);
840 IOVEC_SET_STRING(iovec[n++], buffer);
842 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
843 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
845 IOVEC_SET_STRING(iovec[n++], mid);
/* The entry originates from this very process. */
848 ucred.pid = getpid();
849 ucred.uid = getuid();
850 ucred.gid = getgid();
852 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
855 void server_dispatch_message(
857 struct iovec *iovec, unsigned n, unsigned m,
860 const char *label, size_t label_len,
866 _cleanup_free_ char *path = NULL;
870 assert(iovec || n == 0);
875 if (LOG_PRI(priority) > s->max_level_store)
878 /* Stop early in case the information will not be stored
880 if (s->storage == STORAGE_NONE)
886 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
890 /* example: /user/lennart/3/foobar
891 * /system/dbus.service/foobar
893 * So let's cut of everything past the third /, since that is
894 * where user directories start */
896 c = strchr(path, '/');
898 c = strchr(c+1, '/');
900 c = strchr(c+1, '/');
906 rl = journal_rate_limit_test(s->rate_limit, path,
907 priority & LOG_PRIMASK, available_space(s, false));
912 /* Write a suppression message if we suppressed something */
914 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
915 "Suppressed %u messages from %s", rl - 1, path);
918 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
/* Opens the persistent system journal below /var/log/journal and/or the
 * runtime journal below /run/log/journal, as far as the configured storage
 * mode permits and they are not open yet, fixes their permissions and
 * finally refreshes (and logs) the available-space figure. */
922 static int system_journal_open(Server *s, bool flush_requested) {
928 r = sd_id128_get_machine(&machine);
930 log_error("Failed to get machine id: %s", strerror(-r));
934 sd_id128_to_string(machine, ids);
/* Persistent journal: only in "persistent" or "auto" storage mode; the
 * existence of /run/systemd/journal/flushed is one of the conditions. */
936 if (!s->system_journal &&
937 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
939 || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
941 /* If in auto mode: first try to create the machine
942 * path, but not the prefix.
944 * If in persistent mode: create /var/log/journal and
945 * the machine path */
947 if (s->storage == STORAGE_PERSISTENT)
948 (void) mkdir("/var/log/journal/", 0755);
950 fn = strappenda("/var/log/journal/", ids);
951 (void) mkdir(fn, 0755);
953 fn = strappenda(fn, "/system.journal");
954 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
957 server_fix_perms(s, s->system_journal, 0);
/* -ENOENT and -EROFS are not worth a warning. */
959 if (r != -ENOENT && r != -EROFS)
960 log_warning("Failed to open system journal: %s", strerror(-r));
/* Runtime journal: in every storage mode except "none". */
966 if (!s->runtime_journal &&
967 (s->storage != STORAGE_NONE)) {
969 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
973 if (s->system_journal) {
975 /* Try to open the runtime journal, but only
976 * if it already exists, so that we can flush
977 * it into the system journal */
979 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
984 log_warning("Failed to open runtime journal: %s", strerror(-r));
991 /* OK, we really need the runtime journal, so create
992 * it if necessary. */
994 (void) mkdir("/run/log", 0755);
995 (void) mkdir("/run/log/journal", 0755);
996 (void) mkdir_parents(fn, 0750);
998 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1002 log_error("Failed to open runtime journal: %s", strerror(-r));
1007 if (s->runtime_journal)
1008 server_fix_perms(s, s->runtime_journal, 0);
/* verbose=true: recompute the available space and log it. */
1011 available_space(s, true);
/* Copies all entries of the runtime journal into the persistent system
 * journal, then closes the runtime journal and removes /run/log/journal.
 * Applies only to "auto" and "persistent" storage mode and only while a
 * runtime journal is open. The time spent is reported via a driver
 * message. */
1016 int server_flush_to_var(Server *s) {
1018 sd_journal *j = NULL;
1019 char ts[FORMAT_TIMESPAN_MAX];
1026 if (s->storage != STORAGE_AUTO &&
1027 s->storage != STORAGE_PERSISTENT)
1030 if (!s->runtime_journal)
/* Make sure the persistent journal is open (flush_requested=true). */
1033 system_journal_open(s, true);
1035 if (!s->system_journal)
1038 log_debug("Flushing to /var...");
1040 start = now(CLOCK_MONOTONIC);
1042 r = sd_id128_get_machine(&machine);
/* Read the runtime journal back through the regular reader API. */
1046 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1048 log_error("Failed to read runtime journal: %s", strerror(-r));
1052 sd_journal_set_data_threshold(j, 0);
1054 SD_JOURNAL_FOREACH(j) {
1058 f = j->current_file;
1059 assert(f && f->current_offset > 0);
1063 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1065 log_error("Can't read entry: %s", strerror(-r));
1069 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
/* A failed copy is retried once, provided the error is of the kind
 * shall_try_append_again() accepts. */
1073 if (!shall_try_append_again(s->system_journal, r)) {
1074 log_error("Can't write entry: %s", strerror(-r));
1081 if (!s->system_journal) {
1082 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1087 log_debug("Retrying write.");
1088 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1090 log_error("Can't write entry: %s", strerror(-r));
1096 journal_file_post_change(s->system_journal);
1098 journal_file_close(s->runtime_journal);
1099 s->runtime_journal = NULL;
1102 rm_rf("/run/log/journal", false, true, false);
1104 sd_journal_close(j);
1106 server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
/* Event loop I/O callback for the native and the syslog datagram socket.
 *
 * Receives a datagram into s->buffer, picks credentials, security label,
 * timestamp and passed file descriptors out of the control messages and
 * forwards the message to the syslog or native protocol handler. Any file
 * descriptors received are closed again at the end. */
1111 int process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1112 Server *s = userdata;
1115 assert(fd == s->native_fd || fd == s->syslog_fd);
1117 if (revents != EPOLLIN) {
1118 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1123 struct ucred *ucred = NULL;
1124 struct timeval *tv = NULL;
1125 struct cmsghdr *cmsg;
1127 size_t label_len = 0;
1131 struct cmsghdr cmsghdr;
1133 /* We use NAME_MAX space for the SELinux label
1134 * here. The kernel currently enforces no
1135 * limit, but according to suggestions from
1136 * the SELinux people this will change and it
1137 * will probably be identical to NAME_MAX. For
1138 * now we use that, but this should be updated
1139 * one day when the final limit is known.*/
1140 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1141 CMSG_SPACE(sizeof(struct timeval)) +
1142 CMSG_SPACE(sizeof(int)) + /* fd */
1143 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1145 struct msghdr msghdr = {
1148 .msg_control = &control,
1149 .msg_controllen = sizeof(control),
/* Ask for the size of the pending data and make the receive buffer at
 * least LINE_MAX bytes larger than that. */
1157 if (ioctl(fd, SIOCINQ, &v) < 0) {
1158 log_error("SIOCINQ failed: %m");
1162 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, LINE_MAX + (size_t) v))
1165 iovec.iov_base = s->buffer;
1166 iovec.iov_len = s->buffer_size;
1168 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1170 if (errno == EINTR || errno == EAGAIN)
1173 log_error("recvmsg() failed: %m");
/* Evaluate ancillary data. Credentials and timestamp are accepted only if
 * the control message has exactly the expected length. */
1177 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1179 if (cmsg->cmsg_level == SOL_SOCKET &&
1180 cmsg->cmsg_type == SCM_CREDENTIALS &&
1181 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1182 ucred = (struct ucred*) CMSG_DATA(cmsg);
1183 else if (cmsg->cmsg_level == SOL_SOCKET &&
1184 cmsg->cmsg_type == SCM_SECURITY) {
1185 label = (char*) CMSG_DATA(cmsg);
1186 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1187 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1188 cmsg->cmsg_type == SO_TIMESTAMP &&
1189 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1190 tv = (struct timeval*) CMSG_DATA(cmsg);
1191 else if (cmsg->cmsg_level == SOL_SOCKET &&
1192 cmsg->cmsg_type == SCM_RIGHTS) {
1193 fds = (int*) CMSG_DATA(cmsg);
1194 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* Syslog socket: plain text only, file descriptors are not accepted.
 * Native socket: either payload without fds, or exactly one fd without
 * payload. */
1198 if (fd == s->syslog_fd) {
1199 if (n > 0 && n_fds == 0) {
1201 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1202 } else if (n_fds > 0)
1203 log_warning("Got file descriptors via syslog socket. Ignoring.");
1206 if (n > 0 && n_fds == 0)
1207 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1208 else if (n == 0 && n_fds == 1)
1209 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1211 log_warning("Got too many file descriptors via native socket. Ignoring.");
1214 close_many(fds, n_fds);
/* SIGUSR1 handler (registered in setup_signals()): flushes the runtime
 * journal to /var and afterwards touches /run/systemd/journal/flushed,
 * the file system_journal_open() checks for. */
1218 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1219 Server *s = userdata;
1223 log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
1225 server_flush_to_var(s);
1229 touch("/run/systemd/journal/flushed");
/* SIGUSR2 handler (registered in setup_signals()): logs that a journal
 * rotation was requested and by which PID. */
1234 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1235 Server *s = userdata;
1239 log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
1246 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1247 Server *s = userdata;
1251 log_received_signal(LOG_INFO, si);
1253 sd_event_exit(s->event, 0);
1257 static int setup_signals(Server *s) {
1263 assert_se(sigemptyset(&mask) == 0);
1264 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1265 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1267 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1271 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1275 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1279 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
/* Applies the systemd.journald.forward_to_{syslog,kmsg,console,wall}=
 * switches found on the kernel command line to the corresponding boolean
 * fields of 's'. Values that fail to parse and unknown systemd.journald
 * options only cause a warning. */
1286 static int server_parse_proc_cmdline(Server *s) {
1287 _cleanup_free_ char *line = NULL;
1288 const char *w, *state;
1292 r = proc_cmdline(&line);
1294 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1298 FOREACH_WORD_QUOTED(w, l, line, state) {
1299 _cleanup_free_ char *word;
/* Work on a NUL-terminated copy of the current word. */
1301 word = strndup(w, l);
/* The numeric offsets below equal the length of the respective prefix, so
 * that 'word + N' points at the value following the '='. */
1305 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1306 r = parse_boolean(word + 35);
1308 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1310 s->forward_to_syslog = r;
1311 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1312 r = parse_boolean(word + 33);
1314 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1316 s->forward_to_kmsg = r;
1317 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1318 r = parse_boolean(word + 36);
1320 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1322 s->forward_to_console = r;
1323 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1324 r = parse_boolean(word + 33);
1326 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1328 s->forward_to_wall = r;
1329 } else if (startswith(word, "systemd.journald"))
1330 log_warning("Invalid systemd.journald parameter. Ignoring.");
1332 /* do not warn about state here, since probably systemd already did */
/* Parses /etc/systemd/journald.conf into 's' using the gperf-generated
 * lookup table journald_gperf_lookup and returns config_parse()'s result. */
1337 static int server_parse_config_file(Server *s) {
1340 return config_parse(NULL, "/etc/systemd/journald.conf", NULL,
1342 config_item_perf_lookup, journald_gperf_lookup,
1343 false, false, true, s);
/* Timer callback of the sync event source that server_schedule_sync()
 * registers; 'userdata' is the Server object. */
1346 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1347 Server *s = userdata;
/* Arranges for the journal files to be synced to disk after an entry of
 * the given 'priority' was written. Messages of priority CRIT or more
 * severe are treated as urgent; otherwise, unless a sync is already
 * scheduled, a one-shot timer is armed s->sync_interval_usec from now. */
1355 int server_schedule_sync(Server *s, int priority) {
1360 if (priority <= LOG_CRIT) {
1361 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1366 if (s->sync_scheduled)
/* An interval of 0 skips the timer setup below. */
1369 if (s->sync_interval_usec > 0) {
1372 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1376 when += s->sync_interval_usec;
/* The timer source is created on first use and re-armed afterwards. */
1378 if (!s->sync_event_source) {
1379 r = sd_event_add_time(
1381 &s->sync_event_source,
1384 server_dispatch_sync, s);
1388 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1390 r = sd_event_source_set_time(s->sync_event_source, when);
1394 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1399 s->sync_scheduled = true;
1405 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1406 Server *s = userdata;
1410 server_cache_hostname(s);
/* Opens /proc/sys/kernel/hostname and registers it with the event loop so
 * that dispatch_hostname_change() is invoked for events on it. One kind of
 * registration failure is tolerated: the descriptor is closed again and
 * only a warning is logged. On success the priority of the event source
 * is set to SD_EVENT_PRIORITY_IMPORTANT-10. */
1414 static int server_open_hostname(Server *s) {
1419 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1420 if (s->hostname_fd < 0) {
1421 log_error("Failed to open /proc/sys/kernel/hostname: %m");
1425 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1427 /* kernels prior to 3.2 don't support polling this file. Ignore
1430 log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1432 s->hostname_fd = safe_close(s->hostname_fd);
1436 log_error("Failed to register hostname fd in event loop: %s", strerror(-r));
1440 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1442 log_error("Failed to adjust priority of host name event source: %s", strerror(-r));
/* Initializes the Server object: assigns defaults, reads the configuration
 * file and the kernel command line, sets up the event loop, takes over the
 * sockets passed in by the service manager, opens the remaining input
 * sources, installs signal handling and opens the journal files. */
1449 int server_init(Server *s) {
/* -1 marks "no file descriptor". */
1455 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->hostname_fd = -1;
1459 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1460 s->sync_scheduled = false;
1462 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1463 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1465 s->forward_to_wall = true;
1467 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1469 s->max_level_store = LOG_DEBUG;
1470 s->max_level_syslog = LOG_DEBUG;
1471 s->max_level_kmsg = LOG_NOTICE;
1472 s->max_level_console = LOG_INFO;
1473 s->max_level_wall = LOG_EMERG;
/* All bytes of the metrics are set to 0xFF (presumably the marker for "not
 * configured" — confirm against the users of JournalMetrics). */
1475 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1476 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* Settings from the kernel command line are applied after those from the
 * configuration file. */
1478 server_parse_config_file(s);
1479 server_parse_proc_cmdline(s);
/* If exactly one of interval and burst is zero, both are reset to zero. */
1480 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1481 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1482 s->rate_limit_interval, s->rate_limit_burst);
1483 s->rate_limit_interval = s->rate_limit_burst = 0;
1486 mkdir_p("/run/systemd/journal", 0755);
1488 s->user_journals = ordered_hashmap_new(NULL);
1489 if (!s->user_journals)
1492 s->mmap = mmap_cache_new();
1496 r = sd_event_default(&s->event);
1498 log_error("Failed to create event loop: %s", strerror(-r));
1502 sd_event_set_watchdog(s->event, true);
/* Sort the passed file descriptors by socket type and path; more than one
 * socket of a kind and unknown sockets are reported as errors. */
1504 n = sd_listen_fds(true);
1506 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1510 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1512 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1514 if (s->native_fd >= 0) {
1515 log_error("Too many native sockets passed.");
1521 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1523 if (s->stdout_fd >= 0) {
1524 log_error("Too many stdout sockets passed.");
1530 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1531 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1533 if (s->syslog_fd >= 0) {
1534 log_error("Too many /dev/log sockets passed.");
1541 log_error("Unknown socket passed.");
/* Open the individual input sources. */
1546 r = server_open_syslog_socket(s);
1550 r = server_open_native_socket(s);
1554 r = server_open_stdout_socket(s);
1558 r = server_open_dev_kmsg(s);
1562 r = server_open_kernel_seqnum(s);
1566 r = server_open_hostname(s);
1570 r = setup_signals(s);
1574 s->udev = udev_new();
1578 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1582 r = cg_get_root_path(&s->cgroup_root);
/* Prepare the per-entry constant fields. */
1586 server_cache_hostname(s);
1587 server_cache_boot_id(s);
1588 server_cache_machine_id(s);
1590 r = system_journal_open(s, false);
/* Calls journal_file_maybe_append_tag() with the current wall-clock time
 * for the system journal and for every open per-user journal. */
1597 void server_maybe_append_tags(Server *s) {
1603 n = now(CLOCK_REALTIME);
1605 if (s->system_journal)
1606 journal_file_maybe_append_tag(s->system_journal, n);
1608 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1609 journal_file_maybe_append_tag(f, n);
/* Releases everything the Server object holds: stdout streams, journal
 * files, event sources and the event loop, file descriptors, the rate
 * limiter, the kernel sequence number mapping, the cgroup root string, the
 * mmap cache and the udev context. */
1613 void server_done(Server *s) {
/* stdout_stream_free() is expected to unlink the stream from
 * s->stdout_streams, otherwise this loop would not end — confirm. */
1617 while (s->stdout_streams)
1618 stdout_stream_free(s->stdout_streams);
1620 if (s->system_journal)
1621 journal_file_close(s->system_journal);
1623 if (s->runtime_journal)
1624 journal_file_close(s->runtime_journal);
/* Close all per-user journals before freeing the map itself. */
1626 while ((f = ordered_hashmap_steal_first(s->user_journals)))
1627 journal_file_close(f);
1629 ordered_hashmap_free(s->user_journals);
1631 sd_event_source_unref(s->syslog_event_source);
1632 sd_event_source_unref(s->native_event_source);
1633 sd_event_source_unref(s->stdout_event_source);
1634 sd_event_source_unref(s->dev_kmsg_event_source);
1635 sd_event_source_unref(s->sync_event_source);
1636 sd_event_source_unref(s->sigusr1_event_source);
1637 sd_event_source_unref(s->sigusr2_event_source);
1638 sd_event_source_unref(s->sigterm_event_source);
1639 sd_event_source_unref(s->sigint_event_source);
1640 sd_event_source_unref(s->hostname_event_source);
1641 sd_event_unref(s->event);
1643 safe_close(s->syslog_fd);
1644 safe_close(s->native_fd);
1645 safe_close(s->stdout_fd);
1646 safe_close(s->dev_kmsg_fd);
1647 safe_close(s->hostname_fd);
1650 journal_rate_limit_free(s->rate_limit);
/* The mapping is released with a length of sizeof(uint64_t). */
1652 if (s->kernel_seqnum)
1653 munmap(s->kernel_seqnum, sizeof(uint64_t));
1657 free(s->cgroup_root);
1660 mmap_cache_unref(s->mmap);
1663 udev_unref(s->udev);