1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
43 #include "conf-parser.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-server.h"
48 #include "journald-rate-limit.h"
49 #include "journald-kmsg.h"
50 #include "journald-syslog.h"
51 #include "journald-stream.h"
52 #include "journald-console.h"
53 #include "journald-native.h"
57 #include <acl/libacl.h>
62 #include <selinux/selinux.h>
/* Upper bound on the number of per-user journal files kept open at once;
 * find_journal() closes entries while the hashmap holds this many. */
65 #define USER_JOURNALS_MAX 1024
/* Defaults that server_init() assigns before the configuration is parsed. */
67 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
68 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
69 #define DEFAULT_RATE_LIMIT_BURST 200
/* Maximum age of the value cached by available_space() before it is
 * recomputed. */
71 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Textual names of the Storage enum values, indexed by enum value. The two
 * macro invocations below are given this table's "storage" prefix; their
 * definitions are not visible here, but judging by their names they generate
 * the string/enum lookup helpers and the config_parse_storage() parser. */
73 static const char* const storage_table[] = {
74 [STORAGE_AUTO] = "auto",
75 [STORAGE_VOLATILE] = "volatile",
76 [STORAGE_PERSISTENT] = "persistent",
77 [STORAGE_NONE] = "none"
80 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
81 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Textual names of the SplitMode enum values, indexed by enum value.
 * NOTE(review): one initializer line is elided in this listing, so the table
 * shown here is not the complete set of modes. As with the storage table,
 * the macros below presumably generate the lookup helpers and the
 * config_parse_split_mode() parser. */
83 static const char* const split_mode_table[] = {
84 [SPLIT_NONE] = "none",
86 [SPLIT_LOGIN] = "login"
89 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
90 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Returns the number of bytes the journal may still grow by, caching the
 * result in s->cached_available_space for RECHECK_AVAILABLE_SPACE_USEC.
 *
 * Two limits are derived from the metrics of the active journal directory:
 * max_use minus the space already occupied by journal files there, and the
 * free space of the backing file system minus keep_free. Most declarations,
 * the error paths, the directory open/loop framing and the final return are
 * elided in this listing. */
92 static uint64_t available_space(Server *s) {
94 _cleanup_free_ char *p = NULL;
98 uint64_t sum = 0, avail = 0, ss_avail = 0;
100 _cleanup_closedir_ DIR *d = NULL;
104 ts = now(CLOCK_MONOTONIC);
/* Serve the cached value while it is younger than the recheck interval. */
106 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
107 return s->cached_available_space;
109 r = sd_id128_get_machine(&machine);
/* Choose directory prefix and metrics: the persistent ones when the system
 * journal is open; the runtime assignments presumably sit in an elided else
 * branch. */
113 if (s->system_journal) {
114 f = "/var/log/journal/";
115 m = &s->system_metrics;
117 f = "/run/log/journal/";
118 m = &s->runtime_metrics;
/* The directory examined is <prefix><machine-id>. */
123 p = strappend(f, sd_id128_to_string(machine, ids));
131 if (fstatvfs(dirfd(d), &ss) < 0)
137 union dirent_storage buf;
139 r = readdir_r(d, &buf.de, &de);
/* Filter directory entries by the two journal file name suffixes. */
146 if (!endswith(de->d_name, ".journal") &&
147 !endswith(de->d_name, ".journal~"))
150 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
153 if (!S_ISREG(st.st_mode))
/* Accumulate allocated size: block count times 512 bytes. */
156 sum += (uint64_t) st.st_blocks * 512UL;
/* Headroom left under the configured max_use, clamped at 0. */
159 avail = sum >= m->max_use ? 0 : m->max_use - sum;
/* Free space on the file system, less the keep_free reserve, clamped at 0. */
161 ss_avail = ss.f_bsize * ss.f_bavail;
163 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
/* NOTE(review): the statement guarded by this check is elided; presumably it
 * lowers avail to ss_avail so the smaller limit wins. */
165 if (ss_avail < avail)
/* Remember the result together with the time it was computed. */
168 s->cached_available_space = avail;
169 s->cached_available_space_timestamp = ts;
/* Resolves the gid of the "systemd-journal" group into s->file_gid.
 * s->file_gid_valid is checked first and set at the end, so the lookup is
 * not meant to be repeated (the early-exit statement itself is elided in
 * this listing). A failed lookup is only logged as a warning. */
174 static void server_read_file_gid(Server *s) {
175 const char *g = "systemd-journal";
180 if (s->file_gid_valid)
183 r = get_group_creds(&g, &s->file_gid);
185 log_warning("Failed to resolve '%s' group: %s", g, strerror(-r));
187 /* if we couldn't read the gid, then it will be 0, but that's
188 * fine and we shouldn't try to resolve the group again, so
189 * let's just pretend it worked right-away. */
190 s->file_gid_valid = true;
/* Applies the standard ownership and access rights to journal file f:
 * mode 0640, owner 0 and group s->file_gid via fchmod_and_fchown(), followed
 * by an ACL update that gives uid an ACL_USER entry with ACL_READ.
 *
 * Every failure shown here is logged as a warning and ignored. The
 * conditions guarding the individual steps (error checks, any early exits)
 * are elided in this listing. */
193 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
198 acl_permset_t permset;
/* Make sure s->file_gid has been resolved before using it. */
203 server_read_file_gid(s);
205 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
207 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
213 acl = acl_get_fd(f->fd);
215 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an existing entry for uid; the creation below presumably only
 * runs when none was found (the guarding condition is elided). */
219 r = acl_find_uid(acl, uid, &entry);
222 if (acl_create_entry(&acl, &entry) < 0 ||
223 acl_set_tag_type(entry, ACL_USER) < 0 ||
224 acl_set_qualifier(entry, &uid) < 0) {
225 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
230 /* We do not recalculate the mask unconditionally here,
231 * so that the fchmod() mask above stays intact. */
232 if (acl_get_permset(entry, &permset) < 0 ||
233 acl_add_perm(permset, ACL_READ) < 0 ||
234 calc_acl_mask_if_needed(&acl) < 0) {
235 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Write the modified ACL back to the file. */
239 if (acl_set_fd(f->fd, acl) < 0)
240 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Selects the journal file that an entry attributed to uid is written to.
 *
 * An open runtime journal always wins. Otherwise the per-user file
 * /var/log/journal/<machine-id>/user-<uid>.journal is taken from
 * s->user_journals, or opened, given its permissions and inserted there.
 * s->system_journal is returned on the failure paths shown below; the
 * conditions guarding several of these returns, and the success return, are
 * elided in this listing. */
247 static JournalFile* find_journal(Server *s, uid_t uid) {
255 /* We split up user logs only on /var, not on /run. If the
256 * runtime file is open, we write to it exclusively, in order
257 * to guarantee proper order as soon as we flush /run to
258 * /var and close the runtime file. */
260 if (s->runtime_journal)
261 return s->runtime_journal;
264 return s->system_journal;
266 r = sd_id128_get_machine(&machine);
268 return s->system_journal;
/* Check the cache of already opened per-user journals. */
270 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
274 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
275 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
276 return s->system_journal;
/* Keep the number of open user journals below USER_JOURNALS_MAX by closing
 * whichever entry the hashmap hands out first. */
278 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
279 /* Too many open? Then let's close one */
280 f = hashmap_steal_first(s->user_journals);
282 journal_file_close(f);
285 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
289 return s->system_journal;
291 server_fix_perms(s, f, uid);
/* Cache the new file; if that fails it is closed again and the system
 * journal is used instead. */
293 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
295 journal_file_close(f);
296 return s->system_journal;
/* Rotates the runtime journal, the system journal and every open per-user
 * journal. For each file, failures are logged (with the path when the file
 * object is still available) and server_fix_perms() is applied to the
 * resulting file. The if/else framing that separates the failure and
 * success paths is elided in this listing. */
302 void server_rotate(Server *s) {
308 log_debug("Rotating...");
/* Runtime journal: rotated with sealing disabled. */
310 if (s->runtime_journal) {
311 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
313 if (s->runtime_journal)
314 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
316 log_error("Failed to create new runtime journal: %s", strerror(-r));
318 server_fix_perms(s, s->runtime_journal, 0);
/* System journal: rotated with the configured seal setting. */
321 if (s->system_journal) {
322 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
324 if (s->system_journal)
325 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
327 log_error("Failed to create new system journal: %s", strerror(-r));
330 server_fix_perms(s, s->system_journal, 0);
/* User journals: the hashmap entry is replaced with the rotated file, and
 * the key (the uid) is passed on to server_fix_perms(). */
333 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
334 r = journal_file_rotate(&f, s->compress, s->seal);
337 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
339 log_error("Failed to create user journal: %s", strerror(-r));
341 hashmap_replace(s->user_journals, k, f);
342 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Calls journal_file_set_offline() on the system journal and on every open
 * user journal, then programs the sync timer with an all-zero itimerspec
 * and clears s->sync_scheduled so that a new sync can be scheduled.
 * Failures are logged; the error checks themselves are elided in this
 * listing. */
347 void server_sync(Server *s) {
/* Zero-initialized timer specification used to switch the timer off. */
353 static const struct itimerspec sync_timer_disable = {};
355 if (s->system_journal) {
356 r = journal_file_set_offline(s->system_journal);
358 log_error("Failed to sync system journal: %s", strerror(-r));
361 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
362 r = journal_file_set_offline(f);
364 log_error("Failed to sync user journal: %s", strerror(-r));
367 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
/* NOTE(review): the message says "max timer" although the fd is the sync
 * timer; left untouched here because it is runtime text. */
369 log_error("Failed to disable max timer: %m");
371 s->sync_scheduled = false;
/* Runs journal_directory_vacuum() on the persistent directory
 * (/var/log/journal/<machine-id>) when the system journal is open and on the
 * runtime directory (/run/log/journal/<machine-id>) when the runtime journal
 * is open, using the matching metrics and s->max_retention_usec.
 *
 * s->oldest_file_usec is reset first and handed to the vacuum calls as an
 * output parameter. -ENOENT results are not reported. Allocation checks and
 * cleanup of p are elided in this listing. */
374 void server_vacuum(Server *s) {
380 log_debug("Vacuuming...");
382 s->oldest_file_usec = 0;
384 r = sd_id128_get_machine(&machine);
386 log_error("Failed to get machine ID: %s", strerror(-r));
390 sd_id128_to_string(machine, ids);
392 if (s->system_journal) {
393 p = strappend("/var/log/journal/", ids);
399 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
400 if (r < 0 && r != -ENOENT)
401 log_error("Failed to vacuum %s: %s", p, strerror(-r));
405 if (s->runtime_journal) {
406 p = strappend("/run/log/journal/", ids);
412 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
413 if (r < 0 && r != -ENOENT)
414 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* A zero timestamp makes the next available_space() call recompute instead
 * of returning the cached value. */
418 s->cached_available_space_timestamp = 0;
/* Classifies the result of a failed journal append and logs why the file is
 * about to be rotated.
 *
 * f is the journal file the append was attempted on (only f->path is used,
 * for the log messages); r is the negative errno-style result of the append.
 *
 * Error classes handled by the visible branches:
 *   -E2BIG, -EFBIG, -EDQUOT, -ENOSPC   allocation limit reached
 *   -EHOSTDOWN                         file belongs to another machine
 *   -EBUSY                             unclean shutdown
 *   -EPROTONOSUPPORT                   unsupported feature
 *   -EBADMSG, -ENODATA, -ESHUTDOWN     file corrupted or already archived
 *
 * Callers use the boolean result to decide whether to rotate and retry; the
 * lines producing that result are elided in this listing. */
421 bool shall_try_append_again(JournalFile *f, int r) {
423 /* -E2BIG Hit configured limit
425 -EDQUOT Quota limit hit
427 -EHOSTDOWN Other machine
428 -EBUSY Unclean shutdown
429 -EPROTONOSUPPORT Unsupported feature
432 -ESHUTDOWN Already archived */
434 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
435 log_debug("%s: Allocation limit reached, rotating.", f->path);
436 else if (r == -EHOSTDOWN)
437 log_info("%s: Journal file from other machine, rotating.", f->path);
438 else if (r == -EBUSY)
439 log_info("%s: Unclean shutdown, rotating.", f->path);
440 else if (r == -EPROTONOSUPPORT)
441 log_info("%s: Unsupported feature, rotating.", f->path);
/* r carries negated errno values like every other comparison here, so the
 * archived-file case has to be matched as -ESHUTDOWN; the positive constant
 * could never be equal to a negative r. */
442 else if (r == -EBADMSG || r == -ENODATA || r == -ESHUTDOWN)
443 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Appends the n fields in iovec as one entry to the journal selected by
 * find_journal() for uid.
 *
 * Visible flow: rotate first if journal_file_rotate_suggested() says so,
 * append, schedule a sync, and on an error that shall_try_append_again()
 * accepts, look the file up again and retry the append once. The rotate and
 * vacuum calls, the handling of the vacuumed flag and the success/failure
 * checks between these steps are elided in this listing. */
450 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
452 bool vacuumed = false;
459 f = find_journal(s, uid);
463 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
464 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
/* The file may have been replaced, so it is looked up again. */
469 f = find_journal(s, uid);
474 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
476 server_schedule_sync(s);
/* Give up if the error is not one that rotation can cure, or if vacuumed is
 * already set. */
480 if (vacuumed || !shall_try_append_again(f, r)) {
481 log_error("Failed to write entry, ignoring: %s", strerror(-r));
488 f = find_journal(s, uid);
492 log_debug("Retrying write.");
493 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
495 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* Appends the trusted metadata fields to an entry and writes it out.
 *
 * iovec holds n caller-supplied fields and has room for m elements in total;
 * the function appends further fields at iovec[n++] and finally passes the
 * array to write_to_journal(). The fields visible below are _PID, _UID,
 * _GID, _COMM, _EXE, _CMDLINE, _AUDIT_SESSION, _AUDIT_LOGINUID,
 * _SYSTEMD_CGROUP, _SYSTEMD_SESSION, _SYSTEMD_OWNER_UID, _SYSTEMD_UNIT or
 * _SYSTEMD_USER_UNIT, _SELINUX_CONTEXT, _SOURCE_REALTIME_TIMESTAMP,
 * _BOOT_ID, _MACHINE_ID and _HOSTNAME.
 *
 * Part of the parameter list, the checks on ucred/tv/label, the success
 * checks after each lookup, the freeing of temporaries and any preprocessor
 * conditionals are elided in this listing. */
498 static void dispatch_message_real(
500 struct iovec *iovec, unsigned n, unsigned m,
503 const char *label, size_t label_len,
504 const char *unit_id) {
/* Fixed-size buffers for the numeric fields; each is sized from the field
 * prefix plus DECIMAL_STR_MAX of the value type (32 characters for the two
 * 128-bit IDs). */
506 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
507 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
508 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
509 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
510 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
511 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
512 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=";
513 char *comm, *exe, *cmdline, *cgroup, *session, *unit, *hostname;
517 uid_t realuid = 0, owner = 0, journal_uid;
518 bool owner_valid = false;
520 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
521 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
/* The caller must have left room for the metadata fields. */
530 assert(n + N_IOVEC_META_FIELDS <= m);
/* Fields taken directly from the sender's credentials. */
533 realuid = ucred->uid;
535 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
536 IOVEC_SET_STRING(iovec[n++], pid);
538 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
539 IOVEC_SET_STRING(iovec[n++], uid);
541 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
542 IOVEC_SET_STRING(iovec[n++], gid);
/* Fields looked up from the sending process by pid. */
544 r = get_process_comm(ucred->pid, &t);
546 comm = strappenda("_COMM=", t);
548 IOVEC_SET_STRING(iovec[n++], comm);
551 r = get_process_exe(ucred->pid, &t);
553 exe = strappenda("_EXE=", t);
555 IOVEC_SET_STRING(iovec[n++], exe);
558 r = get_process_cmdline(ucred->pid, 0, false, &t);
560 cmdline = strappenda("_CMDLINE=", t);
562 IOVEC_SET_STRING(iovec[n++], cmdline);
/* Audit session and login uid of the sending process. */
566 r = audit_session_from_pid(ucred->pid, &audit);
568 sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
569 IOVEC_SET_STRING(iovec[n++], audit_session);
572 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
574 sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
575 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* Control group path and the session, owner uid and unit derived from it. */
579 r = cg_pid_get_path_shifted(ucred->pid, NULL, &c);
581 cgroup = strappenda("_SYSTEMD_CGROUP=", c);
582 IOVEC_SET_STRING(iovec[n++], cgroup);
584 r = cg_path_get_session(c, &t);
586 session = strappenda("_SYSTEMD_SESSION=", t);
588 IOVEC_SET_STRING(iovec[n++], session);
591 if (cg_path_get_owner_uid(c, &owner) >= 0) {
594 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
595 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* Unit name: taken from the cgroup path when it yields a system or user
 * unit, otherwise from the caller-supplied unit_id. The condition choosing
 * between the two unit_id variants is elided. */
598 if (cg_path_get_unit(c, &t) >= 0) {
599 unit = strappenda("_SYSTEMD_UNIT=", t);
601 } else if (cg_path_get_user_unit(c, &t) >= 0) {
602 unit = strappenda("_SYSTEMD_USER_UNIT=", t);
604 } else if (unit_id) {
606 unit = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
608 unit = strappenda("_SYSTEMD_UNIT=", unit_id);
613 IOVEC_SET_STRING(iovec[n++], unit);
/* SELinux context: built from the label/label_len passed in, which is not
 * assumed to be NUL-terminated, or obtained with getpidcon(). The condition
 * selecting between the two sources is elided. */
620 char *selinux_context = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);
622 *((char*) mempcpy(stpcpy(selinux_context, "_SELINUX_CONTEXT="), label, label_len)) = 0;
623 IOVEC_SET_STRING(iovec[n++], selinux_context);
625 security_context_t con;
627 if (getpidcon(ucred->pid, &con) >= 0) {
628 char *selinux_context = strappenda("_SELINUX_CONTEXT=", con);
631 IOVEC_SET_STRING(iovec[n++], selinux_context);
/* Timestamp supplied with the message, as returned by timeval_load(). */
638 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
639 IOVEC_SET_STRING(iovec[n++], source_time);
642 /* Note that strictly speaking storing the boot id here is
643 * redundant since the entry includes this in-line
644 * anyway. However, we need this indexed, too. */
645 r = sd_id128_get_boot(&id);
/* The ID string is written right after the prefix already stored in the
 * buffer. */
647 sd_id128_to_string(id, boot_id + sizeof("_BOOT_ID=") - 1);
648 IOVEC_SET_STRING(iovec[n++], boot_id);
651 r = sd_id128_get_machine(&id);
653 sd_id128_to_string(id, machine_id + sizeof("_MACHINE_ID=") - 1);
654 IOVEC_SET_STRING(iovec[n++], machine_id);
657 t = gethostname_malloc();
659 hostname = strappenda("_HOSTNAME=", t);
661 IOVEC_SET_STRING(iovec[n++], hostname);
/* Choose which journal receives the entry, depending on the split mode. The
 * assignments for the SPLIT_LOGIN case and the fallback are elided. */
666 if (s->split_mode == SPLIT_UID && realuid > 0)
667 /* Split up strictly by any UID */
668 journal_uid = realuid;
669 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
670 /* Split up by login UIDs, this avoids creation of
671 * individual journals for system UIDs. We do this
672 * only if the realuid is not root, in order not to
673 * accidentally leak privileged information to the
674 * user that is logged by a privileged process that is
675 * part of an unprivileged session.*/
680 write_to_journal(s, journal_uid, iovec, n);
/* Writes a message generated by the journal daemon itself into the journal.
 *
 * format and the variadic arguments are printf-style and form the MESSAGE=
 * field. The entry also carries PRIORITY=6 and _TRANSPORT=driver, and a
 * message ID field when message_id is not SD_ID128_NULL. The credentials
 * passed on to dispatch_message_real() are those of the calling process. */
683 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
/* 11 + 32 + 1: sized for an 11-character field prefix, 32 ID characters and
 * the terminating NUL (presumably the "MESSAGE_ID=" prefix; the MESSAGE_ID
 * macro is defined elsewhere). */
684 char mid[11 + 32 + 1];
685 char buffer[16 + LINE_MAX + 1];
/* Room for the up to four fields added here plus the metadata fields that
 * dispatch_message_real() appends. */
686 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
689 struct ucred ucred = {};
694 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
695 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The formatted text is placed right after the 8-byte "MESSAGE=" prefix. */
697 memcpy(buffer, "MESSAGE=", 8);
698 va_start(ap, format);
699 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
701 char_array_0(buffer);
702 IOVEC_SET_STRING(iovec[n++], buffer);
704 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
705 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
707 IOVEC_SET_STRING(iovec[n++], mid);
710 ucred.pid = getpid();
711 ucred.uid = getuid();
712 ucred.gid = getgid();
714 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Entry point for messages received from clients: applies the storage level
 * filter and rate limiting, then hands the entry to dispatch_message_real().
 *
 * The rate limit is keyed on the sender's control group path, truncated as
 * described in the comment below. When journal_rate_limit_test() reports
 * suppressed messages, a SD_MESSAGE_JOURNAL_DROPPED driver message is
 * written. Part of the parameter list, the early exits and the checks on the
 * rate limit result are elided in this listing. */
717 void server_dispatch_message(
719 struct iovec *iovec, unsigned n, unsigned m,
722 const char *label, size_t label_len,
727 _cleanup_free_ char *path = NULL;
731 assert(iovec || n == 0);
/* Priorities numerically above s->max_level_store are filtered here. */
736 if (LOG_PRI(priority) > s->max_level_store)
742 r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
746 /* example: /user/lennart/3/foobar
747 * /system/dbus.service/foobar
749 * So let's cut off everything past the third /, since that is
750 * where user directories start */
752 c = strchr(path, '/');
754 c = strchr(c+1, '/');
756 c = strchr(c+1, '/');
/* Only the priority level bits take part in the test; the currently
 * available space is passed along as well. */
762 rl = journal_rate_limit_test(s->rate_limit, path,
763 priority & LOG_PRIMASK, available_space(s));
768 /* Write a suppression message if we suppressed something */
770 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
771 "Suppressed %u messages from %s", rl - 1, path);
774 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
/* Opens the journal files the server writes to.
 *
 * Persistent journal (/var/log/journal/<machine-id>/system.journal): opened
 * when it is not open yet, storage is "persistent" or "auto", and the flag
 * file /run/systemd/journal/flushed exists. -ENOENT and -EROFS failures are
 * not reported.
 *
 * Runtime journal (/run/log/journal/<machine-id>/system.journal): considered
 * when it is not open yet and storage is not "none". With a system journal
 * present it is only opened if it already exists (so it can be flushed);
 * otherwise it is created.
 *
 * For every journal opened, permissions are fixed and a driver message
 * announces the size limit, plus a second message when the space currently
 * available is below that limit. Declarations, allocation checks, freeing of
 * fn and the result checks/returns are elided in this listing. */
778 static int system_journal_open(Server *s) {
785 avail = available_space(s);
787 r = sd_id128_get_machine(&machine);
791 sd_id128_to_string(machine, ids);
793 if (!s->system_journal &&
794 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
795 access("/run/systemd/journal/flushed", F_OK) >= 0) {
797 /* If in auto mode: first try to create the machine
798 * path, but not the prefix.
800 * If in persistent mode: create /var/log/journal and
801 * the machine path */
803 if (s->storage == STORAGE_PERSISTENT)
804 (void) mkdir("/var/log/journal/", 0755);
806 fn = strappend("/var/log/journal/", ids);
810 (void) mkdir(fn, 0755);
813 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
817 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
821 char fb[FORMAT_BYTES_MAX];
823 server_fix_perms(s, s->system_journal, 0);
824 server_driver_message(s, SD_ID128_NULL, "Allowing system journal files to grow to %s.",
825 format_bytes(fb, sizeof(fb), s->system_metrics.max_use));
827 if (s->system_metrics.max_use > avail)
828 server_driver_message(s, SD_ID128_NULL, "Journal size currently limited to %s due to SystemKeepFree.",
829 format_bytes(fb, sizeof(fb), avail));
833 if (r != -ENOENT && r != -EROFS)
834 log_warning("Failed to open system journal: %s", strerror(-r));
840 if (!s->runtime_journal &&
841 (s->storage != STORAGE_NONE)) {
843 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
847 if (s->system_journal) {
849 /* Try to open the runtime journal, but only
850 * if it already exists, so that we can flush
851 * it into the system journal */
853 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
858 log_warning("Failed to open runtime journal: %s", strerror(-r));
865 /* OK, we really need the runtime journal, so create
866 * it if necessary. */
868 (void) mkdir_parents(fn, 0755);
869 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
873 log_error("Failed to open runtime journal: %s", strerror(-r));
878 if (s->runtime_journal) {
879 char fb[FORMAT_BYTES_MAX];
881 server_fix_perms(s, s->runtime_journal, 0);
882 server_driver_message(s, SD_ID128_NULL, "Allowing runtime journal files to grow to %s.",
883 format_bytes(fb, sizeof(fb), s->runtime_metrics.max_use));
/* The runtime journal is governed by the runtime metrics, so the notice
 * about the RuntimeKeepFree limit must be gated on the runtime max_use (the
 * value announced just above), not on the system journal's limit. */
885 if (s->runtime_metrics.max_use > avail)
886 server_driver_message(s, SD_ID128_NULL, "Journal size currently limited to %s due to RuntimeKeepFree.",
887 format_bytes(fb, sizeof(fb), avail));
/* Copies all entries of the runtime journal into the system journal and
 * then removes the runtime journal.
 *
 * The function only proceeds when storage is "auto" or "persistent", a
 * runtime journal is open, and system_journal_open() yields a system
 * journal. Entries are read through an sd_journal handle opened with
 * SD_JOURNAL_RUNTIME_ONLY and copied one by one; when a copy fails with an
 * error that shall_try_append_again() accepts, the copy is retried once.
 * Afterwards the runtime journal is closed and /run/log/journal is removed.
 * Return values, rotation/vacuum calls and the cleanup labels are elided in
 * this listing. */
894 int server_flush_to_var(Server *s) {
897 sd_journal *j = NULL;
901 if (s->storage != STORAGE_AUTO &&
902 s->storage != STORAGE_PERSISTENT)
905 if (!s->runtime_journal)
908 system_journal_open(s);
910 if (!s->system_journal)
913 log_debug("Flushing to /var...");
915 r = sd_id128_get_machine(&machine);
917 log_error("Failed to get machine id: %s", strerror(-r));
921 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
923 log_error("Failed to read runtime journal: %s", strerror(-r));
/* A threshold of 0 is set so field data is not truncated while copying
 * (per the sd_journal_set_data_threshold() API; not visible in this file). */
927 sd_journal_set_data_threshold(j, 0);
929 SD_JOURNAL_FOREACH(j) {
/* f is the file the iterator currently points into (its assignment is
 * elided); the entry object is located at f->current_offset. */
934 assert(f && f->current_offset > 0);
936 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
938 log_error("Can't read entry: %s", strerror(-r));
942 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
946 if (!shall_try_append_again(s->system_journal, r)) {
947 log_error("Can't write entry: %s", strerror(-r));
/* The system journal can be gone at this point if the (elided) rotation did
 * not succeed. */
954 if (!s->system_journal) {
955 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
960 log_debug("Retrying write.");
961 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
963 log_error("Can't write entry: %s", strerror(-r));
969 journal_file_post_change(s->system_journal);
971 journal_file_close(s->runtime_journal);
972 s->runtime_journal = NULL;
975 rm_rf("/run/log/journal", false, true, false);
/* Handles one epoll event for the server, dispatching on the file
 * descriptor stored in ev->data.fd:
 *
 *   signal_fd             read a signalfd_siginfo; SIGUSR1 flushes to /var,
 *                         SIGUSR2 and other signals are handled in elided
 *                         lines
 *   sync_timer_fd         read the timer and (in elided lines) sync
 *   dev_kmsg_fd           server_read_dev_kmsg()
 *   native_fd, syslog_fd  receive datagrams with their control messages and
 *                         pass them to the syslog/native processors
 *   stdout_fd             accept a new stdout stream
 *   anything else         treated as a StdoutStream pointer in ev->data.ptr
 *
 * Return values, loop framing and most error exits are elided in this
 * listing. */
982 int process_event(Server *s, struct epoll_event *ev) {
986 if (ev->data.fd == s->signal_fd) {
987 struct signalfd_siginfo sfsi;
990 if (ev->events != EPOLLIN) {
991 log_error("Got invalid event from epoll.");
995 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
996 if (n != sizeof(sfsi)) {
1001 if (errno == EINTR || errno == EAGAIN)
/* SIGUSR1: create the flag file that system_journal_open() checks for, then
 * flush the runtime journal to /var. */
1007 if (sfsi.ssi_signo == SIGUSR1) {
1008 touch("/run/systemd/journal/flushed");
1009 server_flush_to_var(s);
1014 if (sfsi.ssi_signo == SIGUSR2) {
1020 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1024 } else if (ev->data.fd == s->sync_timer_fd) {
1028 log_debug("Got sync request from epoll.");
/* Read the timer fd's value into t. */
1030 r = read(ev->data.fd, (void *)&t, sizeof(t));
1037 } else if (ev->data.fd == s->dev_kmsg_fd) {
1040 if (ev->events != EPOLLIN) {
1041 log_error("Got invalid event from epoll.");
1045 r = server_read_dev_kmsg(s);
1051 } else if (ev->data.fd == s->native_fd ||
1052 ev->data.fd == s->syslog_fd) {
1054 if (ev->events != EPOLLIN) {
1055 log_error("Got invalid event from epoll.");
1060 struct msghdr msghdr;
1062 struct ucred *ucred = NULL;
1063 struct timeval *tv = NULL;
1064 struct cmsghdr *cmsg;
1066 size_t label_len = 0;
1068 struct cmsghdr cmsghdr;
1070 /* We use NAME_MAX space for the
1071 * SELinux label here. The kernel
1072 * currently enforces no limit, but
1073 * according to suggestions from the
1074 * SELinux people this will change and
1075 * it will probably be identical to
1076 * NAME_MAX. For now we use that, but
1077 * this should be updated one day when
1078 * the final limit is known.*/
1079 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1080 CMSG_SPACE(sizeof(struct timeval)) +
1081 CMSG_SPACE(sizeof(int)) + /* fd */
1082 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask the socket how many bytes are pending (stored in v) so the receive
 * buffer can be enlarged first when it is too small. */
1089 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1090 log_error("SIOCINQ failed: %m");
1094 if (s->buffer_size < (size_t) v) {
/* Grow to at least double the old size; one extra byte is allocated beyond
 * the size l. */
1098 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1099 b = realloc(s->buffer, l+1);
1102 log_error("Couldn't increase buffer.");
1111 iovec.iov_base = s->buffer;
1112 iovec.iov_len = s->buffer_size;
1116 msghdr.msg_iov = &iovec;
1117 msghdr.msg_iovlen = 1;
1118 msghdr.msg_control = &control;
1119 msghdr.msg_controllen = sizeof(control);
1121 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1124 if (errno == EINTR || errno == EAGAIN)
1127 log_error("recvmsg() failed: %m");
/* Pick the sender credentials, security label, timestamp and passed file
 * descriptors out of the control messages. */
1131 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1133 if (cmsg->cmsg_level == SOL_SOCKET &&
1134 cmsg->cmsg_type == SCM_CREDENTIALS &&
1135 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1136 ucred = (struct ucred*) CMSG_DATA(cmsg);
1137 else if (cmsg->cmsg_level == SOL_SOCKET &&
1138 cmsg->cmsg_type == SCM_SECURITY) {
1139 label = (char*) CMSG_DATA(cmsg);
1140 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1141 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1142 cmsg->cmsg_type == SO_TIMESTAMP &&
1143 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1144 tv = (struct timeval*) CMSG_DATA(cmsg);
1145 else if (cmsg->cmsg_level == SOL_SOCKET &&
1146 cmsg->cmsg_type == SCM_RIGHTS) {
1147 fds = (int*) CMSG_DATA(cmsg);
1148 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* Syslog socket: only plain data without file descriptors is processed. The
 * buffer is searched for a newline (the use of e is elided) and stripped
 * before processing. */
1152 if (ev->data.fd == s->syslog_fd) {
1155 if (n > 0 && n_fds == 0) {
1156 e = memchr(s->buffer, '\n', n);
1162 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1163 } else if (n_fds > 0)
1164 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native socket: either payload data without file descriptors, or an empty
 * datagram carrying exactly one file descriptor. */
1167 if (n > 0 && n_fds == 0)
1168 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1169 else if (n == 0 && n_fds == 1)
1170 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1172 log_warning("Got too many file descriptors via native socket. Ignoring.");
/* Close whatever file descriptors were received. */
1175 close_many(fds, n_fds);
1180 } else if (ev->data.fd == s->stdout_fd) {
1182 if (ev->events != EPOLLIN) {
1183 log_error("Got invalid event from epoll.");
1187 stdout_stream_new(s);
1191 StdoutStream *stream;
/* Any event bit other than EPOLLIN and EPOLLHUP is rejected. */
1193 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1194 log_error("Got invalid event from epoll.");
1198 /* If it is none of the well-known fds, it must be an
1199 * stdout stream fd. Note that this is a bit ugly here
1200 * (since we rely that none of the well-known fds
1201 * could be interpreted as pointer), but nonetheless
1202 * safe, since the well-known fds would never get an
1203 * fd > 4096, i.e. beyond the first memory page */
1205 stream = ev->data.ptr;
/* The stream is freed when processing returns zero or a negative value. */
1207 if (stdout_stream_process(stream) <= 0)
1208 stdout_stream_free(stream);
1213 log_error("Unknown event.");
/* Sets the process signal mask to SIGINT, SIGTERM, SIGUSR1 and SIGUSR2,
 * creates a non-blocking, close-on-exec signalfd for that set in
 * s->signal_fd and registers it with the server's epoll instance for
 * EPOLLIN. Failures of signalfd() and epoll_ctl() are logged; the return
 * statements are elided in this listing. */
1217 static int open_signalfd(Server *s) {
1219 struct epoll_event ev;
1223 assert_se(sigemptyset(&mask) == 0);
1224 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
/* SIG_SETMASK replaces the whole mask, so exactly these four signals end up
 * blocked and are delivered through the signalfd instead. */
1225 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1227 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1228 if (s->signal_fd < 0) {
1229 log_error("signalfd(): %m");
1234 ev.events = EPOLLIN;
1235 ev.data.fd = s->signal_fd;
1237 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1238 log_error("epoll_ctl(): %m");
/* Applies journald options given on the kernel command line.
 *
 * /proc/cmdline is read and split into (possibly quoted) words. The boolean
 * options systemd.journald.forward_to_syslog=, forward_to_kmsg= and
 * forward_to_console= set the corresponding Server fields; any other word
 * starting with "systemd.journald" draws a warning. The check at the top
 * consults detect_container(); its consequence, the error checks and the
 * return values are elided in this listing. */
1245 static int server_parse_proc_cmdline(Server *s) {
1246 _cleanup_free_ char *line = NULL;
1251 if (detect_container(NULL) > 0)
1254 r = read_one_line_file("/proc/cmdline", &line);
1256 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1260 FOREACH_WORD_QUOTED(w, l, line, state) {
1261 _cleanup_free_ char *word;
1263 word = strndup(w, l);
/* In each branch the numeric offset equals the length of the matched option
 * prefix, so word + offset points at the value. */
1267 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1268 r = parse_boolean(word + 35);
1270 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1272 s->forward_to_syslog = r;
1273 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1274 r = parse_boolean(word + 33);
1276 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1278 s->forward_to_kmsg = r;
1279 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1280 r = parse_boolean(word + 36);
1282 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1284 s->forward_to_console = r;
1285 } else if (startswith(word, "systemd.journald"))
1286 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Loads /etc/systemd/journald.conf into the Server object.
 *
 * The file is parsed with config_parse(), restricted to the "Journal"
 * section and using the gperf-generated lookup journald_gperf_lookup. A
 * missing file (ENOENT) is tested for separately from other open failures,
 * which are logged as warnings; the return statements are elided in this
 * listing. */
1292 static int server_parse_config_file(Server *s) {
1293 static const char fn[] = "/etc/systemd/journald.conf";
1294 _cleanup_fclose_ FILE *f = NULL;
/* "e" in the mode string requests close-on-exec (glibc extension). */
1299 f = fopen(fn, "re");
1301 if (errno == ENOENT)
1304 log_warning("Failed to open configuration file %s: %m", fn);
1308 r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1309 (void*) journald_gperf_lookup, false, false, s);
1311 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Creates the CLOCK_MONOTONIC timerfd that drives periodic journal syncs,
 * stores it in s->sync_timer_fd and registers it with the server's epoll
 * instance for EPOLLIN. The return statements and the check preceding the
 * log call are elided in this listing. */
1316 static int server_open_sync_timer(Server *s) {
1318 struct epoll_event ev;
1322 s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1323 if (s->sync_timer_fd < 0)
1327 ev.events = EPOLLIN;
1328 ev.data.fd = s->sync_timer_fd;
1330 r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
/* NOTE(review): the message says "idle timer" although the fd added is the
 * sync timer; left untouched here because it is runtime text. */
1332 log_error("Failed to add idle timer fd to epoll object: %m");
/* Arms the sync timer so that a sync happens s->sync_interval_usec from now.
 *
 * Nothing is armed when s->sync_interval_usec is 0; s->sync_scheduled is
 * checked first and set once the timer has been programmed. The return
 * statements and the error check after timerfd_settime() are elided in this
 * listing. */
1339 int server_schedule_sync(Server *s) {
1344 if (s->sync_scheduled)
1347 if (s->sync_interval_usec) {
/* Only it_value is filled in; it_interval stays zero, which makes this a
 * one-shot timer. */
1348 struct itimerspec sync_timer_enable = {};
1350 timespec_store(&sync_timer_enable.it_value, s->sync_interval_usec);
1352 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1357 s->sync_scheduled = true;
/* Initializes a Server object and brings up all of its inputs.
 *
 * Visible steps, in order: mark all file descriptors as unset, assign
 * defaults, parse the configuration file and the kernel command line,
 * normalize the rate limit settings, create /run/systemd/journal, allocate
 * the user journal hashmap and mmap cache, create the epoll instance, adopt
 * sockets passed in by the service manager, open the remaining sockets and
 * event sources, create the udev context and rate limiter, and open the
 * journal files. Error checks, the assignments of adopted sockets and the
 * return statements are elided in this listing. */
1362 int server_init(Server *s) {
1368 s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1369 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1373 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1374 s->sync_scheduled = false;
1376 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1377 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1379 s->forward_to_syslog = true;
1381 s->max_level_store = LOG_DEBUG;
1382 s->max_level_syslog = LOG_DEBUG;
1383 s->max_level_kmsg = LOG_NOTICE;
1384 s->max_level_console = LOG_INFO;
/* Fill both metrics structures with all-ones bytes before the configuration
 * is parsed (presumably marking every value as "not configured"; the
 * consumer of these values is not visible here). */
1386 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1387 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1389 server_parse_config_file(s);
1390 server_parse_proc_cmdline(s);
/* If exactly one of interval and burst is zero, both are set to zero. */
1391 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1392 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1393 (long long unsigned) s->rate_limit_interval,
1394 s->rate_limit_burst);
1395 s->rate_limit_interval = s->rate_limit_burst = 0;
1398 mkdir_p("/run/systemd/journal", 0755);
1400 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1401 if (!s->user_journals)
1404 s->mmap = mmap_cache_new();
1408 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1409 if (s->epoll_fd < 0) {
1410 log_error("Failed to create epoll object: %m");
/* Adopt sockets passed in by the service manager. Each descriptor is matched
 * against the three well-known socket paths; more than one of a kind, or an
 * unrecognized socket, is reported as an error. */
1414 n = sd_listen_fds(true);
1416 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1420 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1422 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1424 if (s->native_fd >= 0) {
1425 log_error("Too many native sockets passed.");
1431 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1433 if (s->stdout_fd >= 0) {
1434 log_error("Too many stdout sockets passed.");
1440 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1442 if (s->syslog_fd >= 0) {
1443 log_error("Too many /dev/log sockets passed.");
1450 log_error("Unknown socket passed.");
/* Open the sockets and the remaining event sources. */
1455 r = server_open_syslog_socket(s);
1459 r = server_open_native_socket(s);
1463 r = server_open_stdout_socket(s);
1467 r = server_open_dev_kmsg(s);
1471 r = server_open_kernel_seqnum(s);
1475 r = server_open_sync_timer(s);
1479 r = open_signalfd(s);
1483 s->udev = udev_new();
1487 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1488 s->rate_limit_burst);
1492 r = system_journal_open(s);
/* Calls journal_file_maybe_append_tag() with the current CLOCK_REALTIME
 * timestamp on the system journal (if open) and on every open user journal.
 * Whether a tag is actually written is decided by that helper, which is
 * defined elsewhere. */
1499 void server_maybe_append_tags(Server *s) {
1505 n = now(CLOCK_REALTIME);
1507 if (s->system_journal)
1508 journal_file_maybe_append_tag(s->system_journal, n);
1510 HASHMAP_FOREACH(f, s->user_journals, i)
1511 journal_file_maybe_append_tag(f, n);
/* Releases everything held by the Server object: stdout streams, the
 * system, runtime and user journal files, the user journal hashmap, all
 * open file descriptors, the rate limiter, the kernel sequence number
 * mapping, the mmap cache and the udev context. File descriptors are only
 * closed when they are >= 0. Some release steps and their guarding
 * conditions are elided in this listing. */
1515 void server_done(Server *s) {
/* Freeing a stream presumably unlinks it from s->stdout_streams, which is
 * what lets this loop terminate (stdout_stream_free() is defined elsewhere). */
1519 while (s->stdout_streams)
1520 stdout_stream_free(s->stdout_streams);
1522 if (s->system_journal)
1523 journal_file_close(s->system_journal);
1525 if (s->runtime_journal)
1526 journal_file_close(s->runtime_journal);
/* Drain the hashmap, closing each user journal, before freeing the map. */
1528 while ((f = hashmap_steal_first(s->user_journals)))
1529 journal_file_close(f);
1531 hashmap_free(s->user_journals);
1533 if (s->epoll_fd >= 0)
1534 close_nointr_nofail(s->epoll_fd);
1536 if (s->signal_fd >= 0)
1537 close_nointr_nofail(s->signal_fd);
1539 if (s->syslog_fd >= 0)
1540 close_nointr_nofail(s->syslog_fd);
1542 if (s->native_fd >= 0)
1543 close_nointr_nofail(s->native_fd);
1545 if (s->stdout_fd >= 0)
1546 close_nointr_nofail(s->stdout_fd);
1548 if (s->dev_kmsg_fd >= 0)
1549 close_nointr_nofail(s->dev_kmsg_fd);
1551 if (s->sync_timer_fd >= 0)
1552 close_nointr_nofail(s->sync_timer_fd);
1555 journal_rate_limit_free(s->rate_limit);
/* The mapping released here is one uint64_t in size. */
1557 if (s->kernel_seqnum)
1558 munmap(s->kernel_seqnum, sizeof(uint64_t));
1564 mmap_cache_unref(s->mmap);
1567 udev_unref(s->udev);