1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
43 #include "conf-parser.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-server.h"
48 #include "journald-rate-limit.h"
49 #include "journald-kmsg.h"
50 #include "journald-syslog.h"
51 #include "journald-stream.h"
52 #include "journald-console.h"
53 #include "journald-native.h"
57 #include <acl/libacl.h>
62 #include <selinux/selinux.h>
65 #define USER_JOURNALS_MAX 1024
67 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
68 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
69 #define DEFAULT_RATE_LIMIT_BURST 200
71 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Configuration names of the Storage enum values. The two macros below are
 * instantiated over this table; judging by their names they generate the
 * string lookup helpers and the config_parse_storage parser (the macro
 * definitions are not visible here). */
73 static const char* const storage_table[] = {
74 [STORAGE_AUTO] = "auto",
75 [STORAGE_VOLATILE] = "volatile",
76 [STORAGE_PERSISTENT] = "persistent",
77 [STORAGE_NONE] = "none"
80 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
81 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Configuration names of the SplitMode enum values, wired up the same way as
 * storage_table above.
 * NOTE(review): SPLIT_UID is used further down in this file but no entry for
 * it is visible in this excerpt -- confirm the table covers it. */
83 static const char* const split_mode_table[] = {
84 [SPLIT_NONE] = "none",
86 [SPLIT_LOGIN] = "login"
89 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
90 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Computes how many more bytes the journal may use and stores the result in
 * s->cached_available_space. A cached value younger than
 * RECHECK_AVAILABLE_SPACE_USEC (monotonic clock) is returned without
 * rescanning the journal directory.
 * NOTE(review): several lines of this function (declarations, error paths,
 * the final return) are not visible in this excerpt. */
92 static uint64_t available_space(Server *s) {
94 _cleanup_free_ char *p = NULL;
98 uint64_t sum = 0, avail = 0, ss_avail = 0;
100 _cleanup_closedir_ DIR *d = NULL;
104 ts = now(CLOCK_MONOTONIC);
/* Serve from the cache while it is still fresh. */
106 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
107 return s->cached_available_space;
109 r = sd_id128_get_machine(&machine);
/* With an open system journal the persistent directory and its metrics are
 * used; the /run/log/journal/ assignments are presumably the else branch. */
113 if (s->system_journal) {
114 f = "/var/log/journal/";
115 m = &s->system_metrics;
117 f = "/run/log/journal/";
118 m = &s->runtime_metrics;
/* Journal directory is <prefix><machine-id>. */
123 p = strappend(f, sd_id128_to_string(machine, ids));
131 if (fstatvfs(dirfd(d), &ss) < 0)
137 union dirent_storage buf;
139 r = readdir_r(d, &buf.de, &de);
/* Name and file-type filters; presumably only regular *.journal and
 * *.journal~ files reach the summation below. */
146 if (!endswith(de->d_name, ".journal") &&
147 !endswith(de->d_name, ".journal~"))
150 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
153 if (!S_ISREG(st.st_mode))
/* st_blocks is scaled by 512 to get the allocated size in bytes. */
156 sum += (uint64_t) st.st_blocks * 512UL;
/* Room left below the configured max_use limit, clamped at 0. */
159 avail = sum >= m->max_use ? 0 : m->max_use - sum;
/* Free space reported by the file system, less the keep_free reserve. */
161 ss_avail = ss.f_bsize * ss.f_bavail;
163 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
165 if (ss_avail < avail)
/* Remember the result and when it was computed. */
168 s->cached_available_space = avail;
169 s->cached_available_space_timestamp = ts;
/* Resolves the "systemd-journal" group into s->file_gid. The
 * s->file_gid_valid flag is checked first and set at the end, so the lookup
 * is attempted only once per server. */
174 static void server_read_file_gid(Server *s) {
175 const char *g = "systemd-journal";
180 if (s->file_gid_valid)
183 r = get_group_creds(&g, &s->file_gid);
/* A failed lookup is only logged, not propagated. */
185 log_warning("Failed to resolve '%s' group: %s", g, strerror(-r));
187 /* if we couldn't read the gid, then it will be 0, but that's
188 * fine and we shouldn't try to resolve the group again, so
189 * let's just pretend it worked right-away. */
190 s->file_gid_valid = true;
/* Fixes ownership, mode and ACL of journal file f: sets mode 0640, owner 0
 * and group s->file_gid, then patches the file's ACL so that it holds an
 * ACL_USER entry for uid carrying ACL_READ. Every failure is logged as a
 * warning and otherwise ignored.
 * NOTE(review): the conditions guarding the ACL part (e.g. for which uid
 * values it runs) are not visible in this excerpt. */
193 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
198 acl_permset_t permset;
203 server_read_file_gid(s);
205 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
207 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
213 acl = acl_get_fd(f->fd);
215 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an existing entry for uid; the creation below presumably only
 * runs when none was found. */
219 r = acl_find_uid(acl, uid, &entry);
222 if (acl_create_entry(&acl, &entry) < 0 ||
223 acl_set_tag_type(entry, ACL_USER) < 0 ||
224 acl_set_qualifier(entry, &uid) < 0) {
225 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
230 /* We do not recalculate the mask unconditionally here,
231 * so that the fchmod() mask above stays intact. */
232 if (acl_get_permset(entry, &permset) < 0 ||
233 acl_add_perm(permset, ACL_READ) < 0 ||
234 calc_acl_mask_if_needed(&acl) < 0) {
235 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Write the modified ACL back to the file. */
239 if (acl_set_fd(f->fd, acl) < 0)
240 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Picks the journal file that entries for uid are written to. The runtime
 * journal wins whenever it is open. Otherwise a per-user file
 * /var/log/journal/<machine-id>/user-<uid>.journal is looked up in
 * s->user_journals and opened on demand; the visible error paths all fall
 * back to s->system_journal.
 * NOTE(review): several conditions are not visible in this excerpt. */
247 static JournalFile* find_journal(Server *s, uid_t uid) {
255 /* We split up user logs only on /var, not on /run. If the
256 * runtime file is open, we write to it exclusively, in order
257 * to guarantee proper order as soon as we flush /run to
258 * /var and close the runtime file. */
260 if (s->runtime_journal)
261 return s->runtime_journal;
264 return s->system_journal;
266 r = sd_id128_get_machine(&machine);
268 return s->system_journal;
/* Already-open per-user journals are cached by uid. */
270 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
274 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
275 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
276 return s->system_journal;
/* Keep at most USER_JOURNALS_MAX user journals open at a time. */
278 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
279 /* Too many open? Then let's close one */
280 f = hashmap_steal_first(s->user_journals);
282 journal_file_close(f);
285 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
289 return s->system_journal;
291 server_fix_perms(s, f, uid);
/* Cache the new file; if it cannot be stored it is closed again. */
293 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
295 journal_file_close(f);
296 return s->system_journal;
/* Rotates the runtime journal, the system journal and every per-user
 * journal, calling server_fix_perms() on the resulting files. Failures are
 * logged with log_error(); no error is returned (the function is void).
 * NOTE(review): the if/else structure around the log calls is not fully
 * visible in this excerpt. */
302 void server_rotate(Server *s) {
308 log_debug("Rotating...");
/* The runtime journal is rotated with the seal argument fixed to false. */
310 if (s->runtime_journal) {
311 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
313 if (s->runtime_journal)
314 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
316 log_error("Failed to create new runtime journal: %s", strerror(-r));
318 server_fix_perms(s, s->runtime_journal, 0);
321 if (s->system_journal) {
322 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
324 if (s->system_journal)
325 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
327 log_error("Failed to create new system journal: %s", strerror(-r));
330 server_fix_perms(s, s->system_journal, 0);
/* Per-user journals: the rotated file replaces the old hashmap value under
 * the same key, and the key (the uid) is passed on to server_fix_perms(). */
333 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
334 r = journal_file_rotate(&f, s->compress, s->seal);
337 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
339 log_error("Failed to create user journal: %s", strerror(-r));
341 hashmap_replace(s->user_journals, k, f);
342 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Calls journal_file_set_offline() on the system journal and on every
 * per-user journal, then disarms the sync timer and clears
 * s->sync_scheduled. Failures are logged only. */
347 void server_sync(Server *s) {
/* An all-zero itimerspec disarms a timerfd. */
353 static const struct itimerspec sync_timer_disable = {};
355 if (s->system_journal) {
356 r = journal_file_set_offline(s->system_journal);
358 log_error("Failed to sync system journal: %s", strerror(-r));
361 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
362 r = journal_file_set_offline(f);
364 log_error("Failed to sync user journal: %s", strerror(-r));
367 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
369 log_error("Failed to disable max timer: %m");
371 s->sync_scheduled = false;
/* Runs journal_directory_vacuum() on the persistent and/or runtime journal
 * directory of this machine, using the matching metrics and
 * s->max_retention_usec. s->oldest_file_usec is reset first and passed to
 * the vacuum call. The available-space cache is invalidated at the end. */
374 void server_vacuum(Server *s) {
380 log_debug("Vacuuming...");
382 s->oldest_file_usec = 0;
384 r = sd_id128_get_machine(&machine);
386 log_error("Failed to get machine ID: %s", strerror(-r));
390 sd_id128_to_string(machine, ids);
392 if (s->system_journal) {
393 p = strappend("/var/log/journal/", ids);
/* A missing directory (-ENOENT) is not reported as an error. */
399 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
400 if (r < 0 && r != -ENOENT)
401 log_error("Failed to vacuum %s: %s", p, strerror(-r));
405 if (s->runtime_journal) {
406 p = strappend("/run/log/journal/", ids);
412 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
413 if (r < 0 && r != -ENOENT)
414 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* A zero timestamp forces available_space() to recompute on its next call. */
418 s->cached_available_space_timestamp = 0;
421 bool shall_try_append_again(JournalFile *f, int r) {
423 /* -E2BIG Hit configured limit
425 -EDQUOT Quota limit hit
427 -EHOSTDOWN Other machine
428 -EBUSY Unclean shutdown
429 -EPROTONOSUPPORT Unsupported feature
432 -ESHUTDOWN Already archived */
434 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
435 log_debug("%s: Allocation limit reached, rotating.", f->path);
436 else if (r == -EHOSTDOWN)
437 log_info("%s: Journal file from other machine, rotating.", f->path);
438 else if (r == -EBUSY)
439 log_info("%s: Unclean shutdown, rotating.", f->path);
440 else if (r == -EPROTONOSUPPORT)
441 log_info("%s: Unsupported feature, rotating.", f->path);
442 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
443 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Appends one entry, given as n iovec elements, to the journal selected by
 * find_journal() for uid. A failed append is retried once when
 * shall_try_append_again() allows it and no retry has happened yet
 * (vacuumed is false); otherwise the entry is dropped with an error log.
 * NOTE(review): the rotation/vacuum calls between the visible lines are not
 * part of this excerpt. */
450 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
452 bool vacuumed = false;
459 f = find_journal(s, uid);
/* Rotation suggested by the file itself, judged against s->max_file_usec;
 * the journal is looked up again afterwards. */
463 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
464 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
469 f = find_journal(s, uid);
474 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
476 server_schedule_sync(s);
480 if (vacuumed || !shall_try_append_again(f, r)) {
481 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* Second and last attempt, on a freshly looked-up journal. */
488 f = find_journal(s, uid);
492 log_debug("Retrying write.");
493 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
495 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* Appends journald's own metadata fields (the ones whose names start with an
 * underscore) to the caller's iovec array, starting at index n, then chooses
 * the target journal uid from s->split_mode and hands the entry to
 * write_to_journal(). The assert below requires that the array of m elements
 * has room for N_IOVEC_META_FIELDS additional entries.
 * NOTE(review): many lines (declarations, NULL checks, frees, #ifdef guards)
 * are not visible in this excerpt. */
498 static void dispatch_message_real(
500 struct iovec *iovec, unsigned n, unsigned m,
503 const char *label, size_t label_len,
504 const char *unit_id) {
/* Fixed-size buffers for the numeric fields, sized from the field name plus
 * DECIMAL_STR_MAX of the value type (32 characters for the two IDs). */
506 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
507 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
508 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
509 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
510 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
511 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
512 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=";
513 char *comm, *exe, *cmdline, *cgroup, *session, *unit, *hostname;
517 uid_t realuid = 0, owner = 0, journal_uid;
518 bool owner_valid = false;
520 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
521 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
530 assert(n + N_IOVEC_META_FIELDS <= m);
/* Fields taken from the sender's credentials (ucred). */
533 realuid = ucred->uid;
535 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
536 IOVEC_SET_STRING(iovec[n++], pid);
538 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
539 IOVEC_SET_STRING(iovec[n++], uid);
541 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
542 IOVEC_SET_STRING(iovec[n++], gid);
/* Process details looked up by pid. */
544 r = get_process_comm(ucred->pid, &t);
546 comm = strappenda("_COMM=", t);
548 IOVEC_SET_STRING(iovec[n++], comm);
551 r = get_process_exe(ucred->pid, &t);
553 exe = strappenda("_EXE=", t);
555 IOVEC_SET_STRING(iovec[n++], exe);
558 r = get_process_cmdline(ucred->pid, 0, false, &t);
560 cmdline = strappenda("_CMDLINE=", t);
562 IOVEC_SET_STRING(iovec[n++], cmdline);
/* Audit session and login uid of the process. */
566 r = audit_session_from_pid(ucred->pid, &audit);
568 sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
569 IOVEC_SET_STRING(iovec[n++], audit_session);
572 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
574 sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
575 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* Fields derived from the process's cgroup path c: session, owner uid and
 * unit. */
579 r = cg_pid_get_path_shifted(ucred->pid, NULL, &c);
581 cgroup = strappenda("_SYSTEMD_CGROUP=", c);
582 IOVEC_SET_STRING(iovec[n++], cgroup);
584 r = cg_path_get_session(c, &t);
586 session = strappenda("_SYSTEMD_SESSION=", t);
588 IOVEC_SET_STRING(iovec[n++], session);
591 if (cg_path_get_owner_uid(c, &owner) >= 0) {
594 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
595 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* Unit name: system unit first, then user unit, then the caller-supplied
 * unit_id as the last resort. */
598 if (cg_path_get_unit(c, &t) >= 0) {
599 unit = strappenda("_SYSTEMD_UNIT=", t);
601 } else if (cg_path_get_user_unit(c, &t) >= 0) {
602 unit = strappenda("_SYSTEMD_USER_UNIT=", t);
604 } else if (unit_id) {
606 unit = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
608 unit = strappenda("_SYSTEMD_UNIT=", unit_id);
613 IOVEC_SET_STRING(iovec[n++], unit);
/* SELinux context: built from the label passed in by the caller, or from
 * getpidcon() on the sender's pid. The label is copied by length
 * (label_len) and NUL-terminated explicitly. */
620 char *selinux_context = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);
622 *((char*) mempcpy(stpcpy(selinux_context, "_SELINUX_CONTEXT="), label, label_len)) = 0;
623 IOVEC_SET_STRING(iovec[n++], selinux_context);
625 security_context_t con;
627 if (getpidcon(ucred->pid, &con) >= 0) {
628 char *selinux_context = strappenda("_SELINUX_CONTEXT=", con);
631 IOVEC_SET_STRING(iovec[n++], selinux_context);
/* Timestamp supplied by the caller in tv. */
638 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
639 IOVEC_SET_STRING(iovec[n++], source_time);
642 /* Note that strictly speaking storing the boot id here is
643 * redundant since the entry includes this in-line
644 * anyway. However, we need this indexed, too. */
645 r = sd_id128_get_boot(&id);
/* The ID string is written directly after the pre-initialised field name. */
647 sd_id128_to_string(id, boot_id + sizeof("_BOOT_ID=") - 1);
648 IOVEC_SET_STRING(iovec[n++], boot_id);
651 r = sd_id128_get_machine(&id);
653 sd_id128_to_string(id, machine_id + sizeof("_MACHINE_ID=") - 1);
654 IOVEC_SET_STRING(iovec[n++], machine_id);
657 t = gethostname_malloc();
659 hostname = strappenda("_HOSTNAME=", t);
661 IOVEC_SET_STRING(iovec[n++], hostname);
/* Choose which journal (by uid) receives the entry. */
666 if (s->split_mode == SPLIT_UID && realuid > 0)
667 /* Split up strictly by any UID */
668 journal_uid = realuid;
669 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
670 /* Split up by login UIDs, this avoids creation of
671 * individual journals for system UIDs. We do this
672 * only if the realuid is not root, in order not to
673 * accidentally leak privileged information to the
674 * user that is logged by a privileged process that is
675 * part of an unprivileged session.*/
680 write_to_journal(s, journal_uid, iovec, n);
/* Logs a message generated by journald itself. Builds the fields PRIORITY=6,
 * _TRANSPORT=driver and MESSAGE=<printf-style formatted text>, adds the
 * message-ID field unless message_id equals SD_ID128_NULL, and dispatches
 * the entry with journald's own pid/uid/gid as credentials.
 *
 * format, ...: printf-style format and arguments; the output is written with
 * vsnprintf() into a fixed buffer of 16 + LINE_MAX + 1 bytes. */
683 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
684 char mid[11 + 32 + 1];
685 char buffer[16 + LINE_MAX + 1];
/* Room for the four fields set here plus the metadata fields appended by
 * dispatch_message_real(). */
686 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
689 struct ucred ucred = {};
694 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
695 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The formatted text goes right after the 8-byte "MESSAGE=" prefix. */
697 memcpy(buffer, "MESSAGE=", 8);
698 va_start(ap, format);
699 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
701 char_array_0(buffer);
702 IOVEC_SET_STRING(iovec[n++], buffer);
704 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
705 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
707 IOVEC_SET_STRING(iovec[n++], mid);
710 ucred.pid = getpid();
711 ucred.uid = getuid();
712 ucred.gid = getgid();
714 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Entry point for storing a received message. Compares the priority with
 * s->max_level_store, rate-limits per (truncated) cgroup path of the sender
 * via journal_rate_limit_test(), logs a "Suppressed ..." driver message
 * where earlier messages were dropped, and passes the entry on to
 * dispatch_message_real().
 * NOTE(review): the early-return and error paths are not visible in this
 * excerpt. */
717 void server_dispatch_message(
719 struct iovec *iovec, unsigned n, unsigned m,
722 const char *label, size_t label_len,
727 _cleanup_free_ char *path = NULL;
731 assert(iovec || n == 0);
736 if (LOG_PRI(priority) > s->max_level_store)
742 r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
746 /* example: /user/lennart/3/foobar
747 * /system/dbus.service/foobar
749 * So let's cut off everything past the third /, since that is
750 * where user directories start */
752 c = strchr(path, '/');
754 c = strchr(c+1, '/');
756 c = strchr(c+1, '/');
/* The rate limiter is given the path, the priority and the space currently
 * available to the journal. */
762 rl = journal_rate_limit_test(s->rate_limit, path,
763 priority & LOG_PRIMASK, available_space(s));
768 /* Write a suppression message if we suppressed something */
770 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
771 "Suppressed %u messages from %s", rl - 1, path);
774 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
/* Opens the journal files this server writes to. The persistent system
 * journal under /var/log/journal/<machine-id>/ is opened when storage is
 * persistent or auto and /run/systemd/journal/flushed exists. The runtime
 * journal under /run/log/journal/<machine-id>/ is opened unless storage is
 * "none". Each successfully opened journal gets server_fix_perms() and a
 * driver message stating its size limit.
 * NOTE(review): return statements and several conditions are not visible in
 * this excerpt. */
778 static int system_journal_open(Server *s) {
784 r = sd_id128_get_machine(&machine);
788 sd_id128_to_string(machine, ids);
790 if (!s->system_journal &&
791 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
792 access("/run/systemd/journal/flushed", F_OK) >= 0) {
794 /* If in auto mode: first try to create the machine
795 * path, but not the prefix.
797 * If in persistent mode: create /var/log/journal and
798 * the machine path */
800 if (s->storage == STORAGE_PERSISTENT)
801 (void) mkdir("/var/log/journal/", 0755);
803 fn = strappend("/var/log/journal/", ids);
807 (void) mkdir(fn, 0755);
810 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
814 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
818 char fb[FORMAT_BYTES_MAX];
820 server_fix_perms(s, s->system_journal, 0);
821 server_driver_message(s, SD_ID128_NULL, "Allowing system journal files to grow to %s.",
822 format_bytes(fb, sizeof(fb), s->system_metrics.max_use));
/* -ENOENT and -EROFS are not warned about. */
826 if (r != -ENOENT && r != -EROFS)
827 log_warning("Failed to open system journal: %s", strerror(-r));
833 if (!s->runtime_journal &&
834 (s->storage != STORAGE_NONE)) {
836 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
840 if (s->system_journal) {
842 /* Try to open the runtime journal, but only
843 * if it already exists, so that we can flush
844 * it into the system journal */
846 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
851 log_warning("Failed to open runtime journal: %s", strerror(-r));
858 /* OK, we really need the runtime journal, so create
859 * it if necessary. */
861 (void) mkdir_parents(fn, 0755);
862 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
866 log_error("Failed to open runtime journal: %s", strerror(-r));
871 if (s->runtime_journal) {
872 char fb[FORMAT_BYTES_MAX];
874 server_fix_perms(s, s->runtime_journal, 0);
875 server_driver_message(s, SD_ID128_NULL, "Allowing runtime journal files to grow to %s.",
876 format_bytes(fb, sizeof(fb), s->runtime_metrics.max_use));
/* Copies all entries from the runtime journal into the system journal, then
 * closes the runtime journal and removes /run/log/journal. Applies only to
 * storage auto/persistent, with a runtime journal open and a system journal
 * that can be opened.
 * NOTE(review): return values, the rotate/vacuum calls between a failed copy
 * and its retry, and the cleanup labels are not visible in this excerpt. */
883 int server_flush_to_var(Server *s) {
886 sd_journal *j = NULL;
890 if (s->storage != STORAGE_AUTO &&
891 s->storage != STORAGE_PERSISTENT)
894 if (!s->runtime_journal)
897 system_journal_open(s);
899 if (!s->system_journal)
902 log_debug("Flushing to /var...");
904 r = sd_id128_get_machine(&machine);
906 log_error("Failed to get machine id: %s", strerror(-r));
/* Read the runtime journal back through the public sd-journal API. */
910 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
912 log_error("Failed to read runtime journal: %s", strerror(-r));
916 sd_journal_set_data_threshold(j, 0);
918 SD_JOURNAL_FOREACH(j) {
923 assert(f && f->current_offset > 0);
925 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
927 log_error("Can't read entry: %s", strerror(-r));
931 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
/* Same retry policy as write_to_journal(): decided by
 * shall_try_append_again(). */
935 if (!shall_try_append_again(s->system_journal, r)) {
936 log_error("Can't write entry: %s", strerror(-r));
943 if (!s->system_journal) {
944 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
949 log_debug("Retrying write.");
950 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
952 log_error("Can't write entry: %s", strerror(-r));
958 journal_file_post_change(s->system_journal);
960 journal_file_close(s->runtime_journal);
961 s->runtime_journal = NULL;
964 rm_rf("/run/log/journal", false, true, false);
/* Handles one epoll event. ev->data.fd is compared against the server's
 * well-known fds (signal, sync timer, /dev/kmsg, native and syslog datagram
 * sockets, stdout listening socket). Any other event is treated as belonging
 * to a stdout stream whose object pointer is stored in ev->data.ptr.
 * NOTE(review): return values and many intermediate lines are not visible in
 * this excerpt. */
971 int process_event(Server *s, struct epoll_event *ev) {
/* Signals, delivered through the signalfd. */
975 if (ev->data.fd == s->signal_fd) {
976 struct signalfd_siginfo sfsi;
979 if (ev->events != EPOLLIN) {
980 log_error("Got invalid event from epoll.");
984 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
985 if (n != sizeof(sfsi)) {
990 if (errno == EINTR || errno == EAGAIN)
/* SIGUSR1: create the "flushed" flag file and flush the runtime journal to
 * /var. */
996 if (sfsi.ssi_signo == SIGUSR1) {
997 touch("/run/systemd/journal/flushed");
998 server_flush_to_var(s);
1003 if (sfsi.ssi_signo == SIGUSR2) {
1009 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
/* Sync timer expired. */
1013 } else if (ev->data.fd == s->sync_timer_fd) {
1017 log_debug("Got sync request from epoll.");
1019 r = read(ev->data.fd, (void *)&t, sizeof(t));
/* Kernel log messages. */
1026 } else if (ev->data.fd == s->dev_kmsg_fd) {
1029 if (ev->events != EPOLLIN) {
1030 log_error("Got invalid event from epoll.");
1034 r = server_read_dev_kmsg(s);
/* Datagrams on the native or syslog socket. */
1040 } else if (ev->data.fd == s->native_fd ||
1041 ev->data.fd == s->syslog_fd) {
1043 if (ev->events != EPOLLIN) {
1044 log_error("Got invalid event from epoll.");
1049 struct msghdr msghdr;
1051 struct ucred *ucred = NULL;
1052 struct timeval *tv = NULL;
1053 struct cmsghdr *cmsg;
1055 size_t label_len = 0;
1057 struct cmsghdr cmsghdr;
1059 /* We use NAME_MAX space for the
1060 * SELinux label here. The kernel
1061 * currently enforces no limit, but
1062 * according to suggestions from the
1063 * SELinux people this will change and
1064 * it will probably be identical to
1065 * NAME_MAX. For now we use that, but
1066 * this should be updated one day when
1067 * the final limit is known.*/
1068 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1069 CMSG_SPACE(sizeof(struct timeval)) +
1070 CMSG_SPACE(sizeof(int)) + /* fd */
1071 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask the socket how many bytes are pending (SIOCINQ) and grow s->buffer if
 * it is too small. */
1078 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1079 log_error("SIOCINQ failed: %m");
1083 if (s->buffer_size < (size_t) v) {
1087 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1088 b = realloc(s->buffer, l+1);
1091 log_error("Couldn't increase buffer.");
1100 iovec.iov_base = s->buffer;
1101 iovec.iov_len = s->buffer_size;
1105 msghdr.msg_iov = &iovec;
1106 msghdr.msg_iovlen = 1;
1107 msghdr.msg_control = &control;
1108 msghdr.msg_controllen = sizeof(control);
1110 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1113 if (errno == EINTR || errno == EAGAIN)
1116 log_error("recvmsg() failed: %m");
/* Walk the control messages: sender credentials, security label, receive
 * timestamp and passed file descriptors. */
1120 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1122 if (cmsg->cmsg_level == SOL_SOCKET &&
1123 cmsg->cmsg_type == SCM_CREDENTIALS &&
1124 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1125 ucred = (struct ucred*) CMSG_DATA(cmsg);
1126 else if (cmsg->cmsg_level == SOL_SOCKET &&
1127 cmsg->cmsg_type == SCM_SECURITY) {
1128 label = (char*) CMSG_DATA(cmsg);
1129 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1130 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1131 cmsg->cmsg_type == SO_TIMESTAMP &&
1132 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1133 tv = (struct timeval*) CMSG_DATA(cmsg);
1134 else if (cmsg->cmsg_level == SOL_SOCKET &&
1135 cmsg->cmsg_type == SCM_RIGHTS) {
1136 fds = (int*) CMSG_DATA(cmsg);
1137 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* Syslog socket: only plain datagrams are processed; passed fds draw a
 * warning. */
1141 if (ev->data.fd == s->syslog_fd) {
1144 if (n > 0 && n_fds == 0) {
1145 e = memchr(s->buffer, '\n', n);
1151 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1152 } else if (n_fds > 0)
1153 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native socket: either a datagram with payload, or an empty datagram that
 * carries exactly one fd. */
1156 if (n > 0 && n_fds == 0)
1157 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1158 else if (n == 0 && n_fds == 1)
1159 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1161 log_warning("Got too many file descriptors via native socket. Ignoring.");
1164 close_many(fds, n_fds);
/* New connection on the stdout listening socket. */
1169 } else if (ev->data.fd == s->stdout_fd) {
1171 if (ev->events != EPOLLIN) {
1172 log_error("Got invalid event from epoll.");
1176 stdout_stream_new(s);
1180 StdoutStream *stream;
/* Only EPOLLIN and EPOLLHUP are accepted on stream fds. */
1182 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1183 log_error("Got invalid event from epoll.");
1187 /* If it is none of the well-known fds, it must be an
1188 * stdout stream fd. Note that this is a bit ugly here
1189 * (since we rely that none of the well-known fds
1190 * could be interpreted as pointer), but nonetheless
1191 * safe, since the well-known fds would never get an
1192 * fd > 4096, i.e. beyond the first memory page */
1194 stream = ev->data.ptr;
/* The stream is freed when stdout_stream_process() returns <= 0. */
1196 if (stdout_stream_process(stream) <= 0)
1197 stdout_stream_free(stream);
1202 log_error("Unknown event.");
/* Installs a signal mask consisting of SIGINT, SIGTERM, SIGUSR1 and SIGUSR2
 * (SIG_SETMASK replaces the previous mask), creates a non-blocking,
 * close-on-exec signalfd for those signals in s->signal_fd and registers it
 * with the server's epoll instance for EPOLLIN. */
1206 static int open_signalfd(Server *s) {
1208 struct epoll_event ev;
1212 assert_se(sigemptyset(&mask) == 0);
1213 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1214 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1216 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1217 if (s->signal_fd < 0) {
1218 log_error("signalfd(): %m");
1223 ev.events = EPOLLIN;
1224 ev.data.fd = s->signal_fd;
1226 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1227 log_error("epoll_ctl(): %m");
/* Applies journald options given on the kernel command line. /proc/cmdline
 * is split into (quoted) words; systemd.journald.forward_to_syslog=,
 * forward_to_kmsg= and forward_to_console= are parsed as booleans into the
 * matching Server fields. Any other word starting with "systemd.journald"
 * only produces a warning.
 * NOTE(review): what happens when detect_container() reports a container is
 * not visible in this excerpt -- presumably the command line is skipped. */
1234 static int server_parse_proc_cmdline(Server *s) {
1235 _cleanup_free_ char *line = NULL;
1240 if (detect_container(NULL) > 0)
1243 r = read_one_line_file("/proc/cmdline", &line);
1245 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1249 FOREACH_WORD_QUOTED(w, l, line, state) {
1250 _cleanup_free_ char *word;
1252 word = strndup(w, l);
/* The numeric offsets below (35, 33, 36) are the lengths of the respective
 * option prefixes, i.e. they point at the value after '='. */
1256 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1257 r = parse_boolean(word + 35);
1259 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1261 s->forward_to_syslog = r;
1262 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1263 r = parse_boolean(word + 33);
1265 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1267 s->forward_to_kmsg = r;
1268 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1269 r = parse_boolean(word + 36);
1271 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1273 s->forward_to_console = r;
1274 } else if (startswith(word, "systemd.journald"))
1275 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Loads /etc/systemd/journald.conf into s via config_parse(), using the
 * "Journal" section and the gperf lookup table journald_gperf_lookup.
 * ENOENT from fopen() is special-cased (the action taken is not visible
 * here); other open or parse failures are logged as warnings. */
1281 static int server_parse_config_file(Server *s) {
1282 static const char fn[] = "/etc/systemd/journald.conf";
1283 _cleanup_fclose_ FILE *f = NULL;
/* "re": read-only; 'e' is the glibc flag for close-on-exec. */
1288 f = fopen(fn, "re");
1290 if (errno == ENOENT)
1293 log_warning("Failed to open configuration file %s: %m", fn);
1297 r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1298 (void*) journald_gperf_lookup, false, false, s);
1300 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Creates the timerfd used for delayed journal syncs (CLOCK_MONOTONIC,
 * close-on-exec), stores it in s->sync_timer_fd and registers it with the
 * server's epoll instance for EPOLLIN. The timer is not armed here. */
1305 static int server_open_sync_timer(Server *s) {
1307 struct epoll_event ev;
1311 s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1312 if (s->sync_timer_fd < 0)
1316 ev.events = EPOLLIN;
1317 ev.data.fd = s->sync_timer_fd;
1319 r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1321 log_error("Failed to add idle timer fd to epoll object: %m");
/* Arms the sync timer so that it fires s->sync_interval_usec from now, unless
 * a sync is already scheduled. s->sync_scheduled records that the timer is
 * pending; server_sync() clears it again.
 * NOTE(review): the behaviour for a zero sync interval and the return values
 * are not visible in this excerpt. */
1328 int server_schedule_sync(Server *s) {
1333 if (s->sync_scheduled)
1336 if (s->sync_interval_usec) {
/* Only it_value is set, so the timer is one-shot (it_interval stays zero). */
1337 struct itimerspec sync_timer_enable = {};
1339 timespec_store(&sync_timer_enable.it_value, s->sync_interval_usec);
1341 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1346 s->sync_scheduled = true;
/* Initialises the Server object: sets defaults, applies the configuration
 * file and the kernel command line, allocates the user-journal hashmap and
 * mmap cache, creates the epoll instance, adopts sockets passed in by socket
 * activation, opens the remaining sockets and fds, creates the rate limiter
 * and finally opens the journal files.
 * NOTE(review): error returns and the fd assignments inside the socket loop
 * are not visible in this excerpt. */
1351 int server_init(Server *s) {
/* All fds start out as -1 so that server_done() can tell which are open. */
1357 s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1358 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1362 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1363 s->sync_scheduled = false;
1365 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1366 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1368 s->forward_to_syslog = true;
1370 s->max_level_store = LOG_DEBUG;
1371 s->max_level_syslog = LOG_DEBUG;
1372 s->max_level_kmsg = LOG_NOTICE;
1373 s->max_level_console = LOG_INFO;
/* All-ones bytes in the metrics; presumably this marks every limit as
 * "unset" until configuration overrides it -- TODO confirm. */
1375 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1376 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1378 server_parse_config_file(s);
1379 server_parse_proc_cmdline(s);
/* If exactly one of interval/burst is zero, both are reset to zero. */
1380 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1381 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1382 (long long unsigned) s->rate_limit_interval,
1383 s->rate_limit_burst);
1384 s->rate_limit_interval = s->rate_limit_burst = 0;
1387 mkdir_p("/run/systemd/journal", 0755);
1389 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1390 if (!s->user_journals)
1393 s->mmap = mmap_cache_new();
1397 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1398 if (s->epoll_fd < 0) {
1399 log_error("Failed to create epoll object: %m");
/* Classify each passed-in fd by socket type and bound path; at most one
 * socket of each kind is accepted. */
1403 n = sd_listen_fds(true);
1405 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1409 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1411 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1413 if (s->native_fd >= 0) {
1414 log_error("Too many native sockets passed.");
1420 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1422 if (s->stdout_fd >= 0) {
1423 log_error("Too many stdout sockets passed.");
1429 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1431 if (s->syslog_fd >= 0) {
1432 log_error("Too many /dev/log sockets passed.");
1439 log_error("Unknown socket passed.");
/* Open the remaining input sources and helper fds. */
1444 r = server_open_syslog_socket(s);
1448 r = server_open_native_socket(s);
1452 r = server_open_stdout_socket(s);
1456 r = server_open_dev_kmsg(s);
1460 r = server_open_kernel_seqnum(s);
1464 r = server_open_sync_timer(s);
1468 r = open_signalfd(s);
1472 s->udev = udev_new();
1476 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1477 s->rate_limit_burst);
1481 r = system_journal_open(s);
/* Calls journal_file_maybe_append_tag() with the current CLOCK_REALTIME
 * timestamp on the system journal (if open) and on every per-user journal.
 * The runtime journal is not touched by the visible code. */
1488 void server_maybe_append_tags(Server *s) {
1494 n = now(CLOCK_REALTIME);
1496 if (s->system_journal)
1497 journal_file_maybe_append_tag(s->system_journal, n);
1499 HASHMAP_FOREACH(f, s->user_journals, i)
1500 journal_file_maybe_append_tag(f, n);
/* Releases everything held by the Server: stdout streams, the system,
 * runtime and per-user journal files, the user-journal hashmap, every fd
 * that is open (>= 0), the rate limiter, the kernel seqnum mapping, the mmap
 * cache and the udev context. */
1504 void server_done(Server *s) {
/* Loops until s->stdout_streams is empty; this relies on stdout_stream_free()
 * unlinking the stream from that list (not visible here). */
1508 while (s->stdout_streams)
1509 stdout_stream_free(s->stdout_streams);
1511 if (s->system_journal)
1512 journal_file_close(s->system_journal);
1514 if (s->runtime_journal)
1515 journal_file_close(s->runtime_journal);
1517 while ((f = hashmap_steal_first(s->user_journals)))
1518 journal_file_close(f);
1520 hashmap_free(s->user_journals);
1522 if (s->epoll_fd >= 0)
1523 close_nointr_nofail(s->epoll_fd);
1525 if (s->signal_fd >= 0)
1526 close_nointr_nofail(s->signal_fd);
1528 if (s->syslog_fd >= 0)
1529 close_nointr_nofail(s->syslog_fd);
1531 if (s->native_fd >= 0)
1532 close_nointr_nofail(s->native_fd);
1534 if (s->stdout_fd >= 0)
1535 close_nointr_nofail(s->stdout_fd);
1537 if (s->dev_kmsg_fd >= 0)
1538 close_nointr_nofail(s->dev_kmsg_fd);
1540 if (s->sync_timer_fd >= 0)
1541 close_nointr_nofail(s->sync_timer_fd);
1544 journal_rate_limit_free(s->rate_limit);
/* The mapping is unmapped with the size of a single uint64_t. */
1546 if (s->kernel_seqnum)
1547 munmap(s->kernel_seqnum, sizeof(uint64_t));
1553 mmap_cache_unref(s->mmap);
1556 udev_unref(s->udev);