1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
43 #include "conf-parser.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-server.h"
48 #include "journald-rate-limit.h"
49 #include "journald-kmsg.h"
50 #include "journald-syslog.h"
51 #include "journald-stream.h"
52 #include "journald-console.h"
53 #include "journald-native.h"
57 #include <acl/libacl.h>
62 #include <selinux/selinux.h>
/* Upper bound on the number of per-user journal files kept open at once;
 * find_journal() closes entries of s->user_journals while this is reached. */
65 #define USER_JOURNALS_MAX 1024
/* Defaults that server_init() stores in the Server object before the
 * configuration file and kernel command line are parsed. */
67 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
68 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
69 #define DEFAULT_RATE_LIMIT_BURST 200
/* available_space() returns its cached result while the cache is younger
 * than this. */
71 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Configuration-file spelling of each Storage enum value, indexed by the
 * enum constant. */
73 static const char* const storage_table[] = {
74 [STORAGE_AUTO] = "auto",
75 [STORAGE_VOLATILE] = "volatile",
76 [STORAGE_PERSISTENT] = "persistent",
77 [STORAGE_NONE] = "none"
/* Generate the string lookup helpers for Storage and the config parser
 * callback config_parse_storage (macro definitions are not part of this
 * file; presumably they consume storage_table -- TODO confirm). */
80 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
81 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Configuration-file spelling of each SplitMode enum value, indexed by the
 * enum constant. NOTE(review): at least one entry line is missing from this
 * extract (dispatch_message_real() also uses SPLIT_UID). */
83 static const char* const split_mode_table[] = {
84 [SPLIT_NONE] = "none",
86 [SPLIT_LOGIN] = "login"
/* Generate the string lookup helpers for SplitMode and the config parser
 * callback config_parse_split_mode. */
89 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
90 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Estimates how many more bytes the journal may use and caches the result
 * in s->cached_available_space together with a monotonic timestamp.
 *
 * Two limits are computed from the JournalMetrics of the active location:
 *   avail    = max_use minus the on-disk size of the journal files found in
 *              the machine's journal directory
 *   ss_avail = free space reported by fstatvfs() minus keep_free
 *
 * NOTE(review): several lines of this function (declarations, error
 * returns, the loop header, the final return) are not visible in this
 * extract, so only the visible steps are described. */
92 static uint64_t available_space(Server *s) {
94 _cleanup_free_ char *p = NULL;
98 uint64_t sum = 0, avail = 0, ss_avail = 0;
100 _cleanup_closedir_ DIR *d = NULL;
104 ts = now(CLOCK_MONOTONIC);
/* Serve the cached value while it is younger than
 * RECHECK_AVAILABLE_SPACE_USEC. */
106 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
107 return s->cached_available_space;
109 r = sd_id128_get_machine(&machine);
/* Select directory prefix and metrics: /var with system_metrics when the
 * system journal is open; the /run assignments below belong to the other
 * branch (its "else" line is not visible here). */
113 if (s->system_journal) {
114 f = "/var/log/journal/";
115 m = &s->system_metrics;
117 f = "/run/log/journal/";
118 m = &s->runtime_metrics;
/* Directory to scan: <prefix><machine id as string>. */
123 p = strappend(f, sd_id128_to_string(machine, ids));
131 if (fstatvfs(dirfd(d), &ss) < 0)
137 union dirent_storage buf;
139 r = readdir_r(d, &buf.de, &de);
/* Only entries named *.journal or *.journal~ that are regular files are
 * considered; symlinks are not followed. */
146 if (!endswith(de->d_name, ".journal") &&
147 !endswith(de->d_name, ".journal~"))
150 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
153 if (!S_ISREG(st.st_mode))
/* st_blocks is multiplied by 512 to get bytes actually allocated. */
156 sum += (uint64_t) st.st_blocks * 512UL;
/* Both subtractions saturate at 0 instead of wrapping. */
159 avail = sum >= m->max_use ? 0 : m->max_use - sum;
/* NOTE(review): POSIX specifies f_bavail in units of f_frsize, not
 * f_bsize, and this product is evaluated in the operands' own types
 * before being widened -- confirm both are acceptable here. */
161 ss_avail = ss.f_bsize * ss.f_bavail;
163 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
165 if (ss_avail < avail)
/* Remember the result and when it was computed. */
168 s->cached_available_space = avail;
169 s->cached_available_space_timestamp = ts;
/* Looks up the gid of the "systemd-journal" group and stores it in
 * s->file_gid. The lookup is attempted once: s->file_gid_valid is set
 * afterwards regardless of the outcome, and a failure only produces a
 * warning. */
174 static void server_read_file_gid(Server *s) {
175 const char *g = "systemd-journal";
/* Already resolved (or already tried) earlier. */
180 if (s->file_gid_valid)
183 r = get_group_creds(&g, &s->file_gid);
185 log_warning("Failed to resolve '%s' group: %s", g, strerror(-r));
187 /* if we couldn't read the gid, then it will be 0, but that's
188 * fine and we shouldn't try to resolve the group again, so
189 * let's just pretend it worked right-away. */
190 s->file_gid_valid = true;
/* Normalizes ownership and access rights of an open journal file.
 *
 * s:   server; provides the journal group id (resolved on demand).
 * f:   journal file; f->fd is modified, f->path is used in log messages.
 * uid: user for whom a read ACL entry is set up.
 *
 * The file is set to mode 0640, owner 0, group s->file_gid. Then the file's
 * ACL is read, an ACL_USER entry for uid is looked up (and created in the
 * visible fallback path), ACL_READ is added to it, the mask is recalculated
 * and the ACL is written back. Every failure is logged as a warning and
 * otherwise ignored. NOTE(review): the conditions guarding the ACL part
 * and the cleanup of "acl" are not visible in this extract. */
193 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
198 acl_permset_t permset;
203 server_read_file_gid(s);
205 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
207 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
213 acl = acl_get_fd(f->fd);
215 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
219 r = acl_find_uid(acl, uid, &entry);
/* Add a new ACL_USER entry qualified with uid. */
222 if (acl_create_entry(&acl, &entry) < 0 ||
223 acl_set_tag_type(entry, ACL_USER) < 0 ||
224 acl_set_qualifier(entry, &uid) < 0) {
225 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Grant read permission on the entry and recompute the ACL mask. */
230 if (acl_get_permset(entry, &permset) < 0 ||
231 acl_add_perm(permset, ACL_READ) < 0 ||
232 acl_calc_mask(&acl) < 0) {
233 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
237 if (acl_set_fd(f->fd, acl) < 0)
238 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Returns the journal file that entries attributed to uid are written to.
 *
 * The runtime journal is returned whenever it is open. Otherwise per-user
 * files /var/log/journal/<machine-id>/user-<uid>.journal are opened on
 * demand and remembered in s->user_journals, keyed by uid. In every
 * visible failure path the function falls back to s->system_journal.
 * NOTE(review): some conditions (e.g. which uids bypass the per-user
 * lookup) are on lines not visible in this extract. */
245 static JournalFile* find_journal(Server *s, uid_t uid) {
253 /* We split up user logs only on /var, not on /run. If the
254 * runtime file is open, we write to it exclusively, in order
255 * to guarantee proper order as soon as we flush /run to
256 * /var and close the runtime file. */
258 if (s->runtime_journal)
259 return s->runtime_journal;
262 return s->system_journal;
264 r = sd_id128_get_machine(&machine);
266 return s->system_journal;
/* Look for an already open per-user file. */
268 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
272 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
273 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
274 return s->system_journal;
/* Make room in the map before opening another file. */
276 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
277 /* Too many open? Then let's close one */
278 f = hashmap_steal_first(s->user_journals);
280 journal_file_close(f);
/* Open (creating if needed) with the system metrics; the system journal
 * is passed as the template argument. */
283 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
287 return s->system_journal;
289 server_fix_perms(s, f, uid);
/* Register the file; if that fails, close it again and use the system
 * journal instead. */
291 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
293 journal_file_close(f);
294 return s->system_journal;
/* Rotates every open journal file: the runtime journal (never sealed),
 * the system journal and all per-user journals (sealed according to
 * s->seal). After a rotation server_fix_perms() is applied to the new
 * file. Failures are logged; the message distinguishes whether a file
 * object is still present after the failed rotation. */
300 void server_rotate(Server *s) {
306 log_debug("Rotating...");
308 if (s->runtime_journal) {
309 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
311 if (s->runtime_journal)
312 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
314 log_error("Failed to create new runtime journal: %s", strerror(-r));
316 server_fix_perms(s, s->runtime_journal, 0);
319 if (s->system_journal) {
320 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
322 if (s->system_journal)
323 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
325 log_error("Failed to create new system journal: %s", strerror(-r));
328 server_fix_perms(s, s->system_journal, 0);
/* Per-user journals: f is a local copy of the map value, so the map must
 * be updated explicitly with the rotated file. NOTE(review): confirm that
 * the map entry for k is not left referring to the old file object when
 * rotation fails. */
331 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
332 r = journal_file_rotate(&f, s->compress, s->seal);
335 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
337 log_error("Failed to create user journal: %s", strerror(-r));
339 hashmap_replace(s->user_journals, k, f);
340 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Calls journal_file_set_offline() on the system journal and on every
 * per-user journal, then disarms the sync timer and clears
 * s->sync_scheduled so that server_schedule_sync() can arm it again.
 * Errors are logged only. */
345 void server_sync(Server *s) {
/* An all-zero itimerspec disarms a timerfd. */
351 static const struct itimerspec sync_timer_disable = {};
353 if (s->system_journal) {
354 r = journal_file_set_offline(s->system_journal);
356 log_error("Failed to sync system journal: %s", strerror(-r));
359 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
360 r = journal_file_set_offline(f);
362 log_error("Failed to sync user journal: %s", strerror(-r));
365 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
367 log_error("Failed to disable max timer: %m");
369 s->sync_scheduled = false;
/* Runs journal_directory_vacuum() on the machine's directory below
 * /var/log/journal (if the system journal is open) and below
 * /run/log/journal (if the runtime journal is open), using the respective
 * max_use/keep_free metrics and s->max_retention_usec.
 * s->oldest_file_usec is reset first and passed to the vacuum calls. A
 * missing directory (-ENOENT) is not reported as an error. */
372 void server_vacuum(Server *s) {
378 log_debug("Vacuuming...");
380 s->oldest_file_usec = 0;
382 r = sd_id128_get_machine(&machine);
384 log_error("Failed to get machine ID: %s", strerror(-r));
388 sd_id128_to_string(machine, ids);
390 if (s->system_journal) {
391 p = strappend("/var/log/journal/", ids);
397 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
398 if (r < 0 && r != -ENOENT)
399 log_error("Failed to vacuum %s: %s", p, strerror(-r));
403 if (s->runtime_journal) {
404 p = strappend("/run/log/journal/", ids);
410 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
411 if (r < 0 && r != -ENOENT)
412 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* Disk usage has changed: a zero timestamp makes available_space()
 * recompute instead of returning its cached value. */
416 s->cached_available_space_timestamp = 0;
419 bool shall_try_append_again(JournalFile *f, int r) {
421 /* -E2BIG Hit configured limit
423 -EDQUOT Quota limit hit
425 -EHOSTDOWN Other machine
426 -EBUSY Unclean shutdown
427 -EPROTONOSUPPORT Unsupported feature
430 -ESHUTDOWN Already archived */
432 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
433 log_debug("%s: Allocation limit reached, rotating.", f->path);
434 else if (r == -EHOSTDOWN)
435 log_info("%s: Journal file from other machine, rotating.", f->path);
436 else if (r == -EBUSY)
437 log_info("%s: Unclean shutdown, rotating.", f->path);
438 else if (r == -EPROTONOSUPPORT)
439 log_info("%s: Unsupported feature, rotating.", f->path);
440 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
441 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Appends one entry, given as n iovec fields, to the journal file selected
 * for uid by find_journal().
 *
 * If the file suggests rotation, the file is looked up again afterwards.
 * A failed append is retried once, and only if
 * shall_try_append_again() accepts the error and no vacuuming took place
 * earlier in this call; otherwise the entry is dropped with an error
 * message. NOTE(review): the rotate/vacuum calls themselves are on lines
 * not visible in this extract. */
448 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
450 bool vacuumed = false;
457 f = find_journal(s, uid);
461 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
462 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
/* The file object may have changed, look it up again. */
467 f = find_journal(s, uid);
472 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
474 server_schedule_sync(s);
478 if (vacuumed || !shall_try_append_again(f, r)) {
479 log_error("Failed to write entry, ignoring: %s", strerror(-r));
486 f = find_journal(s, uid);
/* Second and last attempt. */
490 log_debug("Retrying write.");
491 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
493 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* Appends server-generated metadata fields to an entry and writes it.
 *
 * iovec/n/m: field array, number of fields already filled in, and capacity
 *            of the array; the caller must leave N_IOVEC_META_FIELDS free
 *            slots (asserted below).
 * label/label_len: SELinux label to record, not necessarily NUL-terminated
 *            (it is copied by length).
 * unit_id:   unit name used when none can be derived from the cgroup path.
 *
 * Fields added in the visible code: _PID, _UID, _GID, _COMM, _EXE,
 * _CMDLINE, _AUDIT_SESSION, _AUDIT_LOGINUID, _SYSTEMD_CGROUP,
 * _SYSTEMD_SESSION, _SYSTEMD_OWNER_UID, _SYSTEMD_UNIT or
 * _SYSTEMD_USER_UNIT, _SELINUX_CONTEXT, _SOURCE_REALTIME_TIMESTAMP,
 * _BOOT_ID, _MACHINE_ID and _HOSTNAME. Finally the target journal is
 * chosen according to s->split_mode and write_to_journal() is called.
 * NOTE(review): the conditions guarding the individual fields (result
 * checks, NULL checks, #ifdefs) are on lines not visible in this extract. */
496 static void dispatch_message_real(
498 struct iovec *iovec, unsigned n, unsigned m,
501 const char *label, size_t label_len,
502 const char *unit_id) {
/* Fixed-size fields live in stack buffers sized as prefix plus the
 * maximum decimal length of the value's type; the two id buffers hold the
 * prefix plus 32 characters. */
504 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
505 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
506 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
507 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
508 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
509 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
510 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=";
511 char *comm, *exe, *cmdline, *cgroup, *session, *unit, *selinux_context, *hostname;
515 uid_t realuid = 0, owner = 0, journal_uid;
516 bool owner_valid = false;
518 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
519 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
/* There must be room for all metadata fields appended below. */
528 assert(n + N_IOVEC_META_FIELDS <= m);
/* Credentials as supplied in ucred. */
531 realuid = ucred->uid;
533 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
534 IOVEC_SET_STRING(iovec[n++], pid);
536 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
537 IOVEC_SET_STRING(iovec[n++], uid);
539 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
540 IOVEC_SET_STRING(iovec[n++], gid);
/* Process details looked up by pid. */
542 r = get_process_comm(ucred->pid, &t);
544 comm = strappenda("_COMM=", t);
546 IOVEC_SET_STRING(iovec[n++], comm);
549 r = get_process_exe(ucred->pid, &t);
551 exe = strappenda("_EXE=", t);
553 IOVEC_SET_STRING(iovec[n++], exe);
556 r = get_process_cmdline(ucred->pid, 0, false, &t);
558 cmdline = strappenda("_CMDLINE=", t);
560 IOVEC_SET_STRING(iovec[n++], cmdline);
/* Audit session and login uid of the process. */
564 r = audit_session_from_pid(ucred->pid, &audit);
566 sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
567 IOVEC_SET_STRING(iovec[n++], audit_session);
570 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
572 sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
573 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* cgroup path of the process and what can be derived from it: session,
 * owner uid and unit. */
577 r = cg_pid_get_path_shifted(ucred->pid, NULL, &c);
579 cgroup = strappenda("_SYSTEMD_CGROUP=", c);
580 IOVEC_SET_STRING(iovec[n++], cgroup);
582 r = cg_path_get_session(c, &t);
584 session = strappenda("_SYSTEMD_SESSION=", t);
586 IOVEC_SET_STRING(iovec[n++], session);
589 if (cg_path_get_owner_uid(c, &owner) >= 0) {
592 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
593 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* Unit: system unit from the cgroup path, else user unit from the cgroup
 * path, else the unit_id passed by the caller. */
596 if (cg_path_get_unit(c, &t) >= 0) {
597 unit = strappenda("_SYSTEMD_UNIT=", t);
599 } else if (cg_path_get_user_unit(c, &t) >= 0) {
600 unit = strappenda("_SYSTEMD_USER_UNIT=", t);
602 } else if (unit_id) {
604 unit = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
606 unit = strappenda("_SYSTEMD_UNIT=", unit_id);
611 IOVEC_SET_STRING(iovec[n++], unit);
/* SELinux context: copy the supplied label by length and terminate it;
 * the getpidcon() lookup below is the alternative source. */
618 selinux_context = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);
620 *((char*) mempcpy(stpcpy(selinux_context, "_SELINUX_CONTEXT="), label, label_len)) = 0;
621 IOVEC_SET_STRING(iovec[n++], selinux_context);
623 security_context_t con;
625 if (getpidcon(ucred->pid, &con) >= 0) {
626 selinux_context = strappenda("_SELINUX_CONTEXT=", con);
628 IOVEC_SET_STRING(iovec[n++], selinux_context);
/* Timestamp taken from tv. */
635 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
636 IOVEC_SET_STRING(iovec[n++], source_time);
639 /* Note that strictly speaking storing the boot id here is
640 * redundant since the entry includes this in-line
641 * anyway. However, we need this indexed, too. */
642 r = sd_id128_get_boot(&id);
/* The id string is written directly behind the prefix already stored in
 * the buffer. */
644 sd_id128_to_string(id, boot_id + sizeof("_BOOT_ID=") - 1);
645 IOVEC_SET_STRING(iovec[n++], boot_id);
648 r = sd_id128_get_machine(&id);
650 sd_id128_to_string(id, machine_id + sizeof("_MACHINE_ID=") - 1);
651 IOVEC_SET_STRING(iovec[n++], machine_id);
654 t = gethostname_malloc();
656 hostname = strappenda("_HOSTNAME=", t);
658 IOVEC_SET_STRING(iovec[n++], hostname);
/* Select the uid whose journal file receives the entry. */
663 if (s->split_mode == SPLIT_UID && realuid > 0)
664 /* Split up strictly by any UID */
665 journal_uid = realuid;
666 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
667 /* Split up by login UIDs, this avoids creation of
668 * individual journals for system UIDs. We do this
669 * only if the realuid is not root, in order not to
670 * accidentally leak privileged information to the
671 * user that is logged by a privileged process that is
672 * part of an unprivileged session.*/
677 write_to_journal(s, journal_uid, iovec, n);
/* Logs a message generated by the journal daemon itself.
 *
 * message_id: written as an additional field unless it equals
 *             SD_ID128_NULL.
 * format/...: printf-style message text; the formatted text is truncated
 *             to the size of the local buffer.
 *
 * The entry carries PRIORITY=6 and _TRANSPORT=driver and is dispatched
 * through dispatch_message_real() with the daemon's own pid/uid/gid as
 * credentials, bypassing the checks in server_dispatch_message(). */
680 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
681 char mid[11 + 32 + 1];
682 char buffer[16 + LINE_MAX + 1];
/* Room for the (up to four) fields set here plus the metadata fields. */
683 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
686 struct ucred ucred = {};
691 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
692 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* "MESSAGE=" is 8 bytes; the formatted text follows directly. */
694 memcpy(buffer, "MESSAGE=", 8);
695 va_start(ap, format);
696 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
698 char_array_0(buffer);
699 IOVEC_SET_STRING(iovec[n++], buffer);
701 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
/* MESSAGE_ID() presumably expands to a format string plus its arguments
 * -- TODO confirm against its definition. */
702 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
704 IOVEC_SET_STRING(iovec[n++], mid);
707 ucred.pid = getpid();
708 ucred.uid = getuid();
709 ucred.gid = getgid();
711 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Entry point for messages received from clients: applies the storage
 * level filter and per-cgroup rate limiting, then hands the entry to
 * dispatch_message_real().
 *
 * iovec/n/m: entry fields, number of fields used, capacity of the array.
 * label/label_len: SELinux label as received with the message.
 *
 * Messages whose priority is numerically above s->max_level_store are
 * filtered out. The rate limit is keyed by the sender's cgroup path,
 * shortened as described in the comment below, and also takes
 * available_space() into account. When the rate limiter reports that
 * earlier messages were suppressed, a driver message with the count is
 * logged first. NOTE(review): the early-return lines are not visible in
 * this extract. */
714 void server_dispatch_message(
716 struct iovec *iovec, unsigned n, unsigned m,
719 const char *label, size_t label_len,
724 _cleanup_free_ char *path = NULL;
728 assert(iovec || n == 0);
733 if (LOG_PRI(priority) > s->max_level_store)
739 r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
743 /* example: /user/lennart/3/foobar
744 * /system/dbus.service/foobar
746 * So let's cut off everything past the third /, since that is
747 * where user directories start */
/* Walk to the first, second and third '/' of the path. */
749 c = strchr(path, '/');
751 c = strchr(c+1, '/');
753 c = strchr(c+1, '/');
759 rl = journal_rate_limit_test(s->rate_limit, path,
760 priority & LOG_PRIMASK, available_space(s));
765 /* Write a suppression message if we suppressed something */
767 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
768 "Suppressed %u messages from %s", rl - 1, path);
771 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
/* Opens the journal files the server writes to, as far as they are not
 * open yet.
 *
 * The persistent file /var/log/journal/<machine-id>/system.journal is
 * opened only when storage is "persistent" or "auto" and the flag file
 * /run/systemd/journal/flushed exists. The runtime file
 * /run/log/journal/<machine-id>/system.journal is handled for every
 * storage mode except "none": with a system journal open it is only
 * opened if it already exists, otherwise it is created. After a
 * successful open the permissions are fixed and a driver message with the
 * configured size limit is logged. NOTE(review): return statements and
 * result checks are on lines not visible in this extract. */
775 static int system_journal_open(Server *s) {
781 r = sd_id128_get_machine(&machine);
785 sd_id128_to_string(machine, ids);
787 if (!s->system_journal &&
788 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
789 access("/run/systemd/journal/flushed", F_OK) >= 0) {
791 /* If in auto mode: first try to create the machine
792 * path, but not the prefix.
794 * If in persistent mode: create /var/log/journal and
795 * the machine path */
/* mkdir() results are deliberately discarded; a missing directory shows
 * up as an error when the file is opened. */
797 if (s->storage == STORAGE_PERSISTENT)
798 (void) mkdir("/var/log/journal/", 0755);
800 fn = strappend("/var/log/journal/", ids);
804 (void) mkdir(fn, 0755);
807 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
811 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
815 char fb[FORMAT_BYTES_MAX];
817 server_fix_perms(s, s->system_journal, 0);
818 server_driver_message(s, SD_ID128_NULL, "Allowing system journal files to grow to %s.",
819 format_bytes(fb, sizeof(fb), s->system_metrics.max_use));
/* -ENOENT and -EROFS are not worth a warning. */
823 if (r != -ENOENT && r != -EROFS)
824 log_warning("Failed to open system journal: %s", strerror(-r));
830 if (!s->runtime_journal &&
831 (s->storage != STORAGE_NONE)) {
833 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
837 if (s->system_journal) {
839 /* Try to open the runtime journal, but only
840 * if it already exists, so that we can flush
841 * it into the system journal */
/* No O_CREAT here, and the runtime journal is never sealed. */
843 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
848 log_warning("Failed to open runtime journal: %s", strerror(-r));
855 /* OK, we really need the runtime journal, so create
856 * it if necessary. */
858 (void) mkdir_parents(fn, 0755);
859 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
863 log_error("Failed to open runtime journal: %s", strerror(-r));
868 if (s->runtime_journal) {
869 char fb[FORMAT_BYTES_MAX];
871 server_fix_perms(s, s->runtime_journal, 0);
872 server_driver_message(s, SD_ID128_NULL, "Allowing runtime journal files to grow to %s.",
873 format_bytes(fb, sizeof(fb), s->runtime_metrics.max_use));
/* Copies all entries of the runtime journal into the persistent system
 * journal and then closes the runtime journal.
 *
 * Only acts when storage is "auto" or "persistent", a runtime journal is
 * open, and the system journal is (or can be) opened. Entries are read
 * through an sd_journal handle restricted to runtime files and copied one
 * by one; a failed copy is retried once if shall_try_append_again()
 * accepts the error. NOTE(review): the early returns, the steps between a
 * failed copy and its retry, and the cleanup of "j" are on lines not
 * visible in this extract, as is the condition under which
 * /run/log/journal is removed. */
880 int server_flush_to_var(Server *s) {
883 sd_journal *j = NULL;
887 if (s->storage != STORAGE_AUTO &&
888 s->storage != STORAGE_PERSISTENT)
891 if (!s->runtime_journal)
/* Result not checked here; success is detected via s->system_journal. */
894 system_journal_open(s);
896 if (!s->system_journal)
899 log_debug("Flushing to /var...");
901 r = sd_id128_get_machine(&machine);
903 log_error("Failed to get machine id: %s", strerror(-r));
907 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
909 log_error("Failed to read runtime journal: %s", strerror(-r));
/* Threshold 0: presumably disables truncation of field data -- TODO
 * confirm against the sd-journal documentation. */
913 sd_journal_set_data_threshold(j, 0);
915 SD_JOURNAL_FOREACH(j) {
920 assert(f && f->current_offset > 0);
/* Locate the current entry object in its source file, then copy it. */
922 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
924 log_error("Can't read entry: %s", strerror(-r));
928 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
932 if (!shall_try_append_again(s->system_journal, r)) {
933 log_error("Can't write entry: %s", strerror(-r));
940 log_debug("Retrying write.");
941 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
943 log_error("Can't write entry: %s", strerror(-r));
949 journal_file_post_change(s->system_journal);
/* The runtime journal is not used any further. */
951 journal_file_close(s->runtime_journal);
952 s->runtime_journal = NULL;
955 rm_rf("/run/log/journal", false, true, false);
/* Handles one epoll event by dispatching on the file descriptor stored in
 * ev->data:
 *
 *   s->signal_fd      read a signalfd_siginfo; SIGUSR1 creates the
 *                     "flushed" flag file and flushes to /var, SIGUSR2 and
 *                     other signals are handled further below
 *   s->sync_timer_fd  read the timer expiration
 *   s->dev_kmsg_fd    server_read_dev_kmsg()
 *   s->native_fd /
 *   s->syslog_fd      receive a datagram with its control messages and pass
 *                     it to the native or syslog message handlers
 *   s->stdout_fd      accept a new stdout stream
 *   anything else     ev->data.ptr is taken to be a StdoutStream
 *
 * NOTE(review): return values, loop headers and several result checks are
 * on lines not visible in this extract. */
962 int process_event(Server *s, struct epoll_event *ev) {
966 if (ev->data.fd == s->signal_fd) {
967 struct signalfd_siginfo sfsi;
970 if (ev->events != EPOLLIN) {
971 log_error("Got invalid event from epoll.");
975 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
/* Anything but a complete record is treated specially; EINTR and EAGAIN
 * are singled out below. */
976 if (n != sizeof(sfsi)) {
981 if (errno == EINTR || errno == EAGAIN)
987 if (sfsi.ssi_signo == SIGUSR1) {
/* The flag file is what system_journal_open() checks before it opens the
 * persistent journal. */
988 touch("/run/systemd/journal/flushed");
989 server_flush_to_var(s);
994 if (sfsi.ssi_signo == SIGUSR2) {
1000 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1004 } else if (ev->data.fd == s->sync_timer_fd) {
1008 log_debug("Got sync request from epoll.");
/* Consume the timerfd's expiration data. */
1010 r = read(ev->data.fd, (void *)&t, sizeof(t));
1017 } else if (ev->data.fd == s->dev_kmsg_fd) {
1020 if (ev->events != EPOLLIN) {
1021 log_error("Got invalid event from epoll.");
1025 r = server_read_dev_kmsg(s);
1031 } else if (ev->data.fd == s->native_fd ||
1032 ev->data.fd == s->syslog_fd) {
1034 if (ev->events != EPOLLIN) {
1035 log_error("Got invalid event from epoll.");
1040 struct msghdr msghdr;
1042 struct ucred *ucred = NULL;
1043 struct timeval *tv = NULL;
1044 struct cmsghdr *cmsg;
1046 size_t label_len = 0;
1048 struct cmsghdr cmsghdr;
1050 /* We use NAME_MAX space for the
1051 * SELinux label here. The kernel
1052 * currently enforces no limit, but
1053 * according to suggestions from the
1054 * SELinux people this will change and
1055 * it will probably be identical to
1056 * NAME_MAX. For now we use that, but
1057 * this should be updated one day when
1058 * the final limit is known.*/
1059 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1060 CMSG_SPACE(sizeof(struct timeval)) +
1061 CMSG_SPACE(sizeof(int)) + /* fd */
1062 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask the socket how many bytes are pending so that the receive buffer
 * can be grown first. */
1069 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1070 log_error("SIOCINQ failed: %m");
1074 if (s->buffer_size < (size_t) v) {
/* Grow to at least double the current size; one extra byte is allocated
 * beyond the recorded size. */
1078 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1079 b = realloc(s->buffer, l+1);
1082 log_error("Couldn't increase buffer.");
1091 iovec.iov_base = s->buffer;
1092 iovec.iov_len = s->buffer_size;
1096 msghdr.msg_iov = &iovec;
1097 msghdr.msg_iovlen = 1;
1098 msghdr.msg_control = &control;
1099 msghdr.msg_controllen = sizeof(control);
/* Non-blocking receive; passed file descriptors get close-on-exec set. */
1101 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1104 if (errno == EINTR || errno == EAGAIN)
1107 log_error("recvmsg() failed: %m");
/* Pick credentials, SELinux label, timestamp and passed file descriptors
 * out of the control messages. Credentials and timestamp are accepted
 * only with the exact expected length. */
1111 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1113 if (cmsg->cmsg_level == SOL_SOCKET &&
1114 cmsg->cmsg_type == SCM_CREDENTIALS &&
1115 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1116 ucred = (struct ucred*) CMSG_DATA(cmsg);
1117 else if (cmsg->cmsg_level == SOL_SOCKET &&
1118 cmsg->cmsg_type == SCM_SECURITY) {
1119 label = (char*) CMSG_DATA(cmsg);
1120 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1121 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1122 cmsg->cmsg_type == SO_TIMESTAMP &&
1123 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1124 tv = (struct timeval*) CMSG_DATA(cmsg);
1125 else if (cmsg->cmsg_level == SOL_SOCKET &&
1126 cmsg->cmsg_type == SCM_RIGHTS) {
1127 fds = (int*) CMSG_DATA(cmsg);
1128 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1132 if (ev->data.fd == s->syslog_fd) {
/* syslog socket: plain text only, file descriptors are not accepted. */
1135 if (n > 0 && n_fds == 0) {
1136 e = memchr(s->buffer, '\n', n);
1142 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1143 } else if (n_fds > 0)
1144 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* native socket: either an inline payload, or an empty datagram carrying
 * exactly one file descriptor. */
1147 if (n > 0 && n_fds == 0)
1148 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1149 else if (n == 0 && n_fds == 1)
1150 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1152 log_warning("Got too many file descriptors via native socket. Ignoring.");
/* Received descriptors are always closed here. */
1155 close_many(fds, n_fds);
1160 } else if (ev->data.fd == s->stdout_fd) {
1162 if (ev->events != EPOLLIN) {
1163 log_error("Got invalid event from epoll.");
1167 stdout_stream_new(s);
1171 StdoutStream *stream;
/* Reject any event bit other than EPOLLIN and EPOLLHUP. */
1173 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1174 log_error("Got invalid event from epoll.");
1178 /* If it is none of the well-known fds, it must be an
1179 * stdout stream fd. Note that this is a bit ugly here
1180 * (since we rely that none of the well-known fds
1181 * could be interpreted as pointer), but nonetheless
1182 * safe, since the well-known fds would never get an
1183 * fd > 4096, i.e. beyond the first memory page */
1185 stream = ev->data.ptr;
/* A result <= 0 ends the stream. */
1187 if (stdout_stream_process(stream) <= 0)
1188 stdout_stream_free(stream);
1193 log_error("Unknown event.");
/* Sets the process signal mask to SIGINT, SIGTERM, SIGUSR1 and SIGUSR2,
 * creates a non-blocking, close-on-exec signalfd for the same set in
 * s->signal_fd and registers it with s->epoll_fd for EPOLLIN. Failures of
 * signalfd() and epoll_ctl() are logged. */
1197 static int open_signalfd(Server *s) {
1199 struct epoll_event ev;
1203 assert_se(sigemptyset(&mask) == 0);
1204 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
/* SIG_SETMASK replaces the whole mask rather than adding to it. */
1205 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1207 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1208 if (s->signal_fd < 0) {
1209 log_error("signalfd(): %m");
1214 ev.events = EPOLLIN;
1215 ev.data.fd = s->signal_fd;
1217 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1218 log_error("epoll_ctl(): %m");
/* Applies journald options given on the kernel command line.
 *
 * Reads /proc/cmdline and looks at each (possibly quoted) word. Recognized
 * are the boolean switches
 *   systemd.journald.forward_to_syslog=
 *   systemd.journald.forward_to_kmsg=
 *   systemd.journald.forward_to_console=
 * which set the corresponding Server fields. Any other word starting with
 * "systemd.journald" yields a warning, as does an unparsable value.
 * The detect_container() check precedes reading the file; its effect is on
 * a line not visible in this extract. */
1225 static int server_parse_proc_cmdline(Server *s) {
1226 _cleanup_free_ char *line = NULL;
1231 if (detect_container(NULL) > 0)
1234 r = read_one_line_file("/proc/cmdline", &line);
1236 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1240 FOREACH_WORD_QUOTED(w, l, line, state) {
1241 _cleanup_free_ char *word;
1243 word = strndup(w, l);
/* The numeric offsets below equal the length of the respective prefix, so
 * word + N is the value after the '='. */
1247 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1248 r = parse_boolean(word + 35);
1250 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1252 s->forward_to_syslog = r;
1253 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1254 r = parse_boolean(word + 33);
1256 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1258 s->forward_to_kmsg = r;
1259 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1260 r = parse_boolean(word + 36);
1262 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1264 s->forward_to_console = r;
1265 } else if (startswith(word, "systemd.journald"))
1266 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Parses /etc/systemd/journald.conf, section "Journal", into the Server
 * object using the gperf-generated lookup table. A missing file (ENOENT)
 * is handled separately from other open errors, which are logged as a
 * warning; parse errors are logged as a warning as well. */
1272 static int server_parse_config_file(Server *s) {
1273 static const char *fn = "/etc/systemd/journald.conf";
1274 _cleanup_fclose_ FILE *f = NULL;
/* "e" requests close-on-exec for the underlying descriptor (glibc
 * extension). */
1279 f = fopen(fn, "re");
1281 if (errno == ENOENT)
1284 log_warning("Failed to open configuration file %s: %m", fn);
1288 r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1289 (void*) journald_gperf_lookup, false, s);
1291 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Creates the timerfd used for delayed journal syncs (CLOCK_MONOTONIC,
 * close-on-exec), stores it in s->sync_timer_fd and registers it with
 * s->epoll_fd for EPOLLIN. The timer is left unarmed here. */
1296 static int server_open_sync_timer(Server *s) {
1298 struct epoll_event ev;
1302 s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1303 if (s->sync_timer_fd < 0)
1307 ev.events = EPOLLIN;
1308 ev.data.fd = s->sync_timer_fd;
1310 r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1312 log_error("Failed to add idle timer fd to epoll object: %m");
1319 int server_schedule_sync(Server *s) {
1324 if (s->sync_scheduled)
1327 if (s->sync_interval_usec) {
1328 struct itimerspec sync_timer_enable = {
1329 .it_value.tv_sec = s->sync_interval_usec / USEC_PER_SEC,
1330 .it_value.tv_nsec = s->sync_interval_usec % MSEC_PER_SEC,
1333 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1338 s->sync_scheduled = true;
/* Initializes a Server object and opens everything it needs to run.
 *
 * Visible steps, in order: mark all file descriptors as unset (-1); store
 * built-in defaults; parse the configuration file and the kernel command
 * line; normalize the rate limit settings; create /run/systemd/journal,
 * the user journal map, the mmap cache and the epoll object; classify the
 * sockets passed in by the service manager; open the syslog, native and
 * stdout sockets, /dev/kmsg, the kernel sequence number, the sync timer
 * and the signalfd; create the udev context and the rate limiter; open
 * the journal files. NOTE(review): the result checks and return
 * statements between these steps are on lines not visible in this
 * extract. */
1343 int server_init(Server *s) {
1349 s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1350 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1354 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1355 s->sync_scheduled = false;
1357 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1358 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1360 s->forward_to_syslog = true;
1362 s->max_level_store = LOG_DEBUG;
1363 s->max_level_syslog = LOG_DEBUG;
1364 s->max_level_kmsg = LOG_NOTICE;
1365 s->max_level_console = LOG_INFO;
/* Every byte of both metrics structures is set to 0xFF. */
1367 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1368 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* Results are not checked here; both functions log their own warnings. */
1370 server_parse_config_file(s);
1371 server_parse_proc_cmdline(s);
/* True when exactly one of interval and burst is zero: in that case both
 * are set to zero. */
1372 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1373 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1374 (long long unsigned) s->rate_limit_interval,
1375 s->rate_limit_burst);
1376 s->rate_limit_interval = s->rate_limit_burst = 0;
1379 mkdir_p("/run/systemd/journal", 0755);
1381 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1382 if (!s->user_journals)
1385 s->mmap = mmap_cache_new();
1389 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1390 if (s->epoll_fd < 0) {
1391 log_error("Failed to create epoll object: %m");
/* Sockets handed over via socket activation; "true" asks sd_listen_fds()
 * to unset the related environment variables. */
1395 n = sd_listen_fds(true);
1397 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
/* Identify each passed descriptor by socket type and bound path; at most
 * one descriptor of each kind is accepted. */
1401 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1403 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1405 if (s->native_fd >= 0) {
1406 log_error("Too many native sockets passed.");
1412 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1414 if (s->stdout_fd >= 0) {
1415 log_error("Too many stdout sockets passed.");
1421 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1423 if (s->syslog_fd >= 0) {
1424 log_error("Too many /dev/log sockets passed.");
1431 log_error("Unknown socket passed.");
/* Open the input sources and event sources. */
1436 r = server_open_syslog_socket(s);
1440 r = server_open_native_socket(s);
1444 r = server_open_stdout_socket(s);
1448 r = server_open_dev_kmsg(s);
1452 r = server_open_kernel_seqnum(s);
1456 r = server_open_sync_timer(s);
1460 r = open_signalfd(s);
1464 s->udev = udev_new();
1468 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1469 s->rate_limit_burst);
1473 r = system_journal_open(s);
/* Calls journal_file_maybe_append_tag() with the current realtime clock
 * value on the system journal (if open) and on every per-user journal.
 * The runtime journal is not touched. */
1480 void server_maybe_append_tags(Server *s) {
/* One timestamp is shared by all files. */
1486 n = now(CLOCK_REALTIME);
1488 if (s->system_journal)
1489 journal_file_maybe_append_tag(s->system_journal, n);
1491 HASHMAP_FOREACH(f, s->user_journals, i)
1492 journal_file_maybe_append_tag(f, n);
/* Releases what the Server object holds: stdout streams, the system,
 * runtime and per-user journal files, the user journal map, all file
 * descriptors that are set (>= 0), the rate limiter, the kernel sequence
 * number mapping, the mmap cache and the udev context. */
1496 void server_done(Server *s) {
/* The loop relies on stdout_stream_free() unlinking the stream from
 * s->stdout_streams -- TODO confirm in journald-stream.c. */
1500 while (s->stdout_streams)
1501 stdout_stream_free(s->stdout_streams);
1503 if (s->system_journal)
1504 journal_file_close(s->system_journal);
1506 if (s->runtime_journal)
1507 journal_file_close(s->runtime_journal);
/* Drain the map, closing each file, before freeing the map itself. */
1509 while ((f = hashmap_steal_first(s->user_journals)))
1510 journal_file_close(f);
1512 hashmap_free(s->user_journals);
/* Descriptors still at their initial value -1 are skipped. */
1514 if (s->epoll_fd >= 0)
1515 close_nointr_nofail(s->epoll_fd);
1517 if (s->signal_fd >= 0)
1518 close_nointr_nofail(s->signal_fd);
1520 if (s->syslog_fd >= 0)
1521 close_nointr_nofail(s->syslog_fd);
1523 if (s->native_fd >= 0)
1524 close_nointr_nofail(s->native_fd);
1526 if (s->stdout_fd >= 0)
1527 close_nointr_nofail(s->stdout_fd);
1529 if (s->dev_kmsg_fd >= 0)
1530 close_nointr_nofail(s->dev_kmsg_fd);
1532 if (s->sync_timer_fd >= 0)
1533 close_nointr_nofail(s->sync_timer_fd);
1536 journal_rate_limit_free(s->rate_limit);
/* The mapping is sizeof(uint64_t) bytes long. */
1538 if (s->kernel_seqnum)
1539 munmap(s->kernel_seqnum, sizeof(uint64_t));
1545 mmap_cache_unref(s->mmap);
1548 udev_unref(s->udev);