1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
35 #include <systemd/sd-login.h>
41 #include "journal-file.h"
42 #include "socket-util.h"
43 #include "cgroup-util.h"
47 #include "conf-parser.h"
48 #include "journal-internal.h"
49 #include "journal-vacuum.h"
50 #include "journal-authenticate.h"
51 #include "journald-server.h"
52 #include "journald-rate-limit.h"
53 #include "journald-kmsg.h"
54 #include "journald-syslog.h"
55 #include "journald-stream.h"
56 #include "journald-console.h"
57 #include "journald-native.h"
61 #include <acl/libacl.h>
66 #include <selinux/selinux.h>
69 #define USER_JOURNALS_MAX 1024
71 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
72 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
73 #define DEFAULT_RATE_LIMIT_BURST 200
75 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Configuration-file spelling for each Storage enum value. */
77 static const char* const storage_table[] = {
78 [STORAGE_AUTO] = "auto",
79 [STORAGE_VOLATILE] = "volatile",
80 [STORAGE_PERSISTENT] = "persistent",
81 [STORAGE_NONE] = "none"
/* Macro-generated glue built on the table above; the macros themselves are
 * defined outside this file, so presumably these expand to the string<->enum
 * lookup helpers and the config_parse_storage() callback -- confirm. */
84 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
85 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Configuration-file spelling for each SplitMode enum value. */
87 static const char* const split_mode_table[] = {
88 [SPLIT_NONE] = "none",
90 [SPLIT_LOGIN] = "login"
/* Macro-generated glue built on the table above; presumably the lookup
 * helpers and the config_parse_split_mode() callback -- confirm against the
 * macro definitions. */
93 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
94 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Computes how many bytes the journal may still consume and caches the
 * result, together with a CLOCK_MONOTONIC timestamp, on the Server. */
96 static uint64_t available_space(Server *s) {
98 _cleanup_free_ char *p = NULL;
102 uint64_t sum = 0, avail = 0, ss_avail = 0;
104 _cleanup_closedir_ DIR *d = NULL;
108 ts = now(CLOCK_MONOTONIC);
/* Serve the cached value while it is younger than RECHECK_AVAILABLE_SPACE_USEC. */
110 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
111 return s->cached_available_space;
113 r = sd_id128_get_machine(&machine);
/* With a system journal open, measure the /var directory against the system
 * metrics; otherwise measure the /run directory against the runtime metrics. */
117 if (s->system_journal) {
118 f = "/var/log/journal/";
119 m = &s->system_metrics;
121 f = "/run/log/journal/";
122 m = &s->runtime_metrics;
/* The directory examined is <prefix><machine-id>. */
127 p = strappend(f, sd_id128_to_string(machine, ids));
135 if (fstatvfs(dirfd(d), &ss) < 0)
141 union dirent_storage buf;
143 r = readdir_r(d, &buf.de, &de);
/* Only regular files named *.journal or *.journal~ count towards usage. */
150 if (!endswith(de->d_name, ".journal") &&
151 !endswith(de->d_name, ".journal~"))
154 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
157 if (!S_ISREG(st.st_mode))
/* st_blocks is multiplied by 512 to obtain bytes actually allocated. */
160 sum += (uint64_t) st.st_blocks * 512UL;
/* Headroom left below the configured max_use (clamped at 0). */
163 avail = sum >= m->max_use ? 0 : m->max_use - sum;
/* NOTE(review): POSIX expresses f_bavail in f_frsize units, and this product
 * is evaluated in the operands' native width before widening -- confirm both
 * are acceptable on all supported targets. */
165 ss_avail = ss.f_bsize * ss.f_bavail;
/* Keep keep_free bytes of the file system untouched (clamped at 0). */
167 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
/* The statement guarded by this test is not visible here; presumably the
 * smaller of the two limits wins -- confirm. */
169 if (ss_avail < avail)
172 s->cached_available_space = avail;
173 s->cached_available_space_timestamp = ts;
/* Resolves the gid of the "systemd-journal" group into s->file_gid. The
 * s->file_gid_valid flag is set afterwards regardless of the outcome, so the
 * lookup is attempted only once. */
178 static void server_read_file_gid(Server *s) {
179 const char *g = "systemd-journal";
/* Already resolved (or already attempted): nothing to do. */
184 if (s->file_gid_valid)
187 r = get_group_creds(&g, &s->file_gid);
189 log_warning("Failed to resolve '%s' group: %s", g, strerror(-r));
191 /* if we couldn't read the gid, then it will be 0, but that's
192 * fine and we shouldn't try to resolve the group again, so
193 * let's just pretend it worked right-away. */
194 s->file_gid_valid = true;
/* Normalises ownership and access rights of a journal file: mode 0640,
 * owner 0, group s->file_gid, plus an ACL_USER entry for uid carrying
 * ACL_READ. Every failure is logged as a warning and otherwise ignored. */
197 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
202 acl_permset_t permset;
/* Make sure s->file_gid has been looked up before it is used below. */
207 server_read_file_gid(s);
209 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
211 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
217 acl = acl_get_fd(f->fd);
219 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an existing entry for uid; the condition leading to the creation
 * path below is not visible here (presumably "not found") -- confirm. */
223 r = acl_find_uid(acl, uid, &entry);
226 if (acl_create_entry(&acl, &entry) < 0 ||
227 acl_set_tag_type(entry, ACL_USER) < 0 ||
228 acl_set_qualifier(entry, &uid) < 0) {
229 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Add read permission to the entry and recompute the ACL mask. */
234 if (acl_get_permset(entry, &permset) < 0 ||
235 acl_add_perm(permset, ACL_READ) < 0 ||
236 acl_calc_mask(&acl) < 0) {
237 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Write the modified ACL back to the file. */
241 if (acl_set_fd(f->fd, acl) < 0)
242 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Selects the journal file that entries for uid are written to. Per-user
 * files are cached in s->user_journals and opened on demand; on any failure
 * visible here the function falls back to s->system_journal. */
249 static JournalFile* find_journal(Server *s, uid_t uid) {
257 /* We split up user logs only on /var, not on /run. If the
258 * runtime file is open, we write to it exclusively, in order
259 * to guarantee proper order as soon as we flush /run to
260 * /var and close the runtime file. */
262 if (s->runtime_journal)
263 return s->runtime_journal;
266 return s->system_journal;
268 r = sd_id128_get_machine(&machine);
270 return s->system_journal;
/* Keys of s->user_journals are uids packed into pointers. */
272 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
/* Per-user file path: /var/log/journal/<machine-id>/user-<uid>.journal */
276 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
277 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
278 return s->system_journal;
/* Cap the number of simultaneously open user journals at USER_JOURNALS_MAX
 * by closing whichever entry the hashmap hands out first. */
280 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
281 /* Too many open? Then let's close one */
282 f = hashmap_steal_first(s->user_journals);
284 journal_file_close(f);
/* Open (creating if needed) the user journal with the system metrics; the
 * system journal is passed along as an extra argument (its role is defined
 * by journal_file_open_reliably(), not visible here). */
287 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
291 return s->system_journal;
/* Give the owning user read access to the new file. */
293 server_fix_perms(s, f, uid);
295 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
/* The file could not be registered: close it again and fall back. */
297 journal_file_close(f);
298 return s->system_journal;
304 void server_rotate(Server *s) {
310 log_debug("Rotating...");
312 if (s->runtime_journal) {
313 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
315 if (s->runtime_journal)
316 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
318 log_error("Failed to create new runtime journal: %s", strerror(-r));
320 server_fix_perms(s, s->runtime_journal, 0);
323 if (s->system_journal) {
324 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
326 if (s->system_journal)
327 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
329 log_error("Failed to create new system journal: %s", strerror(-r));
332 server_fix_perms(s, s->system_journal, 0);
335 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
336 r = journal_file_rotate(&f, s->compress, s->seal);
339 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
341 log_error("Failed to create user journal: %s", strerror(-r));
343 hashmap_replace(s->user_journals, k, f);
344 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Takes the system journal and every open user journal offline via
 * journal_file_set_offline(), then disarms the sync timer and clears
 * s->sync_scheduled. Errors are logged only. */
349 void server_sync(Server *s) {
/* An all-zero itimerspec: passing it to timerfd_settime() disarms the timer. */
355 static const struct itimerspec sync_timer_disable = {};
357 if (s->system_journal) {
358 r = journal_file_set_offline(s->system_journal);
360 log_error("Failed to sync system journal: %s", strerror(-r));
363 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
364 r = journal_file_set_offline(f);
366 log_error("Failed to sync user journal: %s", strerror(-r));
369 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
371 log_error("Failed to disable max timer: %m");
/* Allow server_schedule_sync() to arm the timer again. */
373 s->sync_scheduled = false;
/* Runs journal_directory_vacuum() on the persistent and/or runtime journal
 * directory of this machine, using the matching metrics and
 * s->max_retention_usec, and invalidates the cached available-space value. */
376 void server_vacuum(Server *s) {
382 log_debug("Vacuuming...");
/* Reset before vacuuming; journal_directory_vacuum() receives a pointer to
 * this field and may update it. */
384 s->oldest_file_usec = 0;
386 r = sd_id128_get_machine(&machine);
388 log_error("Failed to get machine ID: %s", strerror(-r));
392 sd_id128_to_string(machine, ids);
394 if (s->system_journal) {
395 p = strappend("/var/log/journal/", ids);
/* A missing directory (-ENOENT) is not reported as an error. */
401 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
402 if (r < 0 && r != -ENOENT)
403 log_error("Failed to vacuum %s: %s", p, strerror(-r));
407 if (s->runtime_journal) {
408 p = strappend("/run/log/journal/", ids);
414 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
415 if (r < 0 && r != -ENOENT)
416 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* Timestamp 0 forces available_space() to recompute on its next call. */
420 s->cached_available_space_timestamp = 0;
423 bool shall_try_append_again(JournalFile *f, int r) {
425 /* -E2BIG Hit configured limit
427 -EDQUOT Quota limit hit
429 -EHOSTDOWN Other machine
430 -EBUSY Unclean shutdown
431 -EPROTONOSUPPORT Unsupported feature
434 -ESHUTDOWN Already archived */
436 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
437 log_debug("%s: Allocation limit reached, rotating.", f->path);
438 else if (r == -EHOSTDOWN)
439 log_info("%s: Journal file from other machine, rotating.", f->path);
440 else if (r == -EBUSY)
441 log_info("%s: Unclean shutdown, rotating.", f->path);
442 else if (r == -EPROTONOSUPPORT)
443 log_info("%s: Unsupported feature, rotating.", f->path);
444 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
445 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Appends one entry (n iovec fields) to the journal chosen for uid, with a
 * single retry when shall_try_append_again() judges the failure recoverable.
 * The rotate/vacuum calls between the visible steps are not shown here. */
452 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
/* Guards against retrying more than once. */
454 bool vacuumed = false;
461 f = find_journal(s, uid);
/* Proactive rotation when the file itself suggests it. */
465 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
466 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
/* Look the file up again: the previous pointer may no longer be current. */
471 f = find_journal(s, uid);
476 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
478 server_schedule_sync(s);
/* Give up if a retry already happened or the error is not retryable. */
482 if (vacuumed || !shall_try_append_again(f, r)) {
483 log_error("Failed to write entry, ignoring: %s", strerror(-r));
490 f = find_journal(s, uid);
494 log_debug("Retrying write.");
495 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
497 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* Appends journald-generated metadata fields (the "_"-prefixed ones) to the
 * caller's iovec array, picks the target journal according to s->split_mode
 * and hands the entry to write_to_journal().
 *
 * iovec/n/m: field array, number of fields already filled in, and capacity;
 *            the caller must leave room for N_IOVEC_META_FIELDS more entries.
 * label/label_len: security label received with the message, if any.
 * unit_id: unit name used when the unit cannot be derived from the pid. */
500 static void dispatch_message_real(
502 struct iovec *iovec, unsigned n, unsigned m,
505 const char *label, size_t label_len,
506 const char *unit_id) {
/* Fixed-size stack buffers for the numeric fields and the two 128-bit ids
 * (32 hex characters each). */
508 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(ucred->pid)],
509 uid[sizeof("_UID=") + DECIMAL_STR_MAX(ucred->uid)],
510 gid[sizeof("_GID=") + DECIMAL_STR_MAX(ucred->gid)],
511 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
512 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
513 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=";
/* Heap strings referenced by the iovec; released automatically when the
 * function returns, i.e. after write_to_journal() has consumed them. */
515 _cleanup_free_ char *comm = NULL, *cmdline = NULL, *hostname = NULL,
516 *exe = NULL, *cgroup = NULL, *session = NULL,
517 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
520 _cleanup_free_ char *audit_session = NULL, *audit_loginuid = NULL;
526 uid_t realuid = 0, owner = 0, journal_uid;
527 bool owner_valid = false;
532 assert(n + N_IOVEC_META_FIELDS <= m);
/* Fields derived from the sender's credentials. */
540 realuid = ucred->uid;
542 snprintf(pid, sizeof(pid) - 1, "_PID=%lu", (unsigned long) ucred->pid);
544 IOVEC_SET_STRING(iovec[n++], pid);
546 snprintf(uid, sizeof(uid) - 1, "_UID=%lu", (unsigned long) ucred->uid);
548 IOVEC_SET_STRING(iovec[n++], uid);
550 snprintf(gid, sizeof(gid) - 1, "_GID=%lu", (unsigned long) ucred->gid);
552 IOVEC_SET_STRING(iovec[n++], gid);
/* Process name, executable path and command line of the sending pid. */
554 r = get_process_comm(ucred->pid, &t);
556 comm = strappend("_COMM=", t);
560 IOVEC_SET_STRING(iovec[n++], comm);
563 r = get_process_exe(ucred->pid, &t);
565 exe = strappend("_EXE=", t);
569 IOVEC_SET_STRING(iovec[n++], exe);
572 r = get_process_cmdline(ucred->pid, 0, false, &t);
574 cmdline = strappend("_CMDLINE=", t);
578 IOVEC_SET_STRING(iovec[n++], cmdline);
/* Audit session and login uid. */
582 r = audit_session_from_pid(ucred->pid, &audit);
584 if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
585 IOVEC_SET_STRING(iovec[n++], audit_session);
587 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
589 if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
590 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* Control group, login session and session owner of the sending pid. */
593 r = cg_pid_get_path(NULL, ucred->pid, &t);
595 cgroup = strappend("_SYSTEMD_CGROUP=", t);
599 IOVEC_SET_STRING(iovec[n++], cgroup);
603 r = cg_pid_get_session(ucred->pid, &t);
605 session = strappend("_SYSTEMD_SESSION=", t);
609 IOVEC_SET_STRING(iovec[n++], session);
612 if (sd_pid_get_owner_uid(ucred->pid, &owner) >= 0) {
614 if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
615 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* Unit name: system unit first, then user unit, then the caller-supplied
 * unit_id (the test choosing between the two unit_id variants is not
 * visible here). */
619 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
620 unit = strappend("_SYSTEMD_UNIT=", t);
622 } else if (cg_pid_get_user_unit(ucred->pid, &t) >= 0) {
623 unit = strappend("_SYSTEMD_USER_UNIT=", t);
625 } else if (unit_id) {
627 unit = strappend("_SYSTEMD_USER_UNIT=", unit_id);
629 unit = strappend("_SYSTEMD_UNIT=", unit_id);
633 IOVEC_SET_STRING(iovec[n++], unit);
/* SELinux context: built from the received label (which carries an explicit
 * length, hence the manual terminating 0) or queried from the pid. */
637 selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
638 if (selinux_context) {
639 *((char*) mempcpy(stpcpy(selinux_context, "_SELINUX_CONTEXT="), label, label_len)) = 0;
640 IOVEC_SET_STRING(iovec[n++], selinux_context);
643 security_context_t con;
645 if (getpidcon(ucred->pid, &con) >= 0) {
646 selinux_context = strappend("_SELINUX_CONTEXT=", con);
648 IOVEC_SET_STRING(iovec[n++], selinux_context);
/* Sender-side timestamp taken from tv. */
656 snprintf(source_time, sizeof(source_time) - 1, "_SOURCE_REALTIME_TIMESTAMP=%llu",
657 (unsigned long long) timeval_load(tv));
658 char_array_0(source_time);
659 IOVEC_SET_STRING(iovec[n++], source_time);
662 /* Note that strictly speaking storing the boot id here is
663 * redundant since the entry includes this in-line
664 * anyway. However, we need this indexed, too. */
665 r = sd_id128_get_boot(&id);
/* The id is formatted directly behind the pre-initialised field prefix. */
667 sd_id128_to_string(id, boot_id + sizeof("_BOOT_ID=") - 1);
668 IOVEC_SET_STRING(iovec[n++], boot_id);
671 r = sd_id128_get_machine(&id);
673 sd_id128_to_string(id, machine_id + sizeof("_MACHINE_ID=") - 1);
674 IOVEC_SET_STRING(iovec[n++], machine_id);
677 t = gethostname_malloc();
679 hostname = strappend("_HOSTNAME=", t);
682 IOVEC_SET_STRING(iovec[n++], hostname);
/* Choose which journal receives the entry. */
687 if (s->split_mode == SPLIT_UID && realuid > 0)
688 /* Split up strictly by any UID */
689 journal_uid = realuid;
690 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
691 /* Split up by login UIDs, this avoids creation of
692 * individual journals for system UIDs. We do this
693 * only if the realuid is not root, in order not to
694 * accidentally leak privileged information to the
695 * user that is logged by a privileged process that is
696 * part of an unprivileged session.*/
701 write_to_journal(s, journal_uid, iovec, n);
/* Logs a message originating from journald itself: PRIORITY=6,
 * _TRANSPORT=driver, MESSAGE=<printf-style text> and, unless message_id is
 * SD_ID128_NULL, a MESSAGE_ID field. journald's own pid/uid/gid are used as
 * the sender credentials. */
704 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
/* Sized for a field prefix of up to 11 bytes, 32 hex digits and a NUL. */
705 char mid[11 + 32 + 1];
706 char buffer[16 + LINE_MAX + 1];
/* Room for the fields set here plus the metadata fields that
 * dispatch_message_real() appends. */
707 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
710 struct ucred ucred = {};
715 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
716 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The 8-byte "MESSAGE=" prefix is copied without its NUL; the formatted
 * text follows immediately behind it. */
718 memcpy(buffer, "MESSAGE=", 8);
719 va_start(ap, format);
720 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
722 char_array_0(buffer);
723 IOVEC_SET_STRING(iovec[n++], buffer);
725 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
726 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
728 IOVEC_SET_STRING(iovec[n++], mid);
731 ucred.pid = getpid();
732 ucred.uid = getuid();
733 ucred.gid = getgid();
/* No timestamp, no security label and no unit id are supplied. */
735 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Entry point for messages from the various transports: filters by
 * priority, applies per-cgroup rate limiting, and forwards accepted entries
 * to dispatch_message_real(). */
738 void server_dispatch_message(
740 struct iovec *iovec, unsigned n, unsigned m,
743 const char *label, size_t label_len,
748 _cleanup_free_ char *path = NULL;
752 assert(iovec || n == 0);
/* Messages whose level value exceeds s->max_level_store are filtered here
 * (the action taken is not visible in this view). */
757 if (LOG_PRI(priority) > s->max_level_store)
/* The sender's cgroup path is the rate-limit key. */
763 r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
767 /* example: /user/lennart/3/foobar
768 * /system/dbus.service/foobar
770 * So let's cut of everything past the third /, since that is
771 * where user directories start */
773 c = strchr(path, '/');
775 c = strchr(c+1, '/');
777 c = strchr(c+1, '/');
/* The rate limiter is also given the priority and the currently available
 * journal space. */
783 rl = journal_rate_limit_test(s->rate_limit, path,
784 priority & LOG_PRIMASK, available_space(s));
789 /* Write a suppression message if we suppressed something */
791 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
792 "Suppressed %u messages from %s", rl - 1, path);
795 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
/* Opens the persistent journal under /var/log/journal/<machine-id>/ and/or
 * the runtime journal under /run/log/journal/<machine-id>/, depending on
 * s->storage and on whether /run/systemd/journal/flushed exists. */
799 static int system_journal_open(Server *s) {
805 r = sd_id128_get_machine(&machine);
809 sd_id128_to_string(machine, ids);
/* The persistent journal is attempted only for persistent/auto storage and
 * only once the "flushed" flag file exists. */
811 if (!s->system_journal &&
812 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
813 access("/run/systemd/journal/flushed", F_OK) >= 0) {
815 /* If in auto mode: first try to create the machine
816 * path, but not the prefix.
818 * If in persistent mode: create /var/log/journal and
819 * the machine path */
821 if (s->storage == STORAGE_PERSISTENT)
822 (void) mkdir("/var/log/journal/", 0755);
824 fn = strappend("/var/log/journal/", ids);
828 (void) mkdir(fn, 0755);
831 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
835 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
839 char fb[FORMAT_BYTES_MAX];
/* Fix up access rights and announce the size limit in the journal. */
841 server_fix_perms(s, s->system_journal, 0);
842 server_driver_message(s, SD_ID128_NULL, "Allowing system journal files to grow to %s.",
843 format_bytes(fb, sizeof(fb), s->system_metrics.max_use));
/* A missing directory or a read-only file system is not worth a warning. */
847 if (r != -ENOENT && r != -EROFS)
848 log_warning("Failed to open system journal: %s", strerror(-r));
854 if (!s->runtime_journal &&
855 (s->storage != STORAGE_NONE)) {
857 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
861 if (s->system_journal) {
863 /* Try to open the runtime journal, but only
864 * if it already exists, so that we can flush
865 * it into the system journal */
867 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
872 log_warning("Failed to open runtime journal: %s", strerror(-r));
879 /* OK, we really need the runtime journal, so create
880 * it if necessary. */
882 (void) mkdir_parents(fn, 0755);
883 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
887 log_error("Failed to open runtime journal: %s", strerror(-r));
892 if (s->runtime_journal) {
893 char fb[FORMAT_BYTES_MAX];
895 server_fix_perms(s, s->runtime_journal, 0);
896 server_driver_message(s, SD_ID128_NULL, "Allowing runtime journal files to grow to %s.",
897 format_bytes(fb, sizeof(fb), s->runtime_metrics.max_use));
/* Copies every entry of the runtime journal into the system journal, then
 * closes the runtime journal and removes /run/log/journal. Only applies to
 * auto and persistent storage modes. */
904 int server_flush_to_var(Server *s) {
907 sd_journal *j = NULL;
911 if (s->storage != STORAGE_AUTO &&
912 s->storage != STORAGE_PERSISTENT)
/* Nothing to flush without a runtime journal. */
915 if (!s->runtime_journal)
/* Try to bring up the system journal; without one there is no destination. */
918 system_journal_open(s);
920 if (!s->system_journal)
923 log_debug("Flushing to /var...");
925 r = sd_id128_get_machine(&machine);
927 log_error("Failed to get machine id: %s", strerror(-r));
/* Read back only the runtime journal files. */
931 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
933 log_error("Failed to read runtime journal: %s", strerror(-r));
/* Threshold 0: presumably disables truncation of large fields -- confirm
 * against the sd_journal_set_data_threshold() documentation. */
937 sd_journal_set_data_threshold(j, 0);
939 SD_JOURNAL_FOREACH(j) {
944 assert(f && f->current_offset > 0);
946 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
948 log_error("Can't read entry: %s", strerror(-r));
952 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
/* Retry once when the failure is of the recoverable kind. */
956 if (!shall_try_append_again(s->system_journal, r)) {
957 log_error("Can't write entry: %s", strerror(-r));
964 log_debug("Retrying write.");
965 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
967 log_error("Can't write entry: %s", strerror(-r));
973 journal_file_post_change(s->system_journal);
/* The runtime journal is no longer needed once flushed. */
975 journal_file_close(s->runtime_journal);
976 s->runtime_journal = NULL;
979 rm_rf("/run/log/journal", false, true, false);
/* Dispatches a single epoll event to the handler matching its file
 * descriptor: signalfd, sync timer, /dev/kmsg, native/syslog datagram
 * sockets, the stdout listening socket, or an established stdout stream. */
986 int process_event(Server *s, struct epoll_event *ev) {
/* --- signals --- */
990 if (ev->data.fd == s->signal_fd) {
991 struct signalfd_siginfo sfsi;
994 if (ev->events != EPOLLIN) {
995 log_error("Got invalid event from epoll.");
999 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1000 if (n != sizeof(sfsi)) {
1005 if (errno == EINTR || errno == EAGAIN)
/* SIGUSR1: create the "flushed" flag file and move the runtime journal
 * contents to /var. */
1011 if (sfsi.ssi_signo == SIGUSR1) {
1012 touch("/run/systemd/journal/flushed");
1013 server_flush_to_var(s);
1018 if (sfsi.ssi_signo == SIGUSR2) {
1024 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
/* --- periodic sync timer --- */
1028 } else if (ev->data.fd == s->sync_timer_fd) {
1032 log_debug("Got sync request from epoll.");
/* Read the timerfd value (t) from the descriptor. */
1034 r = read(ev->data.fd, (void *)&t, sizeof(t));
/* --- kernel messages --- */
1041 } else if (ev->data.fd == s->dev_kmsg_fd) {
1044 if (ev->events != EPOLLIN) {
1045 log_error("Got invalid event from epoll.");
1049 r = server_read_dev_kmsg(s);
/* --- native protocol and syslog datagram sockets --- */
1055 } else if (ev->data.fd == s->native_fd ||
1056 ev->data.fd == s->syslog_fd) {
1058 if (ev->events != EPOLLIN) {
1059 log_error("Got invalid event from epoll.");
1064 struct msghdr msghdr;
1066 struct ucred *ucred = NULL;
1067 struct timeval *tv = NULL;
1068 struct cmsghdr *cmsg;
1070 size_t label_len = 0;
1072 struct cmsghdr cmsghdr;
1074 /* We use NAME_MAX space for the
1075 * SELinux label here. The kernel
1076 * currently enforces no limit, but
1077 * according to suggestions from the
1078 * SELinux people this will change and
1079 * it will probably be identical to
1080 * NAME_MAX. For now we use that, but
1081 * this should be updated one day when
1082 * the final limit is known.*/
1083 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1084 CMSG_SPACE(sizeof(struct timeval)) +
1085 CMSG_SPACE(sizeof(int)) + /* fd */
1086 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask the socket how many bytes are waiting so the receive buffer can be
 * grown first. */
1093 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1094 log_error("SIOCINQ failed: %m");
1098 if (s->buffer_size < (size_t) v) {
/* Grow to at least double the old size; one extra byte is allocated beyond l. */
1102 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1103 b = realloc(s->buffer, l+1);
1106 log_error("Couldn't increase buffer.");
1115 iovec.iov_base = s->buffer;
1116 iovec.iov_len = s->buffer_size;
1120 msghdr.msg_iov = &iovec;
1121 msghdr.msg_iovlen = 1;
1122 msghdr.msg_control = &control;
1123 msghdr.msg_controllen = sizeof(control);
1125 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1128 if (errno == EINTR || errno == EAGAIN)
1131 log_error("recvmsg() failed: %m");
/* Pick sender credentials, security label, timestamp and passed file
 * descriptors out of the control messages. */
1135 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1137 if (cmsg->cmsg_level == SOL_SOCKET &&
1138 cmsg->cmsg_type == SCM_CREDENTIALS &&
1139 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1140 ucred = (struct ucred*) CMSG_DATA(cmsg);
1141 else if (cmsg->cmsg_level == SOL_SOCKET &&
1142 cmsg->cmsg_type == SCM_SECURITY) {
1143 label = (char*) CMSG_DATA(cmsg);
1144 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1145 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1146 cmsg->cmsg_type == SO_TIMESTAMP &&
1147 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1148 tv = (struct timeval*) CMSG_DATA(cmsg);
1149 else if (cmsg->cmsg_level == SOL_SOCKET &&
1150 cmsg->cmsg_type == SCM_RIGHTS) {
1151 fds = (int*) CMSG_DATA(cmsg);
1152 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* syslog socket: plain text only, file descriptors are not accepted. */
1156 if (ev->data.fd == s->syslog_fd) {
1159 if (n > 0 && n_fds == 0) {
1160 e = memchr(s->buffer, '\n', n);
1166 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1167 } else if (n_fds > 0)
1168 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* native socket: either an inline payload, or exactly one file descriptor
 * with an empty payload. */
1171 if (n > 0 && n_fds == 0)
1172 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1173 else if (n == 0 && n_fds == 1)
1174 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1176 log_warning("Got too many file descriptors via native socket. Ignoring.");
/* Received descriptors are always closed here. */
1179 close_many(fds, n_fds);
/* --- new stdout stream connection --- */
1184 } else if (ev->data.fd == s->stdout_fd) {
1186 if (ev->events != EPOLLIN) {
1187 log_error("Got invalid event from epoll.");
1191 stdout_stream_new(s);
1195 StdoutStream *stream;
/* Only EPOLLIN and/or EPOLLHUP are acceptable on a stream. */
1197 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1198 log_error("Got invalid event from epoll.");
1202 /* If it is none of the well-known fds, it must be an
1203 * stdout stream fd. Note that this is a bit ugly here
1204 * (since we rely that none of the well-known fds
1205 * could be interpreted as pointer), but nonetheless
1206 * safe, since the well-known fds would never get an
1207 * fd > 4096, i.e. beyond the first memory page */
1209 stream = ev->data.ptr;
/* A non-positive result from stdout_stream_process() ends the stream. */
1211 if (stdout_stream_process(stream) <= 0)
1212 stdout_stream_free(stream);
1217 log_error("Unknown event.");
/* Routes SIGINT, SIGTERM, SIGUSR1 and SIGUSR2 through a signalfd and
 * registers that descriptor with the server's epoll instance. */
1221 static int open_signalfd(Server *s) {
1223 struct epoll_event ev;
1227 assert_se(sigemptyset(&mask) == 0);
1228 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
/* SIG_SETMASK replaces the process signal mask with exactly these four
 * signals, so they are blocked from normal delivery. */
1229 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1231 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1232 if (s->signal_fd < 0) {
1233 log_error("signalfd(): %m");
1238 ev.events = EPOLLIN;
1239 ev.data.fd = s->signal_fd;
1241 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1242 log_error("epoll_ctl(): %m");
/* Applies systemd.journald.forward_to_{syslog,kmsg,console}= switches found
 * on the kernel command line to the Server. Unparsable values and unknown
 * systemd.journald* words are warned about. */
1249 static int server_parse_proc_cmdline(Server *s) {
1250 _cleanup_free_ char *line = NULL;
/* Checked before /proc/cmdline is read; the action taken inside a container
 * is not visible in this view. */
1255 if (detect_container(NULL) > 0)
1258 r = read_one_line_file("/proc/cmdline", &line);
1260 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1264 FOREACH_WORD_QUOTED(w, l, line, state) {
1265 _cleanup_free_ char *word;
1267 word = strndup(w, l);
/* The numeric offsets below (35, 33, 36) are the lengths of the respective
 * option prefixes, i.e. they point at the value after '='. */
1271 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1272 r = parse_boolean(word + 35);
1274 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1276 s->forward_to_syslog = r;
1277 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1278 r = parse_boolean(word + 33);
1280 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1282 s->forward_to_kmsg = r;
1283 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1284 r = parse_boolean(word + 36);
1286 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1288 s->forward_to_console = r;
/* Any other word in the systemd.journald namespace is unknown. */
1289 } else if (startswith(word, "systemd.journald"))
1290 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Loads the [Journal] section of /etc/systemd/journald.conf into the Server
 * via the gperf-generated lookup table. Problems are logged as warnings. */
1296 static int server_parse_config_file(Server *s) {
1297 static const char *fn = "/etc/systemd/journald.conf";
1298 _cleanup_fclose_ FILE *f = NULL;
/* "re": read-only, opened with close-on-exec. */
1303 f = fopen(fn, "re");
/* A missing file is tested for separately, ahead of the warning below. */
1305 if (errno == ENOENT)
1308 log_warning("Failed to open configuration file %s: %m", fn);
1312 r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1313 (void*) journald_gperf_lookup, false, s);
1315 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Creates the CLOCK_MONOTONIC timerfd used for deferred journal syncing and
 * registers it with the server's epoll instance. The timer is created
 * unarmed; server_schedule_sync() arms it. */
1320 static int server_open_sync_timer(Server *s) {
1322 struct epoll_event ev;
1326 s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1327 if (s->sync_timer_fd < 0)
1331 ev.events = EPOLLIN;
1332 ev.data.fd = s->sync_timer_fd;
1334 r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1336 log_error("Failed to add idle timer fd to epoll object: %m");
1343 int server_schedule_sync(Server *s) {
1348 if (s->sync_scheduled)
1351 if (s->sync_interval_usec) {
1352 struct itimerspec sync_timer_enable = {
1353 .it_value.tv_sec = s->sync_interval_usec / USEC_PER_SEC,
1354 .it_value.tv_nsec = s->sync_interval_usec % MSEC_PER_SEC,
1357 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1362 s->sync_scheduled = true;
/* Initialises a Server: sets defaults, applies the configuration file and
 * kernel command line, takes over sockets passed in by the service manager,
 * opens whatever is still missing, and finally opens the journal files. */
1367 int server_init(Server *s) {
/* Mark every descriptor as "not open". */
1373 s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1374 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1378 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1379 s->sync_scheduled = false;
1381 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1382 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1384 s->forward_to_syslog = true;
1386 s->max_level_store = LOG_DEBUG;
1387 s->max_level_syslog = LOG_DEBUG;
1388 s->max_level_kmsg = LOG_NOTICE;
1389 s->max_level_console = LOG_INFO;
/* Fill both metrics structs with 0xFF bytes; presumably an all-ones value
 * means "unset, pick a default later" -- confirm against the JournalMetrics
 * handling. */
1391 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1392 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* The kernel command line is parsed after the configuration file, so its
 * assignments are applied last. */
1394 server_parse_config_file(s);
1395 server_parse_proc_cmdline(s);
/* Rate limiting needs both values; if exactly one of them is zero, both are
 * reset to zero. */
1396 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1397 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1398 (long long unsigned) s->rate_limit_interval,
1399 s->rate_limit_burst);
1400 s->rate_limit_interval = s->rate_limit_burst = 0;
1403 mkdir_p("/run/systemd/journal", 0755);
1405 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1406 if (!s->user_journals)
1409 s->mmap = mmap_cache_new();
1413 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1414 if (s->epoll_fd < 0) {
1415 log_error("Failed to create epoll object: %m");
/* Sockets handed over via socket activation are identified by type and path;
 * at most one of each kind is accepted. */
1419 n = sd_listen_fds(true);
1421 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1425 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1427 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1429 if (s->native_fd >= 0) {
1430 log_error("Too many native sockets passed.");
1436 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1438 if (s->stdout_fd >= 0) {
1439 log_error("Too many stdout sockets passed.");
1445 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1447 if (s->syslog_fd >= 0) {
1448 log_error("Too many /dev/log sockets passed.");
1455 log_error("Unknown socket passed.");
/* Set up the individual input sources and helpers. */
1460 r = server_open_syslog_socket(s);
1464 r = server_open_native_socket(s);
1468 r = server_open_stdout_socket(s);
1472 r = server_open_dev_kmsg(s);
1476 r = server_open_kernel_seqnum(s);
1480 r = server_open_sync_timer(s);
1484 r = open_signalfd(s);
1488 s->udev = udev_new();
1492 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1493 s->rate_limit_burst);
/* Last step: open the journal files themselves. */
1497 r = system_journal_open(s);
/* Calls journal_file_maybe_append_tag() on the system journal and on every
 * open user journal, passing the same CLOCK_REALTIME timestamp to all. */
1504 void server_maybe_append_tags(Server *s) {
1510 n = now(CLOCK_REALTIME);
1512 if (s->system_journal)
1513 journal_file_maybe_append_tag(s->system_journal, n);
1515 HASHMAP_FOREACH(f, s->user_journals, i)
1516 journal_file_maybe_append_tag(f, n);
/* Releases everything the Server holds: stdout streams, journal files, the
 * user-journal hashmap, all file descriptors, the rate limiter, the kernel
 * sequence number mapping, the mmap cache and the udev context. */
1520 void server_done(Server *s) {
/* stdout_stream_free() is expected to unlink the stream from the list, so
 * the loop ends once the list is empty -- confirm in journald-stream.c. */
1524 while (s->stdout_streams)
1525 stdout_stream_free(s->stdout_streams);
1527 if (s->system_journal)
1528 journal_file_close(s->system_journal);
1530 if (s->runtime_journal)
1531 journal_file_close(s->runtime_journal);
/* Drain the hashmap, closing each user journal, before freeing the map. */
1533 while ((f = hashmap_steal_first(s->user_journals)))
1534 journal_file_close(f);
1536 hashmap_free(s->user_journals);
/* Descriptors are -1 when never opened (see server_init()). */
1538 if (s->epoll_fd >= 0)
1539 close_nointr_nofail(s->epoll_fd);
1541 if (s->signal_fd >= 0)
1542 close_nointr_nofail(s->signal_fd);
1544 if (s->syslog_fd >= 0)
1545 close_nointr_nofail(s->syslog_fd);
1547 if (s->native_fd >= 0)
1548 close_nointr_nofail(s->native_fd);
1550 if (s->stdout_fd >= 0)
1551 close_nointr_nofail(s->stdout_fd);
1553 if (s->dev_kmsg_fd >= 0)
1554 close_nointr_nofail(s->dev_kmsg_fd);
1556 if (s->sync_timer_fd >= 0)
1557 close_nointr_nofail(s->sync_timer_fd);
1560 journal_rate_limit_free(s->rate_limit);
/* The kernel sequence number is a memory mapping of one uint64_t. */
1562 if (s->kernel_seqnum)
1563 munmap(s->kernel_seqnum, sizeof(uint64_t));
1569 mmap_cache_unref(s->mmap);
1572 udev_unref(s->udev);