1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
29 #include <systemd/sd-journal.h>
30 #include <systemd/sd-messages.h>
31 #include <systemd/sd-daemon.h>
34 #include <systemd/sd-login.h>
40 #include "journal-file.h"
41 #include "socket-util.h"
42 #include "cgroup-util.h"
46 #include "conf-parser.h"
47 #include "journal-internal.h"
48 #include "journal-vacuum.h"
49 #include "journal-authenticate.h"
50 #include "journald-server.h"
51 #include "journald-rate-limit.h"
52 #include "journald-kmsg.h"
53 #include "journald-syslog.h"
54 #include "journald-stream.h"
55 #include "journald-console.h"
56 #include "journald-native.h"
60 #include <acl/libacl.h>
65 #include <selinux/selinux.h>
/* Upper bound on simultaneously open per-user journal files: find_journal()
 * closes files taken from the user_journals map while this many are open. */
68 #define USER_JOURNALS_MAX 1024
/* Initial rate-limit settings assigned to the Server by server_init(). */
70 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
71 #define DEFAULT_RATE_LIMIT_BURST 200
/* available_space() returns its cached value until this much monotonic time
 * has elapsed since the cache timestamp. */
73 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Textual names of the Storage modes, indexed by the STORAGE_* constant. */
75 static const char* const storage_table[] = {
76 [STORAGE_AUTO] = "auto",
77 [STORAGE_VOLATILE] = "volatile",
78 [STORAGE_PERSISTENT] = "persistent",
79 [STORAGE_NONE] = "none"
/* Instantiate the string-lookup and config-parser macros (defined outside
 * this file) for the table above; config_parse_storage is the parser name and
 * the last argument is the message used when a value cannot be parsed. */
82 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
83 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Textual names of the SplitMode values, indexed by the SPLIT_* constant.
 * NOTE(review): SPLIT_UID is tested in dispatch_message_real() but no entry
 * for it is visible here -- confirm the table names every SplitMode value. */
85 static const char* const split_mode_table[] = {
86 [SPLIT_NONE] = "none",
88 [SPLIT_LOGIN] = "login"
/* Instantiate the string-lookup and config-parser macros (defined outside
 * this file) for the table above; config_parse_split_mode is the parser name
 * and the last argument is the message used when a value cannot be parsed. */
91 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
92 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Estimates how many more bytes the journal files in the active journal
 * directory may occupy, from the configured max_use/keep_free limits, the
 * disk usage of the existing journal files and the free space on the
 * filesystem. The figure is cached in the Server together with a
 * CLOCK_MONOTONIC timestamp. */
94 static uint64_t available_space(Server *s) {
96 char _cleanup_free_ *p = NULL;
100 uint64_t sum = 0, avail = 0, ss_avail = 0;
102 DIR _cleanup_closedir_ *d = NULL;
106 ts = now(CLOCK_MONOTONIC);
/* Serve the cached value while it is younger than the recheck interval. */
108 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
109 return s->cached_available_space;
111 r = sd_id128_get_machine(&machine);
/* Choose directory prefix and limits: /var/log/journal with system_metrics
 * when a system journal is open; the /run/log/journal + runtime_metrics
 * assignments are presumably the alternative branch. */
115 if (s->system_journal) {
116 f = "/var/log/journal/";
117 m = &s->system_metrics;
119 f = "/run/log/journal/";
120 m = &s->runtime_metrics;
/* Directory to inspect: prefix followed by the machine ID string. */
125 p = strappend(f, sd_id128_to_string(machine, ids));
/* Filesystem statistics for the directory, used for the free-space part. */
133 if (fstatvfs(dirfd(d), &ss) < 0)
139 union dirent_storage buf;
141 r = readdir_r(d, &buf.de, &de);
/* Only entries named *.journal or *.journal~ are examined. */
148 if (!endswith(de->d_name, ".journal") &&
149 !endswith(de->d_name, ".journal~"))
152 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
155 if (!S_ISREG(st.st_mode))
/* Accumulate disk usage as st_blocks * 512 bytes. */
158 sum += (uint64_t) st.st_blocks * 512UL;
/* Room left below the max_use cap (0 if the cap is already reached). */
161 avail = sum >= m->max_use ? 0 : m->max_use - sum;
/* Free space on the filesystem: block size times available blocks ... */
163 ss_avail = ss.f_bsize * ss.f_bavail;
/* ... minus the keep_free reserve, clamped at 0. */
165 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
/* Presumably avail is lowered to ss_avail when the filesystem is the tighter
 * constraint -- the statement guarded by this test is not shown. */
167 if (ss_avail < avail)
/* Remember the result and when it was computed. */
170 s->cached_available_space = avail;
171 s->cached_available_space_timestamp = ts;
/* Resolves the "systemd-journal" group into s->file_gid via
 * get_group_creds() and then marks the result as valid in
 * s->file_gid_valid. A failed lookup is only logged as a warning. */
176 static void server_read_file_gid(Server *s) {
177 const char *g = "systemd-journal";
/* Checks whether the GID was already resolved (or the attempt recorded). */
182 if (s->file_gid_valid)
185 r = get_group_creds(&g, &s->file_gid);
187 log_warning("Failed to resolve '%s' group: %s", g, strerror(-r));
189 /* if we couldn't read the gid, then it will be 0, but that's
190 * fine and we shouldn't try to resolve the group again, so
191 * let's just pretend it worked right-away. */
192 s->file_gid_valid = true;
/* Adjusts ownership, mode and ACL of an open journal file.
 *
 * s:   server; supplies the journal group ID (s->file_gid).
 * f:   journal file; its fd and path are used.
 * uid: user to be given ACL read permission on the file.
 *
 * Every failure visible here is logged as a warning ("ignoring") rather
 * than reported to the caller; the function returns void. */
195 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
200 acl_permset_t permset;
/* Make sure s->file_gid has been looked up. */
205 server_read_file_gid(s);
/* Mode 0640, owner UID 0, group = journal group. */
207 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
209 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
/* Read the file's current ACL so it can be amended. */
215 acl = acl_get_fd(f->fd);
217 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an existing ACL entry for this user. */
221 r = acl_find_uid(acl, uid, &entry);
/* Create a new ACL_USER entry qualified with uid (presumably only when the
 * lookup above found none). */
224 if (acl_create_entry(&acl, &entry) < 0 ||
225 acl_set_tag_type(entry, ACL_USER) < 0 ||
226 acl_set_qualifier(entry, &uid) < 0) {
227 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Add read permission to the entry and recompute the ACL mask. */
232 if (acl_get_permset(entry, &permset) < 0 ||
233 acl_add_perm(permset, ACL_READ) < 0 ||
234 acl_calc_mask(&acl) < 0) {
235 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Write the amended ACL back to the file. */
239 if (acl_set_fd(f->fd, acl) < 0)
240 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Selects the journal file that entries for `uid` are written to.
 *
 * Returns the runtime journal whenever one is open; otherwise a per-user
 * journal under /var/log/journal/<machine-id>/user-<uid>.journal, opened on
 * demand and kept in s->user_journals. The failure paths visible here
 * return s->system_journal instead. */
247 static JournalFile* find_journal(Server *s, uid_t uid) {
255 /* We split up user logs only on /var, not on /run. If the
256 * runtime file is open, we write to it exclusively, in order
257 * to guarantee proper order as soon as we flush /run to
258 * /var and close the runtime file. */
260 if (s->runtime_journal)
261 return s->runtime_journal;
264 return s->system_journal;
/* The machine ID is part of the per-user journal path. */
266 r = sd_id128_get_machine(&machine);
268 return s->system_journal;
/* Look for an already opened journal for this user. */
270 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
274 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
275 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
276 return s->system_journal;
/* Keep the number of open user journals below USER_JOURNALS_MAX. */
278 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
279 /* Too many open? Then let's close one */
280 f = hashmap_steal_first(s->user_journals);
282 journal_file_close(f);
/* Open (creating if necessary) the user's journal with the system metrics,
 * passing the system journal as the template argument. */
285 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
289 return s->system_journal;
/* Give the user read access to the journal via server_fix_perms(). */
291 server_fix_perms(s, f, uid);
/* Register the file under the UID key; the close + fallback that follow are
 * presumably the handling for a failed insert. */
293 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
295 journal_file_close(f);
296 return s->system_journal;
/* Rotates the runtime journal, the system journal and every open per-user
 * journal, re-applying permissions to the resulting files. Rotation
 * failures are logged with log_error(); the function returns void. */
302 void server_rotate(Server *s) {
308 log_debug("Rotating...");
/* Runtime journal: rotated with sealing disabled (last argument false). */
310 if (s->runtime_journal) {
311 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
/* Two distinct messages depending on whether a file object is still
 * present after the rotation attempt. */
313 if (s->runtime_journal)
314 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
316 log_error("Failed to create new runtime journal: %s", strerror(-r));
318 server_fix_perms(s, s->runtime_journal, 0);
/* System journal: rotated with the configured seal setting. */
321 if (s->system_journal) {
322 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
324 if (s->system_journal)
325 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
327 log_error("Failed to create new system journal: %s", strerror(-r));
330 server_fix_perms(s, s->system_journal, 0);
/* Per-user journals: k is the map key (the UID as a pointer value), f the
 * journal file. The map entry is replaced with the rotated file and
 * permissions are fixed for the owning UID. */
333 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
334 r = journal_file_rotate(&f, s->compress, s->seal);
337 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
339 log_error("Failed to create user journal: %s", strerror(-r));
341 hashmap_replace(s->user_journals, k, f);
342 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Runs journal_directory_vacuum() on the persistent and/or runtime journal
 * directory of this machine, using the matching metrics and the configured
 * maximum retention time, then invalidates the cached available-space
 * value. Errors are logged; the function returns void. */
347 void server_vacuum(Server *s) {
353 log_debug("Vacuuming...");
/* Reset before vacuuming; journal_directory_vacuum() receives a pointer to
 * this field. */
355 s->oldest_file_usec = 0;
357 r = sd_id128_get_machine(&machine);
359 log_error("Failed to get machine ID: %s", strerror(-r));
/* The machine ID string names the per-machine subdirectory. */
363 sd_id128_to_string(machine, ids);
/* Persistent journals in /var/log/journal/<machine-id>. */
365 if (s->system_journal) {
366 p = strappend("/var/log/journal/", ids);
372 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
/* A missing directory (-ENOENT) is not reported. */
373 if (r < 0 && r != -ENOENT)
374 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* Runtime journals in /run/log/journal/<machine-id>. */
378 if (s->runtime_journal) {
379 p = strappend("/run/log/journal/", ids);
385 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
386 if (r < 0 && r != -ENOENT)
387 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* A zero timestamp makes the next available_space() call recompute. */
391 s->cached_available_space_timestamp = 0;
/* Builds a newly allocated cgroup path for `pid` in the systemd controller
 * hierarchy, with the prefix belonging to PID 1's cgroup removed when the
 * process path starts with it. The two temporary paths are released
 * automatically (_cleanup_free_). */
394 static char *shortened_cgroup_path(pid_t pid) {
396 char _cleanup_free_ *process_path = NULL, *init_path = NULL;
/* cgroup of the process in question ... */
401 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
/* ... and of PID 1, which supplies the prefix to strip. */
405 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
/* Cut a trailing "/system" (7 characters) off PID 1's path. */
409 if (endswith(init_path, "/system"))
410 init_path[strlen(init_path) - 7] = 0;
411 else if (streq(init_path, "/"))
/* Copy the part of the process path that follows the prefix. */
414 if (startswith(process_path, init_path)) {
415 path = strdup(process_path + strlen(init_path));
/* Classifies the error code `r` returned by a failed journal write and logs
 * the reason why the journal file `f` is going to be rotated.
 *
 * f: journal file the write was attempted on; only f->path is used, for the
 *    log messages.
 * r: negative errno-style error code of the failed operation.
 *
 * All codes are compared in their negative form, matching the list in the
 * comment below. */
424 bool shall_try_append_again(JournalFile *f, int r) {
426 /* -E2BIG Hit configured limit
428 -EDQUOT Quota limit hit
430 -EHOSTDOWN Other machine
431 -EBUSY Unclean shutdown
432 -EPROTONOSUPPORT Unsupported feature
435 -ESHUTDOWN Already archived */
437 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
438 log_debug("%s: Allocation limit reached, rotating.", f->path);
439 else if (r == -EHOSTDOWN)
440 log_info("%s: Journal file from other machine, rotating.", f->path);
441 else if (r == -EBUSY)
442 log_info("%s: Unclean shutdown, rotating.", f->path);
443 else if (r == -EPROTONOSUPPORT)
444 log_info("%s: Unsupported feature, rotating.", f->path);
/* -ESHUTDOWN must be tested with a minus sign like every other code here:
 * r carries negative errno values, so the positive constant never matched. */
445 else if (r == -EBADMSG || r == -ENODATA || r == -ESHUTDOWN)
446 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Appends one entry, given as `n` iovec fields, to the journal selected for
 * `uid` by find_journal(). If the first append fails with an error that
 * shall_try_append_again() accepts, the journal is looked up again and the
 * write is retried once. Failures are logged and otherwise ignored; the
 * function returns void. */
453 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
/* Checked before retrying: no retry is attempted once this flag is set. */
455 bool vacuumed = false;
462 f = find_journal(s, uid);
/* Rotation is suggested by the journal file itself (limits reached or header
 * out of date, per the message); the target file is then looked up again. */
466 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
467 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
472 f = find_journal(s, uid);
/* First write attempt; s->seqnum is passed so the entry gets a sequence
 * number from the server-wide counter. */
477 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
/* Give up if a vacuum already happened or the error is not retryable. */
481 if (vacuumed || !shall_try_append_again(f, r)) {
482 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* Second and last attempt on the freshly looked-up journal. */
489 f = find_journal(s, uid);
493 log_debug("Retrying write.");
494 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
496 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* Appends the trusted metadata fields (those starting with "_") to an entry
 * and hands it to write_to_journal().
 *
 * iovec:     field array; the first n elements are already filled in.
 * n:         number of fields already present.
 * m:         capacity of iovec; must leave room for N_IOVEC_META_FIELDS
 *            additional entries (asserted below).
 * label, label_len: SELinux label received with the message, if any.
 * unit_id:   unit name to fall back on when none can be derived from the
 *            sender's cgroup.
 * The Server, the sender credentials (ucred) and the source timestamp (tv)
 * are further parameters used throughout the body.
 *
 * All strings built here are released automatically (_cleanup_free_) once
 * the entry has been written. */
499 static void dispatch_message_real(
501 struct iovec *iovec, unsigned n, unsigned m,
504 const char *label, size_t label_len,
505 const char *unit_id) {
507 char _cleanup_free_ *pid = NULL, *uid = NULL, *gid = NULL,
508 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
509 *comm = NULL, *cmdline = NULL, *hostname = NULL,
510 *audit_session = NULL, *audit_loginuid = NULL,
511 *exe = NULL, *cgroup = NULL, *session = NULL,
512 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
518 uid_t realuid = 0, owner = 0, journal_uid;
519 bool owner_valid = false;
524 assert(n + N_IOVEC_META_FIELDS <= m);
/* Credentials of the sender: _PID, _UID, _GID. */
530 realuid = ucred->uid;
532 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
533 IOVEC_SET_STRING(iovec[n++], pid);
535 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
536 IOVEC_SET_STRING(iovec[n++], uid);
538 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
539 IOVEC_SET_STRING(iovec[n++], gid);
/* Process name, executable path and command line of the sender. */
541 r = get_process_comm(ucred->pid, &t);
543 comm = strappend("_COMM=", t);
547 IOVEC_SET_STRING(iovec[n++], comm);
550 r = get_process_exe(ucred->pid, &t);
552 exe = strappend("_EXE=", t);
556 IOVEC_SET_STRING(iovec[n++], exe);
559 r = get_process_cmdline(ucred->pid, 0, false, &t);
561 cmdline = strappend("_CMDLINE=", t);
565 IOVEC_SET_STRING(iovec[n++], cmdline);
/* Audit session and login UID of the sender. */
568 r = audit_session_from_pid(ucred->pid, &audit);
570 if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
571 IOVEC_SET_STRING(iovec[n++], audit_session);
573 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
575 if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
576 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* cgroup path of the sender, shortened relative to PID 1's cgroup. */
578 t = shortened_cgroup_path(ucred->pid);
580 cgroup = strappend("_SYSTEMD_CGROUP=", t);
584 IOVEC_SET_STRING(iovec[n++], cgroup);
/* Login session the sender belongs to. */
588 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
589 session = strappend("_SYSTEMD_SESSION=", t);
593 IOVEC_SET_STRING(iovec[n++], session);
/* Owner of the sender's login session. sd_pid_get_owner_uid() is keyed by
 * process ID, like sd_pid_get_session() above, so the sender's PID (not its
 * UID) has to be passed. */
596 if (sd_pid_get_owner_uid(ucred->pid, &owner) >= 0) {
598 if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
599 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* Unit the sender belongs to: system unit first, then user unit, finally
 * the caller-provided unit_id. */
603 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
604 unit = strappend("_SYSTEMD_UNIT=", t);
606 } else if (cg_pid_get_user_unit(ucred->pid, &t) >= 0) {
607 unit = strappend("_SYSTEMD_USER_UNIT=", t);
609 } else if (unit_id) {
611 unit = strappend("_SYSTEMD_USER_UNIT=", unit_id);
613 unit = strappend("_SYSTEMD_UNIT=", unit_id);
617 IOVEC_SET_STRING(iovec[n++], unit);
/* SELinux context: built from the label passed in (sizeof() of the prefix
 * literal already accounts for the terminating NUL) ... */
621 selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
622 if (selinux_context) {
623 *((char*) mempcpy(stpcpy(selinux_context, "_SELINUX_CONTEXT="), label, label_len)) = 0;
624 IOVEC_SET_STRING(iovec[n++], selinux_context);
/* ... or queried from the sender's PID. */
627 security_context_t con;
629 if (getpidcon(ucred->pid, &con) >= 0) {
630 selinux_context = strappend("_SELINUX_CONTEXT=", con);
632 IOVEC_SET_STRING(iovec[n++], selinux_context);
/* Timestamp supplied by the message source. */
640 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
641 (unsigned long long) timeval_load(tv)) >= 0)
642 IOVEC_SET_STRING(iovec[n++], source_time);
645 /* Note that strictly speaking storing the boot id here is
646 * redundant since the entry includes this in-line
647 * anyway. However, we need this indexed, too. */
648 r = sd_id128_get_boot(&id);
650 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
651 IOVEC_SET_STRING(iovec[n++], boot_id);
653 r = sd_id128_get_machine(&id);
655 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
656 IOVEC_SET_STRING(iovec[n++], machine_id);
658 t = gethostname_malloc();
660 hostname = strappend("_HOSTNAME=", t);
663 IOVEC_SET_STRING(iovec[n++], hostname);
/* Decide which journal receives the entry, depending on the split mode. */
668 if (s->split_mode == SPLIT_UID && realuid > 0)
669 /* Split up strictly by any UID */
670 journal_uid = realuid;
671 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
672 /* Split up by login UIDs, this avoids creation of
673 * individual journals for system UIDs. We do this
674 * only if the realuid is not root, in order not to
675 * accidentally leak privileged information to the
676 * user that is logged by a privileged process that is
677 * part of an unprivileged session.*/
682 write_to_journal(s, journal_uid, iovec, n);
/* Emits a message originating from journald itself ("driver" transport) at
 * priority 6.
 *
 * message_id: ID attached as a MESSAGE_ID field unless it equals
 *             SD_ID128_NULL.
 * format:     printf-style format for the MESSAGE= text, followed by its
 *             arguments. */
685 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
/* "MESSAGE_ID=" is 11 characters, followed by a 32-character ID and NUL. */
686 char mid[11 + 32 + 1];
687 char buffer[16 + LINE_MAX + 1];
/* Room for the (up to) 4 fields set here plus the metadata fields that
 * dispatch_message_real() appends. */
688 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
696 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
697 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The formatted text is placed right after the 8-byte "MESSAGE=" prefix. */
699 memcpy(buffer, "MESSAGE=", 8);
700 va_start(ap, format);
701 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
703 char_array_0(buffer);
704 IOVEC_SET_STRING(iovec[n++], buffer);
706 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
707 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
709 IOVEC_SET_STRING(iovec[n++], mid);
/* The message is attributed to the journal daemon's own credentials. */
713 ucred.pid = getpid();
714 ucred.uid = getuid();
715 ucred.gid = getgid();
717 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Entry point for messages received from clients: filters by the maximum
 * stored log level, applies per-cgroup rate limiting and then passes the
 * entry on to dispatch_message_real().
 *
 * iovec, n, m: field array, number of fields in use, and its capacity.
 * label, label_len: SELinux label received with the message, if any. */
720 void server_dispatch_message(
722 struct iovec *iovec, unsigned n, unsigned m,
725 const char *label, size_t label_len,
730 char _cleanup_free_ *path = NULL;
734 assert(iovec || n == 0);
/* Compares the message priority against the max_level_store setting. */
739 if (LOG_PRI(priority) > s->max_level_store)
/* The sender's shortened cgroup path is the rate-limiting key. */
745 path = shortened_cgroup_path(ucred->pid);
749 /* example: /user/lennart/3/foobar
750 * /system/dbus.service/foobar
752 * So let's cut of everything past the third /, since that is
753 * where user directories start */
755 c = strchr(path, '/');
757 c = strchr(c+1, '/');
759 c = strchr(c+1, '/');
/* The limiter is given the key, the priority level and the currently
 * available journal space. */
765 rl = journal_rate_limit_test(s->rate_limit, path,
766 priority & LOG_PRIMASK, available_space(s));
771 /* Write a suppression message if we suppressed something */
773 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
774 "Suppressed %u messages from %s", rl - 1, path);
777 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
/* Opens the persistent system journal under /var/log/journal/<machine-id>/
 * and/or the runtime journal under /run/log/journal/<machine-id>/, depending
 * on the configured storage mode and on which journals are already open.
 * After a journal has been opened its permissions are fixed and a driver
 * message announcing its size limit is written. */
781 static int system_journal_open(Server *s) {
787 r = sd_id128_get_machine(&machine);
791 sd_id128_to_string(machine, ids);
/* Persistent journal: only in persistent/auto mode and only once the flag
 * file /run/systemd/journal/flushed exists. */
793 if (!s->system_journal &&
794 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
795 access("/run/systemd/journal/flushed", F_OK) >= 0) {
797 /* If in auto mode: first try to create the machine
798 * path, but not the prefix.
800 * If in persistent mode: create /var/log/journal and
801 * the machine path */
803 if (s->storage == STORAGE_PERSISTENT)
804 (void) mkdir("/var/log/journal/", 0755);
806 fn = strappend("/var/log/journal/", ids);
810 (void) mkdir(fn, 0755);
813 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
817 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
821 char fb[FORMAT_BYTES_MAX];
823 server_fix_perms(s, s->system_journal, 0);
824 server_driver_message(s, SD_ID128_NULL, "Allowing system journal files to grow to %s.",
825 format_bytes(fb, sizeof(fb), s->system_metrics.max_use));
/* -ENOENT and -EROFS are not worth a warning. */
829 if (r != -ENOENT && r != -EROFS)
830 log_warning("Failed to open system journal: %s", strerror(-r));
/* Runtime journal: for every storage mode except "none". */
836 if (!s->runtime_journal &&
837 (s->storage != STORAGE_NONE)) {
839 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
843 if (s->system_journal) {
845 /* Try to open the runtime journal, but only
846 * if it already exists, so that we can flush
847 * it into the system journal */
849 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
854 log_warning("Failed to open runtime journal: %s", strerror(-r));
861 /* OK, we really need the runtime journal, so create
862 * it if necessary. */
864 (void) mkdir_parents(fn, 0755);
865 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
869 log_error("Failed to open runtime journal: %s", strerror(-r));
874 if (s->runtime_journal) {
875 char fb[FORMAT_BYTES_MAX];
877 server_fix_perms(s, s->runtime_journal, 0);
878 server_driver_message(s, SD_ID128_NULL, "Allowing runtime journal files to grow to %s.",
879 format_bytes(fb, sizeof(fb), s->runtime_metrics.max_use));
/* Copies all entries of the runtime journal into the system journal, then
 * closes the runtime journal and removes /run/log/journal. Only relevant in
 * the "auto" and "persistent" storage modes and when both a runtime and a
 * system journal are available. */
886 int server_flush_to_var(Server *s) {
889 sd_journal *j = NULL;
893 if (s->storage != STORAGE_AUTO &&
894 s->storage != STORAGE_PERSISTENT)
897 if (!s->runtime_journal)
/* Try to open the system journal now; the check right below covers the
 * case where that did not work. */
900 system_journal_open(s);
902 if (!s->system_journal)
905 log_debug("Flushing to /var...");
907 r = sd_id128_get_machine(&machine);
909 log_error("Failed to get machine id: %s", strerror(-r));
/* Read back the runtime journal files through the public reader API. */
913 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
915 log_error("Failed to read runtime journal: %s", strerror(-r));
919 sd_journal_set_data_threshold(j, 0);
921 SD_JOURNAL_FOREACH(j) {
926 assert(f && f->current_offset > 0);
/* Locate the entry object at the reader's current offset ... */
928 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
930 log_error("Can't read entry: %s", strerror(-r));
/* ... and copy it into the system journal. */
934 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
/* Errors that shall_try_append_again() rejects end the attempt with an
 * error message; otherwise the copy is retried once. */
938 if (!shall_try_append_again(s->system_journal, r)) {
939 log_error("Can't write entry: %s", strerror(-r));
946 log_debug("Retrying write.");
947 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
949 log_error("Can't write entry: %s", strerror(-r));
955 journal_file_post_change(s->system_journal);
/* The runtime journal is closed and its directory tree removed. */
957 journal_file_close(s->runtime_journal);
958 s->runtime_journal = NULL;
961 rm_rf("/run/log/journal", false, true, false);
/* Handles a single epoll event. The event is dispatched on the descriptor
 * it refers to: the signalfd, /dev/kmsg, the native or syslog datagram
 * socket, or the stdout listening socket; anything else is treated as a
 * connected stdout stream referenced through ev->data.ptr. */
969 int process_event(Server *s, struct epoll_event *ev) {
/* --- Signals --- */
973 if (ev->data.fd == s->signal_fd) {
974 struct signalfd_siginfo sfsi;
977 if (ev->events != EPOLLIN) {
978 log_error("Got invalid event from epoll.");
/* A complete signalfd_siginfo record is expected per read. */
982 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
983 if (n != sizeof(sfsi)) {
988 if (errno == EINTR || errno == EAGAIN)
994 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
/* SIGUSR1: create the "flushed" flag file (tested by
 * system_journal_open()) and flush the runtime journal to /var. */
996 if (sfsi.ssi_signo == SIGUSR1) {
997 touch("/run/systemd/journal/flushed");
998 server_flush_to_var(s);
1002 if (sfsi.ssi_signo == SIGUSR2) {
/* --- Kernel log --- */
1010 } else if (ev->data.fd == s->dev_kmsg_fd) {
1013 if (ev->events != EPOLLIN) {
1014 log_error("Got invalid event from epoll.");
1018 r = server_read_dev_kmsg(s);
/* --- Native and syslog datagram sockets --- */
1024 } else if (ev->data.fd == s->native_fd ||
1025 ev->data.fd == s->syslog_fd) {
1027 if (ev->events != EPOLLIN) {
1028 log_error("Got invalid event from epoll.");
1033 struct msghdr msghdr;
1035 struct ucred *ucred = NULL;
1036 struct timeval *tv = NULL;
1037 struct cmsghdr *cmsg;
1039 size_t label_len = 0;
1041 struct cmsghdr cmsghdr;
1043 /* We use NAME_MAX space for the
1044 * SELinux label here. The kernel
1045 * currently enforces no limit, but
1046 * according to suggestions from the
1047 * SELinux people this will change and
1048 * it will probably be identical to
1049 * NAME_MAX. For now we use that, but
1050 * this should be updated one day when
1051 * the final limit is known.*/
1052 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1053 CMSG_SPACE(sizeof(struct timeval)) +
1054 CMSG_SPACE(sizeof(int)) + /* fd */
1055 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask the socket how many bytes are queued (SIOCINQ) so the receive buffer
 * can be enlarged beforehand. */
1062 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1063 log_error("SIOCINQ failed: %m");
1067 if (s->buffer_size < (size_t) v) {
/* Grow to at least LINE_MAX + v bytes, or double the current size; one
 * extra byte is allocated beyond the computed length. */
1071 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1072 b = realloc(s->buffer, l+1);
1075 log_error("Couldn't increase buffer.");
1084 iovec.iov_base = s->buffer;
1085 iovec.iov_len = s->buffer_size;
1089 msghdr.msg_iov = &iovec;
1090 msghdr.msg_iovlen = 1;
1091 msghdr.msg_control = &control;
1092 msghdr.msg_controllen = sizeof(control);
/* Non-blocking receive; passed descriptors are marked close-on-exec. */
1094 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1097 if (errno == EINTR || errno == EAGAIN)
1100 log_error("recvmsg() failed: %m");
/* Pick up the ancillary data: sender credentials, SELinux label, receive
 * timestamp and passed file descriptors. */
1104 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1106 if (cmsg->cmsg_level == SOL_SOCKET &&
1107 cmsg->cmsg_type == SCM_CREDENTIALS &&
1108 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1109 ucred = (struct ucred*) CMSG_DATA(cmsg);
1110 else if (cmsg->cmsg_level == SOL_SOCKET &&
1111 cmsg->cmsg_type == SCM_SECURITY) {
1112 label = (char*) CMSG_DATA(cmsg);
1113 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1114 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1115 cmsg->cmsg_type == SO_TIMESTAMP &&
1116 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1117 tv = (struct timeval*) CMSG_DATA(cmsg);
1118 else if (cmsg->cmsg_level == SOL_SOCKET &&
1119 cmsg->cmsg_type == SCM_RIGHTS) {
1120 fds = (int*) CMSG_DATA(cmsg);
1121 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* Syslog socket: text payload only, file descriptors are not accepted. */
1125 if (ev->data.fd == s->syslog_fd) {
1128 if (n > 0 && n_fds == 0) {
1129 e = memchr(s->buffer, '\n', n);
1135 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1136 } else if (n_fds > 0)
1137 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native socket: either an inline payload, or exactly one file descriptor
 * with an empty payload. */
1140 if (n > 0 && n_fds == 0)
1141 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1142 else if (n == 0 && n_fds == 1)
1143 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1145 log_warning("Got too many file descriptors via native socket. Ignoring.");
/* Close every descriptor that was received with the message. */
1148 close_many(fds, n_fds);
/* --- New stdout stream connection --- */
1153 } else if (ev->data.fd == s->stdout_fd) {
1155 if (ev->events != EPOLLIN) {
1156 log_error("Got invalid event from epoll.");
1160 stdout_stream_new(s);
/* --- Existing stdout stream --- */
1164 StdoutStream *stream;
/* Only EPOLLIN and EPOLLHUP bits are acceptable here. */
1166 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1167 log_error("Got invalid event from epoll.");
1171 /* If it is none of the well-known fds, it must be an
1172 * stdout stream fd. Note that this is a bit ugly here
1173 * (since we rely that none of the well-known fds
1174 * could be interpreted as pointer), but nonetheless
1175 * safe, since the well-known fds would never get an
1176 * fd > 4096, i.e. beyond the first memory page */
1178 stream = ev->data.ptr;
/* A result <= 0 from stdout_stream_process() frees the stream. */
1180 if (stdout_stream_process(stream) <= 0)
1181 stdout_stream_free(stream);
1186 log_error("Unknown event.");
/* Blocks SIGINT, SIGTERM, SIGUSR1 and SIGUSR2 for normal delivery, creates
 * a non-blocking, close-on-exec signalfd for them (stored in s->signal_fd)
 * and registers it for EPOLLIN on the server's epoll instance. */
1190 static int open_signalfd(Server *s) {
1192 struct epoll_event ev;
1196 assert_se(sigemptyset(&mask) == 0);
1197 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
/* SIG_SETMASK replaces the process signal mask with exactly this set. */
1198 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1200 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1201 if (s->signal_fd < 0) {
1202 log_error("signalfd(): %m");
/* The fd itself is the event payload; process_event() compares
 * ev->data.fd against s->signal_fd. */
1207 ev.events = EPOLLIN;
1208 ev.data.fd = s->signal_fd;
1210 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1211 log_error("epoll_ctl(): %m");
/* Applies the systemd.journald.forward_to_{syslog,kmsg,console}= switches
 * found on the kernel command line (/proc/cmdline) to the Server. Values
 * that fail to parse, and other words starting with "systemd.journald",
 * only produce a warning. */
1218 static int server_parse_proc_cmdline(Server *s) {
1219 char _cleanup_free_ *line = NULL;
/* Tests whether we are running inside a container. */
1224 if (detect_container(NULL) > 0)
1227 r = read_one_line_file("/proc/cmdline", &line);
1229 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1233 FOREACH_WORD_QUOTED(w, l, line, state) {
1234 char _cleanup_free_ *word;
/* Private NUL-terminated copy of the current word. */
1236 word = strndup(w, l);
/* The numeric offsets below are the lengths of the matched prefixes, so
 * word + N points at the value after '='. */
1240 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1241 r = parse_boolean(word + 35);
1243 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1245 s->forward_to_syslog = r;
1246 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1247 r = parse_boolean(word + 33);
1249 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1251 s->forward_to_kmsg = r;
1252 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1253 r = parse_boolean(word + 36);
1255 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1257 s->forward_to_console = r;
1258 } else if (startswith(word, "systemd.journald"))
1259 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Loads /etc/systemd/journald.conf into the Server: the "Journal" section
 * is parsed with the gperf-generated lookup table journald_gperf_lookup.
 * The FILE handle is closed automatically (_cleanup_fclose_). */
1265 static int server_parse_config_file(Server *s) {
1266 static const char *fn = "/etc/systemd/journald.conf";
1267 FILE _cleanup_fclose_ *f = NULL;
/* "re": read-only, with the close-on-exec flag set on the descriptor. */
1272 f = fopen(fn, "re");
/* A missing file (ENOENT) is distinguished from other open failures, which
 * are logged as a warning. */
1274 if (errno == ENOENT)
1277 log_warning("Failed to open configuration file %s: %m", fn);
1281 r = config_parse(fn, f, "Journal\0", config_item_perf_lookup,
1282 (void*) journald_gperf_lookup, false, s);
1284 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Initializes a Server: sets defaults, reads the configuration file and the
 * kernel command line, creates the epoll instance, adopts sockets passed in
 * by the service manager, opens the remaining input sources and finally
 * opens the journal files. */
1289 int server_init(Server *s) {
/* -1 marks every descriptor as not open; server_done() only closes
 * descriptors that are >= 0. */
1295 s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
/* Defaults, possibly overridden by the configuration parsed below. */
1299 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1300 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1302 s->forward_to_syslog = true;
1304 s->max_level_store = LOG_DEBUG;
1305 s->max_level_syslog = LOG_DEBUG;
1306 s->max_level_kmsg = LOG_NOTICE;
1307 s->max_level_console = LOG_INFO;
/* Every byte of both metrics structs is set to 0xFF -- presumably the
 * "not configured" marker understood by the journal file code; confirm. */
1309 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1310 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* Configuration file first, kernel command line second, so command-line
 * switches are applied on top of the file's settings. */
1312 server_parse_config_file(s);
1313 server_parse_proc_cmdline(s);
/* If exactly one of interval and burst is zero, zero both. */
1314 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1315 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1316 (long long unsigned) s->rate_limit_interval,
1317 s->rate_limit_burst);
1318 s->rate_limit_interval = s->rate_limit_burst = 0;
1321 mkdir_p("/run/systemd/journal", 0755);
/* Map from UID to the open per-user journal file. */
1323 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1324 if (!s->user_journals)
1327 s->mmap = mmap_cache_new();
1331 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1332 if (s->epoll_fd < 0) {
1333 log_error("Failed to create epoll object: %m");
/* Sockets handed over by the service manager. */
1337 n = sd_listen_fds(true);
1339 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
/* Classify each passed descriptor by socket type and bound path; at most
 * one socket of each kind is accepted. */
1343 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1345 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1347 if (s->native_fd >= 0) {
1348 log_error("Too many native sockets passed.");
1354 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1356 if (s->stdout_fd >= 0) {
1357 log_error("Too many stdout sockets passed.");
1363 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1365 if (s->syslog_fd >= 0) {
1366 log_error("Too many /dev/log sockets passed.");
1373 log_error("Unknown socket passed.");
/* Open the individual input sources and the signalfd. */
1378 r = server_open_syslog_socket(s);
1382 r = server_open_native_socket(s);
1386 r = server_open_stdout_socket(s);
1390 r = server_open_dev_kmsg(s);
1394 r = server_open_kernel_seqnum(s);
1398 r = open_signalfd(s);
1402 s->udev = udev_new();
/* The rate limiter is created from the (possibly zeroed) settings. */
1406 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1407 s->rate_limit_burst);
1411 r = system_journal_open(s);
/* Calls journal_file_maybe_append_tag() with the current CLOCK_REALTIME
 * time on the system journal (if open) and on every open per-user journal.
 * The runtime journal is not touched here. */
1418 void server_maybe_append_tags(Server *s) {
/* One timestamp is taken and shared by all files. */
1424 n = now(CLOCK_REALTIME);
1426 if (s->system_journal)
1427 journal_file_maybe_append_tag(s->system_journal, n);
1429 HASHMAP_FOREACH(f, s->user_journals, i)
1430 journal_file_maybe_append_tag(f, n);
/* Releases the resources held by a Server: stdout streams, journal files,
 * the user journal map, all file descriptors, the rate limiter, the kernel
 * sequence number mapping, the mmap cache and the udev context. */
1434 void server_done(Server *s) {
/* The loop repeats for as long as s->stdout_streams is non-NULL, freeing
 * the stream it points to each time. */
1438 while (s->stdout_streams)
1439 stdout_stream_free(s->stdout_streams);
1441 if (s->system_journal)
1442 journal_file_close(s->system_journal);
1444 if (s->runtime_journal)
1445 journal_file_close(s->runtime_journal);
/* Drain the map, closing each per-user journal, before freeing the map. */
1447 while ((f = hashmap_steal_first(s->user_journals)))
1448 journal_file_close(f);
1450 hashmap_free(s->user_journals);
/* Descriptors still at their initial -1 were never opened and are skipped. */
1452 if (s->epoll_fd >= 0)
1453 close_nointr_nofail(s->epoll_fd);
1455 if (s->signal_fd >= 0)
1456 close_nointr_nofail(s->signal_fd);
1458 if (s->syslog_fd >= 0)
1459 close_nointr_nofail(s->syslog_fd);
1461 if (s->native_fd >= 0)
1462 close_nointr_nofail(s->native_fd);
1464 if (s->stdout_fd >= 0)
1465 close_nointr_nofail(s->stdout_fd);
1467 if (s->dev_kmsg_fd >= 0)
1468 close_nointr_nofail(s->dev_kmsg_fd);
1471 journal_rate_limit_free(s->rate_limit);
/* The kernel sequence number is a mapping of sizeof(uint64_t) bytes. */
1473 if (s->kernel_seqnum)
1474 munmap(s->kernel_seqnum, sizeof(uint64_t));
1480 mmap_cache_unref(s->mmap);
1483 udev_unref(s->udev);