1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
29 #include <systemd/sd-journal.h>
30 #include <systemd/sd-messages.h>
31 #include <systemd/sd-daemon.h>
34 #include <systemd/sd-login.h>
40 #include "journal-file.h"
41 #include "socket-util.h"
42 #include "cgroup-util.h"
46 #include "conf-parser.h"
47 #include "journal-internal.h"
48 #include "journal-vacuum.h"
49 #include "journal-authenticate.h"
50 #include "journald-server.h"
51 #include "journald-rate-limit.h"
52 #include "journald-kmsg.h"
53 #include "journald-syslog.h"
54 #include "journald-stream.h"
55 #include "journald-console.h"
56 #include "journald-native.h"
60 #include <acl/libacl.h>
65 #include <selinux/selinux.h>
/* Size threshold for s->user_journals: find_journal() closes entries while the map holds this many or more. */
68 #define USER_JOURNALS_MAX 1024
/* Initial values server_init() assigns to s->rate_limit_interval and s->rate_limit_burst. */
70 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
71 #define DEFAULT_RATE_LIMIT_BURST 200
/* Age up to which available_space() returns its cached result instead of recomputing. */
73 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Textual names of the Storage enum values, indexed by enum value.
 * The two macros below (defined elsewhere) are instantiated on this table;
 * presumably they generate the string lookup helpers and the
 * config_parse_storage() callback -- confirm against the macro definitions. */
75 static const char* const storage_table[] = {
76 [STORAGE_AUTO] = "auto",
77 [STORAGE_VOLATILE] = "volatile",
78 [STORAGE_PERSISTENT] = "persistent",
79 [STORAGE_NONE] = "none"
82 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
83 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Textual names of the SplitMode enum values, indexed by enum value.
 * The two macros below (defined elsewhere) are instantiated on this table;
 * presumably they generate the string lookup helpers and the
 * config_parse_split_mode() callback -- confirm against the macro definitions. */
85 static const char* const split_mode_table[] = {
86 [SPLIT_NONE] = "none",
88 [SPLIT_LOGIN] = "login"
91 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
92 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Work out how many more bytes the journal may consume and cache the figure in s.
 *
 * While the cached value is younger than RECHECK_AVAILABLE_SPACE_USEC it is
 * returned as is. Otherwise the per-machine journal directory is opened, the
 * on-disk size of the journal files in it is summed, and the result is derived
 * from the max_use and keep_free settings of the selected metrics together
 * with the free space reported by fstatvfs(). */
94 static uint64_t available_space(Server *s) {
96 char _cleanup_free_ *p = NULL;
100 uint64_t sum = 0, avail = 0, ss_avail = 0;
102 DIR _cleanup_closedir_ *d = NULL;
106 ts = now(CLOCK_MONOTONIC);
/* Serve from the cache while it is still fresh */
108 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
109 return s->cached_available_space;
111 r = sd_id128_get_machine(&machine);
/* Choose directory prefix and metrics: /var with the system metrics, or /run with the runtime metrics */
115 if (s->system_journal) {
116 f = "/var/log/journal/";
117 m = &s->system_metrics;
119 f = "/run/log/journal/";
120 m = &s->runtime_metrics;
/* The directory scanned is <prefix><machine-id> */
125 p = strappend(f, sd_id128_to_string(machine, ids));
/* File system statistics of the directory, used further down for the free-space figure */
133 if (fstatvfs(dirfd(d), &ss) < 0)
139 union dirent_storage buf;
141 r = readdir_r(d, &buf.de, &de);
/* Names are tested for the ".journal" and ".journal~" suffixes */
148 if (!endswith(de->d_name, ".journal") &&
149 !endswith(de->d_name, ".journal~"))
/* Symlinks are not followed; the entry is tested for being a regular file */
152 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
155 if (!S_ISREG(st.st_mode))
/* Accumulate allocated size: st_blocks times 512 bytes */
158 sum += (uint64_t) st.st_blocks * 512UL;
/* Room left below the max_use quota (0 when already at or over it) */
161 avail = sum >= m->max_use ? 0 : m->max_use - sum;
/* Free space on the file system, minus the keep_free reserve (0 when less than the reserve is free) */
163 ss_avail = ss.f_bsize * ss.f_bavail;
165 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
167 if (ss_avail < avail)
/* Remember the result together with the time it was computed */
170 s->cached_available_space = avail;
171 s->cached_available_space_timestamp = ts;
/* Resolve the "systemd-journal" group into s->file_gid.
 *
 * s->file_gid_valid is consulted first and is set to true at the end whether
 * or not the lookup worked; a failed lookup only produces a warning. */
176 static void server_read_file_gid(Server *s) {
177 const char *g = "systemd-journal";
182 if (s->file_gid_valid)
185 r = get_group_creds(&g, &s->file_gid);
187 log_warning("Failed to resolve '%s' group: %s", g, strerror(-r));
189 /* if we couldn't read the gid, then it will be 0, but that's
190 * fine and we shouldn't try to resolve the group again, so
191 * let's just pretend it worked right-away. */
192 s->file_gid_valid = true;
/* Apply the expected ownership, access mode and ACL to journal file f.
 *
 * The file is set to mode 0640, owner 0 and group s->file_gid. Its ACL is then
 * read, an ACL_USER entry for uid is looked up or created, ACL_READ is added
 * to that entry, the mask is recalculated and the ACL is written back to the
 * file. Each failure is reported with log_warning() ("ignoring"). */
195 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
200 acl_permset_t permset;
/* Make sure s->file_gid has been looked up before it is used below */
205 server_read_file_gid(s);
207 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
209 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
/* Fetch the current ACL of the open file */
215 acl = acl_get_fd(f->fd);
217 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an existing entry for uid; a new ACL_USER entry qualified with uid is built below */
221 r = acl_find_uid(acl, uid, &entry);
224 if (acl_create_entry(&acl, &entry) < 0 ||
225 acl_set_tag_type(entry, ACL_USER) < 0 ||
226 acl_set_qualifier(entry, &uid) < 0) {
227 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Add read permission to the entry and recompute the ACL mask */
232 if (acl_get_permset(entry, &permset) < 0 ||
233 acl_add_perm(permset, ACL_READ) < 0 ||
234 acl_calc_mask(&acl) < 0) {
235 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Write the modified ACL back to the file */
239 if (acl_set_fd(f->fd, acl) < 0)
240 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Select the journal file that an entry belonging to uid is written to.
 *
 * An open runtime journal always wins. Otherwise the per-user file
 * /var/log/journal/<machine-id>/user-<uid>.journal is looked up in
 * s->user_journals and, when needed, opened and added to that map. Several
 * paths, including the failure paths, return s->system_journal instead. */
247 static JournalFile* find_journal(Server *s, uid_t uid) {
255 /* We split up user logs only on /var, not on /run. If the
256 * runtime file is open, we write to it exclusively, in order
257 * to guarantee proper order as soon as we flush /run to
258 * /var and close the runtime file. */
260 if (s->runtime_journal)
261 return s->runtime_journal;
264 return s->system_journal;
266 r = sd_id128_get_machine(&machine);
268 return s->system_journal;
/* Per-user journals are kept in a map keyed by the UID */
270 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
274 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
275 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
276 return s->system_journal;
278 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
279 /* Too many open? Then let's close one */
280 f = hashmap_steal_first(s->user_journals);
282 journal_file_close(f);
/* Open (creating if necessary) the per-user file with the system metrics, using s->system_journal as the template argument */
285 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
289 return s->system_journal;
/* Give the owning user read access to the file */
291 server_fix_perms(s, f, uid);
293 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
295 journal_file_close(f);
296 return s->system_journal;
/* Rotate the runtime journal, the system journal and every per-user journal
 * stored in s->user_journals.
 *
 * The runtime journal is rotated with sealing disabled (false); the system
 * and user journals use s->seal. Failures are reported with log_error(),
 * with a different message depending on whether a file object is still
 * present afterwards. server_fix_perms() is applied to the resulting files. */
302 void server_rotate(Server *s) {
308 log_debug("Rotating...");
310 if (s->runtime_journal) {
311 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
313 if (s->runtime_journal)
314 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
316 log_error("Failed to create new runtime journal: %s", strerror(-r));
318 server_fix_perms(s, s->runtime_journal, 0);
321 if (s->system_journal) {
322 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
324 if (s->system_journal)
325 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
327 log_error("Failed to create new system journal: %s", strerror(-r));
330 server_fix_perms(s, s->system_journal, 0);
/* Per-user journals: rotation works on the local copy f, so the map entry for
 * key k is updated with hashmap_replace() and the key doubles as the UID.
 * NOTE(review): when rotation fails and leaves f NULL, confirm that the
 * previous pointer does not stay behind in s->user_journals. */
333 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
334 r = journal_file_rotate(&f, s->compress, s->seal);
337 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
339 log_error("Failed to create user journal: %s", strerror(-r));
341 hashmap_replace(s->user_journals, k, f);
342 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Vacuum this machine's journal directories.
 *
 * For an open system journal /var/log/journal/<machine-id> is vacuumed with
 * the system metrics; for an open runtime journal /run/log/journal/<machine-id>
 * is vacuumed with the runtime metrics. Both calls pass s->max_retention_usec
 * and update s->oldest_file_usec, which is reset to 0 first. -ENOENT from the
 * vacuum call is not logged. The cache timestamp used by available_space()
 * is reset at the end. */
347 void server_vacuum(Server *s) {
353 log_debug("Vacuuming...");
355 s->oldest_file_usec = 0;
357 r = sd_id128_get_machine(&machine);
359 log_error("Failed to get machine ID: %s", strerror(-r));
363 sd_id128_to_string(machine, ids);
365 if (s->system_journal) {
366 p = strappend("/var/log/journal/", ids);
372 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
373 if (r < 0 && r != -ENOENT)
374 log_error("Failed to vacuum %s: %s", p, strerror(-r));
378 if (s->runtime_journal) {
379 p = strappend("/run/log/journal/", ids);
385 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
386 if (r < 0 && r != -ENOENT)
387 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* Reset the timestamp that available_space() compares against */
391 s->cached_available_space_timestamp = 0;
/* Build a cgroup path for pid expressed relative to the cgroup of PID 1.
 *
 * Both paths are queried in the SYSTEMD_CGROUP_CONTROLLER hierarchy. A
 * trailing "/system" is cut off the PID 1 path; when that path is a prefix of
 * the process path, the remainder of the process path is duplicated with
 * strdup(). */
394 static char *shortened_cgroup_path(pid_t pid) {
396 char _cleanup_free_ *process_path = NULL, *init_path = NULL;
401 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
405 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
/* 7 == strlen("/system"): truncate the PID 1 path right before that suffix */
409 if (endswith(init_path, "/system"))
410 init_path[strlen(init_path) - 7] = 0;
411 else if (streq(init_path, "/"))
414 if (startswith(process_path, init_path)) {
415 path = strdup(process_path + strlen(init_path));
424 bool shall_try_append_again(JournalFile *f, int r) {
426 /* -E2BIG Hit configured limit
428 -EDQUOT Quota limit hit
430 -EHOSTDOWN Other machine
431 -EBUSY Unclean shutdown
432 -EPROTONOSUPPORT Unsupported feature
435 -ESHUTDOWN Already archived */
437 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
438 log_debug("%s: Allocation limit reached, rotating.", f->path);
439 else if (r == -EHOSTDOWN)
440 log_info("%s: Journal file from other machine, rotating.", f->path);
441 else if (r == -EBUSY)
442 log_info("%s: Unclean shutdown, rotating.", f->path);
443 else if (r == -EPROTONOSUPPORT)
444 log_info("%s: Unsupported feature, rotating.", f->path);
445 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
446 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Append the entry made of the n fields in iovec to the journal selected for uid.
 *
 * The target file comes from find_journal(). It is looked up again after
 * journal_file_rotate_suggested() reports a rotation is due, and once more
 * before the single retry that follows a failure accepted by
 * shall_try_append_again(). A failure that is not retried, or a failing
 * retry, is reported with log_error() ("ignoring"). */
453 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
455 bool vacuumed = false;
462 f = find_journal(s, uid);
466 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
467 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
472 f = find_journal(s, uid);
/* First write attempt */
477 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
/* No retry when a vacuum already took place or the error is not a retryable one */
481 if (vacuumed || !shall_try_append_again(f, r)) {
482 log_error("Failed to write entry, ignoring: %s", strerror(-r));
489 f = find_journal(s, uid);
/* Second and last write attempt */
493 log_debug("Retrying write.");
494 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
496 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* Add the journald-generated metadata fields to an entry and write it out.
 *
 * iovec: array holding n fields already, with room for m entries in total;
 *        the assert below requires space for N_IOVEC_META_FIELDS more.
 * label/label_len: security label bytes, used for _SELINUX_CONTEXT.
 * unit_id: unit name used when no unit can be derived from the PID.
 *
 * Fields derived from the sender's credentials (_PID, _UID, _GID, _COMM,
 * _EXE, _CMDLINE, audit, cgroup, session, owner UID, unit, SELinux context)
 * are appended, followed by the source timestamp, boot ID, machine ID and
 * host name. The strings are held in _cleanup_free_ locals, and the iovec is
 * handed to write_to_journal() before this function returns. */
499 static void dispatch_message_real(
501 struct iovec *iovec, unsigned n, unsigned m,
504 const char *label, size_t label_len,
505 const char *unit_id) {
507 char _cleanup_free_ *pid = NULL, *uid = NULL, *gid = NULL,
508 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
509 *comm = NULL, *cmdline = NULL, *hostname = NULL,
510 *audit_session = NULL, *audit_loginuid = NULL,
511 *exe = NULL, *cgroup = NULL, *session = NULL,
512 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
518 uid_t realuid = 0, owner = 0, journal_uid;
519 bool owner_valid = false;
/* The caller must have left room for all fields appended below */
524 assert(n + N_IOVEC_META_FIELDS <= m);
/* Remember the sender's UID; it drives the journal selection at the end */
530 realuid = ucred->uid;
/* Numeric credentials; a field is only added when formatting it succeeded */
532 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
533 IOVEC_SET_STRING(iovec[n++], pid);
535 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
536 IOVEC_SET_STRING(iovec[n++], uid);
538 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
539 IOVEC_SET_STRING(iovec[n++], gid);
/* Process name, executable path and command line of the sending PID */
541 r = get_process_comm(ucred->pid, &t);
543 comm = strappend("_COMM=", t);
547 IOVEC_SET_STRING(iovec[n++], comm);
550 r = get_process_exe(ucred->pid, &t);
552 exe = strappend("_EXE=", t);
556 IOVEC_SET_STRING(iovec[n++], exe);
559 r = get_process_cmdline(ucred->pid, 0, false, &t);
561 cmdline = strappend("_CMDLINE=", t);
565 IOVEC_SET_STRING(iovec[n++], cmdline);
/* Audit session and audit login UID of the sending PID */
568 r = audit_session_from_pid(ucred->pid, &audit);
570 if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
571 IOVEC_SET_STRING(iovec[n++], audit_session);
573 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
575 if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
576 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* cgroup path as produced by shortened_cgroup_path() */
578 t = shortened_cgroup_path(ucred->pid);
580 cgroup = strappend("_SYSTEMD_CGROUP=", t);
584 IOVEC_SET_STRING(iovec[n++], cgroup);
/* Login session and session owner of the sending PID */
588 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
589 session = strappend("_SYSTEMD_SESSION=", t);
593 IOVEC_SET_STRING(iovec[n++], session);
596 if (sd_pid_get_owner_uid(ucred->pid, &owner) >= 0) {
598 if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
599 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* Unit: taken from the PID's system unit, else its user unit, else the unit_id argument */
603 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
604 unit = strappend("_SYSTEMD_UNIT=", t);
606 } else if (cg_pid_get_user_unit(ucred->pid, &t) >= 0) {
607 unit = strappend("_SYSTEMD_USER_UNIT=", t);
609 } else if (unit_id) {
611 unit = strappend("_SYSTEMD_USER_UNIT=", unit_id);
613 unit = strappend("_SYSTEMD_UNIT=", unit_id);
617 IOVEC_SET_STRING(iovec[n++], unit);
/* SELinux context from the passed label: sizeof() of the prefix literal
 * already includes room for the terminating NUL written after the label bytes */
621 selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
622 if (selinux_context) {
623 *((char*) mempcpy(stpcpy(selinux_context, "_SELINUX_CONTEXT="), label, label_len)) = 0;
624 IOVEC_SET_STRING(iovec[n++], selinux_context);
/* SELinux context queried from the PID via getpidcon() */
627 security_context_t con;
629 if (getpidcon(ucred->pid, &con) >= 0) {
630 selinux_context = strappend("_SELINUX_CONTEXT=", con);
632 IOVEC_SET_STRING(iovec[n++], selinux_context);
/* Timestamp supplied with the message (tv) */
640 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
641 (unsigned long long) timeval_load(tv)) >= 0)
642 IOVEC_SET_STRING(iovec[n++], source_time);
645 /* Note that strictly speaking storing the boot id here is
646 * redundant since the entry includes this in-line
647 * anyway. However, we need this indexed, too. */
648 r = sd_id128_get_boot(&id);
650 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
651 IOVEC_SET_STRING(iovec[n++], boot_id);
653 r = sd_id128_get_machine(&id);
655 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
656 IOVEC_SET_STRING(iovec[n++], machine_id);
658 t = gethostname_malloc();
660 hostname = strappend("_HOSTNAME=", t);
663 IOVEC_SET_STRING(iovec[n++], hostname);
/* Choose which journal the entry goes to, depending on s->split_mode */
668 if (s->split_mode == SPLIT_UID && realuid > 0)
669 /* Split up strictly by any UID */
670 journal_uid = realuid;
671 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
672 /* Split up by login UIDs, this avoids creation of
673 * individual journals for system UIDs. We do this
674 * only if the realuid is not root, in order not to
675 * accidentally leak privileged information to the
676 * user that is logged by a privileged process that is
677 * part of an unprivileged session.*/
682 write_to_journal(s, journal_uid, iovec, n);
/* Log a message that originates from journald itself.
 *
 * message_id: added as a field unless it equals SD_ID128_NULL.
 * format, ...: printf-style text placed after the "MESSAGE=" prefix.
 *
 * The entry carries PRIORITY=6 and _TRANSPORT=driver and is dispatched via
 * dispatch_message_real() with this process' own pid, uid and gid as
 * credentials and no timestamp, label or unit. */
685 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
686 char mid[11 + 32 + 1];
687 char buffer[16 + LINE_MAX + 1];
/* Room for the up to four fields set here plus the metadata dispatch_message_real() appends */
688 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
696 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
697 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* "MESSAGE=" is 8 bytes; the formatted text is written right behind it */
699 memcpy(buffer, "MESSAGE=", 8);
700 va_start(ap, format);
701 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
703 char_array_0(buffer);
704 IOVEC_SET_STRING(iovec[n++], buffer);
706 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
707 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
709 IOVEC_SET_STRING(iovec[n++], mid);
/* Use journald's own identity as the sender credentials */
713 ucred.pid = getpid();
714 ucred.uid = getuid();
715 ucred.gid = getgid();
717 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Front end for storing a received message.
 *
 * The priority is compared against s->max_level_store, the sender's cgroup
 * path is shortened and passed to the rate limiter together with the
 * priority and the currently available space, and the entry is finally
 * handed to dispatch_message_real(). A driver message tagged
 * SD_MESSAGE_JOURNAL_DROPPED reports how many messages were suppressed. */
720 void server_dispatch_message(
722 struct iovec *iovec, unsigned n, unsigned m,
725 const char *label, size_t label_len,
730 char _cleanup_free_ *path = NULL;
734 assert(iovec || n == 0);
739 if (LOG_PRI(priority) > s->max_level_store)
/* The rate-limit key is derived from the sender's cgroup path */
745 path = shortened_cgroup_path(ucred->pid);
749 /* example: /user/lennart/3/foobar
750 * /system/dbus.service/foobar
752 * So let's cut of everything past the third /, since that is
753 * where user directories start */
755 c = strchr(path, '/');
757 c = strchr(c+1, '/');
759 c = strchr(c+1, '/');
765 rl = journal_rate_limit_test(s->rate_limit, path,
766 priority & LOG_PRIMASK, available_space(s));
771 /* Write a suppression message if we suppressed something */
773 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
774 "Suppressed %u messages from %s", rl - 1, path);
777 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
/* Open the system journal under /var and/or the runtime journal under /run.
 *
 * The persistent journal is only tried when it is not open yet, s->storage is
 * STORAGE_PERSISTENT or STORAGE_AUTO, and the flag file
 * /run/systemd/journal/flushed exists. The runtime journal is handled when it
 * is not open yet and s->storage is not STORAGE_NONE. After a journal has
 * been opened its permissions are fixed and a driver message announces the
 * configured max_use. */
781 static int system_journal_open(Server *s) {
787 r = sd_id128_get_machine(&machine);
791 sd_id128_to_string(machine, ids);
793 if (!s->system_journal &&
794 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
795 access("/run/systemd/journal/flushed", F_OK) >= 0) {
797 /* If in auto mode: first try to create the machine
798 * path, but not the prefix.
800 * If in persistent mode: create /var/log/journal and
801 * the machine path */
803 if (s->storage == STORAGE_PERSISTENT)
804 (void) mkdir("/var/log/journal/", 0755);
806 fn = strappend("/var/log/journal/", ids);
810 (void) mkdir(fn, 0755);
813 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
817 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
821 char fb[FORMAT_BYTES_MAX];
823 server_fix_perms(s, s->system_journal, 0);
824 server_driver_message(s, SD_ID128_NULL, "Allowing system journal files to grow to %s.",
825 format_bytes(fb, sizeof(fb), s->system_metrics.max_use));
/* -ENOENT and -EROFS are not reported as warnings */
829 if (r != -ENOENT && r != -EROFS)
830 log_warning("Failed to open system journal: %s", strerror(-r));
836 if (!s->runtime_journal &&
837 (s->storage != STORAGE_NONE)) {
839 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
843 if (s->system_journal) {
845 /* Try to open the runtime journal, but only
846 * if it already exists, so that we can flush
847 * it into the system journal */
849 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
854 log_warning("Failed to open runtime journal: %s", strerror(-r));
861 /* OK, we really need the runtime journal, so create
862 * it if necessary. */
864 (void) mkdir_parents(fn, 0755);
865 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
869 log_error("Failed to open runtime journal: %s", strerror(-r));
874 if (s->runtime_journal) {
875 char fb[FORMAT_BYTES_MAX];
877 server_fix_perms(s, s->runtime_journal, 0);
878 server_driver_message(s, SD_ID128_NULL, "Allowing runtime journal files to grow to %s.",
879 format_bytes(fb, sizeof(fb), s->runtime_metrics.max_use));
/* Move the contents of the runtime journal into the system journal on /var.
 *
 * s->storage is tested against STORAGE_AUTO and STORAGE_PERSISTENT, and both
 * s->runtime_journal and (after system_journal_open()) s->system_journal are
 * tested for being open. The runtime journal is read through an sd_journal
 * handle opened with SD_JOURNAL_RUNTIME_ONLY, and each entry is copied with
 * journal_file_copy_entry(), retried once when shall_try_append_again()
 * accepts the error. Afterwards the runtime journal is closed and
 * /run/log/journal is removed. */
886 int server_flush_to_var(Server *s) {
889 sd_journal *j = NULL;
893 if (s->storage != STORAGE_AUTO &&
894 s->storage != STORAGE_PERSISTENT)
897 if (!s->runtime_journal)
/* Try to bring up the system journal now */
900 system_journal_open(s);
902 if (!s->system_journal)
905 log_debug("Flushing to /var...");
907 r = sd_id128_get_machine(&machine);
909 log_error("Failed to get machine id: %s", strerror(-r));
913 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
915 log_error("Failed to read runtime journal: %s", strerror(-r));
/* Set the data threshold to 0 for this reader */
919 sd_journal_set_data_threshold(j, 0);
921 SD_JOURNAL_FOREACH(j) {
926 assert(f && f->current_offset > 0);
/* Map the entry object at the reader's current offset */
928 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
930 log_error("Can't read entry: %s", strerror(-r));
934 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
938 if (!shall_try_append_again(s->system_journal, r)) {
939 log_error("Can't write entry: %s", strerror(-r));
946 log_debug("Retrying write.");
947 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
949 log_error("Can't write entry: %s", strerror(-r));
955 journal_file_post_change(s->system_journal);
/* The runtime journal is closed and its directory tree removed */
957 journal_file_close(s->runtime_journal);
958 s->runtime_journal = NULL;
961 rm_rf("/run/log/journal", false, true, false);
/* Handle one event delivered by epoll.
 *
 * The event is routed by comparing ev->data.fd with the well-known
 * descriptors stored in s: the signal fd, the /dev/kmsg fd, the native and
 * syslog datagram sockets, and the stdout listening socket. Anything else is
 * treated as a StdoutStream pointer stored in ev->data.ptr. */
968 int process_event(Server *s, struct epoll_event *ev) {
/* --- signals --- */
972 if (ev->data.fd == s->signal_fd) {
973 struct signalfd_siginfo sfsi;
976 if (ev->events != EPOLLIN) {
977 log_error("Got invalid event from epoll.");
981 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
982 if (n != sizeof(sfsi)) {
987 if (errno == EINTR || errno == EAGAIN)
993 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
/* SIGUSR1: create the flag file that system_journal_open() checks for, then flush /run to /var */
995 if (sfsi.ssi_signo == SIGUSR1) {
996 touch("/run/systemd/journal/flushed");
997 server_flush_to_var(s);
1001 if (sfsi.ssi_signo == SIGUSR2) {
/* --- kernel messages --- */
1009 } else if (ev->data.fd == s->dev_kmsg_fd) {
1012 if (ev->events != EPOLLIN) {
1013 log_error("Got invalid event from epoll.");
1017 r = server_read_dev_kmsg(s);
/* --- native and syslog datagram sockets --- */
1023 } else if (ev->data.fd == s->native_fd ||
1024 ev->data.fd == s->syslog_fd) {
1026 if (ev->events != EPOLLIN) {
1027 log_error("Got invalid event from epoll.");
1032 struct msghdr msghdr;
1034 struct ucred *ucred = NULL;
1035 struct timeval *tv = NULL;
1036 struct cmsghdr *cmsg;
1038 size_t label_len = 0;
1040 struct cmsghdr cmsghdr;
1042 /* We use NAME_MAX space for the
1043 * SELinux label here. The kernel
1044 * currently enforces no limit, but
1045 * according to suggestions from the
1046 * SELinux people this will change and
1047 * it will probably be identical to
1048 * NAME_MAX. For now we use that, but
1049 * this should be updated one day when
1050 * the final limit is known.*/
1051 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1052 CMSG_SPACE(sizeof(struct timeval)) +
1053 CMSG_SPACE(sizeof(int)) + /* fd */
1054 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask how many bytes are pending (SIOCINQ) and grow s->buffer when it is smaller than that */
1061 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1062 log_error("SIOCINQ failed: %m");
1066 if (s->buffer_size < (size_t) v) {
1070 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1071 b = realloc(s->buffer, l+1);
1074 log_error("Couldn't increase buffer.");
/* Receive the datagram into s->buffer, with ancillary data going into the control buffer */
1083 iovec.iov_base = s->buffer;
1084 iovec.iov_len = s->buffer_size;
1088 msghdr.msg_iov = &iovec;
1089 msghdr.msg_iovlen = 1;
1090 msghdr.msg_control = &control;
1091 msghdr.msg_controllen = sizeof(control);
1093 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1096 if (errno == EINTR || errno == EAGAIN)
1099 log_error("recvmsg() failed: %m");
/* Pick sender credentials, security label, timestamp and passed file descriptors out of the control messages */
1103 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1105 if (cmsg->cmsg_level == SOL_SOCKET &&
1106 cmsg->cmsg_type == SCM_CREDENTIALS &&
1107 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1108 ucred = (struct ucred*) CMSG_DATA(cmsg);
1109 else if (cmsg->cmsg_level == SOL_SOCKET &&
1110 cmsg->cmsg_type == SCM_SECURITY) {
1111 label = (char*) CMSG_DATA(cmsg);
1112 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1113 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1114 cmsg->cmsg_type == SO_TIMESTAMP &&
1115 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1116 tv = (struct timeval*) CMSG_DATA(cmsg);
1117 else if (cmsg->cmsg_level == SOL_SOCKET &&
1118 cmsg->cmsg_type == SCM_RIGHTS) {
1119 fds = (int*) CMSG_DATA(cmsg);
1120 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* syslog socket: payload only; passed file descriptors are not accepted here */
1124 if (ev->data.fd == s->syslog_fd) {
1127 if (n > 0 && n_fds == 0) {
1128 e = memchr(s->buffer, '\n', n);
1134 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1135 } else if (n_fds > 0)
1136 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* native socket: either an inline payload, or exactly one fd with an empty payload */
1139 if (n > 0 && n_fds == 0)
1140 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1141 else if (n == 0 && n_fds == 1)
1142 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1144 log_warning("Got too many file descriptors via native socket. Ignoring.");
/* Close the received descriptors */
1147 close_many(fds, n_fds);
/* --- new connection on the stdout socket --- */
1152 } else if (ev->data.fd == s->stdout_fd) {
1154 if (ev->events != EPOLLIN) {
1155 log_error("Got invalid event from epoll.");
1159 stdout_stream_new(s);
1163 StdoutStream *stream;
/* Only the EPOLLIN and EPOLLHUP bits may be set for a stream */
1165 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1166 log_error("Got invalid event from epoll.");
1170 /* If it is none of the well-known fds, it must be an
1171 * stdout stream fd. Note that this is a bit ugly here
1172 * (since we rely that none of the well-known fds
1173 * could be interpreted as pointer), but nonetheless
1174 * safe, since the well-known fds would never get an
1175 * fd > 4096, i.e. beyond the first memory page */
1177 stream = ev->data.ptr;
/* A result <= 0 from processing ends the stream */
1179 if (stdout_stream_process(stream) <= 0)
1180 stdout_stream_free(stream);
1185 log_error("Unknown event.");
/* Create the signalfd through which signals are delivered and add it to the epoll set.
 *
 * The process signal mask is replaced (SIG_SETMASK) with a set containing
 * SIGINT, SIGTERM, SIGUSR1 and SIGUSR2, and a non-blocking, close-on-exec
 * signalfd for the same set is stored in s->signal_fd and registered with
 * s->epoll_fd for EPOLLIN. */
1189 static int open_signalfd(Server *s) {
1191 struct epoll_event ev;
1195 assert_se(sigemptyset(&mask) == 0);
1196 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1197 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1199 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1200 if (s->signal_fd < 0) {
1201 log_error("signalfd(): %m");
1206 ev.events = EPOLLIN;
1207 ev.data.fd = s->signal_fd;
1209 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1210 log_error("epoll_ctl(): %m");
/* Apply journald switches given on the kernel command line.
 *
 * /proc/cmdline is read and split into words. The options
 * systemd.journald.forward_to_syslog=, systemd.journald.forward_to_kmsg= and
 * systemd.journald.forward_to_console= are parsed as booleans into the
 * matching Server fields; any other word starting with "systemd.journald"
 * produces a warning. A container check is made first via detect_container(). */
1217 static int server_parse_proc_cmdline(Server *s) {
1218 char _cleanup_free_ *line = NULL;
1223 if (detect_container(NULL) > 0)
1226 r = read_one_line_file("/proc/cmdline", &line);
1228 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1232 FOREACH_WORD_QUOTED(w, l, line, state) {
1233 char _cleanup_free_ *word;
1235 word = strndup(w, l);
/* The numeric offsets below are the lengths of the option prefixes (35, 33 and 36 characters) */
1239 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1240 r = parse_boolean(word + 35);
1242 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1244 s->forward_to_syslog = r;
1245 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1246 r = parse_boolean(word + 33);
1248 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1250 s->forward_to_kmsg = r;
1251 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1252 r = parse_boolean(word + 36);
1254 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1256 s->forward_to_console = r;
1257 } else if (startswith(word, "systemd.journald"))
1258 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Load /etc/systemd/journald.conf into s.
 *
 * The file is opened read-only with close-on-exec ("re") and its [Journal]
 * section is parsed through the gperf lookup table journald_gperf_lookup.
 * A missing file (ENOENT) is treated separately from other open errors,
 * which are logged as warnings, as are parse failures. */
1264 static int server_parse_config_file(Server *s) {
1265 static const char *fn = "/etc/systemd/journald.conf";
1266 FILE _cleanup_fclose_ *f = NULL;
1271 f = fopen(fn, "re");
1273 if (errno == ENOENT)
1276 log_warning("Failed to open configuration file %s: %m", fn);
1280 r = config_parse(fn, f, "Journal\0", config_item_perf_lookup,
1281 (void*) journald_gperf_lookup, false, s);
1283 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Initialise a Server object and open all of its inputs.
 *
 * Defaults are assigned first, then overridden by the configuration file and
 * the kernel command line. After that the user-journal map, mmap cache and
 * epoll instance are created, sockets passed in by the service manager are
 * classified, the remaining inputs (syslog, native, stdout, /dev/kmsg, kernel
 * sequence number, signalfd) are opened, and finally udev, the rate limiter
 * and the journal files are set up. */
1288 int server_init(Server *s) {
/* Mark every descriptor as "not open" */
1294 s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1298 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1299 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1301 s->forward_to_syslog = true;
1303 s->max_level_store = LOG_DEBUG;
1304 s->max_level_syslog = LOG_DEBUG;
1305 s->max_level_kmsg = LOG_NOTICE;
1306 s->max_level_console = LOG_INFO;
/* Fill both metrics structures with 0xFF bytes */
1308 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1309 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1311 server_parse_config_file(s);
1312 server_parse_proc_cmdline(s);
/* If exactly one of interval and burst is zero, both are set to zero */
1313 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1314 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1315 (long long unsigned) s->rate_limit_interval,
1316 s->rate_limit_burst);
1317 s->rate_limit_interval = s->rate_limit_burst = 0;
1320 mkdir_p("/run/systemd/journal", 0755);
1322 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1323 if (!s->user_journals)
1326 s->mmap = mmap_cache_new();
1330 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1331 if (s->epoll_fd < 0) {
1332 log_error("Failed to create epoll object: %m");
/* Classify the sockets handed over via sd_listen_fds(); at most one of each kind is accepted */
1336 n = sd_listen_fds(true);
1338 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1342 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1344 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1346 if (s->native_fd >= 0) {
1347 log_error("Too many native sockets passed.");
1353 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1355 if (s->stdout_fd >= 0) {
1356 log_error("Too many stdout sockets passed.");
1362 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1364 if (s->syslog_fd >= 0) {
1365 log_error("Too many /dev/log sockets passed.");
1372 log_error("Unknown socket passed.");
/* Open the individual inputs */
1377 r = server_open_syslog_socket(s);
1381 r = server_open_native_socket(s);
1385 r = server_open_stdout_socket(s);
1389 r = server_open_dev_kmsg(s);
1393 r = server_open_kernel_seqnum(s);
1397 r = open_signalfd(s);
1401 s->udev = udev_new();
1405 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1406 s->rate_limit_burst);
1410 r = system_journal_open(s);
/* Call journal_file_maybe_append_tag() with the current CLOCK_REALTIME time
 * on the system journal (when open) and on every per-user journal in
 * s->user_journals. */
1417 void server_maybe_append_tags(Server *s) {
1423 n = now(CLOCK_REALTIME);
1425 if (s->system_journal)
1426 journal_file_maybe_append_tag(s->system_journal, n);
1428 HASHMAP_FOREACH(f, s->user_journals, i)
1429 journal_file_maybe_append_tag(f, n);
/* Release everything a Server holds: stdout streams, the system, runtime and
 * per-user journal files, the user-journal map, all open descriptors, the
 * rate limiter, the kernel sequence number mapping, the mmap cache and the
 * udev context. */
1433 void server_done(Server *s) {
/* Free streams until the list head s->stdout_streams is empty */
1437 while (s->stdout_streams)
1438 stdout_stream_free(s->stdout_streams);
1440 if (s->system_journal)
1441 journal_file_close(s->system_journal);
1443 if (s->runtime_journal)
1444 journal_file_close(s->runtime_journal);
/* Drain the per-user map, closing each file, before freeing the map itself */
1446 while ((f = hashmap_steal_first(s->user_journals)))
1447 journal_file_close(f);
1449 hashmap_free(s->user_journals);
/* Descriptors are only closed when they were opened (>= 0) */
1451 if (s->epoll_fd >= 0)
1452 close_nointr_nofail(s->epoll_fd);
1454 if (s->signal_fd >= 0)
1455 close_nointr_nofail(s->signal_fd);
1457 if (s->syslog_fd >= 0)
1458 close_nointr_nofail(s->syslog_fd);
1460 if (s->native_fd >= 0)
1461 close_nointr_nofail(s->native_fd);
1463 if (s->stdout_fd >= 0)
1464 close_nointr_nofail(s->stdout_fd);
1466 if (s->dev_kmsg_fd >= 0)
1467 close_nointr_nofail(s->dev_kmsg_fd);
1470 journal_rate_limit_free(s->rate_limit);
/* The kernel sequence number is a mapping of sizeof(uint64_t) bytes */
1472 if (s->kernel_seqnum)
1473 munmap(s->kernel_seqnum, sizeof(uint64_t));
1479 mmap_cache_unref(s->mmap);
1482 udev_unref(s->udev);