1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
29 #include <systemd/sd-journal.h>
30 #include <systemd/sd-messages.h>
31 #include <systemd/sd-daemon.h>
34 #include <systemd/sd-login.h>
40 #include "journal-file.h"
41 #include "socket-util.h"
42 #include "cgroup-util.h"
46 #include "conf-parser.h"
47 #include "journal-internal.h"
48 #include "journal-vacuum.h"
49 #include "journal-authenticate.h"
50 #include "journald-server.h"
51 #include "journald-rate-limit.h"
52 #include "journald-kmsg.h"
53 #include "journald-syslog.h"
54 #include "journald-stream.h"
55 #include "journald-console.h"
56 #include "journald-native.h"
60 #include <acl/libacl.h>
65 #include <selinux/selinux.h>
/* Upper bound on the number of entries kept in s->user_journals;
 * find_journal() closes entries until the map is below this size. */
68 #define USER_JOURNALS_MAX 1024
/* Initial values server_init() stores in s->rate_limit_interval and
 * s->rate_limit_burst before the configuration is parsed. */
70 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
71 #define DEFAULT_RATE_LIMIT_BURST 200
/* How long available_space() keeps returning its cached result. */
73 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Textual names of the Storage enum values, indexed by enum value. */
75 static const char* const storage_table[] = {
76 [STORAGE_AUTO] = "auto",
77 [STORAGE_VOLATILE] = "volatile",
78 [STORAGE_PERSISTENT] = "persistent",
79 [STORAGE_NONE] = "none"
/* The macros below are defined elsewhere; presumably they generate the
 * string<->enum helpers and the config_parse_storage() callback from the
 * table above -- confirm against their definitions. */
82 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
83 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Textual names of the SplitMode enum values, indexed by enum value.
 * NOTE(review): SPLIT_UID is used by dispatch_message_real() but no entry
 * for it is visible in this listing -- confirm the table is complete. */
85 static const char* const split_mode_table[] = {
86 [SPLIT_NONE] = "none",
88 [SPLIT_LOGIN] = "login"
/* The macros below are defined elsewhere; presumably they generate the
 * string<->enum helpers and the config_parse_split_mode() callback from the
 * table above -- confirm against their definitions. */
91 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
92 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Computes how many bytes the journal may still consume and caches the
 * result in s->cached_available_space. The value is limited both by the
 * configured max_use (minus the space journal files already occupy) and by
 * the free space of the file system (minus keep_free). A cached value
 * younger than RECHECK_AVAILABLE_SPACE_USEC is returned without rescanning. */
94 static uint64_t available_space(Server *s) {
96 char _cleanup_free_ *p = NULL;
100 uint64_t sum = 0, avail = 0, ss_avail = 0;
102 DIR _cleanup_closedir_ *d = NULL;
106 ts = now(CLOCK_MONOTONIC);
/* Serve the cached value while it is still fresh. */
108 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
109 return s->cached_available_space;
111 r = sd_id128_get_machine(&machine);
/* Choose directory prefix and metrics: system journal if it is open,
 * otherwise the runtime journal. */
115 if (s->system_journal) {
116 f = "/var/log/journal/";
117 m = &s->system_metrics;
119 f = "/run/log/journal/";
120 m = &s->runtime_metrics;
/* Directory to scan: prefix followed by the machine ID string. */
125 p = strappend(f, sd_id128_to_string(machine, ids));
133 if (fstatvfs(dirfd(d), &ss) < 0)
139 union dirent_storage buf;
141 r = readdir_r(d, &buf.de, &de);
/* Only entries whose name ends in ".journal" or ".journal~" are counted. */
148 if (!endswith(de->d_name, ".journal") &&
149 !endswith(de->d_name, ".journal~"))
152 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
155 if (!S_ISREG(st.st_mode))
/* Accumulate allocated size; st_blocks is treated as 512-byte units. */
158 sum += (uint64_t) st.st_blocks * 512UL;
/* Room left below the configured usage cap. */
161 avail = sum >= m->max_use ? 0 : m->max_use - sum;
/* NOTE(review): POSIX specifies f_bavail in units of f_frsize, not
 * f_bsize; the two differ on some file systems -- confirm which is meant. */
163 ss_avail = ss.f_bsize * ss.f_bavail;
/* Never count the keep_free reserve as usable. */
165 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
167 if (ss_avail < avail)
/* Remember the result together with the time it was computed. */
170 s->cached_available_space = avail;
171 s->cached_available_space_timestamp = ts;
/* Resolves the GID of the "adm" group into s->file_gid, at most once per
 * server: s->file_gid_valid is set afterwards whether or not the lookup
 * succeeded. */
176 static void server_read_file_gid(Server *s) {
177 const char *adm = "adm";
182 if (s->file_gid_valid)
185 r = get_group_creds(&adm, &s->file_gid);
187 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
189 /* if we couldn't read the gid, then it will be 0, but that's
190 * fine and we shouldn't try to resolve the group again, so
191 * let's just pretend it worked right-away. */
192 s->file_gid_valid = true;
/* Adjusts ownership, mode and ACL of journal file f.
 *
 * The file is set to mode 0640, owner 0 and group s->file_gid. The visible
 * ACL code then looks up or creates an ACL_USER entry for uid, adds
 * ACL_READ to it, recalculates the mask and writes the ACL back.
 * Every failure is only logged as a warning ("ignoring"). */
195 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
200 acl_permset_t permset;
/* Make sure s->file_gid has been looked up before it is used below. */
205 server_read_file_gid(s);
207 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
209 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
215 acl = acl_get_fd(f->fd);
217 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an existing ACL entry for this uid. */
221 r = acl_find_uid(acl, uid, &entry);
/* Create a new user entry qualified with uid. */
224 if (acl_create_entry(&acl, &entry) < 0 ||
225 acl_set_tag_type(entry, ACL_USER) < 0 ||
226 acl_set_qualifier(entry, &uid) < 0) {
227 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Grant read permission on the entry and recompute the ACL mask. */
232 if (acl_get_permset(entry, &permset) < 0 ||
233 acl_add_perm(permset, ACL_READ) < 0 ||
234 acl_calc_mask(&acl) < 0) {
235 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
239 if (acl_set_fd(f->fd, acl) < 0)
240 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Selects the journal file that entries for uid should be written to.
 *
 * The runtime journal is returned whenever it is open. Otherwise the
 * per-user journal for uid is looked up in s->user_journals and, if absent,
 * opened as user-<uid>.journal below /var/log/journal/<machine id>/ and
 * added to the map. The visible failure paths fall back to
 * s->system_journal. */
247 static JournalFile* find_journal(Server *s, uid_t uid) {
255 /* We split up user logs only on /var, not on /run. If the
256 * runtime file is open, we write to it exclusively, in order
257 * to guarantee proper order as soon as we flush /run to
258 * /var and close the runtime file. */
260 if (s->runtime_journal)
261 return s->runtime_journal;
264 return s->system_journal;
266 r = sd_id128_get_machine(&machine);
268 return s->system_journal;
/* Already open for this uid? */
270 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
274 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
275 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
276 return s->system_journal;
/* Keep the number of open user journals below USER_JOURNALS_MAX. */
278 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
279 /* Too many open? Then let's close one */
280 f = hashmap_steal_first(s->user_journals);
282 journal_file_close(f);
285 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
289 return s->system_journal;
291 server_fix_perms(s, f, uid);
/* Register the new file; if that fails, close it again and fall back. */
293 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
295 journal_file_close(f);
296 return s->system_journal;
/* Rotates the runtime journal, the system journal and every per-user
 * journal that is currently open. Rotation failures are logged; after
 * rotation the permissions of the resulting file are fixed up. */
302 void server_rotate(Server *s) {
308 log_debug("Rotating...");
/* Runtime journal: rotated with sealing disabled (last argument false). */
310 if (s->runtime_journal) {
311 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
313 if (s->runtime_journal)
314 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
316 log_error("Failed to create new runtime journal: %s", strerror(-r));
318 server_fix_perms(s, s->runtime_journal, 0);
/* System journal: sealing follows s->seal. */
321 if (s->system_journal) {
322 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
324 if (s->system_journal)
325 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
327 log_error("Failed to create new system journal: %s", strerror(-r));
330 server_fix_perms(s, s->system_journal, 0);
/* Per-user journals: the map key k is the owning uid. */
333 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
334 r = journal_file_rotate(&f, s->compress, s->seal);
337 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
339 log_error("Failed to create user journal: %s", strerror(-r));
/* Store the file pointer as updated by journal_file_rotate(). */
341 hashmap_replace(s->user_journals, k, f);
342 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Vacuums the journal directories of this machine using the configured
 * max_use / keep_free metrics and s->max_retention_usec.
 * s->oldest_file_usec is reset first and handed to the vacuum calls as an
 * output argument. */
347 void server_vacuum(Server *s) {
353 log_debug("Vacuuming...");
355 s->oldest_file_usec = 0;
357 r = sd_id128_get_machine(&machine);
359 log_error("Failed to get machine ID: %s", strerror(-r));
363 sd_id128_to_string(machine, ids);
/* Persistent directory, only when the system journal is open. */
365 if (s->system_journal) {
366 p = strappend("/var/log/journal/", ids);
372 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
/* A missing directory (-ENOENT) is not reported. */
373 if (r < 0 && r != -ENOENT)
374 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* Runtime directory, only when the runtime journal is open. */
378 if (s->runtime_journal) {
379 p = strappend("/run/log/journal/", ids);
385 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
386 if (r < 0 && r != -ENOENT)
387 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* Zero the timestamp so that available_space() no longer treats its
 * cached value as fresh. */
391 s->cached_available_space_timestamp = 0;
/* Returns a newly allocated copy of pid's cgroup path with the cgroup path
 * of PID 1 stripped from the front, when the former starts with the latter.
 * A trailing "/system" on PID 1's path is removed before the comparison.
 * The handling of the other cases is not visible in this listing. */
394 static char *shortened_cgroup_path(pid_t pid) {
396 char _cleanup_free_ *process_path = NULL, *init_path = NULL;
401 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
405 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
/* 7 == strlen("/system"): truncate the suffix in place. */
409 if (endswith(init_path, "/system"))
410 init_path[strlen(init_path) - 7] = 0;
411 else if (streq(init_path, "/"))
/* Return only the part of the path below PID 1's cgroup. */
414 if (startswith(process_path, init_path)) {
415 path = strdup(process_path + strlen(init_path));
424 bool shall_try_append_again(JournalFile *f, int r) {
426 /* -E2BIG Hit configured limit
428 -EDQUOT Quota limit hit
430 -EHOSTDOWN Other machine
431 -EBUSY Unclean shutdown
432 -EPROTONOSUPPORT Unsupported feature
435 -ESHUTDOWN Already archived */
437 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
438 log_debug("%s: Allocation limit reached, rotating.", f->path);
439 else if (r == -EHOSTDOWN)
440 log_info("%s: Journal file from other machine, rotating.", f->path);
441 else if (r == -EBUSY)
442 log_info("%s: Unclean shutdown, rotating.", f->path);
443 else if (r == -EPROTONOSUPPORT)
444 log_info("%s: Unsupported feature, rotating.", f->path);
445 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
446 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Appends one entry, given as n iovec fields, to the journal selected for
 * uid by find_journal(). When the first append fails with a condition that
 * shall_try_append_again() accepts, the journal is looked up again and the
 * write is retried once; a second failure is logged and the entry dropped. */
453 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
455 bool vacuumed = false;
462 f = find_journal(s, uid);
/* Header limits reached or header out-of-date: a fresh file is wanted
 * before writing, so the journal is looked up again afterwards. */
466 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
467 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
472 f = find_journal(s, uid);
477 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
/* Give up if "vacuumed" is already set or the error is not retryable. */
481 if (vacuumed || !shall_try_append_again(f, r)) {
482 log_error("Failed to write entry, ignoring: %s", strerror(-r));
489 f = find_journal(s, uid);
493 log_debug("Retrying write.");
494 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
496 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* Appends the trusted "_"-prefixed metadata fields to the entry held in
 * iovec[0..n) and hands the result to write_to_journal().
 *
 * iovec must have room for m elements in total; the assertion below
 * requires n + N_IOVEC_META_FIELDS <= m. Fields derived from the sender
 * (_PID, _UID, _GID, _COMM, _EXE, _CMDLINE, audit, cgroup, session, unit,
 * SELinux context) are taken from ucred; _SOURCE_REALTIME_TIMESTAMP from
 * tv; _BOOT_ID, _MACHINE_ID and _HOSTNAME from the local system. A field
 * whose string could not be built is left out. The strings are owned by the
 * _cleanup_free_ locals, so they live until this function returns. */
499 static void dispatch_message_real(
501 struct iovec *iovec, unsigned n, unsigned m,
504 const char *label, size_t label_len,
505 const char *unit_id) {
507 char _cleanup_free_ *pid = NULL, *uid = NULL, *gid = NULL,
508 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
509 *comm = NULL, *cmdline = NULL, *hostname = NULL,
510 *audit_session = NULL, *audit_loginuid = NULL,
511 *exe = NULL, *cgroup = NULL, *session = NULL,
512 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
518 uid_t loginuid = 0, realuid = 0;
520 bool loginuid_valid = false;
525 assert(n + N_IOVEC_META_FIELDS <= m);
/* Credentials as received for the sending process. */
533 realuid = ucred->uid;
535 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
536 IOVEC_SET_STRING(iovec[n++], pid);
538 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
539 IOVEC_SET_STRING(iovec[n++], uid);
541 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
542 IOVEC_SET_STRING(iovec[n++], gid);
/* Process name, executable path and command line of the sender. */
544 r = get_process_comm(ucred->pid, &t);
546 comm = strappend("_COMM=", t);
550 IOVEC_SET_STRING(iovec[n++], comm);
553 r = get_process_exe(ucred->pid, &t);
555 exe = strappend("_EXE=", t);
559 IOVEC_SET_STRING(iovec[n++], exe);
562 r = get_process_cmdline(ucred->pid, 0, false, &t);
564 cmdline = strappend("_CMDLINE=", t);
568 IOVEC_SET_STRING(iovec[n++], cmdline);
/* Audit session and login uid; the latter is remembered for the
 * journal selection at the end of this function. */
571 r = audit_session_from_pid(ucred->pid, &audit);
573 if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
574 IOVEC_SET_STRING(iovec[n++], audit_session);
576 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
578 loginuid_valid = true;
579 if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
580 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* cgroup path relative to PID 1's cgroup. */
583 t = shortened_cgroup_path(ucred->pid);
585 cgroup = strappend("_SYSTEMD_CGROUP=", t);
589 IOVEC_SET_STRING(iovec[n++], cgroup);
593 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
594 session = strappend("_SYSTEMD_SESSION=", t);
598 IOVEC_SET_STRING(iovec[n++], session);
/* NOTE(review): sd_pid_get_owner_uid() is documented to take a PID as
 * its first argument, but ucred->uid is passed here; every neighbouring
 * call passes ucred->pid. This looks like a typo -- confirm and fix. */
601 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
602 if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
603 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* Unit: system unit of the PID first, then user unit, then the
 * caller-supplied unit_id as a last resort. */
606 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
607 unit = strappend("_SYSTEMD_UNIT=", t);
609 } else if (cg_pid_get_user_unit(ucred->pid, &t) >= 0) {
610 unit = strappend("_SYSTEMD_USER_UNIT=", t);
612 } else if (unit_id) {
614 unit = strappend("_SYSTEMD_USER_UNIT=", unit_id);
616 unit = strappend("_SYSTEMD_UNIT=", unit_id);
620 IOVEC_SET_STRING(iovec[n++], unit);
/* SELinux context from the passed-in label, which carries an explicit
 * length. sizeof() of the literal already includes room for the
 * terminating NUL written below. */
624 selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
625 if (selinux_context) {
626 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
627 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
628 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
629 IOVEC_SET_STRING(iovec[n++], selinux_context);
/* Alternative: query the context of the sending PID directly. */
632 security_context_t con;
634 if (getpidcon(ucred->pid, &con) >= 0) {
635 selinux_context = strappend("_SELINUX_CONTEXT=", con);
637 IOVEC_SET_STRING(iovec[n++], selinux_context);
/* Timestamp supplied with the message (tv). */
646 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
647 (unsigned long long) timeval_load(tv)) >= 0)
648 IOVEC_SET_STRING(iovec[n++], source_time);
651 /* Note that strictly speaking storing the boot id here is
652 * redundant since the entry includes this in-line
653 * anyway. However, we need this indexed, too. */
654 r = sd_id128_get_boot(&id);
656 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
657 IOVEC_SET_STRING(iovec[n++], boot_id);
659 r = sd_id128_get_machine(&id);
661 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
662 IOVEC_SET_STRING(iovec[n++], machine_id);
664 t = gethostname_malloc();
666 hostname = strappend("_HOSTNAME=", t);
669 IOVEC_SET_STRING(iovec[n++], hostname);
/* Choose which journal the entry goes to: the sender's real uid when
 * splitting by uid, for root, or when no login uid is known; otherwise
 * the audit login uid. */
674 if (s->split_mode == SPLIT_NONE)
676 else if (s->split_mode == SPLIT_UID || realuid == 0 || !loginuid_valid)
677 journal_uid = realuid;
679 journal_uid = loginuid;
681 write_to_journal(s, journal_uid, iovec, n);
/* Logs a message generated by the journal daemon itself.
 *
 * format and the variadic arguments are printf-style and form the MESSAGE=
 * field. message_id is added as an extra field unless it equals
 * SD_ID128_NULL. The entry is written with PRIORITY=6,
 * _TRANSPORT=driver and the daemon's own pid/uid/gid as credentials. */
684 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
685 char mid[11 + 32 + 1];
686 char buffer[16 + LINE_MAX + 1];
/* Room for the up to four fields set here plus the metadata fields
 * dispatch_message_real() appends. */
687 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
695 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
696 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* 8 == strlen("MESSAGE="); the formatted text follows the prefix. */
698 memcpy(buffer, "MESSAGE=", 8);
699 va_start(ap, format);
700 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
702 char_array_0(buffer);
703 IOVEC_SET_STRING(iovec[n++], buffer);
705 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
/* MESSAGE_ID() is defined elsewhere; presumably it expands to a format
 * string plus its arguments -- confirm against its definition. */
706 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
708 IOVEC_SET_STRING(iovec[n++], mid);
/* The daemon itself is the sender. */
712 ucred.pid = getpid();
713 ucred.uid = getuid();
714 ucred.gid = getgid();
716 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Entry point for messages received from clients: checks the priority
 * against s->max_level_store, applies per-cgroup rate limiting and passes
 * the message on to dispatch_message_real(). When earlier messages from the
 * same cgroup were suppressed, a driver message reporting their number is
 * logged. */
719 void server_dispatch_message(
721 struct iovec *iovec, unsigned n, unsigned m,
724 const char *label, size_t label_len,
729 char _cleanup_free_ *path = NULL;
733 assert(iovec || n == 0);
738 if (LOG_PRI(priority) > s->max_level_store)
/* The (shortened) cgroup path of the sender is the rate-limit key. */
744 path = shortened_cgroup_path(ucred->pid);
748 /* example: /user/lennart/3/foobar
749 * /system/dbus.service/foobar
751 * So let's cut off everything past the third /, since that is
752 * where user directories start */
754 c = strchr(path, '/');
756 c = strchr(c+1, '/');
758 c = strchr(c+1, '/');
/* The currently available space is passed to the rate limiter as well. */
764 rl = journal_rate_limit_test(s->rate_limit, path,
765 priority & LOG_PRIMASK, available_space(s));
770 /* Write a suppression message if we suppressed something */
772 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
773 "Suppressed %u messages from %s", rl - 1, path);
776 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
/* Opens the journal files the server writes to.
 *
 * The persistent system journal below /var/log/journal/<machine id>/ is
 * opened when storage is "persistent" or "auto" and the flag file
 * /run/systemd/journal/flushed exists. The runtime journal below
 * /run/log/journal/<machine id>/ is handled unless storage is "none": with a
 * system journal open it is only opened if it already exists, otherwise it
 * is created. A driver message announcing the size limit is logged for
 * each journal opened here. */
780 static int system_journal_open(Server *s) {
786 r = sd_id128_get_machine(&machine);
790 sd_id128_to_string(machine, ids);
792 if (!s->system_journal &&
793 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
794 access("/run/systemd/journal/flushed", F_OK) >= 0) {
796 /* If in auto mode: first try to create the machine
797 * path, but not the prefix.
799 * If in persistent mode: create /var/log/journal and
800 * the machine path */
802 if (s->storage == STORAGE_PERSISTENT)
803 (void) mkdir("/var/log/journal/", 0755);
805 fn = strappend("/var/log/journal/", ids);
/* mkdir() results are deliberately ignored. */
809 (void) mkdir(fn, 0755);
812 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
816 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
820 char fb[FORMAT_BYTES_MAX];
822 server_fix_perms(s, s->system_journal, 0);
823 server_driver_message(s, SD_ID128_NULL, "Allowing system journal files to grow to %s.",
824 format_bytes(fb, sizeof(fb), s->system_metrics.max_use));
/* -ENOENT and -EROFS are not warned about. */
828 if (r != -ENOENT && r != -EROFS)
829 log_warning("Failed to open system journal: %s", strerror(-r));
835 if (!s->runtime_journal &&
836 (s->storage != STORAGE_NONE)) {
838 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
842 if (s->system_journal) {
844 /* Try to open the runtime journal, but only
845 * if it already exists, so that we can flush
846 * it into the system journal */
/* Opened without O_CREAT and with sealing disabled. */
848 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
853 log_warning("Failed to open runtime journal: %s", strerror(-r));
860 /* OK, we really need the runtime journal, so create
861 * it if necessary. */
863 (void) mkdir_parents(fn, 0755);
864 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
868 log_error("Failed to open runtime journal: %s", strerror(-r));
873 if (s->runtime_journal) {
874 char fb[FORMAT_BYTES_MAX];
876 server_fix_perms(s, s->runtime_journal, 0);
877 server_driver_message(s, SD_ID128_NULL, "Allowing runtime journal files to grow to %s.",
878 format_bytes(fb, sizeof(fb), s->runtime_metrics.max_use));
/* Copies all entries of the runtime journal into the system journal, then
 * closes the runtime journal and removes /run/log/journal.
 *
 * Only applies when storage is "auto" or "persistent", a runtime journal is
 * open and the system journal could be opened. A failed copy is retried
 * once when shall_try_append_again() accepts the error. */
885 int server_flush_to_var(Server *s) {
888 sd_journal *j = NULL;
892 if (s->storage != STORAGE_AUTO &&
893 s->storage != STORAGE_PERSISTENT)
896 if (!s->runtime_journal)
/* Make sure the system journal is open now that flushing is requested. */
899 system_journal_open(s);
901 if (!s->system_journal)
904 log_debug("Flushing to /var...");
906 r = sd_id128_get_machine(&machine);
908 log_error("Failed to get machine id: %s", strerror(-r));
/* Read back the runtime journal through the public reader API. */
912 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
914 log_error("Failed to read runtime journal: %s", strerror(-r));
918 sd_journal_set_data_threshold(j, 0);
920 SD_JOURNAL_FOREACH(j) {
925 assert(f && f->current_offset > 0);
/* Locate the entry object at the reader's current position ... */
927 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
929 log_error("Can't read entry: %s", strerror(-r));
/* ... and copy it into the system journal. */
933 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
937 if (!shall_try_append_again(s->system_journal, r)) {
938 log_error("Can't write entry: %s", strerror(-r));
945 log_debug("Retrying write.");
946 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
948 log_error("Can't write entry: %s", strerror(-r));
954 journal_file_post_change(s->system_journal);
/* The runtime journal is no longer needed. */
956 journal_file_close(s->runtime_journal);
957 s->runtime_journal = NULL;
960 rm_rf("/run/log/journal", false, true, false);
/* Handles one epoll event, dispatching on the file descriptor it refers to:
 *
 *   s->signal_fd               read a signalfd_siginfo and act on the signal
 *   s->dev_kmsg_fd             read kernel messages
 *   s->native_fd, s->syslog_fd receive datagrams and process each as a
 *                              native or syslog message
 *   s->stdout_fd               set up a new stdout stream
 *   anything else              treated as an existing stdout stream
 *
 * Unexpected event masks are logged as "Got invalid event from epoll.". */
968 int process_event(Server *s, struct epoll_event *ev) {
972 if (ev->data.fd == s->signal_fd) {
973 struct signalfd_siginfo sfsi;
976 if (ev->events != EPOLLIN) {
977 log_error("Got invalid event from epoll.");
981 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
982 if (n != sizeof(sfsi)) {
987 if (errno == EINTR || errno == EAGAIN)
993 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
/* SIGUSR1: create the flag file system_journal_open() checks for,
 * then flush the runtime journal to /var. */
995 if (sfsi.ssi_signo == SIGUSR1) {
996 touch("/run/systemd/journal/flushed");
997 server_flush_to_var(s);
1001 if (sfsi.ssi_signo == SIGUSR2) {
1009 } else if (ev->data.fd == s->dev_kmsg_fd) {
1012 if (ev->events != EPOLLIN) {
1013 log_error("Got invalid event from epoll.");
1017 r = server_read_dev_kmsg(s);
1023 } else if (ev->data.fd == s->native_fd ||
1024 ev->data.fd == s->syslog_fd) {
1026 if (ev->events != EPOLLIN) {
1027 log_error("Got invalid event from epoll.");
1032 struct msghdr msghdr;
1034 struct ucred *ucred = NULL;
1035 struct timeval *tv = NULL;
1036 struct cmsghdr *cmsg;
1038 size_t label_len = 0;
1040 struct cmsghdr cmsghdr;
1042 /* We use NAME_MAX space for the
1043 * SELinux label here. The kernel
1044 * currently enforces no limit, but
1045 * according to suggestions from the
1046 * SELinux people this will change and
1047 * it will probably be identical to
1048 * NAME_MAX. For now we use that, but
1049 * this should be updated one day when
1050 * the final limit is known.*/
1051 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1052 CMSG_SPACE(sizeof(struct timeval)) +
1053 CMSG_SPACE(sizeof(int)) + /* fd */
1054 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask the socket how many bytes are pending (result in v). */
1061 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1062 log_error("SIOCINQ failed: %m");
/* Grow the receive buffer if the pending data does not fit; one byte
 * more than l is allocated. */
1066 if (s->buffer_size < (size_t) v) {
1070 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1071 b = realloc(s->buffer, l+1);
1074 log_error("Couldn't increase buffer.");
1083 iovec.iov_base = s->buffer;
1084 iovec.iov_len = s->buffer_size;
1088 msghdr.msg_iov = &iovec;
1089 msghdr.msg_iovlen = 1;
1090 msghdr.msg_control = &control;
1091 msghdr.msg_controllen = sizeof(control);
/* Non-blocking receive; passed file descriptors get close-on-exec. */
1093 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1096 if (errno == EINTR || errno == EAGAIN)
1099 log_error("recvmsg() failed: %m");
/* Pick sender credentials, SELinux label, timestamp and passed file
 * descriptors out of the control messages. */
1103 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1105 if (cmsg->cmsg_level == SOL_SOCKET &&
1106 cmsg->cmsg_type == SCM_CREDENTIALS &&
1107 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1108 ucred = (struct ucred*) CMSG_DATA(cmsg);
1109 else if (cmsg->cmsg_level == SOL_SOCKET &&
1110 cmsg->cmsg_type == SCM_SECURITY) {
1111 label = (char*) CMSG_DATA(cmsg);
1112 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1113 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1114 cmsg->cmsg_type == SO_TIMESTAMP &&
1115 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1116 tv = (struct timeval*) CMSG_DATA(cmsg);
1117 else if (cmsg->cmsg_level == SOL_SOCKET &&
1118 cmsg->cmsg_type == SCM_RIGHTS) {
1119 fds = (int*) CMSG_DATA(cmsg);
1120 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* Syslog socket: payload only, file descriptors are not accepted. */
1124 if (ev->data.fd == s->syslog_fd) {
1127 if (n > 0 && n_fds == 0) {
1128 e = memchr(s->buffer, '\n', n);
1134 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1135 } else if (n_fds > 0)
1136 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native socket: either an inline payload or exactly one file
 * descriptor with an empty payload. */
1139 if (n > 0 && n_fds == 0)
1140 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1141 else if (n == 0 && n_fds == 1)
1142 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1144 log_warning("Got too many file descriptors via native socket. Ignoring.");
/* Close every file descriptor that was received. */
1147 close_many(fds, n_fds);
1152 } else if (ev->data.fd == s->stdout_fd) {
1154 if (ev->events != EPOLLIN) {
1155 log_error("Got invalid event from epoll.");
1159 stdout_stream_new(s);
1163 StdoutStream *stream;
/* Only EPOLLIN and EPOLLHUP are acceptable for stream fds. */
1165 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1166 log_error("Got invalid event from epoll.");
1170 /* If it is none of the well-known fds, it must be an
1171 * stdout stream fd. Note that this is a bit ugly here
1172 * (since we rely that none of the well-known fds
1173 * could be interpreted as pointer), but nonetheless
1174 * safe, since the well-known fds would never get an
1175 * fd > 4096, i.e. beyond the first memory page */
1177 stream = ev->data.ptr;
/* A result <= 0 from processing ends the stream. */
1179 if (stdout_stream_process(stream) <= 0)
1180 stdout_stream_free(stream);
1185 log_error("Unknown event.");
/* Installs a signal mask of SIGINT, SIGTERM, SIGUSR1 and SIGUSR2, creates a
 * non-blocking close-on-exec signalfd for the same set in s->signal_fd and
 * registers it with s->epoll_fd for EPOLLIN. */
1189 static int open_signalfd(Server *s) {
1191 struct epoll_event ev;
1195 assert_se(sigemptyset(&mask) == 0);
1196 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
/* SIG_SETMASK replaces the process signal mask with this set. */
1197 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1199 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1200 if (s->signal_fd < 0) {
1201 log_error("signalfd(): %m");
1206 ev.events = EPOLLIN;
1207 ev.data.fd = s->signal_fd;
1209 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1210 log_error("epoll_ctl(): %m");
/* Applies journald switches found on the kernel command line
 * (/proc/cmdline): systemd.journald.forward_to_syslog=,
 * systemd.journald.forward_to_kmsg= and
 * systemd.journald.forward_to_console=, each taking a boolean. Unparsable
 * values and other words starting with "systemd.journald" are warned about
 * and ignored. */
1217 static int server_parse_proc_cmdline(Server *s) {
1218 char _cleanup_free_ *line = NULL;
/* Checked before /proc/cmdline is read. */
1223 if (detect_container(NULL) > 0)
1226 r = read_one_line_file("/proc/cmdline", &line);
1228 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1232 FOREACH_WORD_QUOTED(w, l, line, state) {
1233 char _cleanup_free_ *word;
1235 word = strndup(w, l);
/* The numeric offsets below equal the length of the matched prefix,
 * i.e. they point at the value after '='. */
1239 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1240 r = parse_boolean(word + 35);
1242 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1244 s->forward_to_syslog = r;
1245 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1246 r = parse_boolean(word + 33);
1248 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1250 s->forward_to_kmsg = r;
1251 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1252 r = parse_boolean(word + 36);
1254 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1256 s->forward_to_console = r;
1257 } else if (startswith(word, "systemd.journald"))
1258 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Reads /etc/systemd/journald.conf and applies its [Journal] section to s
 * through config_parse() with the gperf-generated lookup table. Failures to
 * open (other than the ENOENT case, which is checked separately) or to
 * parse the file are logged as warnings. */
1264 static int server_parse_config_file(Server *s) {
1265 static const char *fn = "/etc/systemd/journald.conf";
1266 FILE _cleanup_fclose_ *f = NULL;
/* "e": open the file with close-on-exec set. */
1271 f = fopen(fn, "re");
1273 if (errno == ENOENT)
1276 log_warning("Failed to open configuration file %s: %m", fn);
1280 r = config_parse(fn, f, "Journal\0", config_item_perf_lookup,
1281 (void*) journald_gperf_lookup, false, s);
1283 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Initializes the server object: sets defaults, parses the configuration
 * file and kernel command line, creates the hashmap, mmap cache and epoll
 * instance, takes over sockets passed in by the service manager, opens the
 * remaining input sources, and finally opens the journal files. */
1288 int server_init(Server *s) {
/* Mark every file descriptor as "not open". */
1294 s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1298 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1299 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1301 s->forward_to_syslog = true;
1303 s->max_level_store = LOG_DEBUG;
1304 s->max_level_syslog = LOG_DEBUG;
1305 s->max_level_kmsg = LOG_NOTICE;
1306 s->max_level_console = LOG_INFO;
/* Fill both metrics structures with 0xFF bytes; presumably an all-ones
 * value marks a field as "not configured" -- confirm against the
 * consumers of these structures. */
1308 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1309 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* The kernel command line is parsed after the configuration file, so
 * its switches are applied last. */
1311 server_parse_config_file(s);
1312 server_parse_proc_cmdline(s);
1314 mkdir_p("/run/systemd/journal", 0755);
1316 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1317 if (!s->user_journals)
1320 s->mmap = mmap_cache_new();
1324 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1325 if (s->epoll_fd < 0) {
1326 log_error("Failed to create epoll object: %m");
/* Classify the file descriptors passed in via the environment by socket
 * type and path; at most one of each kind is accepted. */
1330 n = sd_listen_fds(true);
1332 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1336 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1338 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1340 if (s->native_fd >= 0) {
1341 log_error("Too many native sockets passed.");
1347 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1349 if (s->stdout_fd >= 0) {
1350 log_error("Too many stdout sockets passed.");
1356 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1358 if (s->syslog_fd >= 0) {
1359 log_error("Too many /dev/log sockets passed.");
1366 log_error("Unknown socket passed.");
/* Open the individual input sources. */
1371 r = server_open_syslog_socket(s);
1375 r = server_open_native_socket(s);
1379 r = server_open_stdout_socket(s);
1383 r = server_open_dev_kmsg(s);
1387 r = server_open_kernel_seqnum(s);
1391 r = open_signalfd(s);
1395 s->udev = udev_new();
1399 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
/* Last step: open the journal files themselves. */
1403 r = system_journal_open(s);
/* Calls journal_file_maybe_append_tag() with the current CLOCK_REALTIME
 * time on the system journal (if open) and on every per-user journal. The
 * runtime journal is not touched here. */
1410 void server_maybe_append_tags(Server *s) {
/* One timestamp is shared by all files. */
1416 n = now(CLOCK_REALTIME);
1418 if (s->system_journal)
1419 journal_file_maybe_append_tag(s->system_journal, n);
1421 HASHMAP_FOREACH(f, s->user_journals, i)
1422 journal_file_maybe_append_tag(f, n);
/* Releases everything the server holds: stdout streams, journal files, the
 * user journal hashmap, all file descriptors, the rate limiter, the kernel
 * sequence number mapping, the mmap cache and the udev context. */
1426 void server_done(Server *s) {
/* Free streams until the list is empty. */
1430 while (s->stdout_streams)
1431 stdout_stream_free(s->stdout_streams);
1433 if (s->system_journal)
1434 journal_file_close(s->system_journal);
1436 if (s->runtime_journal)
1437 journal_file_close(s->runtime_journal);
/* Close each per-user journal before freeing the (then empty) map. */
1439 while ((f = hashmap_steal_first(s->user_journals)))
1440 journal_file_close(f);
1442 hashmap_free(s->user_journals);
/* File descriptors still at -1 were never opened and are skipped. */
1444 if (s->epoll_fd >= 0)
1445 close_nointr_nofail(s->epoll_fd);
1447 if (s->signal_fd >= 0)
1448 close_nointr_nofail(s->signal_fd);
1450 if (s->syslog_fd >= 0)
1451 close_nointr_nofail(s->syslog_fd);
1453 if (s->native_fd >= 0)
1454 close_nointr_nofail(s->native_fd);
1456 if (s->stdout_fd >= 0)
1457 close_nointr_nofail(s->stdout_fd);
1459 if (s->dev_kmsg_fd >= 0)
1460 close_nointr_nofail(s->dev_kmsg_fd);
1463 journal_rate_limit_free(s->rate_limit);
/* The mapping released here is one uint64_t in size. */
1465 if (s->kernel_seqnum)
1466 munmap(s->kernel_seqnum, sizeof(uint64_t));
1472 mmap_cache_unref(s->mmap);
1475 udev_unref(s->udev);