1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
29 #include <systemd/sd-journal.h>
30 #include <systemd/sd-messages.h>
31 #include <systemd/sd-daemon.h>
34 #include <systemd/sd-login.h>
39 #include "journal-file.h"
40 #include "socket-util.h"
41 #include "cgroup-util.h"
45 #include "conf-parser.h"
46 #include "journal-internal.h"
47 #include "journal-vacuum.h"
48 #include "journal-authenticate.h"
49 #include "journald-server.h"
50 #include "journald-rate-limit.h"
51 #include "journald-kmsg.h"
52 #include "journald-syslog.h"
53 #include "journald-stream.h"
54 #include "journald-console.h"
55 #include "journald-native.h"
59 #include <acl/libacl.h>
64 #include <selinux/selinux.h>
/* Upper bound on the number of per-user journal files kept open at once;
 * find_journal() closes entries while the map has reached this size. */
67 #define USER_JOURNALS_MAX 1024
/* Defaults copied into the Server's rate-limit settings by server_init(). */
69 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
70 #define DEFAULT_RATE_LIMIT_BURST 200
/* How long available_space() keeps returning its cached result before
 * measuring again. */
72 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* String names for the Storage enum values, indexed by enum value. The
 * DEFINE_* macros (defined elsewhere) are instantiated for this table;
 * presumably they generate the string<->enum lookup helpers and the
 * config_parse_storage() callback -- confirm against the macro definitions. */
74 static const char* const storage_table[] = {
75 [STORAGE_AUTO] = "auto",
76 [STORAGE_VOLATILE] = "volatile",
77 [STORAGE_PERSISTENT] = "persistent",
78 [STORAGE_NONE] = "none"
81 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
82 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* String names for the SplitMode enum values, indexed by enum value. The
 * DEFINE_* macros (defined elsewhere) are instantiated for this table;
 * presumably they generate the string<->enum lookup helpers and the
 * config_parse_split_mode() callback -- confirm against the macro
 * definitions. */
84 static const char* const split_mode_table[] = {
85 [SPLIT_NONE] = "none",
87 [SPLIT_LOGIN] = "login"
90 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
91 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Estimates how many bytes the journal may still write and caches the
 * result in s->cached_available_space together with a monotonic timestamp
 * in s->cached_available_space_timestamp. While the cached value is younger
 * than RECHECK_AVAILABLE_SPACE_USEC it is returned without looking at the
 * file system again. */
93 static uint64_t available_space(Server *s) {
95 char _cleanup_free_ *p = NULL;
99 uint64_t sum = 0, avail = 0, ss_avail = 0;
101 DIR _cleanup_closedir_ *d = NULL;
105 ts = now(CLOCK_MONOTONIC);
/* Cache still fresh: reuse the previous measurement. */
107 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
108 return s->cached_available_space;
110 r = sd_id128_get_machine(&machine);
/* With a system journal open, the persistent directory and the system
 * metrics are used; the /run directory and the runtime metrics are the
 * alternative. */
114 if (s->system_journal) {
115 f = "/var/log/journal/";
116 m = &s->system_metrics;
118 f = "/run/log/journal/";
119 m = &s->runtime_metrics;
/* The journal directory is <prefix><machine id>. */
124 p = strappend(f, sd_id128_to_string(machine, ids));
/* File system statistics for the journal directory. */
132 if (fstatvfs(dirfd(d), &ss) < 0)
138 union dirent_storage buf;
140 r = readdir_r(d, &buf.de, &de);
/* Directory entries are tested for the suffixes .journal / .journal~ and,
 * via fstatat() without following symlinks, for being regular files. */
147 if (!endswith(de->d_name, ".journal") &&
148 !endswith(de->d_name, ".journal~"))
151 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
154 if (!S_ISREG(st.st_mode))
/* Accumulate disk usage: st_blocks times 512 bytes. */
157 sum += (uint64_t) st.st_blocks * 512UL;
/* Budget left under the configured max_use limit (0 if already exceeded). */
160 avail = sum >= m->max_use ? 0 : m->max_use - sum;
/* Free space reported by the file system, less the keep_free reserve. */
162 ss_avail = ss.f_bsize * ss.f_bavail;
164 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
166 if (ss_avail < avail)
169 s->cached_available_space = avail;
170 s->cached_available_space_timestamp = ts;
/* Resolves the "adm" group via get_group_creds() and stores the result in
 * s->file_gid. s->file_gid_valid is set afterwards regardless of the
 * outcome, so the lookup is attempted only once per Server. */
175 static void server_read_file_gid(Server *s) {
176 const char *adm = "adm";
/* A previous call already attempted the lookup. */
181 if (s->file_gid_valid)
184 r = get_group_creds(&adm, &s->file_gid);
186 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
188 /* if we couldn't read the gid, then it will be 0, but that's
189 * fine and we shouldn't try to resolve the group again, so
190 * let's just pretend it worked right-away. */
191 s->file_gid_valid = true;
/* Best-effort adjustment of access rights on journal file f.
 *
 * Sets mode 0640 with owner 0 and the group cached in s->file_gid, then
 * edits the file's POSIX ACL so that an ACL_USER entry for uid carries
 * read permission. Every failure is logged as a warning and otherwise
 * ignored.
 *
 * s:   server state (provides the cached group id)
 * f:   journal file whose fd and path are used
 * uid: user that is granted read access through the ACL */
194 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
199 acl_permset_t permset;
/* Make sure s->file_gid has been looked up. */
204 server_read_file_gid(s);
206 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
208 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
/* Load the current ACL of the file. */
214 acl = acl_get_fd(f->fd);
216 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* NOTE(review): acl_find_uid() is a helper defined elsewhere; presumably it
 * locates an existing ACL_USER entry for uid, and the creation below only
 * happens when none was found -- confirm. */
220 r = acl_find_uid(acl, uid, &entry);
223 if (acl_create_entry(&acl, &entry) < 0 ||
224 acl_set_tag_type(entry, ACL_USER) < 0 ||
225 acl_set_qualifier(entry, &uid) < 0) {
226 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Add read permission to the entry and recompute the ACL mask. */
231 if (acl_get_permset(entry, &permset) < 0 ||
232 acl_add_perm(permset, ACL_READ) < 0 ||
233 acl_calc_mask(&acl) < 0) {
234 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Write the modified ACL back to the file. */
238 if (acl_set_fd(f->fd, acl) < 0)
239 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Picks the journal file that entries for the given uid are written to.
 *
 * The runtime journal is returned whenever it is open. Otherwise a
 * per-user file is looked up in, or opened and added to, s->user_journals.
 * s->system_journal is the fallback on every error path visible here
 * (machine id lookup, path allocation, open, hashmap insertion). */
246 static JournalFile* find_journal(Server *s, uid_t uid) {
254 /* We split up user logs only on /var, not on /run. If the
255 * runtime file is open, we write to it exclusively, in order
256 * to guarantee proper order as soon as we flush /run to
257 * /var and close the runtime file. */
259 if (s->runtime_journal)
260 return s->runtime_journal;
263 return s->system_journal;
265 r = sd_id128_get_machine(&machine);
267 return s->system_journal;
/* Per-user files that are already open are kept in this map, keyed by uid. */
269 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
/* Path of the per-user file: /var/log/journal/<machine id>/user-<uid>.journal */
273 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
274 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
275 return s->system_journal;
277 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
278 /* Too many open? Then let's close one */
279 f = hashmap_steal_first(s->user_journals);
281 journal_file_close(f);
/* Open (creating if necessary) the per-user file with the system metrics. */
284 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
288 return s->system_journal;
/* Give the owning user read access to the file. */
290 server_fix_perms(s, f, uid);
/* Register the new file in the map; on failure it is closed again. */
292 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
294 journal_file_close(f);
295 return s->system_journal;
/* Rotates the runtime journal and the system journal (each only if open)
 * and every per-user journal in s->user_journals. Failures are logged with
 * log_error(); server_fix_perms() is called on the journal files. */
301 void server_rotate(Server *s) {
307 log_debug("Rotating...");
309 if (s->runtime_journal) {
/* The runtime journal passes false where the other journals pass s->seal. */
310 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
/* The pointer may be NULL after a failed rotation, hence two messages. */
312 if (s->runtime_journal)
313 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
315 log_error("Failed to create new runtime journal: %s", strerror(-r));
317 server_fix_perms(s, s->runtime_journal, 0);
320 if (s->system_journal) {
321 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
323 if (s->system_journal)
324 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
326 log_error("Failed to create new system journal: %s", strerror(-r));
329 server_fix_perms(s, s->system_journal, 0);
/* Per-user journals: k is the map key (the uid), f the journal file. */
332 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
333 r = journal_file_rotate(&f, s->compress, s->seal);
336 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
338 log_error("Failed to create user journal: %s", strerror(-r));
/* Store the file pointer under the same key again, since the rotation
 * call above may have changed f. */
340 hashmap_replace(s->user_journals, k, f);
341 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Vacuums this machine's journal directories: /var/log/journal/<machine id>
 * when the system journal is open and /run/log/journal/<machine id> when
 * the runtime journal is open. Each call passes the matching
 * max_use/keep_free metrics and s->max_retention_usec. */
346 void server_vacuum(Server *s) {
352 log_debug("Vacuuming...");
/* Reset; journal_directory_vacuum() receives a pointer to this field. */
354 s->oldest_file_usec = 0;
356 r = sd_id128_get_machine(&machine);
358 log_error("Failed to get machine ID: %s", strerror(-r));
362 sd_id128_to_string(machine, ids);
364 if (s->system_journal) {
365 p = strappend("/var/log/journal/", ids);
/* A result of -ENOENT is not reported as an error. */
371 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
372 if (r < 0 && r != -ENOENT)
373 log_error("Failed to vacuum %s: %s", p, strerror(-r));
377 if (s->runtime_journal) {
378 p = strappend("/run/log/journal/", ids);
384 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
385 if (r < 0 && r != -ENOENT)
386 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* Clear the timestamp of the cached available-space value so that
 * available_space() measures again instead of using the cache. */
390 s->cached_available_space_timestamp = 0;
/* Derives a cgroup path for pid relative to the cgroup of PID 1: when the
 * process's path starts with init's path (after the adjustment below), the
 * remainder is duplicated with strdup(), i.e. the result is heap-allocated. */
393 static char *shortened_cgroup_path(pid_t pid) {
395 char _cleanup_free_ *process_path = NULL, *init_path = NULL;
/* cgroup path of the process in the systemd controller hierarchy */
400 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
/* cgroup path of PID 1 */
404 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
/* Cut a trailing "/system" (7 characters) off init's path. */
408 if (endswith(init_path, "/system"))
409 init_path[strlen(init_path) - 7] = 0;
410 else if (streq(init_path, "/"))
413 if (startswith(process_path, init_path)) {
414 path = strdup(process_path + strlen(init_path));
/* Classifies the error code r of a failed journal write on file f and logs
 * why the file is about to be rotated. Callers use the boolean result to
 * decide whether the write is retried or given up on.
 *
 * f: journal file the failed operation targeted (only f->path is used)
 * r: negative errno-style error code of the failed operation */
423 bool shall_try_append_again(JournalFile *f, int r) {
425 /* -E2BIG Hit configured limit
427 -EDQUOT Quota limit hit
429 -EHOSTDOWN Other machine
430 -EBUSY Unclean shutdown
431 -EPROTONOSUPPORT Unsupported feature
434 -ESHUTDOWN Already archived */
436 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
437 log_debug("%s: Allocation limit reached, rotating.", f->path);
438 else if (r == -EHOSTDOWN)
439 log_info("%s: Journal file from other machine, rotating.", f->path);
440 else if (r == -EBUSY)
441 log_info("%s: Unclean shutdown, rotating.", f->path);
442 else if (r == -EPROTONOSUPPORT)
443 log_info("%s: Unsupported feature, rotating.", f->path);
/* r is negative like every other code tested here, so the "already
 * archived" case has to be compared against -ESHUTDOWN; the positive
 * constant could never match. */
444 else if (r == -EBADMSG || r == -ENODATA || r == -ESHUTDOWN)
445 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Appends one entry, given as n iovec fields, to the journal file selected
 * for uid by find_journal(). A failed append is retried once, unless
 * `vacuumed` is already set or shall_try_append_again() rejects the error;
 * in those cases the entry is dropped with an error message.
 *
 * NOTE(review): `vacuumed` is presumably set where rotation/vacuuming is
 * triggered earlier in this function -- confirm. */
452 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
454 bool vacuumed = false;
/* Pick the target file for this uid. */
461 f = find_journal(s, uid);
/* The file itself indicates that it should be rotated (limits reached or
 * older than s->max_file_usec); the target is looked up again afterwards. */
465 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
466 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
471 f = find_journal(s, uid);
/* First attempt. */
476 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
480 if (vacuumed || !shall_try_append_again(f, r)) {
481 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* Second and last attempt, against a freshly looked-up target file. */
488 f = find_journal(s, uid);
492 log_debug("Retrying write.");
493 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
495 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* Appends the trusted metadata fields (names starting with '_') to an entry
 * and hands it to write_to_journal().
 *
 * iovec:     field array of the entry; n entries are filled in on entry
 * n, m:      used and total number of slots in iovec; the assertion below
 *            requires room for N_IOVEC_META_FIELDS additional fields
 * ucred:     credentials of the sender, source of the process metadata
 * tv:        source timestamp for _SOURCE_REALTIME_TIMESTAMP
 * label:     SELinux label of the sender, label_len bytes long
 * unit_id:   unit name used when none can be derived from the PID
 *
 * Every field string is allocated here and owned by a _cleanup_free_
 * variable, so it is released when the function returns. A field whose
 * string cannot be built is left out rather than treated as an error. */
498 static void dispatch_message_real(
500 struct iovec *iovec, unsigned n, unsigned m,
503 const char *label, size_t label_len,
504 const char *unit_id) {
506 char _cleanup_free_ *pid = NULL, *uid = NULL, *gid = NULL,
507 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
508 *comm = NULL, *cmdline = NULL, *hostname = NULL,
509 *audit_session = NULL, *audit_loginuid = NULL,
510 *exe = NULL, *cgroup = NULL, *session = NULL,
511 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
517 uid_t loginuid = 0, realuid = 0;
519 bool loginuid_valid = false;
524 assert(n + N_IOVEC_META_FIELDS <= m);
/* Fields taken directly from the socket credentials. */
532 realuid = ucred->uid;
534 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
535 IOVEC_SET_STRING(iovec[n++], pid);
537 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
538 IOVEC_SET_STRING(iovec[n++], uid);
540 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
541 IOVEC_SET_STRING(iovec[n++], gid);
/* Fields looked up by PID: name, executable and command line. */
543 r = get_process_comm(ucred->pid, &t);
545 comm = strappend("_COMM=", t);
549 IOVEC_SET_STRING(iovec[n++], comm);
552 r = get_process_exe(ucred->pid, &t);
554 exe = strappend("_EXE=", t);
558 IOVEC_SET_STRING(iovec[n++], exe);
561 r = get_process_cmdline(ucred->pid, 0, false, &t);
563 cmdline = strappend("_CMDLINE=", t);
567 IOVEC_SET_STRING(iovec[n++], cmdline);
/* Audit session and login uid; the login uid is also remembered for the
 * journal selection at the end of the function. */
570 r = audit_session_from_pid(ucred->pid, &audit);
572 if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
573 IOVEC_SET_STRING(iovec[n++], audit_session);
575 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
577 loginuid_valid = true;
578 if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
579 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
582 t = shortened_cgroup_path(ucred->pid);
584 cgroup = strappend("_SYSTEMD_CGROUP=", t);
588 IOVEC_SET_STRING(iovec[n++], cgroup);
592 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
593 session = strappend("_SYSTEMD_SESSION=", t);
597 IOVEC_SET_STRING(iovec[n++], session);
/* sd_pid_get_owner_uid() takes a PID as its first argument, so the
 * sender's pid is passed here, as in the session lookup above. */
600 if (sd_pid_get_owner_uid(ucred->pid, &owner) >= 0)
601 if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
602 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* Unit: system unit of the PID, else its user unit, else the unit_id
 * supplied by the caller. */
605 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
606 unit = strappend("_SYSTEMD_UNIT=", t);
608 } else if (cg_pid_get_user_unit(ucred->pid, &t) >= 0) {
609 unit = strappend("_SYSTEMD_USER_UNIT=", t);
611 } else if (unit_id) {
613 unit = strappend("_SYSTEMD_USER_UNIT=", unit_id);
615 unit = strappend("_SYSTEMD_UNIT=", unit_id);
619 IOVEC_SET_STRING(iovec[n++], unit);
/* The label comes with an explicit length, so the field is assembled by
 * hand: prefix, label_len bytes of label, terminating NUL. */
623 selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
624 if (selinux_context) {
625 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
626 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
627 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
628 IOVEC_SET_STRING(iovec[n++], selinux_context);
631 security_context_t con;
/* Alternative source for the context: query it by PID. */
633 if (getpidcon(ucred->pid, &con) >= 0) {
634 selinux_context = strappend("_SELINUX_CONTEXT=", con);
636 IOVEC_SET_STRING(iovec[n++], selinux_context);
645 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
646 (unsigned long long) timeval_load(tv)) >= 0)
647 IOVEC_SET_STRING(iovec[n++], source_time);
650 /* Note that strictly speaking storing the boot id here is
651 * redundant since the entry includes this in-line
652 * anyway. However, we need this indexed, too. */
653 r = sd_id128_get_boot(&id);
655 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
656 IOVEC_SET_STRING(iovec[n++], boot_id);
658 r = sd_id128_get_machine(&id);
660 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
661 IOVEC_SET_STRING(iovec[n++], machine_id);
663 t = gethostname_malloc();
665 hostname = strappend("_HOSTNAME=", t);
668 IOVEC_SET_STRING(iovec[n++], hostname);
/* Choose the uid whose journal receives the entry, based on the split
 * mode: the real uid for SPLIT_UID, for root, or when no login uid is
 * known; the audit login uid otherwise. */
673 if (s->split_mode == SPLIT_NONE)
675 else if (s->split_mode == SPLIT_UID || realuid == 0 || !loginuid_valid)
676 journal_uid = realuid;
678 journal_uid = loginuid;
680 write_to_journal(s, journal_uid, iovec, n);
/* Logs a message that originates from journald itself.
 *
 * Builds an entry with PRIORITY=6, _TRANSPORT=driver and a MESSAGE= field
 * holding the printf-style formatted text, adds a message id field unless
 * message_id equals SD_ID128_NULL, and dispatches the entry with this
 * process's own pid/uid/gid as credentials.
 *
 * s:          server state
 * message_id: 128-bit id of the message, or SD_ID128_NULL for none
 * format:     printf-style format string followed by its arguments */
683 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
/* NOTE(review): sized as 11 + 32 + 1, presumably "MESSAGE_ID=" plus 32 hex
 * digits plus NUL -- confirm against the MESSAGE_ID() macro. */
684 char mid[11 + 32 + 1];
685 char buffer[16 + LINE_MAX + 1];
/* Room for the up to four fields set here plus the metadata fields that
 * dispatch_message_real() appends. */
686 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
694 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
695 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* "MESSAGE=" occupies the first 8 bytes; the formatted text follows. */
697 memcpy(buffer, "MESSAGE=", 8);
698 va_start(ap, format);
699 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
701 char_array_0(buffer);
702 IOVEC_SET_STRING(iovec[n++], buffer);
704 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
705 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
707 IOVEC_SET_STRING(iovec[n++], mid);
/* journald's own credentials stand in for the sender's. */
711 ucred.pid = getpid();
712 ucred.uid = getuid();
713 ucred.gid = getgid();
715 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Entry point for log entries received from clients.
 *
 * The entry's priority is checked against s->max_level_store, the sender's
 * shortened cgroup path is used as the key for rate limiting, and the entry
 * is passed on to dispatch_message_real(). When the rate limiter reports
 * suppressed messages, a driver message stating their number is logged. */
718 void server_dispatch_message(
720 struct iovec *iovec, unsigned n, unsigned m,
723 const char *label, size_t label_len,
728 char _cleanup_free_ *path = NULL;
732 assert(iovec || n == 0);
/* Compare the priority against the configured storage level. */
737 if (LOG_PRI(priority) > s->max_level_store)
/* The rate-limit key is derived from the sender's cgroup path. */
743 path = shortened_cgroup_path(ucred->pid);
747 /* example: /user/lennart/3/foobar
748 * /system/dbus.service/foobar
750 * So let's cut of everything past the third /, since that is
751 * where user directories start */
753 c = strchr(path, '/');
755 c = strchr(c+1, '/');
757 c = strchr(c+1, '/');
/* Ask the rate limiter, passing the priority and the currently available
 * journal space. */
763 rl = journal_rate_limit_test(s->rate_limit, path,
764 priority & LOG_PRIMASK, available_space(s));
769 /* Write a suppression message if we suppressed something */
771 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
772 "Suppressed %u messages from %s", rl - 1, path);
775 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
/* Opens the journal files of this machine as far as they are not open yet:
 * the persistent one at /var/log/journal/<machine id>/system.journal and
 * the runtime one at /run/log/journal/<machine id>/system.journal. Which of
 * them is opened depends on s->storage and on the flag file checked below.
 * After a successful open, permissions are fixed and a driver message
 * reports the max_use limit of the respective metrics. */
779 static int system_journal_open(Server *s) {
785 r = sd_id128_get_machine(&machine);
789 sd_id128_to_string(machine, ids);
/* The persistent journal is only considered in persistent/auto mode and
 * once /run/systemd/journal/flushed exists. */
791 if (!s->system_journal &&
792 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
793 access("/run/systemd/journal/flushed", F_OK) >= 0) {
795 /* If in auto mode: first try to create the machine
796 * path, but not the prefix.
798 * If in persistent mode: create /var/log/journal and
799 * the machine path */
801 if (s->storage == STORAGE_PERSISTENT)
802 (void) mkdir("/var/log/journal/", 0755);
804 fn = strappend("/var/log/journal/", ids);
808 (void) mkdir(fn, 0755);
811 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
815 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
819 char fb[FORMAT_BYTES_MAX];
821 server_fix_perms(s, s->system_journal, 0);
822 server_driver_message(s, SD_ID128_NULL, "Allowing system journal files to grow to %s.",
823 format_bytes(fb, sizeof(fb), s->system_metrics.max_use));
/* -ENOENT and -EROFS do not produce a warning. */
827 if (r != -ENOENT && r != -EROFS)
828 log_warning("Failed to open system journal: %s", strerror(-r));
/* The runtime journal is considered in every storage mode except "none". */
834 if (!s->runtime_journal &&
835 (s->storage != STORAGE_NONE)) {
837 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
841 if (s->system_journal) {
843 /* Try to open the runtime journal, but only
844 * if it already exists, so that we can flush
845 * it into the system journal */
847 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
852 log_warning("Failed to open runtime journal: %s", strerror(-r));
859 /* OK, we really need the runtime journal, so create
860 * it if necessary. */
862 (void) mkdir_parents(fn, 0755);
863 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
867 log_error("Failed to open runtime journal: %s", strerror(-r));
872 if (s->runtime_journal) {
873 char fb[FORMAT_BYTES_MAX];
875 server_fix_perms(s, s->runtime_journal, 0);
876 server_driver_message(s, SD_ID128_NULL, "Allowing runtime journal files to grow to %s.",
877 format_bytes(fb, sizeof(fb), s->runtime_metrics.max_use));
/* Copies the entries of the runtime journal into the system journal, then
 * closes the runtime journal and removes /run/log/journal.
 *
 * The function checks that storage is "auto" or "persistent", that a
 * runtime journal is open, and that a system journal is available after
 * calling system_journal_open(). A copy that fails is retried once if
 * shall_try_append_again() allows it. */
884 int server_flush_to_var(Server *s) {
887 sd_journal *j = NULL;
891 if (s->storage != STORAGE_AUTO &&
892 s->storage != STORAGE_PERSISTENT)
895 if (!s->runtime_journal)
/* Give the persistent journal a chance to be opened now. */
898 system_journal_open(s);
900 if (!s->system_journal)
903 log_debug("Flushing to /var...");
905 r = sd_id128_get_machine(&machine);
907 log_error("Failed to get machine id: %s", strerror(-r));
/* Read back only the runtime journal files. */
911 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
913 log_error("Failed to read runtime journal: %s", strerror(-r));
/* A threshold of 0 turns off the size limit on returned field data. */
917 sd_journal_set_data_threshold(j, 0);
919 SD_JOURNAL_FOREACH(j) {
924 assert(f && f->current_offset > 0);
/* Locate the entry object at the reader's current position ... */
926 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
928 log_error("Can't read entry: %s", strerror(-r));
/* ... and copy it into the system journal. */
932 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
936 if (!shall_try_append_again(s->system_journal, r)) {
937 log_error("Can't write entry: %s", strerror(-r));
944 log_debug("Retrying write.");
945 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
947 log_error("Can't write entry: %s", strerror(-r));
953 journal_file_post_change(s->system_journal);
/* The runtime journal is no longer needed. */
955 journal_file_close(s->runtime_journal);
956 s->runtime_journal = NULL;
959 rm_rf("/run/log/journal", false, true, false);
/* Handles one event returned by epoll.
 *
 * The event is dispatched on ev->data.fd:
 *   - s->signal_fd:                 a signal was received
 *   - s->dev_kmsg_fd:               kernel messages are readable
 *   - s->native_fd / s->syslog_fd:  a datagram arrived on a log socket
 *   - s->stdout_fd:                 a new stdout stream connection
 *   - anything else:                ev->data.ptr is treated as StdoutStream
 *
 * s:  server state
 * ev: the epoll event to process */
967 int process_event(Server *s, struct epoll_event *ev) {
971 if (ev->data.fd == s->signal_fd) {
972 struct signalfd_siginfo sfsi;
975 if (ev->events != EPOLLIN) {
976 log_error("Got invalid event from epoll.");
/* Read exactly one siginfo record from the signalfd. */
980 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
981 if (n != sizeof(sfsi)) {
986 if (errno == EINTR || errno == EAGAIN)
992 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
/* SIGUSR1: create the "flushed" flag file and move the runtime journal
 * contents to /var. */
994 if (sfsi.ssi_signo == SIGUSR1) {
995 touch("/run/systemd/journal/flushed");
996 server_flush_to_var(s);
1000 if (sfsi.ssi_signo == SIGUSR2) {
1008 } else if (ev->data.fd == s->dev_kmsg_fd) {
1011 if (ev->events != EPOLLIN) {
1012 log_error("Got invalid event from epoll.");
1016 r = server_read_dev_kmsg(s);
1022 } else if (ev->data.fd == s->native_fd ||
1023 ev->data.fd == s->syslog_fd) {
1025 if (ev->events != EPOLLIN) {
1026 log_error("Got invalid event from epoll.");
1031 struct msghdr msghdr;
1033 struct ucred *ucred = NULL;
1034 struct timeval *tv = NULL;
1035 struct cmsghdr *cmsg;
1037 size_t label_len = 0;
1039 struct cmsghdr cmsghdr;
1041 /* We use NAME_MAX space for the
1042 * SELinux label here. The kernel
1043 * currently enforces no limit, but
1044 * according to suggestions from the
1045 * SELinux people this will change and
1046 * it will probably be identical to
1047 * NAME_MAX. For now we use that, but
1048 * this should be updated one day when
1049 * the final limit is known.*/
1050 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1051 CMSG_SPACE(sizeof(struct timeval)) +
1052 CMSG_SPACE(sizeof(int)) + /* fd */
1053 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask the kernel how much data is queued on the socket (result in v). */
1060 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1061 log_error("SIOCINQ failed: %m");
/* Grow the receive buffer if it is smaller than v: at least LINE_MAX + v
 * bytes, or double the current size; one extra byte is allocated on top
 * of the computed length l. */
1065 if (s->buffer_size < (size_t) v) {
1069 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1070 b = realloc(s->buffer, l+1);
1073 log_error("Couldn't increase buffer.");
1082 iovec.iov_base = s->buffer;
1083 iovec.iov_len = s->buffer_size;
1087 msghdr.msg_iov = &iovec;
1088 msghdr.msg_iovlen = 1;
1089 msghdr.msg_control = &control;
1090 msghdr.msg_controllen = sizeof(control);
/* Non-blocking receive; fds passed along are opened close-on-exec. */
1092 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1095 if (errno == EINTR || errno == EAGAIN)
1098 log_error("recvmsg() failed: %m");
/* Walk the control messages: sender credentials, security label,
 * receive timestamp and passed file descriptors. */
1102 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1104 if (cmsg->cmsg_level == SOL_SOCKET &&
1105 cmsg->cmsg_type == SCM_CREDENTIALS &&
1106 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1107 ucred = (struct ucred*) CMSG_DATA(cmsg);
1108 else if (cmsg->cmsg_level == SOL_SOCKET &&
1109 cmsg->cmsg_type == SCM_SECURITY) {
1110 label = (char*) CMSG_DATA(cmsg);
1111 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1112 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1113 cmsg->cmsg_type == SO_TIMESTAMP &&
1114 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1115 tv = (struct timeval*) CMSG_DATA(cmsg);
1116 else if (cmsg->cmsg_level == SOL_SOCKET &&
1117 cmsg->cmsg_type == SCM_RIGHTS) {
1118 fds = (int*) CMSG_DATA(cmsg);
1119 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* Syslog socket: text payload only; file descriptors are not accepted. */
1123 if (ev->data.fd == s->syslog_fd) {
1126 if (n > 0 && n_fds == 0) {
/* Look for the first newline in the received data. */
1127 e = memchr(s->buffer, '\n', n);
1133 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1134 } else if (n_fds > 0)
1135 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native socket: either a payload without fds, or no payload and exactly
 * one fd that refers to the message data. */
1138 if (n > 0 && n_fds == 0)
1139 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1140 else if (n == 0 && n_fds == 1)
1141 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1143 log_warning("Got too many file descriptors via native socket. Ignoring.");
/* Close every descriptor that was passed along with the datagram. */
1146 close_many(fds, n_fds);
1151 } else if (ev->data.fd == s->stdout_fd) {
1153 if (ev->events != EPOLLIN) {
1154 log_error("Got invalid event from epoll.");
1158 stdout_stream_new(s);
1162 StdoutStream *stream;
/* Only EPOLLIN and EPOLLHUP are acceptable here. */
1164 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1165 log_error("Got invalid event from epoll.");
1169 /* If it is none of the well-known fds, it must be an
1170 * stdout stream fd. Note that this is a bit ugly here
1171 * (since we rely that none of the well-known fds
1172 * could be interpreted as pointer), but nonetheless
1173 * safe, since the well-known fds would never get an
1174 * fd > 4096, i.e. beyond the first memory page */
1176 stream = ev->data.ptr;
/* A result <= 0 from processing means the stream is released. */
1178 if (stdout_stream_process(stream) <= 0)
1179 stdout_stream_free(stream);
1184 log_error("Unknown event.");
/* Sets up signal delivery through a file descriptor: the process signal
 * mask is set to SIGINT, SIGTERM, SIGUSR1 and SIGUSR2, a non-blocking,
 * close-on-exec signalfd for the same set is stored in s->signal_fd, and
 * that fd is added to s->epoll_fd for EPOLLIN events. */
1188 static int open_signalfd(Server *s) {
1190 struct epoll_event ev;
1194 assert_se(sigemptyset(&mask) == 0);
1195 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
/* SIG_SETMASK replaces the current mask with exactly this set. */
1196 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1198 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1199 if (s->signal_fd < 0) {
1200 log_error("signalfd(): %m");
/* Register the signalfd with the epoll instance. */
1205 ev.events = EPOLLIN;
1206 ev.data.fd = s->signal_fd;
1208 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1209 log_error("epoll_ctl(): %m");
/* Applies journald switches found on the kernel command line
 * (/proc/cmdline) to the Server:
 *   systemd.journald.forward_to_syslog=
 *   systemd.journald.forward_to_kmsg=
 *   systemd.journald.forward_to_console=
 * Each takes a boolean. Values that fail to parse, and other words starting
 * with "systemd.journald", only produce a warning. */
1216 static int server_parse_proc_cmdline(Server *s) {
1217 char _cleanup_free_ *line = NULL;
/* Container detection is checked before /proc/cmdline is read. */
1222 if (detect_container(NULL) > 0)
1225 r = read_one_line_file("/proc/cmdline", &line);
1227 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
/* Iterate over the (possibly quoted) words of the command line. */
1231 FOREACH_WORD_QUOTED(w, l, line, state) {
1232 char _cleanup_free_ *word;
1234 word = strndup(w, l);
/* The numeric offsets below (35, 33, 36) are the lengths of the
 * respective option prefixes, i.e. they point at the value. */
1238 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1239 r = parse_boolean(word + 35);
1241 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1243 s->forward_to_syslog = r;
1244 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1245 r = parse_boolean(word + 33);
1247 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1249 s->forward_to_kmsg = r;
1250 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1251 r = parse_boolean(word + 36);
1253 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1255 s->forward_to_console = r;
1256 } else if (startswith(word, "systemd.journald"))
1257 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Reads /etc/systemd/journald.conf into the Server using config_parse()
 * with the gperf-generated lookup table. errno is tested for ENOENT after
 * a failed fopen(), before the warning about the failure to open; parse
 * failures are logged as warnings. */
1263 static int server_parse_config_file(Server *s) {
1264 static const char *fn = "/etc/systemd/journald.conf";
1265 FILE _cleanup_fclose_ *f = NULL;
/* "e" requests close-on-exec for the underlying descriptor. */
1270 f = fopen(fn, "re");
1272 if (errno == ENOENT)
1275 log_warning("Failed to open configuration file %s: %m", fn);
/* NOTE(review): "Journal\0" is presumably the NUL-separated list of
 * section names to accept -- confirm against config_parse(). */
1279 r = config_parse(fn, f, "Journal\0", config_item_perf_lookup,
1280 (void*) journald_gperf_lookup, false, s);
1282 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Initializes a Server.
 *
 * In order: sets defaults, reads the configuration file and the kernel
 * command line, creates /run/systemd/journal, allocates the per-user
 * journal map and the mmap cache, creates the epoll instance, takes over
 * sockets passed in by the service manager, opens the syslog, native and
 * stdout sockets, /dev/kmsg, the kernel sequence number and the signalfd,
 * creates the udev context and the rate limiter, and finally opens the
 * journal files. */
1287 int server_init(Server *s) {
/* All descriptors start out as "not open". */
1293 s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1297 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1298 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1300 s->forward_to_syslog = true;
1302 s->max_level_store = LOG_DEBUG;
1303 s->max_level_syslog = LOG_DEBUG;
1304 s->max_level_kmsg = LOG_NOTICE;
1305 s->max_level_console = LOG_INFO;
/* Every byte of both metrics structures is set to 0xFF.
 * NOTE(review): presumably all-ones marks a value as "not configured" --
 * confirm against the JournalMetrics handling. */
1307 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1308 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* Configuration file first, then the kernel command line. */
1310 server_parse_config_file(s);
1311 server_parse_proc_cmdline(s);
1313 mkdir_p("/run/systemd/journal", 0755);
1315 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1316 if (!s->user_journals)
1319 s->mmap = mmap_cache_new();
1323 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1324 if (s->epoll_fd < 0) {
1325 log_error("Failed to create epoll object: %m");
/* Number of file descriptors handed over via socket activation. */
1329 n = sd_listen_fds(true);
1331 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
/* Classify each passed descriptor by socket type and bound path; a second
 * socket of the same kind and any unrecognized socket are reported as
 * errors. */
1335 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1337 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1339 if (s->native_fd >= 0) {
1340 log_error("Too many native sockets passed.");
1346 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1348 if (s->stdout_fd >= 0) {
1349 log_error("Too many stdout sockets passed.");
1355 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1357 if (s->syslog_fd >= 0) {
1358 log_error("Too many /dev/log sockets passed.");
1365 log_error("Unknown socket passed.");
/* Set up the individual input sources. */
1370 r = server_open_syslog_socket(s);
1374 r = server_open_native_socket(s);
1378 r = server_open_stdout_socket(s);
1382 r = server_open_dev_kmsg(s);
1386 r = server_open_kernel_seqnum(s);
1390 r = open_signalfd(s);
1394 s->udev = udev_new();
1398 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
/* Finally open the journal files. */
1402 r = system_journal_open(s);
/* Calls journal_file_maybe_append_tag() with the current CLOCK_REALTIME
 * time on the system journal (if open) and on every per-user journal in
 * s->user_journals. */
1409 void server_maybe_append_tags(Server *s) {
/* One timestamp is taken and shared by all files. */
1415 n = now(CLOCK_REALTIME);
1417 if (s->system_journal)
1418 journal_file_maybe_append_tag(s->system_journal, n);
1420 HASHMAP_FOREACH(f, s->user_journals, i)
1421 journal_file_maybe_append_tag(f, n);
/* Releases the resources held by a Server: stdout streams, the system,
 * runtime and per-user journal files, the per-user map, all open file
 * descriptors, the rate limiter, the kernel sequence number mapping, the
 * mmap cache reference and the udev context reference. */
1425 void server_done(Server *s) {
/* stdout_stream_free() is called on the list head until the list is empty. */
1429 while (s->stdout_streams)
1430 stdout_stream_free(s->stdout_streams);
1432 if (s->system_journal)
1433 journal_file_close(s->system_journal);
1435 if (s->runtime_journal)
1436 journal_file_close(s->runtime_journal);
/* Drain the per-user map, closing each file, then free the map itself. */
1438 while ((f = hashmap_steal_first(s->user_journals)))
1439 journal_file_close(f);
1441 hashmap_free(s->user_journals);
/* Descriptors that were never opened are still -1 and are skipped. */
1443 if (s->epoll_fd >= 0)
1444 close_nointr_nofail(s->epoll_fd);
1446 if (s->signal_fd >= 0)
1447 close_nointr_nofail(s->signal_fd);
1449 if (s->syslog_fd >= 0)
1450 close_nointr_nofail(s->syslog_fd);
1452 if (s->native_fd >= 0)
1453 close_nointr_nofail(s->native_fd);
1455 if (s->stdout_fd >= 0)
1456 close_nointr_nofail(s->stdout_fd);
1458 if (s->dev_kmsg_fd >= 0)
1459 close_nointr_nofail(s->dev_kmsg_fd);
1462 journal_rate_limit_free(s->rate_limit);
/* The kernel sequence number is a memory mapping of one uint64_t. */
1464 if (s->kernel_seqnum)
1465 munmap(s->kernel_seqnum, sizeof(uint64_t));
1471 mmap_cache_unref(s->mmap);
1474 udev_unref(s->udev);