1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
29 #include <systemd/sd-journal.h>
30 #include <systemd/sd-messages.h>
31 #include <systemd/sd-daemon.h>
34 #include <systemd/sd-login.h>
39 #include "journal-file.h"
40 #include "socket-util.h"
41 #include "cgroup-util.h"
45 #include "conf-parser.h"
46 #include "journal-internal.h"
47 #include "journal-vacuum.h"
48 #include "journal-authenticate.h"
49 #include "journald-server.h"
50 #include "journald-rate-limit.h"
51 #include "journald-kmsg.h"
52 #include "journald-syslog.h"
53 #include "journald-stream.h"
54 #include "journald-console.h"
55 #include "journald-native.h"
59 #include <acl/libacl.h>
64 #include <selinux/selinux.h>
/* Cap on the number of per-user journal files kept open at the same time:
 * find_journal() closes entries of s->user_journals while the map holds
 * this many or more. */
67 #define USER_JOURNALS_MAX 1024
/* Values server_init() stores into s->rate_limit_interval and
 * s->rate_limit_burst before configuration is parsed. */
69 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
70 #define DEFAULT_RATE_LIMIT_BURST 200
/* available_space() returns its cached result while the cache timestamp
 * plus this interval is still in the future. */
72 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Strings associated with the Storage enum values. The table is fed to
 * DEFINE_STRING_TABLE_LOOKUP and DEFINE_CONFIG_PARSE_ENUM below; the latter
 * is instantiated under the name config_parse_storage with the given
 * parse-failure message. */
74 static const char* const storage_table[] = {
75 [STORAGE_AUTO] = "auto",
76 [STORAGE_VOLATILE] = "volatile",
77 [STORAGE_PERSISTENT] = "persistent",
78 [STORAGE_NONE] = "none"
81 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
82 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Strings associated with the SplitMode enum values. The table is fed to
 * DEFINE_STRING_TABLE_LOOKUP and DEFINE_CONFIG_PARSE_ENUM below; the latter
 * is instantiated under the name config_parse_split_mode.
 * NOTE(review): dispatch_message_real() also tests SPLIT_UID; confirm that
 * value has an entry in this table. */
84 static const char* const split_mode_table[] = {
85 [SPLIT_NONE] = "none",
87 [SPLIT_LOGIN] = "login"
90 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
91 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Work out how many more bytes the journal may still occupy, with caching.
 *
 * While the previous result is younger than RECHECK_AVAILABLE_SPACE_USEC
 * (monotonic clock) the cached value s->cached_available_space is returned.
 * Otherwise the machine's journal directory is scanned: the /var/log/journal/
 * prefix and s->system_metrics are selected when s->system_journal is set;
 * the /run/log/journal/ prefix and s->runtime_metrics are the alternative.
 * The disk usage (st_blocks * 512) of every regular file whose name ends in
 * ".journal" or ".journal~" is summed up.
 *
 * Two budgets are derived from that:
 *   - max_use minus the bytes already used, and
 *   - free space reported by fstatvfs() minus keep_free,
 * each clamped at 0. The result is stored in s->cached_available_space
 * together with the timestamp; it is presumably the smaller of the two
 * budgets (see the comparison just before the cache is written). */
93 static uint64_t available_space(Server *s) {
95 char _cleanup_free_ *p = NULL;
99 uint64_t sum = 0, avail = 0, ss_avail = 0;
101 DIR _cleanup_closedir_ *d = NULL;
105 ts = now(CLOCK_MONOTONIC);
/* Serve the cached figure while it is still fresh. */
107 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
108 return s->cached_available_space;
110 r = sd_id128_get_machine(&machine);
114 if (s->system_journal) {
115 f = "/var/log/journal/";
116 m = &s->system_metrics;
118 f = "/run/log/journal/";
119 m = &s->runtime_metrics;
/* Directory to scan: <prefix><machine-id> */
124 p = strappend(f, sd_id128_to_string(machine, ids));
132 if (fstatvfs(dirfd(d), &ss) < 0)
138 union dirent_storage buf;
140 r = readdir_r(d, &buf.de, &de);
/* Only journal files (active or archived/renamed with '~') are counted. */
147 if (!endswith(de->d_name, ".journal") &&
148 !endswith(de->d_name, ".journal~"))
151 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
154 if (!S_ISREG(st.st_mode))
/* st_blocks is counted in 512-byte units. */
157 sum += (uint64_t) st.st_blocks * 512UL;
/* Budget left under the configured maximum usage. */
160 avail = sum >= m->max_use ? 0 : m->max_use - sum;
/* Budget left on the file system after reserving keep_free. */
162 ss_avail = ss.f_bsize * ss.f_bavail;
164 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
166 if (ss_avail < avail)
169 s->cached_available_space = avail;
170 s->cached_available_space_timestamp = ts;
/* Resolve the "adm" group into s->file_gid, at most once per server.
 *
 * s->file_gid_valid is checked first and marks the lookup as done. A failed
 * lookup is only logged as a warning; the flag is set regardless so that
 * the resolution is never attempted again. */
175 static void server_read_file_gid(Server *s) {
176 const char *adm = "adm";
181 if (s->file_gid_valid)
184 r = get_group_creds(&adm, &s->file_gid);
186 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
188 /* if we couldn't read the gid, then it will be 0, but that's
189 * fine and we shouldn't try to resolve the group again, so
190 * let's just pretend it worked right-away. */
191 s->file_gid_valid = true;
/* Adjust ownership, mode and ACL of an open journal file.
 *
 * s:   server, used for the cached journal group id (s->file_gid)
 * f:   journal file whose descriptor f->fd is modified
 * uid: user that should receive read access through a POSIX ACL entry
 *
 * The file is set to mode 0640, owner 0 and group s->file_gid. Then the ACL
 * of the file is read, an ACL_USER entry for uid is looked up (and created
 * with that qualifier if needed), ACL_READ is added to its permission set,
 * the mask is recalculated and the ACL is written back.
 * Every failure is logged as a warning and otherwise ignored, as the log
 * messages state; nothing is returned to the caller. */
194 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
199 acl_permset_t permset;
204 server_read_file_gid(s);
206 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
208 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
214 acl = acl_get_fd(f->fd);
216 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Reuse an existing entry for this uid if there is one. */
220 r = acl_find_uid(acl, uid, &entry);
223 if (acl_create_entry(&acl, &entry) < 0 ||
224 acl_set_tag_type(entry, ACL_USER) < 0 ||
225 acl_set_qualifier(entry, &uid) < 0) {
226 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Grant read permission and bring the ACL mask up to date. */
231 if (acl_get_permset(entry, &permset) < 0 ||
232 acl_add_perm(permset, ACL_READ) < 0 ||
233 acl_calc_mask(&acl) < 0) {
234 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
238 if (acl_set_fd(f->fd, acl) < 0)
239 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Select the journal file that entries of the given uid are written to.
 *
 * If the runtime journal is open it is always returned (see the comment
 * below). Otherwise the per-user file
 * /var/log/journal/<machine-id>/user-<uid>.journal is used: it is looked up
 * in s->user_journals (keyed by uid) and opened, permission-fixed and
 * inserted into that map when needed. Before opening, files are stolen
 * from the map and closed while it holds USER_JOURNALS_MAX or more entries.
 *
 * s->system_journal is the fallback returned on the failure paths visible
 * here (machine id lookup, path allocation, open, map insertion). */
246 static JournalFile* find_journal(Server *s, uid_t uid) {
254 /* We split up user logs only on /var, not on /run. If the
255 * runtime file is open, we write to it exclusively, in order
256 * to guarantee proper order as soon as we flush /run to
257 * /var and close the runtime file. */
259 if (s->runtime_journal)
260 return s->runtime_journal;
263 return s->system_journal;
265 r = sd_id128_get_machine(&machine);
267 return s->system_journal;
269 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
273 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
274 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
275 return s->system_journal;
277 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
278 /* Too many open? Then let's close one */
279 f = hashmap_steal_first(s->user_journals);
281 journal_file_close(f);
/* The system journal is passed as template for the new user journal. */
284 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
288 return s->system_journal;
290 server_fix_perms(s, f, uid);
292 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
294 journal_file_close(f);
295 return s->system_journal;
/* Rotate every journal file the server has open.
 *
 * The runtime journal (rotated with sealing disabled), the system journal
 * and each file in s->user_journals are passed to journal_file_rotate().
 * Failures are logged with log_error(); the message distinguishes between
 * "rotation failed, old file still there" and "no new file could be
 * created". Permissions are re-applied with server_fix_perms() (uid 0 for
 * the runtime and system journals, the map key for user journals), and the
 * user journal map entry is replaced with the rotated file. */
301 void server_rotate(Server *s) {
307 log_debug("Rotating...");
309 if (s->runtime_journal) {
310 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
312 if (s->runtime_journal)
313 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
315 log_error("Failed to create new runtime journal: %s", strerror(-r));
317 server_fix_perms(s, s->runtime_journal, 0);
320 if (s->system_journal) {
321 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
323 if (s->system_journal)
324 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
326 log_error("Failed to create new system journal: %s", strerror(-r));
329 server_fix_perms(s, s->system_journal, 0);
/* k is the map key, i.e. the uid the journal belongs to. */
332 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
333 r = journal_file_rotate(&f, s->compress, s->seal);
336 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
338 log_error("Failed to create user journal: %s", strerror(-r));
340 hashmap_replace(s->user_journals, k, f);
341 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Remove old journal files so that the configured limits are respected.
 *
 * s->oldest_file_usec is reset and then passed to journal_directory_vacuum()
 * as an output argument. For each journal that is open (system: below
 * /var/log/journal/, runtime: below /run/log/journal/) the per-machine
 * directory is vacuumed using that journal's max_use / keep_free metrics
 * and s->max_retention_usec. Errors other than -ENOENT are logged.
 *
 * Finally the available-space cache timestamp is zeroed so that the next
 * available_space() call recomputes its result. */
346 void server_vacuum(Server *s) {
352 log_debug("Vacuuming...");
354 s->oldest_file_usec = 0;
356 r = sd_id128_get_machine(&machine);
358 log_error("Failed to get machine ID: %s", strerror(-r));
362 sd_id128_to_string(machine, ids);
364 if (s->system_journal) {
365 p = strappend("/var/log/journal/", ids);
371 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
372 if (r < 0 && r != -ENOENT)
373 log_error("Failed to vacuum %s: %s", p, strerror(-r));
377 if (s->runtime_journal) {
378 p = strappend("/run/log/journal/", ids);
384 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
385 if (r < 0 && r != -ENOENT)
386 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* Invalidate the cache used by available_space(). */
390 s->cached_available_space_timestamp = 0;
/* Return the cgroup path of a process relative to the cgroup of PID 1.
 *
 * Both paths are queried in the SYSTEMD_CGROUP_CONTROLLER hierarchy. A
 * trailing "/system" (7 characters) is cut off the init path; the root path
 * "/" is special-cased as well. If the process path starts with the
 * resulting init path, a newly allocated copy of the remainder is produced.
 *
 * NOTE(review): the result for a process outside init's subtree and the
 * error returns are not visible here; callers treat the returned pointer
 * as heap memory they own (it is passed to a _cleanup_free_ variable or
 * strappend()'d and then released) - confirm. */
393 static char *shortened_cgroup_path(pid_t pid) {
395 char _cleanup_free_ *process_path = NULL, *init_path = NULL;
400 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
404 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
408 if (endswith(init_path, "/system"))
409 init_path[strlen(init_path) - 7] = 0;
410 else if (streq(init_path, "/"))
413 if (startswith(process_path, init_path)) {
414 path = strdup(process_path + strlen(init_path));
/* Classify the error of a failed journal write and log why the file is
 * going to be rotated.
 *
 * f: journal file the write was attempted on (only f->path is used, for
 *    logging)
 * r: negative errno-style code returned by the failed append/copy
 *
 * Callers (write_to_journal(), server_flush_to_var()) give up when this
 * returns false and otherwise retry the write once.
 *
 * All codes are compared in their negated form, matching the convention of
 * the functions that produce r. That includes -ESHUTDOWN: comparing against
 * the positive constant could never match a negative error return. */
423 bool shall_try_append_again(JournalFile *f, int r) {
425 /* -E2BIG Hit configured limit
427 -EDQUOT Quota limit hit
429 -EHOSTDOWN Other machine
430 -EBUSY Unclean shutdown
431 -EPROTONOSUPPORT Unsupported feature
434 -ESHUTDOWN Already archived */
436 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
437 log_debug("%s: Allocation limit reached, rotating.", f->path);
438 else if (r == -EHOSTDOWN)
439 log_info("%s: Journal file from other machine, rotating.", f->path);
440 else if (r == -EBUSY)
441 log_info("%s: Unclean shutdown, rotating.", f->path);
442 else if (r == -EPROTONOSUPPORT)
443 log_info("%s: Unsupported feature, rotating.", f->path);
444 else if (r == -EBADMSG || r == -ENODATA || r == -ESHUTDOWN)
445 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Append one entry, given as n iovec fields, to the journal selected for uid.
 *
 * The target file comes from find_journal(). If
 * journal_file_rotate_suggested() asks for it, the target is looked up
 * again after the steps following the debug message (presumably rotation
 * and vacuuming - not visible here). When the append fails, the error is
 * final if vacuuming already happened in this call or
 * shall_try_append_again() rejects the error code; otherwise the journal is
 * looked up once more and the write is retried a single time. Failures are
 * logged and the entry is dropped; nothing is reported to the caller. */
452 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
454 bool vacuumed = false;
461 f = find_journal(s, uid);
465 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
466 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
471 f = find_journal(s, uid);
476 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
/* Do not loop: at most one recovery attempt per entry. */
480 if (vacuumed || !shall_try_append_again(f, r)) {
481 log_error("Failed to write entry, ignoring: %s", strerror(-r));
488 f = find_journal(s, uid);
492 log_debug("Retrying write.");
493 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
495 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* Append the trusted metadata fields to an entry and pass it on for writing.
 *
 * iovec/n/m: field array, number of fields already filled in and total
 *            capacity; the assertion requires room for
 *            N_IOVEC_META_FIELDS additional entries
 * ucred:     credentials of the sending process; its pid/uid/gid are used
 *            for the per-process fields (_PID, _UID, _GID, _COMM, _EXE,
 *            _CMDLINE, _AUDIT_SESSION, _AUDIT_LOGINUID, _SYSTEMD_CGROUP,
 *            _SYSTEMD_SESSION, _SYSTEMD_OWNER_UID, _SYSTEMD_UNIT or
 *            _SYSTEMD_USER_UNIT, _SELINUX_CONTEXT)
 * tv:        source timestamp for _SOURCE_REALTIME_TIMESTAMP
 * label, label_len: SELinux label received with the message, not
 *            necessarily NUL-terminated (it is copied by length)
 * unit_id:   unit name used when none can be derived from the process
 *
 * _BOOT_ID, _MACHINE_ID and _HOSTNAME are appended as well. Every field
 * is best effort: if a lookup or allocation fails the field is simply left
 * out. The field strings live in the _cleanup_free_ locals of this function.
 *
 * The uid selecting the target journal depends on s->split_mode: 0 for
 * SPLIT_NONE, the real uid for SPLIT_UID, and otherwise the audit login
 * uid (0 if the real uid is 0).
 *
 * sd_pid_get_owner_uid() takes a process id, so it is given ucred->pid;
 * passing the sender's uid there would look up an unrelated process. */
498 static void dispatch_message_real(
500 struct iovec *iovec, unsigned n, unsigned m,
503 const char *label, size_t label_len,
504 const char *unit_id) {
506 char _cleanup_free_ *pid = NULL, *uid = NULL, *gid = NULL,
507 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
508 *comm = NULL, *cmdline = NULL, *hostname = NULL,
509 *audit_session = NULL, *audit_loginuid = NULL,
510 *exe = NULL, *cgroup = NULL, *session = NULL,
511 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
517 uid_t loginuid = 0, realuid = 0;
522 assert(n + N_IOVEC_META_FIELDS <= m);
530 realuid = ucred->uid;
532 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
533 IOVEC_SET_STRING(iovec[n++], pid);
535 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
536 IOVEC_SET_STRING(iovec[n++], uid);
538 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
539 IOVEC_SET_STRING(iovec[n++], gid);
541 r = get_process_comm(ucred->pid, &t);
543 comm = strappend("_COMM=", t);
547 IOVEC_SET_STRING(iovec[n++], comm);
550 r = get_process_exe(ucred->pid, &t);
552 exe = strappend("_EXE=", t);
556 IOVEC_SET_STRING(iovec[n++], exe);
559 r = get_process_cmdline(ucred->pid, 0, false, &t);
561 cmdline = strappend("_CMDLINE=", t);
565 IOVEC_SET_STRING(iovec[n++], cmdline);
568 r = audit_session_from_pid(ucred->pid, &audit);
570 if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
571 IOVEC_SET_STRING(iovec[n++], audit_session);
573 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
575 if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
576 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
578 t = shortened_cgroup_path(ucred->pid);
580 cgroup = strappend("_SYSTEMD_CGROUP=", t);
584 IOVEC_SET_STRING(iovec[n++], cgroup);
588 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
589 session = strappend("_SYSTEMD_SESSION=", t);
593 IOVEC_SET_STRING(iovec[n++], session);
/* Owner of the session the sending process belongs to: keyed by PID. */
596 if (sd_pid_get_owner_uid(ucred->pid, &owner) >= 0)
597 if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
598 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* Prefer the unit derived from the process' cgroup; fall back to the
 * unit id supplied by the caller. */
601 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
602 unit = strappend("_SYSTEMD_UNIT=", t);
604 } else if (cg_pid_get_user_unit(ucred->pid, &t) >= 0) {
605 unit = strappend("_SYSTEMD_USER_UNIT=", t);
607 } else if (unit_id) {
609 unit = strappend("_SYSTEMD_USER_UNIT=", unit_id);
611 unit = strappend("_SYSTEMD_UNIT=", unit_id);
615 IOVEC_SET_STRING(iovec[n++], unit);
/* sizeof() of the literal includes the NUL, so this has room for
 * prefix + label + terminator. */
619 selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
620 if (selinux_context) {
621 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
622 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
623 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
624 IOVEC_SET_STRING(iovec[n++], selinux_context);
627 security_context_t con;
629 if (getpidcon(ucred->pid, &con) >= 0) {
630 selinux_context = strappend("_SELINUX_CONTEXT=", con);
632 IOVEC_SET_STRING(iovec[n++], selinux_context);
641 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
642 (unsigned long long) timeval_load(tv)) >= 0)
643 IOVEC_SET_STRING(iovec[n++], source_time);
646 /* Note that strictly speaking storing the boot id here is
647 * redundant since the entry includes this in-line
648 * anyway. However, we need this indexed, too. */
649 r = sd_id128_get_boot(&id);
651 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
652 IOVEC_SET_STRING(iovec[n++], boot_id);
654 r = sd_id128_get_machine(&id);
656 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
657 IOVEC_SET_STRING(iovec[n++], machine_id);
659 t = gethostname_malloc();
661 hostname = strappend("_HOSTNAME=", t);
664 IOVEC_SET_STRING(iovec[n++], hostname);
670 s->split_mode == SPLIT_NONE ? 0 :
671 (s->split_mode == SPLIT_UID ? realuid :
672 (realuid == 0 ? 0 : loginuid)), iovec, n);
/* Log a message generated by journald itself.
 *
 * s:          server
 * message_id: 128-bit message id; a MESSAGE_ID= field is added unless it
 *             equals SD_ID128_NULL
 * format,...: printf-style text for the MESSAGE= field; it is formatted
 *             into a fixed-size stack buffer, so longer text is truncated
 *
 * The entry carries PRIORITY=6 and _TRANSPORT=driver and is dispatched with
 * the credentials (pid/uid/gid) of the calling process, no timestamp, no
 * SELinux label and no unit id. */
675 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
/* "MESSAGE_ID=" is 11 characters, the id is 32 hex digits, plus NUL. */
676 char mid[11 + 32 + 1];
677 char buffer[16 + LINE_MAX + 1];
/* Up to four fields of our own; the rest is reserved for the metadata
 * dispatch_message_real() appends. */
678 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
686 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
687 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
689 memcpy(buffer, "MESSAGE=", 8);
690 va_start(ap, format);
691 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
693 char_array_0(buffer);
694 IOVEC_SET_STRING(iovec[n++], buffer);
696 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
697 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
699 IOVEC_SET_STRING(iovec[n++], mid);
703 ucred.pid = getpid();
704 ucred.uid = getuid();
705 ucred.gid = getgid();
707 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Entry point for a received log entry: filter, rate-limit, then store it.
 *
 * iovec/n/m: fields of the entry, number in use and capacity (iovec may be
 *            NULL only when n is 0)
 * label, label_len: SELinux label of the sender, passed through unchanged
 *
 * Entries whose priority is numerically above s->max_level_store are
 * filtered out. For rate limiting the sender's shortened cgroup path is
 * truncated as described in the comment below and handed to
 * journal_rate_limit_test() together with the priority and the currently
 * available space. If the result indicates that messages were suppressed
 * before, a driver message with SD_MESSAGE_JOURNAL_DROPPED reports how many
 * (rl - 1). The entry is finally passed to dispatch_message_real().
 *
 * NOTE(review): the conditions guarding the early exits (missing ucred,
 * rl == 0, ...) are not visible here - confirm against the full function. */
710 void server_dispatch_message(
712 struct iovec *iovec, unsigned n, unsigned m,
715 const char *label, size_t label_len,
720 char _cleanup_free_ *path = NULL;
724 assert(iovec || n == 0);
729 if (LOG_PRI(priority) > s->max_level_store)
735 path = shortened_cgroup_path(ucred->pid);
739 /* example: /user/lennart/3/foobar
740 * /system/dbus.service/foobar
742 * So let's cut off everything past the third /, since that is
743 * where user directories start */
745 c = strchr(path, '/');
747 c = strchr(c+1, '/');
749 c = strchr(c+1, '/');
755 rl = journal_rate_limit_test(s->rate_limit, path,
756 priority & LOG_PRIMASK, available_space(s));
761 /* Write a suppression message if we suppressed something */
763 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
764 "Suppressed %u messages from %s", rl - 1, path);
767 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
/* Open the persistent and/or the runtime journal as far as the storage
 * mode allows.
 *
 * Persistent journal (/var/log/journal/<machine-id>/system.journal): only
 * attempted if it is not open yet, the storage mode is "persistent" or
 * "auto", and the flag file /run/systemd/journal/flushed exists. On success
 * its permissions are fixed and a driver message states the size limit. On
 * failure a warning is logged unless the error is -ENOENT or -EROFS.
 *
 * Runtime journal (/run/log/journal/<machine-id>/system.journal): handled
 * unless the storage mode is "none" or it is already open. With the
 * persistent journal open it is only opened if it already exists (so it can
 * be flushed); otherwise it is created, including parent directories. The
 * runtime journal is always opened with sealing disabled.
 *
 * NOTE(review): the value returned to the caller is not visible here. */
771 static int system_journal_open(Server *s) {
777 r = sd_id128_get_machine(&machine);
781 sd_id128_to_string(machine, ids);
783 if (!s->system_journal &&
784 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
785 access("/run/systemd/journal/flushed", F_OK) >= 0) {
787 /* If in auto mode: first try to create the machine
788 * path, but not the prefix.
790 * If in persistent mode: create /var/log/journal and
791 * the machine path */
793 if (s->storage == STORAGE_PERSISTENT)
794 (void) mkdir("/var/log/journal/", 0755);
796 fn = strappend("/var/log/journal/", ids);
800 (void) mkdir(fn, 0755);
803 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
807 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
811 char fb[FORMAT_BYTES_MAX];
813 server_fix_perms(s, s->system_journal, 0);
814 server_driver_message(s, SD_ID128_NULL, "Allowing system journal files to grow to %s.",
815 format_bytes(fb, sizeof(fb), s->system_metrics.max_use));
/* A missing directory or a read-only file system is not worth a warning. */
819 if (r != -ENOENT && r != -EROFS)
820 log_warning("Failed to open system journal: %s", strerror(-r));
826 if (!s->runtime_journal &&
827 (s->storage != STORAGE_NONE)) {
829 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
833 if (s->system_journal) {
835 /* Try to open the runtime journal, but only
836 * if it already exists, so that we can flush
837 * it into the system journal */
839 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
844 log_warning("Failed to open runtime journal: %s", strerror(-r));
851 /* OK, we really need the runtime journal, so create
852 * it if necessary. */
854 (void) mkdir_parents(fn, 0755);
855 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
859 log_error("Failed to open runtime journal: %s", strerror(-r));
864 if (s->runtime_journal) {
865 char fb[FORMAT_BYTES_MAX];
867 server_fix_perms(s, s->runtime_journal, 0);
868 server_driver_message(s, SD_ID128_NULL, "Allowing runtime journal files to grow to %s.",
869 format_bytes(fb, sizeof(fb), s->runtime_metrics.max_use));
/* Move the contents of the runtime journal into the persistent journal.
 *
 * Only applies when the storage mode is "auto" or "persistent" and a
 * runtime journal is open; the persistent journal is (re)opened via
 * system_journal_open() and must be available afterwards.
 *
 * The runtime journal is read through sd_journal_open() with
 * SD_JOURNAL_RUNTIME_ONLY and the data threshold disabled. Each entry is
 * copied into s->system_journal with journal_file_copy_entry(). A failed
 * copy is retried once if shall_try_append_again() accepts the error code;
 * otherwise an error is logged.
 *
 * Afterwards journal_file_post_change() is called on the system journal,
 * the runtime journal is closed and s->runtime_journal cleared, and
 * /run/log/journal is removed with rm_rf().
 *
 * NOTE(review): return values and the exact cleanup ordering on the error
 * paths are not visible here. */
876 int server_flush_to_var(Server *s) {
879 sd_journal *j = NULL;
883 if (s->storage != STORAGE_AUTO &&
884 s->storage != STORAGE_PERSISTENT)
887 if (!s->runtime_journal)
890 system_journal_open(s);
892 if (!s->system_journal)
895 log_debug("Flushing to /var...");
897 r = sd_id128_get_machine(&machine);
899 log_error("Failed to get machine id: %s", strerror(-r));
903 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
905 log_error("Failed to read runtime journal: %s", strerror(-r));
/* Threshold 0: entries are to be copied in full. */
909 sd_journal_set_data_threshold(j, 0);
911 SD_JOURNAL_FOREACH(j) {
916 assert(f && f->current_offset > 0);
918 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
920 log_error("Can't read entry: %s", strerror(-r));
924 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
928 if (!shall_try_append_again(s->system_journal, r)) {
929 log_error("Can't write entry: %s", strerror(-r));
936 log_debug("Retrying write.");
937 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
939 log_error("Can't write entry: %s", strerror(-r));
945 journal_file_post_change(s->system_journal);
947 journal_file_close(s->runtime_journal);
948 s->runtime_journal = NULL;
951 rm_rf("/run/log/journal", false, true, false);
/* Handle a single epoll event, dispatching on the file descriptor stored
 * in ev->data.
 *
 *  - s->signal_fd: read one signalfd_siginfo record (EINTR/EAGAIN are
 *    tolerated) and log the signal. SIGUSR1 creates the flag file
 *    /run/systemd/journal/flushed and flushes the runtime journal to /var;
 *    SIGUSR2 has its own branch.
 *  - s->dev_kmsg_fd: hand over to server_read_dev_kmsg().
 *  - s->native_fd / s->syslog_fd: receive datagrams with recvmsg(), growing
 *    s->buffer to the size reported by SIOCINQ first. Credentials, SELinux
 *    label, timestamp and passed file descriptors are extracted from the
 *    control messages, then the payload goes to the syslog or native
 *    message handlers. Received descriptors are closed afterwards.
 *  - s->stdout_fd: accept a new stream via stdout_stream_new().
 *  - anything else: ev->data.ptr is taken to be a StdoutStream (see the
 *    comment at that branch); the stream is freed when
 *    stdout_stream_process() returns <= 0.
 *
 * For the well-known descriptors any event mask other than exactly EPOLLIN
 * is reported as invalid; streams additionally accept EPOLLHUP.
 *
 * NOTE(review): the return values of the individual branches are not
 * visible here. */
959 int process_event(Server *s, struct epoll_event *ev) {
963 if (ev->data.fd == s->signal_fd) {
964 struct signalfd_siginfo sfsi;
967 if (ev->events != EPOLLIN) {
968 log_error("Got invalid event from epoll.");
972 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
973 if (n != sizeof(sfsi)) {
978 if (errno == EINTR || errno == EAGAIN)
984 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
986 if (sfsi.ssi_signo == SIGUSR1) {
/* The flag file is what lets system_journal_open() use /var. */
987 touch("/run/systemd/journal/flushed");
988 server_flush_to_var(s);
992 if (sfsi.ssi_signo == SIGUSR2) {
1000 } else if (ev->data.fd == s->dev_kmsg_fd) {
1003 if (ev->events != EPOLLIN) {
1004 log_error("Got invalid event from epoll.");
1008 r = server_read_dev_kmsg(s);
1014 } else if (ev->data.fd == s->native_fd ||
1015 ev->data.fd == s->syslog_fd) {
1017 if (ev->events != EPOLLIN) {
1018 log_error("Got invalid event from epoll.");
1023 struct msghdr msghdr;
1025 struct ucred *ucred = NULL;
1026 struct timeval *tv = NULL;
1027 struct cmsghdr *cmsg;
1029 size_t label_len = 0;
1031 struct cmsghdr cmsghdr;
1033 /* We use NAME_MAX space for the
1034 * SELinux label here. The kernel
1035 * currently enforces no limit, but
1036 * according to suggestions from the
1037 * SELinux people this will change and
1038 * it will probably be identical to
1039 * NAME_MAX. For now we use that, but
1040 * this should be updated one day when
1041 * the final limit is known.*/
1042 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1043 CMSG_SPACE(sizeof(struct timeval)) +
1044 CMSG_SPACE(sizeof(int)) + /* fd */
1045 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask how large the pending datagram is so the buffer can be sized. */
1052 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1053 log_error("SIOCINQ failed: %m");
1057 if (s->buffer_size < (size_t) v) {
/* Grow to at least double the old size; one extra byte is allocated
 * beyond the recorded length. */
1061 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1062 b = realloc(s->buffer, l+1);
1065 log_error("Couldn't increase buffer.");
1074 iovec.iov_base = s->buffer;
1075 iovec.iov_len = s->buffer_size;
1079 msghdr.msg_iov = &iovec;
1080 msghdr.msg_iovlen = 1;
1081 msghdr.msg_control = &control;
1082 msghdr.msg_controllen = sizeof(control);
1084 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1087 if (errno == EINTR || errno == EAGAIN)
1090 log_error("recvmsg() failed: %m");
/* Pick out the ancillary data we understand; fixed-size payloads are
 * only accepted with exactly the expected length. */
1094 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1096 if (cmsg->cmsg_level == SOL_SOCKET &&
1097 cmsg->cmsg_type == SCM_CREDENTIALS &&
1098 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1099 ucred = (struct ucred*) CMSG_DATA(cmsg);
1100 else if (cmsg->cmsg_level == SOL_SOCKET &&
1101 cmsg->cmsg_type == SCM_SECURITY) {
1102 label = (char*) CMSG_DATA(cmsg);
1103 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1104 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1105 cmsg->cmsg_type == SO_TIMESTAMP &&
1106 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1107 tv = (struct timeval*) CMSG_DATA(cmsg);
1108 else if (cmsg->cmsg_level == SOL_SOCKET &&
1109 cmsg->cmsg_type == SCM_RIGHTS) {
1110 fds = (int*) CMSG_DATA(cmsg);
1111 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1115 if (ev->data.fd == s->syslog_fd) {
1118 if (n > 0 && n_fds == 0) {
1119 e = memchr(s->buffer, '\n', n);
1125 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1126 } else if (n_fds > 0)
1127 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native protocol: either an inline payload or exactly one fd. */
1130 if (n > 0 && n_fds == 0)
1131 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1132 else if (n == 0 && n_fds == 1)
1133 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1135 log_warning("Got too many file descriptors via native socket. Ignoring.");
1138 close_many(fds, n_fds);
1143 } else if (ev->data.fd == s->stdout_fd) {
1145 if (ev->events != EPOLLIN) {
1146 log_error("Got invalid event from epoll.");
1150 stdout_stream_new(s);
1154 StdoutStream *stream;
/* Anything besides EPOLLIN and EPOLLHUP is unexpected on a stream. */
1156 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1157 log_error("Got invalid event from epoll.");
1161 /* If it is none of the well-known fds, it must be an
1162 * stdout stream fd. Note that this is a bit ugly here
1163 * (since we rely that none of the well-known fds
1164 * could be interpreted as pointer), but nonetheless
1165 * safe, since the well-known fds would never get an
1166 * fd > 4096, i.e. beyond the first memory page */
1168 stream = ev->data.ptr;
1170 if (stdout_stream_process(stream) <= 0)
1171 stdout_stream_free(stream);
1176 log_error("Unknown event.");
/* Route SIGINT, SIGTERM, SIGUSR1 and SIGUSR2 through a signalfd.
 *
 * The process signal mask is replaced (SIG_SETMASK) by exactly these four
 * signals, a non-blocking close-on-exec signalfd for them is stored in
 * s->signal_fd, and that descriptor is registered for EPOLLIN on
 * s->epoll_fd. Failures of signalfd() and epoll_ctl() are logged.
 *
 * NOTE(review): the return values are not visible here. */
1180 static int open_signalfd(Server *s) {
1182 struct epoll_event ev;
1186 assert_se(sigemptyset(&mask) == 0);
1187 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1188 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1190 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1191 if (s->signal_fd < 0) {
1192 log_error("signalfd(): %m");
1197 ev.events = EPOLLIN;
1198 ev.data.fd = s->signal_fd;
1200 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1201 log_error("epoll_ctl(): %m");
/* Apply journald options given on the kernel command line.
 *
 * The detect_container() result is checked before /proc/cmdline is read
 * (presumably to skip parsing inside a container - the action is not
 * visible here). A failure to read the file is logged as a warning that
 * says it is ignored.
 *
 * Recognised words, each taking a boolean value:
 *   systemd.journald.forward_to_syslog=   -> s->forward_to_syslog
 *   systemd.journald.forward_to_kmsg=     -> s->forward_to_kmsg
 *   systemd.journald.forward_to_console=  -> s->forward_to_console
 * Unparsable values and other words starting with "systemd.journald" only
 * produce a warning.
 *
 * The numeric offsets passed to parse_boolean() are the lengths of the
 * respective prefixes including the '='. */
1208 static int server_parse_proc_cmdline(Server *s) {
1209 char _cleanup_free_ *line = NULL;
1214 if (detect_container(NULL) > 0)
1217 r = read_one_line_file("/proc/cmdline", &line);
1219 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1223 FOREACH_WORD_QUOTED(w, l, line, state) {
1224 char _cleanup_free_ *word;
1226 word = strndup(w, l);
1230 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1231 r = parse_boolean(word + 35);
1233 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1235 s->forward_to_syslog = r;
1236 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1237 r = parse_boolean(word + 33);
1239 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1241 s->forward_to_kmsg = r;
1242 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1243 r = parse_boolean(word + 36);
1245 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1247 s->forward_to_console = r;
1248 } else if (startswith(word, "systemd.journald"))
1249 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Load /etc/systemd/journald.conf into the server settings.
 *
 * The file is opened read-only with close-on-exec ("re"). ENOENT is
 * handled separately from other open errors, which are logged as a warning.
 * The "Journal" section is parsed with config_parse() using the
 * gperf-generated lookup table journald_gperf_lookup, with s as the
 * destination. Parse errors are logged as a warning.
 *
 * NOTE(review): the return values are not visible here. */
1255 static int server_parse_config_file(Server *s) {
1256 static const char *fn = "/etc/systemd/journald.conf";
1257 FILE _cleanup_fclose_ *f = NULL;
1262 f = fopen(fn, "re");
1264 if (errno == ENOENT)
1267 log_warning("Failed to open configuration file %s: %m", fn);
1271 r = config_parse(fn, f, "Journal\0", config_item_perf_lookup,
1272 (void*) journald_gperf_lookup, false, s);
1274 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Bring a Server object into operating state.
 *
 * Steps visible here, in order:
 *   1. Mark all descriptors as unset (-1) and install the defaults: rate
 *      limit interval/burst, forwarding to syslog enabled, maximum levels
 *      per target, and both metrics structures filled with 0xFF bytes.
 *   2. Parse the configuration file and the kernel command line; their
 *      results are not checked.
 *   3. Create /run/systemd/journal, the user journal map, the mmap cache
 *      and the epoll instance.
 *   4. Classify the descriptors received via sd_listen_fds() as native
 *      socket (/run/systemd/journal/socket), stdout socket
 *      (/run/systemd/journal/stdout) or syslog socket (/dev/log); a
 *      duplicate of any kind or an unrecognised socket is logged as error.
 *   5. Open the syslog, native and stdout sockets, /dev/kmsg, the kernel
 *      sequence number and the signalfd.
 *   6. Create the udev context and the rate limiter, then open the
 *      journals with system_journal_open().
 *
 * NOTE(review): the error returns after the individual steps are not
 * visible here. */
1279 int server_init(Server *s) {
1285 s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1289 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1290 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1292 s->forward_to_syslog = true;
1294 s->max_level_store = LOG_DEBUG;
1295 s->max_level_syslog = LOG_DEBUG;
1296 s->max_level_kmsg = LOG_NOTICE;
1297 s->max_level_console = LOG_INFO;
/* All bits set; presumably read as "not configured" by the journal file
 * code - confirm. */
1299 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1300 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1302 server_parse_config_file(s);
1303 server_parse_proc_cmdline(s);
1305 mkdir_p("/run/systemd/journal", 0755);
1307 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1308 if (!s->user_journals)
1311 s->mmap = mmap_cache_new();
1315 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1316 if (s->epoll_fd < 0) {
1317 log_error("Failed to create epoll object: %m");
1321 n = sd_listen_fds(true);
1323 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1327 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1329 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1331 if (s->native_fd >= 0) {
1332 log_error("Too many native sockets passed.");
1338 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1340 if (s->stdout_fd >= 0) {
1341 log_error("Too many stdout sockets passed.");
1347 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1349 if (s->syslog_fd >= 0) {
1350 log_error("Too many /dev/log sockets passed.");
1357 log_error("Unknown socket passed.");
1362 r = server_open_syslog_socket(s);
1366 r = server_open_native_socket(s);
1370 r = server_open_stdout_socket(s);
1374 r = server_open_dev_kmsg(s);
1378 r = server_open_kernel_seqnum(s);
1382 r = open_signalfd(s);
1386 s->udev = udev_new();
1390 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1394 r = system_journal_open(s);
/* Give the system journal and every open user journal the chance to append
 * a tag, using one common CLOCK_REALTIME timestamp for all of them. The
 * runtime journal is not visited here; it is always opened and rotated with
 * sealing disabled elsewhere in this file. */
1401 void server_maybe_append_tags(Server *s) {
1407 n = now(CLOCK_REALTIME);
1409 if (s->system_journal)
1410 journal_file_maybe_append_tag(s->system_journal, n);
1412 HASHMAP_FOREACH(f, s->user_journals, i)
1413 journal_file_maybe_append_tag(f, n);
/* Release everything a Server holds.
 *
 * Frees all stdout streams, closes the system, runtime and user journals,
 * frees the user journal map, closes every descriptor that is set (>= 0),
 * frees the rate limiter, unmaps the kernel sequence number mapping
 * (a single uint64_t) if present, and drops the references to the mmap
 * cache and the udev context. The Server structure itself is not freed
 * by the lines visible here. */
1417 void server_done(Server *s) {
/* stdout_stream_free() is expected to unlink the stream from the list
 * head, otherwise this loop would not terminate. */
1421 while (s->stdout_streams)
1422 stdout_stream_free(s->stdout_streams);
1424 if (s->system_journal)
1425 journal_file_close(s->system_journal);
1427 if (s->runtime_journal)
1428 journal_file_close(s->runtime_journal);
1430 while ((f = hashmap_steal_first(s->user_journals)))
1431 journal_file_close(f);
1433 hashmap_free(s->user_journals);
1435 if (s->epoll_fd >= 0)
1436 close_nointr_nofail(s->epoll_fd);
1438 if (s->signal_fd >= 0)
1439 close_nointr_nofail(s->signal_fd);
1441 if (s->syslog_fd >= 0)
1442 close_nointr_nofail(s->syslog_fd);
1444 if (s->native_fd >= 0)
1445 close_nointr_nofail(s->native_fd);
1447 if (s->stdout_fd >= 0)
1448 close_nointr_nofail(s->stdout_fd);
1450 if (s->dev_kmsg_fd >= 0)
1451 close_nointr_nofail(s->dev_kmsg_fd);
1454 journal_rate_limit_free(s->rate_limit);
1456 if (s->kernel_seqnum)
1457 munmap(s->kernel_seqnum, sizeof(uint64_t));
1463 mmap_cache_unref(s->mmap);
1466 udev_unref(s->udev);