1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
29 #include <systemd/sd-journal.h>
30 #include <systemd/sd-messages.h>
31 #include <systemd/sd-daemon.h>
34 #include <systemd/sd-login.h>
39 #include "journal-file.h"
40 #include "socket-util.h"
41 #include "cgroup-util.h"
45 #include "conf-parser.h"
46 #include "journal-internal.h"
47 #include "journal-vacuum.h"
48 #include "journal-authenticate.h"
49 #include "journald-server.h"
50 #include "journald-rate-limit.h"
51 #include "journald-kmsg.h"
52 #include "journald-syslog.h"
53 #include "journald-stream.h"
54 #include "journald-console.h"
55 #include "journald-native.h"
59 #include <acl/libacl.h>
64 #include <selinux/selinux.h>
/* Cap on concurrently open per-user journal files: find_journal() closes
 * entries of s->user_journals while the map holds this many or more. */
67 #define USER_JOURNALS_MAX 1024
/* Initial rate-limit settings that server_init() copies into the Server. */
69 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
70 #define DEFAULT_RATE_LIMIT_BURST 200
/* Lifetime of the value cached by available_space(). */
72 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Names of the Storage enum values, indexed by enum constant. The two macro
 * invocations that follow the table refer to it through its "storage" prefix;
 * presumably they generate the string lookup helpers and the
 * config_parse_storage() parser (macro definitions are not visible here,
 * TODO confirm). */
74 static const char* const storage_table[] = {
75 [STORAGE_AUTO] = "auto",
76 [STORAGE_VOLATILE] = "volatile",
77 [STORAGE_PERSISTENT] = "persistent",
78 [STORAGE_NONE] = "none"
81 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
82 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Names of the SplitMode enum values, handled the same way as storage_table
 * (lookup helpers and config_parse_split_mode() come from the macros). */
84 static const char* const split_mode_table[] = {
85 [SPLIT_NONE] = "none",
87 [SPLIT_LOGIN] = "login"
90 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
91 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Work out how many more bytes the journal directory may grow by.
 *
 * The result is cached in s->cached_available_space and reused until
 * RECHECK_AVAILABLE_SPACE_USEC of monotonic time has passed since
 * s->cached_available_space_timestamp.
 *
 * The directory inspected is <prefix><machine-id>, with prefix
 * /var/log/journal/ and s->system_metrics when a system journal is open;
 * /run/log/journal/ and s->runtime_metrics are the alternative. Two limits
 * are derived:
 *   - max_use minus the on-disk size of all regular *.journal / *.journal~
 *     files in that directory, and
 *   - the free space reported by fstatvfs() minus keep_free,
 * both clamped at zero. */
93 static uint64_t available_space(Server *s) {
98 uint64_t sum = 0, avail = 0, ss_avail = 0;
104 ts = now(CLOCK_MONOTONIC);
/* Cached value still fresh? Then skip the directory scan. */
106 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
107 return s->cached_available_space;
109 r = sd_id128_get_machine(&machine);
113 if (s->system_journal) {
114 f = "/var/log/journal/";
115 m = &s->system_metrics;
117 f = "/run/log/journal/";
118 m = &s->runtime_metrics;
123 p = strappend(f, sd_id128_to_string(machine, ids));
133 if (fstatvfs(dirfd(d), &ss) < 0)
139 union dirent_storage buf;
141 r = readdir_r(d, &buf.de, &de);
/* Only journal files (active or archived-with-tilde suffix) are counted. */
148 if (!endswith(de->d_name, ".journal") &&
149 !endswith(de->d_name, ".journal~"))
152 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
155 if (!S_ISREG(st.st_mode))
/* st_blocks is counted in 512-byte units, so this is the allocated size. */
158 sum += (uint64_t) st.st_blocks * 512UL;
/* Budget left under the configured maximum usage. */
161 avail = sum >= m->max_use ? 0 : m->max_use - sum;
/* NOTE(review): the product is computed in the operands' own types before
 * being widened to uint64_t; where both statvfs fields are 32 bits wide it
 * could wrap — confirm, and consider casting one operand.
 * NOTE(review): POSIX specifies f_bavail in units of f_frsize rather than
 * f_bsize — confirm this is intended. */
163 ss_avail = ss.f_bsize * ss.f_bavail;
/* Space left on the file system after honouring the keep-free reserve. */
165 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
167 if (ss_avail < avail)
/* Remember the result so the next calls can take the fast path above. */
170 s->cached_available_space = avail;
171 s->cached_available_space_timestamp = ts;
/* Resolve the GID of the "adm" group into s->file_gid, at most once.
 *
 * s->file_gid_valid records that the lookup was attempted; it is set even
 * when get_group_creds() fails, in which case only a warning is logged. */
179 static void server_read_file_gid(Server *s) {
180 const char *adm = "adm";
185 if (s->file_gid_valid)
188 r = get_group_creds(&adm, &s->file_gid);
190 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
192 /* if we couldn't read the gid, then it will be 0, but that's
193 * fine and we shouldn't try to resolve the group again, so
194 * let's just pretend it worked right-away. */
195 s->file_gid_valid = true;
/* Apply the standard ownership, mode and ACL to a journal file.
 *
 * s:   server; supplies the cached group ID (resolved on demand).
 * f:   journal file whose descriptor f->fd is adjusted; f->path is used in
 *      log messages only.
 * uid: user who gets a read entry in the file's ACL.
 *
 * The file is set to mode 0640, owner 0 and group s->file_gid. Then the ACL
 * is read, a user entry for uid is located or created, ACL_READ is added to
 * it, the mask is recalculated and the ACL is written back. Every failure is
 * logged as a warning ("ignoring") rather than reported to the caller. */
198 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
203 acl_permset_t permset;
208 server_read_file_gid(s);
210 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
212 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
218 acl = acl_get_fd(f->fd);
220 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an existing ACL entry that names this user. */
224 r = acl_find_uid(acl, uid, &entry);
/* Create a fresh ACL_USER entry qualified with uid. */
227 if (acl_create_entry(&acl, &entry) < 0 ||
228 acl_set_tag_type(entry, ACL_USER) < 0 ||
229 acl_set_qualifier(entry, &uid) < 0) {
230 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Grant read permission and recompute the mask entry to match. */
235 if (acl_get_permset(entry, &permset) < 0 ||
236 acl_add_perm(permset, ACL_READ) < 0 ||
237 acl_calc_mask(&acl) < 0) {
238 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
242 if (acl_set_fd(f->fd, acl) < 0)
243 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Select the journal file that entries attributed to uid are written to.
 *
 * An open runtime journal always wins. Otherwise the per-user file
 * /var/log/journal/<machine-id>/user-<uid>.journal is looked up in
 * s->user_journals and opened (and registered in that map) if it is not
 * there yet. Whenever a step fails, the system journal is returned instead.
 *
 * Before opening a new per-user file, entries are removed from the map and
 * closed until fewer than USER_JOURNALS_MAX remain. */
250 static JournalFile* find_journal(Server *s, uid_t uid) {
258 /* We split up user logs only on /var, not on /run. If the
259 * runtime file is open, we write to it exclusively, in order
260 * to guarantee proper order as soon as we flush /run to
261 * /var and close the runtime file. */
263 if (s->runtime_journal)
264 return s->runtime_journal;
267 return s->system_journal;
269 r = sd_id128_get_machine(&machine);
271 return s->system_journal;
/* Already open for this user? The map is keyed by the uid itself. */
273 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
277 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
278 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
279 return s->system_journal;
281 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
282 /* Too many open? Then let's close one */
283 f = hashmap_steal_first(s->user_journals);
285 journal_file_close(f);
/* Opened with the system metrics and the system journal as template. */
288 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
292 return s->system_journal;
294 server_fix_perms(s, f, uid);
296 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
298 journal_file_close(f);
299 return s->system_journal;
/* Rotate every journal file the server currently has open.
 *
 * The runtime journal (rotated with sealing disabled), the system journal
 * and each file in s->user_journals are passed to journal_file_rotate().
 * Failures are logged; the message differs depending on whether a file
 * object is still available afterwards. Rotated files get their permissions
 * re-applied through server_fix_perms(). */
305 void server_rotate(Server *s) {
311 log_debug("Rotating...");
313 if (s->runtime_journal) {
314 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
316 if (s->runtime_journal)
317 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
319 log_error("Failed to create new runtime journal: %s", strerror(-r));
321 server_fix_perms(s, s->runtime_journal, 0);
324 if (s->system_journal) {
325 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
327 if (s->system_journal)
328 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
330 log_error("Failed to create new system journal: %s", strerror(-r));
333 server_fix_perms(s, s->system_journal, 0);
/* Per-user journals: the key k is the uid the file belongs to. */
336 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
337 r = journal_file_rotate(&f, s->compress, s->seal);
340 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
342 log_error("Failed to create user journal: %s", strerror(-r));
/* Rotation may hand back a different object; store it under the same key. */
344 hashmap_replace(s->user_journals, k, f);
345 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Remove old journal files so that the configured limits are respected.
 *
 * Calls journal_directory_vacuum() on /var/log/journal/<machine-id> when a
 * system journal is open and on /run/log/journal/<machine-id> when a runtime
 * journal is open, each with the matching metrics and s->max_retention_usec.
 * s->oldest_file_usec is reset first and passed to both calls. A result of
 * -ENOENT is not logged. Finally the timestamp of the available_space()
 * cache is cleared. */
350 void server_vacuum(Server *s) {
356 log_debug("Vacuuming...");
358 s->oldest_file_usec = 0;
360 r = sd_id128_get_machine(&machine);
362 log_error("Failed to get machine ID: %s", strerror(-r));
366 sd_id128_to_string(machine, ids);
368 if (s->system_journal) {
369 p = strappend("/var/log/journal/", ids);
375 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
376 if (r < 0 && r != -ENOENT)
377 log_error("Failed to vacuum %s: %s", p, strerror(-r));
381 if (s->runtime_journal) {
382 p = strappend("/run/log/journal/", ids);
388 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
389 if (r < 0 && r != -ENOENT)
390 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* A zero timestamp makes the next available_space() call recompute. */
394 s->cached_available_space_timestamp = 0;
/* Build a cgroup path for pid expressed relative to the cgroup of PID 1.
 *
 * Both paths are queried in the SYSTEMD_CGROUP_CONTROLLER hierarchy. A
 * trailing "/system" is cut from PID 1's path. When the process path begins
 * with the resulting prefix, a newly allocated copy of the part after the
 * prefix is produced. */
397 static char *shortened_cgroup_path(pid_t pid) {
399 char *process_path, *init_path, *path;
403 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
407 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
/* 7 == strlen("/system"): truncate the string right before that suffix. */
413 if (endswith(init_path, "/system"))
414 init_path[strlen(init_path) - 7] = 0;
415 else if (streq(init_path, "/"))
418 if (startswith(process_path, init_path)) {
421 p = strdup(process_path + strlen(init_path));
/* Classify the error of a failed journal write and log why a rotation is
 * about to happen.
 *
 * f: journal file the operation failed on; only f->path is read, for the
 *    log messages.
 * r: negative errno-style result of the failed operation.
 *
 * Recognised codes, by message group:
 *   -E2BIG, -EFBIG, -EDQUOT, -ENOSPC   allocation limit reached
 *   -EHOSTDOWN                         file belongs to another machine
 *   -EBUSY                             unclean shutdown
 *   -EPROTONOSUPPORT                   unsupported feature
 *   -EBADMSG, -ENODATA, -ESHUTDOWN     file corrupted / already archived
 *
 * The callers in this file retry the write when the result is true and give
 * up with an error message when it is false. */
439 bool shall_try_append_again(JournalFile *f, int r) {
441 /* -E2BIG Hit configured limit
443 -EDQUOT Quota limit hit
445 -EHOSTDOWN Other machine
446 -EBUSY Unclean shutdown
447 -EPROTONOSUPPORT Unsupported feature
450 -ESHUTDOWN Already archived */
452 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
453 log_debug("%s: Allocation limit reached, rotating.", f->path);
454 else if (r == -EHOSTDOWN)
455 log_info("%s: Journal file from other machine, rotating.", f->path);
456 else if (r == -EBUSY)
457 log_info("%s: Unclean shutdown, rotating.", f->path);
458 else if (r == -EPROTONOSUPPORT)
459 log_info("%s: Unsupported feature, rotating.", f->path);
/* Results arrive negated, so it is -ESHUTDOWN that has to be matched; the
 * positive constant could never equal a negative error result. */
460 else if (r == -EBADMSG || r == -ENODATA || r == -ESHUTDOWN)
461 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Append one entry, given as n iovec fields, to the journal chosen for uid.
 *
 * The target file comes from find_journal(). If
 * journal_file_rotate_suggested() reports that the file should be rotated,
 * the target is looked up again before writing. When the append fails and
 * shall_try_append_again() accepts the error (and no vacuum has been done
 * yet in this call), the target is looked up once more and the append is
 * retried a single time. Errors are logged and otherwise ignored. */
468 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
470 bool vacuumed = false;
477 f = find_journal(s, uid);
481 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
482 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
487 f = find_journal(s, uid);
492 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
/* Give up when a retry was already attempted or the error is not retriable. */
496 if (vacuumed || !shall_try_append_again(f, r)) {
497 log_error("Failed to write entry, ignoring: %s", strerror(-r));
504 f = find_journal(s, uid);
508 log_debug("Retrying write.");
509 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
511 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* Add the trusted metadata fields to an entry and pass it on for writing.
 *
 * iovec/n/m:       field array with n entries in use and room for m; the
 *                  caller must leave N_IOVEC_META_FIELDS free slots
 *                  (asserted below).
 * label/label_len: SELinux label received with the message, if any; it is
 *                  copied into a _SELINUX_CONTEXT= field.
 * unit_id:         unit name used for _SYSTEMD_UNIT= as an alternative to
 *                  the one derived from the sender's PID.
 *
 * From the sender credentials the function derives _PID, _UID, _GID, _COMM,
 * _EXE, _CMDLINE, _AUDIT_SESSION, _AUDIT_LOGINUID, _SYSTEMD_CGROUP,
 * _SYSTEMD_SESSION, _SYSTEMD_OWNER_UID, _SYSTEMD_UNIT and _SELINUX_CONTEXT;
 * it also adds _SOURCE_REALTIME_TIMESTAMP, _BOOT_ID, _MACHINE_ID and
 * _HOSTNAME. A field whose value cannot be obtained or allocated is simply
 * left out. All strings allocated here are owned by this function. */
514 static void dispatch_message_real(
516 struct iovec *iovec, unsigned n, unsigned m,
519 const char *label, size_t label_len,
520 const char *unit_id) {
522 char *pid = NULL, *uid = NULL, *gid = NULL,
523 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
524 *comm = NULL, *cmdline = NULL, *hostname = NULL,
525 *audit_session = NULL, *audit_loginuid = NULL,
526 *exe = NULL, *cgroup = NULL, *session = NULL,
527 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
533 uid_t loginuid = 0, realuid = 0;
538 assert(n + N_IOVEC_META_FIELDS <= m);
546 realuid = ucred->uid;
548 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
549 IOVEC_SET_STRING(iovec[n++], pid);
551 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
552 IOVEC_SET_STRING(iovec[n++], uid);
554 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
555 IOVEC_SET_STRING(iovec[n++], gid);
557 r = get_process_comm(ucred->pid, &t);
559 comm = strappend("_COMM=", t);
563 IOVEC_SET_STRING(iovec[n++], comm);
566 r = get_process_exe(ucred->pid, &t);
568 exe = strappend("_EXE=", t);
572 IOVEC_SET_STRING(iovec[n++], exe);
575 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
577 cmdline = strappend("_CMDLINE=", t);
581 IOVEC_SET_STRING(iovec[n++], cmdline);
584 r = audit_session_from_pid(ucred->pid, &audit);
586 if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
587 IOVEC_SET_STRING(iovec[n++], audit_session);
589 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
591 if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
592 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
594 t = shortened_cgroup_path(ucred->pid);
596 cgroup = strappend("_SYSTEMD_CGROUP=", t);
600 IOVEC_SET_STRING(iovec[n++], cgroup);
604 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
605 session = strappend("_SYSTEMD_SESSION=", t);
609 IOVEC_SET_STRING(iovec[n++], session);
/* sd_pid_get_owner_uid() is keyed by process ID, like sd_pid_get_session()
 * above, so the sender's PID (not its UID) is the right argument. */
612 if (sd_pid_get_owner_uid(ucred->pid, &owner) >= 0)
613 if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
614 IOVEC_SET_STRING(iovec[n++], owner_uid);
617 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
618 unit = strappend("_SYSTEMD_UNIT=", t);
621 unit = strappend("_SYSTEMD_UNIT=", unit_id);
624 IOVEC_SET_STRING(iovec[n++], unit);
/* The label is length-delimited rather than NUL-terminated, so it is copied
 * by length; sizeof() of the prefix literal already reserves the byte for
 * the terminator written below. */
628 selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
629 if (selinux_context) {
630 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
631 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
632 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
633 IOVEC_SET_STRING(iovec[n++], selinux_context);
636 security_context_t con;
638 if (getpidcon(ucred->pid, &con) >= 0) {
639 selinux_context = strappend("_SELINUX_CONTEXT=", con);
641 IOVEC_SET_STRING(iovec[n++], selinux_context);
650 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
651 (unsigned long long) timeval_load(tv)) >= 0)
652 IOVEC_SET_STRING(iovec[n++], source_time);
655 /* Note that strictly speaking storing the boot id here is
656 * redundant since the entry includes this in-line
657 * anyway. However, we need this indexed, too. */
658 r = sd_id128_get_boot(&id);
660 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
661 IOVEC_SET_STRING(iovec[n++], boot_id);
663 r = sd_id128_get_machine(&id);
665 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
666 IOVEC_SET_STRING(iovec[n++], machine_id);
668 t = gethostname_malloc();
670 hostname = strappend("_HOSTNAME=", t);
673 IOVEC_SET_STRING(iovec[n++], hostname);
679 s->split_mode == SPLIT_NONE ? 0 :
680 (s->split_mode == SPLIT_UID ? realuid :
681 (realuid == 0 ? 0 : loginuid)), iovec, n);
694 free(audit_loginuid);
699 free(selinux_context);
/* Log a message that originates from the journal daemon itself.
 *
 * message_id: ID attached as a MESSAGE_ID field; skipped when it equals
 *             SD_ID128_NULL.
 * format:     printf-style format for the MESSAGE= text; the remaining
 *             arguments supply its values.
 *
 * The entry carries PRIORITY=6 and _TRANSPORT=driver, is attributed to the
 * daemon's own PID/UID/GID and goes straight to dispatch_message_real(). */
702 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
/* 11 == strlen("MESSAGE_ID="); presumably followed by the 32 hex digits of
 * the ID plus the terminator — TODO confirm against MESSAGE_ID(). */
703 char mid[11 + 32 + 1];
704 char buffer[16 + LINE_MAX + 1];
/* Four fields are set here; the rest is headroom for the metadata fields. */
705 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
713 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
714 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The formatted text is written right behind the 8-byte "MESSAGE=" prefix. */
716 memcpy(buffer, "MESSAGE=", 8);
717 va_start(ap, format);
718 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
720 char_array_0(buffer);
721 IOVEC_SET_STRING(iovec[n++], buffer);
723 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
724 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
726 IOVEC_SET_STRING(iovec[n++], mid);
730 ucred.pid = getpid();
731 ucred.uid = getuid();
732 ucred.gid = getgid();
734 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Entry point for messages received from clients.
 *
 * iovec/n/m:       fields of the entry, n in use out of m slots.
 * label/label_len: SELinux label of the sender, passed through unchanged.
 *
 * The priority is checked against s->max_level_store. The sender's
 * shortened cgroup path, truncated as described below, is the key handed to
 * journal_rate_limit_test() together with the priority level and the
 * currently available space. When messages were suppressed, a driver message
 * reporting the count is written. The entry itself is passed to
 * dispatch_message_real(). */
737 void server_dispatch_message(
739 struct iovec *iovec, unsigned n, unsigned m,
742 const char *label, size_t label_len,
747 char *path = NULL, *c;
750 assert(iovec || n == 0);
755 if (LOG_PRI(priority) > s->max_level_store)
761 path = shortened_cgroup_path(ucred->pid);
765 /* example: /user/lennart/3/foobar
766 * /system/dbus.service/foobar
768 * So let's cut off everything past the third /, since that is
769 * where user directories start */
771 c = strchr(path, '/');
773 c = strchr(c+1, '/');
775 c = strchr(c+1, '/');
781 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
788 /* Write a suppression message if we suppressed something */
790 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
795 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
/* Open the persistent and/or the runtime journal as the configuration asks.
 *
 * Persistent journal (/var/log/journal/<machine-id>/system.journal): opened
 * only if it is not open yet, storage is "persistent" or "auto", and the
 * flag file /run/systemd/journal/flushed exists. -ENOENT and -EROFS from the
 * open are not warned about.
 *
 * Runtime journal (/run/log/journal/<machine-id>/system.journal): handled
 * unless storage is "none". With a system journal present it is opened only
 * if it already exists; otherwise it is created as needed.
 *
 * After a successful open the file permissions are fixed and a driver
 * message states the size the journal may grow to. */
799 static int system_journal_open(Server *s) {
805 r = sd_id128_get_machine(&machine);
809 sd_id128_to_string(machine, ids);
811 if (!s->system_journal &&
812 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
813 access("/run/systemd/journal/flushed", F_OK) >= 0) {
815 /* If in auto mode: first try to create the machine
816 * path, but not the prefix.
818 * If in persistent mode: create /var/log/journal and
819 * the machine path */
821 if (s->storage == STORAGE_PERSISTENT)
822 (void) mkdir("/var/log/journal/", 0755);
824 fn = strappend("/var/log/journal/", ids);
828 (void) mkdir(fn, 0755);
831 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
835 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
839 char fb[FORMAT_BYTES_MAX];
841 server_fix_perms(s, s->system_journal, 0);
842 server_driver_message(s, SD_ID128_NULL, "Allowing system journal files to grow to %s.",
843 format_bytes(fb, sizeof(fb), s->system_metrics.max_use));
847 if (r != -ENOENT && r != -EROFS)
848 log_warning("Failed to open system journal: %s", strerror(-r));
854 if (!s->runtime_journal &&
855 (s->storage != STORAGE_NONE)) {
857 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
861 if (s->system_journal) {
863 /* Try to open the runtime journal, but only
864 * if it already exists, so that we can flush
865 * it into the system journal */
867 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
872 log_warning("Failed to open runtime journal: %s", strerror(-r));
879 /* OK, we really need the runtime journal, so create
880 * it if necessary. */
882 (void) mkdir_parents(fn, 0755);
883 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
887 log_error("Failed to open runtime journal: %s", strerror(-r));
892 if (s->runtime_journal) {
893 char fb[FORMAT_BYTES_MAX];
895 server_fix_perms(s, s->runtime_journal, 0);
896 server_driver_message(s, SD_ID128_NULL, "Allowing runtime journal files to grow to %s.",
897 format_bytes(fb, sizeof(fb), s->runtime_metrics.max_use));
/* Copy the contents of the runtime journal into the system journal.
 *
 * Applies only with storage "auto" or "persistent", an open runtime journal
 * and a system journal that system_journal_open() managed to open. The
 * runtime journal is read through sd_journal_open(SD_JOURNAL_RUNTIME_ONLY);
 * each entry is copied with journal_file_copy_entry(), and a failed copy is
 * retried once when shall_try_append_again() accepts the error. Afterwards
 * the runtime journal is closed and /run/log/journal is removed. */
904 int server_flush_to_var(Server *s) {
907 sd_journal *j = NULL;
911 if (s->storage != STORAGE_AUTO &&
912 s->storage != STORAGE_PERSISTENT)
915 if (!s->runtime_journal)
918 system_journal_open(s);
920 if (!s->system_journal)
923 log_debug("Flushing to /var...");
925 r = sd_id128_get_machine(&machine);
927 log_error("Failed to get machine id: %s", strerror(-r));
931 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
933 log_error("Failed to read runtime journal: %s", strerror(-r));
937 SD_JOURNAL_FOREACH(j) {
942 assert(f && f->current_offset > 0);
/* Load the entry object the reader is currently positioned at. */
944 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
946 log_error("Can't read entry: %s", strerror(-r));
950 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
954 if (!shall_try_append_again(s->system_journal, r)) {
955 log_error("Can't write entry: %s", strerror(-r));
962 log_debug("Retrying write.");
963 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
965 log_error("Can't write entry: %s", strerror(-r));
971 journal_file_post_change(s->system_journal);
973 journal_file_close(s->runtime_journal);
974 s->runtime_journal = NULL;
977 rm_rf("/run/log/journal", false, true, false);
/* Handle one event delivered by the server's epoll instance.
 *
 * The event is matched against the server's descriptors:
 *   - signal_fd:   a signalfd_siginfo is read; SIGUSR1 creates the flag file
 *                  /run/systemd/journal/flushed and flushes the runtime
 *                  journal to /var; SIGUSR2 is tested for separately.
 *   - dev_kmsg_fd: handled by server_read_dev_kmsg().
 *   - native_fd / syslog_fd: a datagram is received together with its
 *                  control messages (credentials, SELinux label, timestamp,
 *                  passed file descriptors) and handed to the syslog or
 *                  native message processor.
 *   - stdout_fd:   a new stdout stream is set up via stdout_stream_new().
 *   - otherwise:   ev->data.ptr is taken to be a StdoutStream and processed.
 *
 * Event masks other than the expected ones are logged as invalid. */
985 int process_event(Server *s, struct epoll_event *ev) {
989 if (ev->data.fd == s->signal_fd) {
990 struct signalfd_siginfo sfsi;
993 if (ev->events != EPOLLIN) {
994 log_error("Got invalid event from epoll.");
998 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
999 if (n != sizeof(sfsi)) {
1004 if (errno == EINTR || errno == EAGAIN)
1010 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1012 if (sfsi.ssi_signo == SIGUSR1) {
1013 touch("/run/systemd/journal/flushed");
1014 server_flush_to_var(s);
1018 if (sfsi.ssi_signo == SIGUSR2) {
1026 } else if (ev->data.fd == s->dev_kmsg_fd) {
1029 if (ev->events != EPOLLIN) {
1030 log_error("Got invalid event from epoll.");
1034 r = server_read_dev_kmsg(s);
1040 } else if (ev->data.fd == s->native_fd ||
1041 ev->data.fd == s->syslog_fd) {
1043 if (ev->events != EPOLLIN) {
1044 log_error("Got invalid event from epoll.");
1049 struct msghdr msghdr;
1051 struct ucred *ucred = NULL;
1052 struct timeval *tv = NULL;
1053 struct cmsghdr *cmsg;
1055 size_t label_len = 0;
1057 struct cmsghdr cmsghdr;
1059 /* We use NAME_MAX space for the
1060 * SELinux label here. The kernel
1061 * currently enforces no limit, but
1062 * according to suggestions from the
1063 * SELinux people this will change and
1064 * it will probably be identical to
1065 * NAME_MAX. For now we use that, but
1066 * this should be updated one day when
1067 * the final limit is known.*/
1068 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1069 CMSG_SPACE(sizeof(struct timeval)) +
1070 CMSG_SPACE(sizeof(int)) + /* fd */
1071 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask the socket how many bytes are pending, to size the buffer. */
1078 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1079 log_error("SIOCINQ failed: %m");
1083 if (s->buffer_size < (size_t) v) {
/* Grow to at least double the old size; one extra byte is allocated on top. */
1087 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1088 b = realloc(s->buffer, l+1);
1091 log_error("Couldn't increase buffer.");
1100 iovec.iov_base = s->buffer;
1101 iovec.iov_len = s->buffer_size;
1105 msghdr.msg_iov = &iovec;
1106 msghdr.msg_iovlen = 1;
1107 msghdr.msg_control = &control;
1108 msghdr.msg_controllen = sizeof(control);
1110 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1113 if (errno == EINTR || errno == EAGAIN)
1116 log_error("recvmsg() failed: %m");
/* Pick the sender's credentials, label, timestamp and fds out of the
 * control data; credentials and timestamp are accepted only with the
 * exact expected length. */
1120 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1122 if (cmsg->cmsg_level == SOL_SOCKET &&
1123 cmsg->cmsg_type == SCM_CREDENTIALS &&
1124 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1125 ucred = (struct ucred*) CMSG_DATA(cmsg);
1126 else if (cmsg->cmsg_level == SOL_SOCKET &&
1127 cmsg->cmsg_type == SCM_SECURITY) {
1128 label = (char*) CMSG_DATA(cmsg);
1129 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1130 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1131 cmsg->cmsg_type == SO_TIMESTAMP &&
1132 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1133 tv = (struct timeval*) CMSG_DATA(cmsg);
1134 else if (cmsg->cmsg_level == SOL_SOCKET &&
1135 cmsg->cmsg_type == SCM_RIGHTS) {
1136 fds = (int*) CMSG_DATA(cmsg);
1137 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1141 if (ev->data.fd == s->syslog_fd) {
1144 if (n > 0 && n_fds == 0) {
1145 e = memchr(s->buffer, '\n', n);
1151 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1152 } else if (n_fds > 0)
1153 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native protocol: either inline data, or exactly one fd and no data. */
1156 if (n > 0 && n_fds == 0)
1157 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1158 else if (n == 0 && n_fds == 1)
1159 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1161 log_warning("Got too many file descriptors via native socket. Ignoring.");
1164 close_many(fds, n_fds);
1169 } else if (ev->data.fd == s->stdout_fd) {
1171 if (ev->events != EPOLLIN) {
1172 log_error("Got invalid event from epoll.");
1176 stdout_stream_new(s);
1180 StdoutStream *stream;
/* Any event bit other than EPOLLIN or EPOLLHUP is unexpected here. */
1182 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1183 log_error("Got invalid event from epoll.");
1187 /* If it is none of the well-known fds, it must be an
1188 * stdout stream fd. Note that this is a bit ugly here
1189 * (since we rely that none of the well-known fds
1190 * could be interpreted as pointer), but nonetheless
1191 * safe, since the well-known fds would never get an
1192 * fd > 4096, i.e. beyond the first memory page */
1194 stream = ev->data.ptr;
1196 if (stdout_stream_process(stream) <= 0)
1197 stdout_stream_free(stream);
1202 log_error("Unknown event.");
/* Route SIGINT, SIGTERM, SIGUSR1 and SIGUSR2 through a signalfd.
 *
 * The process signal mask is replaced (SIG_SETMASK) with exactly these four
 * signals, a non-blocking close-on-exec signalfd for them is stored in
 * s->signal_fd, and that descriptor is added to s->epoll_fd for EPOLLIN.
 * Failures of signalfd() and epoll_ctl() are logged. */
1206 static int open_signalfd(Server *s) {
1208 struct epoll_event ev;
1212 assert_se(sigemptyset(&mask) == 0);
1213 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1214 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1216 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1217 if (s->signal_fd < 0) {
1218 log_error("signalfd(): %m");
1223 ev.events = EPOLLIN;
1224 ev.data.fd = s->signal_fd;
1226 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1227 log_error("epoll_ctl(): %m");
/* Apply journald switches given on the kernel command line.
 *
 * /proc/cmdline is read and split into (possibly quoted) words. Recognised
 * options, each taking a boolean value:
 *   systemd.journald.forward_to_syslog=   -> s->forward_to_syslog
 *   systemd.journald.forward_to_kmsg=     -> s->forward_to_kmsg
 *   systemd.journald.forward_to_console=  -> s->forward_to_console
 * Any other word beginning with "systemd.journald", as well as a value that
 * does not parse as a boolean, only produces a warning. The numeric offsets
 * below (35, 33, 36) are the lengths of the respective option prefixes.
 *
 * The container check at the top runs before anything is read. */
1234 static int server_parse_proc_cmdline(Server *s) {
1235 char *line, *w, *state;
1239 if (detect_container(NULL) > 0)
1242 r = read_one_line_file("/proc/cmdline", &line);
1244 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1248 FOREACH_WORD_QUOTED(w, l, line, state) {
1251 word = strndup(w, l);
1257 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1258 r = parse_boolean(word + 35);
1260 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1262 s->forward_to_syslog = r;
1263 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1264 r = parse_boolean(word + 33);
1266 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1268 s->forward_to_kmsg = r;
1269 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1270 r = parse_boolean(word + 36);
1272 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1274 s->forward_to_console = r;
1275 } else if (startswith(word, "systemd.journald"))
1276 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Load settings from /etc/systemd/journald.conf into the server object.
 *
 * The file is opened read-only with close-on-exec ("re"). ENOENT from
 * fopen() is tested separately from other open errors, which are logged as
 * a warning. The "Journal" section is parsed by config_parse() using the
 * gperf-generated lookup table; a parse failure is logged as a warning. */
1288 static int server_parse_config_file(Server *s) {
1295 fn = "/etc/systemd/journald.conf";
1296 f = fopen(fn, "re");
1298 if (errno == ENOENT)
1301 log_warning("Failed to open configuration file %s: %m", fn);
1305 r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
1307 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Bring a Server object into its operational state.
 *
 * Steps visible here, in order:
 *   1. mark all descriptors as unset (-1) and install defaults for rate
 *      limiting, forwarding and the maximum log levels;
 *   2. fill both metrics structures with 0xFF bytes;
 *   3. read the configuration file, then the kernel command line;
 *   4. create /run/systemd/journal, the user-journal map, the mmap cache
 *      and the epoll instance;
 *   5. adopt sockets passed in by the service manager, identified by type
 *      and path (native, stdout, /dev/log); duplicates and unknown sockets
 *      are reported as errors;
 *   6. open the syslog, native and stdout sockets, /dev/kmsg, the kernel
 *      sequence number and the signalfd;
 *   7. create the udev context and the rate limiter, and open the journals
 *      through system_journal_open(). */
1314 int server_init(Server *s) {
1320 s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1324 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1325 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1327 s->forward_to_syslog = true;
1329 s->max_level_store = LOG_DEBUG;
1330 s->max_level_syslog = LOG_DEBUG;
1331 s->max_level_kmsg = LOG_NOTICE;
1332 s->max_level_console = LOG_INFO;
/* All-ones bytes; presumably read as "not configured" by the journal file
 * code — TODO confirm. */
1334 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1335 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* The command line is parsed second, so its switches override the file. */
1337 server_parse_config_file(s);
1338 server_parse_proc_cmdline(s);
1340 mkdir_p("/run/systemd/journal", 0755);
1342 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1343 if (!s->user_journals)
1346 s->mmap = mmap_cache_new();
1350 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1351 if (s->epoll_fd < 0) {
1352 log_error("Failed to create epoll object: %m");
1356 n = sd_listen_fds(true);
1358 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1362 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1364 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1366 if (s->native_fd >= 0) {
1367 log_error("Too many native sockets passed.");
1373 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1375 if (s->stdout_fd >= 0) {
1376 log_error("Too many stdout sockets passed.");
1382 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1384 if (s->syslog_fd >= 0) {
1385 log_error("Too many /dev/log sockets passed.");
1392 log_error("Unknown socket passed.");
1397 r = server_open_syslog_socket(s);
1401 r = server_open_native_socket(s);
1405 r = server_open_stdout_socket(s);
1409 r = server_open_dev_kmsg(s);
1413 r = server_open_kernel_seqnum(s);
1417 r = open_signalfd(s);
1421 s->udev = udev_new();
1425 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1429 r = system_journal_open(s);
/* Give the system journal and every per-user journal the opportunity to
 * append a tag, using one shared CLOCK_REALTIME timestamp. Whether a tag is
 * actually written is decided by journal_file_maybe_append_tag(). */
1436 void server_maybe_append_tags(Server *s) {
1442 n = now(CLOCK_REALTIME);
1444 if (s->system_journal)
1445 journal_file_maybe_append_tag(s->system_journal, n);
1447 HASHMAP_FOREACH(f, s->user_journals, i)
1448 journal_file_maybe_append_tag(f, n);
/* Release everything a Server object holds.
 *
 * Frees all stdout streams, closes the system, runtime and per-user
 * journals, frees the user-journal map, closes every descriptor that is set
 * (>= 0), frees the rate limiter, unmaps the kernel sequence number and
 * drops the references to the mmap cache and the udev context. */
1452 void server_done(Server *s) {
/* The loop ends once s->stdout_streams is NULL; presumably
 * stdout_stream_free() unlinks the stream it frees — TODO confirm. */
1456 while (s->stdout_streams)
1457 stdout_stream_free(s->stdout_streams);
1459 if (s->system_journal)
1460 journal_file_close(s->system_journal);
1462 if (s->runtime_journal)
1463 journal_file_close(s->runtime_journal);
1465 while ((f = hashmap_steal_first(s->user_journals)))
1466 journal_file_close(f);
1468 hashmap_free(s->user_journals);
1470 if (s->epoll_fd >= 0)
1471 close_nointr_nofail(s->epoll_fd);
1473 if (s->signal_fd >= 0)
1474 close_nointr_nofail(s->signal_fd);
1476 if (s->syslog_fd >= 0)
1477 close_nointr_nofail(s->syslog_fd);
1479 if (s->native_fd >= 0)
1480 close_nointr_nofail(s->native_fd);
1482 if (s->stdout_fd >= 0)
1483 close_nointr_nofail(s->stdout_fd);
1485 if (s->dev_kmsg_fd >= 0)
1486 close_nointr_nofail(s->dev_kmsg_fd);
1489 journal_rate_limit_free(s->rate_limit);
/* The mapping covers exactly one 64-bit counter. */
1491 if (s->kernel_seqnum)
1492 munmap(s->kernel_seqnum, sizeof(uint64_t));
1498 mmap_cache_unref(s->mmap);
1501 udev_unref(s->udev);