1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
29 #include <systemd/sd-journal.h>
30 #include <systemd/sd-messages.h>
31 #include <systemd/sd-daemon.h>
34 #include <systemd/sd-login.h>
39 #include "journal-file.h"
40 #include "socket-util.h"
41 #include "cgroup-util.h"
45 #include "conf-parser.h"
46 #include "journal-internal.h"
47 #include "journal-vacuum.h"
48 #include "journal-authenticate.h"
49 #include "journald-server.h"
50 #include "journald-rate-limit.h"
51 #include "journald-kmsg.h"
52 #include "journald-syslog.h"
53 #include "journald-stream.h"
54 #include "journald-console.h"
55 #include "journald-native.h"
59 #include <acl/libacl.h>
64 #include <selinux/selinux.h>
/* Upper bound on the number of per-user journal files kept open at once;
 * find_journal() closes entries while the hashmap holds this many or more. */
67 #define USER_JOURNALS_MAX 1024
/* Rate-limit defaults that server_init() stores in the Server before the
 * configuration file and kernel command line are parsed. */
69 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
70 #define DEFAULT_RATE_LIMIT_BURST 200
/* available_space() returns its cached value until this much monotonic time
 * has passed since the value was computed. */
72 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* String names for the Storage enum values, indexed by enum value. The two
 * macro invocations below are handed this table's prefix ("storage") and the
 * Storage type; config_parse_storage is the parser name given to the second.
 * NOTE(review): what exactly the macros generate is defined outside this file. */
74 static const char* const storage_table[] = {
75 [STORAGE_AUTO] = "auto",
76 [STORAGE_VOLATILE] = "volatile",
77 [STORAGE_PERSISTENT] = "persistent",
78 [STORAGE_NONE] = "none"
81 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
82 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* String names for the SplitMode enum values, indexed by enum value. The two
 * macro invocations below are handed this table's prefix ("split_mode") and
 * the SplitMode type; config_parse_split_mode is the parser name given to the
 * second.
 * NOTE(review): this listing skips a line inside the initializer; confirm
 * that every SplitMode value has an entry. */
84 static const char* const split_mode_table[] = {
85 [SPLIT_NONE] = "none",
87 [SPLIT_LOGIN] = "login"
90 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
91 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Estimates how many more bytes the journal may occupy and caches the result
 * in s->cached_available_space. The estimate is the smaller of
 *   - the configured max_use minus the disk usage of existing journal files, and
 *   - the free space of the file system minus the configured keep_free.
 * NOTE(review): several lines (declarations, error paths, the final return)
 * are not visible in this listing. */
93 static uint64_t available_space(Server *s) {
98 uint64_t sum = 0, avail = 0, ss_avail = 0;
104 ts = now(CLOCK_MONOTONIC);
/* Serve the cached value while it is younger than RECHECK_AVAILABLE_SPACE_USEC. */
106 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
107 return s->cached_available_space;
109 r = sd_id128_get_machine(&machine);
/* With an open system journal the persistent directory and its metrics are
 * used; the other assignment pair selects the runtime directory and metrics. */
113 if (s->system_journal) {
114 f = "/var/log/journal/";
115 m = &s->system_metrics;
117 f = "/run/log/journal/";
118 m = &s->runtime_metrics;
/* Journal directory of this machine: <prefix><machine-id>. */
123 p = strappend(f, sd_id128_to_string(machine, ids));
133 if (fstatvfs(dirfd(d), &ss) < 0)
139 union dirent_storage buf;
141 r = readdir_r(d, &buf.de, &de);
/* Only entries named *.journal or *.journal~ are considered. */
148 if (!endswith(de->d_name, ".journal") &&
149 !endswith(de->d_name, ".journal~"))
152 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
155 if (!S_ISREG(st.st_mode))
/* Accumulate allocated size: st_blocks multiplied by 512 bytes. */
158 sum += (uint64_t) st.st_blocks * 512UL;
/* Room left below max_use, clamped at zero. */
161 avail = sum >= m->max_use ? 0 : m->max_use - sum;
/* NOTE(review): POSIX documents f_bavail in units of f_frsize rather than
 * f_bsize; confirm the two are equal on all target file systems. */
163 ss_avail = ss.f_bsize * ss.f_bavail;
/* Free space beyond the keep_free reserve, clamped at zero. */
165 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
167 if (ss_avail < avail)
/* Remember the result and when it was computed. */
170 s->cached_available_space = avail;
171 s->cached_available_space_timestamp = ts;
/* Looks up the group "adm" via get_group_creds() and stores its gid in
 * s->file_gid. s->file_gid_valid is set afterwards, so later calls skip the
 * lookup. */
179 static void server_read_file_gid(Server *s) {
180 const char *adm = "adm";
/* A previous call already did (or attempted) the lookup. */
185 if (s->file_gid_valid)
188 r = get_group_creds(&adm, &s->file_gid);
190 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
192 /* if we couldn't read the gid, then it will be 0, but that's
193 * fine and we shouldn't try to resolve the group again, so
194 * let's just pretend it worked right-away. */
195 s->file_gid_valid = true;
/* Adjusts ownership, mode and ACL of an open journal file.
 *
 * The file's fd gets mode 0640, owner 0 and group s->file_gid. Afterwards the
 * file's ACL is read, a user entry for `uid` is looked up (and created if
 * needed), read permission is added to it, the mask is recalculated and the
 * ACL is written back. Every failure is only logged as a warning.
 * NOTE(review): the conditions guarding the ACL part are not visible in this
 * listing. */
198 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
203 acl_permset_t permset;
/* Make sure s->file_gid has been resolved. */
208 server_read_file_gid(s);
210 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
212 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
218 acl = acl_get_fd(f->fd);
220 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an existing ACL entry for this uid. */
224 r = acl_find_uid(acl, uid, &entry);
/* Create a new ACL_USER entry qualified with uid. */
227 if (acl_create_entry(&acl, &entry) < 0 ||
228 acl_set_tag_type(entry, ACL_USER) < 0 ||
229 acl_set_qualifier(entry, &uid) < 0) {
230 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Add read permission to the entry and recompute the ACL mask. */
235 if (acl_get_permset(entry, &permset) < 0 ||
236 acl_add_perm(permset, ACL_READ) < 0 ||
237 acl_calc_mask(&acl) < 0) {
238 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Write the modified ACL back to the file. */
242 if (acl_set_fd(f->fd, acl) < 0)
243 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Selects the journal file that entries for `uid` are written to.
 *
 * Returns s->runtime_journal whenever it is open. Otherwise the per-user file
 * /var/log/journal/<machine-id>/user-<uid>.journal is looked up in
 * s->user_journals and, if absent, opened (created if needed) and inserted.
 * On the failures visible here the function falls back to s->system_journal.
 * NOTE(review): some conditions and the final return are not visible in this
 * listing. */
250 static JournalFile* find_journal(Server *s, uid_t uid) {
258 /* We split up user logs only on /var, not on /run. If the
259 * runtime file is open, we write to it exclusively, in order
260 * to guarantee proper order as soon as we flush /run to
261 * /var and close the runtime file. */
263 if (s->runtime_journal)
264 return s->runtime_journal;
267 return s->system_journal;
269 r = sd_id128_get_machine(&machine);
271 return s->system_journal;
/* Per-user journals are cached in a hashmap keyed by uid. */
273 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
277 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
278 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
279 return s->system_journal;
281 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
282 /* Too many open? Then let's close one */
283 f = hashmap_steal_first(s->user_journals);
285 journal_file_close(f);
/* Open or create the user journal with the system metrics; s->system_journal
 * is passed along as an additional argument to the open call. */
288 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
292 return s->system_journal;
/* Give the owning user read access to the new file. */
294 server_fix_perms(s, f, uid);
296 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
/* The file is closed again and the system journal returned instead. */
298 journal_file_close(f);
299 return s->system_journal;
/* Rotates the runtime journal, the system journal and every per-user journal
 * via journal_file_rotate(), then re-applies permissions with
 * server_fix_perms(). Failures are logged.
 * NOTE(review): the if/else structure between the visible lines is not fully
 * shown in this listing. */
305 void server_rotate(Server *s) {
311 log_debug("Rotating...");
313 if (s->runtime_journal) {
/* The runtime journal is rotated with sealing disabled (last argument false). */
314 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
/* The pointer may or may not still be set after a failed rotation; the
 * message is chosen accordingly. */
316 if (s->runtime_journal)
317 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
319 log_error("Failed to create new runtime journal: %s", strerror(-r));
321 server_fix_perms(s, s->runtime_journal, 0);
324 if (s->system_journal) {
325 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
327 if (s->system_journal)
328 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
330 log_error("Failed to create new system journal: %s", strerror(-r));
333 server_fix_perms(s, s->system_journal, 0);
/* Per-user journals: k is the hashmap key (the uid), f the journal file. */
336 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
337 r = journal_file_rotate(&f, s->compress, s->seal);
340 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
342 log_error("Failed to create user journal: %s", strerror(-r));
/* Store the rotated file under the same key and fix its permissions.
 * NOTE(review): verify that an entry whose rotation failed does not leave a
 * stale pointer behind in the hashmap. */
344 hashmap_replace(s->user_journals, k, f);
345 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Runs journal_directory_vacuum() on the persistent and/or runtime journal
 * directory of this machine, depending on which journals are open, using the
 * matching metrics and s->max_retention_usec. s->oldest_file_usec is reset to
 * 0 first and passed to the vacuum calls as an output argument. */
350 void server_vacuum(Server *s) {
356 log_debug("Vacuuming...");
358 s->oldest_file_usec = 0;
360 r = sd_id128_get_machine(&machine);
362 log_error("Failed to get machine ID: %s", strerror(-r));
/* Directory names are the machine ID formatted as a string. */
366 sd_id128_to_string(machine, ids);
368 if (s->system_journal) {
369 p = strappend("/var/log/journal/", ids);
375 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
/* A missing directory (-ENOENT) is not reported. */
376 if (r < 0 && r != -ENOENT)
377 log_error("Failed to vacuum %s: %s", p, strerror(-r));
381 if (s->runtime_journal) {
382 p = strappend("/run/log/journal/", ids);
388 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
389 if (r < 0 && r != -ENOENT)
390 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* Zeroing the timestamp makes available_space() recompute on its next call. */
394 s->cached_available_space_timestamp = 0;
/* Produces the cgroup path of `pid` relative to the cgroup of PID 1.
 *
 * Both paths are queried with cg_get_by_pid() for SYSTEMD_CGROUP_CONTROLLER.
 * When the process path starts with the (adjusted) init path, a strdup()'ed
 * copy of the remainder is produced.
 * NOTE(review): error handling, the non-prefix case and the return statement
 * are not visible in this listing. */
397 static char *shortened_cgroup_path(pid_t pid) {
399 char *process_path, *init_path, *path;
403 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
407 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
/* Cut a trailing "/system" (7 characters) off the init path. */
413 if (endswith(init_path, "/system"))
414 init_path[strlen(init_path) - 7] = 0;
415 else if (streq(init_path, "/"))
418 if (startswith(process_path, init_path)) {
/* Keep only the part after the init path prefix. */
421 p = strdup(process_path + strlen(init_path));
439 bool shall_try_append_again(JournalFile *f, int r) {
441 /* -E2BIG Hit configured limit
443 -EDQUOT Quota limit hit
445 -EHOSTDOWN Other machine
446 -EBUSY Unclean shutdown
447 -EPROTONOSUPPORT Unsupported feature
450 -ESHUTDOWN Already archived */
452 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
453 log_debug("%s: Allocation limit reached, rotating.", f->path);
454 else if (r == -EHOSTDOWN)
455 log_info("%s: Journal file from other machine, rotating.", f->path);
456 else if (r == -EBUSY)
457 log_info("%s: Unclean shutdown, rotating.", f->path);
458 else if (r == -EPROTONOSUPPORT)
459 log_info("%s: Unsupported feature, rotating.", f->path);
460 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
461 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Appends one entry, given as `n` iovec fields, to the journal selected for
 * `uid` by find_journal().
 *
 * If the file suggests rotation, the journal is looked up again afterwards.
 * If the append fails with a code accepted by shall_try_append_again() and no
 * vacuum has happened yet in this call, the journal is looked up again and
 * the write is retried once; a second failure is only logged.
 * NOTE(review): the rotate/vacuum calls between the visible lines are not
 * shown in this listing. */
468 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
470 bool vacuumed = false;
477 f = find_journal(s, uid);
481 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
482 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
/* Fetch the journal again after the rotation. */
487 f = find_journal(s, uid);
492 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
/* Give up if a vacuum already ran or the error is not worth a retry. */
496 if (vacuumed || !shall_try_append_again(f, r)) {
497 log_error("Failed to write entry, ignoring: %s", strerror(-r));
504 f = find_journal(s, uid);
508 log_debug("Retrying write.");
509 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
511 log_error("Failed to write entry, ignoring: %s", strerror(-r));
514 static void dispatch_message_real(
516 struct iovec *iovec, unsigned n, unsigned m,
519 const char *label, size_t label_len,
520 const char *unit_id) {
522 char *pid = NULL, *uid = NULL, *gid = NULL,
523 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
524 *comm = NULL, *cmdline = NULL, *hostname = NULL,
525 *audit_session = NULL, *audit_loginuid = NULL,
526 *exe = NULL, *cgroup = NULL, *session = NULL,
527 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
533 uid_t loginuid = 0, realuid = 0;
538 assert(n + N_IOVEC_META_FIELDS <= m);
546 realuid = ucred->uid;
548 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
549 IOVEC_SET_STRING(iovec[n++], pid);
551 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
552 IOVEC_SET_STRING(iovec[n++], uid);
554 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
555 IOVEC_SET_STRING(iovec[n++], gid);
557 r = get_process_comm(ucred->pid, &t);
559 comm = strappend("_COMM=", t);
563 IOVEC_SET_STRING(iovec[n++], comm);
566 r = get_process_exe(ucred->pid, &t);
568 exe = strappend("_EXE=", t);
572 IOVEC_SET_STRING(iovec[n++], exe);
575 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
577 cmdline = strappend("_CMDLINE=", t);
581 IOVEC_SET_STRING(iovec[n++], cmdline);
584 r = audit_session_from_pid(ucred->pid, &audit);
586 if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
587 IOVEC_SET_STRING(iovec[n++], audit_session);
589 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
591 if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
592 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
594 t = shortened_cgroup_path(ucred->pid);
596 cgroup = strappend("_SYSTEMD_CGROUP=", t);
600 IOVEC_SET_STRING(iovec[n++], cgroup);
604 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
605 session = strappend("_SYSTEMD_SESSION=", t);
609 IOVEC_SET_STRING(iovec[n++], session);
612 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
613 if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
614 IOVEC_SET_STRING(iovec[n++], owner_uid);
617 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
618 unit = strappend("_SYSTEMD_UNIT=", t);
621 unit = strappend("_SYSTEMD_UNIT=", unit_id);
624 IOVEC_SET_STRING(iovec[n++], unit);
628 selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
629 if (selinux_context) {
630 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
631 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
632 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
633 IOVEC_SET_STRING(iovec[n++], selinux_context);
636 security_context_t con;
638 if (getpidcon(ucred->pid, &con) >= 0) {
639 selinux_context = strappend("_SELINUX_CONTEXT=", con);
641 IOVEC_SET_STRING(iovec[n++], selinux_context);
650 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
651 (unsigned long long) timeval_load(tv)) >= 0)
652 IOVEC_SET_STRING(iovec[n++], source_time);
655 /* Note that strictly speaking storing the boot id here is
656 * redundant since the entry includes this in-line
657 * anyway. However, we need this indexed, too. */
658 r = sd_id128_get_boot(&id);
660 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
661 IOVEC_SET_STRING(iovec[n++], boot_id);
663 r = sd_id128_get_machine(&id);
665 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
666 IOVEC_SET_STRING(iovec[n++], machine_id);
668 t = gethostname_malloc();
670 hostname = strappend("_HOSTNAME=", t);
673 IOVEC_SET_STRING(iovec[n++], hostname);
679 s->split_mode == SPLIT_NONE ? 0 :
680 (s->split_mode == SPLIT_UID ? realuid :
681 (realuid == 0 ? 0 : loginuid)), iovec, n);
694 free(audit_loginuid);
699 free(selinux_context);
/* Emits a journal entry originating from journald itself.
 *
 * The entry carries PRIORITY=6, _TRANSPORT=driver, a MESSAGE= field built
 * from the printf-style `format` and, unless message_id equals SD_ID128_NULL,
 * a message ID field. The daemon's own pid/uid/gid are passed as credentials
 * to dispatch_message_real(). */
702 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
703 char mid[11 + 32 + 1];
704 char buffer[16 + LINE_MAX + 1];
/* Room for the fields set here plus the metadata fields added downstream. */
705 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
713 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
714 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* "MESSAGE=" is 8 bytes; the formatted text is written right after it. */
716 memcpy(buffer, "MESSAGE=", 8);
717 va_start(ap, format);
718 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
720 char_array_0(buffer);
721 IOVEC_SET_STRING(iovec[n++], buffer);
723 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
724 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
726 IOVEC_SET_STRING(iovec[n++], mid);
/* The entry is attributed to this process. */
730 ucred.pid = getpid();
731 ucred.uid = getuid();
732 ucred.gid = getgid();
734 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Entry point for incoming messages: filters by priority, applies per-cgroup
 * rate limiting and hands the entry to dispatch_message_real().
 *
 * The rate-limit key is the sender's shortened cgroup path, located at its
 * third '/'; the limiter is also given the result of available_space(). When
 * messages were suppressed, a driver message with the count is logged.
 * NOTE(review): the early-return conditions and the handling of a missing
 * ucred are not visible in this listing. */
737 void server_dispatch_message(
739 struct iovec *iovec, unsigned n, unsigned m,
742 const char *label, size_t label_len,
747 char *path = NULL, *c;
750 assert(iovec || n == 0);
/* Compare the message priority against the configured store level. */
755 if (LOG_PRI(priority) > s->max_level_store)
761 path = shortened_cgroup_path(ucred->pid);
765 /* example: /user/lennart/3/foobar
766 * /system/dbus.service/foobar
768 * So let's cut of everything past the third /, since that is
769 * where user directories start */
771 c = strchr(path, '/');
773 c = strchr(c+1, '/');
775 c = strchr(c+1, '/');
781 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
788 /* Write a suppression message if we suppressed something */
790 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
795 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
/* Opens the persistent system journal and/or the runtime journal according
 * to s->storage.
 *
 * The persistent journal under /var/log/journal/<machine-id>/ is opened only
 * in persistent or auto mode and only once /run/systemd/journal/flushed
 * exists. The runtime journal under /run/log/journal/<machine-id>/ is opened
 * unless storage is "none": with a system journal present it is opened only
 * if it already exists; the other visible path creates it. For each journal
 * opened, permissions are fixed and a driver message reports the size limit.
 * NOTE(review): error paths and the return value are not visible in this
 * listing. */
799 static int system_journal_open(Server *s) {
805 r = sd_id128_get_machine(&machine);
809 sd_id128_to_string(machine, ids);
811 if (!s->system_journal &&
812 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
813 access("/run/systemd/journal/flushed", F_OK) >= 0) {
815 /* If in auto mode: first try to create the machine
816 * path, but not the prefix.
818 * If in persistent mode: create /var/log/journal and
819 * the machine path */
821 if (s->storage == STORAGE_PERSISTENT)
822 (void) mkdir("/var/log/journal/", 0755);
824 fn = strappend("/var/log/journal/", ids);
828 (void) mkdir(fn, 0755);
831 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
835 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
839 char fb[FORMAT_BYTES_MAX];
841 server_fix_perms(s, s->system_journal, 0);
842 server_driver_message(s, SD_ID128_NULL, "Allowing system journal files to grow to %s.",
843 format_bytes(fb, sizeof(fb), s->system_metrics.max_use));
/* -ENOENT and -EROFS are not reported as warnings. */
847 if (r != -ENOENT && r != -EROFS)
848 log_warning("Failed to open system journal: %s", strerror(-r));
854 if (!s->runtime_journal &&
855 (s->storage != STORAGE_NONE)) {
857 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
861 if (s->system_journal) {
863 /* Try to open the runtime journal, but only
864 * if it already exists, so that we can flush
865 * it into the system journal */
867 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
872 log_warning("Failed to open runtime journal: %s", strerror(-r));
879 /* OK, we really need the runtime journal, so create
880 * it if necessary. */
882 (void) mkdir_parents(fn, 0755);
883 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
887 log_error("Failed to open runtime journal: %s", strerror(-r));
892 if (s->runtime_journal) {
893 char fb[FORMAT_BYTES_MAX];
895 server_fix_perms(s, s->runtime_journal, 0);
896 server_driver_message(s, SD_ID128_NULL, "Allowing runtime journal files to grow to %s.",
897 format_bytes(fb, sizeof(fb), s->runtime_metrics.max_use));
/* Copies all entries of the runtime journal into the system journal, then
 * closes the runtime journal and removes /run/log/journal.
 *
 * Applies only in auto or persistent storage mode, with an open runtime
 * journal and a system journal that can be opened. A failed copy is retried
 * once if shall_try_append_again() accepts the error.
 * NOTE(review): return values and the steps taken between a failed copy and
 * its retry are not visible in this listing. */
904 int server_flush_to_var(Server *s) {
907 sd_journal *j = NULL;
911 if (s->storage != STORAGE_AUTO &&
912 s->storage != STORAGE_PERSISTENT)
915 if (!s->runtime_journal)
/* Try to open the system journal now. */
918 system_journal_open(s);
920 if (!s->system_journal)
923 log_debug("Flushing to /var...");
925 r = sd_id128_get_machine(&machine);
927 log_error("Failed to get machine id: %s", strerror(-r));
/* Read back only the runtime journal files. */
931 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
933 log_error("Failed to read runtime journal: %s", strerror(-r));
937 sd_journal_set_data_threshold(j, 0);
939 SD_JOURNAL_FOREACH(j) {
944 assert(f && f->current_offset > 0);
/* Locate the entry object at the reader's current offset. */
946 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
948 log_error("Can't read entry: %s", strerror(-r));
952 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
956 if (!shall_try_append_again(s->system_journal, r)) {
957 log_error("Can't write entry: %s", strerror(-r));
964 log_debug("Retrying write.");
965 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
967 log_error("Can't write entry: %s", strerror(-r));
973 journal_file_post_change(s->system_journal);
/* The runtime journal is closed and its pointer cleared. */
975 journal_file_close(s->runtime_journal);
976 s->runtime_journal = NULL;
979 rm_rf("/run/log/journal", false, true, false);
/* Handles one epoll event by comparing ev->data.fd with the server's
 * well-known file descriptors:
 *   - signal_fd:  reads a signalfd_siginfo; SIGUSR1 touches the "flushed"
 *                 flag file and flushes the runtime journal to /var; SIGUSR2
 *                 is checked separately (its handling is not visible here).
 *   - dev_kmsg_fd: reads kernel messages via server_read_dev_kmsg().
 *   - native_fd / syslog_fd: receives a datagram with its control messages
 *                 and forwards it to the native or syslog message handlers.
 *   - stdout_fd:  creates a new stdout stream via stdout_stream_new().
 *   - otherwise:  ev->data.ptr is treated as a StdoutStream to process.
 * NOTE(review): return values and several error paths are not visible in
 * this listing. */
987 int process_event(Server *s, struct epoll_event *ev) {
991 if (ev->data.fd == s->signal_fd) {
992 struct signalfd_siginfo sfsi;
995 if (ev->events != EPOLLIN) {
996 log_error("Got invalid event from epoll.");
1000 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1001 if (n != sizeof(sfsi)) {
/* EINTR and EAGAIN are singled out from other read failures. */
1006 if (errno == EINTR || errno == EAGAIN)
1012 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1014 if (sfsi.ssi_signo == SIGUSR1) {
/* Create the flag file that system_journal_open() tests for, then flush. */
1015 touch("/run/systemd/journal/flushed");
1016 server_flush_to_var(s);
1020 if (sfsi.ssi_signo == SIGUSR2) {
1028 } else if (ev->data.fd == s->dev_kmsg_fd) {
1031 if (ev->events != EPOLLIN) {
1032 log_error("Got invalid event from epoll.");
1036 r = server_read_dev_kmsg(s);
1042 } else if (ev->data.fd == s->native_fd ||
1043 ev->data.fd == s->syslog_fd) {
1045 if (ev->events != EPOLLIN) {
1046 log_error("Got invalid event from epoll.");
1051 struct msghdr msghdr;
1053 struct ucred *ucred = NULL;
1054 struct timeval *tv = NULL;
1055 struct cmsghdr *cmsg;
1057 size_t label_len = 0;
1059 struct cmsghdr cmsghdr;
1061 /* We use NAME_MAX space for the
1062 * SELinux label here. The kernel
1063 * currently enforces no limit, but
1064 * according to suggestions from the
1065 * SELinux people this will change and
1066 * it will probably be identical to
1067 * NAME_MAX. For now we use that, but
1068 * this should be updated one day when
1069 * the final limit is known.*/
1070 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1071 CMSG_SPACE(sizeof(struct timeval)) +
1072 CMSG_SPACE(sizeof(int)) + /* fd */
1073 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask the socket how many bytes are pending. */
1080 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1081 log_error("SIOCINQ failed: %m");
/* Grow the receive buffer when the pending amount exceeds it; one extra
 * byte beyond l is allocated. */
1085 if (s->buffer_size < (size_t) v) {
1089 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1090 b = realloc(s->buffer, l+1);
1093 log_error("Couldn't increase buffer.");
1102 iovec.iov_base = s->buffer;
1103 iovec.iov_len = s->buffer_size;
1107 msghdr.msg_iov = &iovec;
1108 msghdr.msg_iovlen = 1;
1109 msghdr.msg_control = &control;
1110 msghdr.msg_controllen = sizeof(control);
1112 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1115 if (errno == EINTR || errno == EAGAIN)
1118 log_error("recvmsg() failed: %m");
/* Pick credentials, SELinux label, timestamp and passed fds out of the
 * control messages; credentials and timestamp are accepted only when the
 * payload has exactly the expected size. */
1122 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1124 if (cmsg->cmsg_level == SOL_SOCKET &&
1125 cmsg->cmsg_type == SCM_CREDENTIALS &&
1126 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1127 ucred = (struct ucred*) CMSG_DATA(cmsg);
1128 else if (cmsg->cmsg_level == SOL_SOCKET &&
1129 cmsg->cmsg_type == SCM_SECURITY) {
1130 label = (char*) CMSG_DATA(cmsg);
1131 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1132 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1133 cmsg->cmsg_type == SO_TIMESTAMP &&
1134 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1135 tv = (struct timeval*) CMSG_DATA(cmsg);
1136 else if (cmsg->cmsg_level == SOL_SOCKET &&
1137 cmsg->cmsg_type == SCM_RIGHTS) {
1138 fds = (int*) CMSG_DATA(cmsg);
1139 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1143 if (ev->data.fd == s->syslog_fd) {
/* Syslog datagrams must not carry file descriptors. */
1146 if (n > 0 && n_fds == 0) {
1147 e = memchr(s->buffer, '\n', n);
1153 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1154 } else if (n_fds > 0)
1155 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native protocol: either inline data, or an empty datagram with exactly
 * one fd that holds the message. */
1158 if (n > 0 && n_fds == 0)
1159 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1160 else if (n == 0 && n_fds == 1)
1161 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1163 log_warning("Got too many file descriptors via native socket. Ignoring.");
/* Close the received file descriptors. */
1166 close_many(fds, n_fds);
1171 } else if (ev->data.fd == s->stdout_fd) {
1173 if (ev->events != EPOLLIN) {
1174 log_error("Got invalid event from epoll.");
1178 stdout_stream_new(s);
1182 StdoutStream *stream;
/* Only EPOLLIN and EPOLLHUP bits are acceptable here. */
1184 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1185 log_error("Got invalid event from epoll.");
1189 /* If it is none of the well-known fds, it must be an
1190 * stdout stream fd. Note that this is a bit ugly here
1191 * (since we rely that none of the well-known fds
1192 * could be interpreted as pointer), but nonetheless
1193 * safe, since the well-known fds would never get an
1194 * fd > 4096, i.e. beyond the first memory page */
1196 stream = ev->data.ptr;
/* A result of zero or below frees the stream. */
1198 if (stdout_stream_process(stream) <= 0)
1199 stdout_stream_free(stream);
1204 log_error("Unknown event.");
/* Sets the process signal mask to SIGINT, SIGTERM, SIGUSR1 and SIGUSR2,
 * creates a non-blocking, close-on-exec signalfd for them in s->signal_fd
 * and registers it with s->epoll_fd for EPOLLIN.
 * NOTE(review): return values are not visible in this listing. */
1208 static int open_signalfd(Server *s) {
1210 struct epoll_event ev;
1214 assert_se(sigemptyset(&mask) == 0);
1215 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
/* SIG_SETMASK replaces the whole signal mask with this set. */
1216 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1218 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1219 if (s->signal_fd < 0) {
1220 log_error("signalfd(): %m");
1225 ev.events = EPOLLIN;
1226 ev.data.fd = s->signal_fd;
1228 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1229 log_error("epoll_ctl(): %m");
/* Applies systemd.journald.* switches found in /proc/cmdline to the Server.
 *
 * Recognised words: systemd.journald.forward_to_syslog=,
 * systemd.journald.forward_to_kmsg= and systemd.journald.forward_to_console=,
 * each followed by a boolean. Any other word starting with
 * "systemd.journald" only triggers a warning.
 * NOTE(review): return values are not visible in this listing. */
1236 static int server_parse_proc_cmdline(Server *s) {
1237 char *line, *w, *state;
/* Checked before /proc/cmdline is read. */
1241 if (detect_container(NULL) > 0)
1244 r = read_one_line_file("/proc/cmdline", &line);
1246 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1250 FOREACH_WORD_QUOTED(w, l, line, state) {
/* Work on a NUL-terminated copy of the current word. */
1253 word = strndup(w, l);
/* The offsets 35, 33 and 36 below are the lengths of the matched prefixes. */
1259 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1260 r = parse_boolean(word + 35);
1262 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1264 s->forward_to_syslog = r;
1265 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1266 r = parse_boolean(word + 33);
1268 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1270 s->forward_to_kmsg = r;
1271 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1272 r = parse_boolean(word + 36);
1274 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1276 s->forward_to_console = r;
1277 } else if (startswith(word, "systemd.journald"))
1278 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Parses /etc/systemd/journald.conf into the Server using config_parse()
 * with the "Journal" section and the journald_gperf_lookup table. Failures
 * to open or parse the file are logged as warnings.
 * NOTE(review): return values are not visible in this listing. */
1290 static int server_parse_config_file(Server *s) {
1297 fn = "/etc/systemd/journald.conf";
/* "re": open read-only with close-on-exec. */
1298 f = fopen(fn, "re");
/* A missing file (ENOENT) is handled apart from other open failures. */
1300 if (errno == ENOENT)
1303 log_warning("Failed to open configuration file %s: %m", fn);
1307 r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
1309 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Initialises a Server: sets defaults, parses the configuration file and the
 * kernel command line, creates /run/systemd/journal, allocates the
 * user-journal hashmap, mmap cache and epoll instance, takes over sockets
 * passed in via socket activation, opens the remaining sockets, /dev/kmsg,
 * the kernel sequence number and the signalfd, creates the udev context and
 * the rate limiter, and finally opens the journals.
 * NOTE(review): error checks after most calls and the return values are not
 * visible in this listing. */
1316 int server_init(Server *s) {
/* All fds start out as -1, i.e. "not open". */
1322 s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1326 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1327 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1329 s->forward_to_syslog = true;
1331 s->max_level_store = LOG_DEBUG;
1332 s->max_level_syslog = LOG_DEBUG;
1333 s->max_level_kmsg = LOG_NOTICE;
1334 s->max_level_console = LOG_INFO;
/* Fill both metrics structures with 0xFF bytes before the configuration is
 * parsed. NOTE(review): presumably all-ones means "unset"; confirm against
 * the metrics consumer. */
1336 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1337 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* The command line is parsed after the file, so its settings are applied last. */
1339 server_parse_config_file(s);
1340 server_parse_proc_cmdline(s);
1342 mkdir_p("/run/systemd/journal", 0755);
1344 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1345 if (!s->user_journals)
1348 s->mmap = mmap_cache_new();
1352 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1353 if (s->epoll_fd < 0) {
1354 log_error("Failed to create epoll object: %m");
/* Sockets handed over through socket activation. */
1358 n = sd_listen_fds(true);
1360 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
/* Classify each passed fd by socket type and path; at most one of each kind
 * is accepted. */
1364 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1366 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1368 if (s->native_fd >= 0) {
1369 log_error("Too many native sockets passed.");
1375 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1377 if (s->stdout_fd >= 0) {
1378 log_error("Too many stdout sockets passed.");
1384 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1386 if (s->syslog_fd >= 0) {
1387 log_error("Too many /dev/log sockets passed.");
1394 log_error("Unknown socket passed.");
1399 r = server_open_syslog_socket(s);
1403 r = server_open_native_socket(s);
1407 r = server_open_stdout_socket(s);
1411 r = server_open_dev_kmsg(s);
1415 r = server_open_kernel_seqnum(s);
1419 r = open_signalfd(s);
1423 s->udev = udev_new();
1427 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1431 r = system_journal_open(s);
/* Calls journal_file_maybe_append_tag() with the current CLOCK_REALTIME time
 * on the system journal (if open) and on every per-user journal. */
1438 void server_maybe_append_tags(Server *s) {
/* One timestamp is shared by all files. */
1444 n = now(CLOCK_REALTIME);
1446 if (s->system_journal)
1447 journal_file_maybe_append_tag(s->system_journal, n);
1449 HASHMAP_FOREACH(f, s->user_journals, i)
1450 journal_file_maybe_append_tag(f, n);
/* Releases the resources held by a Server: stdout streams, the system,
 * runtime and per-user journals, the user-journal hashmap, every open file
 * descriptor, the rate limiter, the kernel sequence number mapping, the mmap
 * cache and the udev context.
 * NOTE(review): the end of this function is not visible in this listing. */
1454 void server_done(Server *s) {
/* Loops while the stream list head is set, freeing the head each time. */
1458 while (s->stdout_streams)
1459 stdout_stream_free(s->stdout_streams);
1461 if (s->system_journal)
1462 journal_file_close(s->system_journal);
1464 if (s->runtime_journal)
1465 journal_file_close(s->runtime_journal);
/* Drain the hashmap, closing every per-user journal, before freeing it. */
1467 while ((f = hashmap_steal_first(s->user_journals)))
1468 journal_file_close(f);
1470 hashmap_free(s->user_journals);
/* File descriptors below zero were never opened and are skipped. */
1472 if (s->epoll_fd >= 0)
1473 close_nointr_nofail(s->epoll_fd);
1475 if (s->signal_fd >= 0)
1476 close_nointr_nofail(s->signal_fd);
1478 if (s->syslog_fd >= 0)
1479 close_nointr_nofail(s->syslog_fd);
1481 if (s->native_fd >= 0)
1482 close_nointr_nofail(s->native_fd);
1484 if (s->stdout_fd >= 0)
1485 close_nointr_nofail(s->stdout_fd);
1487 if (s->dev_kmsg_fd >= 0)
1488 close_nointr_nofail(s->dev_kmsg_fd);
1491 journal_rate_limit_free(s->rate_limit);
/* The kernel sequence number is unmapped with a length of sizeof(uint64_t). */
1493 if (s->kernel_seqnum)
1494 munmap(s->kernel_seqnum, sizeof(uint64_t));
1500 mmap_cache_unref(s->mmap);
1503 udev_unref(s->udev);