1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
25 #include <sys/signalfd.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
35 #include <systemd/sd-journal.h>
36 #include <systemd/sd-messages.h>
37 #include <systemd/sd-daemon.h>
40 #include <systemd/sd-login.h>
45 #include "journal-file.h"
46 #include "socket-util.h"
47 #include "cgroup-util.h"
51 #include "conf-parser.h"
52 #include "journal-internal.h"
53 #include "journal-vacuum.h"
54 #include "journal-authenticate.h"
56 #include "journald-rate-limit.h"
57 #include "journald-kmsg.h"
58 #include "journald-syslog.h"
59 #include "journald-stream.h"
60 #include "journald-console.h"
61 #include "journald-native.h"
65 #include <acl/libacl.h>
70 #include <selinux/selinux.h>
/* Cap on the number of per-user journal files held open at the same time;
 * find_journal() closes files until the count is below this value. */
73 #define USER_JOURNALS_MAX 1024
/* Defaults assigned to the Server's rate limit interval and burst settings
 * in server_init(). */
75 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
76 #define DEFAULT_RATE_LIMIT_BURST 200
/* How long available_space() keeps returning its cached result before it
 * recomputes it. */
78 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Textual names of the Storage enum values. The two macro invocations that
 * follow generate the string lookup helpers for "storage" and the
 * config_parse_storage() configuration parser. */
80 static const char* const storage_table[] = {
81 [STORAGE_AUTO] = "auto",
82 [STORAGE_VOLATILE] = "volatile",
83 [STORAGE_PERSISTENT] = "persistent",
84 [STORAGE_NONE] = "none"
87 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
88 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Textual names of the SplitMode enum values. The two macro invocations that
 * follow generate the string lookup helpers for "split_mode" and the
 * config_parse_split_mode() configuration parser. */
90 static const char* const split_mode_table[] = {
91 [SPLIT_NONE] = "none",
93 [SPLIT_LOGIN] = "login"
96 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
97 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Computes how much space is still available to the journal in the journal
 * directory currently in use and caches the value on the Server. While the
 * cached value is younger than RECHECK_AVAILABLE_SPACE_USEC it is returned
 * directly without touching the file system. */
99 static uint64_t available_space(Server *s) {
104 uint64_t sum = 0, avail = 0, ss_avail = 0;
110 ts = now(CLOCK_MONOTONIC);
/* Serve the cached value as long as it is still fresh. */
112 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
113 return s->cached_available_space;
115 r = sd_id128_get_machine(&machine);
/* Pick the directory prefix and the metrics that belong together: the
 * system ones when the system journal is open, the runtime ones otherwise. */
119 if (s->system_journal) {
120 f = "/var/log/journal/";
121 m = &s->system_metrics;
123 f = "/run/log/journal/";
124 m = &s->runtime_metrics;
/* The directory that is scanned is <prefix><machine id>. */
129 p = strappend(f, sd_id128_to_string(machine, ids));
139 if (fstatvfs(dirfd(d), &ss) < 0)
145 union dirent_storage buf;
147 r = readdir_r(d, &buf.de, &de);
/* Only entries named *.journal or *.journal~ that are regular files
 * contribute to the usage sum. */
154 if (!endswith(de->d_name, ".journal") &&
155 !endswith(de->d_name, ".journal~"))
158 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
161 if (!S_ISREG(st.st_mode))
/* st_blocks is converted to bytes using 512 bytes per block. */
164 sum += (uint64_t) st.st_blocks * 512UL;
/* Budget that remains below the configured maximum usage (never negative). */
167 avail = sum >= m->max_use ? 0 : m->max_use - sum;
/* Free space reported by the file system, reduced by the amount that is
 * configured to be kept free (never negative). */
169 ss_avail = ss.f_bsize * ss.f_bavail;
171 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
/* NOTE(review): presumably the smaller of the two limits wins here; the
 * statement guarded by this condition is not visible. */
173 if (ss_avail < avail)
/* Remember the result and when it was computed. */
176 s->cached_available_space = avail;
177 s->cached_available_space_timestamp = ts;
/* Resolves the "adm" group into s->file_gid. Once s->file_gid_valid is set
 * the function does nothing more on later calls; a failed lookup is logged
 * as a warning. */
185 static void server_read_file_gid(Server *s) {
186 const char *adm = "adm";
191 if (s->file_gid_valid)
194 r = get_group_creds(&adm, &s->file_gid);
196 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
198 /* if we couldn't read the gid, then it will be 0, but that's
199 * fine and we shouldn't try to resolve the group again, so
200 * let's just pretend it worked right-away. */
201 s->file_gid_valid = true;
/* Adjusts ownership, mode and ACL of a journal file. The file is set to
 * mode 0640, owner 0 and the group stored in s->file_gid; afterwards the
 * file's ACL is patched so that it carries a user entry for uid with read
 * permission. Every failure is only logged as a warning ("ignoring"). */
204 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
209 acl_permset_t permset;
/* Make sure s->file_gid has been looked up before it is used below. */
214 server_read_file_gid(s);
216 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
218 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
224 acl = acl_get_fd(f->fd);
226 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an existing ACL entry for this uid; the calls that follow create
 * a new ACL_USER entry qualified with the uid. */
230 r = acl_find_uid(acl, uid, &entry);
233 if (acl_create_entry(&acl, &entry) < 0 ||
234 acl_set_tag_type(entry, ACL_USER) < 0 ||
235 acl_set_qualifier(entry, &uid) < 0) {
236 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Grant read permission on the entry and recompute the ACL mask. */
241 if (acl_get_permset(entry, &permset) < 0 ||
242 acl_add_perm(permset, ACL_READ) < 0 ||
243 acl_calc_mask(&acl) < 0) {
244 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Write the modified ACL back to the file. */
248 if (acl_set_fd(f->fd, acl) < 0)
249 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Selects the journal file an entry for the given uid is written to. The
 * runtime journal is used whenever it is open. Otherwise a per-user journal
 * file is looked up in s->user_journals or opened on demand; the system
 * journal is returned as the fallback on the failure paths. */
256 static JournalFile* find_journal(Server *s, uid_t uid) {
264 /* We split up user logs only on /var, not on /run. If the
265 * runtime file is open, we write to it exclusively, in order
266 * to guarantee proper order as soon as we flush /run to
267 * /var and close the runtime file. */
269 if (s->runtime_journal)
270 return s->runtime_journal;
273 return s->system_journal;
275 r = sd_id128_get_machine(&machine);
277 return s->system_journal;
/* Per-user journals are keyed by uid in the user_journals hashmap. */
279 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
/* Path of the per-user file: /var/log/journal/<machine id>/user-<uid>.journal */
283 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
284 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
285 return s->system_journal;
287 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
288 /* Too many open? Then let's close one */
289 f = hashmap_steal_first(s->user_journals);
291 journal_file_close(f);
/* Open (creating if necessary) the per-user file with the system metrics. */
294 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
298 return s->system_journal;
/* Give the owning user read access to the newly opened file. */
300 server_fix_perms(s, f, uid);
/* Register the file; the two statements after the put close it again and
 * fall back to the system journal. */
302 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
304 journal_file_close(f);
305 return s->system_journal;
/* Rotates the runtime journal, the system journal and every per-user
 * journal that is currently open. Rotation failures are logged as errors;
 * server_fix_perms() is applied to the rotated files. */
311 static void server_rotate(Server *s) {
317 log_debug("Rotating...");
/* The runtime journal is rotated with sealing disabled (last argument). */
319 if (s->runtime_journal) {
320 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
/* The pointer may have been reset by the rotate call, hence two messages. */
322 if (s->runtime_journal)
323 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
325 log_error("Failed to create new runtime journal: %s", strerror(-r));
327 server_fix_perms(s, s->runtime_journal, 0);
330 if (s->system_journal) {
331 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
333 if (s->system_journal)
334 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
336 log_error("Failed to create new system journal: %s", strerror(-r));
339 server_fix_perms(s, s->system_journal, 0);
/* Per-user journals: the key k is the uid the file belongs to. */
342 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
343 r = journal_file_rotate(&f, s->compress, s->seal);
346 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
348 log_error("Failed to create user journal: %s", strerror(-r));
/* Store the new file object under the same key and fix its permissions
 * for the owning uid. */
350 hashmap_replace(s->user_journals, k, f);
351 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Vacuums the journal directories of this machine. /var/log/journal/<id> is
 * vacuumed when the system journal is open and /run/log/journal/<id> when
 * the runtime journal is open, each with its own metrics. s->oldest_file_usec
 * is reset first and passed to the vacuum calls as an output location. */
356 static void server_vacuum(Server *s) {
362 log_debug("Vacuuming...");
364 s->oldest_file_usec = 0;
366 r = sd_id128_get_machine(&machine);
368 log_error("Failed to get machine ID: %s", strerror(-r));
372 sd_id128_to_string(machine, ids);
374 if (s->system_journal) {
375 p = strappend("/var/log/journal/", ids);
381 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
/* A missing directory (-ENOENT) is not reported as an error. */
382 if (r < 0 && r != -ENOENT)
383 log_error("Failed to vacuum %s: %s", p, strerror(-r));
387 if (s->runtime_journal) {
388 p = strappend("/run/log/journal/", ids);
394 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
395 if (r < 0 && r != -ENOENT)
396 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* Invalidate the cached value so available_space() recomputes it. */
400 s->cached_available_space_timestamp = 0;
/* Derives a shortened cgroup path for a process by comparing the cgroup
 * path of pid with that of PID 1 (both in SYSTEMD_CGROUP_CONTROLLER). When
 * the process path starts with the init path, a newly allocated copy of the
 * part after that prefix is produced. */
403 static char *shortened_cgroup_path(pid_t pid) {
405 char *process_path, *init_path, *path;
409 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
413 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
/* Cut a trailing "/system" (7 characters) off the init path. */
419 if (endswith(init_path, "/system"))
420 init_path[strlen(init_path) - 7] = 0;
421 else if (streq(init_path, "/"))
424 if (startswith(process_path, init_path)) {
/* Copy only the part of the process path that follows the init prefix. */
427 p = strdup(process_path + strlen(init_path));
/* Classifies the result of a failed journal write. For each recognised
 * error code a message naming the reason for rotating is logged against
 * f->path.
 *
 * f: journal file the failed operation was attempted on; only f->path is
 *    read by the visible code.
 * r: result of the failed operation, a negative errno-style value.
 *
 * NOTE(review): the bool result is presumably "true" for the recognised
 * codes; the return statements are not visible here. */
445 static bool shall_try_append_again(JournalFile *f, int r) {
447 /* -E2BIG Hit configured limit
449 -EDQUOT Quota limit hit
451 -EHOSTDOWN Other machine
452 -EBUSY Unclean shutdown
453 -EPROTONOSUPPORT Unsupported feature
456 -ESHUTDOWN Already archived */
458 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
459 log_debug("%s: Allocation limit reached, rotating.", f->path);
460 else if (r == -EHOSTDOWN)
461 log_info("%s: Journal file from other machine, rotating.", f->path);
462 else if (r == -EBUSY)
463 log_info("%s: Unclean shutdown, rotating.", f->path);
464 else if (r == -EPROTONOSUPPORT)
465 log_info("%s: Unsupported feature, rotating.", f->path);
/* All codes are compared as negative errno values; ESHUTDOWN has to be
 * negated as well, otherwise an already archived file is never matched. */
466 else if (r == -EBADMSG || r == -ENODATA || r == -ESHUTDOWN)
467 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Appends one entry, given as n iovec elements, to the journal selected for
 * uid by find_journal(). The file is rotated first when rotation is
 * suggested for it. A failed append is retried once, unless vacuuming has
 * already happened or shall_try_append_again() rejects the error; failures
 * are logged and otherwise ignored. */
474 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
476 bool vacuumed = false;
483 f = find_journal(s, uid);
487 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
488 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
/* Look the journal up again, as the previous file object may have changed. */
493 f = find_journal(s, uid);
498 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
502 if (vacuumed || !shall_try_append_again(f, r)) {
503 log_error("Failed to write entry, ignoring: %s", strerror(-r));
510 f = find_journal(s, uid);
/* Second and last attempt. */
514 log_debug("Retrying write.");
515 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
517 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* Completes an entry with the trusted metadata fields and hands it on for
 * writing.
 *
 * iovec/n/m: entry fields collected so far, the number of used elements and
 *            the capacity of the array; room for N_IOVEC_META_FIELDS more
 *            elements is asserted.
 * ucred:     credentials of the sending process; source of the _PID, _UID,
 *            _GID fields and of the per-process lookups (_COMM, _EXE,
 *            _CMDLINE, audit data, cgroup, session, owner uid, unit,
 *            SELinux context).
 * label/label_len: SELinux label received with the message, copied into
 *            _SELINUX_CONTEXT.
 * unit_id:   unit name used for _SYSTEMD_UNIT in the second strappend()
 *            call below.
 *
 * Further fields added here: _SOURCE_REALTIME_TIMESTAMP, _BOOT_ID,
 * _MACHINE_ID and _HOSTNAME. Fields whose string could not be built are
 * simply left out. The uid expression at the end depends on s->split_mode:
 * 0 for SPLIT_NONE, the real uid for SPLIT_UID, otherwise the audit login
 * uid (0 when the real uid is 0). */
520 static void dispatch_message_real(
522 struct iovec *iovec, unsigned n, unsigned m,
525 const char *label, size_t label_len,
526 const char *unit_id) {
528 char *pid = NULL, *uid = NULL, *gid = NULL,
529 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
530 *comm = NULL, *cmdline = NULL, *hostname = NULL,
531 *audit_session = NULL, *audit_loginuid = NULL,
532 *exe = NULL, *cgroup = NULL, *session = NULL,
533 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
539 uid_t loginuid = 0, realuid = 0;
544 assert(n + N_IOVEC_META_FIELDS <= m);
552 realuid = ucred->uid;
554 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
555 IOVEC_SET_STRING(iovec[n++], pid);
557 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
558 IOVEC_SET_STRING(iovec[n++], uid);
560 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
561 IOVEC_SET_STRING(iovec[n++], gid);
563 r = get_process_comm(ucred->pid, &t);
565 comm = strappend("_COMM=", t);
569 IOVEC_SET_STRING(iovec[n++], comm);
572 r = get_process_exe(ucred->pid, &t);
574 exe = strappend("_EXE=", t);
578 IOVEC_SET_STRING(iovec[n++], exe);
581 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
583 cmdline = strappend("_CMDLINE=", t);
587 IOVEC_SET_STRING(iovec[n++], cmdline);
590 r = audit_session_from_pid(ucred->pid, &audit);
592 if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
593 IOVEC_SET_STRING(iovec[n++], audit_session);
595 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
597 if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
598 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
600 t = shortened_cgroup_path(ucred->pid);
602 cgroup = strappend("_SYSTEMD_CGROUP=", t);
606 IOVEC_SET_STRING(iovec[n++], cgroup);
610 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
611 session = strappend("_SYSTEMD_SESSION=", t);
615 IOVEC_SET_STRING(iovec[n++], session);
/* sd_pid_get_owner_uid() is keyed by process ID, like the other sd_pid_*()
 * lookups above, so the sender's PID (not its UID) must be passed. */
618 if (sd_pid_get_owner_uid(ucred->pid, &owner) >= 0)
619 if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
620 IOVEC_SET_STRING(iovec[n++], owner_uid);
623 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
624 unit = strappend("_SYSTEMD_UNIT=", t);
627 unit = strappend("_SYSTEMD_UNIT=", unit_id);
630 IOVEC_SET_STRING(iovec[n++], unit);
/* The label is not necessarily NUL terminated, so it is copied by length
 * and terminated explicitly; sizeof() of the literal accounts for the NUL. */
634 selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
635 if (selinux_context) {
636 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
637 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
638 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
639 IOVEC_SET_STRING(iovec[n++], selinux_context);
642 security_context_t con;
/* Alternative source of the context: ask SELinux for the context of the PID. */
644 if (getpidcon(ucred->pid, &con) >= 0) {
645 selinux_context = strappend("_SELINUX_CONTEXT=", con);
647 IOVEC_SET_STRING(iovec[n++], selinux_context);
656 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
657 (unsigned long long) timeval_load(tv)) >= 0)
658 IOVEC_SET_STRING(iovec[n++], source_time);
661 /* Note that strictly speaking storing the boot id here is
662 * redundant since the entry includes this in-line
663 * anyway. However, we need this indexed, too. */
664 r = sd_id128_get_boot(&id);
666 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
667 IOVEC_SET_STRING(iovec[n++], boot_id);
669 r = sd_id128_get_machine(&id);
671 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
672 IOVEC_SET_STRING(iovec[n++], machine_id);
674 t = gethostname_malloc();
676 hostname = strappend("_HOSTNAME=", t);
679 IOVEC_SET_STRING(iovec[n++], hostname);
/* Choice of the uid whose journal receives the entry, by split mode. */
685 s->split_mode == SPLIT_NONE ? 0 :
686 (s->split_mode == SPLIT_UID ? realuid :
687 (realuid == 0 ? 0 : loginuid)), iovec, n);
/* The field strings built above are released once the entry is handed off. */
700 free(audit_loginuid);
705 free(selinux_context);
/* Logs a message generated by the journal daemon itself. The entry consists
 * of PRIORITY=6, _TRANSPORT=driver and a MESSAGE= field built from the
 * printf-style format and arguments; a message ID field is added when
 * message_id is not SD_ID128_NULL. The entry is dispatched with the
 * credentials of the calling process. */
708 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
709 char mid[11 + 32 + 1];
710 char buffer[16 + LINE_MAX + 1];
/* Room for the (up to) four fields set here plus the metadata fields that
 * dispatch_message_real() appends. */
711 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
719 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
720 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The formatted text is written directly behind the 8 byte "MESSAGE=" prefix. */
722 memcpy(buffer, "MESSAGE=", 8);
723 va_start(ap, format);
724 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
726 char_array_0(buffer);
727 IOVEC_SET_STRING(iovec[n++], buffer);
729 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
730 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
732 IOVEC_SET_STRING(iovec[n++], mid);
/* The daemon acts as the sender of its own messages. */
736 ucred.pid = getpid();
737 ucred.uid = getuid();
738 ucred.gid = getgid();
740 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Entry point for messages received from clients. The priority is compared
 * against s->max_level_store, the message is run through the rate limiter
 * keyed by a truncated form of the sender's shortened cgroup path, and the
 * entry is then passed on to dispatch_message_real(). When the rate limiter
 * reports suppressed messages, a driver message stating the count is logged. */
743 void server_dispatch_message(
745 struct iovec *iovec, unsigned n, unsigned m,
748 const char *label, size_t label_len,
753 char *path = NULL, *c;
756 assert(iovec || n == 0);
/* Compare the message's level with the configured maximum level for storage. */
761 if (LOG_PRI(priority) > s->max_level_store)
767 path = shortened_cgroup_path(ucred->pid);
771 /* example: /user/lennart/3/foobar
772 * /system/dbus.service/foobar
774 * So let's cut off everything past the third /, since that is
775 * where user directories start */
777 c = strchr(path, '/');
779 c = strchr(c+1, '/');
781 c = strchr(c+1, '/');
/* The currently available space is passed to the rate limiter as well. */
787 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
794 /* Write a suppression message if we suppressed something */
796 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
801 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
/* Opens the journal files of this machine as far as they are not open yet.
 * The system journal below /var/log/journal/<machine id> is opened when the
 * storage mode is persistent or auto and /run/systemd/journal/flushed
 * exists. The runtime journal below /run/log/journal/<machine id> is handled
 * whenever the storage mode is not "none". After a successful open the
 * permissions are fixed and a driver message with the size limit is logged. */
805 static int system_journal_open(Server *s) {
811 r = sd_id128_get_machine(&machine);
815 sd_id128_to_string(machine, ids);
817 if (!s->system_journal &&
818 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
819 access("/run/systemd/journal/flushed", F_OK) >= 0) {
821 /* If in auto mode: first try to create the machine
822 * path, but not the prefix.
824 * If in persistent mode: create /var/log/journal and
825 * the machine path */
827 if (s->storage == STORAGE_PERSISTENT)
828 (void) mkdir("/var/log/journal/", 0755);
830 fn = strappend("/var/log/journal/", ids);
/* The result of mkdir() is deliberately discarded. */
834 (void) mkdir(fn, 0755);
837 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
841 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
845 char fb[FORMAT_BYTES_MAX];
847 server_fix_perms(s, s->system_journal, 0);
848 server_driver_message(s, SD_ID128_NULL, "Allowing system journal files to grow to %s.",
849 format_bytes(fb, sizeof(fb), s->system_metrics.max_use));
/* -ENOENT and -EROFS are not worth a warning. */
853 if (r != -ENOENT && r != -EROFS)
854 log_warning("Failed to open system journal: %s", strerror(-r));
860 if (!s->runtime_journal &&
861 (s->storage != STORAGE_NONE)) {
863 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
867 if (s->system_journal) {
869 /* Try to open the runtime journal, but only
870 * if it already exists, so that we can flush
871 * it into the system journal */
873 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
878 log_warning("Failed to open runtime journal: %s", strerror(-r));
885 /* OK, we really need the runtime journal, so create
886 * it if necessary. */
888 (void) mkdir_parents(fn, 0755);
889 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
893 log_error("Failed to open runtime journal: %s", strerror(-r));
898 if (s->runtime_journal) {
899 char fb[FORMAT_BYTES_MAX];
901 server_fix_perms(s, s->runtime_journal, 0);
902 server_driver_message(s, SD_ID128_NULL, "Allowing runtime journal files to grow to %s.",
903 format_bytes(fb, sizeof(fb), s->runtime_metrics.max_use));
/* Copies the entries of the runtime journal into the system journal. This
 * only applies to the storage modes auto and persistent, and only when a
 * runtime journal is open and the system journal could be opened. Entries
 * are read through the sd-journal API restricted to runtime files; a failed
 * copy is retried once if shall_try_append_again() allows it. At the end
 * the runtime journal is closed and /run/log/journal is removed. */
910 static int server_flush_to_var(Server *s) {
913 sd_journal *j = NULL;
917 if (s->storage != STORAGE_AUTO &&
918 s->storage != STORAGE_PERSISTENT)
921 if (!s->runtime_journal)
/* Try to open the system journal now, in case it is not open yet. */
924 system_journal_open(s);
926 if (!s->system_journal)
929 log_debug("Flushing to /var...");
931 r = sd_id128_get_machine(&machine);
933 log_error("Failed to get machine id: %s", strerror(-r));
937 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
939 log_error("Failed to read runtime journal: %s", strerror(-r));
943 SD_JOURNAL_FOREACH(j) {
948 assert(f && f->current_offset > 0);
/* Load the entry object at the file's current offset. */
950 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
952 log_error("Can't read entry: %s", strerror(-r));
956 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
960 if (!shall_try_append_again(s->system_journal, r)) {
961 log_error("Can't write entry: %s", strerror(-r));
968 log_debug("Retrying write.");
969 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
971 log_error("Can't write entry: %s", strerror(-r));
977 journal_file_post_change(s->system_journal);
/* The runtime journal is closed and forgotten. */
979 journal_file_close(s->runtime_journal);
980 s->runtime_journal = NULL;
983 rm_rf("/run/log/journal", false, true, false);
/* Handles a single epoll event. The event is dispatched on the file
 * descriptor stored in it: the signal fd, the /dev/kmsg fd, the native and
 * syslog datagram sockets, or the stdout listening socket. Events that
 * match none of these are treated as belonging to a StdoutStream whose
 * pointer is stored in the event data. */
991 static int process_event(Server *s, struct epoll_event *ev) {
995 if (ev->data.fd == s->signal_fd) {
996 struct signalfd_siginfo sfsi;
999 if (ev->events != EPOLLIN) {
1000 log_error("Got invalid event from epoll.");
1004 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1005 if (n != sizeof(sfsi)) {
1010 if (errno == EINTR || errno == EAGAIN)
1016 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
/* SIGUSR1: create the "flushed" flag file and flush the runtime journal
 * to /var. */
1018 if (sfsi.ssi_signo == SIGUSR1) {
1019 touch("/run/systemd/journal/flushed");
1020 server_flush_to_var(s);
1024 if (sfsi.ssi_signo == SIGUSR2) {
1032 } else if (ev->data.fd == s->dev_kmsg_fd) {
1035 if (ev->events != EPOLLIN) {
1036 log_error("Got invalid event from epoll.");
1040 r = server_read_dev_kmsg(s);
1046 } else if (ev->data.fd == s->native_fd ||
1047 ev->data.fd == s->syslog_fd) {
1049 if (ev->events != EPOLLIN) {
1050 log_error("Got invalid event from epoll.");
1055 struct msghdr msghdr;
1057 struct ucred *ucred = NULL;
1058 struct timeval *tv = NULL;
1059 struct cmsghdr *cmsg;
1061 size_t label_len = 0;
1063 struct cmsghdr cmsghdr;
1065 /* We use NAME_MAX space for the
1066 * SELinux label here. The kernel
1067 * currently enforces no limit, but
1068 * according to suggestions from the
1069 * SELinux people this will change and
1070 * it will probably be identical to
1071 * NAME_MAX. For now we use that, but
1072 * this should be updated one day when
1073 * the final limit is known.*/
1074 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1075 CMSG_SPACE(sizeof(struct timeval)) +
1076 CMSG_SPACE(sizeof(int)) + /* fd */
1077 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask the socket how many bytes are queued (SIOCINQ) so that the receive
 * buffer can be grown before reading. */
1084 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1085 log_error("SIOCINQ failed: %m");
1089 if (s->buffer_size < (size_t) v) {
/* Grow to at least double the old size; one extra byte is allocated beyond l. */
1093 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1094 b = realloc(s->buffer, l+1);
1097 log_error("Couldn't increase buffer.");
1106 iovec.iov_base = s->buffer;
1107 iovec.iov_len = s->buffer_size;
1111 msghdr.msg_iov = &iovec;
1112 msghdr.msg_iovlen = 1;
1113 msghdr.msg_control = &control;
1114 msghdr.msg_controllen = sizeof(control);
1116 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1119 if (errno == EINTR || errno == EAGAIN)
1122 log_error("recvmsg() failed: %m");
/* Walk the control messages: sender credentials, SELinux label, receive
 * timestamp and passed file descriptors. */
1126 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1128 if (cmsg->cmsg_level == SOL_SOCKET &&
1129 cmsg->cmsg_type == SCM_CREDENTIALS &&
1130 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1131 ucred = (struct ucred*) CMSG_DATA(cmsg);
1132 else if (cmsg->cmsg_level == SOL_SOCKET &&
1133 cmsg->cmsg_type == SCM_SECURITY) {
1134 label = (char*) CMSG_DATA(cmsg);
1135 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1136 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1137 cmsg->cmsg_type == SO_TIMESTAMP &&
1138 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1139 tv = (struct timeval*) CMSG_DATA(cmsg);
1140 else if (cmsg->cmsg_level == SOL_SOCKET &&
1141 cmsg->cmsg_type == SCM_RIGHTS) {
1142 fds = (int*) CMSG_DATA(cmsg);
1143 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* Syslog socket: only plain datagrams without file descriptors are processed. */
1147 if (ev->data.fd == s->syslog_fd) {
1150 if (n > 0 && n_fds == 0) {
1151 e = memchr(s->buffer, '\n', n);
1157 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1158 } else if (n_fds > 0)
1159 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native socket: either a datagram payload, or an empty datagram that
 * carries exactly one file descriptor. */
1162 if (n > 0 && n_fds == 0)
1163 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1164 else if (n == 0 && n_fds == 1)
1165 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1167 log_warning("Got too many file descriptors via native socket. Ignoring.");
/* Received file descriptors are closed here. */
1170 close_many(fds, n_fds);
1175 } else if (ev->data.fd == s->stdout_fd) {
1177 if (ev->events != EPOLLIN) {
1178 log_error("Got invalid event from epoll.");
1182 stdout_stream_new(s);
1186 StdoutStream *stream;
/* Any event bit other than EPOLLIN and EPOLLHUP is invalid here. */
1188 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1189 log_error("Got invalid event from epoll.");
1193 /* If it is none of the well-known fds, it must be an
1194 * stdout stream fd. Note that this is a bit ugly here
1195 * (since we rely that none of the well-known fds
1196 * could be interpreted as pointer), but nonetheless
1197 * safe, since the well-known fds would never get an
1198 * fd > 4096, i.e. beyond the first memory page */
1200 stream = ev->data.ptr;
/* A result <= 0 from processing leads to the stream being freed. */
1202 if (stdout_stream_process(stream) <= 0)
1203 stdout_stream_free(stream);
1208 log_error("Unknown event.");
/* Sets the process signal mask to SIGINT, SIGTERM, SIGUSR1 and SIGUSR2,
 * creates a non-blocking, close-on-exec signalfd for the same set in
 * s->signal_fd and registers it with the epoll instance for EPOLLIN.
 * Failures of signalfd() and epoll_ctl() are logged. */
1212 static int open_signalfd(Server *s) {
1214 struct epoll_event ev;
1218 assert_se(sigemptyset(&mask) == 0);
1219 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
/* SIG_SETMASK replaces the whole signal mask with this set. */
1220 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1222 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1223 if (s->signal_fd < 0) {
1224 log_error("signalfd(): %m");
1229 ev.events = EPOLLIN;
1230 ev.data.fd = s->signal_fd;
1232 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1233 log_error("epoll_ctl(): %m");
/* Applies journald options given on the kernel command line. The words of
 * /proc/cmdline are scanned for systemd.journald.forward_to_syslog=,
 * systemd.journald.forward_to_kmsg= and systemd.journald.forward_to_console=
 * whose boolean value is stored in the matching Server field. Unparsable
 * values and other words starting with systemd.journald produce a warning. */
1240 static int server_parse_proc_cmdline(Server *s) {
1241 char *line, *w, *state;
/* The check is made before /proc/cmdline is read when running in a container. */
1245 if (detect_container(NULL) > 0)
1248 r = read_one_line_file("/proc/cmdline", &line);
1250 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1254 FOREACH_WORD_QUOTED(w, l, line, state) {
1257 word = strndup(w, l);
/* The numeric offsets below are the lengths of the respective option
 * prefixes, i.e. they point at the value after '='. */
1263 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1264 r = parse_boolean(word + 35);
1266 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1268 s->forward_to_syslog = r;
1269 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1270 r = parse_boolean(word + 33);
1272 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1274 s->forward_to_kmsg = r;
1275 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1276 r = parse_boolean(word + 36);
1278 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1280 s->forward_to_console = r;
1281 } else if (startswith(word, "systemd.journald"))
1282 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Reads /etc/systemd/journald.conf and applies its "Journal" section to the
 * Server via the gperf generated lookup table. Failure to open the file
 * (ENOENT is checked for separately) and parse errors are logged as
 * warnings. */
1294 static int server_parse_config_file(Server *s) {
1301 fn = "/etc/systemd/journald.conf";
/* "e" opens the file with the close-on-exec flag set. */
1302 f = fopen(fn, "re");
1304 if (errno == ENOENT)
1307 log_warning("Failed to open configuration file %s: %m", fn);
1311 r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
1313 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Initializes the Server: sets defaults, reads the configuration file and
 * the kernel command line, creates the user journal hashmap, the mmap cache
 * and the epoll instance, takes over sockets passed in by the service
 * manager, opens the remaining sockets and event sources and finally opens
 * the journal files. */
1320 static int server_init(Server *s) {
/* All file descriptors start out as "not open". */
1326 s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1330 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1331 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1333 s->forward_to_syslog = true;
1335 s->max_level_store = LOG_DEBUG;
1336 s->max_level_syslog = LOG_DEBUG;
1337 s->max_level_kmsg = LOG_NOTICE;
1338 s->max_level_console = LOG_INFO;
/* Both metrics structures are filled with 0xFF bytes before the
 * configuration is parsed. */
1340 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1341 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* The kernel command line is parsed after the configuration file. */
1343 server_parse_config_file(s);
1344 server_parse_proc_cmdline(s);
1346 mkdir_p("/run/systemd/journal", 0755);
1348 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1349 if (!s->user_journals)
1352 s->mmap = mmap_cache_new();
1356 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1357 if (s->epoll_fd < 0) {
1358 log_error("Failed to create epoll object: %m");
1362 n = sd_listen_fds(true);
1364 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
/* Classify each passed descriptor by socket type and path; at most one
 * socket of each kind is accepted. */
1368 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1370 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1372 if (s->native_fd >= 0) {
1373 log_error("Too many native sockets passed.");
1379 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1381 if (s->stdout_fd >= 0) {
1382 log_error("Too many stdout sockets passed.");
1388 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1390 if (s->syslog_fd >= 0) {
1391 log_error("Too many /dev/log sockets passed.");
1398 log_error("Unknown socket passed.");
1403 r = server_open_syslog_socket(s);
1407 r = server_open_native_socket(s);
1411 r = server_open_stdout_socket(s);
1415 r = server_open_dev_kmsg(s);
1419 r = server_open_kernel_seqnum(s);
1423 r = open_signalfd(s);
1427 s->udev = udev_new();
/* The rate limiter is created with the (possibly configured) interval and burst. */
1431 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1435 r = system_journal_open(s);
/* Calls journal_file_maybe_append_tag() with the current realtime clock
 * value for the system journal, if open, and for every per-user journal. */
1442 static void server_maybe_append_tags(Server *s) {
1448 n = now(CLOCK_REALTIME);
1450 if (s->system_journal)
1451 journal_file_maybe_append_tag(s->system_journal, n);
1453 HASHMAP_FOREACH(f, s->user_journals, i)
1454 journal_file_maybe_append_tag(f, n);
/* Releases everything the Server holds: stdout streams, the system, runtime
 * and per-user journal files, the hashmap, all open file descriptors, the
 * rate limiter, the kernel sequence number mapping, the mmap cache and the
 * udev context. */
1458 static void server_done(Server *s) {
/* Keep freeing the stream at the head of the list until none is left. */
1462 while (s->stdout_streams)
1463 stdout_stream_free(s->stdout_streams);
1465 if (s->system_journal)
1466 journal_file_close(s->system_journal);
1468 if (s->runtime_journal)
1469 journal_file_close(s->runtime_journal);
/* Close every per-user journal before the hashmap itself is freed. */
1471 while ((f = hashmap_steal_first(s->user_journals)))
1472 journal_file_close(f);
1474 hashmap_free(s->user_journals);
/* File descriptors are only closed when they are valid (>= 0). */
1476 if (s->epoll_fd >= 0)
1477 close_nointr_nofail(s->epoll_fd);
1479 if (s->signal_fd >= 0)
1480 close_nointr_nofail(s->signal_fd);
1482 if (s->syslog_fd >= 0)
1483 close_nointr_nofail(s->syslog_fd);
1485 if (s->native_fd >= 0)
1486 close_nointr_nofail(s->native_fd);
1488 if (s->stdout_fd >= 0)
1489 close_nointr_nofail(s->stdout_fd);
1491 if (s->dev_kmsg_fd >= 0)
1492 close_nointr_nofail(s->dev_kmsg_fd);
1495 journal_rate_limit_free(s->rate_limit);
/* The kernel sequence number is a mapping of sizeof(uint64_t) bytes. */
1497 if (s->kernel_seqnum)
1498 munmap(s->kernel_seqnum, sizeof(uint64_t));
1504 mmap_cache_unref(s->mmap);
1507 udev_unref(s->udev);
/* Entry point of the journal daemon. It rejects command line arguments,
 * sets up logging, initializes the server, vacuums, flushes the runtime
 * journal and the kernel message buffer, and then runs the epoll based
 * event loop. The epoll timeout is derived from the retention time and from
 * the next "evolve" time reported for the system journal. On exit a stop
 * message is logged and the server is torn down. */
1510 int main(int argc, char *argv[]) {
1514 /* if (getppid() != 1) { */
1515 /* log_error("This program should be invoked by init only."); */
1516 /* return EXIT_FAILURE; */
1520 log_error("This program does not take arguments.");
1521 return EXIT_FAILURE;
1524 log_set_target(LOG_TARGET_SAFE);
1525 log_set_facility(LOG_SYSLOG);
1526 log_parse_environment();
1531 r = server_init(&server);
1535 server_vacuum(&server);
1536 server_flush_to_var(&server);
1537 server_flush_dev_kmsg(&server);
1539 log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
1540 server_driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
1544 "STATUS=Processing requests...");
1547 struct epoll_event event;
1551 n = now(CLOCK_REALTIME);
/* Retention handling only applies when a retention time is configured and
 * the age of the oldest file is known. */
1553 if (server.max_retention_usec > 0 && server.oldest_file_usec > 0) {
1555 /* The retention time is reached, so let's vacuum! */
1556 if (server.oldest_file_usec + server.max_retention_usec < n) {
1557 log_info("Retention time reached.");
1558 server_rotate(&server);
1559 server_vacuum(&server);
1563 /* Calculate when to rotate the next time */
/* Microseconds are converted to milliseconds, rounding up. */
1564 t = (int) ((server.oldest_file_usec + server.max_retention_usec - n + USEC_PER_MSEC - 1) / USEC_PER_MSEC);
1565 log_info("Sleeping for %i ms", t);
1569 if (server.system_journal) {
/* Wake up no later than the next evolve time of the system journal. */
1572 if (journal_file_next_evolve_usec(server.system_journal, &u)) {
1576 t = MIN(t, (int) ((u - n + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
/* Wait for a single event, with t as the timeout in milliseconds. */
1581 r = epoll_wait(server.epoll_fd, &event, 1, t);
1587 log_error("epoll_wait() failed: %m");
1593 r = process_event(&server, &event);
1600 server_maybe_append_tags(&server);
1601 server_maybe_warn_forward_syslog_missed(&server);
1604 log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
1605 server_driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
1609 "STATUS=Shutting down...");
1611 server_done(&server);
/* A negative result is turned into a failure exit status. */
1613 return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;