1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
25 #include <sys/signalfd.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
35 #include <systemd/sd-journal.h>
36 #include <systemd/sd-messages.h>
37 #include <systemd/sd-daemon.h>
40 #include <systemd/sd-login.h>
45 #include "journal-file.h"
46 #include "socket-util.h"
47 #include "cgroup-util.h"
51 #include "conf-parser.h"
52 #include "journal-internal.h"
53 #include "journal-vacuum.h"
54 #include "journal-authenticate.h"
56 #include "journald-rate-limit.h"
57 #include "journald-kmsg.h"
58 #include "journald-syslog.h"
59 #include "journald-stream.h"
60 #include "journald-console.h"
61 #include "journald-native.h"
65 #include <acl/libacl.h>
70 #include <selinux/selinux.h>
/* Upper bound on the number of per-user journal files kept open at once:
 * find_journal() closes entries of s->user_journals while the map has
 * reached this size. */
73 #define USER_JOURNALS_MAX 1024
/* Values server_init() stores into s->rate_limit_interval and
 * s->rate_limit_burst before the configuration is parsed. */
75 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
76 #define DEFAULT_RATE_LIMIT_BURST 200
/* How long available_space() keeps returning its cached result before it
 * recomputes it. */
78 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Names of the Storage enum values, indexed by enum constant.
 *
 * The two macro invocations after the table are defined outside this file;
 * presumably they generate the string<->enum lookup helpers and the
 * config_parse_storage() parser, the latter using the given error text --
 * confirm against the macro definitions.
 *
 * NOTE(review): the closing "};" of the initializer is not present in this
 * listing. */
80 static const char* const storage_table[] = {
81 [STORAGE_AUTO] = "auto",
82 [STORAGE_VOLATILE] = "volatile",
83 [STORAGE_PERSISTENT] = "persistent",
84 [STORAGE_NONE] = "none"
87 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
88 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Names of the SplitMode enum values, indexed by enum constant, followed by
 * the macro invocations that (presumably -- the macros are defined elsewhere)
 * generate the lookup helpers and the config_parse_split_mode() parser.
 *
 * NOTE(review): dispatch_message_real() compares s->split_mode against
 * SPLIT_UID, but no SPLIT_UID entry is visible in this table, and the closing
 * "};" is not present either -- confirm against the complete file. */
90 static const char* const split_mode_table[] = {
91 [SPLIT_NONE] = "none",
93 [SPLIT_LOGIN] = "login"
96 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
97 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Computes how many more bytes the journal may still use and caches the
 * result in the server object.
 *
 * s: server state; reads system_journal, the system/runtime metrics and the
 *    cache fields, and writes the cache fields.
 *
 * NOTE(review): several lines of this function are not present in this
 * listing (local declarations, error handling, opening/closing the directory,
 * the loop header, the body of the final "if" and the return statement). The
 * comments below describe only the statements that are visible. */
99 static uint64_t available_space(Server *s) {
104 uint64_t sum = 0, avail = 0, ss_avail = 0;
110 ts = now(CLOCK_MONOTONIC);
/* Serve the cached value while it is younger than the recheck interval. */
112 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
113 return s->cached_available_space;
115 r = sd_id128_get_machine(&machine);
/* Choose the directory prefix and the matching limits: the /var ones when
 * the system journal is open; the /run ones are assigned on the lines that
 * follow (the line separating the two cases is not visible here). */
119 if (s->system_journal) {
120 f = "/var/log/journal/";
121 m = &s->system_metrics;
123 f = "/run/log/journal/";
124 m = &s->runtime_metrics;
/* The journal directory is the prefix followed by the machine ID string. */
129 p = strappend(f, sd_id128_to_string(machine, ids));
139 if (fstatvfs(dirfd(d), &ss) < 0)
145 union dirent_storage buf;
147 r = readdir_r(d, &buf.de, &de);
/* Only regular files named *.journal or *.journal~ are looked at. */
154 if (!endswith(de->d_name, ".journal") &&
155 !endswith(de->d_name, ".journal~"))
158 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
161 if (!S_ISREG(st.st_mode))
/* st_blocks is multiplied by 512 to obtain the allocated size in bytes. */
164 sum += (uint64_t) st.st_blocks * 512UL;
/* Room left below the configured maximum usage, clamped at zero. */
167 avail = sum >= m->max_use ? 0 : m->max_use - sum;
/* Free space reported by fstatvfs(), reduced by the amount to keep free and
 * clamped at zero.
 * NOTE(review): the product is formed in the operands' own types before it is
 * stored in the uint64_t; where those types are 32 bit wide it could wrap --
 * consider casting one operand to uint64_t. */
169 ss_avail = ss.f_bsize * ss.f_bavail;
171 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
/* Presumably the smaller of the two limits wins; the statement controlled by
 * this condition is not visible -- confirm. */
173 if (ss_avail < avail)
176 s->cached_available_space = avail;
177 s->cached_available_space_timestamp = ts;
/* Resolves the "adm" group into s->file_gid via get_group_creds() and marks
 * the result as valid in s->file_gid_valid.
 *
 * A failed lookup is only logged as a warning; the valid flag is set
 * regardless, as explained by the comment inside the function.
 *
 * NOTE(review): the statement controlled by the s->file_gid_valid test and
 * the condition guarding the warning are not present in this listing. */
185 static void server_read_file_gid(Server *s) {
186 const char *adm = "adm";
191 if (s->file_gid_valid)
194 r = get_group_creds(&adm, &s->file_gid);
196 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
198 /* if we couldn't read the gid, then it will be 0, but that's
199 * fine and we shouldn't try to resolve the group again, so
200 * let's just pretend it worked right-away. */
201 s->file_gid_valid = true;
/* Adjusts ownership, mode and ACL of an open journal file.
 *
 * s:   server state; supplies the group id via server_read_file_gid().
 * f:   journal file; f->fd is operated on, f->path is used in log messages.
 * uid: user for whom an ACL entry with read permission is set up.
 *
 * Every failure is only logged as a warning ("ignoring").
 *
 * NOTE(review): the conditions connecting the individual steps (for example
 * whether the ACL part is skipped for some uid values, and the checks on the
 * return values) are not present in this listing, nor is the release of the
 * ACL object. */
204 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
209 acl_permset_t permset;
214 server_read_file_gid(s);
/* Mode 0640, owner uid 0, group s->file_gid. */
216 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
218 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
224 acl = acl_get_fd(f->fd);
226 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an existing ACL entry for uid ... */
230 r = acl_find_uid(acl, uid, &entry);
/* ... and create a new ACL_USER entry qualified with uid (presumably only
 * when none was found -- the guarding condition is not visible). */
233 if (acl_create_entry(&acl, &entry) < 0 ||
234 acl_set_tag_type(entry, ACL_USER) < 0 ||
235 acl_set_qualifier(entry, &uid) < 0) {
236 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Add read permission to the entry and recompute the ACL mask. */
241 if (acl_get_permset(entry, &permset) < 0 ||
242 acl_add_perm(permset, ACL_READ) < 0 ||
243 acl_calc_mask(&acl) < 0) {
244 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
248 if (acl_set_fd(f->fd, acl) < 0)
249 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Picks the journal file an entry for the given uid is written to.
 *
 * s:   server state; reads runtime_journal and system_journal, and reads and
 *      modifies the user_journals map.
 * uid: user the entry is attributed to.
 *
 * Returns s->runtime_journal whenever it is open. Otherwise a per-user file
 * /var/log/journal/<machine-id>/user-<uid>.journal is looked up in, or opened
 * and added to, s->user_journals; on the visible failure paths
 * s->system_journal is returned instead.
 *
 * NOTE(review): several conditions and the final return are not present in
 * this listing (for instance which uid values go straight to the system
 * journal, and the handling of a cache hit). */
256 static JournalFile* find_journal(Server *s, uid_t uid) {
264 /* We split up user logs only on /var, not on /run. If the
265 * runtime file is open, we write to it exclusively, in order
266 * to guarantee proper order as soon as we flush /run to
267 * /var and close the runtime file. */
269 if (s->runtime_journal)
270 return s->runtime_journal;
273 return s->system_journal;
275 r = sd_id128_get_machine(&machine);
277 return s->system_journal;
/* Per-user files are cached in a map keyed by uid. */
279 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
283 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
284 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
285 return s->system_journal;
/* Keep the number of open per-user files below USER_JOURNALS_MAX. */
287 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
288 /* Too many open? Then let's close one */
289 f = hashmap_steal_first(s->user_journals);
291 journal_file_close(f);
294 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
298 return s->system_journal;
300 server_fix_perms(s, f, uid);
/* Remember the new file; the close + fallback that follow are presumably the
 * handling of a failed insertion (the condition is not visible). */
302 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
304 journal_file_close(f);
305 return s->system_journal;
/* Rotates the runtime journal, the system journal and every per-user journal
 * via journal_file_rotate(), and re-applies permissions with
 * server_fix_perms() afterwards.
 *
 * The runtime journal is rotated with sealing disabled (false); the system
 * and per-user journals use s->seal. Failures are logged with log_error().
 *
 * NOTE(review): the "if (r < 0)" / "else" lines that decide between the error
 * messages and the server_fix_perms() calls are not present in this listing,
 * nor are the closing braces. */
311 static void server_rotate(Server *s) {
317 log_debug("Rotating...");
319 if (s->runtime_journal) {
320 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
/* The two messages distinguish whether a journal object is still there after
 * the call (path can be printed) or not. */
322 if (s->runtime_journal)
323 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
325 log_error("Failed to create new runtime journal: %s", strerror(-r));
327 server_fix_perms(s, s->runtime_journal, 0);
330 if (s->system_journal) {
331 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
333 if (s->system_journal)
334 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
336 log_error("Failed to create new system journal: %s", strerror(-r));
339 server_fix_perms(s, s->system_journal, 0);
/* Per-user journals: the map entry is replaced with the rotated file and the
 * permissions are fixed for the uid stored in the key. */
342 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
343 r = journal_file_rotate(&f, s->compress, s->seal);
346 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
348 log_error("Failed to create user journal: %s", strerror(-r));
350 hashmap_replace(s->user_journals, k, f);
351 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Runs journal_directory_vacuum() on the journal directories that belong to
 * the currently open journals and invalidates the available-space cache.
 *
 * /var/log/journal/<machine-id> is vacuumed with the system metrics when the
 * system journal is open, /run/log/journal/<machine-id> with the runtime
 * metrics when the runtime journal is open. A result of -ENOENT is not
 * reported; other failures are logged with log_error().
 *
 * NOTE(review): allocation checks, the release of p and the early return
 * after a failed machine ID lookup are not present in this listing. */
356 static void server_vacuum(Server *s) {
362 log_debug("Vacuuming...");
364 r = sd_id128_get_machine(&machine);
366 log_error("Failed to get machine ID: %s", strerror(-r));
370 sd_id128_to_string(machine, ids);
372 if (s->system_journal) {
373 p = strappend("/var/log/journal/", ids);
379 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
380 if (r < 0 && r != -ENOENT)
381 log_error("Failed to vacuum %s: %s", p, strerror(-r));
385 if (s->runtime_journal) {
386 p = strappend("/run/log/journal/", ids);
392 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
393 if (r < 0 && r != -ENOENT)
394 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* A zero timestamp makes the next available_space() call recompute instead of
 * returning the cached value. */
398 s->cached_available_space_timestamp = 0;
/* Returns the cgroup path of a process with the cgroup path of PID 1 removed
 * from its front, as far as the visible code shows.
 *
 * pid: process whose path in the SYSTEMD_CGROUP_CONTROLLER hierarchy is
 *      queried.
 *
 * The copy made with strdup() is heap-allocated; dispatch_message_real() and
 * server_dispatch_message() receive the result of this function.
 *
 * NOTE(review): error handling, the handling of init_path == "/", the
 * fallback when the prefix does not match, the freeing of the intermediate
 * strings and the return statements are not present in this listing. */
401 static char *shortened_cgroup_path(pid_t pid) {
403 char *process_path, *init_path, *path;
407 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
411 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
/* A trailing "/system" (7 characters) is cut off PID 1's path before it is
 * used as the prefix. */
417 if (endswith(init_path, "/system"))
418 init_path[strlen(init_path) - 7] = 0;
419 else if (streq(init_path, "/"))
422 if (startswith(process_path, init_path)) {
425 p = strdup(process_path + strlen(init_path));
443 static bool shall_try_append_again(JournalFile *f, int r) {
445 /* -E2BIG Hit configured limit
447 -EDQUOT Quota limit hit
449 -EHOSTDOWN Other machine
450 -EBUSY Unclean shutdown
451 -EPROTONOSUPPORT Unsupported feature
454 -ESHUTDOWN Already archived */
456 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
457 log_debug("%s: Allocation limit reached, rotating.", f->path);
458 else if (r == -EHOSTDOWN)
459 log_info("%s: Journal file from other machine, rotating.", f->path);
460 else if (r == -EBUSY)
461 log_info("%s: Unclean shutdown, rotating.", f->path);
462 else if (r == -EPROTONOSUPPORT)
463 log_info("%s: Unsupported feature, rotating.", f->path);
464 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
465 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Appends one entry to the journal file chosen by find_journal() for uid,
 * with a single retry.
 *
 * s:     server state; s->seqnum is passed to journal_file_append_entry().
 * uid:   selects the target file via find_journal().
 * iovec: the entry's fields.
 * n:     number of elements in iovec.
 *
 * If the first append fails and the error is retryable according to
 * shall_try_append_again() (and "vacuumed" is not set), the target file is
 * looked up again and the append is repeated once. Failures are logged and
 * otherwise ignored.
 *
 * NOTE(review): the statements between the visible ones (presumably rotation
 * and vacuuming before each renewed find_journal() call, NULL checks and the
 * early returns) are not present in this listing. */
472 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
474 bool vacuumed = false;
481 f = find_journal(s, uid);
485 if (journal_file_rotate_suggested(f)) {
486 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
491 f = find_journal(s, uid);
496 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
500 if (vacuumed || !shall_try_append_again(f, r)) {
501 log_error("Failed to write entry, ignoring: %s", strerror(-r));
508 f = find_journal(s, uid);
512 log_debug("Retrying write.");
513 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
515 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* Appends trusted metadata fields (names starting with "_") to a message's
 * field array and hands the result on for writing.
 *
 * iovec:            field array; the first n elements are already filled in.
 * n:                number of fields already present.
 * m:                capacity of iovec; the assert requires room for
 *                   N_IOVEC_META_FIELDS additional elements.
 * label, label_len: SELinux label bytes and their length; when used they are
 *                   copied and NUL-terminated here.
 * unit_id:          unit name used for _SYSTEMD_UNIT on one of the two
 *                   visible assignments to "unit".
 *
 * Each field string is heap-allocated; a field whose allocation or lookup
 * fails is simply left out.
 *
 * NOTE(review): part of the parameter list (the Server, ucred and tv
 * parameters used below), most local declarations, the "if (r >= 0)" style
 * guards, the #ifdef lines, the call that the argument list near the end
 * belongs to, and most of the free() calls are not present in this listing. */
518 static void dispatch_message_real(
520 struct iovec *iovec, unsigned n, unsigned m,
523 const char *label, size_t label_len,
524 const char *unit_id) {
526 char *pid = NULL, *uid = NULL, *gid = NULL,
527 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
528 *comm = NULL, *cmdline = NULL, *hostname = NULL,
529 *audit_session = NULL, *audit_loginuid = NULL,
530 *exe = NULL, *cgroup = NULL, *session = NULL,
531 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
537 uid_t loginuid = 0, realuid = 0;
542 assert(n + N_IOVEC_META_FIELDS <= m);
/* Fields derived from the sender's credentials. */
550 realuid = ucred->uid;
552 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
553 IOVEC_SET_STRING(iovec[n++], pid);
555 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
556 IOVEC_SET_STRING(iovec[n++], uid);
558 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
559 IOVEC_SET_STRING(iovec[n++], gid);
/* Fields looked up by the sender's PID. */
561 r = get_process_comm(ucred->pid, &t);
563 comm = strappend("_COMM=", t);
567 IOVEC_SET_STRING(iovec[n++], comm);
570 r = get_process_exe(ucred->pid, &t);
572 exe = strappend("_EXE=", t);
576 IOVEC_SET_STRING(iovec[n++], exe);
579 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
581 cmdline = strappend("_CMDLINE=", t);
585 IOVEC_SET_STRING(iovec[n++], cmdline);
588 r = audit_session_from_pid(ucred->pid, &audit);
590 if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
591 IOVEC_SET_STRING(iovec[n++], audit_session);
593 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
595 if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
596 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
598 t = shortened_cgroup_path(ucred->pid);
600 cgroup = strappend("_SYSTEMD_CGROUP=", t);
604 IOVEC_SET_STRING(iovec[n++], cgroup);
608 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
609 session = strappend("_SYSTEMD_SESSION=", t);
613 IOVEC_SET_STRING(iovec[n++], session);
/* NOTE(review): the next call passes ucred->uid to a function whose name says
 * it takes a PID, while the sibling sd_pid_get_session() call above passes
 * ucred->pid. This looks like it should be ucred->pid -- confirm against the
 * sd-login API documentation and fix. */
616 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
617 if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
618 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* _SYSTEMD_UNIT comes either from the sender's cgroup or from unit_id (the
 * line that selects between the two is not visible). */
621 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
622 unit = strappend("_SYSTEMD_UNIT=", t);
625 unit = strappend("_SYSTEMD_UNIT=", unit_id);
628 IOVEC_SET_STRING(iovec[n++], unit);
/* _SELINUX_CONTEXT built from the label passed in: sizeof() of the literal
 * includes its NUL byte, so the allocation holds prefix + label + NUL. */
632 selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
633 if (selinux_context) {
634 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
635 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
636 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
637 IOVEC_SET_STRING(iovec[n++], selinux_context);
/* Alternative source for the context: ask for the sender's context by PID. */
640 security_context_t con;
642 if (getpidcon(ucred->pid, &con) >= 0) {
643 selinux_context = strappend("_SELINUX_CONTEXT=", con);
645 IOVEC_SET_STRING(iovec[n++], selinux_context);
654 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
655 (unsigned long long) timeval_load(tv)) >= 0)
656 IOVEC_SET_STRING(iovec[n++], source_time);
659 /* Note that strictly speaking storing the boot id here is
660 * redundant since the entry includes this in-line
661 * anyway. However, we need this indexed, too. */
662 r = sd_id128_get_boot(&id);
664 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
665 IOVEC_SET_STRING(iovec[n++], boot_id);
667 r = sd_id128_get_machine(&id);
669 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
670 IOVEC_SET_STRING(iovec[n++], machine_id);
672 t = gethostname_malloc();
674 hostname = strappend("_HOSTNAME=", t);
677 IOVEC_SET_STRING(iovec[n++], hostname);
/* uid argument for the write (presumably of a write_to_journal() call whose
 * first line is not visible): 0 for SPLIT_NONE, the sender's uid for
 * SPLIT_UID, otherwise 0 for root and the audit login uid for everyone
 * else. */
683 s->split_mode == SPLIT_NONE ? 0 :
684 (s->split_mode == SPLIT_UID ? realuid :
685 (realuid == 0 ? 0 : loginuid)), iovec, n);
698 free(audit_loginuid);
703 free(selinux_context);
/* Logs a message generated by journald itself, with priority 6 and transport
 * "driver", attributed to journald's own PID/UID/GID.
 *
 * s:          server state, passed on to dispatch_message_real().
 * message_id: added as a MESSAGE_ID field unless it equals SD_ID128_NULL.
 * format/...: printf-style message text; it is formatted into a fixed buffer
 *             after the "MESSAGE=" prefix, so at most sizeof(buffer) - 8 bytes
 *             including the terminating NUL are kept.
 *
 * The iovec array has room for the 4 fields set here plus
 * N_IOVEC_META_FIELDS entries added by dispatch_message_real().
 *
 * NOTE(review): the declarations of n, ap and ucred, the va_end() call and
 * the closing braces are not present in this listing. */
706 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
/* Presumably sized for "MESSAGE_ID=" (11) + 32 hex digits + NUL; the
 * MESSAGE_ID() macro is defined elsewhere -- confirm. */
707 char mid[11 + 32 + 1];
708 char buffer[16 + LINE_MAX + 1];
709 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
717 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
718 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
720 memcpy(buffer, "MESSAGE=", 8);
721 va_start(ap, format);
722 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
724 char_array_0(buffer);
725 IOVEC_SET_STRING(iovec[n++], buffer);
727 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
728 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
730 IOVEC_SET_STRING(iovec[n++], mid);
734 ucred.pid = getpid();
735 ucred.uid = getuid();
736 ucred.gid = getgid();
738 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Entry point for incoming messages: applies the storage level filter and
 * per-cgroup rate limiting, then passes the message on to
 * dispatch_message_real().
 *
 * iovec, n, m:      field array, number of used elements and capacity.
 * label, label_len: SELinux label, passed through unchanged.
 *
 * Messages whose priority level is above s->max_level_store are filtered.
 * The rate limit key is the sender's shortened cgroup path, and
 * available_space() is passed to journal_rate_limit_test(). A suppression
 * notice reports "rl - 1" messages for that path.
 *
 * NOTE(review): part of the parameter list (the Server, ucred, tv, priority
 * and unit_id parameters used below), the truncation of the path after the
 * third '/', the tests on rl, the early returns and the freeing of path are
 * not present in this listing. */
741 void server_dispatch_message(
743 struct iovec *iovec, unsigned n, unsigned m,
746 const char *label, size_t label_len,
751 char *path = NULL, *c;
754 assert(iovec || n == 0);
759 if (LOG_PRI(priority) > s->max_level_store)
765 path = shortened_cgroup_path(ucred->pid);
769 /* example: /user/lennart/3/foobar
770 * /system/dbus.service/foobar
772 * So let's cut off everything past the third /, since that is
773 * where user directories start */
775 c = strchr(path, '/');
777 c = strchr(c+1, '/');
779 c = strchr(c+1, '/');
785 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
792 /* Write a suppression message if we suppressed something */
794 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
799 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
/* Opens the persistent system journal and/or the runtime journal, depending
 * on s->storage and on what is already open.
 *
 * System journal (/var/log/journal/<machine-id>/system.journal): tried when
 * it is not open yet, storage is "persistent" or "auto", and the flag file
 * /run/systemd/journal/flushed exists. Results of -ENOENT and -EROFS do not
 * produce a warning.
 *
 * Runtime journal (/run/log/journal/<machine-id>/system.journal): tried when
 * it is not open yet and storage is not "none". If the system journal is
 * open, the runtime file is opened without O_CREAT, i.e. only if it already
 * exists; otherwise it is created.
 *
 * After a successful open, permissions are fixed and the size limit is
 * announced via server_driver_message().
 *
 * NOTE(review): allocation checks, free(fn), the success/failure branching on
 * r and the return statement are not present in this listing. */
803 static int system_journal_open(Server *s) {
809 r = sd_id128_get_machine(&machine);
813 sd_id128_to_string(machine, ids);
815 if (!s->system_journal &&
816 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
817 access("/run/systemd/journal/flushed", F_OK) >= 0) {
819 /* If in auto mode: first try to create the machine
820 * path, but not the prefix.
822 * If in persistent mode: create /var/log/journal and
823 * the machine path */
825 if (s->storage == STORAGE_PERSISTENT)
826 (void) mkdir("/var/log/journal/", 0755);
828 fn = strappend("/var/log/journal/", ids);
832 (void) mkdir(fn, 0755);
835 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
839 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
843 char fb[FORMAT_BYTES_MAX];
845 server_fix_perms(s, s->system_journal, 0);
846 server_driver_message(s, SD_ID128_NULL, "Allowing system journal files to grow to %s.",
847 format_bytes(fb, sizeof(fb), s->system_metrics.max_use));
851 if (r != -ENOENT && r != -EROFS)
852 log_warning("Failed to open system journal: %s", strerror(-r));
858 if (!s->runtime_journal &&
859 (s->storage != STORAGE_NONE)) {
861 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
865 if (s->system_journal) {
867 /* Try to open the runtime journal, but only
868 * if it already exists, so that we can flush
869 * it into the system journal */
871 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
876 log_warning("Failed to open runtime journal: %s", strerror(-r));
883 /* OK, we really need the runtime journal, so create
884 * it if necessary. */
886 (void) mkdir_parents(fn, 0755);
887 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
891 log_error("Failed to open runtime journal: %s", strerror(-r));
896 if (s->runtime_journal) {
897 char fb[FORMAT_BYTES_MAX];
899 server_fix_perms(s, s->runtime_journal, 0);
900 server_driver_message(s, SD_ID128_NULL, "Allowing runtime journal files to grow to %s.",
901 format_bytes(fb, sizeof(fb), s->runtime_metrics.max_use));
/* Copies the entries of the runtime journal into the system journal, then
 * closes the runtime journal and removes /run/log/journal.
 *
 * Preconditions tested at the top: storage is "auto" or "persistent", a
 * runtime journal is open, and the system journal is open after
 * system_journal_open() has been called.
 *
 * Entries are read through an sd_journal handle opened with
 * SD_JOURNAL_RUNTIME_ONLY. A failed copy is retried once when
 * shall_try_append_again() allows it.
 *
 * NOTE(review): the statements controlled by the precondition tests, the
 * lookup of f and o inside the loop, the rotate/vacuum step before the retry,
 * the "finish" cleanup (closing j) and the return statements are not present
 * in this listing. */
908 static int server_flush_to_var(Server *s) {
911 sd_journal *j = NULL;
915 if (s->storage != STORAGE_AUTO &&
916 s->storage != STORAGE_PERSISTENT)
919 if (!s->runtime_journal)
922 system_journal_open(s);
924 if (!s->system_journal)
927 log_debug("Flushing to /var...");
929 r = sd_id128_get_machine(&machine);
931 log_error("Failed to get machine id: %s", strerror(-r));
935 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
937 log_error("Failed to read runtime journal: %s", strerror(-r));
941 SD_JOURNAL_FOREACH(j) {
946 assert(f && f->current_offset > 0);
948 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
950 log_error("Can't read entry: %s", strerror(-r));
954 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
958 if (!shall_try_append_again(s->system_journal, r)) {
959 log_error("Can't write entry: %s", strerror(-r));
966 log_debug("Retrying write.");
967 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
969 log_error("Can't write entry: %s", strerror(-r));
975 journal_file_post_change(s->system_journal);
977 journal_file_close(s->runtime_journal);
978 s->runtime_journal = NULL;
981 rm_rf("/run/log/journal", false, true, false);
/* Handles one epoll event by dispatching on the file descriptor it refers to:
 * the signal fd, the /dev/kmsg fd, the native and syslog datagram sockets,
 * the stdout listening socket, or (any other value) a stdout stream whose
 * pointer is stored in the event data.
 *
 * s:  server state.
 * ev: the event returned by epoll_wait().
 *
 * NOTE(review): many lines are not present in this listing, among them all
 * return statements, the handling of SIGUSR2 and of the remaining signals,
 * the receive loop header, several local declarations (n, v, l, b, e, label,
 * fds, n_fds, iovec, control) and the closing braces. The comments below
 * describe only what the visible statements do. */
989 static int process_event(Server *s, struct epoll_event *ev) {
/* --- signal fd --- */
993 if (ev->data.fd == s->signal_fd) {
994 struct signalfd_siginfo sfsi;
997 if (ev->events != EPOLLIN) {
998 log_error("Got invalid event from epoll.");
1002 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1003 if (n != sizeof(sfsi)) {
1008 if (errno == EINTR || errno == EAGAIN)
1014 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
/* SIGUSR1: create the flag file that system_journal_open() checks for, then
 * flush the runtime journal to /var. */
1016 if (sfsi.ssi_signo == SIGUSR1) {
1017 touch("/run/systemd/journal/flushed");
1018 server_flush_to_var(s);
1022 if (sfsi.ssi_signo == SIGUSR2) {
/* --- kernel log device --- */
1030 } else if (ev->data.fd == s->dev_kmsg_fd) {
1033 if (ev->events != EPOLLIN) {
1034 log_error("Got invalid event from epoll.");
1038 r = server_read_dev_kmsg(s);
/* --- native and syslog datagram sockets --- */
1044 } else if (ev->data.fd == s->native_fd ||
1045 ev->data.fd == s->syslog_fd) {
1047 if (ev->events != EPOLLIN) {
1048 log_error("Got invalid event from epoll.");
1053 struct msghdr msghdr;
1055 struct ucred *ucred = NULL;
1056 struct timeval *tv = NULL;
1057 struct cmsghdr *cmsg;
1059 size_t label_len = 0;
1061 struct cmsghdr cmsghdr;
1063 /* We use NAME_MAX space for the
1064 * SELinux label here. The kernel
1065 * currently enforces no limit, but
1066 * according to suggestions from the
1067 * SELinux people this will change and
1068 * it will probably be identical to
1069 * NAME_MAX. For now we use that, but
1070 * this should be updated one day when
1071 * the final limit is known.*/
1072 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1073 CMSG_SPACE(sizeof(struct timeval)) +
1074 CMSG_SPACE(sizeof(int)) + /* fd */
1075 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask the socket how many bytes are queued (SIOCINQ) and grow the receive
 * buffer if it is smaller than that. */
1082 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1083 log_error("SIOCINQ failed: %m");
1087 if (s->buffer_size < (size_t) v) {
/* New size: at least the queued size plus LINE_MAX, or double the old size,
 * whichever is larger; one extra byte is allocated on top. */
1091 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1092 b = realloc(s->buffer, l+1);
1095 log_error("Couldn't increase buffer.");
1104 iovec.iov_base = s->buffer;
1105 iovec.iov_len = s->buffer_size;
1109 msghdr.msg_iov = &iovec;
1110 msghdr.msg_iovlen = 1;
1111 msghdr.msg_control = &control;
1112 msghdr.msg_controllen = sizeof(control);
1114 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1117 if (errno == EINTR || errno == EAGAIN)
1120 log_error("recvmsg() failed: %m");
/* Pick up the ancillary data: sender credentials, SELinux label, receive
 * timestamp and passed file descriptors. Credentials and timestamp are only
 * accepted when the control message has exactly the expected length. */
1124 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1126 if (cmsg->cmsg_level == SOL_SOCKET &&
1127 cmsg->cmsg_type == SCM_CREDENTIALS &&
1128 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1129 ucred = (struct ucred*) CMSG_DATA(cmsg);
1130 else if (cmsg->cmsg_level == SOL_SOCKET &&
1131 cmsg->cmsg_type == SCM_SECURITY) {
1132 label = (char*) CMSG_DATA(cmsg);
1133 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1134 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1135 cmsg->cmsg_type == SO_TIMESTAMP &&
1136 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1137 tv = (struct timeval*) CMSG_DATA(cmsg);
1138 else if (cmsg->cmsg_level == SOL_SOCKET &&
1139 cmsg->cmsg_type == SCM_RIGHTS) {
1140 fds = (int*) CMSG_DATA(cmsg);
1141 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* Syslog socket: only plain payloads are processed; passed file descriptors
 * are reported and ignored. */
1145 if (ev->data.fd == s->syslog_fd) {
1148 if (n > 0 && n_fds == 0) {
1149 e = memchr(s->buffer, '\n', n);
1155 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1156 } else if (n_fds > 0)
1157 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native socket: either a payload without descriptors, or an empty datagram
 * carrying exactly one descriptor that refers to the message. */
1160 if (n > 0 && n_fds == 0)
1161 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1162 else if (n == 0 && n_fds == 1)
1163 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1165 log_warning("Got too many file descriptors via native socket. Ignoring.");
/* Received descriptors are closed here after processing. */
1168 close_many(fds, n_fds);
/* --- stdout listening socket: a readable event sets up a new stream --- */
1173 } else if (ev->data.fd == s->stdout_fd) {
1175 if (ev->events != EPOLLIN) {
1176 log_error("Got invalid event from epoll.");
1180 stdout_stream_new(s);
/* --- anything else is treated as a stdout stream --- */
1184 StdoutStream *stream;
/* Reject events that carry any bit other than EPOLLIN and EPOLLHUP. */
1186 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1187 log_error("Got invalid event from epoll.");
1191 /* If it is none of the well-known fds, it must be an
1192 * stdout stream fd. Note that this is a bit ugly here
1193 * (since we rely that none of the well-known fds
1194 * could be interpreted as pointer), but nonetheless
1195 * safe, since the well-known fds would never get an
1196 * fd > 4096, i.e. beyond the first memory page */
1198 stream = ev->data.ptr;
/* A result <= 0 from the stream handler leads to the stream being freed. */
1200 if (stdout_stream_process(stream) <= 0)
1201 stdout_stream_free(stream);
1206 log_error("Unknown event.");
/* Sets the process signal mask to SIGINT, SIGTERM, SIGUSR1 and SIGUSR2,
 * creates a non-blocking, close-on-exec signalfd for the same set in
 * s->signal_fd, and registers it with s->epoll_fd for EPOLLIN.
 *
 * Failures of signalfd() and epoll_ctl() are logged with log_error().
 *
 * NOTE(review): the declaration of mask, the initialization of ev, and the
 * return statements are not present in this listing. */
1210 static int open_signalfd(Server *s) {
1212 struct epoll_event ev;
1216 assert_se(sigemptyset(&mask) == 0);
1217 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1218 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1220 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1221 if (s->signal_fd < 0) {
1222 log_error("signalfd(): %m");
1227 ev.events = EPOLLIN;
1228 ev.data.fd = s->signal_fd;
1230 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1231 log_error("epoll_ctl(): %m");
/* Applies journald options given on the kernel command line.
 *
 * /proc/cmdline is read as one line and split into (possibly quoted) words.
 * Three boolean switches are recognized and stored in the server object:
 *   systemd.journald.forward_to_syslog=   -> s->forward_to_syslog
 *   systemd.journald.forward_to_kmsg=     -> s->forward_to_kmsg
 *   systemd.journald.forward_to_console=  -> s->forward_to_console
 * The offsets 35, 33 and 36 are the lengths of these three prefixes, so
 * "word + N" points at the value. Any other word starting with
 * "systemd.journald" only produces a warning.
 *
 * The detect_container() test is the first visible statement; its controlled
 * statement is not visible (presumably the function returns early inside a
 * container -- confirm).
 *
 * NOTE(review): the "if (r < 0)" / "else" lines around the assignments, the
 * allocation check and free of word and line, and the return statements are
 * not present in this listing. */
1238 static int server_parse_proc_cmdline(Server *s) {
1239 char *line, *w, *state;
1243 if (detect_container(NULL) > 0)
1246 r = read_one_line_file("/proc/cmdline", &line);
1248 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1252 FOREACH_WORD_QUOTED(w, l, line, state) {
1255 word = strndup(w, l);
1261 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1262 r = parse_boolean(word + 35);
1264 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1266 s->forward_to_syslog = r;
1267 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1268 r = parse_boolean(word + 33);
1270 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1272 s->forward_to_kmsg = r;
1273 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1274 r = parse_boolean(word + 36);
1276 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1278 s->forward_to_console = r;
1279 } else if (startswith(word, "systemd.journald"))
1280 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Parses the [Journal] section of /etc/systemd/journald.conf into the server
 * object, using the gperf-generated lookup journald_gperf_lookup.
 *
 * The file is opened with mode "re" (read, close-on-exec). ENOENT from
 * fopen() is tested separately from other errors, which are logged as a
 * warning; a parse failure is logged as a warning too.
 *
 * NOTE(review): the statement controlled by the ENOENT test (presumably a
 * silent return), fclose() and the return statements are not present in this
 * listing. */
1292 static int server_parse_config_file(Server *s) {
1299 fn = "/etc/systemd/journald.conf";
1300 f = fopen(fn, "re");
1302 if (errno == ENOENT)
1305 log_warning("Failed to open configuration file %s: %m", fn);
1309 r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
1311 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Initializes the server object: defaults, configuration, inherited sockets,
 * newly opened sockets/devices, helper objects and finally the journal files.
 *
 * Visible order of operations:
 *  1. all file descriptors are set to -1 and defaults are assigned (rate
 *     limit, forwarding to syslog enabled, maximum levels; both metrics
 *     structures are filled with 0xFF bytes);
 *  2. the configuration file and then the kernel command line are parsed, so
 *     the command line is applied last;
 *  3. /run/systemd/journal is created, and the user journal map, the mmap
 *     cache and the epoll object are set up;
 *  4. file descriptors passed in by the service manager are classified as
 *     native, stdout or /dev/log socket; a second socket of the same kind
 *     and an unknown socket are reported as errors;
 *  5. the syslog, native and stdout sockets, /dev/kmsg, the kernel sequence
 *     number and the signalfd are opened;
 *  6. udev and the rate limiter are created and system_journal_open() is
 *     called.
 *
 * NOTE(review): the assignments of the classified fds, all checks on r and on
 * allocation results, some default assignments and the return statements are
 * not present in this listing. */
1318 static int server_init(Server *s) {
1324 s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1328 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1329 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1331 s->forward_to_syslog = true;
1333 s->max_level_store = LOG_DEBUG;
1334 s->max_level_syslog = LOG_DEBUG;
1335 s->max_level_kmsg = LOG_NOTICE;
1336 s->max_level_console = LOG_INFO;
/* Every byte of the metrics is set to 0xFF; presumably this all-ones pattern
 * marks the values as "not configured" -- confirm against the JournalMetrics
 * consumers. */
1338 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1339 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1341 server_parse_config_file(s);
1342 server_parse_proc_cmdline(s);
1344 mkdir_p("/run/systemd/journal", 0755);
1346 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1347 if (!s->user_journals)
1350 s->mmap = mmap_cache_new();
1354 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1355 if (s->epoll_fd < 0) {
1356 log_error("Failed to create epoll object: %m");
1360 n = sd_listen_fds(true);
1362 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1366 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1368 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1370 if (s->native_fd >= 0) {
1371 log_error("Too many native sockets passed.");
1377 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1379 if (s->stdout_fd >= 0) {
1380 log_error("Too many stdout sockets passed.");
1386 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1388 if (s->syslog_fd >= 0) {
1389 log_error("Too many /dev/log sockets passed.");
1396 log_error("Unknown socket passed.");
1401 r = server_open_syslog_socket(s);
1405 r = server_open_native_socket(s);
1409 r = server_open_stdout_socket(s);
1413 r = server_open_dev_kmsg(s);
1417 r = server_open_kernel_seqnum(s);
1421 r = open_signalfd(s);
1425 s->udev = udev_new();
1429 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1433 r = system_journal_open(s);
/* Calls journal_file_maybe_append_tag() with the current CLOCK_REALTIME time
 * on the system journal (if open) and on every per-user journal. The runtime
 * journal is not touched by the visible statements.
 *
 * NOTE(review): local declarations and possibly preprocessor guards are not
 * present in this listing. */
1440 static void server_maybe_append_tags(Server *s) {
1446 n = now(CLOCK_REALTIME);
1448 if (s->system_journal)
1449 journal_file_maybe_append_tag(s->system_journal, n);
1451 HASHMAP_FOREACH(f, s->user_journals, i)
1452 journal_file_maybe_append_tag(f, n);
/* Releases everything owned by the server object: stdout streams, the system,
 * runtime and per-user journal files, the user journal map, all file
 * descriptors that are open (>= 0), the rate limiter, the kernel sequence
 * number mapping, the mmap cache and the udev context.
 *
 * NOTE(review): the calls on s->rate_limit, s->mmap and s->udev appear here
 * without a visible NULL guard; the guarding lines may simply be absent from
 * this listing, as are the declaration of f, freeing of s->buffer and the
 * closing brace -- confirm against the complete file. */
1456 static void server_done(Server *s) {
/* stdout_stream_free() is expected to unlink the stream from
 * s->stdout_streams, otherwise this loop would not terminate -- confirm. */
1460 while (s->stdout_streams)
1461 stdout_stream_free(s->stdout_streams);
1463 if (s->system_journal)
1464 journal_file_close(s->system_journal);
1466 if (s->runtime_journal)
1467 journal_file_close(s->runtime_journal);
1469 while ((f = hashmap_steal_first(s->user_journals)))
1470 journal_file_close(f);
1472 hashmap_free(s->user_journals);
1474 if (s->epoll_fd >= 0)
1475 close_nointr_nofail(s->epoll_fd);
1477 if (s->signal_fd >= 0)
1478 close_nointr_nofail(s->signal_fd);
1480 if (s->syslog_fd >= 0)
1481 close_nointr_nofail(s->syslog_fd);
1483 if (s->native_fd >= 0)
1484 close_nointr_nofail(s->native_fd);
1486 if (s->stdout_fd >= 0)
1487 close_nointr_nofail(s->stdout_fd);
1489 if (s->dev_kmsg_fd >= 0)
1490 close_nointr_nofail(s->dev_kmsg_fd);
1493 journal_rate_limit_free(s->rate_limit);
1495 if (s->kernel_seqnum)
1496 munmap(s->kernel_seqnum, sizeof(uint64_t));
1502 mmap_cache_unref(s->mmap);
1505 udev_unref(s->udev);
/* Program entry point of the journal daemon.
 *
 * Visible flow: reject command line arguments, set up logging, initialize the
 * server, vacuum old journal files, flush the runtime journal to /var and
 * flush /dev/kmsg, announce start-up, then loop on epoll_wait() handling one
 * event per iteration with process_event(). After the loop the stop is
 * announced and server_done() releases all resources.
 *
 * Exit status: EXIT_FAILURE when arguments were given or when the last result
 * r is negative, EXIT_SUCCESS otherwise.
 *
 * NOTE(review): the declarations of server and r, the argument count test,
 * the sd_notify() calls that the two "STATUS=" strings belong to, the loop
 * header, the handling of the epoll_wait()/process_event() results and the
 * "finish" label are not present in this listing. */
1508 int main(int argc, char *argv[]) {
1512 /* if (getppid() != 1) { */
1513 /* log_error("This program should be invoked by init only."); */
1514 /* return EXIT_FAILURE; */
1518 log_error("This program does not take arguments.");
1519 return EXIT_FAILURE;
1522 log_set_target(LOG_TARGET_SAFE);
1523 log_set_facility(LOG_SYSLOG);
1524 log_parse_environment();
1529 r = server_init(&server);
1533 server_vacuum(&server);
1534 server_flush_to_var(&server);
1535 server_flush_dev_kmsg(&server);
1537 log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
1538 server_driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
1542 "STATUS=Processing requests...");
1545 struct epoll_event event;
/* When the system journal reports a next "evolve" time u, the epoll timeout t
 * is derived from it so the loop wakes up in time. */
1551 if (server.system_journal &&
1552 journal_file_next_evolve_usec(server.system_journal, &u)) {
1555 n = now(CLOCK_REALTIME);
/* Microseconds until u, rounded up to whole milliseconds. */
1560 t = (int) ((u - n + USEC_PER_MSEC - 1) / USEC_PER_MSEC);
1565 r = epoll_wait(server.epoll_fd, &event, 1, t);
1571 log_error("epoll_wait() failed: %m");
1577 r = process_event(&server, &event);
/* Periodic housekeeping visible at the end of the loop body. */
1584 server_maybe_append_tags(&server);
1585 server_maybe_warn_forward_syslog_missed(&server);
1588 log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
1589 server_driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
1593 "STATUS=Shutting down...");
1595 server_done(&server);
1597 return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;