1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
35 #include <systemd/sd-login.h>
41 #include "journal-file.h"
42 #include "socket-util.h"
43 #include "cgroup-util.h"
47 #include "conf-parser.h"
48 #include "journal-internal.h"
49 #include "journal-vacuum.h"
50 #include "journal-authenticate.h"
51 #include "journald-server.h"
52 #include "journald-rate-limit.h"
53 #include "journald-kmsg.h"
54 #include "journald-syslog.h"
55 #include "journald-stream.h"
56 #include "journald-console.h"
57 #include "journald-native.h"
61 #include <acl/libacl.h>
66 #include <selinux/selinux.h>
/* Cap on simultaneously open per-user journal files: find_journal() closes
 * entries of s->user_journals while the map holds at least this many. */
69 #define USER_JOURNALS_MAX 1024
/* Default for Server.sync_interval_usec, assigned in server_init(). */
71 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
/* Defaults for Server.rate_limit_interval and Server.rate_limit_burst,
 * assigned in server_init(). */
72 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
73 #define DEFAULT_RATE_LIMIT_BURST 200
/* available_space() returns its cached value until this much monotonic time
 * has passed since the cache timestamp. */
75 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* String names of the Storage enum values, indexed by enum value.
 * The two macro invocations below are keyed on the "storage" prefix and the
 * Storage type; presumably they generate the string<->enum lookup helpers and
 * the config_parse_storage() callback - confirm against the macro definitions. */
77 static const char* const storage_table[] = {
78 [STORAGE_AUTO] = "auto",
79 [STORAGE_VOLATILE] = "volatile",
80 [STORAGE_PERSISTENT] = "persistent",
81 [STORAGE_NONE] = "none"
84 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
85 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* String names of the SplitMode enum values, indexed by enum value.
 * NOTE(review): only SPLIT_NONE and SPLIT_LOGIN are visible in this listing,
 * yet dispatch_message_real() also tests SPLIT_UID; the embedded numbering
 * skips a line here, so an entry may simply be missing from view.
 * The macros below are keyed on "split_mode"/SplitMode; presumably they
 * generate the lookup helpers and config_parse_split_mode() - confirm. */
87 static const char* const split_mode_table[] = {
88 [SPLIT_NONE] = "none",
90 [SPLIT_LOGIN] = "login"
93 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
94 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Computes how many bytes the journal may still use and caches the result in
 * s->cached_available_space for RECHECK_AVAILABLE_SPACE_USEC.
 *
 * The journal directory is /var/log/journal/<machine-id> when the system
 * journal is open, /run/log/journal/<machine-id> otherwise, with the matching
 * metrics (system_metrics / runtime_metrics). Disk usage is the sum of
 * st_blocks*512 over regular files ending in ".journal" or ".journal~".
 * The candidate value is the smaller of
 *   - max_use minus current usage, and
 *   - free space on the file system minus keep_free,
 * each clamped at 0.
 *
 * NOTE(review): this listing omits several original lines (the embedded
 * numbering skips), including the error paths and the final return. */
96 static uint64_t available_space(Server *s) {
98 char _cleanup_free_ *p = NULL;
102 uint64_t sum = 0, avail = 0, ss_avail = 0;
104 DIR _cleanup_closedir_ *d = NULL;
108 ts = now(CLOCK_MONOTONIC);
/* Serve from the cache while it is still fresh. */
110 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
111 return s->cached_available_space;
113 r = sd_id128_get_machine(&machine);
117 if (s->system_journal) {
118 f = "/var/log/journal/";
119 m = &s->system_metrics;
121 f = "/run/log/journal/";
122 m = &s->runtime_metrics;
127 p = strappend(f, sd_id128_to_string(machine, ids));
135 if (fstatvfs(dirfd(d), &ss) < 0)
141 union dirent_storage buf;
143 r = readdir_r(d, &buf.de, &de);
/* Only journal files (live or archived-after-corruption "~") count. */
150 if (!endswith(de->d_name, ".journal") &&
151 !endswith(de->d_name, ".journal~"))
154 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
157 if (!S_ISREG(st.st_mode))
160 sum += (uint64_t) st.st_blocks * 512UL;
163 avail = sum >= m->max_use ? 0 : m->max_use - sum;
/* Widen before multiplying, as done for st_blocks above: where both statvfs
 * fields are 32 bit wide the product would otherwise wrap on large file
 * systems.
 * NOTE(review): POSIX counts f_bavail in f_frsize units; f_bsize is kept
 * here to preserve existing behaviour - confirm which one is intended. */
165 ss_avail = (uint64_t) ss.f_bsize * (uint64_t) ss.f_bavail;
167 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
169 if (ss_avail < avail)
172 s->cached_available_space = avail;
173 s->cached_available_space_timestamp = ts;
/* Resolves the "systemd-journal" group into s->file_gid.
 * The lookup is guarded by s->file_gid_valid, which is tested first and set
 * to true at the end, so the resolution is attempted only once per Server.
 * A failed lookup is logged as a warning. */
178 static void server_read_file_gid(Server *s) {
179 const char *g = "systemd-journal";
184 if (s->file_gid_valid)
187 r = get_group_creds(&g, &s->file_gid);
189 log_warning("Failed to resolve '%s' group: %s", g, strerror(-r));
191 /* if we couldn't read the gid, then it will be 0, but that's
192 * fine and we shouldn't try to resolve the group again, so
193 * let's just pretend it worked right-away. */
194 s->file_gid_valid = true;
/* Adjusts ownership, mode and ACL of an open journal file.
 *
 * @s    server; provides the journal group via server_read_file_gid()
 * @f    journal file whose fd and path are used
 * @uid  user to be granted read access through an ACL_USER entry
 *
 * Steps visible here: chmod 0640 / chown to uid 0 and s->file_gid, read the
 * file's ACL, look up the entry for @uid (creating one if needed), add
 * ACL_READ, recalculate the mask and write the ACL back. Every failure is
 * only logged as a warning ("ignoring").
 * NOTE(review): the listing omits some original lines, e.g. the conditions
 * around the uid handling and the ACL cleanup - consult the full source. */
197 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
202 acl_permset_t permset;
207 server_read_file_gid(s);
209 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
211 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
217 acl = acl_get_fd(f->fd);
219 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
223 r = acl_find_uid(acl, uid, &entry);
/* Create a fresh ACL_USER entry qualified with uid. */
226 if (acl_create_entry(&acl, &entry) < 0 ||
227 acl_set_tag_type(entry, ACL_USER) < 0 ||
228 acl_set_qualifier(entry, &uid) < 0) {
229 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Grant read permission and recompute the ACL mask. */
234 if (acl_get_permset(entry, &permset) < 0 ||
235 acl_add_perm(permset, ACL_READ) < 0 ||
236 acl_calc_mask(&acl) < 0) {
237 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
241 if (acl_set_fd(f->fd, acl) < 0)
242 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Picks the journal file an entry for @uid should be written to.
 *
 * Order of decisions visible here:
 *   1. an open runtime journal always wins;
 *   2. the system journal is returned in some cases whose conditions are not
 *      visible in this listing, and whenever the machine ID, the path
 *      allocation, the file open or the hashmap insertion fails;
 *   3. otherwise the per-user file is looked up in s->user_journals, or
 *      opened as /var/log/journal/<machine-id>/user-<uid>.journal, passed
 *      through server_fix_perms() and inserted into the map.
 * Before opening a new file, entries are closed while the map holds
 * USER_JOURNALS_MAX or more files. */
249 static JournalFile* find_journal(Server *s, uid_t uid) {
257 /* We split up user logs only on /var, not on /run. If the
258 * runtime file is open, we write to it exclusively, in order
259 * to guarantee proper order as soon as we flush /run to
260 * /var and close the runtime file. */
262 if (s->runtime_journal)
263 return s->runtime_journal;
266 return s->system_journal;
268 r = sd_id128_get_machine(&machine);
270 return s->system_journal;
272 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
276 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
277 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
278 return s->system_journal;
280 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
281 /* Too many open? Then let's close one */
282 f = hashmap_steal_first(s->user_journals);
284 journal_file_close(f);
287 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
291 return s->system_journal;
293 server_fix_perms(s, f, uid);
295 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
/* Could not register the file: close it again and fall back. */
297 journal_file_close(f);
298 return s->system_journal;
/* Rotates every open journal: the runtime journal (never sealed: seal
 * argument is false), the system journal and each file in s->user_journals
 * (both using s->seal). After rotation the new file gets server_fix_perms();
 * for user journals the hashmap entry is replaced with the new file and the
 * uid is recovered from the map key. Failures are logged with log_error().
 * NOTE(review): the branch structure around the error messages is not fully
 * visible in this listing (the embedded numbering skips lines). */
304 void server_rotate(Server *s) {
310 log_debug("Rotating...");
312 if (s->runtime_journal) {
313 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
315 if (s->runtime_journal)
316 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
318 log_error("Failed to create new runtime journal: %s", strerror(-r));
320 server_fix_perms(s, s->runtime_journal, 0);
323 if (s->system_journal) {
324 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
326 if (s->system_journal)
327 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
329 log_error("Failed to create new system journal: %s", strerror(-r));
332 server_fix_perms(s, s->system_journal, 0);
335 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
336 r = journal_file_rotate(&f, s->compress, s->seal);
339 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
341 log_error("Failed to create user journal: %s", strerror(-r));
343 hashmap_replace(s->user_journals, k, f);
344 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Calls journal_file_set_offline() on the system journal and on every user
 * journal, then disarms the sync timer by setting an all-zero itimerspec and
 * clears s->sync_scheduled. Errors are logged, not returned.
 * NOTE(review): the timerfd error text says "max timer" although the fd is
 * s->sync_timer_fd - probably a leftover wording; the runtime string is left
 * untouched here. */
349 void server_sync(Server *s) {
355 static const struct itimerspec sync_timer_disable = {};
357 if (s->system_journal) {
358 r = journal_file_set_offline(s->system_journal);
360 log_error("Failed to sync system journal: %s", strerror(-r));
363 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
364 r = journal_file_set_offline(f);
366 log_error("Failed to sync user journal: %s", strerror(-r));
369 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
371 log_error("Failed to disable max timer: %m");
373 s->sync_scheduled = false;
/* Runs journal_directory_vacuum() on the journal directories that are in use:
 * /var/log/journal/<machine-id> when the system journal is open and
 * /run/log/journal/<machine-id> when the runtime journal is open, each with
 * its own max_use/keep_free metrics and the shared s->max_retention_usec.
 * s->oldest_file_usec is reset to 0 first and passed to the vacuum calls as
 * an output argument. -ENOENT from the vacuum call is not reported.
 * Finally the available_space() cache is invalidated by zeroing its
 * timestamp. */
376 void server_vacuum(Server *s) {
382 log_debug("Vacuuming...");
384 s->oldest_file_usec = 0;
386 r = sd_id128_get_machine(&machine);
388 log_error("Failed to get machine ID: %s", strerror(-r));
392 sd_id128_to_string(machine, ids);
394 if (s->system_journal) {
395 p = strappend("/var/log/journal/", ids);
401 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
402 if (r < 0 && r != -ENOENT)
403 log_error("Failed to vacuum %s: %s", p, strerror(-r));
407 if (s->runtime_journal) {
408 p = strappend("/run/log/journal/", ids);
414 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
415 if (r < 0 && r != -ENOENT)
416 log_error("Failed to vacuum %s: %s", p, strerror(-r));
420 s->cached_available_space_timestamp = 0;
/* Returns a newly allocated cgroup path of @pid with the prefix of PID 1's
 * cgroup removed.
 * PID 1's path is normalised first: a trailing "/system" (7 characters) is
 * cut off, and "/" gets special handling that is not visible in this listing.
 * If the process path starts with that prefix, the remainder is strdup()ed.
 * NOTE(review): the other branches and the return statements are missing
 * from this listing; callers treat the result as an owned string. */
423 static char *shortened_cgroup_path(pid_t pid) {
425 char _cleanup_free_ *process_path = NULL, *init_path = NULL;
430 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
434 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
438 if (endswith(init_path, "/system"))
439 init_path[strlen(init_path) - 7] = 0;
440 else if (streq(init_path, "/"))
443 if (startswith(process_path, init_path)) {
444 path = strdup(process_path + strlen(init_path));
/* Classifies the (negative errno) result @r of a failed journal write and
 * logs why the file @f is about to be rotated. The codes handled are listed
 * in the table below; callers in this file (write_to_journal(),
 * server_flush_to_var()) use the result to decide whether to retry a write.
 * All codes are compared as negative values - the -ESHUTDOWN test previously
 * lacked the minus sign and therefore could never match.
 * NOTE(review): the tail of the function (fallback branch and return
 * statements) is not part of this listing. */
453 bool shall_try_append_again(JournalFile *f, int r) {
455 /* -E2BIG Hit configured limit
457 -EDQUOT Quota limit hit
459 -EHOSTDOWN Other machine
460 -EBUSY Unclean shutdown
461 -EPROTONOSUPPORT Unsupported feature
464 -ESHUTDOWN Already archived */
466 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
467 log_debug("%s: Allocation limit reached, rotating.", f->path);
468 else if (r == -EHOSTDOWN)
469 log_info("%s: Journal file from other machine, rotating.", f->path);
470 else if (r == -EBUSY)
471 log_info("%s: Unclean shutdown, rotating.", f->path);
472 else if (r == -EPROTONOSUPPORT)
473 log_info("%s: Unsupported feature, rotating.", f->path);
474 else if (r == -EBADMSG || r == -ENODATA || r == -ESHUTDOWN)
475 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Appends one entry (@iovec with @n fields) to the journal selected by
 * find_journal() for @uid.
 *
 * Visible flow: if journal_file_rotate_suggested() fires, the journal is
 * looked up again after a rotation step (not visible here); the entry is
 * appended and a sync is scheduled; on failure, the write is given up when a
 * vacuum already happened or shall_try_append_again() declines, otherwise the
 * journal is looked up again and the append is retried once. Failures are
 * logged and otherwise ignored (the function returns void).
 * NOTE(review): the rotate/vacuum calls between the visible statements are
 * omitted from this listing. */
482 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
484 bool vacuumed = false;
491 f = find_journal(s, uid);
495 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
496 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
501 f = find_journal(s, uid);
506 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
508 server_schedule_sync(s);
512 if (vacuumed || !shall_try_append_again(f, r)) {
513 log_error("Failed to write entry, ignoring: %s", strerror(-r));
520 f = find_journal(s, uid);
524 log_debug("Retrying write.");
525 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
527 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* Appends trusted metadata fields to an entry and writes it to the journal.
 *
 * @iovec      array with room for @m elements, the first @n already filled
 *             by the caller; the function asserts that
 *             n + N_IOVEC_META_FIELDS <= m
 * @label      SELinux label received with the message, @label_len bytes
 *             (not necessarily NUL-terminated: it is copied with mempcpy and
 *             terminated explicitly)
 * @unit_id    fallback unit name used when no unit can be derived from the
 *             sender's cgroup
 * Further parameters (server, sender credentials, timestamp) are used in the
 * body but their declarations are not part of this listing.
 *
 * Fields added, as far as visible: _PID, _UID, _GID, _COMM, _EXE, _CMDLINE,
 * _AUDIT_SESSION, _AUDIT_LOGINUID, _SYSTEMD_CGROUP, _SYSTEMD_SESSION,
 * _SYSTEMD_OWNER_UID, _SYSTEMD_UNIT / _SYSTEMD_USER_UNIT, _SELINUX_CONTEXT,
 * _SOURCE_REALTIME_TIMESTAMP, _BOOT_ID, _MACHINE_ID, _HOSTNAME. The target
 * journal uid is then chosen from s->split_mode and the entry is handed to
 * write_to_journal().
 *
 * Heap-allocated field strings are declared _cleanup_free_, so they stay
 * valid until the function returns, i.e. until after the write.
 *
 * NOTE(review): this listing omits many original lines (conditions, frees of
 * temporaries, fallbacks); only the visible statements are described. */
530 static void dispatch_message_real(
532 struct iovec *iovec, unsigned n, unsigned m,
535 const char *label, size_t label_len,
536 const char *unit_id) {
538 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(ucred->pid)],
539 uid[sizeof("_UID=") + DECIMAL_STR_MAX(ucred->uid)],
540 gid[sizeof("_GID=") + DECIMAL_STR_MAX(ucred->gid)],
/* sizeof must cover only the string literal, like the three buffers above.
 * With the parenthesis closing after DECIMAL_STR_MAX(), sizeof was applied to
 * a pointer expression and the buffer was just pointer-sized, truncating the
 * timestamp field. */
541 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)];
543 char _cleanup_free_ *boot_id = NULL, *machine_id = NULL,
544 *comm = NULL, *cmdline = NULL, *hostname = NULL,
545 *audit_session = NULL, *audit_loginuid = NULL,
546 *exe = NULL, *cgroup = NULL, *session = NULL,
547 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
553 uid_t realuid = 0, owner = 0, journal_uid;
554 bool owner_valid = false;
559 assert(n + N_IOVEC_META_FIELDS <= m);
567 realuid = ucred->uid;
/* Sender credentials as fixed-size stack strings. */
569 snprintf(pid, sizeof(pid) - 1, "_PID=%lu", (unsigned long) ucred->pid);
571 IOVEC_SET_STRING(iovec[n++], pid);
573 snprintf(uid, sizeof(uid) - 1, "_UID=%lu", (unsigned long) ucred->uid);
575 IOVEC_SET_STRING(iovec[n++], uid);
577 snprintf(gid, sizeof(gid) - 1, "_GID=%lu", (unsigned long) ucred->gid);
579 IOVEC_SET_STRING(iovec[n++], gid);
/* Process name, executable path and command line of the sender. */
581 r = get_process_comm(ucred->pid, &t);
583 comm = strappend("_COMM=", t);
587 IOVEC_SET_STRING(iovec[n++], comm);
590 r = get_process_exe(ucred->pid, &t);
592 exe = strappend("_EXE=", t);
596 IOVEC_SET_STRING(iovec[n++], exe);
599 r = get_process_cmdline(ucred->pid, 0, false, &t);
601 cmdline = strappend("_CMDLINE=", t);
605 IOVEC_SET_STRING(iovec[n++], cmdline);
/* Audit session and login uid. */
609 r = audit_session_from_pid(ucred->pid, &audit);
611 if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
612 IOVEC_SET_STRING(iovec[n++], audit_session);
614 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
616 if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
617 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* cgroup, login session, session owner and unit of the sender. */
620 t = shortened_cgroup_path(ucred->pid);
622 cgroup = strappend("_SYSTEMD_CGROUP=", t);
626 IOVEC_SET_STRING(iovec[n++], cgroup);
630 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
631 session = strappend("_SYSTEMD_SESSION=", t);
635 IOVEC_SET_STRING(iovec[n++], session);
638 if (sd_pid_get_owner_uid(ucred->pid, &owner) >= 0) {
640 if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
641 IOVEC_SET_STRING(iovec[n++], owner_uid);
645 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
646 unit = strappend("_SYSTEMD_UNIT=", t);
648 } else if (cg_pid_get_user_unit(ucred->pid, &t) >= 0) {
649 unit = strappend("_SYSTEMD_USER_UNIT=", t);
651 } else if (unit_id) {
653 unit = strappend("_SYSTEMD_USER_UNIT=", unit_id);
655 unit = strappend("_SYSTEMD_UNIT=", unit_id);
659 IOVEC_SET_STRING(iovec[n++], unit);
/* SELinux context: built from the label passed in, or queried from the
 * sender's PID via getpidcon(). */
663 selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
664 if (selinux_context) {
665 *((char*) mempcpy(stpcpy(selinux_context, "_SELINUX_CONTEXT="), label, label_len)) = 0;
666 IOVEC_SET_STRING(iovec[n++], selinux_context);
669 security_context_t con;
671 if (getpidcon(ucred->pid, &con) >= 0) {
672 selinux_context = strappend("_SELINUX_CONTEXT=", con);
674 IOVEC_SET_STRING(iovec[n++], selinux_context);
/* Timestamp supplied with the message, in the unit timeval_load() returns. */
682 snprintf(source_time, sizeof(source_time) - 1, "_SOURCE_REALTIME_TIMESTAMP=%llu",
683 (unsigned long long) timeval_load(tv));
684 char_array_0(source_time);
685 IOVEC_SET_STRING(iovec[n++], source_time);
688 /* Note that strictly speaking storing the boot id here is
689 * redundant since the entry includes this in-line
690 * anyway. However, we need this indexed, too. */
691 r = sd_id128_get_boot(&id);
693 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
694 IOVEC_SET_STRING(iovec[n++], boot_id);
696 r = sd_id128_get_machine(&id);
698 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
699 IOVEC_SET_STRING(iovec[n++], machine_id);
701 t = gethostname_malloc();
703 hostname = strappend("_HOSTNAME=", t);
706 IOVEC_SET_STRING(iovec[n++], hostname);
/* Select which journal (by uid) receives the entry. */
711 if (s->split_mode == SPLIT_UID && realuid > 0)
712 /* Split up strictly by any UID */
713 journal_uid = realuid;
714 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
715 /* Split up by login UIDs, this avoids creation of
716 * individual journals for system UIDs. We do this
717 * only if the realuid is not root, in order not to
718 * accidentally leak privileged information to the
719 * user that is logged by a privileged process that is
720 * part of an unprivileged session.*/
725 write_to_journal(s, journal_uid, iovec, n);
/* Emits a message originating from the journal daemon itself.
 *
 * @message_id  128-bit message ID; a MESSAGE_ID field is added unless it
 *              equals SD_ID128_NULL
 * @format      printf-style format for the text after "MESSAGE="
 *
 * The entry gets PRIORITY=6 and _TRANSPORT=driver, the formatted text in a
 * stack buffer of 16 + LINE_MAX + 1 bytes (longer text is truncated by
 * vsnprintf), and is dispatched with the daemon's own pid/uid/gid as sender
 * credentials. The iovec has room for N_IOVEC_META_FIELDS plus the up to four
 * fields set here. */
728 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
729 char mid[11 + 32 + 1];
730 char buffer[16 + LINE_MAX + 1];
731 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
734 struct ucred ucred = {};
739 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
740 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* "MESSAGE=" is 8 bytes; the formatted text follows directly after it. */
742 memcpy(buffer, "MESSAGE=", 8);
743 va_start(ap, format);
744 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
746 char_array_0(buffer);
747 IOVEC_SET_STRING(iovec[n++], buffer);
749 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
750 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
752 IOVEC_SET_STRING(iovec[n++], mid);
755 ucred.pid = getpid();
756 ucred.uid = getuid();
757 ucred.gid = getgid();
759 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Entry point for messages received from clients: filters by priority,
 * applies per-cgroup rate limiting and forwards to dispatch_message_real().
 *
 * Visible behaviour: messages whose LOG_PRI(priority) exceeds
 * s->max_level_store are filtered; the sender's shortened cgroup path, cut
 * after its third '/', is the rate-limit key; journal_rate_limit_test() is
 * given that key, the priority masked with LOG_PRIMASK and
 * available_space(); a driver message "Suppressed %u messages from %s" is
 * written with rl - 1 as the count, under a condition not visible here.
 * NOTE(review): parts of the signature and several conditions are omitted
 * from this listing (embedded numbering skips). */
762 void server_dispatch_message(
764 struct iovec *iovec, unsigned n, unsigned m,
767 const char *label, size_t label_len,
772 char _cleanup_free_ *path = NULL;
776 assert(iovec || n == 0);
781 if (LOG_PRI(priority) > s->max_level_store)
787 path = shortened_cgroup_path(ucred->pid);
791 /* example: /user/lennart/3/foobar
792 * /system/dbus.service/foobar
794 * So let's cut of everything past the third /, since that is
795 * where user directories start */
797 c = strchr(path, '/');
799 c = strchr(c+1, '/');
801 c = strchr(c+1, '/');
807 rl = journal_rate_limit_test(s->rate_limit, path,
808 priority & LOG_PRIMASK, available_space(s));
813 /* Write a suppression message if we suppressed something */
815 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
816 "Suppressed %u messages from %s", rl - 1, path);
819 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
/* Opens the persistent (/var) and/or runtime (/run) journal as far as the
 * configured storage mode allows.
 *
 * Persistent journal: attempted only if not already open, storage is
 * PERSISTENT or AUTO, and the flag file /run/systemd/journal/flushed exists.
 * In PERSISTENT mode /var/log/journal/ itself is created as well; the
 * machine-ID subdirectory is created in both modes. Open failures other than
 * -ENOENT and -EROFS produce a warning.
 *
 * Runtime journal: attempted if not already open and storage is not NONE.
 * When the system journal is open, the runtime file is opened without
 * O_CREAT (existing file only, for flushing); otherwise it is created,
 * including parent directories.
 *
 * After a successful open, permissions are fixed and a driver message with
 * the respective max_use limit is logged.
 * NOTE(review): return statements and some branch boundaries are omitted
 * from this listing. */
823 static int system_journal_open(Server *s) {
829 r = sd_id128_get_machine(&machine);
833 sd_id128_to_string(machine, ids);
835 if (!s->system_journal &&
836 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
837 access("/run/systemd/journal/flushed", F_OK) >= 0) {
839 /* If in auto mode: first try to create the machine
840 * path, but not the prefix.
842 * If in persistent mode: create /var/log/journal and
843 * the machine path */
845 if (s->storage == STORAGE_PERSISTENT)
846 (void) mkdir("/var/log/journal/", 0755);
848 fn = strappend("/var/log/journal/", ids);
852 (void) mkdir(fn, 0755);
855 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
859 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
863 char fb[FORMAT_BYTES_MAX];
865 server_fix_perms(s, s->system_journal, 0);
866 server_driver_message(s, SD_ID128_NULL, "Allowing system journal files to grow to %s.",
867 format_bytes(fb, sizeof(fb), s->system_metrics.max_use));
871 if (r != -ENOENT && r != -EROFS)
872 log_warning("Failed to open system journal: %s", strerror(-r));
878 if (!s->runtime_journal &&
879 (s->storage != STORAGE_NONE)) {
881 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
885 if (s->system_journal) {
887 /* Try to open the runtime journal, but only
888 * if it already exists, so that we can flush
889 * it into the system journal */
891 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
896 log_warning("Failed to open runtime journal: %s", strerror(-r));
903 /* OK, we really need the runtime journal, so create
904 * it if necessary. */
906 (void) mkdir_parents(fn, 0755);
907 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
911 log_error("Failed to open runtime journal: %s", strerror(-r));
916 if (s->runtime_journal) {
917 char fb[FORMAT_BYTES_MAX];
919 server_fix_perms(s, s->runtime_journal, 0);
920 server_driver_message(s, SD_ID128_NULL, "Allowing runtime journal files to grow to %s.",
921 format_bytes(fb, sizeof(fb), s->runtime_metrics.max_use));
/* Copies all entries of the runtime journal (/run) into the system journal
 * (/var) and then removes the runtime journal.
 *
 * Checks visible here: storage must be AUTO or PERSISTENT, a runtime journal
 * must be open, and system_journal_open() must yield a system journal.
 * Entries are iterated through an sd_journal handle opened with
 * SD_JOURNAL_RUNTIME_ONLY and copied one by one; a failed copy is retried
 * once if shall_try_append_again() allows it (the steps between failure and
 * retry are not visible here). Afterwards the runtime journal file is closed,
 * its pointer reset, /run/log/journal removed and the sd_journal handle
 * closed.
 * NOTE(review): return values and the exact cleanup labels are omitted from
 * this listing. */
928 int server_flush_to_var(Server *s) {
931 sd_journal *j = NULL;
935 if (s->storage != STORAGE_AUTO &&
936 s->storage != STORAGE_PERSISTENT)
939 if (!s->runtime_journal)
942 system_journal_open(s);
944 if (!s->system_journal)
947 log_debug("Flushing to /var...");
949 r = sd_id128_get_machine(&machine);
951 log_error("Failed to get machine id: %s", strerror(-r));
955 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
957 log_error("Failed to read runtime journal: %s", strerror(-r));
/* A threshold of 0 is passed; presumably this disables data truncation
 * while reading - confirm against the sd-journal documentation. */
961 sd_journal_set_data_threshold(j, 0);
963 SD_JOURNAL_FOREACH(j) {
968 assert(f && f->current_offset > 0);
970 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
972 log_error("Can't read entry: %s", strerror(-r));
976 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
980 if (!shall_try_append_again(s->system_journal, r)) {
981 log_error("Can't write entry: %s", strerror(-r));
988 log_debug("Retrying write.");
989 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
991 log_error("Can't write entry: %s", strerror(-r));
997 journal_file_post_change(s->system_journal);
999 journal_file_close(s->runtime_journal);
1000 s->runtime_journal = NULL;
1003 rm_rf("/run/log/journal", false, true, false);
1005 sd_journal_close(j);
/* Handles one epoll event by comparing ev->data.fd with the server's
 * well-known file descriptors:
 *
 *   signal_fd        read a signalfd_siginfo; SIGUSR1 touches
 *                    /run/systemd/journal/flushed and flushes to /var,
 *                    SIGUSR2 has its own handling (not visible here), other
 *                    signals are logged as "Received SIG..."
 *   sync_timer_fd    read the timer fd (follow-up not visible here)
 *   dev_kmsg_fd      server_read_dev_kmsg()
 *   native_fd /      size the receive buffer from SIOCINQ, recvmsg() the
 *   syslog_fd        datagram with its control messages, hand it to the
 *                    syslog or native message processor, close passed fds
 *   stdout_fd        accept a new stdout stream
 *   anything else    ev->data.ptr is taken to be a StdoutStream
 *
 * Except for stream events (which allow EPOLLIN and/or EPOLLHUP), any event
 * mask other than exactly EPOLLIN is reported as invalid.
 * NOTE(review): return values, loops and several declarations are omitted
 * from this listing (embedded numbering skips). */
1010 int process_event(Server *s, struct epoll_event *ev) {
1014 if (ev->data.fd == s->signal_fd) {
1015 struct signalfd_siginfo sfsi;
1018 if (ev->events != EPOLLIN) {
1019 log_error("Got invalid event from epoll.");
1023 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1024 if (n != sizeof(sfsi)) {
1029 if (errno == EINTR || errno == EAGAIN)
1035 if (sfsi.ssi_signo == SIGUSR1) {
1036 touch("/run/systemd/journal/flushed");
1037 server_flush_to_var(s);
1042 if (sfsi.ssi_signo == SIGUSR2) {
1048 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1052 } else if (ev->data.fd == s->sync_timer_fd) {
1056 log_debug("Got sync request from epoll.");
1058 r = read(ev->data.fd, (void *)&t, sizeof(t));
1065 } else if (ev->data.fd == s->dev_kmsg_fd) {
1068 if (ev->events != EPOLLIN) {
1069 log_error("Got invalid event from epoll.");
1073 r = server_read_dev_kmsg(s);
1079 } else if (ev->data.fd == s->native_fd ||
1080 ev->data.fd == s->syslog_fd) {
1082 if (ev->events != EPOLLIN) {
1083 log_error("Got invalid event from epoll.");
1088 struct msghdr msghdr;
1090 struct ucred *ucred = NULL;
1091 struct timeval *tv = NULL;
1092 struct cmsghdr *cmsg;
1094 size_t label_len = 0;
1096 struct cmsghdr cmsghdr;
1098 /* We use NAME_MAX space for the
1099 * SELinux label here. The kernel
1100 * currently enforces no limit, but
1101 * according to suggestions from the
1102 * SELinux people this will change and
1103 * it will probably be identical to
1104 * NAME_MAX. For now we use that, but
1105 * this should be updated one day when
1106 * the final limit is known.*/
1107 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1108 CMSG_SPACE(sizeof(struct timeval)) +
1109 CMSG_SPACE(sizeof(int)) + /* fd */
1110 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask the socket how many bytes are queued, to size the buffer. */
1117 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1118 log_error("SIOCINQ failed: %m");
1122 if (s->buffer_size < (size_t) v) {
/* Grow to at least double the current size; one extra byte is allocated
 * beyond l. */
1126 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1127 b = realloc(s->buffer, l+1);
1130 log_error("Couldn't increase buffer.");
1139 iovec.iov_base = s->buffer;
1140 iovec.iov_len = s->buffer_size;
1144 msghdr.msg_iov = &iovec;
1145 msghdr.msg_iovlen = 1;
1146 msghdr.msg_control = &control;
1147 msghdr.msg_controllen = sizeof(control);
1149 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1152 if (errno == EINTR || errno == EAGAIN)
1155 log_error("recvmsg() failed: %m");
/* Pick sender credentials, security label, receive timestamp and passed
 * file descriptors out of the control messages. Credentials and timestamp
 * are accepted only when the payload length matches exactly. */
1159 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1161 if (cmsg->cmsg_level == SOL_SOCKET &&
1162 cmsg->cmsg_type == SCM_CREDENTIALS &&
1163 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1164 ucred = (struct ucred*) CMSG_DATA(cmsg);
1165 else if (cmsg->cmsg_level == SOL_SOCKET &&
1166 cmsg->cmsg_type == SCM_SECURITY) {
1167 label = (char*) CMSG_DATA(cmsg);
1168 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1169 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1170 cmsg->cmsg_type == SO_TIMESTAMP &&
1171 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1172 tv = (struct timeval*) CMSG_DATA(cmsg);
1173 else if (cmsg->cmsg_level == SOL_SOCKET &&
1174 cmsg->cmsg_type == SCM_RIGHTS) {
1175 fds = (int*) CMSG_DATA(cmsg);
1176 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1180 if (ev->data.fd == s->syslog_fd) {
1183 if (n > 0 && n_fds == 0) {
1184 e = memchr(s->buffer, '\n', n);
1190 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1191 } else if (n_fds > 0)
1192 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native protocol: either an inline payload or exactly one fd with an empty
 * payload. */
1195 if (n > 0 && n_fds == 0)
1196 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1197 else if (n == 0 && n_fds == 1)
1198 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1200 log_warning("Got too many file descriptors via native socket. Ignoring.");
1203 close_many(fds, n_fds);
1208 } else if (ev->data.fd == s->stdout_fd) {
1210 if (ev->events != EPOLLIN) {
1211 log_error("Got invalid event from epoll.");
1215 stdout_stream_new(s);
1219 StdoutStream *stream;
1221 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1222 log_error("Got invalid event from epoll.");
1226 /* If it is none of the well-known fds, it must be an
1227 * stdout stream fd. Note that this is a bit ugly here
1228 * (since we rely that none of the well-known fds
1229 * could be interpreted as pointer), but nonetheless
1230 * safe, since the well-known fds would never get an
1231 * fd > 4096, i.e. beyond the first memory page */
1233 stream = ev->data.ptr;
/* A result <= 0 from stdout_stream_process() frees the stream. */
1235 if (stdout_stream_process(stream) <= 0)
1236 stdout_stream_free(stream);
1241 log_error("Unknown event.");
/* Sets the process signal mask to SIGINT, SIGTERM, SIGUSR1 and SIGUSR2
 * (SIG_SETMASK replaces the previous mask), creates a non-blocking,
 * close-on-exec signalfd for the same set in s->signal_fd and registers it
 * with s->epoll_fd for EPOLLIN. Failures of signalfd() and epoll_ctl() are
 * logged; the returned values are not visible in this listing. */
1245 static int open_signalfd(Server *s) {
1247 struct epoll_event ev;
1251 assert_se(sigemptyset(&mask) == 0);
1252 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1253 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1255 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1256 if (s->signal_fd < 0) {
1257 log_error("signalfd(): %m");
1262 ev.events = EPOLLIN;
1263 ev.data.fd = s->signal_fd;
1265 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1266 log_error("epoll_ctl(): %m");
/* Applies journald switches from the kernel command line (/proc/cmdline).
 *
 * Recognised words, each parsed with parse_boolean():
 *   systemd.journald.forward_to_syslog=   -> s->forward_to_syslog
 *   systemd.journald.forward_to_kmsg=     -> s->forward_to_kmsg
 *   systemd.journald.forward_to_console=  -> s->forward_to_console
 * Any other word starting with "systemd.journald" triggers a warning.
 * The numeric offsets 35, 33 and 36 are the lengths of the respective
 * prefixes including '='.
 * A positive detect_container() result is tested first; the action taken in
 * that case is not visible here. An unreadable /proc/cmdline only produces
 * a warning. */
1273 static int server_parse_proc_cmdline(Server *s) {
1274 char _cleanup_free_ *line = NULL;
1279 if (detect_container(NULL) > 0)
1282 r = read_one_line_file("/proc/cmdline", &line);
1284 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1288 FOREACH_WORD_QUOTED(w, l, line, state) {
1289 char _cleanup_free_ *word;
1291 word = strndup(w, l);
1295 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1296 r = parse_boolean(word + 35);
1298 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1300 s->forward_to_syslog = r;
1301 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1302 r = parse_boolean(word + 33);
1304 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1306 s->forward_to_kmsg = r;
1307 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1308 r = parse_boolean(word + 36);
1310 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1312 s->forward_to_console = r;
1313 } else if (startswith(word, "systemd.journald"))
1314 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Loads /etc/systemd/journald.conf into the Server structure.
 * The file is opened with "re" (read, close-on-exec) and parsed by
 * config_parse() for the "Journal" section, using the gperf-generated
 * lookup journald_gperf_lookup. ENOENT on open gets separate handling (not
 * visible here); other open errors and parse errors are logged as warnings. */
1320 static int server_parse_config_file(Server *s) {
1321 static const char *fn = "/etc/systemd/journald.conf";
1322 FILE _cleanup_fclose_ *f = NULL;
1327 f = fopen(fn, "re");
1329 if (errno == ENOENT)
1332 log_warning("Failed to open configuration file %s: %m", fn);
1336 r = config_parse(fn, f, "Journal\0", config_item_perf_lookup,
1337 (void*) journald_gperf_lookup, false, s);
1339 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Creates the CLOCK_MONOTONIC, close-on-exec timerfd stored in
 * s->sync_timer_fd and registers it with s->epoll_fd for EPOLLIN.
 * NOTE(review): the epoll error text speaks of an "idle timer" although the
 * fd is the sync timer; the runtime string is left untouched here. */
1344 static int server_open_sync_timer(Server *s) {
1346 struct epoll_event ev;
1350 s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1351 if (s->sync_timer_fd < 0)
1355 ev.events = EPOLLIN;
1356 ev.data.fd = s->sync_timer_fd;
1358 r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1360 log_error("Failed to add idle timer fd to epoll object: %m");
/* Arms the sync timer so that a sync happens s->sync_interval_usec from now,
 * unless one is already scheduled (s->sync_scheduled). The timer is one-shot:
 * only it_value is set, it_interval stays zero.
 *
 * The interval is split into whole seconds and a nanosecond remainder. The
 * remainder was previously computed as usec % MSEC_PER_SEC, which is neither
 * the sub-second part nor in nanoseconds; for a sub-second interval such as
 * 500000 usec it produced an all-zero it_value, which disarms the timer
 * instead of arming it.
 * NOTE(review): return statements and the handling of a zero interval are
 * omitted from this listing. */
1367 int server_schedule_sync(Server *s) {
1372 if (s->sync_scheduled)
1375 if (s->sync_interval_usec) {
1376 struct itimerspec sync_timer_enable = {
1377 .it_value.tv_sec = s->sync_interval_usec / USEC_PER_SEC,
/* sub-second remainder in usec, converted to nsec (1000 nsec per usec) */
1378 .it_value.tv_nsec = (s->sync_interval_usec % USEC_PER_SEC) * 1000,
1381 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1386 s->sync_scheduled = true;
/* Initialises a Server: defaults, configuration, sockets, timers and
 * journals.
 *
 * Sequence visible here:
 *   1. all fds are set to -1 and the defaults applied (sync interval, rate
 *      limit, forward_to_syslog, maximum log levels); both metrics structs
 *      are filled with 0xFF bytes, i.e. every field starts out as all-ones;
 *   2. the config file and the kernel command line are parsed, in that
 *      order, so the command line is applied last;
 *   3. if exactly one of rate-limit interval and burst is zero, both are
 *      reset to 0;
 *   4. /run/systemd/journal, the user-journal hashmap, the mmap cache and
 *      the epoll fd are created;
 *   5. fds passed by the service manager are matched against the native,
 *      stdout and /dev/log socket paths, each accepted at most once; unknown
 *      sockets are logged;
 *   6. the remaining sockets, /dev/kmsg, the kernel seqnum file, the sync
 *      timer and the signalfd are opened, then udev, the rate limiter and
 *      the journal files.
 * NOTE(review): error returns and the per-socket fd assignments are omitted
 * from this listing. */
1391 int server_init(Server *s) {
1397 s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1398 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1402 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1403 s->sync_scheduled = false;
1405 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1406 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1408 s->forward_to_syslog = true;
1410 s->max_level_store = LOG_DEBUG;
1411 s->max_level_syslog = LOG_DEBUG;
1412 s->max_level_kmsg = LOG_NOTICE;
1413 s->max_level_console = LOG_INFO;
1415 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1416 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1418 server_parse_config_file(s);
1419 server_parse_proc_cmdline(s);
/* Rate limiting needs both values; one without the other disables it. */
1420 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1421 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1422 (long long unsigned) s->rate_limit_interval,
1423 s->rate_limit_burst);
1424 s->rate_limit_interval = s->rate_limit_burst = 0;
1427 mkdir_p("/run/systemd/journal", 0755);
1429 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1430 if (!s->user_journals)
1433 s->mmap = mmap_cache_new();
1437 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1438 if (s->epoll_fd < 0) {
1439 log_error("Failed to create epoll object: %m");
1443 n = sd_listen_fds(true);
1445 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1449 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1451 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1453 if (s->native_fd >= 0) {
1454 log_error("Too many native sockets passed.");
1460 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1462 if (s->stdout_fd >= 0) {
1463 log_error("Too many stdout sockets passed.");
1469 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1471 if (s->syslog_fd >= 0) {
1472 log_error("Too many /dev/log sockets passed.");
1479 log_error("Unknown socket passed.");
1484 r = server_open_syslog_socket(s);
1488 r = server_open_native_socket(s);
1492 r = server_open_stdout_socket(s);
1496 r = server_open_dev_kmsg(s);
1500 r = server_open_kernel_seqnum(s);
1504 r = server_open_sync_timer(s);
1508 r = open_signalfd(s);
1512 s->udev = udev_new();
1516 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1517 s->rate_limit_burst);
1521 r = system_journal_open(s);
/* Calls journal_file_maybe_append_tag() with the current CLOCK_REALTIME time
 * on the system journal (if open) and on every user journal. The runtime
 * journal is not touched by the statements visible here. */
1528 void server_maybe_append_tags(Server *s) {
1534 n = now(CLOCK_REALTIME);
1536 if (s->system_journal)
1537 journal_file_maybe_append_tag(s->system_journal, n);
1539 HASHMAP_FOREACH(f, s->user_journals, i)
1540 journal_file_maybe_append_tag(f, n);
/* Releases everything a Server owns: all stdout streams, the system, runtime
 * and user journal files, the user-journal hashmap, every file descriptor
 * that is open (>= 0), the rate limiter, the mapping of the kernel sequence
 * number (sizeof(uint64_t) bytes), the mmap cache reference and the udev
 * reference.
 * NOTE(review): pointers and fds are not reset by the statements visible
 * here, so the Server must not be used afterwards without re-initialisation;
 * some original lines are omitted from this listing. */
1544 void server_done(Server *s) {
/* stdout_stream_free() is expected to unlink the stream from the list head,
 * otherwise this loop would not terminate - confirm in journald-stream. */
1548 while (s->stdout_streams)
1549 stdout_stream_free(s->stdout_streams);
1551 if (s->system_journal)
1552 journal_file_close(s->system_journal);
1554 if (s->runtime_journal)
1555 journal_file_close(s->runtime_journal);
1557 while ((f = hashmap_steal_first(s->user_journals)))
1558 journal_file_close(f);
1560 hashmap_free(s->user_journals);
1562 if (s->epoll_fd >= 0)
1563 close_nointr_nofail(s->epoll_fd);
1565 if (s->signal_fd >= 0)
1566 close_nointr_nofail(s->signal_fd);
1568 if (s->syslog_fd >= 0)
1569 close_nointr_nofail(s->syslog_fd);
1571 if (s->native_fd >= 0)
1572 close_nointr_nofail(s->native_fd);
1574 if (s->stdout_fd >= 0)
1575 close_nointr_nofail(s->stdout_fd);
1577 if (s->dev_kmsg_fd >= 0)
1578 close_nointr_nofail(s->dev_kmsg_fd);
1580 if (s->sync_timer_fd >= 0)
1581 close_nointr_nofail(s->sync_timer_fd);
1584 journal_rate_limit_free(s->rate_limit);
1586 if (s->kernel_seqnum)
1587 munmap(s->kernel_seqnum, sizeof(uint64_t));
1593 mmap_cache_unref(s->mmap);
1596 udev_unref(s->udev);