1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
35 #include <systemd/sd-login.h>
41 #include "journal-file.h"
42 #include "socket-util.h"
43 #include "cgroup-util.h"
47 #include "conf-parser.h"
48 #include "journal-internal.h"
49 #include "journal-vacuum.h"
50 #include "journal-authenticate.h"
51 #include "journald-server.h"
52 #include "journald-rate-limit.h"
53 #include "journald-kmsg.h"
54 #include "journald-syslog.h"
55 #include "journald-stream.h"
56 #include "journald-console.h"
57 #include "journald-native.h"
61 #include <acl/libacl.h>
66 #include <selinux/selinux.h>
/* Cap on the number of per-user journal files kept open at once;
 * find_journal() closes entries while the hashmap holds this many or more. */
69 #define USER_JOURNALS_MAX 1024
/* Defaults that server_init() stores into the Server before the configuration
 * file and kernel command line are parsed. */
71 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
72 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
73 #define DEFAULT_RATE_LIMIT_BURST 200
/* Maximum age of the value cached by available_space() before it is recomputed. */
75 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Textual names of the Storage enum values, indexed by the enum constant. */
77 static const char* const storage_table[] = {
78 [STORAGE_AUTO] = "auto",
79 [STORAGE_VOLATILE] = "volatile",
80 [STORAGE_PERSISTENT] = "persistent",
81 [STORAGE_NONE] = "none"
/* NOTE(review): presumably these macros expand to the string<->enum lookup
 * helpers and to the config_parse_storage() callback - confirm against the
 * macro definitions. */
84 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
85 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Textual names of the SplitMode enum values, indexed by the enum constant. */
87 static const char* const split_mode_table[] = {
88 [SPLIT_NONE] = "none",
90 [SPLIT_LOGIN] = "login"
/* NOTE(review): presumably these macros expand to the string<->enum lookup
 * helpers and to the config_parse_split_mode() callback - confirm against the
 * macro definitions. */
93 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
94 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Computes how many more bytes the journal may use and caches the result in
 * s->cached_available_space together with the monotonic timestamp of the
 * computation. A cached value younger than RECHECK_AVAILABLE_SPACE_USEC is
 * returned directly. The budget combines the disk usage of the journal files
 * in the machine's journal directory, the metrics' max_use/keep_free limits
 * and the free space reported by fstatvfs().
 * NOTE(review): the final return statement is not visible here; presumably
 * it returns avail - confirm. */
96 static uint64_t available_space(Server *s) {
98 char _cleanup_free_ *p = NULL;
102 uint64_t sum = 0, avail = 0, ss_avail = 0;
104 DIR _cleanup_closedir_ *d = NULL;
108 ts = now(CLOCK_MONOTONIC);
/* Cached value is still fresh, skip the directory scan. */
110 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
111 return s->cached_available_space;
113 r = sd_id128_get_machine(&machine);
/* Pick directory prefix and metrics: the /var pair when the system journal
 * is open, the /run pair being the alternative. */
117 if (s->system_journal) {
118 f = "/var/log/journal/";
119 m = &s->system_metrics;
121 f = "/run/log/journal/";
122 m = &s->runtime_metrics;
/* Directory path is the prefix followed by the machine ID string. */
127 p = strappend(f, sd_id128_to_string(machine, ids));
135 if (fstatvfs(dirfd(d), &ss) < 0)
141 union dirent_storage buf;
143 r = readdir_r(d, &buf.de, &de);
/* Only entries named *.journal or *.journal~ that are regular files count. */
150 if (!endswith(de->d_name, ".journal") &&
151 !endswith(de->d_name, ".journal~"))
154 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
157 if (!S_ISREG(st.st_mode))
/* st_blocks is counted in 512-byte units. */
160 sum += (uint64_t) st.st_blocks * 512UL;
/* Room left below max_use, clamped at zero. */
163 avail = sum >= m->max_use ? 0 : m->max_use - sum;
165 ss_avail = ss.f_bsize * ss.f_bavail;
/* File system free space beyond the keep_free reserve, clamped at zero. */
167 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
/* NOTE(review): the statement guarded by this test is not visible;
 * presumably avail is lowered to ss_avail - confirm. */
169 if (ss_avail < avail)
172 s->cached_available_space = avail;
173 s->cached_available_space_timestamp = ts;
/* Looks up the "systemd-journal" group and stores its GID in s->file_gid.
 * The lookup is attempted only while s->file_gid_valid is unset, and the flag
 * is set afterwards regardless of the outcome. */
178 static void server_read_file_gid(Server *s) {
179 const char *g = "systemd-journal";
/* A previous call already attempted the lookup. */
184 if (s->file_gid_valid)
187 r = get_group_creds(&g, &s->file_gid);
189 log_warning("Failed to resolve '%s' group: %s", g, strerror(-r));
191 /* if we couldn't read the gid, then it will be 0, but that's
192 * fine and we shouldn't try to resolve the group again, so
193 * let's just pretend it worked right-away. */
194 s->file_gid_valid = true;
/* Adjusts ownership, mode and ACL of journal file f: mode 0640, owner 0 and
 * the group resolved by server_read_file_gid(), then a user ACL entry for uid
 * carrying ACL_READ. Every failure is logged as a warning and otherwise
 * ignored.
 * NOTE(review): the conditions deciding whether the ACL part runs at all
 * (e.g. for uid 0) are not visible here - confirm. */
197 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
202 acl_permset_t permset;
207 server_read_file_gid(s);
209 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
211 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
217 acl = acl_get_fd(f->fd);
219 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an existing entry for this uid; a new ACL_USER entry is created
 * below when one is needed. */
223 r = acl_find_uid(acl, uid, &entry);
226 if (acl_create_entry(&acl, &entry) < 0 ||
227 acl_set_tag_type(entry, ACL_USER) < 0 ||
228 acl_set_qualifier(entry, &uid) < 0) {
229 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Grant read permission on the entry and recompute the ACL mask. */
234 if (acl_get_permset(entry, &permset) < 0 ||
235 acl_add_perm(permset, ACL_READ) < 0 ||
236 acl_calc_mask(&acl) < 0) {
237 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
241 if (acl_set_fd(f->fd, acl) < 0)
242 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Selects the journal file an entry for the given uid is written to. The
 * runtime journal wins whenever it is open. Otherwise a per-user file
 * "/var/log/journal/<machine-id>/user-<uid>.journal" is looked up in, or
 * opened and added to, s->user_journals. The visible failure paths fall back
 * to s->system_journal. */
249 static JournalFile* find_journal(Server *s, uid_t uid) {
257 /* We split up user logs only on /var, not on /run. If the
258 * runtime file is open, we write to it exclusively, in order
259 * to guarantee proper order as soon as we flush /run to
260 * /var and close the runtime file. */
262 if (s->runtime_journal)
263 return s->runtime_journal;
266 return s->system_journal;
268 r = sd_id128_get_machine(&machine);
270 return s->system_journal;
/* Per-user files are cached in the hashmap, keyed by the uid. */
272 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
276 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
277 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
278 return s->system_journal;
/* Keep the number of open per-user files below USER_JOURNALS_MAX. */
280 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
281 /* Too many open? Then let's close one */
282 f = hashmap_steal_first(s->user_journals);
284 journal_file_close(f);
287 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
291 return s->system_journal;
293 server_fix_perms(s, f, uid);
/* If the file cannot be registered in the hashmap it is closed again and
 * the system journal is used instead. */
295 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
297 journal_file_close(f);
298 return s->system_journal;
/* Rotates the runtime journal, the system journal and every per-user journal
 * via journal_file_rotate(), logging failures and re-applying permissions with
 * server_fix_perms(). The runtime journal is rotated with sealing disabled
 * (false); the others use s->seal. */
304 void server_rotate(Server *s) {
310 log_debug("Rotating...");
312 if (s->runtime_journal) {
313 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
/* The two messages distinguish whether a journal file object is still
 * present after the attempt. */
315 if (s->runtime_journal)
316 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
318 log_error("Failed to create new runtime journal: %s", strerror(-r));
320 server_fix_perms(s, s->runtime_journal, 0);
323 if (s->system_journal) {
324 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
326 if (s->system_journal)
327 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
329 log_error("Failed to create new system journal: %s", strerror(-r));
332 server_fix_perms(s, s->system_journal, 0);
/* Rotation may hand back a different JournalFile object, so the hashmap
 * value for the key is replaced with it. */
335 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
336 r = journal_file_rotate(&f, s->compress, s->seal);
339 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
341 log_error("Failed to create user journal: %s", strerror(-r));
343 hashmap_replace(s->user_journals, k, f);
344 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Calls journal_file_set_offline() on the system journal and on every
 * per-user journal, then disarms the sync timer and clears
 * s->sync_scheduled. Failures are logged only. */
349 void server_sync(Server *s) {
/* An all-zero itimerspec passed to timerfd_settime() disarms the timer. */
355 static const struct itimerspec sync_timer_disable = {};
357 if (s->system_journal) {
358 r = journal_file_set_offline(s->system_journal);
360 log_error("Failed to sync system journal: %s", strerror(-r));
363 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
364 r = journal_file_set_offline(f);
366 log_error("Failed to sync user journal: %s", strerror(-r));
369 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
371 log_error("Failed to disable max timer: %m");
373 s->sync_scheduled = false;
/* Runs journal_directory_vacuum() on the machine's persistent and/or runtime
 * journal directory (depending on which journal is open), using the matching
 * metrics and s->max_retention_usec. Resets s->oldest_file_usec beforehand and
 * invalidates the available_space() cache afterwards. */
376 void server_vacuum(Server *s) {
382 log_debug("Vacuuming...");
384 s->oldest_file_usec = 0;
386 r = sd_id128_get_machine(&machine);
388 log_error("Failed to get machine ID: %s", strerror(-r));
392 sd_id128_to_string(machine, ids);
394 if (s->system_journal) {
395 p = strappend("/var/log/journal/", ids);
/* A missing directory (-ENOENT) is not reported as an error. */
401 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
402 if (r < 0 && r != -ENOENT)
403 log_error("Failed to vacuum %s: %s", p, strerror(-r));
407 if (s->runtime_journal) {
408 p = strappend("/run/log/journal/", ids);
414 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
415 if (r < 0 && r != -ENOENT)
416 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* A zero timestamp makes the next available_space() call recompute. */
420 s->cached_available_space_timestamp = 0;
/* Builds a newly allocated copy of the cgroup path of pid with the prefix
 * derived from PID 1's cgroup path removed.
 * NOTE(review): the branch taken when the process path does not start with
 * that prefix, and the return statement, are not visible here - confirm. */
423 static char *shortened_cgroup_path(pid_t pid) {
425 char _cleanup_free_ *process_path = NULL, *init_path = NULL;
430 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
434 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
/* Drop a trailing "/system" (7 characters) from init's path. */
438 if (endswith(init_path, "/system"))
439 init_path[strlen(init_path) - 7] = 0;
440 else if (streq(init_path, "/"))
443 if (startswith(process_path, init_path)) {
444 path = strdup(process_path + strlen(init_path));
/* Classifies the negative errno-style result r of a failed journal append or
 * copy on file f and logs why the file is going to be rotated. The error
 * codes handled are the ones tested below; all of them are negative values.
 * NOTE(review): the fall-through branch and the return statements are not
 * visible here; presumably true is returned for the listed codes and false
 * otherwise - confirm. */
453 bool shall_try_append_again(JournalFile *f, int r) {
455 /* -E2BIG Hit configured limit
457 -EDQUOT Quota limit hit
459 -EHOSTDOWN Other machine
460 -EBUSY Unclean shutdown
461 -EPROTONOSUPPORT Unsupported feature
464 -ESHUTDOWN Already archived */
466 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
467 log_debug("%s: Allocation limit reached, rotating.", f->path);
468 else if (r == -EHOSTDOWN)
469 log_info("%s: Journal file from other machine, rotating.", f->path);
470 else if (r == -EBUSY)
471 log_info("%s: Unclean shutdown, rotating.", f->path);
472 else if (r == -EPROTONOSUPPORT)
473 log_info("%s: Unsupported feature, rotating.", f->path);
/* r carries negated errno values, so ESHUTDOWN has to be negated like the
 * other codes; the positive constant could never match. */
474 else if (r == -EBADMSG || r == -ENODATA || r == -ESHUTDOWN)
475 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Appends the n iovec fields as one entry to the journal chosen by
 * find_journal() for uid. The file is looked up again after a suggested
 * rotation, and once more before a single retry when the first append fails
 * with an error that shall_try_append_again() accepts.
 * NOTE(review): the rotate/vacuum calls between the lookups are not visible
 * here - confirm. */
482 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
484 bool vacuumed = false;
491 f = find_journal(s, uid);
495 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
496 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
501 f = find_journal(s, uid);
506 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
508 server_schedule_sync(s);
/* Give up if a vacuum already happened or the error is not one that a
 * rotation could cure. */
512 if (vacuumed || !shall_try_append_again(f, r)) {
513 log_error("Failed to write entry, ignoring: %s", strerror(-r));
520 f = find_journal(s, uid);
524 log_debug("Retrying write.");
525 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
527 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* Appends trusted metadata fields to the first n entries of iovec (which has
 * room for m entries in total) and hands the result to write_to_journal().
 * Fields added from the sender's credentials include _PID, _UID, _GID, _COMM,
 * _EXE, _CMDLINE, audit session/login uid, cgroup, session, owner uid, unit
 * and SELinux context; independent of the sender it adds
 * _SOURCE_REALTIME_TIMESTAMP, _BOOT_ID, _MACHINE_ID and _HOSTNAME. Finally the
 * target journal uid is derived from s->split_mode.
 * NOTE(review): the guards around the individual sections (ucred, tv, label
 * being non-NULL, result checks) are not visible here - confirm. */
530 static void dispatch_message_real(
532 struct iovec *iovec, unsigned n, unsigned m,
535 const char *label, size_t label_len,
536 const char *unit_id) {
/* Fixed-size field buffers: sizeof of the literal prefix (which includes the
 * terminating NUL) plus room for the value. The sizeof() must close right
 * after the string literal; otherwise it measures a pointer expression and
 * the buffer ends up pointer-sized. */
538 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(ucred->pid)],
539 uid[sizeof("_UID=") + DECIMAL_STR_MAX(ucred->uid)],
540 gid[sizeof("_GID=") + DECIMAL_STR_MAX(ucred->gid)],
541 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
542 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
543 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=";
/* Heap-allocated field strings, released automatically on scope exit. */
545 char _cleanup_free_ *comm = NULL, *cmdline = NULL, *hostname = NULL,
546 *audit_session = NULL, *audit_loginuid = NULL,
547 *exe = NULL, *cgroup = NULL, *session = NULL,
548 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
553 uid_t realuid = 0, owner = 0, journal_uid;
554 bool owner_valid = false;
/* The caller must have reserved space for all meta fields. */
559 assert(n + N_IOVEC_META_FIELDS <= m);
567 realuid = ucred->uid;
569 snprintf(pid, sizeof(pid) - 1, "_PID=%lu", (unsigned long) ucred->pid);
571 IOVEC_SET_STRING(iovec[n++], pid);
573 snprintf(uid, sizeof(uid) - 1, "_UID=%lu", (unsigned long) ucred->uid);
575 IOVEC_SET_STRING(iovec[n++], uid);
577 snprintf(gid, sizeof(gid) - 1, "_GID=%lu", (unsigned long) ucred->gid);
579 IOVEC_SET_STRING(iovec[n++], gid);
581 r = get_process_comm(ucred->pid, &t);
583 comm = strappend("_COMM=", t);
587 IOVEC_SET_STRING(iovec[n++], comm);
590 r = get_process_exe(ucred->pid, &t);
592 exe = strappend("_EXE=", t);
596 IOVEC_SET_STRING(iovec[n++], exe);
599 r = get_process_cmdline(ucred->pid, 0, false, &t);
601 cmdline = strappend("_CMDLINE=", t);
605 IOVEC_SET_STRING(iovec[n++], cmdline);
609 r = audit_session_from_pid(ucred->pid, &audit);
611 if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
612 IOVEC_SET_STRING(iovec[n++], audit_session);
614 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
616 if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
617 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
620 t = shortened_cgroup_path(ucred->pid);
622 cgroup = strappend("_SYSTEMD_CGROUP=", t);
626 IOVEC_SET_STRING(iovec[n++], cgroup);
630 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
631 session = strappend("_SYSTEMD_SESSION=", t);
635 IOVEC_SET_STRING(iovec[n++], session);
638 if (sd_pid_get_owner_uid(ucred->pid, &owner) >= 0) {
640 if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
641 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* Unit field: system unit of the process, else its user unit, else the
 * unit_id supplied by the caller. */
645 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
646 unit = strappend("_SYSTEMD_UNIT=", t);
648 } else if (cg_pid_get_user_unit(ucred->pid, &t) >= 0) {
649 unit = strappend("_SYSTEMD_USER_UNIT=", t);
651 } else if (unit_id) {
653 unit = strappend("_SYSTEMD_USER_UNIT=", unit_id);
655 unit = strappend("_SYSTEMD_UNIT=", unit_id);
659 IOVEC_SET_STRING(iovec[n++], unit);
/* SELinux context: taken from the label passed in (label_len bytes, not
 * necessarily NUL-terminated, hence the explicit terminator), with
 * getpidcon() as the other source. */
663 selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
664 if (selinux_context) {
665 *((char*) mempcpy(stpcpy(selinux_context, "_SELINUX_CONTEXT="), label, label_len)) = 0;
666 IOVEC_SET_STRING(iovec[n++], selinux_context);
669 security_context_t con;
671 if (getpidcon(ucred->pid, &con) >= 0) {
672 selinux_context = strappend("_SELINUX_CONTEXT=", con);
674 IOVEC_SET_STRING(iovec[n++], selinux_context);
682 snprintf(source_time, sizeof(source_time) - 1, "_SOURCE_REALTIME_TIMESTAMP=%llu",
683 (unsigned long long) timeval_load(tv));
684 char_array_0(source_time);
685 IOVEC_SET_STRING(iovec[n++], source_time);
688 /* Note that strictly speaking storing the boot id here is
689 * redundant since the entry includes this in-line
690 * anyway. However, we need this indexed, too. */
691 r = sd_id128_get_boot(&id);
693 sd_id128_to_string(id, boot_id + sizeof("_BOOT_ID=") - 1);
694 IOVEC_SET_STRING(iovec[n++], boot_id);
697 r = sd_id128_get_machine(&id);
/* Write the id right after the "_MACHINE_ID=" prefix; the offset must count
 * the '=' so the separator is not overwritten. */
699 sd_id128_to_string(id, machine_id + sizeof("_MACHINE_ID=") - 1);
700 IOVEC_SET_STRING(iovec[n++], machine_id);
703 t = gethostname_malloc();
705 hostname = strappend("_HOSTNAME=", t);
708 IOVEC_SET_STRING(iovec[n++], hostname);
713 if (s->split_mode == SPLIT_UID && realuid > 0)
714 /* Split up strictly by any UID */
715 journal_uid = realuid;
716 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
717 /* Split up by login UIDs, this avoids creation of
718 * individual journals for system UIDs. We do this
719 * only if the realuid is not root, in order not to
720 * accidentally leak privileged information to the
721 * user that is logged by a privileged process that is
722 * part of an unprivileged session.*/
727 write_to_journal(s, journal_uid, iovec, n);
/* Logs a message generated by journald itself: builds PRIORITY=6,
 * _TRANSPORT=driver, a printf-style formatted MESSAGE= field and, when
 * message_id is not SD_ID128_NULL, a message ID field, then dispatches the
 * entry with the daemon's own pid/uid/gid as credentials. */
730 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
731 char mid[11 + 32 + 1];
732 char buffer[16 + LINE_MAX + 1];
/* Room for the meta fields added by dispatch_message_real() plus the up to
 * four fields set here. */
733 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
736 struct ucred ucred = {};
741 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
742 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The formatted text is written right after the 8-byte "MESSAGE=" prefix. */
744 memcpy(buffer, "MESSAGE=", 8);
745 va_start(ap, format);
746 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
748 char_array_0(buffer);
749 IOVEC_SET_STRING(iovec[n++], buffer);
751 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
752 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
754 IOVEC_SET_STRING(iovec[n++], mid);
757 ucred.pid = getpid();
758 ucred.uid = getuid();
759 ucred.gid = getgid();
761 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Entry point for messages received from clients: filters by
 * s->max_level_store, applies rate limiting keyed by the (truncated) cgroup
 * path of the sender, emits a "Suppressed ..." driver message when the rate
 * limiter reports suppressed messages, and finally forwards the entry to
 * dispatch_message_real().
 * NOTE(review): the early-exit statements are not visible here - confirm. */
764 void server_dispatch_message(
766 struct iovec *iovec, unsigned n, unsigned m,
769 const char *label, size_t label_len,
774 char _cleanup_free_ *path = NULL;
778 assert(iovec || n == 0);
/* Numerically higher priority values than the configured maximum are
 * filtered out here. */
783 if (LOG_PRI(priority) > s->max_level_store)
789 path = shortened_cgroup_path(ucred->pid);
793 /* example: /user/lennart/3/foobar
794 * /system/dbus.service/foobar
796 * So let's cut off everything past the third /, since that is
797 * where user directories start */
799 c = strchr(path, '/');
801 c = strchr(c+1, '/');
803 c = strchr(c+1, '/');
/* The rate limiter also receives the currently available journal space. */
809 rl = journal_rate_limit_test(s->rate_limit, path,
810 priority & LOG_PRIMASK, available_space(s));
815 /* Write a suppression message if we suppressed something */
817 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
818 "Suppressed %u messages from %s", rl - 1, path);
821 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
/* Opens the persistent system journal and/or the runtime journal according to
 * s->storage. The persistent journal is only attempted for
 * persistent/auto storage once "/run/systemd/journal/flushed" exists. When a
 * journal is opened its permissions are fixed and a driver message announcing
 * the size limit is logged.
 * NOTE(review): return values and several error branches are not visible
 * here - confirm. */
825 static int system_journal_open(Server *s) {
831 r = sd_id128_get_machine(&machine);
835 sd_id128_to_string(machine, ids);
837 if (!s->system_journal &&
838 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
839 access("/run/systemd/journal/flushed", F_OK) >= 0) {
841 /* If in auto mode: first try to create the machine
842 * path, but not the prefix.
844 * If in persistent mode: create /var/log/journal and
845 * the machine path */
847 if (s->storage == STORAGE_PERSISTENT)
848 (void) mkdir("/var/log/journal/", 0755);
850 fn = strappend("/var/log/journal/", ids);
854 (void) mkdir(fn, 0755);
857 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
861 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
865 char fb[FORMAT_BYTES_MAX];
867 server_fix_perms(s, s->system_journal, 0);
868 server_driver_message(s, SD_ID128_NULL, "Allowing system journal files to grow to %s.",
869 format_bytes(fb, sizeof(fb), s->system_metrics.max_use));
/* -ENOENT and -EROFS are not warned about. */
873 if (r != -ENOENT && r != -EROFS)
874 log_warning("Failed to open system journal: %s", strerror(-r));
880 if (!s->runtime_journal &&
881 (s->storage != STORAGE_NONE)) {
883 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
887 if (s->system_journal) {
889 /* Try to open the runtime journal, but only
890 * if it already exists, so that we can flush
891 * it into the system journal */
893 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
898 log_warning("Failed to open runtime journal: %s", strerror(-r));
905 /* OK, we really need the runtime journal, so create
906 * it if necessary. */
908 (void) mkdir_parents(fn, 0755);
909 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
913 log_error("Failed to open runtime journal: %s", strerror(-r));
918 if (s->runtime_journal) {
919 char fb[FORMAT_BYTES_MAX];
921 server_fix_perms(s, s->runtime_journal, 0);
922 server_driver_message(s, SD_ID128_NULL, "Allowing runtime journal files to grow to %s.",
923 format_bytes(fb, sizeof(fb), s->runtime_metrics.max_use));
/* Copies all entries of the runtime journal into the persistent system
 * journal, then closes the runtime journal and removes /run/log/journal.
 * Only applies to auto/persistent storage with an open runtime journal and a
 * system journal that could be opened. A failed copy is retried once when
 * shall_try_append_again() accepts the error.
 * NOTE(review): return values and the rotate/vacuum steps before the retry
 * are not visible here - confirm. */
930 int server_flush_to_var(Server *s) {
933 sd_journal *j = NULL;
937 if (s->storage != STORAGE_AUTO &&
938 s->storage != STORAGE_PERSISTENT)
941 if (!s->runtime_journal)
944 system_journal_open(s);
946 if (!s->system_journal)
949 log_debug("Flushing to /var...");
951 r = sd_id128_get_machine(&machine);
953 log_error("Failed to get machine id: %s", strerror(-r));
/* Read back the runtime journal through the public reader API. */
957 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
959 log_error("Failed to read runtime journal: %s", strerror(-r));
963 sd_journal_set_data_threshold(j, 0);
965 SD_JOURNAL_FOREACH(j) {
970 assert(f && f->current_offset > 0);
972 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
974 log_error("Can't read entry: %s", strerror(-r));
978 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
982 if (!shall_try_append_again(s->system_journal, r)) {
983 log_error("Can't write entry: %s", strerror(-r));
990 log_debug("Retrying write.");
991 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
993 log_error("Can't write entry: %s", strerror(-r));
999 journal_file_post_change(s->system_journal);
1001 journal_file_close(s->runtime_journal);
1002 s->runtime_journal = NULL;
1005 rm_rf("/run/log/journal", false, true, false);
1007 sd_journal_close(j);
/* Handles one epoll event. The event's fd is compared against the well-known
 * descriptors of the server (signalfd, sync timer, /dev/kmsg, native and
 * syslog datagram sockets, stdout listening socket); anything else is treated
 * as a pointer to a StdoutStream.
 * NOTE(review): return values are not visible here - confirm. */
1012 int process_event(Server *s, struct epoll_event *ev) {
/* --- signals --- */
1016 if (ev->data.fd == s->signal_fd) {
1017 struct signalfd_siginfo sfsi;
1020 if (ev->events != EPOLLIN) {
1021 log_error("Got invalid event from epoll.");
1025 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1026 if (n != sizeof(sfsi)) {
1031 if (errno == EINTR || errno == EAGAIN)
/* SIGUSR1: mark the journal as flushed and flush /run to /var. */
1037 if (sfsi.ssi_signo == SIGUSR1) {
1038 touch("/run/systemd/journal/flushed");
1039 server_flush_to_var(s);
1044 if (sfsi.ssi_signo == SIGUSR2) {
1050 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
/* --- sync timer --- */
1054 } else if (ev->data.fd == s->sync_timer_fd) {
1058 log_debug("Got sync request from epoll.");
1060 r = read(ev->data.fd, (void *)&t, sizeof(t));
/* --- kernel messages --- */
1067 } else if (ev->data.fd == s->dev_kmsg_fd) {
1070 if (ev->events != EPOLLIN) {
1071 log_error("Got invalid event from epoll.");
1075 r = server_read_dev_kmsg(s);
/* --- native and syslog datagram sockets --- */
1081 } else if (ev->data.fd == s->native_fd ||
1082 ev->data.fd == s->syslog_fd) {
1084 if (ev->events != EPOLLIN) {
1085 log_error("Got invalid event from epoll.");
1090 struct msghdr msghdr;
1092 struct ucred *ucred = NULL;
1093 struct timeval *tv = NULL;
1094 struct cmsghdr *cmsg;
1096 size_t label_len = 0;
1098 struct cmsghdr cmsghdr;
1100 /* We use NAME_MAX space for the
1101 * SELinux label here. The kernel
1102 * currently enforces no limit, but
1103 * according to suggestions from the
1104 * SELinux people this will change and
1105 * it will probably be identical to
1106 * NAME_MAX. For now we use that, but
1107 * this should be updated one day when
1108 * the final limit is known.*/
1109 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1110 CMSG_SPACE(sizeof(struct timeval)) +
1111 CMSG_SPACE(sizeof(int)) + /* fd */
1112 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask the socket how many bytes are pending and grow the receive buffer
 * when it is smaller than that. */
1119 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1120 log_error("SIOCINQ failed: %m");
1124 if (s->buffer_size < (size_t) v) {
1128 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1129 b = realloc(s->buffer, l+1);
1132 log_error("Couldn't increase buffer.");
1141 iovec.iov_base = s->buffer;
1142 iovec.iov_len = s->buffer_size;
1146 msghdr.msg_iov = &iovec;
1147 msghdr.msg_iovlen = 1;
1148 msghdr.msg_control = &control;
1149 msghdr.msg_controllen = sizeof(control);
1151 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1154 if (errno == EINTR || errno == EAGAIN)
1157 log_error("recvmsg() failed: %m");
/* Pick up credentials, security label, timestamp and passed file
 * descriptors from the control messages. */
1161 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1163 if (cmsg->cmsg_level == SOL_SOCKET &&
1164 cmsg->cmsg_type == SCM_CREDENTIALS &&
1165 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1166 ucred = (struct ucred*) CMSG_DATA(cmsg);
1167 else if (cmsg->cmsg_level == SOL_SOCKET &&
1168 cmsg->cmsg_type == SCM_SECURITY) {
1169 label = (char*) CMSG_DATA(cmsg);
1170 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1171 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1172 cmsg->cmsg_type == SO_TIMESTAMP &&
1173 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1174 tv = (struct timeval*) CMSG_DATA(cmsg);
1175 else if (cmsg->cmsg_level == SOL_SOCKET &&
1176 cmsg->cmsg_type == SCM_RIGHTS) {
1177 fds = (int*) CMSG_DATA(cmsg);
1178 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* Syslog socket: plain datagrams only, file descriptors are refused. */
1182 if (ev->data.fd == s->syslog_fd) {
1185 if (n > 0 && n_fds == 0) {
1186 e = memchr(s->buffer, '\n', n);
1192 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1193 } else if (n_fds > 0)
1194 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native socket: either a datagram payload or exactly one file descriptor
 * with an empty payload. */
1197 if (n > 0 && n_fds == 0)
1198 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1199 else if (n == 0 && n_fds == 1)
1200 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1202 log_warning("Got too many file descriptors via native socket. Ignoring.");
1205 close_many(fds, n_fds);
/* --- new stdout stream connection --- */
1210 } else if (ev->data.fd == s->stdout_fd) {
1212 if (ev->events != EPOLLIN) {
1213 log_error("Got invalid event from epoll.");
1217 stdout_stream_new(s);
1221 StdoutStream *stream;
/* Only EPOLLIN and EPOLLHUP bits are acceptable for a stream. */
1223 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1224 log_error("Got invalid event from epoll.");
1228 /* If it is none of the well-known fds, it must be an
1229 * stdout stream fd. Note that this is a bit ugly here
1230 * (since we rely that none of the well-known fds
1231 * could be interpreted as pointer), but nonetheless
1232 * safe, since the well-known fds would never get an
1233 * fd > 4096, i.e. beyond the first memory page */
1235 stream = ev->data.ptr;
/* A result <= 0 from processing ends the stream. */
1237 if (stdout_stream_process(stream) <= 0)
1238 stdout_stream_free(stream);
1243 log_error("Unknown event.");
/* Blocks SIGINT, SIGTERM, SIGUSR1 and SIGUSR2 for the process, creates a
 * non-blocking close-on-exec signalfd for them in s->signal_fd and registers
 * it with the server's epoll instance for EPOLLIN. */
1247 static int open_signalfd(Server *s) {
1249 struct epoll_event ev;
1253 assert_se(sigemptyset(&mask) == 0);
1254 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
/* SIG_SETMASK replaces the whole signal mask with this set. */
1255 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1257 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1258 if (s->signal_fd < 0) {
1259 log_error("signalfd(): %m");
1264 ev.events = EPOLLIN;
1265 ev.data.fd = s->signal_fd;
1267 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1268 log_error("epoll_ctl(): %m");
/* Applies systemd.journald.forward_to_{syslog,kmsg,console}= boolean switches
 * found on the kernel command line (/proc/cmdline) to the Server. Other words
 * starting with "systemd.journald" only produce a warning. The visible code
 * bails out before reading /proc/cmdline when detect_container() reports a
 * container.
 * NOTE(review): return values are not visible here - confirm. */
1275 static int server_parse_proc_cmdline(Server *s) {
1276 char _cleanup_free_ *line = NULL;
1281 if (detect_container(NULL) > 0)
1284 r = read_one_line_file("/proc/cmdline", &line);
1286 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1290 FOREACH_WORD_QUOTED(w, l, line, state) {
1291 char _cleanup_free_ *word;
1293 word = strndup(w, l);
/* The numeric offsets below equal the length of the matched prefix, so
 * word + N points at the value after the '='. */
1297 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1298 r = parse_boolean(word + 35);
1300 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1302 s->forward_to_syslog = r;
1303 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1304 r = parse_boolean(word + 33);
1306 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1308 s->forward_to_kmsg = r;
1309 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1310 r = parse_boolean(word + 36);
1312 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1314 s->forward_to_console = r;
1315 } else if (startswith(word, "systemd.journald"))
1316 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Parses the "Journal" section of /etc/systemd/journald.conf into the Server
 * using the gperf-generated lookup table. Failures to open or parse the file
 * are logged as warnings; a missing file (ENOENT) is handled separately.
 * NOTE(review): return values are not visible here - confirm. */
1322 static int server_parse_config_file(Server *s) {
1323 static const char *fn = "/etc/systemd/journald.conf";
1324 FILE _cleanup_fclose_ *f = NULL;
/* "e" in the mode string requests close-on-exec (glibc extension). */
1329 f = fopen(fn, "re");
1331 if (errno == ENOENT)
1334 log_warning("Failed to open configuration file %s: %m", fn);
1338 r = config_parse(fn, f, "Journal\0", config_item_perf_lookup,
1339 (void*) journald_gperf_lookup, false, s);
1341 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Creates the CLOCK_MONOTONIC timerfd used for delayed journal syncs, stores
 * it in s->sync_timer_fd and registers it with the server's epoll instance
 * for EPOLLIN. The timer is not armed here. */
1346 static int server_open_sync_timer(Server *s) {
1348 struct epoll_event ev;
1352 s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1353 if (s->sync_timer_fd < 0)
1357 ev.events = EPOLLIN;
1358 ev.data.fd = s->sync_timer_fd;
1360 r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1362 log_error("Failed to add idle timer fd to epoll object: %m");
/* Arms the sync timer so that a sync happens s->sync_interval_usec from now,
 * unless one is already scheduled. Sets s->sync_scheduled once the timer is
 * armed.
 * NOTE(review): the branch for a zero interval and the return values are not
 * visible here - confirm. */
1369 int server_schedule_sync(Server *s) {
1374 if (s->sync_scheduled)
1377 if (s->sync_interval_usec) {
/* Split the microsecond interval into whole seconds plus the sub-second
 * remainder, the latter converted from microseconds to nanoseconds
 * (x1000) as struct timespec requires. */
1378 struct itimerspec sync_timer_enable = {
1379 .it_value.tv_sec = s->sync_interval_usec / USEC_PER_SEC,
1380 .it_value.tv_nsec = (s->sync_interval_usec % USEC_PER_SEC) * 1000,
1383 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1388 s->sync_scheduled = true;
/* Initializes the Server: sets all descriptors to -1 and the defaults, reads
 * the configuration file and kernel command line, creates the user journal
 * hashmap, mmap cache and epoll instance, adopts sockets passed in via socket
 * activation, opens the remaining sockets/devices/timers, and finally opens
 * the journal files.
 * NOTE(review): the error returns after each step are not visible here -
 * confirm. */
1393 int server_init(Server *s) {
1399 s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1400 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1404 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1405 s->sync_scheduled = false;
1407 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1408 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1410 s->forward_to_syslog = true;
1412 s->max_level_store = LOG_DEBUG;
1413 s->max_level_syslog = LOG_DEBUG;
1414 s->max_level_kmsg = LOG_NOTICE;
1415 s->max_level_console = LOG_INFO;
/* Every byte of both metrics structures starts out as 0xFF. */
1417 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1418 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1420 server_parse_config_file(s);
1421 server_parse_proc_cmdline(s);
/* Exactly one of interval/burst being zero is inconsistent: zero both. */
1422 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1423 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1424 (long long unsigned) s->rate_limit_interval,
1425 s->rate_limit_burst);
1426 s->rate_limit_interval = s->rate_limit_burst = 0;
1429 mkdir_p("/run/systemd/journal", 0755);
1431 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1432 if (!s->user_journals)
1435 s->mmap = mmap_cache_new();
1439 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1440 if (s->epoll_fd < 0) {
1441 log_error("Failed to create epoll object: %m");
/* Classify each passed-in descriptor by socket type and path; at most one
 * of each kind is accepted. */
1445 n = sd_listen_fds(true);
1447 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1451 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1453 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1455 if (s->native_fd >= 0) {
1456 log_error("Too many native sockets passed.");
1462 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1464 if (s->stdout_fd >= 0) {
1465 log_error("Too many stdout sockets passed.");
1471 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1473 if (s->syslog_fd >= 0) {
1474 log_error("Too many /dev/log sockets passed.");
1481 log_error("Unknown socket passed.");
1486 r = server_open_syslog_socket(s);
1490 r = server_open_native_socket(s);
1494 r = server_open_stdout_socket(s);
1498 r = server_open_dev_kmsg(s);
1502 r = server_open_kernel_seqnum(s);
1506 r = server_open_sync_timer(s);
1510 r = open_signalfd(s);
1514 s->udev = udev_new();
1518 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1519 s->rate_limit_burst);
1523 r = system_journal_open(s);
/* Calls journal_file_maybe_append_tag() with the current CLOCK_REALTIME
 * timestamp on the system journal (when open) and on every per-user
 * journal. */
1530 void server_maybe_append_tags(Server *s) {
1536 n = now(CLOCK_REALTIME);
1538 if (s->system_journal)
1539 journal_file_maybe_append_tag(s->system_journal, n);
1541 HASHMAP_FOREACH(f, s->user_journals, i)
1542 journal_file_maybe_append_tag(f, n);
/* Releases everything the Server owns: stdout streams, the system, runtime
 * and per-user journal files, the hashmap, all file descriptors that were
 * opened (>= 0), the rate limiter, the kernel sequence number mapping, the
 * mmap cache and the udev context. */
1546 void server_done(Server *s) {
/* The loop relies on each stdout_stream_free() call changing
 * s->stdout_streams until it becomes NULL. */
1550 while (s->stdout_streams)
1551 stdout_stream_free(s->stdout_streams);
1553 if (s->system_journal)
1554 journal_file_close(s->system_journal);
1556 if (s->runtime_journal)
1557 journal_file_close(s->runtime_journal);
1559 while ((f = hashmap_steal_first(s->user_journals)))
1560 journal_file_close(f);
1562 hashmap_free(s->user_journals);
1564 if (s->epoll_fd >= 0)
1565 close_nointr_nofail(s->epoll_fd);
1567 if (s->signal_fd >= 0)
1568 close_nointr_nofail(s->signal_fd);
1570 if (s->syslog_fd >= 0)
1571 close_nointr_nofail(s->syslog_fd);
1573 if (s->native_fd >= 0)
1574 close_nointr_nofail(s->native_fd);
1576 if (s->stdout_fd >= 0)
1577 close_nointr_nofail(s->stdout_fd);
1579 if (s->dev_kmsg_fd >= 0)
1580 close_nointr_nofail(s->dev_kmsg_fd);
1582 if (s->sync_timer_fd >= 0)
1583 close_nointr_nofail(s->sync_timer_fd);
1586 journal_rate_limit_free(s->rate_limit);
/* The mapping being released is sizeof(uint64_t) bytes long. */
1588 if (s->kernel_seqnum)
1589 munmap(s->kernel_seqnum, sizeof(uint64_t));
1595 mmap_cache_unref(s->mmap);
1598 udev_unref(s->udev);