1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
35 #include <systemd/sd-login.h>
41 #include "journal-file.h"
42 #include "socket-util.h"
43 #include "cgroup-util.h"
47 #include "conf-parser.h"
48 #include "journal-internal.h"
49 #include "journal-vacuum.h"
50 #include "journal-authenticate.h"
51 #include "journald-server.h"
52 #include "journald-rate-limit.h"
53 #include "journald-kmsg.h"
54 #include "journald-syslog.h"
55 #include "journald-stream.h"
56 #include "journald-console.h"
57 #include "journald-native.h"
61 #include <acl/libacl.h>
66 #include <selinux/selinux.h>
69 #define USER_JOURNALS_MAX 1024
71 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
72 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
73 #define DEFAULT_RATE_LIMIT_BURST 200
75 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Textual name of every Storage enum value, indexed by the enum constant.
 * NOTE(review): the two macros that follow are defined outside this file;
 * judging by their arguments they generate the string<->enum lookup helpers
 * and the config_parse_storage() callback for this table -- confirm in the
 * headers that define them. */
77 static const char* const storage_table[] = {
78 [STORAGE_AUTO] = "auto",
79 [STORAGE_VOLATILE] = "volatile",
80 [STORAGE_PERSISTENT] = "persistent",
81 [STORAGE_NONE] = "none"
84 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
85 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Textual name of every SplitMode enum value, indexed by the enum constant.
 * NOTE(review): as with the storage table, the macros that follow are defined
 * elsewhere and presumably generate the lookup helpers and the
 * config_parse_split_mode() callback -- confirm. */
87 static const char* const split_mode_table[] = {
88 [SPLIT_NONE] = "none",
90 [SPLIT_LOGIN] = "login"
93 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
94 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Computes how many more bytes the journal may use, bounded both by the
 * configured max_use cap and by the free space of the file system (less the
 * keep_free reserve). The result is cached in s->cached_available_space and
 * reused until RECHECK_AVAILABLE_SPACE_USEC has passed since it was stored. */
96 static uint64_t available_space(Server *s) {
98 char _cleanup_free_ *p = NULL;
102 uint64_t sum = 0, avail = 0, ss_avail = 0;
104 DIR _cleanup_closedir_ *d = NULL;
108 ts = now(CLOCK_MONOTONIC);
/* Serve the cached value while it is still fresh. */
110 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
111 return s->cached_available_space;
113 r = sd_id128_get_machine(&machine);
/* Choose the directory prefix and the matching metrics: the persistent pair
 * when a system journal is open, the runtime pair as the alternative. */
117 if (s->system_journal) {
118 f = "/var/log/journal/";
119 m = &s->system_metrics;
121 f = "/run/log/journal/";
122 m = &s->runtime_metrics;
/* The directory that gets scanned is the prefix followed by the machine ID. */
127 p = strappend(f, sd_id128_to_string(machine, ids));
135 if (fstatvfs(dirfd(d), &ss) < 0)
141 union dirent_storage buf;
143 r = readdir_r(d, &buf.de, &de);
/* Only regular files named *.journal or *.journal~ are counted. */
150 if (!endswith(de->d_name, ".journal") &&
151 !endswith(de->d_name, ".journal~"))
154 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
157 if (!S_ISREG(st.st_mode))
/* st_blocks is multiplied by 512 to obtain the bytes actually allocated. */
160 sum += (uint64_t) st.st_blocks * 512UL;
/* Room left below the max_use cap, clamped at zero. */
163 avail = sum >= m->max_use ? 0 : m->max_use - sum;
/* Free space on the file system minus the keep_free reserve, clamped at zero.
 * NOTE(review): POSIX expresses f_bavail in units of f_frsize, while f_bsize
 * is used here -- confirm both agree on the file systems that matter. */
165 ss_avail = ss.f_bsize * ss.f_bavail;
167 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
169 if (ss_avail < avail)
/* Remember the result together with the time it was computed. */
172 s->cached_available_space = avail;
173 s->cached_available_space_timestamp = ts;
/* Resolves the "systemd-journal" group into s->file_gid. The lookup is
 * attempted at most once: s->file_gid_valid is set afterwards whether or not
 * the group could be resolved (a failure is only logged as a warning). */
178 static void server_read_file_gid(Server *s) {
179 const char *g = "systemd-journal";
/* A previous call already attempted the lookup. */
184 if (s->file_gid_valid)
187 r = get_group_creds(&g, &s->file_gid);
189 log_warning("Failed to resolve '%s' group: %s", g, strerror(-r));
191 /* if we couldn't read the gid, then it will be 0, but that's
192 * fine and we shouldn't try to resolve the group again, so
193 * let's just pretend it worked right-away. */
194 s->file_gid_valid = true;
/* Adjusts ownership, mode and ACL of a journal file.
 *
 * s:   server state; supplies the group id applied to the file.
 * f:   journal file to adjust; f->fd is operated on, f->path is used in logs.
 * uid: user that is given read access through an ACL entry.
 *
 * Every failure is logged as a warning and otherwise ignored. */
197 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
202 acl_permset_t permset;
/* Make sure s->file_gid has been looked up before it is used below. */
207 server_read_file_gid(s);
/* Mode 0640, owner 0, group s->file_gid. */
209 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
211 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
217 acl = acl_get_fd(f->fd);
219 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an existing ACL entry for this user. */
223 r = acl_find_uid(acl, uid, &entry);
/* Create an ACL_USER entry qualified with uid.
 * NOTE(review): presumably only done when the lookup above found none --
 * the guarding condition is not part of this excerpt. */
226 if (acl_create_entry(&acl, &entry) < 0 ||
227 acl_set_tag_type(entry, ACL_USER) < 0 ||
228 acl_set_qualifier(entry, &uid) < 0) {
229 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Add read permission to the entry and recalculate the ACL mask. */
234 if (acl_get_permset(entry, &permset) < 0 ||
235 acl_add_perm(permset, ACL_READ) < 0 ||
236 acl_calc_mask(&acl) < 0) {
237 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Write the modified ACL back to the file. */
241 if (acl_set_fd(f->fd, acl) < 0)
242 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Selects the journal file that entries for the given uid are written to.
 *
 * The runtime journal is returned whenever it is open. Otherwise a per-user
 * file "/var/log/journal/<machine-id>/user-<uid>.journal" is looked up in, or
 * opened and added to, the s->user_journals hashmap. Several paths fall back
 * to s->system_journal, for instance when the path string cannot be built. */
249 static JournalFile* find_journal(Server *s, uid_t uid) {
257 /* We split up user logs only on /var, not on /run. If the
258 * runtime file is open, we write to it exclusively, in order
259 * to guarantee proper order as soon as we flush /run to
260 * /var and close the runtime file. */
262 if (s->runtime_journal)
263 return s->runtime_journal;
266 return s->system_journal;
268 r = sd_id128_get_machine(&machine);
270 return s->system_journal;
/* The hashmap is keyed by the uid packed into a pointer. */
272 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
276 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
277 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
278 return s->system_journal;
/* Keep the number of open user journals below USER_JOURNALS_MAX. */
280 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
281 /* Too many open? Then let's close one */
282 f = hashmap_steal_first(s->user_journals);
284 journal_file_close(f);
/* Open (creating if needed) the user journal, using the system metrics and
 * passing the system journal as template argument. */
287 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
291 return s->system_journal;
293 server_fix_perms(s, f, uid);
/* Remember the file for later lookups; the file is closed again on the path
 * that returns the system journal below. */
295 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
297 journal_file_close(f);
298 return s->system_journal;
/* Rotates every open journal file: the runtime journal, the system journal
 * and each per-user journal in s->user_journals. Failures are logged; after a
 * rotation the file permissions are fixed up again via server_fix_perms(). */
304 void server_rotate(Server *s) {
310 log_debug("Rotating...");
312 if (s->runtime_journal) {
/* The runtime journal is rotated with sealing disabled (last argument false). */
313 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
/* The pointer may have been cleared by the rotation attempt, hence two
 * different error messages. */
315 if (s->runtime_journal)
316 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
318 log_error("Failed to create new runtime journal: %s", strerror(-r));
320 server_fix_perms(s, s->runtime_journal, 0);
323 if (s->system_journal) {
324 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
326 if (s->system_journal)
327 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
329 log_error("Failed to create new system journal: %s", strerror(-r));
332 server_fix_perms(s, s->system_journal, 0);
/* Per-user journals: k is the hashmap key, i.e. the uid packed into a pointer. */
335 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
336 r = journal_file_rotate(&f, s->compress, s->seal);
339 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
341 log_error("Failed to create user journal: %s", strerror(-r));
/* Store the new file object under the same key and fix its permissions for
 * the owning uid. */
343 hashmap_replace(s->user_journals, k, f);
344 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Sets the system journal and all per-user journals offline via
 * journal_file_set_offline(), then disarms the sync timer and clears
 * s->sync_scheduled. Failures are logged only. */
349 void server_sync(Server *s) {
/* An all-zero itimerspec, passed to timerfd_settime() below. */
355 static const struct itimerspec sync_timer_disable = {};
357 if (s->system_journal) {
358 r = journal_file_set_offline(s->system_journal);
360 log_error("Failed to sync system journal: %s", strerror(-r));
363 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
364 r = journal_file_set_offline(f);
366 log_error("Failed to sync user journal: %s", strerror(-r));
369 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
/* NOTE(review): the message says "max timer" although the descriptor is
 * s->sync_timer_fd -- wording may be a leftover. */
371 log_error("Failed to disable max timer: %m");
373 s->sync_scheduled = false;
/* Runs journal_directory_vacuum() on the persistent and/or runtime journal
 * directory of this machine, depending on which journals are open, using the
 * respective max_use/keep_free metrics and s->max_retention_usec.
 * s->oldest_file_usec is reset and handed to the vacuum calls, and the cached
 * available-space value is invalidated at the end. */
376 void server_vacuum(Server *s) {
382 log_debug("Vacuuming...");
384 s->oldest_file_usec = 0;
386 r = sd_id128_get_machine(&machine);
388 log_error("Failed to get machine ID: %s", strerror(-r));
392 sd_id128_to_string(machine, ids);
394 if (s->system_journal) {
395 p = strappend("/var/log/journal/", ids);
401 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
/* A missing directory (-ENOENT) is not reported. */
402 if (r < 0 && r != -ENOENT)
403 log_error("Failed to vacuum %s: %s", p, strerror(-r));
407 if (s->runtime_journal) {
408 p = strappend("/run/log/journal/", ids);
414 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
415 if (r < 0 && r != -ENOENT)
416 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* A zero timestamp makes available_space() recompute on its next call. */
420 s->cached_available_space_timestamp = 0;
/* Builds the cgroup path of process pid relative to the cgroup of PID 1 in
 * the SYSTEMD_CGROUP_CONTROLLER hierarchy. The visible success path returns a
 * strdup()ed string, so the caller owns the result. */
423 static char *shortened_cgroup_path(pid_t pid) {
425 char _cleanup_free_ *process_path = NULL, *init_path = NULL;
430 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
434 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
/* Cut a trailing "/system" (7 characters) off the path of PID 1. */
438 if (endswith(init_path, "/system"))
439 init_path[strlen(init_path) - 7] = 0;
440 else if (streq(init_path, "/"))
/* When the process path begins with that prefix, keep only the remainder. */
443 if (startswith(process_path, init_path)) {
444 path = strdup(process_path + strlen(init_path));
/* Classifies the error of a failed journal append and logs why the file is
 * going to be rotated. Callers in this file retry the write when the result
 * is true.
 *
 * f: journal file the append was attempted on; only f->path is read (logging).
 * r: negative errno-style result of the failed append. */
453 bool shall_try_append_again(JournalFile *f, int r) {
455 /* -E2BIG Hit configured limit
457 -EDQUOT Quota limit hit
459 -EHOSTDOWN Other machine
460 -EBUSY Unclean shutdown
461 -EPROTONOSUPPORT Unsupported feature
464 -ESHUTDOWN Already archived */
466 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
467 log_debug("%s: Allocation limit reached, rotating.", f->path);
468 else if (r == -EHOSTDOWN)
469 log_info("%s: Journal file from other machine, rotating.", f->path);
470 else if (r == -EBUSY)
471 log_info("%s: Unclean shutdown, rotating.", f->path);
472 else if (r == -EPROTONOSUPPORT)
473 log_info("%s: Unsupported feature, rotating.", f->path);
/* Like every other code in this chain, ESHUTDOWN has to be compared in its
 * negated form; the positive constant can never equal a negative result. */
474 else if (r == -EBADMSG || r == -ENODATA || r == -ESHUTDOWN)
475 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Appends one entry, given as n iovec elements, to the journal chosen for
 * uid by find_journal(). When the first append fails with an error that
 * shall_try_append_again() accepts, the journal is looked up again and the
 * write is retried once; any remaining failure is logged and the entry is
 * dropped. */
482 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
/* Also consulted below: once set, no further retry is attempted. */
484 bool vacuumed = false;
491 f = find_journal(s, uid);
/* Rotation was suggested for this file; the journal is looked up again
 * afterwards. */
495 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
496 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
501 f = find_journal(s, uid);
506 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
508 server_schedule_sync(s);
/* Give up unless the error is one that rotating can cure. */
512 if (vacuumed || !shall_try_append_again(f, r)) {
513 log_error("Failed to write entry, ignoring: %s", strerror(-r));
520 f = find_journal(s, uid);
524 log_debug("Retrying write.");
525 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
527 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* Completes an entry with metadata fields describing the sender (all named
 * with a leading underscore) and hands it to write_to_journal().
 *
 * iovec, n, m:   field array, number of fields already filled in, and total
 *                capacity; N_IOVEC_META_FIELDS further slots must be free.
 * label, label_len: security label of the sender, not necessarily
 *                NUL-terminated (it is copied with an explicit length).
 * unit_id:       unit name used when none can be derived from the sender's
 *                PID.
 * The parameter list also provides ucred (sender credentials) and tv (source
 * timestamp), both used below. */
530 static void dispatch_message_real(
532 struct iovec *iovec, unsigned n, unsigned m,
535 const char *label, size_t label_len,
536 const char *unit_id) {
/* Fixed-size buffers for the fields whose maximum length is known. */
538 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(ucred->pid)],
539 uid[sizeof("_UID=") + DECIMAL_STR_MAX(ucred->uid)],
540 gid[sizeof("_GID=") + DECIMAL_STR_MAX(ucred->gid)],
541 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
542 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
543 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=";
/* Heap-allocated field strings, declared with the _cleanup_free_ attribute. */
545 char _cleanup_free_ *comm = NULL, *cmdline = NULL, *hostname = NULL,
546 *exe = NULL, *cgroup = NULL, *session = NULL,
547 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
550 char _cleanup_free_ *audit_session = NULL, *audit_loginuid = NULL;
556 uid_t realuid = 0, owner = 0, journal_uid;
557 bool owner_valid = false;
/* The caller must have left room for all metadata fields. */
562 assert(n + N_IOVEC_META_FIELDS <= m);
570 realuid = ucred->uid;
/* Credentials of the sender. */
572 snprintf(pid, sizeof(pid) - 1, "_PID=%lu", (unsigned long) ucred->pid);
574 IOVEC_SET_STRING(iovec[n++], pid);
576 snprintf(uid, sizeof(uid) - 1, "_UID=%lu", (unsigned long) ucred->uid);
578 IOVEC_SET_STRING(iovec[n++], uid);
580 snprintf(gid, sizeof(gid) - 1, "_GID=%lu", (unsigned long) ucred->gid);
582 IOVEC_SET_STRING(iovec[n++], gid);
/* Process name, executable and command line, looked up by PID. */
584 r = get_process_comm(ucred->pid, &t);
586 comm = strappend("_COMM=", t);
590 IOVEC_SET_STRING(iovec[n++], comm);
593 r = get_process_exe(ucred->pid, &t);
595 exe = strappend("_EXE=", t);
599 IOVEC_SET_STRING(iovec[n++], exe);
602 r = get_process_cmdline(ucred->pid, 0, false, &t);
604 cmdline = strappend("_CMDLINE=", t);
608 IOVEC_SET_STRING(iovec[n++], cmdline);
/* Audit session and login uid. */
612 r = audit_session_from_pid(ucred->pid, &audit);
614 if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
615 IOVEC_SET_STRING(iovec[n++], audit_session);
617 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
619 if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
620 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* Control group, login session, session owner and unit. */
623 t = shortened_cgroup_path(ucred->pid);
625 cgroup = strappend("_SYSTEMD_CGROUP=", t);
629 IOVEC_SET_STRING(iovec[n++], cgroup);
633 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
634 session = strappend("_SYSTEMD_SESSION=", t);
638 IOVEC_SET_STRING(iovec[n++], session);
641 if (sd_pid_get_owner_uid(ucred->pid, &owner) >= 0) {
643 if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
644 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* Unit: derived from the PID when possible, otherwise from unit_id. */
648 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
649 unit = strappend("_SYSTEMD_UNIT=", t);
651 } else if (cg_pid_get_user_unit(ucred->pid, &t) >= 0) {
652 unit = strappend("_SYSTEMD_USER_UNIT=", t);
654 } else if (unit_id) {
656 unit = strappend("_SYSTEMD_USER_UNIT=", unit_id);
658 unit = strappend("_SYSTEMD_UNIT=", unit_id);
662 IOVEC_SET_STRING(iovec[n++], unit);
/* SELinux context: either copied from the supplied label (length-bounded and
 * then terminated by hand) or queried for the PID with getpidcon(). */
666 selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
667 if (selinux_context) {
668 *((char*) mempcpy(stpcpy(selinux_context, "_SELINUX_CONTEXT="), label, label_len)) = 0;
669 IOVEC_SET_STRING(iovec[n++], selinux_context);
672 security_context_t con;
674 if (getpidcon(ucred->pid, &con) >= 0) {
675 selinux_context = strappend("_SELINUX_CONTEXT=", con);
677 IOVEC_SET_STRING(iovec[n++], selinux_context);
/* Timestamp taken from tv. */
685 snprintf(source_time, sizeof(source_time) - 1, "_SOURCE_REALTIME_TIMESTAMP=%llu",
686 (unsigned long long) timeval_load(tv));
687 char_array_0(source_time);
688 IOVEC_SET_STRING(iovec[n++], source_time);
691 /* Note that strictly speaking storing the boot id here is
692 * redundant since the entry includes this in-line
693 * anyway. However, we need this indexed, too. */
694 r = sd_id128_get_boot(&id);
/* The ID string is written right behind the prefix already in the buffer. */
696 sd_id128_to_string(id, boot_id + sizeof("_BOOT_ID=") - 1);
697 IOVEC_SET_STRING(iovec[n++], boot_id);
700 r = sd_id128_get_machine(&id);
702 sd_id128_to_string(id, machine_id + sizeof("_MACHINE_ID=") - 1);
703 IOVEC_SET_STRING(iovec[n++], machine_id);
706 t = gethostname_malloc();
708 hostname = strappend("_HOSTNAME=", t);
711 IOVEC_SET_STRING(iovec[n++], hostname);
/* Decide which journal the entry goes to, depending on s->split_mode. */
716 if (s->split_mode == SPLIT_UID && realuid > 0)
717 /* Split up strictly by any UID */
718 journal_uid = realuid;
719 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
720 /* Split up by login UIDs, this avoids creation of
721 * individual journals for system UIDs. We do this
722 * only if the realuid is not root, in order not to
723 * accidentally leak privileged information to the
724 * user that is logged by a privileged process that is
725 * part of an unprivileged session.*/
730 write_to_journal(s, journal_uid, iovec, n);
/* Emits a log entry that originates from the journal daemon itself.
 *
 * message_id:  written as its own field unless it equals SD_ID128_NULL.
 * format, ...: printf-style text of the MESSAGE= field.
 *
 * The entry carries PRIORITY=6 and _TRANSPORT=driver and is dispatched with
 * the daemon's own pid/uid/gid as credentials. */
733 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
734 char mid[11 + 32 + 1];
735 char buffer[16 + LINE_MAX + 1];
/* Room for the metadata fields plus the (up to four) fields added here. */
736 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
739 struct ucred ucred = {};
744 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
745 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The formatted text is written right after the 8-byte "MESSAGE=" prefix. */
747 memcpy(buffer, "MESSAGE=", 8);
748 va_start(ap, format);
749 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
751 char_array_0(buffer);
752 IOVEC_SET_STRING(iovec[n++], buffer);
754 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
755 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
757 IOVEC_SET_STRING(iovec[n++], mid);
760 ucred.pid = getpid();
761 ucred.uid = getuid();
762 ucred.gid = getgid();
/* No timestamp, label or unit id is passed for driver messages. */
764 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Entry point for messages received from clients: compares the priority with
 * s->max_level_store, applies per-cgroup rate limiting and finally forwards
 * the entry to dispatch_message_real().
 *
 * iovec, n, m:      field array, fields in use, total capacity.
 * label, label_len: security label of the sender.
 * Further parameters (not all shown in this excerpt) carry the credentials,
 * timestamp, priority and unit id that are passed through below. */
767 void server_dispatch_message(
769 struct iovec *iovec, unsigned n, unsigned m,
772 const char *label, size_t label_len,
777 char _cleanup_free_ *path = NULL;
781 assert(iovec || n == 0);
/* Priority is numerically above the configured storage level. */
786 if (LOG_PRI(priority) > s->max_level_store)
/* The rate limit is keyed by the sender's cgroup path. */
792 path = shortened_cgroup_path(ucred->pid);
796 /* example: /user/lennart/3/foobar
797 * /system/dbus.service/foobar
799 * So let's cut off everything past the third /, since that is
800 * where user directories start */
802 c = strchr(path, '/');
804 c = strchr(c+1, '/');
806 c = strchr(c+1, '/');
/* The currently available space is an input to the rate-limit decision. */
812 rl = journal_rate_limit_test(s->rate_limit, path,
813 priority & LOG_PRIMASK, available_space(s));
818 /* Write a suppression message if we suppressed something */
820 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
821 "Suppressed %u messages from %s", rl - 1, path);
824 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
/* Opens the journal files the server writes to, as far as they are not open
 * yet: the persistent one below /var/log/journal/<machine-id>/ and the
 * runtime one below /run/log/journal/<machine-id>/. Which of them is opened
 * depends on s->storage and on whether the flag file
 * /run/systemd/journal/flushed exists. */
828 static int system_journal_open(Server *s) {
834 r = sd_id128_get_machine(&machine);
838 sd_id128_to_string(machine, ids);
/* The persistent journal is only attempted for the persistent/auto storage
 * modes and only once the flag file is present. */
840 if (!s->system_journal &&
841 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
842 access("/run/systemd/journal/flushed", F_OK) >= 0) {
844 /* If in auto mode: first try to create the machine
845 * path, but not the prefix.
847 * If in persistent mode: create /var/log/journal and
848 * the machine path */
850 if (s->storage == STORAGE_PERSISTENT)
851 (void) mkdir("/var/log/journal/", 0755);
853 fn = strappend("/var/log/journal/", ids);
857 (void) mkdir(fn, 0755);
860 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
864 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
868 char fb[FORMAT_BYTES_MAX];
870 server_fix_perms(s, s->system_journal, 0);
871 server_driver_message(s, SD_ID128_NULL, "Allowing system journal files to grow to %s.",
872 format_bytes(fb, sizeof(fb), s->system_metrics.max_use));
/* -ENOENT and -EROFS are not worth a warning. */
876 if (r != -ENOENT && r != -EROFS)
877 log_warning("Failed to open system journal: %s", strerror(-r));
883 if (!s->runtime_journal &&
884 (s->storage != STORAGE_NONE)) {
886 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
890 if (s->system_journal) {
892 /* Try to open the runtime journal, but only
893 * if it already exists, so that we can flush
894 * it into the system journal */
/* Opened without O_CREAT and without sealing. */
896 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
901 log_warning("Failed to open runtime journal: %s", strerror(-r));
908 /* OK, we really need the runtime journal, so create
909 * it if necessary. */
911 (void) mkdir_parents(fn, 0755);
912 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
916 log_error("Failed to open runtime journal: %s", strerror(-r));
921 if (s->runtime_journal) {
922 char fb[FORMAT_BYTES_MAX];
924 server_fix_perms(s, s->runtime_journal, 0);
925 server_driver_message(s, SD_ID128_NULL, "Allowing runtime journal files to grow to %s.",
926 format_bytes(fb, sizeof(fb), s->runtime_metrics.max_use));
/* Copies all entries of the runtime journal (/run) into the system journal
 * (/var), then closes the runtime journal and removes /run/log/journal.
 * Only applies to the auto and persistent storage modes and only when a
 * runtime journal is open and the system journal could be opened. */
933 int server_flush_to_var(Server *s) {
936 sd_journal *j = NULL;
940 if (s->storage != STORAGE_AUTO &&
941 s->storage != STORAGE_PERSISTENT)
944 if (!s->runtime_journal)
/* Make sure the system journal is open before checking for it. */
947 system_journal_open(s);
949 if (!s->system_journal)
952 log_debug("Flushing to /var...");
954 r = sd_id128_get_machine(&machine);
956 log_error("Failed to get machine id: %s", strerror(-r));
/* Read the runtime journal files through the public reader API. */
960 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
962 log_error("Failed to read runtime journal: %s", strerror(-r));
966 sd_journal_set_data_threshold(j, 0);
968 SD_JOURNAL_FOREACH(j) {
973 assert(f && f->current_offset > 0);
/* Locate the entry object at the reader's current offset and copy it. */
975 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
977 log_error("Can't read entry: %s", strerror(-r));
981 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
/* Same retry policy as for regular writes. */
985 if (!shall_try_append_again(s->system_journal, r)) {
986 log_error("Can't write entry: %s", strerror(-r));
993 log_debug("Retrying write.");
994 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
996 log_error("Can't write entry: %s", strerror(-r));
1002 journal_file_post_change(s->system_journal);
/* The runtime journal is no longer needed once its content has been copied. */
1004 journal_file_close(s->runtime_journal);
1005 s->runtime_journal = NULL;
1008 rm_rf("/run/log/journal", false, true, false);
1010 sd_journal_close(j);
/* Handles a single epoll event. The event is dispatched on ev->data.fd:
 * signal fd, sync timer fd, /dev/kmsg fd, native or syslog datagram socket,
 * stdout listening socket; anything else is treated as a pointer to a
 * StdoutStream. */
1015 int process_event(Server *s, struct epoll_event *ev) {
/* --- signals --- */
1019 if (ev->data.fd == s->signal_fd) {
1020 struct signalfd_siginfo sfsi;
1023 if (ev->events != EPOLLIN) {
1024 log_error("Got invalid event from epoll.");
1028 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1029 if (n != sizeof(sfsi)) {
1034 if (errno == EINTR || errno == EAGAIN)
/* SIGUSR1: create the flag file and flush the runtime journal to /var. */
1040 if (sfsi.ssi_signo == SIGUSR1) {
1041 touch("/run/systemd/journal/flushed");
1042 server_flush_to_var(s);
1047 if (sfsi.ssi_signo == SIGUSR2) {
1053 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
/* --- sync timer --- */
1057 } else if (ev->data.fd == s->sync_timer_fd) {
1061 log_debug("Got sync request from epoll.");
1063 r = read(ev->data.fd, (void *)&t, sizeof(t));
/* --- kernel log --- */
1070 } else if (ev->data.fd == s->dev_kmsg_fd) {
1073 if (ev->events != EPOLLIN) {
1074 log_error("Got invalid event from epoll.");
1078 r = server_read_dev_kmsg(s);
/* --- native and syslog datagram sockets --- */
1084 } else if (ev->data.fd == s->native_fd ||
1085 ev->data.fd == s->syslog_fd) {
1087 if (ev->events != EPOLLIN) {
1088 log_error("Got invalid event from epoll.");
1093 struct msghdr msghdr;
1095 struct ucred *ucred = NULL;
1096 struct timeval *tv = NULL;
1097 struct cmsghdr *cmsg;
1099 size_t label_len = 0;
1101 struct cmsghdr cmsghdr;
1103 /* We use NAME_MAX space for the
1104 * SELinux label here. The kernel
1105 * currently enforces no limit, but
1106 * according to suggestions from the
1107 * SELinux people this will change and
1108 * it will probably be identical to
1109 * NAME_MAX. For now we use that, but
1110 * this should be updated one day when
1111 * the final limit is known.*/
1112 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1113 CMSG_SPACE(sizeof(struct timeval)) +
1114 CMSG_SPACE(sizeof(int)) + /* fd */
1115 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask how many bytes are pending so the receive buffer can be grown first. */
1122 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1123 log_error("SIOCINQ failed: %m");
1127 if (s->buffer_size < (size_t) v) {
/* New size: at least double the current one, and with LINE_MAX of headroom
 * on top of the pending size; one extra byte is allocated beyond that. */
1131 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1132 b = realloc(s->buffer, l+1);
1135 log_error("Couldn't increase buffer.");
1144 iovec.iov_base = s->buffer;
1145 iovec.iov_len = s->buffer_size;
1149 msghdr.msg_iov = &iovec;
1150 msghdr.msg_iovlen = 1;
1151 msghdr.msg_control = &control;
1152 msghdr.msg_controllen = sizeof(control);
1154 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1157 if (errno == EINTR || errno == EAGAIN)
1160 log_error("recvmsg() failed: %m");
/* Pick credentials, security label, timestamp and passed file descriptors
 * out of the control messages. Fixed-size payloads are only accepted when
 * their length matches exactly. */
1164 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1166 if (cmsg->cmsg_level == SOL_SOCKET &&
1167 cmsg->cmsg_type == SCM_CREDENTIALS &&
1168 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1169 ucred = (struct ucred*) CMSG_DATA(cmsg);
1170 else if (cmsg->cmsg_level == SOL_SOCKET &&
1171 cmsg->cmsg_type == SCM_SECURITY) {
1172 label = (char*) CMSG_DATA(cmsg);
1173 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1174 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1175 cmsg->cmsg_type == SO_TIMESTAMP &&
1176 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1177 tv = (struct timeval*) CMSG_DATA(cmsg);
1178 else if (cmsg->cmsg_level == SOL_SOCKET &&
1179 cmsg->cmsg_type == SCM_RIGHTS) {
1180 fds = (int*) CMSG_DATA(cmsg);
1181 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* Syslog socket: plain datagrams only, file descriptors are not accepted. */
1185 if (ev->data.fd == s->syslog_fd) {
1188 if (n > 0 && n_fds == 0) {
1189 e = memchr(s->buffer, '\n', n);
1195 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1196 } else if (n_fds > 0)
1197 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native socket: either a datagram payload or exactly one file descriptor. */
1200 if (n > 0 && n_fds == 0)
1201 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1202 else if (n == 0 && n_fds == 1)
1203 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1205 log_warning("Got too many file descriptors via native socket. Ignoring.");
/* Received descriptors are closed here in any case. */
1208 close_many(fds, n_fds);
/* --- new stdout stream connection --- */
1213 } else if (ev->data.fd == s->stdout_fd) {
1215 if (ev->events != EPOLLIN) {
1216 log_error("Got invalid event from epoll.");
1220 stdout_stream_new(s);
1224 StdoutStream *stream;
/* Only EPOLLIN and EPOLLHUP may be set for a stream. */
1226 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1227 log_error("Got invalid event from epoll.");
1231 /* If it is none of the well-known fds, it must be an
1232 * stdout stream fd. Note that this is a bit ugly here
1233 * (since we rely that none of the well-known fds
1234 * could be interpreted as pointer), but nonetheless
1235 * safe, since the well-known fds would never get an
1236 * fd > 4096, i.e. beyond the first memory page */
1238 stream = ev->data.ptr;
/* A non-positive result ends the stream. */
1240 if (stdout_stream_process(stream) <= 0)
1241 stdout_stream_free(stream);
1246 log_error("Unknown event.");
/* Installs a signal mask consisting of SIGINT, SIGTERM, SIGUSR1 and SIGUSR2,
 * creates a non-blocking, close-on-exec signalfd for the same set (stored in
 * s->signal_fd) and registers it with the epoll instance for EPOLLIN. */
1250 static int open_signalfd(Server *s) {
1252 struct epoll_event ev;
1256 assert_se(sigemptyset(&mask) == 0);
/* The list of signals is terminated by -1. */
1257 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
/* SIG_SETMASK replaces the process signal mask with exactly this set. */
1258 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1260 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1261 if (s->signal_fd < 0) {
1262 log_error("signalfd(): %m");
1267 ev.events = EPOLLIN;
1268 ev.data.fd = s->signal_fd;
1270 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1271 log_error("epoll_ctl(): %m");
/* Applies journald options given on the kernel command line. /proc/cmdline
 * is read and split into (possibly quoted) words; the boolean switches
 * systemd.journald.forward_to_syslog=, forward_to_kmsg= and
 * forward_to_console= are recognised. Unparsable values and other
 * "systemd.journald" words only produce a warning. */
1278 static int server_parse_proc_cmdline(Server *s) {
1279 char _cleanup_free_ *line = NULL;
/* Checked before /proc/cmdline is read at all. */
1284 if (detect_container(NULL) > 0)
1287 r = read_one_line_file("/proc/cmdline", &line);
1289 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1293 FOREACH_WORD_QUOTED(w, l, line, state) {
1294 char _cleanup_free_ *word;
/* Private NUL-terminated copy of the current word. */
1296 word = strndup(w, l);
/* The numeric offsets below equal the length of the respective prefix. */
1300 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1301 r = parse_boolean(word + 35);
1303 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1305 s->forward_to_syslog = r;
1306 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1307 r = parse_boolean(word + 33);
1309 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1311 s->forward_to_kmsg = r;
1312 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1313 r = parse_boolean(word + 36);
1315 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1317 s->forward_to_console = r;
1318 } else if (startswith(word, "systemd.journald"))
1319 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Loads /etc/systemd/journald.conf into the server structure. Only the
 * "Journal" section is parsed, with journald_gperf_lookup supplying the table
 * of known settings. A missing file (ENOENT) is handled separately from other
 * open errors, which are logged as a warning, as are parse errors. */
1325 static int server_parse_config_file(Server *s) {
1326 static const char *fn = "/etc/systemd/journald.conf";
1327 FILE _cleanup_fclose_ *f = NULL;
/* "e" opens the file with the close-on-exec flag (glibc extension). */
1332 f = fopen(fn, "re");
1334 if (errno == ENOENT)
1337 log_warning("Failed to open configuration file %s: %m", fn);
1341 r = config_parse(fn, f, "Journal\0", config_item_perf_lookup,
1342 (void*) journald_gperf_lookup, false, s);
1344 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Creates the timerfd used for deferred journal syncs (CLOCK_MONOTONIC,
 * close-on-exec), stores it in s->sync_timer_fd and registers it with the
 * epoll instance for EPOLLIN. The timer is not armed here. */
1349 static int server_open_sync_timer(Server *s) {
1351 struct epoll_event ev;
1355 s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1356 if (s->sync_timer_fd < 0)
1360 ev.events = EPOLLIN;
1361 ev.data.fd = s->sync_timer_fd;
1363 r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
/* NOTE(review): the message says "idle timer" although the descriptor is
 * the sync timer -- wording may be a leftover. */
1365 log_error("Failed to add idle timer fd to epoll object: %m");
/* Arms the sync timer so that it fires once, s->sync_interval_usec from now.
 * The timer is not touched while a sync is already scheduled
 * (s->sync_scheduled), and it is only armed for a non-zero interval.
 * s->sync_scheduled is set once the timer has been programmed. */
1372 int server_schedule_sync(Server *s) {
/* A sync is already pending. */
1377 if (s->sync_scheduled)
1380 if (s->sync_interval_usec) {
1381 struct itimerspec sync_timer_enable = {
1382 .it_value.tv_sec = s->sync_interval_usec / USEC_PER_SEC,
/* tv_nsec holds the sub-second remainder in nanoseconds: take what is left
 * over after whole seconds (in microseconds) and scale it by 1000 ns/us. */
1383 .it_value.tv_nsec = (s->sync_interval_usec % USEC_PER_SEC) * 1000,
/* Relative timer (flags 0); it_interval stays zero, so it fires only once. */
1386 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1391 s->sync_scheduled = true;
/* Brings a Server structure into operation: sets defaults, reads the
 * configuration file and the kernel command line, creates the helper objects
 * (user journal hashmap, mmap cache, epoll instance), takes over the sockets
 * handed in through sd_listen_fds(), opens the remaining input sources and
 * finally opens the journal files. */
1396 int server_init(Server *s) {
/* Mark every descriptor as "not open". */
1402 s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1403 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
/* Defaults, which the configuration parsed further down may override. */
1407 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1408 s->sync_scheduled = false;
1410 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1411 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1413 s->forward_to_syslog = true;
1415 s->max_level_store = LOG_DEBUG;
1416 s->max_level_syslog = LOG_DEBUG;
1417 s->max_level_kmsg = LOG_NOTICE;
1418 s->max_level_console = LOG_INFO;
/* Fill the metrics structures with 0xFF bytes.
 * NOTE(review): presumably an "unset" marker that the journal file code
 * replaces with defaults -- confirm where the metrics are consumed. */
1420 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1421 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1423 server_parse_config_file(s);
1424 server_parse_proc_cmdline(s);
/* If exactly one of interval and burst is zero, both are forced to zero. */
1425 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1426 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1427 (long long unsigned) s->rate_limit_interval,
1428 s->rate_limit_burst);
1429 s->rate_limit_interval = s->rate_limit_burst = 0;
1432 mkdir_p("/run/systemd/journal", 0755);
1434 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1435 if (!s->user_journals)
1438 s->mmap = mmap_cache_new();
1442 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1443 if (s->epoll_fd < 0) {
1444 log_error("Failed to create epoll object: %m");
/* Sockets passed in by the service manager; each kind may appear only once. */
1448 n = sd_listen_fds(true);
1450 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1454 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1456 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1458 if (s->native_fd >= 0) {
1459 log_error("Too many native sockets passed.");
1465 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1467 if (s->stdout_fd >= 0) {
1468 log_error("Too many stdout sockets passed.");
1474 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1476 if (s->syslog_fd >= 0) {
1477 log_error("Too many /dev/log sockets passed.");
1484 log_error("Unknown socket passed.");
/* Open the input sources and helper descriptors, one after the other. */
1489 r = server_open_syslog_socket(s);
1493 r = server_open_native_socket(s);
1497 r = server_open_stdout_socket(s);
1501 r = server_open_dev_kmsg(s);
1505 r = server_open_kernel_seqnum(s);
1509 r = server_open_sync_timer(s);
1513 r = open_signalfd(s);
1517 s->udev = udev_new();
1521 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1522 s->rate_limit_burst);
1526 r = system_journal_open(s);
/* Calls journal_file_maybe_append_tag() with the current CLOCK_REALTIME time
 * on the system journal (if open) and on every per-user journal. */
1533 void server_maybe_append_tags(Server *s) {
/* One timestamp is taken and shared by all files. */
1539 n = now(CLOCK_REALTIME);
1541 if (s->system_journal)
1542 journal_file_maybe_append_tag(s->system_journal, n);
1544 HASHMAP_FOREACH(f, s->user_journals, i)
1545 journal_file_maybe_append_tag(f, n);
/* Releases what the server holds: stdout streams, journal files, the user
 * journal hashmap, all file descriptors that are open (>= 0), the rate
 * limiter, the kernel sequence number mapping, the mmap cache and the udev
 * context. */
1549 void server_done(Server *s) {
/* Loops until s->stdout_streams is empty, freeing the head each time. */
1553 while (s->stdout_streams)
1554 stdout_stream_free(s->stdout_streams);
1556 if (s->system_journal)
1557 journal_file_close(s->system_journal);
1559 if (s->runtime_journal)
1560 journal_file_close(s->runtime_journal);
/* Empty the hashmap entry by entry, closing each user journal, before the
 * map itself is freed. */
1562 while ((f = hashmap_steal_first(s->user_journals)))
1563 journal_file_close(f);
1565 hashmap_free(s->user_journals);
1567 if (s->epoll_fd >= 0)
1568 close_nointr_nofail(s->epoll_fd);
1570 if (s->signal_fd >= 0)
1571 close_nointr_nofail(s->signal_fd);
1573 if (s->syslog_fd >= 0)
1574 close_nointr_nofail(s->syslog_fd);
1576 if (s->native_fd >= 0)
1577 close_nointr_nofail(s->native_fd);
1579 if (s->stdout_fd >= 0)
1580 close_nointr_nofail(s->stdout_fd);
1582 if (s->dev_kmsg_fd >= 0)
1583 close_nointr_nofail(s->dev_kmsg_fd);
1585 if (s->sync_timer_fd >= 0)
1586 close_nointr_nofail(s->sync_timer_fd);
1589 journal_rate_limit_free(s->rate_limit);
/* The mapping that is released has the size of one uint64_t. */
1591 if (s->kernel_seqnum)
1592 munmap(s->kernel_seqnum, sizeof(uint64_t));
1598 mmap_cache_unref(s->mmap);
1601 udev_unref(s->udev);