1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
35 #include <systemd/sd-login.h>
41 #include "journal-file.h"
42 #include "socket-util.h"
43 #include "cgroup-util.h"
47 #include "conf-parser.h"
48 #include "journal-internal.h"
49 #include "journal-vacuum.h"
50 #include "journal-authenticate.h"
51 #include "journald-server.h"
52 #include "journald-rate-limit.h"
53 #include "journald-kmsg.h"
54 #include "journald-syslog.h"
55 #include "journald-stream.h"
56 #include "journald-console.h"
57 #include "journald-native.h"
61 #include <acl/libacl.h>
66 #include <selinux/selinux.h>
/* Upper bound on simultaneously open per-user journal files; find_journal()
 * closes entries from the user_journals hashmap until it is below this. */
69 #define USER_JOURNALS_MAX 1024
/* Defaults that server_init() copies into the Server before the
 * configuration file and kernel command line are parsed. */
71 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
72 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
73 #define DEFAULT_RATE_LIMIT_BURST 200
/* Maximum age of the figure cached by available_space() before it is
 * recomputed. */
75 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Textual names of the Storage enum values, indexed by enum value. The two
 * macros below generate the string<->enum lookup helpers and the
 * configuration parser callback from this table. */
77 static const char* const storage_table[] = {
78 [STORAGE_AUTO] = "auto",
79 [STORAGE_VOLATILE] = "volatile",
80 [STORAGE_PERSISTENT] = "persistent",
81 [STORAGE_NONE] = "none"
84 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
85 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Textual names of the SplitMode enum values, indexed by enum value, plus the
 * generated lookup helpers and configuration parser callback.
 * NOTE(review): dispatch_message_real() also tests SPLIT_UID, but no entry
 * for it is visible in this table here - confirm it is present. */
87 static const char* const split_mode_table[] = {
88 [SPLIT_NONE] = "none",
90 [SPLIT_LOGIN] = "login"
93 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
94 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Estimate how many more bytes the journal may consume in its current
 * directory.
 *
 * The directory is "/var/log/journal/<machine-id>" (system_metrics) when the
 * system journal is open; the "/run/log/journal/" path and runtime_metrics
 * are presumably the fallback branch. The result is the smaller of
 *   - max_use minus the disk usage of all regular "*.journal" and
 *     "*.journal~" files in that directory, and
 *   - the free space reported by fstatvfs() minus keep_free,
 * each clamped at zero. The value and its timestamp are cached in the
 * Server. */
96 static uint64_t available_space(Server *s) {
98 char _cleanup_free_ *p = NULL;
102 uint64_t sum = 0, avail = 0, ss_avail = 0;
104 DIR _cleanup_closedir_ *d = NULL;
108 ts = now(CLOCK_MONOTONIC);
/* Serve the cached figure while it is younger than
 * RECHECK_AVAILABLE_SPACE_USEC. */
110 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
111 return s->cached_available_space;
113 r = sd_id128_get_machine(&machine);
117 if (s->system_journal) {
118 f = "/var/log/journal/";
119 m = &s->system_metrics;
121 f = "/run/log/journal/";
122 m = &s->runtime_metrics;
127 p = strappend(f, sd_id128_to_string(machine, ids));
135 if (fstatvfs(dirfd(d), &ss) < 0)
141 union dirent_storage buf;
143 r = readdir_r(d, &buf.de, &de);
/* Only journal files (active and archived-as-corrupt "~" variants) count. */
150 if (!endswith(de->d_name, ".journal") &&
151 !endswith(de->d_name, ".journal~"))
154 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
157 if (!S_ISREG(st.st_mode))
/* st_blocks is counted in 512-byte units, so this is actual disk usage
 * rather than apparent file size. */
160 sum += (uint64_t) st.st_blocks * 512UL;
163 avail = sum >= m->max_use ? 0 : m->max_use - sum;
/* NOTE(review): POSIX expresses f_bavail in f_frsize units; multiplying by
 * f_bsize is only right where the two are equal - confirm. */
165 ss_avail = ss.f_bsize * ss.f_bavail;
167 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
169 if (ss_avail < avail)
172 s->cached_available_space = avail;
173 s->cached_available_space_timestamp = ts;
/* Resolve the "systemd-journal" group into s->file_gid. The lookup is
 * attempted only while s->file_gid_valid is unset, and the flag is set
 * afterwards whether or not the lookup succeeded. */
178 static void server_read_file_gid(Server *s) {
179 const char *g = "systemd-journal";
/* A previous call already settled the GID. */
184 if (s->file_gid_valid)
187 r = get_group_creds(&g, &s->file_gid);
189 log_warning("Failed to resolve '%s' group: %s", g, strerror(-r));
191 /* if we couldn't read the gid, then it will be 0, but that's
192 * fine and we shouldn't try to resolve the group again, so
193 * let's just pretend it worked right-away. */
194 s->file_gid_valid = true;
/* Apply the expected ownership, mode and ACL to an open journal file.
 *
 * The file is set to mode 0640, owner 0 and the group resolved by
 * server_read_file_gid(). An ACL_USER entry for uid is then looked up on the
 * file (and, judging by the acl_create_entry() call, created when absent),
 * ACL_READ is added to it, the mask is recalculated and the ACL written
 * back. Every failure is logged as a warning and otherwise ignored. */
197 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
202 acl_permset_t permset;
207 server_read_file_gid(s);
209 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
211 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
217 acl = acl_get_fd(f->fd);
219 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
223 r = acl_find_uid(acl, uid, &entry);
/* Build a new ACL_USER entry qualified with this uid. */
226 if (acl_create_entry(&acl, &entry) < 0 ||
227 acl_set_tag_type(entry, ACL_USER) < 0 ||
228 acl_set_qualifier(entry, &uid) < 0) {
229 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Grant read permission and recompute the mask so the grant is effective. */
234 if (acl_get_permset(entry, &permset) < 0 ||
235 acl_add_perm(permset, ACL_READ) < 0 ||
236 acl_calc_mask(&acl) < 0) {
237 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
241 if (acl_set_fd(f->fd, acl) < 0)
242 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Pick the journal file that entries from uid should be written to.
 *
 * The runtime journal wins whenever it is open. Otherwise the per-user file
 * "/var/log/journal/<machine-id>/user-<uid>.journal" is looked up in
 * s->user_journals and opened on demand; every failure along the way falls
 * back to the system journal. */
249 static JournalFile* find_journal(Server *s, uid_t uid) {
257 /* We split up user logs only on /var, not on /run. If the
258 * runtime file is open, we write to it exclusively, in order
259 * to guarantee proper order as soon as we flush /run to
260 * /var and close the runtime file. */
262 if (s->runtime_journal)
263 return s->runtime_journal;
266 return s->system_journal;
268 r = sd_id128_get_machine(&machine);
270 return s->system_journal;
/* Already open for this uid? */
272 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
276 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
277 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
278 return s->system_journal;
/* Make room before opening another file so that no more than
 * USER_JOURNALS_MAX user journals are held open. */
280 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
281 /* Too many open? Then let's close one */
282 f = hashmap_steal_first(s->user_journals);
284 journal_file_close(f);
287 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
291 return s->system_journal;
293 server_fix_perms(s, f, uid);
/* Remember the file under its uid; if that fails, close it again and use
 * the system journal instead. */
295 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
297 journal_file_close(f);
298 return s->system_journal;
/* Rotate every open journal: the runtime journal, the system journal and
 * each per-user journal. Rotation errors are logged; server_fix_perms() is
 * called on the resulting files. The runtime journal is rotated with sealing
 * disabled (false), the others with s->seal. */
304 void server_rotate(Server *s) {
310 log_debug("Rotating...");
312 if (s->runtime_journal) {
313 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
/* The pointer may have been cleared by the rotation attempt, hence the
 * two different error messages. */
315 if (s->runtime_journal)
316 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
318 log_error("Failed to create new runtime journal: %s", strerror(-r));
320 server_fix_perms(s, s->runtime_journal, 0);
323 if (s->system_journal) {
324 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
326 if (s->system_journal)
327 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
329 log_error("Failed to create new system journal: %s", strerror(-r));
332 server_fix_perms(s, s->system_journal, 0);
335 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
336 r = journal_file_rotate(&f, s->compress, s->seal);
339 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
341 log_error("Failed to create user journal: %s", strerror(-r));
/* journal_file_rotate() takes &f and may hand back a different object, so
 * the hashmap entry is refreshed; the key encodes the uid. */
343 hashmap_replace(s->user_journals, k, f);
344 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Set the system journal and all per-user journals offline via
 * journal_file_set_offline(), then disarm the sync timer and clear
 * s->sync_scheduled. Failures are logged and do not stop the remaining
 * steps as far as is visible here. */
349 void server_sync(Server *s) {
/* An all-zero itimerspec disarms a timerfd. */
355 static const struct itimerspec sync_timer_disable = {};
357 if (s->system_journal) {
358 r = journal_file_set_offline(s->system_journal);
360 log_error("Failed to sync system journal: %s", strerror(-r));
363 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
364 r = journal_file_set_offline(f);
366 log_error("Failed to sync user journal: %s", strerror(-r));
369 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
371 log_error("Failed to disable max timer: %m");
373 s->sync_scheduled = false;
/* Run journal_directory_vacuum() on the persistent directory
 * ("/var/log/journal/<machine-id>") if the system journal is open, and on
 * the runtime directory ("/run/log/journal/<machine-id>") if the runtime
 * journal is open, using the respective metrics and s->max_retention_usec.
 * s->oldest_file_usec is reset first and passed to both calls. */
376 void server_vacuum(Server *s) {
382 log_debug("Vacuuming...");
384 s->oldest_file_usec = 0;
386 r = sd_id128_get_machine(&machine);
388 log_error("Failed to get machine ID: %s", strerror(-r));
392 sd_id128_to_string(machine, ids);
394 if (s->system_journal) {
395 p = strappend("/var/log/journal/", ids);
401 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
/* A missing directory is not worth reporting. */
402 if (r < 0 && r != -ENOENT)
403 log_error("Failed to vacuum %s: %s", p, strerror(-r));
407 if (s->runtime_journal) {
408 p = strappend("/run/log/journal/", ids);
414 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
415 if (r < 0 && r != -ENOENT)
416 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* A zero timestamp forces available_space() to recompute on its next call. */
420 s->cached_available_space_timestamp = 0;
/* Return the cgroup path of pid relative to the cgroup of PID 1.
 *
 * Both paths are read from the SYSTEMD_CGROUP_CONTROLLER hierarchy. When the
 * process path begins with the (adjusted) init path, a newly allocated copy
 * of the remainder is produced. What is returned in the other case is not
 * visible here. */
423 static char *shortened_cgroup_path(pid_t pid) {
425 char _cleanup_free_ *process_path = NULL, *init_path = NULL;
430 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
434 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
/* Drop a trailing "/system" (7 characters) from the init path so that it
 * names the root of the hierarchy systemd manages. */
438 if (endswith(init_path, "/system"))
439 init_path[strlen(init_path) - 7] = 0;
440 else if (streq(init_path, "/"))
443 if (startswith(process_path, init_path)) {
444 path = strdup(process_path + strlen(init_path));
453 bool shall_try_append_again(JournalFile *f, int r) {
455 /* -E2BIG Hit configured limit
457 -EDQUOT Quota limit hit
459 -EHOSTDOWN Other machine
460 -EBUSY Unclean shutdown
461 -EPROTONOSUPPORT Unsupported feature
464 -ESHUTDOWN Already archived */
466 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
467 log_debug("%s: Allocation limit reached, rotating.", f->path);
468 else if (r == -EHOSTDOWN)
469 log_info("%s: Journal file from other machine, rotating.", f->path);
470 else if (r == -EBUSY)
471 log_info("%s: Unclean shutdown, rotating.", f->path);
472 else if (r == -EPROTONOSUPPORT)
473 log_info("%s: Unsupported feature, rotating.", f->path);
474 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
475 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Append one entry, given as n iovec fields, to the journal selected by
 * find_journal() for uid.
 *
 * If the file suggests rotation up front, the journal is looked up again
 * afterwards. After a failed append, shall_try_append_again() decides
 * whether one retry on a freshly looked-up file is made; a failure of that
 * retry, or a non-retryable error, is logged and the entry is dropped. */
482 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
/* Part of the give-up condition below; where it is set is not visible here. */
484 bool vacuumed = false;
491 f = find_journal(s, uid);
495 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
496 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
501 f = find_journal(s, uid);
506 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
508 server_schedule_sync(s);
512 if (vacuumed || !shall_try_append_again(f, r)) {
513 log_error("Failed to write entry, ignoring: %s", strerror(-r));
520 f = find_journal(s, uid);
524 log_debug("Retrying write.");
525 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
527 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* Enrich an entry with trusted metadata fields and write it to the journal.
 *
 * iovec holds n caller-supplied fields and has room for m entries in total;
 * the assertion below requires N_IOVEC_META_FIELDS free slots. From the
 * sender's credentials the function appends _PID, _UID, _GID, _COMM, _EXE,
 * _CMDLINE, audit session/login uid, cgroup, session, owner uid, unit and
 * SELinux context; it further appends the source timestamp, boot ID,
 * machine ID and hostname. All strings referenced by the iovec are owned by
 * the _cleanup_free_ locals, so they stay valid until the function returns.
 * Finally the target journal uid is chosen from s->split_mode and the entry
 * is handed to write_to_journal(). */
530 static void dispatch_message_real(
532 struct iovec *iovec, unsigned n, unsigned m,
535 const char *label, size_t label_len,
536 const char *unit_id) {
/* Stack buffers sized for the field prefix plus the widest decimal
 * rendering of the respective credential. */
538 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(ucred->pid)],
539 uid[sizeof("_UID=") + DECIMAL_STR_MAX(ucred->uid)],
540 gid[sizeof("_GID=") + DECIMAL_STR_MAX(ucred->gid)];
542 char _cleanup_free_ *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
543 *comm = NULL, *cmdline = NULL, *hostname = NULL,
544 *audit_session = NULL, *audit_loginuid = NULL,
545 *exe = NULL, *cgroup = NULL, *session = NULL,
546 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
552 uid_t realuid = 0, owner = 0, journal_uid;
553 bool owner_valid = false;
558 assert(n + N_IOVEC_META_FIELDS <= m);
566 realuid = ucred->uid;
568 snprintf(pid, sizeof(pid) - 1, "_PID=%lu", (unsigned long) ucred->pid);
570 IOVEC_SET_STRING(iovec[n++], pid);
572 snprintf(uid, sizeof(uid) - 1, "_UID=%lu", (unsigned long) ucred->uid);
574 IOVEC_SET_STRING(iovec[n++], uid);
576 snprintf(gid, sizeof(gid) - 1, "_GID=%lu", (unsigned long) ucred->gid);
578 IOVEC_SET_STRING(iovec[n++], gid);
/* Per-process details looked up by PID. */
580 r = get_process_comm(ucred->pid, &t);
582 comm = strappend("_COMM=", t);
586 IOVEC_SET_STRING(iovec[n++], comm);
589 r = get_process_exe(ucred->pid, &t);
591 exe = strappend("_EXE=", t);
595 IOVEC_SET_STRING(iovec[n++], exe);
598 r = get_process_cmdline(ucred->pid, 0, false, &t);
600 cmdline = strappend("_CMDLINE=", t);
604 IOVEC_SET_STRING(iovec[n++], cmdline);
608 r = audit_session_from_pid(ucred->pid, &audit);
610 if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
611 IOVEC_SET_STRING(iovec[n++], audit_session);
613 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
615 if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
616 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
619 t = shortened_cgroup_path(ucred->pid);
621 cgroup = strappend("_SYSTEMD_CGROUP=", t);
625 IOVEC_SET_STRING(iovec[n++], cgroup);
629 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
630 session = strappend("_SYSTEMD_SESSION=", t);
634 IOVEC_SET_STRING(iovec[n++], session);
/* The owner uid is also consulted for SPLIT_LOGIN below. */
637 if (sd_pid_get_owner_uid(ucred->pid, &owner) >= 0) {
639 if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
640 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* Unit name: system unit from the cgroup first, then user unit, then the
 * unit_id supplied by the caller. */
644 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
645 unit = strappend("_SYSTEMD_UNIT=", t);
647 } else if (cg_pid_get_user_unit(ucred->pid, &t) >= 0) {
648 unit = strappend("_SYSTEMD_USER_UNIT=", t);
650 } else if (unit_id) {
652 unit = strappend("_SYSTEMD_USER_UNIT=", unit_id);
654 unit = strappend("_SYSTEMD_UNIT=", unit_id);
658 IOVEC_SET_STRING(iovec[n++], unit);
/* SELinux context: copy the label passed in (it carries an explicit length,
 * so it is terminated manually), or query it by PID via getpidcon(). */
662 selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
663 if (selinux_context) {
664 *((char*) mempcpy(stpcpy(selinux_context, "_SELINUX_CONTEXT="), label, label_len)) = 0;
665 IOVEC_SET_STRING(iovec[n++], selinux_context);
668 security_context_t con;
670 if (getpidcon(ucred->pid, &con) >= 0) {
671 selinux_context = strappend("_SELINUX_CONTEXT=", con);
673 IOVEC_SET_STRING(iovec[n++], selinux_context);
681 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
682 (unsigned long long) timeval_load(tv)) >= 0)
683 IOVEC_SET_STRING(iovec[n++], source_time);
686 /* Note that strictly speaking storing the boot id here is
687 * redundant since the entry includes this in-line
688 * anyway. However, we need this indexed, too. */
689 r = sd_id128_get_boot(&id);
691 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
692 IOVEC_SET_STRING(iovec[n++], boot_id);
694 r = sd_id128_get_machine(&id);
696 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
697 IOVEC_SET_STRING(iovec[n++], machine_id);
699 t = gethostname_malloc();
701 hostname = strappend("_HOSTNAME=", t);
704 IOVEC_SET_STRING(iovec[n++], hostname);
709 if (s->split_mode == SPLIT_UID && realuid > 0)
710 /* Split up strictly by any UID */
711 journal_uid = realuid;
712 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
713 /* Split up by login UIDs, this avoids creation of
714 * individual journals for system UIDs. We do this
715 * only if the realuid is not root, in order not to
716 * accidentally leak privileged information to the
717 * user that is logged by a privileged process that is
718 * part of an unprivileged session.*/
723 write_to_journal(s, journal_uid, iovec, n);
/* Log a message that originates from journald itself.
 *
 * Builds an entry with PRIORITY=6, _TRANSPORT=driver and a printf-style
 * formatted MESSAGE= field, plus a message ID field when message_id is not
 * SD_ID128_NULL, and dispatches it with the daemon's own PID/UID/GID as
 * credentials. */
726 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
/* Presumably "MESSAGE_ID=" (11 chars) + 32 hex digits + NUL - confirm
 * against the MESSAGE_ID() macro. */
727 char mid[11 + 32 + 1];
728 char buffer[16 + LINE_MAX + 1];
/* Room for the up to four fields set here plus the metadata fields that
 * dispatch_message_real() appends. */
729 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
732 struct ucred ucred = {};
737 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
738 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The formatted text goes right behind the 8-byte "MESSAGE=" prefix. */
740 memcpy(buffer, "MESSAGE=", 8);
741 va_start(ap, format);
742 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
744 char_array_0(buffer);
745 IOVEC_SET_STRING(iovec[n++], buffer);
747 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
748 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
750 IOVEC_SET_STRING(iovec[n++], mid);
753 ucred.pid = getpid();
754 ucred.uid = getuid();
755 ucred.gid = getgid();
757 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Entry point for messages received from clients: filter by priority, apply
 * per-cgroup rate limiting and pass the entry on to dispatch_message_real().
 *
 * Messages whose priority value exceeds s->max_level_store are not stored.
 * The rate-limit key is the sender's shortened cgroup path, truncated as
 * described in the comment below; the rate limiter is also given the
 * currently available journal space. When earlier messages were suppressed,
 * an SD_MESSAGE_JOURNAL_DROPPED driver message reporting rl - 1 of them is
 * logged. */
760 void server_dispatch_message(
762 struct iovec *iovec, unsigned n, unsigned m,
765 const char *label, size_t label_len,
770 char _cleanup_free_ *path = NULL;
774 assert(iovec || n == 0);
779 if (LOG_PRI(priority) > s->max_level_store)
785 path = shortened_cgroup_path(ucred->pid);
789 /* example: /user/lennart/3/foobar
790 * /system/dbus.service/foobar
792 * So let's cut off everything past the third /, since that is
793 * where user directories start */
795 c = strchr(path, '/');
797 c = strchr(c+1, '/');
799 c = strchr(c+1, '/');
805 rl = journal_rate_limit_test(s->rate_limit, path,
806 priority & LOG_PRIMASK, available_space(s));
811 /* Write a suppression message if we suppressed something */
813 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
814 "Suppressed %u messages from %s", rl - 1, path);
817 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
/* Open the persistent system journal and/or the runtime journal, depending
 * on s->storage and on what is already open.
 *
 * The system journal ("/var/log/journal/<machine-id>/system.journal") is
 * only attempted for STORAGE_PERSISTENT or STORAGE_AUTO, and only once the
 * flag file "/run/systemd/journal/flushed" exists. The runtime journal
 * ("/run/log/journal/<machine-id>/system.journal") is considered for every
 * mode except STORAGE_NONE. After each successful open, permissions are
 * fixed and a driver message states the size limit. */
821 static int system_journal_open(Server *s) {
827 r = sd_id128_get_machine(&machine);
831 sd_id128_to_string(machine, ids);
833 if (!s->system_journal &&
834 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
835 access("/run/systemd/journal/flushed", F_OK) >= 0) {
837 /* If in auto mode: first try to create the machine
838 * path, but not the prefix.
840 * If in persistent mode: create /var/log/journal and
841 * the machine path */
843 if (s->storage == STORAGE_PERSISTENT)
844 (void) mkdir("/var/log/journal/", 0755);
846 fn = strappend("/var/log/journal/", ids);
850 (void) mkdir(fn, 0755);
853 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
857 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
861 char fb[FORMAT_BYTES_MAX];
863 server_fix_perms(s, s->system_journal, 0);
864 server_driver_message(s, SD_ID128_NULL, "Allowing system journal files to grow to %s.",
865 format_bytes(fb, sizeof(fb), s->system_metrics.max_use));
/* A missing directory or a read-only file system is tolerated silently. */
869 if (r != -ENOENT && r != -EROFS)
870 log_warning("Failed to open system journal: %s", strerror(-r));
876 if (!s->runtime_journal &&
877 (s->storage != STORAGE_NONE)) {
879 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
883 if (s->system_journal) {
885 /* Try to open the runtime journal, but only
886 * if it already exists, so that we can flush
887 * it into the system journal */
889 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
894 log_warning("Failed to open runtime journal: %s", strerror(-r));
901 /* OK, we really need the runtime journal, so create
902 * it if necessary. */
904 (void) mkdir_parents(fn, 0755);
905 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
909 log_error("Failed to open runtime journal: %s", strerror(-r));
914 if (s->runtime_journal) {
915 char fb[FORMAT_BYTES_MAX];
917 server_fix_perms(s, s->runtime_journal, 0);
918 server_driver_message(s, SD_ID128_NULL, "Allowing runtime journal files to grow to %s.",
919 format_bytes(fb, sizeof(fb), s->runtime_metrics.max_use));
/* Copy all entries from the runtime journal in /run into the system journal
 * in /var, then close the runtime journal and remove "/run/log/journal".
 *
 * Applies only to STORAGE_AUTO and STORAGE_PERSISTENT, and only when a
 * runtime journal is open and the system journal could be opened. Entries
 * are read through an sd_journal handle restricted to runtime files. A
 * failed copy is retried once when shall_try_append_again() approves. */
926 int server_flush_to_var(Server *s) {
929 sd_journal *j = NULL;
933 if (s->storage != STORAGE_AUTO &&
934 s->storage != STORAGE_PERSISTENT)
937 if (!s->runtime_journal)
/* Make sure the destination is open before flushing. */
940 system_journal_open(s);
942 if (!s->system_journal)
945 log_debug("Flushing to /var...");
947 r = sd_id128_get_machine(&machine);
949 log_error("Failed to get machine id: %s", strerror(-r));
953 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
955 log_error("Failed to read runtime journal: %s", strerror(-r));
/* Threshold 0: presumably disables truncation of large data fields so that
 * entries are copied in full - confirm against the sd-journal API. */
959 sd_journal_set_data_threshold(j, 0);
961 SD_JOURNAL_FOREACH(j) {
966 assert(f && f->current_offset > 0);
968 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
970 log_error("Can't read entry: %s", strerror(-r));
974 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
978 if (!shall_try_append_again(s->system_journal, r)) {
979 log_error("Can't write entry: %s", strerror(-r));
986 log_debug("Retrying write.");
987 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
989 log_error("Can't write entry: %s", strerror(-r));
995 journal_file_post_change(s->system_journal);
997 journal_file_close(s->runtime_journal);
998 s->runtime_journal = NULL;
1001 rm_rf("/run/log/journal", false, true, false);
1003 sd_journal_close(j);
/* Handle one epoll event by dispatching on the file descriptor it refers to:
 *   - signal_fd:            read a signalfd_siginfo and act on the signal
 *   - sync_timer_fd:        sync request from the timer
 *   - dev_kmsg_fd:          read kernel messages
 *   - native_fd/syslog_fd:  receive one datagram with its ancillary data
 *   - stdout_fd:            accept a new stdout stream connection
 *   - anything else:        treated as a pointer to an existing StdoutStream
 * The return value convention is not visible in this listing. */
1008 int process_event(Server *s, struct epoll_event *ev) {
1012 if (ev->data.fd == s->signal_fd) {
1013 struct signalfd_siginfo sfsi;
1016 if (ev->events != EPOLLIN) {
1017 log_error("Got invalid event from epoll.");
1021 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1022 if (n != sizeof(sfsi)) {
1027 if (errno == EINTR || errno == EAGAIN)
/* SIGUSR1: create the "flushed" flag file (checked by
 * system_journal_open()) and move the runtime journal to /var. */
1033 if (sfsi.ssi_signo == SIGUSR1) {
1034 touch("/run/systemd/journal/flushed");
1035 server_flush_to_var(s);
1040 if (sfsi.ssi_signo == SIGUSR2) {
1046 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1050 } else if (ev->data.fd == s->sync_timer_fd) {
1054 log_debug("Got sync request from epoll.");
1056 r = read(ev->data.fd, (void *)&t, sizeof(t));
1063 } else if (ev->data.fd == s->dev_kmsg_fd) {
1066 if (ev->events != EPOLLIN) {
1067 log_error("Got invalid event from epoll.");
1071 r = server_read_dev_kmsg(s);
1077 } else if (ev->data.fd == s->native_fd ||
1078 ev->data.fd == s->syslog_fd) {
1080 if (ev->events != EPOLLIN) {
1081 log_error("Got invalid event from epoll.");
1086 struct msghdr msghdr;
1088 struct ucred *ucred = NULL;
1089 struct timeval *tv = NULL;
1090 struct cmsghdr *cmsg;
1092 size_t label_len = 0;
1094 struct cmsghdr cmsghdr;
1096 /* We use NAME_MAX space for the
1097 * SELinux label here. The kernel
1098 * currently enforces no limit, but
1099 * according to suggestions from the
1100 * SELinux people this will change and
1101 * it will probably be identical to
1102 * NAME_MAX. For now we use that, but
1103 * this should be updated one day when
1104 * the final limit is known.*/
1105 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1106 CMSG_SPACE(sizeof(struct timeval)) +
1107 CMSG_SPACE(sizeof(int)) + /* fd */
1108 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask the socket how many bytes are pending and grow the receive buffer
 * if it is smaller than that. */
1115 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1116 log_error("SIOCINQ failed: %m");
1120 if (s->buffer_size < (size_t) v) {
1124 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1125 b = realloc(s->buffer, l+1);
1128 log_error("Couldn't increase buffer.");
1137 iovec.iov_base = s->buffer;
1138 iovec.iov_len = s->buffer_size;
1142 msghdr.msg_iov = &iovec;
1143 msghdr.msg_iovlen = 1;
1144 msghdr.msg_control = &control;
1145 msghdr.msg_controllen = sizeof(control);
1147 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1150 if (errno == EINTR || errno == EAGAIN)
1153 log_error("recvmsg() failed: %m");
/* Pick sender credentials, security label, receive timestamp and passed
 * file descriptors out of the control messages. Credentials and timestamp
 * are only accepted when the payload has exactly the expected size. */
1157 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1159 if (cmsg->cmsg_level == SOL_SOCKET &&
1160 cmsg->cmsg_type == SCM_CREDENTIALS &&
1161 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1162 ucred = (struct ucred*) CMSG_DATA(cmsg);
1163 else if (cmsg->cmsg_level == SOL_SOCKET &&
1164 cmsg->cmsg_type == SCM_SECURITY) {
1165 label = (char*) CMSG_DATA(cmsg);
1166 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1167 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1168 cmsg->cmsg_type == SO_TIMESTAMP &&
1169 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1170 tv = (struct timeval*) CMSG_DATA(cmsg);
1171 else if (cmsg->cmsg_level == SOL_SOCKET &&
1172 cmsg->cmsg_type == SCM_RIGHTS) {
1173 fds = (int*) CMSG_DATA(cmsg);
1174 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* Syslog datagrams must not carry file descriptors. */
1178 if (ev->data.fd == s->syslog_fd) {
1181 if (n > 0 && n_fds == 0) {
1182 e = memchr(s->buffer, '\n', n);
1188 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1189 } else if (n_fds > 0)
1190 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native protocol: either an inline payload without fds, or an empty
 * datagram carrying exactly one fd that holds the payload. */
1193 if (n > 0 && n_fds == 0)
1194 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1195 else if (n == 0 && n_fds == 1)
1196 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1198 log_warning("Got too many file descriptors via native socket. Ignoring.");
/* Received descriptors are closed here in every case. */
1201 close_many(fds, n_fds);
1206 } else if (ev->data.fd == s->stdout_fd) {
1208 if (ev->events != EPOLLIN) {
1209 log_error("Got invalid event from epoll.");
1213 stdout_stream_new(s);
1217 StdoutStream *stream;
/* Only EPOLLIN and EPOLLHUP are acceptable for stream connections. */
1219 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1220 log_error("Got invalid event from epoll.");
1224 /* If it is none of the well-known fds, it must be an
1225 * stdout stream fd. Note that this is a bit ugly here
1226 * (since we rely that none of the well-known fds
1227 * could be interpreted as pointer), but nonetheless
1228 * safe, since the well-known fds would never get an
1229 * fd > 4096, i.e. beyond the first memory page */
1231 stream = ev->data.ptr;
/* A non-positive result frees the stream. */
1233 if (stdout_stream_process(stream) <= 0)
1234 stdout_stream_free(stream);
1239 log_error("Unknown event.");
/* Block SIGINT, SIGTERM, SIGUSR1 and SIGUSR2 for normal delivery, create a
 * non-blocking close-on-exec signalfd for them in s->signal_fd, and register
 * that descriptor for EPOLLIN on s->epoll_fd. Failures of signalfd() and
 * epoll_ctl() are logged. */
1243 static int open_signalfd(Server *s) {
1245 struct epoll_event ev;
1249 assert_se(sigemptyset(&mask) == 0);
1250 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
/* SIG_SETMASK replaces the whole process signal mask with this set. */
1251 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1253 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1254 if (s->signal_fd < 0) {
1255 log_error("signalfd(): %m");
1260 ev.events = EPOLLIN;
1261 ev.data.fd = s->signal_fd;
1263 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1264 log_error("epoll_ctl(): %m");
/* Apply journald switches given on the kernel command line.
 *
 * Reads "/proc/cmdline" and recognises
 *   systemd.journald.forward_to_syslog=
 *   systemd.journald.forward_to_kmsg=
 *   systemd.journald.forward_to_console=
 * each taking a boolean. Any other word starting with "systemd.journald"
 * produces a warning. The detect_container() check comes first; presumably
 * the command line is skipped inside containers. */
1271 static int server_parse_proc_cmdline(Server *s) {
1272 char _cleanup_free_ *line = NULL;
1277 if (detect_container(NULL) > 0)
1280 r = read_one_line_file("/proc/cmdline", &line);
1282 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1286 FOREACH_WORD_QUOTED(w, l, line, state) {
1287 char _cleanup_free_ *word;
1289 word = strndup(w, l);
/* The numeric offsets below (35, 33, 36) are the lengths of the respective
 * option prefixes, i.e. they point at the value after '='. */
1293 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1294 r = parse_boolean(word + 35);
1296 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1298 s->forward_to_syslog = r;
1299 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1300 r = parse_boolean(word + 33);
1302 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1304 s->forward_to_kmsg = r;
1305 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1306 r = parse_boolean(word + 36);
1308 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1310 s->forward_to_console = r;
1311 } else if (startswith(word, "systemd.journald"))
1312 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Load "/etc/systemd/journald.conf" into the Server.
 *
 * Only the [Journal] section is parsed, with settings resolved through the
 * gperf-generated journald_gperf_lookup table. ENOENT on open is handled
 * separately from other open errors, which are logged as warnings, as are
 * parse errors. */
1318 static int server_parse_config_file(Server *s) {
1319 static const char *fn = "/etc/systemd/journald.conf";
1320 FILE _cleanup_fclose_ *f = NULL;
/* "e" opens the file with close-on-exec set (glibc extension). */
1325 f = fopen(fn, "re");
1327 if (errno == ENOENT)
1330 log_warning("Failed to open configuration file %s: %m", fn);
1334 r = config_parse(fn, f, "Journal\0", config_item_perf_lookup,
1335 (void*) journald_gperf_lookup, false, s);
1337 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Create the CLOCK_MONOTONIC, close-on-exec timerfd used for deferred
 * journal syncs, store it in s->sync_timer_fd and register it for EPOLLIN
 * on s->epoll_fd. The timer is not armed here; see server_schedule_sync(). */
1342 static int server_open_sync_timer(Server *s) {
1344 struct epoll_event ev;
1348 s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1349 if (s->sync_timer_fd < 0)
1353 ev.events = EPOLLIN;
1354 ev.data.fd = s->sync_timer_fd;
1356 r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1358 log_error("Failed to add idle timer fd to epoll object: %m");
1365 int server_schedule_sync(Server *s) {
1370 if (s->sync_scheduled)
1373 if (s->sync_interval_usec) {
1374 struct itimerspec sync_timer_enable = {
1375 .it_value.tv_sec = s->sync_interval_usec / USEC_PER_SEC,
1376 .it_value.tv_nsec = s->sync_interval_usec % MSEC_PER_SEC,
1379 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1384 s->sync_scheduled = true;
/* Initialise a Server: set defaults, read configuration, adopt sockets
 * passed in by the service manager, open the remaining sockets and event
 * sources, and finally open the journal files.
 *
 * Sequence visible here: invalidate all fds, install defaults, parse the
 * config file and the kernel command line, normalise the rate-limit
 * settings, create "/run/systemd/journal", allocate the user-journal hashmap
 * and the mmap cache, create the epoll instance, classify each passed fd by
 * socket type and path, then open syslog/native/stdout sockets, /dev/kmsg,
 * the kernel sequence number, the sync timer and the signalfd, create the
 * udev context and the rate limiter, and call system_journal_open(). */
1389 int server_init(Server *s) {
1395 s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1396 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1400 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1401 s->sync_scheduled = false;
1403 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1404 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1406 s->forward_to_syslog = true;
1408 s->max_level_store = LOG_DEBUG;
1409 s->max_level_syslog = LOG_DEBUG;
1410 s->max_level_kmsg = LOG_NOTICE;
1411 s->max_level_console = LOG_INFO;
/* Fill both metrics structs with 0xFF bytes; presumably all-ones marks a
 * field as "not configured" - confirm against JournalMetrics users. */
1413 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1414 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1416 server_parse_config_file(s);
1417 server_parse_proc_cmdline(s);
/* If exactly one of interval and burst is zero, zero both. */
1418 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1419 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1420 (long long unsigned) s->rate_limit_interval,
1421 s->rate_limit_burst);
1422 s->rate_limit_interval = s->rate_limit_burst = 0;
1425 mkdir_p("/run/systemd/journal", 0755);
1427 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1428 if (!s->user_journals)
1431 s->mmap = mmap_cache_new();
1435 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1436 if (s->epoll_fd < 0) {
1437 log_error("Failed to create epoll object: %m");
/* Passing true asks sd_listen_fds() to unset the environment variables. */
1441 n = sd_listen_fds(true);
1443 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
/* Sort each inherited descriptor into its role; a second socket of the same
 * kind, or an unrecognised one, is reported as an error. */
1447 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1449 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1451 if (s->native_fd >= 0) {
1452 log_error("Too many native sockets passed.");
1458 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1460 if (s->stdout_fd >= 0) {
1461 log_error("Too many stdout sockets passed.");
1467 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1469 if (s->syslog_fd >= 0) {
1470 log_error("Too many /dev/log sockets passed.");
1477 log_error("Unknown socket passed.");
1482 r = server_open_syslog_socket(s);
1486 r = server_open_native_socket(s);
1490 r = server_open_stdout_socket(s);
1494 r = server_open_dev_kmsg(s);
1498 r = server_open_kernel_seqnum(s);
1502 r = server_open_sync_timer(s);
1506 r = open_signalfd(s);
1510 s->udev = udev_new();
1514 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1515 s->rate_limit_burst);
1519 r = system_journal_open(s);
/* Call journal_file_maybe_append_tag() with the current CLOCK_REALTIME time
 * on the system journal (if open) and on every per-user journal. The runtime
 * journal is not touched here. */
1526 void server_maybe_append_tags(Server *s) {
1532 n = now(CLOCK_REALTIME);
1534 if (s->system_journal)
1535 journal_file_maybe_append_tag(s->system_journal, n);
1537 HASHMAP_FOREACH(f, s->user_journals, i)
1538 journal_file_maybe_append_tag(f, n);
/* Release everything a Server holds: stdout streams, journal files, the
 * user-journal hashmap, all file descriptors that are open (>= 0), the rate
 * limiter, the kernel sequence number mapping, the mmap cache and the udev
 * context. */
1542 void server_done(Server *s) {
/* stdout_stream_free() is expected to unlink the stream from the list head,
 * otherwise this loop would not terminate - confirm. */
1546 while (s->stdout_streams)
1547 stdout_stream_free(s->stdout_streams);
1549 if (s->system_journal)
1550 journal_file_close(s->system_journal);
1552 if (s->runtime_journal)
1553 journal_file_close(s->runtime_journal);
/* Drain the hashmap, closing each per-user journal, before freeing it. */
1555 while ((f = hashmap_steal_first(s->user_journals)))
1556 journal_file_close(f);
1558 hashmap_free(s->user_journals);
1560 if (s->epoll_fd >= 0)
1561 close_nointr_nofail(s->epoll_fd);
1563 if (s->signal_fd >= 0)
1564 close_nointr_nofail(s->signal_fd);
1566 if (s->syslog_fd >= 0)
1567 close_nointr_nofail(s->syslog_fd);
1569 if (s->native_fd >= 0)
1570 close_nointr_nofail(s->native_fd);
1572 if (s->stdout_fd >= 0)
1573 close_nointr_nofail(s->stdout_fd);
1575 if (s->dev_kmsg_fd >= 0)
1576 close_nointr_nofail(s->dev_kmsg_fd);
1578 if (s->sync_timer_fd >= 0)
1579 close_nointr_nofail(s->sync_timer_fd);
1582 journal_rate_limit_free(s->rate_limit);
/* The mapping unmapped here is one uint64_t in size. */
1584 if (s->kernel_seqnum)
1585 munmap(s->kernel_seqnum, sizeof(uint64_t));
1591 mmap_cache_unref(s->mmap);
1594 udev_unref(s->udev);