1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
35 #include <systemd/sd-login.h>
41 #include "journal-file.h"
42 #include "socket-util.h"
43 #include "cgroup-util.h"
47 #include "conf-parser.h"
48 #include "journal-internal.h"
49 #include "journal-vacuum.h"
50 #include "journal-authenticate.h"
51 #include "journald-server.h"
52 #include "journald-rate-limit.h"
53 #include "journald-kmsg.h"
54 #include "journald-syslog.h"
55 #include "journald-stream.h"
56 #include "journald-console.h"
57 #include "journald-native.h"
61 #include <acl/libacl.h>
66 #include <selinux/selinux.h>
/* Upper bound on the number of per-user journal files kept in the
 * user_journals hashmap; find_journal() closes entries while the hashmap size
 * is at or above this value. */
69 #define USER_JOURNALS_MAX 1024
/* Initial value of sync_interval_usec assigned in server_init(). */
71 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
/* Initial values of rate_limit_interval / rate_limit_burst assigned in
 * server_init(). */
72 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
73 #define DEFAULT_RATE_LIMIT_BURST 200
/* For how long available_space() keeps returning its cached result before
 * it recomputes it. */
75 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Configuration-file spelling of each Storage enum value, indexed by the
 * enum value itself. */
77 static const char* const storage_table[] = {
78 [STORAGE_AUTO] = "auto",
79 [STORAGE_VOLATILE] = "volatile",
80 [STORAGE_PERSISTENT] = "persistent",
81 [STORAGE_NONE] = "none"
/* NOTE(review): presumably these macros derive the string<->Storage lookup
 * helpers and the config_parse_storage() parser callback from the table above
 * -- confirm against the macro definitions. */
84 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
85 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Configuration-file spelling of each SplitMode enum value, indexed by the
 * enum value itself. */
87 static const char* const split_mode_table[] = {
88 [SPLIT_NONE] = "none",
90 [SPLIT_LOGIN] = "login"
/* NOTE(review): presumably these macros derive the string<->SplitMode lookup
 * helpers and the config_parse_split_mode() parser callback from the table
 * above -- confirm against the macro definitions. */
93 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
94 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Estimate, in bytes, how much more disk space the journal may consume.
 *
 * The result is cached in s->cached_available_space together with a
 * CLOCK_MONOTONIC timestamp and is returned unchanged while the cache is
 * younger than RECHECK_AVAILABLE_SPACE_USEC.
 *
 * The directory examined is "/var/log/journal/<machine-id>" with the system
 * metrics when s->system_journal is set; "/run/log/journal/<machine-id>" with
 * the runtime metrics is the alternative. Two limits are computed:
 *   - max_use minus the summed disk usage of all regular files whose name
 *     ends in ".journal" or ".journal~" (floored at 0), and
 *   - the free space reported by fstatvfs() minus keep_free (floored at 0).
 * The two limits are compared before the value is stored in the cache
 * (presumably the smaller one is kept -- confirm). */
96 static uint64_t available_space(Server *s) {
98 char _cleanup_free_ *p = NULL;
102 uint64_t sum = 0, avail = 0, ss_avail = 0;
104 DIR _cleanup_closedir_ *d = NULL;
108 ts = now(CLOCK_MONOTONIC);
/* Serve from the cache while it is still fresh. */
110 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
111 return s->cached_available_space;
113 r = sd_id128_get_machine(&machine);
117 if (s->system_journal) {
118 f = "/var/log/journal/";
119 m = &s->system_metrics;
121 f = "/run/log/journal/";
122 m = &s->runtime_metrics;
127 p = strappend(f, sd_id128_to_string(machine, ids));
135 if (fstatvfs(dirfd(d), &ss) < 0)
141 union dirent_storage buf;
143 r = readdir_r(d, &buf.de, &de);
/* Only journal files (active or archived with a trailing '~') count. */
150 if (!endswith(de->d_name, ".journal") &&
151 !endswith(de->d_name, ".journal~"))
154 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
157 if (!S_ISREG(st.st_mode))
/* st_blocks is counted in 512-byte units. */
160 sum += (uint64_t) st.st_blocks * 512UL;
163 avail = sum >= m->max_use ? 0 : m->max_use - sum;
/* Widen before multiplying so the product cannot wrap when the statvfs
 * fields are narrower than 64 bit (same treatment as st_blocks above). */
165 ss_avail = (uint64_t) ss.f_bsize * (uint64_t) ss.f_bavail;
167 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
169 if (ss_avail < avail)
172 s->cached_available_space = avail;
173 s->cached_available_space_timestamp = ts;
/* Resolve the "systemd-journal" group into s->file_gid.
 *
 * s->file_gid_valid guards the lookup and is set once the attempt has been
 * made, whether or not it succeeded; a failed lookup is only logged as a
 * warning. */
178 static void server_read_file_gid(Server *s) {
179 const char *g = "systemd-journal";
184 if (s->file_gid_valid)
187 r = get_group_creds(&g, &s->file_gid);
189 log_warning("Failed to resolve '%s' group: %s", g, strerror(-r));
191 /* if we couldn't read the gid, then it will be 0, but that's
192 * fine and we shouldn't try to resolve the group again, so
193 * let's just pretend it worked right-away. */
194 s->file_gid_valid = true;
/* Best-effort fix-up of the mode, ownership and ACL of journal file f.
 *
 * Sets mode 0640, owner 0 and group s->file_gid on f->fd. It then reads the
 * file's ACL, looks up an entry for uid, and patches in an ACL_USER entry for
 * uid with ACL_READ permission, recalculates the mask and writes the ACL back
 * to the fd. The function returns nothing; every failure is only logged as a
 * warning marked "ignoring". */
197 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
202 acl_permset_t permset;
/* Make sure s->file_gid has been looked up before it is used below. */
207 server_read_file_gid(s);
209 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
211 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
217 acl = acl_get_fd(f->fd);
219 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
223 r = acl_find_uid(acl, uid, &entry);
/* Create a new user entry qualified with uid. */
226 if (acl_create_entry(&acl, &entry) < 0 ||
227 acl_set_tag_type(entry, ACL_USER) < 0 ||
228 acl_set_qualifier(entry, &uid) < 0) {
229 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Grant read permission on the entry and recompute the ACL mask. */
234 if (acl_get_permset(entry, &permset) < 0 ||
235 acl_add_perm(permset, ACL_READ) < 0 ||
236 acl_calc_mask(&acl) < 0) {
237 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
241 if (acl_set_fd(f->fd, acl) < 0)
242 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Pick the journal file that an entry logged on behalf of uid is written to.
 *
 * The runtime journal is returned whenever it is open. The other results are
 * the shared system journal, which is also the fallback on the visible
 * failure paths (path allocation, opening the file, inserting it into the
 * hashmap), or a per-user journal
 * "/var/log/journal/<machine-id>/user-<uid>.journal" that is cached in
 * s->user_journals keyed by uid. */
249 static JournalFile* find_journal(Server *s, uid_t uid) {
257 /* We split up user logs only on /var, not on /run. If the
258 * runtime file is open, we write to it exclusively, in order
259 * to guarantee proper order as soon as we flush /run to
260 * /var and close the runtime file. */
262 if (s->runtime_journal)
263 return s->runtime_journal;
266 return s->system_journal;
268 r = sd_id128_get_machine(&machine);
270 return s->system_journal;
/* Look for an already opened per-user journal. */
272 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
276 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
277 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
278 return s->system_journal;
280 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
281 /* Too many open? Then let's close one */
282 f = hashmap_steal_first(s->user_journals);
284 journal_file_close(f);
/* Open (creating if needed) the per-user file, using the system journal as
 * the template argument and the system metrics. */
287 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
291 return s->system_journal;
293 server_fix_perms(s, f, uid);
295 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
297 journal_file_close(f);
298 return s->system_journal;
/* Rotate all journal files the server has open: the runtime journal, the
 * system journal and every per-user journal in s->user_journals.
 *
 * The runtime journal is rotated with sealing disabled (false), the others
 * with s->seal. Failures are logged as errors; the message distinguishes
 * between a failed rotation of an existing file and a failure to create the
 * new one. server_fix_perms() is called on the rotated files (uid 0 for the
 * runtime and system journals, the hashmap key for user journals), and user
 * journal entries are replaced in the hashmap with the new file object. */
304 void server_rotate(Server *s) {
310 log_debug("Rotating...");
312 if (s->runtime_journal) {
313 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
315 if (s->runtime_journal)
316 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
318 log_error("Failed to create new runtime journal: %s", strerror(-r));
320 server_fix_perms(s, s->runtime_journal, 0);
323 if (s->system_journal) {
324 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
326 if (s->system_journal)
327 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
329 log_error("Failed to create new system journal: %s", strerror(-r));
332 server_fix_perms(s, s->system_journal, 0);
/* k is the hashmap key, i.e. the uid the journal belongs to. */
335 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
336 r = journal_file_rotate(&f, s->compress, s->seal);
339 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
341 log_error("Failed to create user journal: %s", strerror(-r));
343 hashmap_replace(s->user_journals, k, f);
344 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Sync the system journal and all per-user journals to disk and disarm the
 * sync timer.
 *
 * Each file is synced by calling journal_file_set_offline() on it; failures
 * are logged as errors. Afterwards s->sync_timer_fd is disarmed by setting an
 * all-zero itimerspec and s->sync_scheduled is cleared, so that
 * server_schedule_sync() may arm the timer again. */
349 void server_sync(Server *s) {
/* An all-zero it_value disarms a timerfd. */
355 static const struct itimerspec sync_timer_disable = {};
357 if (s->system_journal) {
358 r = journal_file_set_offline(s->system_journal);
360 log_error("Failed to sync system journal: %s", strerror(-r));
363 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
364 r = journal_file_set_offline(f);
366 log_error("Failed to sync user journal: %s", strerror(-r));
369 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
/* The timer being disabled is the sync timer; name it as such in the log. */
371 log_error("Failed to disable sync timer: %m");
373 s->sync_scheduled = false;
/* Vacuum the journal directories of this machine.
 *
 * Resets s->oldest_file_usec, then runs journal_directory_vacuum() on
 * "/var/log/journal/<machine-id>" when the system journal is open and on
 * "/run/log/journal/<machine-id>" when the runtime journal is open, each with
 * the matching metrics (max_use, keep_free) and s->max_retention_usec.
 * Vacuum errors other than -ENOENT are logged. Finally the timestamp of the
 * available-space cache is zeroed so that the next available_space() call
 * recomputes the value. */
376 void server_vacuum(Server *s) {
382 log_debug("Vacuuming...");
384 s->oldest_file_usec = 0;
386 r = sd_id128_get_machine(&machine);
388 log_error("Failed to get machine ID: %s", strerror(-r));
392 sd_id128_to_string(machine, ids);
394 if (s->system_journal) {
395 p = strappend("/var/log/journal/", ids);
401 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
402 if (r < 0 && r != -ENOENT)
403 log_error("Failed to vacuum %s: %s", p, strerror(-r));
407 if (s->runtime_journal) {
408 p = strappend("/run/log/journal/", ids);
414 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
415 if (r < 0 && r != -ENOENT)
416 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* Invalidate the cached available-space value. */
420 s->cached_available_space_timestamp = 0;
/* Return the cgroup path of pid relative to the cgroup of PID 1.
 *
 * Both paths are queried in the SYSTEMD_CGROUP_CONTROLLER hierarchy. A
 * trailing "/system" is cut off the init path; if the process path then
 * starts with the init path, the remainder is duplicated with strdup(). The
 * callers in this file use the result as a heap string (they free it or hold
 * it in a _cleanup_free_ variable). */
423 static char *shortened_cgroup_path(pid_t pid) {
425 char _cleanup_free_ *process_path = NULL, *init_path = NULL;
430 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
434 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
/* 7 == strlen("/system"): truncate the suffix in place. */
438 if (endswith(init_path, "/system"))
439 init_path[strlen(init_path) - 7] = 0;
440 else if (streq(init_path, "/"))
443 if (startswith(process_path, init_path)) {
444 path = strdup(process_path + strlen(init_path));
/* Classify the negative errno-style result r of a failed append to journal
 * file f and log why the file needs to be rotated.
 *
 * The callers in this file (write_to_journal() and server_flush_to_var())
 * give up on the entry when this returns false; otherwise they go on to a
 * "Retrying write." attempt.
 *
 * All codes are compared in their negative form, including -ESHUTDOWN, which
 * the list below documents as "Already archived". */
453 bool shall_try_append_again(JournalFile *f, int r) {
455 /* -E2BIG Hit configured limit
457 -EDQUOT Quota limit hit
459 -EHOSTDOWN Other machine
460 -EBUSY Unclean shutdown
461 -EPROTONOSUPPORT Unsupported feature
464 -ESHUTDOWN Already archived */
466 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
467 log_debug("%s: Allocation limit reached, rotating.", f->path);
468 else if (r == -EHOSTDOWN)
469 log_info("%s: Journal file from other machine, rotating.", f->path);
470 else if (r == -EBUSY)
471 log_info("%s: Unclean shutdown, rotating.", f->path);
472 else if (r == -EPROTONOSUPPORT)
473 log_info("%s: Unsupported feature, rotating.", f->path);
474 else if (r == -EBADMSG || r == -ENODATA || r == -ESHUTDOWN)
475 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Append one entry, given as n iovec fields, to the journal selected for uid.
 *
 * The target file comes from find_journal(). If
 * journal_file_rotate_suggested() asks for it, the journal is looked up again
 * after the rotation message is logged. After the first append attempt
 * server_schedule_sync() is called. If the append failed and either a vacuum
 * already happened or shall_try_append_again() rejects the error, the entry is
 * dropped with an error message; otherwise the journal is looked up once more
 * and the write is retried a single time. */
482 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
484 bool vacuumed = false;
491 f = find_journal(s, uid);
495 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
496 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
501 f = find_journal(s, uid);
506 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
508 server_schedule_sync(s);
512 if (vacuumed || !shall_try_append_again(f, r)) {
513 log_error("Failed to write entry, ignoring: %s", strerror(-r));
520 f = find_journal(s, uid);
524 log_debug("Retrying write.");
525 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
527 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* Append the trusted metadata fields (all prefixed with '_') to an entry and
 * write it to the journal.
 *
 * iovec holds n caller-provided fields and has room for m entries in total;
 * the assert requires that N_IOVEC_META_FIELDS more entries fit. Fields
 * added from the sender credentials (ucred): _PID, _UID, _GID, _COMM, _EXE,
 * _CMDLINE, _AUDIT_SESSION, _AUDIT_LOGINUID, _SYSTEMD_CGROUP,
 * _SYSTEMD_SESSION, _SYSTEMD_OWNER_UID, _SYSTEMD_UNIT or _SYSTEMD_USER_UNIT
 * (falling back to unit_id), and _SELINUX_CONTEXT (from label/label_len, or
 * from getpidcon()). Also added: _SOURCE_REALTIME_TIMESTAMP (from tv),
 * _BOOT_ID, _MACHINE_ID and _HOSTNAME.
 *
 * A field is only added when its string could be built. All field strings
 * are held in _cleanup_free_ variables of this function. The journal the
 * entry goes to is selected via journal_uid, which depends on
 * s->split_mode. */
530 static void dispatch_message_real(
532 struct iovec *iovec, unsigned n, unsigned m,
535 const char *label, size_t label_len,
536 const char *unit_id) {
538 char _cleanup_free_ *pid = NULL, *uid = NULL, *gid = NULL,
539 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
540 *comm = NULL, *cmdline = NULL, *hostname = NULL,
541 *audit_session = NULL, *audit_loginuid = NULL,
542 *exe = NULL, *cgroup = NULL, *session = NULL,
543 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
549 uid_t realuid = 0, owner = 0, journal_uid;
550 bool owner_valid = false;
/* The caller must have left room for the metadata fields. */
555 assert(n + N_IOVEC_META_FIELDS <= m);
563 realuid = ucred->uid;
/* Credentials of the sending process. */
565 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
566 IOVEC_SET_STRING(iovec[n++], pid);
568 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
569 IOVEC_SET_STRING(iovec[n++], uid);
571 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
572 IOVEC_SET_STRING(iovec[n++], gid);
/* Process name, executable path and command line of the sender. */
574 r = get_process_comm(ucred->pid, &t);
576 comm = strappend("_COMM=", t);
580 IOVEC_SET_STRING(iovec[n++], comm);
583 r = get_process_exe(ucred->pid, &t);
585 exe = strappend("_EXE=", t);
589 IOVEC_SET_STRING(iovec[n++], exe);
592 r = get_process_cmdline(ucred->pid, 0, false, &t);
594 cmdline = strappend("_CMDLINE=", t);
598 IOVEC_SET_STRING(iovec[n++], cmdline);
/* Audit session and login uid of the sender. */
602 r = audit_session_from_pid(ucred->pid, &audit);
604 if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
605 IOVEC_SET_STRING(iovec[n++], audit_session);
607 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
609 if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
610 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
/* cgroup, login session, session owner and unit of the sender. */
613 t = shortened_cgroup_path(ucred->pid);
615 cgroup = strappend("_SYSTEMD_CGROUP=", t);
619 IOVEC_SET_STRING(iovec[n++], cgroup);
623 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
624 session = strappend("_SYSTEMD_SESSION=", t);
628 IOVEC_SET_STRING(iovec[n++], session);
631 if (sd_pid_get_owner_uid(ucred->pid, &owner) >= 0) {
633 if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
634 IOVEC_SET_STRING(iovec[n++], owner_uid);
638 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
639 unit = strappend("_SYSTEMD_UNIT=", t);
641 } else if (cg_pid_get_user_unit(ucred->pid, &t) >= 0) {
642 unit = strappend("_SYSTEMD_USER_UNIT=", t);
644 } else if (unit_id) {
646 unit = strappend("_SYSTEMD_USER_UNIT=", unit_id);
648 unit = strappend("_SYSTEMD_UNIT=", unit_id);
652 IOVEC_SET_STRING(iovec[n++], unit);
/* SELinux context: built from the label bytes passed by the caller (sizeof
 * of the literal already includes room for the terminating NUL), or queried
 * from the sender pid with getpidcon(). */
656 selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
657 if (selinux_context) {
658 *((char*) mempcpy(stpcpy(selinux_context, "_SELINUX_CONTEXT="), label, label_len)) = 0;
659 IOVEC_SET_STRING(iovec[n++], selinux_context);
662 security_context_t con;
664 if (getpidcon(ucred->pid, &con) >= 0) {
665 selinux_context = strappend("_SELINUX_CONTEXT=", con);
667 IOVEC_SET_STRING(iovec[n++], selinux_context);
/* Timestamp supplied by the sender side, taken from tv. */
675 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
676 (unsigned long long) timeval_load(tv)) >= 0)
677 IOVEC_SET_STRING(iovec[n++], source_time);
680 /* Note that strictly speaking storing the boot id here is
681 * redundant since the entry includes this in-line
682 * anyway. However, we need this indexed, too. */
683 r = sd_id128_get_boot(&id);
685 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
686 IOVEC_SET_STRING(iovec[n++], boot_id);
688 r = sd_id128_get_machine(&id);
690 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
691 IOVEC_SET_STRING(iovec[n++], machine_id);
693 t = gethostname_malloc();
695 hostname = strappend("_HOSTNAME=", t);
698 IOVEC_SET_STRING(iovec[n++], hostname);
/* Choose which journal the entry is written to, based on the split mode. */
703 if (s->split_mode == SPLIT_UID && realuid > 0)
704 /* Split up strictly by any UID */
705 journal_uid = realuid;
706 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
707 /* Split up by login UIDs, this avoids creation of
708 * individual journals for system UIDs. We do this
709 * only if the realuid is not root, in order not to
710 * accidentally leak privileged information to the
711 * user that is logged by a privileged process that is
712 * part of an unprivileged session.*/
717 write_to_journal(s, journal_uid, iovec, n);
/* Log a message generated by the journal daemon itself.
 *
 * Builds an entry with PRIORITY=6, _TRANSPORT=driver and a MESSAGE= field
 * formatted printf-style from format and the variadic arguments. The message
 * text is truncated to the size of the local buffer. If message_id is not
 * SD_ID128_NULL, a field built with the MESSAGE_ID() macro is added as well.
 * The entry is dispatched with the daemon's own pid, uid and gid as sender
 * credentials and without timestamp, label or unit id. */
720 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
721 char mid[11 + 32 + 1];
722 char buffer[16 + LINE_MAX + 1];
/* Room for the (up to) four fields set here plus the metadata fields added
 * by dispatch_message_real(). */
723 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
726 struct ucred ucred = {};
731 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
732 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* 8 == strlen("MESSAGE="); the formatted text follows the prefix. */
734 memcpy(buffer, "MESSAGE=", 8);
735 va_start(ap, format);
736 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
738 char_array_0(buffer);
739 IOVEC_SET_STRING(iovec[n++], buffer);
741 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
742 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
744 IOVEC_SET_STRING(iovec[n++], mid);
747 ucred.pid = getpid();
748 ucred.uid = getuid();
749 ucred.gid = getgid();
751 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Entry point for messages received from clients: filter, rate-limit and
 * store an entry.
 *
 * iovec holds n fields and has room for m entries; it may be NULL only when
 * n is 0. The priority is checked against s->max_level_store. The
 * rate-limit key is the sender's shortened cgroup path, truncated as
 * described in the comment below; journal_rate_limit_test() is called with
 * that key, the priority masked with LOG_PRIMASK and the currently available
 * space. When messages were suppressed, a SD_MESSAGE_JOURNAL_DROPPED driver
 * message reporting "rl - 1" suppressed messages is written. The entry itself
 * is handed to dispatch_message_real(). */
754 void server_dispatch_message(
756 struct iovec *iovec, unsigned n, unsigned m,
759 const char *label, size_t label_len,
764 char _cleanup_free_ *path = NULL;
768 assert(iovec || n == 0);
773 if (LOG_PRI(priority) > s->max_level_store)
779 path = shortened_cgroup_path(ucred->pid);
783 /* example: /user/lennart/3/foobar
784 * /system/dbus.service/foobar
786 * So let's cut off everything past the third /, since that is
787 * where user directories start */
789 c = strchr(path, '/');
791 c = strchr(c+1, '/');
793 c = strchr(c+1, '/');
799 rl = journal_rate_limit_test(s->rate_limit, path,
800 priority & LOG_PRIMASK, available_space(s));
805 /* Write a suppression message if we suppressed something */
807 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
808 "Suppressed %u messages from %s", rl - 1, path);
811 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
/* Open the system journal and/or the runtime journal, depending on the
 * configured storage mode.
 *
 * System journal ("/var/log/journal/<machine-id>/system.journal"): only
 * attempted when it is not open yet, the storage mode is persistent or auto,
 * and the flag file "/run/systemd/journal/flushed" exists. In persistent mode
 * "/var/log/journal/" is created first; the machine directory is created in
 * both modes. Open failures other than -ENOENT and -EROFS are logged as
 * warnings.
 *
 * Runtime journal ("/run/log/journal/<machine-id>/system.journal"): only
 * attempted when it is not open yet and the storage mode is not "none". If
 * the system journal is open, the runtime file is opened without O_CREAT so
 * that existing content can be flushed; the alternative path creates the
 * parent directories and the file.
 *
 * After a journal was opened, its permissions are fixed and a driver message
 * announces its size limit (max_use). */
815 static int system_journal_open(Server *s) {
821 r = sd_id128_get_machine(&machine);
825 sd_id128_to_string(machine, ids);
827 if (!s->system_journal &&
828 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
829 access("/run/systemd/journal/flushed", F_OK) >= 0) {
831 /* If in auto mode: first try to create the machine
832 * path, but not the prefix.
834 * If in persistent mode: create /var/log/journal and
835 * the machine path */
837 if (s->storage == STORAGE_PERSISTENT)
838 (void) mkdir("/var/log/journal/", 0755);
840 fn = strappend("/var/log/journal/", ids);
844 (void) mkdir(fn, 0755);
847 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
851 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
855 char fb[FORMAT_BYTES_MAX];
857 server_fix_perms(s, s->system_journal, 0);
858 server_driver_message(s, SD_ID128_NULL, "Allowing system journal files to grow to %s.",
859 format_bytes(fb, sizeof(fb), s->system_metrics.max_use));
863 if (r != -ENOENT && r != -EROFS)
864 log_warning("Failed to open system journal: %s", strerror(-r));
870 if (!s->runtime_journal &&
871 (s->storage != STORAGE_NONE)) {
873 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
877 if (s->system_journal) {
879 /* Try to open the runtime journal, but only
880 * if it already exists, so that we can flush
881 * it into the system journal */
883 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
888 log_warning("Failed to open runtime journal: %s", strerror(-r));
895 /* OK, we really need the runtime journal, so create
896 * it if necessary. */
898 (void) mkdir_parents(fn, 0755);
899 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
903 log_error("Failed to open runtime journal: %s", strerror(-r));
908 if (s->runtime_journal) {
909 char fb[FORMAT_BYTES_MAX];
911 server_fix_perms(s, s->runtime_journal, 0);
912 server_driver_message(s, SD_ID128_NULL, "Allowing runtime journal files to grow to %s.",
913 format_bytes(fb, sizeof(fb), s->runtime_metrics.max_use));
/* Copy the contents of the runtime journal in /run into the system journal
 * in /var and then remove the runtime journal.
 *
 * Only relevant when the storage mode is auto or persistent and a runtime
 * journal is open. The system journal is (re)opened via
 * system_journal_open() and must be available afterwards. The runtime
 * entries are read through an sd_journal handle opened with
 * SD_JOURNAL_RUNTIME_ONLY and the data threshold disabled (0). Each entry is
 * copied with journal_file_copy_entry(); when a copy fails with an error that
 * shall_try_append_again() accepts, the copy is retried once. Finally the
 * system journal is notified of the change, the runtime journal is closed and
 * "/run/log/journal" is removed. */
920 int server_flush_to_var(Server *s) {
923 sd_journal *j = NULL;
927 if (s->storage != STORAGE_AUTO &&
928 s->storage != STORAGE_PERSISTENT)
931 if (!s->runtime_journal)
934 system_journal_open(s);
936 if (!s->system_journal)
939 log_debug("Flushing to /var...");
941 r = sd_id128_get_machine(&machine);
943 log_error("Failed to get machine id: %s", strerror(-r));
947 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
949 log_error("Failed to read runtime journal: %s", strerror(-r));
953 sd_journal_set_data_threshold(j, 0);
955 SD_JOURNAL_FOREACH(j) {
960 assert(f && f->current_offset > 0);
/* Locate the entry object the iterator currently points at. */
962 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
964 log_error("Can't read entry: %s", strerror(-r));
968 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
972 if (!shall_try_append_again(s->system_journal, r)) {
973 log_error("Can't write entry: %s", strerror(-r));
980 log_debug("Retrying write.");
981 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
983 log_error("Can't write entry: %s", strerror(-r));
989 journal_file_post_change(s->system_journal);
991 journal_file_close(s->runtime_journal);
992 s->runtime_journal = NULL;
995 rm_rf("/run/log/journal", false, true, false);
/* Handle one epoll event for the server.
 *
 * The event is dispatched on the fd stored in ev->data.fd:
 *   - signal_fd:      reads a signalfd_siginfo; SIGUSR1 touches
 *                     "/run/systemd/journal/flushed" and flushes the runtime
 *                     journal to /var; SIGUSR2 is handled separately; other
 *                     signals are logged with "Received SIG%s".
 *   - sync_timer_fd:  reads the timer fd after logging a sync request.
 *   - dev_kmsg_fd:    calls server_read_dev_kmsg().
 *   - native_fd /
 *     syslog_fd:      receives datagrams with recvmsg(), extracts sender
 *                     credentials, SELinux label, timestamp and passed file
 *                     descriptors from the control messages and feeds the
 *                     payload to the syslog or native message processors.
 *   - stdout_fd:      accepts a new stdout stream via stdout_stream_new().
 *   - anything else:  ev->data.ptr is interpreted as a StdoutStream (see the
 *                     comment in that branch) and processed.
 * For the well-known fds any event mask other than EPOLLIN is logged as
 * invalid; stream fds additionally accept EPOLLHUP. */
1002 int process_event(Server *s, struct epoll_event *ev) {
1006 if (ev->data.fd == s->signal_fd) {
1007 struct signalfd_siginfo sfsi;
1010 if (ev->events != EPOLLIN) {
1011 log_error("Got invalid event from epoll.");
1015 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1016 if (n != sizeof(sfsi)) {
1021 if (errno == EINTR || errno == EAGAIN)
1027 if (sfsi.ssi_signo == SIGUSR1) {
1028 touch("/run/systemd/journal/flushed");
1029 server_flush_to_var(s);
1034 if (sfsi.ssi_signo == SIGUSR2) {
1040 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1044 } else if (ev->data.fd == s->sync_timer_fd) {
1048 log_debug("Got sync request from epoll.");
1050 r = read(ev->data.fd, (void *)&t, sizeof(t));
1057 } else if (ev->data.fd == s->dev_kmsg_fd) {
1060 if (ev->events != EPOLLIN) {
1061 log_error("Got invalid event from epoll.");
1065 r = server_read_dev_kmsg(s);
1071 } else if (ev->data.fd == s->native_fd ||
1072 ev->data.fd == s->syslog_fd) {
1074 if (ev->events != EPOLLIN) {
1075 log_error("Got invalid event from epoll.");
1080 struct msghdr msghdr;
1082 struct ucred *ucred = NULL;
1083 struct timeval *tv = NULL;
1084 struct cmsghdr *cmsg;
1086 size_t label_len = 0;
1088 struct cmsghdr cmsghdr;
1090 /* We use NAME_MAX space for the
1091 * SELinux label here. The kernel
1092 * currently enforces no limit, but
1093 * according to suggestions from the
1094 * SELinux people this will change and
1095 * it will probably be identical to
1096 * NAME_MAX. For now we use that, but
1097 * this should be updated one day when
1098 * the final limit is known.*/
1099 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1100 CMSG_SPACE(sizeof(struct timeval)) +
1101 CMSG_SPACE(sizeof(int)) + /* fd */
1102 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask the socket how many bytes are queued, to size the receive buffer. */
1109 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1110 log_error("SIOCINQ failed: %m");
1114 if (s->buffer_size < (size_t) v) {
/* Grow to at least double the old size; one extra byte is allocated beyond
 * l. */
1118 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1119 b = realloc(s->buffer, l+1);
1122 log_error("Couldn't increase buffer.");
1131 iovec.iov_base = s->buffer;
1132 iovec.iov_len = s->buffer_size;
1136 msghdr.msg_iov = &iovec;
1137 msghdr.msg_iovlen = 1;
1138 msghdr.msg_control = &control;
1139 msghdr.msg_controllen = sizeof(control);
1141 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1144 if (errno == EINTR || errno == EAGAIN)
1147 log_error("recvmsg() failed: %m");
/* Pick sender credentials, SELinux label, timestamp and passed fds out of
 * the ancillary data. */
1151 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1153 if (cmsg->cmsg_level == SOL_SOCKET &&
1154 cmsg->cmsg_type == SCM_CREDENTIALS &&
1155 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1156 ucred = (struct ucred*) CMSG_DATA(cmsg);
1157 else if (cmsg->cmsg_level == SOL_SOCKET &&
1158 cmsg->cmsg_type == SCM_SECURITY) {
1159 label = (char*) CMSG_DATA(cmsg);
1160 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1161 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1162 cmsg->cmsg_type == SO_TIMESTAMP &&
1163 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1164 tv = (struct timeval*) CMSG_DATA(cmsg);
1165 else if (cmsg->cmsg_level == SOL_SOCKET &&
1166 cmsg->cmsg_type == SCM_RIGHTS) {
1167 fds = (int*) CMSG_DATA(cmsg);
1168 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* Syslog socket: payload only, passed file descriptors are not accepted. */
1172 if (ev->data.fd == s->syslog_fd) {
1175 if (n > 0 && n_fds == 0) {
1176 e = memchr(s->buffer, '\n', n);
1182 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1183 } else if (n_fds > 0)
1184 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native socket: either an inline payload or exactly one fd with an empty
 * payload. */
1187 if (n > 0 && n_fds == 0)
1188 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1189 else if (n == 0 && n_fds == 1)
1190 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1192 log_warning("Got too many file descriptors via native socket. Ignoring.");
1195 close_many(fds, n_fds);
1200 } else if (ev->data.fd == s->stdout_fd) {
1202 if (ev->events != EPOLLIN) {
1203 log_error("Got invalid event from epoll.");
1207 stdout_stream_new(s);
1211 StdoutStream *stream;
1213 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1214 log_error("Got invalid event from epoll.");
1218 /* If it is none of the well-known fds, it must be an
1219 * stdout stream fd. Note that this is a bit ugly here
1220 * (since we rely that none of the well-known fds
1221 * could be interpreted as pointer), but nonetheless
1222 * safe, since the well-known fds would never get an
1223 * fd > 4096, i.e. beyond the first memory page */
1225 stream = ev->data.ptr;
/* A result <= 0 from the stream processor means the stream is torn down. */
1227 if (stdout_stream_process(stream) <= 0)
1228 stdout_stream_free(stream);
1233 log_error("Unknown event.");
/* Block SIGINT, SIGTERM, SIGUSR1 and SIGUSR2 for the process and receive
 * them through a signalfd registered with the server's epoll instance.
 *
 * The signal mask is installed with SIG_SETMASK, i.e. it replaces the current
 * mask. The signalfd is created non-blocking and close-on-exec, stored in
 * s->signal_fd and added to s->epoll_fd for EPOLLIN. Failures of signalfd()
 * and epoll_ctl() are logged as errors. */
1237 static int open_signalfd(Server *s) {
1239 struct epoll_event ev;
1243 assert_se(sigemptyset(&mask) == 0);
1244 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1245 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1247 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1248 if (s->signal_fd < 0) {
1249 log_error("signalfd(): %m");
1254 ev.events = EPOLLIN;
1255 ev.data.fd = s->signal_fd;
1257 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1258 log_error("epoll_ctl(): %m");
/* Apply journald settings given on the kernel command line.
 *
 * Reads the single line of /proc/cmdline (after a detect_container() check)
 * and walks its quoted words. Recognized switches, each taking a boolean:
 *   systemd.journald.forward_to_syslog=   -> s->forward_to_syslog
 *   systemd.journald.forward_to_kmsg=     -> s->forward_to_kmsg
 *   systemd.journald.forward_to_console=  -> s->forward_to_console
 * Unparsable values and any other word starting with "systemd.journald" only
 * produce a warning. A failure to read /proc/cmdline is logged and
 * ignored. */
1265 static int server_parse_proc_cmdline(Server *s) {
1266 char _cleanup_free_ *line = NULL;
1271 if (detect_container(NULL) > 0)
1274 r = read_one_line_file("/proc/cmdline", &line);
1276 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1280 FOREACH_WORD_QUOTED(w, l, line, state) {
1281 char _cleanup_free_ *word;
1283 word = strndup(w, l);
/* The numeric offsets below (35, 33, 36) are the lengths of the respective
 * switch prefixes, so word + offset points at the value. */
1287 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1288 r = parse_boolean(word + 35);
1290 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1292 s->forward_to_syslog = r;
1293 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1294 r = parse_boolean(word + 33);
1296 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1298 s->forward_to_kmsg = r;
1299 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1300 r = parse_boolean(word + 36);
1302 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1304 s->forward_to_console = r;
1305 } else if (startswith(word, "systemd.journald"))
1306 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Load "/etc/systemd/journald.conf" into the server object.
 *
 * The file is opened with mode "re" (read, close-on-exec) and parsed with
 * config_parse() for the "Journal" section, using journald_gperf_lookup to
 * resolve settings and s as the target. ENOENT on open is special-cased; any
 * other open failure and any parse failure are logged as warnings. */
1312 static int server_parse_config_file(Server *s) {
1313 static const char *fn = "/etc/systemd/journald.conf";
1314 FILE _cleanup_fclose_ *f = NULL;
1319 f = fopen(fn, "re");
1321 if (errno == ENOENT)
1324 log_warning("Failed to open configuration file %s: %m", fn);
1328 r = config_parse(fn, f, "Journal\0", config_item_perf_lookup,
1329 (void*) journald_gperf_lookup, false, s);
1331 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Create the timer fd used to schedule journal syncs and register it with
 * the server's epoll instance.
 *
 * The timer is a CLOCK_MONOTONIC timerfd created close-on-exec and stored in
 * s->sync_timer_fd. It is added to s->epoll_fd for EPOLLIN; it is armed by
 * server_schedule_sync() and disarmed by server_sync(). A failure to register
 * it is logged as an error. */
1336 static int server_open_sync_timer(Server *s) {
1338 struct epoll_event ev;
1342 s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1343 if (s->sync_timer_fd < 0)
1347 ev.events = EPOLLIN;
1348 ev.data.fd = s->sync_timer_fd;
1350 r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
/* The fd being registered is the sync timer; name it as such in the log. */
1352 log_error("Failed to add sync timer fd to epoll object: %m");
/* Arm the sync timer so that the journals get synced to disk after
 * s->sync_interval_usec microseconds.
 *
 * Does nothing new if a sync is already scheduled (s->sync_scheduled). The
 * timer is only armed when the interval is non-zero. It is armed as a
 * relative one-shot timer on s->sync_timer_fd; after that s->sync_scheduled is
 * set, and server_sync() clears it again. */
1359 int server_schedule_sync(Server *s) {
1364 if (s->sync_scheduled)
1367 if (s->sync_interval_usec) {
1368 struct itimerspec sync_timer_enable = {
1369 .it_value.tv_sec = s->sync_interval_usec / USEC_PER_SEC,
/* Sub-second remainder: microseconds left over after the whole seconds,
 * converted to nanoseconds (1000 ns per usec). */
1370 .it_value.tv_nsec = (s->sync_interval_usec % USEC_PER_SEC) * 1000ULL,
1373 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1378 s->sync_scheduled = true;
/* Initialize the server object and open everything the daemon needs.
 *
 * Steps visible here, in order:
 *   1. Mark all file descriptors as unset (-1) and install the defaults:
 *      sync interval, rate limit interval/burst, syslog forwarding enabled,
 *      and maximum log levels per target.
 *   2. Fill both metrics structures with 0xFF bytes.
 *      NOTE(review): presumably this marks every metric as "unset" -- confirm
 *      against the journal file code.
 *   3. Apply the configuration file and then the kernel command line.
 *   4. If exactly one of rate limit interval and burst is zero, set both to
 *      zero.
 *   5. Create "/run/systemd/journal", the user journal hashmap, the mmap
 *      cache and the epoll object.
 *   6. Adopt sockets passed in by the service manager (native, stdout and
 *      /dev/log sockets, at most one each; anything else is reported as
 *      unknown).
 *   7. Open the syslog, native and stdout sockets, /dev/kmsg, the kernel
 *      sequence number, the sync timer and the signalfd.
 *   8. Create the udev context and the rate limiter, and open the journal
 *      files via system_journal_open(). */
1383 int server_init(Server *s) {
1389 s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1390 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1394 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1395 s->sync_scheduled = false;
1397 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1398 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1400 s->forward_to_syslog = true;
1402 s->max_level_store = LOG_DEBUG;
1403 s->max_level_syslog = LOG_DEBUG;
1404 s->max_level_kmsg = LOG_NOTICE;
1405 s->max_level_console = LOG_INFO;
1407 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1408 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* The command line is parsed after the file, so its settings are applied
 * last. */
1410 server_parse_config_file(s);
1411 server_parse_proc_cmdline(s);
/* True when exactly one of the two rate limit settings is zero. */
1412 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1413 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1414 (long long unsigned) s->rate_limit_interval,
1415 s->rate_limit_burst);
1416 s->rate_limit_interval = s->rate_limit_burst = 0;
1419 mkdir_p("/run/systemd/journal", 0755);
1421 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1422 if (!s->user_journals)
1425 s->mmap = mmap_cache_new();
1429 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1430 if (s->epoll_fd < 0) {
1431 log_error("Failed to create epoll object: %m");
1435 n = sd_listen_fds(true);
1437 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
/* Classify each passed-in fd by socket type and bound path. */
1441 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1443 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1445 if (s->native_fd >= 0) {
1446 log_error("Too many native sockets passed.");
1452 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1454 if (s->stdout_fd >= 0) {
1455 log_error("Too many stdout sockets passed.");
1461 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1463 if (s->syslog_fd >= 0) {
1464 log_error("Too many /dev/log sockets passed.");
1471 log_error("Unknown socket passed.");
1476 r = server_open_syslog_socket(s);
1480 r = server_open_native_socket(s);
1484 r = server_open_stdout_socket(s);
1488 r = server_open_dev_kmsg(s);
1492 r = server_open_kernel_seqnum(s);
1496 r = server_open_sync_timer(s);
1500 r = open_signalfd(s);
1504 s->udev = udev_new();
1508 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1509 s->rate_limit_burst);
1513 r = system_journal_open(s);
/* Give the system journal and every per-user journal the chance to append a
 * tag, by calling journal_file_maybe_append_tag() on each with the current
 * CLOCK_REALTIME time. The runtime journal is not touched here. */
1520 void server_maybe_append_tags(Server *s) {
1526 n = now(CLOCK_REALTIME);
1528 if (s->system_journal)
1529 journal_file_maybe_append_tag(s->system_journal, n);
1531 HASHMAP_FOREACH(f, s->user_journals, i)
1532 journal_file_maybe_append_tag(f, n);
/* Release everything owned by the server object.
 *
 * Frees all stdout streams, closes the system and runtime journals and all
 * per-user journals, frees the user journal hashmap, closes every file
 * descriptor that is set (>= 0), frees the rate limiter, unmaps the kernel
 * sequence number mapping and drops the references to the mmap cache and the
 * udev context. */
1536 void server_done(Server *s) {
/* Each call frees the stream at the head of the list until none is left. */
1540 while (s->stdout_streams)
1541 stdout_stream_free(s->stdout_streams);
1543 if (s->system_journal)
1544 journal_file_close(s->system_journal);
1546 if (s->runtime_journal)
1547 journal_file_close(s->runtime_journal);
1549 while ((f = hashmap_steal_first(s->user_journals)))
1550 journal_file_close(f);
1552 hashmap_free(s->user_journals);
1554 if (s->epoll_fd >= 0)
1555 close_nointr_nofail(s->epoll_fd);
1557 if (s->signal_fd >= 0)
1558 close_nointr_nofail(s->signal_fd);
1560 if (s->syslog_fd >= 0)
1561 close_nointr_nofail(s->syslog_fd);
1563 if (s->native_fd >= 0)
1564 close_nointr_nofail(s->native_fd);
1566 if (s->stdout_fd >= 0)
1567 close_nointr_nofail(s->stdout_fd);
1569 if (s->dev_kmsg_fd >= 0)
1570 close_nointr_nofail(s->dev_kmsg_fd);
1572 if (s->sync_timer_fd >= 0)
1573 close_nointr_nofail(s->sync_timer_fd);
1576 journal_rate_limit_free(s->rate_limit);
1578 if (s->kernel_seqnum)
1579 munmap(s->kernel_seqnum, sizeof(uint64_t));
1585 mmap_cache_unref(s->mmap);
1588 udev_unref(s->udev);