1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
27 #include <sys/timerfd.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
35 #include <systemd/sd-login.h>
41 #include "journal-file.h"
42 #include "socket-util.h"
43 #include "cgroup-util.h"
47 #include "conf-parser.h"
48 #include "journal-internal.h"
49 #include "journal-vacuum.h"
50 #include "journal-authenticate.h"
51 #include "journald-server.h"
52 #include "journald-rate-limit.h"
53 #include "journald-kmsg.h"
54 #include "journald-syslog.h"
55 #include "journald-stream.h"
56 #include "journald-console.h"
57 #include "journald-native.h"
61 #include <acl/libacl.h>
66 #include <selinux/selinux.h>
/* Upper bound on simultaneously open per-user journal files: find_journal()
 * closes entries of the user_journals map while this many are open. */
69 #define USER_JOURNALS_MAX 1024
/* Default for Server.sync_interval_usec, assigned in server_init(). */
71 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
/* Defaults for Server.rate_limit_interval and Server.rate_limit_burst,
 * assigned in server_init(). */
72 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
73 #define DEFAULT_RATE_LIMIT_BURST 200
/* available_space() reuses its cached result while it is younger than this. */
75 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Configuration-file spellings of the Storage enum values, indexed by the
 * enum value itself. The two macro invocations that follow are instantiated
 * for this table; the second one names the parser config_parse_storage and
 * supplies the text used when a value cannot be parsed (presumably they
 * generate the string<->enum helpers -- the macro bodies are not visible
 * here). */
77 static const char* const storage_table[] = {
78 [STORAGE_AUTO] = "auto",
79 [STORAGE_VOLATILE] = "volatile",
80 [STORAGE_PERSISTENT] = "persistent",
81 [STORAGE_NONE] = "none"
84 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
85 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Configuration-file spellings of the SplitMode enum values, indexed by the
 * enum value itself. The macro invocations that follow are instantiated for
 * this table; the second one names the parser config_parse_split_mode.
 * NOTE(review): dispatch_message_real() also tests SPLIT_UID, whose table
 * entry is not visible in this listing. */
87 static const char* const split_mode_table[] = {
88 [SPLIT_NONE] = "none",
90 [SPLIT_LOGIN] = "login"
93 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
94 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Estimate how many more bytes the journal may consume and cache the result.
 *
 * While the value cached in s->cached_available_space is younger than
 * RECHECK_AVAILABLE_SPACE_USEC (measured on CLOCK_MONOTONIC) it is returned
 * unchanged. Otherwise the machine-ID subdirectory of /var/log/journal/
 * (with s->system_metrics, when s->system_journal is set) or of
 * /run/log/journal/ (with s->runtime_metrics) is examined and two limits are
 * computed, each clamped at zero:
 *   - max_use minus the disk usage of the journal files found there, and
 *   - the free space reported by fstatvfs() minus keep_free.
 * The outcome is stored in s->cached_available_space together with the
 * current timestamp.
 *
 * NOTE(review): the error branches, the statement guarded by the final
 * comparison and the return statement are not visible in this listing;
 * presumably the smaller of the two limits is returned -- confirm. */
96 static uint64_t available_space(Server *s) {
98 char _cleanup_free_ *p = NULL;
102 uint64_t sum = 0, avail = 0, ss_avail = 0;
104 DIR _cleanup_closedir_ *d = NULL;
108 ts = now(CLOCK_MONOTONIC);
/* Cached value still fresh enough: skip the directory scan. */
110 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
111 return s->cached_available_space;
113 r = sd_id128_get_machine(&machine);
117 if (s->system_journal) {
118 f = "/var/log/journal/";
119 m = &s->system_metrics;
121 f = "/run/log/journal/";
122 m = &s->runtime_metrics;
127 p = strappend(f, sd_id128_to_string(machine, ids));
135 if (fstatvfs(dirfd(d), &ss) < 0)
141 union dirent_storage buf;
143 r = readdir_r(d, &buf.de, &de);
/* Only regular files named *.journal or *.journal~ are counted. */
150 if (!endswith(de->d_name, ".journal") &&
151 !endswith(de->d_name, ".journal~"))
154 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
157 if (!S_ISREG(st.st_mode))
/* Disk usage is accumulated as st_blocks times 512 bytes. */
160 sum += (uint64_t) st.st_blocks * 512UL;
163 avail = sum >= m->max_use ? 0 : m->max_use - sum;
165 ss_avail = ss.f_bsize * ss.f_bavail;
167 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
169 if (ss_avail < avail)
172 s->cached_available_space = avail;
173 s->cached_available_space_timestamp = ts;
/* Resolve the GID of the "systemd-journal" group into s->file_gid.
 *
 * s->file_gid_valid is checked first (presumably to return early once the
 * lookup has been done) and is set at the end regardless of the outcome; a
 * failed lookup only produces a warning. */
178 static void server_read_file_gid(Server *s) {
179 const char *g = "systemd-journal";
184 if (s->file_gid_valid)
187 r = get_group_creds(&g, &s->file_gid);
189 log_warning("Failed to resolve '%s' group: %s", g, strerror(-r));
191 /* if we couldn't read the gid, then it will be 0, but that's
192 * fine and we shouldn't try to resolve the group again, so
193 * let's just pretend it worked right-away. */
194 s->file_gid_valid = true;
/* Best-effort fix-up of mode, ownership and ACL of journal file f.
 *
 * First makes sure s->file_gid is resolved, then sets mode 0640, owner 0 and
 * group s->file_gid on f->fd. Afterwards the file's ACL is read, an entry for
 * uid is looked up (and an ACL_USER entry for uid is created -- presumably
 * only when none was found), ACL_READ is added to that entry, the mask is
 * recalculated and the ACL is written back. Every failure is only logged as
 * a warning.
 *
 * NOTE(review): the conditions guarding the ACL part (for instance for which
 * uid values it is skipped) are not visible in this listing. */
197 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
202 acl_permset_t permset;
207 server_read_file_gid(s);
209 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
211 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
217 acl = acl_get_fd(f->fd);
219 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
223 r = acl_find_uid(acl, uid, &entry);
226 if (acl_create_entry(&acl, &entry) < 0 ||
227 acl_set_tag_type(entry, ACL_USER) < 0 ||
228 acl_set_qualifier(entry, &uid) < 0) {
229 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
234 if (acl_get_permset(entry, &permset) < 0 ||
235 acl_add_perm(permset, ACL_READ) < 0 ||
236 acl_calc_mask(&acl) < 0) {
237 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
241 if (acl_set_fd(f->fd, acl) < 0)
242 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Select the journal file that an entry attributed to uid is written to.
 *
 * An open runtime journal always wins (see the comment below). Otherwise the
 * per-user file /var/log/journal/<machine-id>/user-<uid>.journal is looked up
 * in s->user_journals and, if absent, opened, given fixed-up permissions and
 * inserted into the map; before opening, entries are evicted from the map
 * while it holds USER_JOURNALS_MAX or more files. s->system_journal is the
 * fallback when the machine ID cannot be read, the path cannot be allocated,
 * the file cannot be opened or the map insertion fails.
 *
 * NOTE(review): the first `return s->system_journal` after the runtime check
 * has a condition that is not visible in this listing (presumably uids that
 * never get a private journal). */
249 static JournalFile* find_journal(Server *s, uid_t uid) {
257 /* We split up user logs only on /var, not on /run. If the
258 * runtime file is open, we write to it exclusively, in order
259 * to guarantee proper order as soon as we flush /run to
260 * /var and close the runtime file. */
262 if (s->runtime_journal)
263 return s->runtime_journal;
266 return s->system_journal;
268 r = sd_id128_get_machine(&machine);
270 return s->system_journal;
272 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
276 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
277 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
278 return s->system_journal;
280 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
281 /* Too many open? Then let's close one */
282 f = hashmap_steal_first(s->user_journals);
284 journal_file_close(f);
287 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
291 return s->system_journal;
293 server_fix_perms(s, f, uid);
295 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
/* Could not remember the file in the map: close it again and fall back. */
297 journal_file_close(f);
298 return s->system_journal;
/* Rotate the runtime journal, the system journal and every per-user journal.
 *
 * The runtime journal is rotated with the seal argument fixed to false;
 * system and user journals use s->seal. Failures are logged, with a
 * different message depending on whether a file object remains after the
 * failed rotation. server_fix_perms() is applied to the rotated files
 * (uid 0 for runtime/system, the map key for user journals), and for user
 * journals the map entry is replaced with the new file.
 *
 * NOTE(review): the if/else structure around the log and fix-up calls is not
 * fully visible in this listing. */
304 void server_rotate(Server *s) {
310 log_debug("Rotating...");
312 if (s->runtime_journal) {
313 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
315 if (s->runtime_journal)
316 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
318 log_error("Failed to create new runtime journal: %s", strerror(-r));
320 server_fix_perms(s, s->runtime_journal, 0);
323 if (s->system_journal) {
324 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
326 if (s->system_journal)
327 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
329 log_error("Failed to create new system journal: %s", strerror(-r));
332 server_fix_perms(s, s->system_journal, 0);
335 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
336 r = journal_file_rotate(&f, s->compress, s->seal);
339 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
341 log_error("Failed to create user journal: %s", strerror(-r));
343 hashmap_replace(s->user_journals, k, f);
344 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Call journal_file_set_offline() on the system journal (if open) and on all
 * user journals, then program the sync timer with an all-zero itimerspec and
 * clear s->sync_scheduled so a new sync can be scheduled. Failures are
 * logged. */
349 void server_sync(Server *s) {
/* Zero-initialised timer spec, used below to switch the timer off. */
355 static const struct itimerspec sync_timer_disable = {};
357 if (s->system_journal) {
358 r = journal_file_set_offline(s->system_journal);
360 log_error("Failed to sync system journal: %s", strerror(-r));
363 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
364 r = journal_file_set_offline(f);
366 log_error("Failed to sync user journal: %s", strerror(-r));
369 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
371 log_error("Failed to disable max timer: %m");
373 s->sync_scheduled = false;
/* Vacuum this machine's journal directories.
 *
 * Resets s->oldest_file_usec, then runs journal_directory_vacuum() on
 * /var/log/journal/<machine-id> when the system journal is open and on
 * /run/log/journal/<machine-id> when the runtime journal is open, passing
 * the matching max_use/keep_free metrics and s->max_retention_usec. A result
 * of -ENOENT is not reported; other errors are logged. Finally the cached
 * available-space value is invalidated by zeroing its timestamp. */
376 void server_vacuum(Server *s) {
382 log_debug("Vacuuming...");
384 s->oldest_file_usec = 0;
386 r = sd_id128_get_machine(&machine);
388 log_error("Failed to get machine ID: %s", strerror(-r));
392 sd_id128_to_string(machine, ids);
394 if (s->system_journal) {
395 p = strappend("/var/log/journal/", ids);
401 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
402 if (r < 0 && r != -ENOENT)
403 log_error("Failed to vacuum %s: %s", p, strerror(-r));
407 if (s->runtime_journal) {
408 p = strappend("/run/log/journal/", ids);
414 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
415 if (r < 0 && r != -ENOENT)
416 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* Force available_space() to recompute on its next call. */
420 s->cached_available_space_timestamp = 0;
/* Build a shortened form of pid's cgroup path (SYSTEMD_CGROUP_CONTROLLER
 * hierarchy) by stripping the cgroup path of PID 1 from its front.
 *
 * A trailing "/system" is cut off PID 1's path before the comparison. When
 * the process path starts with the resulting prefix, a strdup()ed copy of
 * the remainder is produced.
 *
 * NOTE(review): the handling of init_path == "/", of a non-matching prefix,
 * of errors and the return statement are not visible in this listing;
 * presumably the new string (or NULL) is returned and owned by the caller. */
423 static char *shortened_cgroup_path(pid_t pid) {
425 char _cleanup_free_ *process_path = NULL, *init_path = NULL;
430 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
434 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
438 if (endswith(init_path, "/system"))
/* 7 is the length of "/system". */
439 init_path[strlen(init_path) - 7] = 0;
440 else if (streq(init_path, "/"))
443 if (startswith(process_path, init_path)) {
444 path = strdup(process_path + strlen(init_path));
453 bool shall_try_append_again(JournalFile *f, int r) {
455 /* -E2BIG Hit configured limit
457 -EDQUOT Quota limit hit
459 -EHOSTDOWN Other machine
460 -EBUSY Unclean shutdown
461 -EPROTONOSUPPORT Unsupported feature
464 -ESHUTDOWN Already archived */
466 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
467 log_debug("%s: Allocation limit reached, rotating.", f->path);
468 else if (r == -EHOSTDOWN)
469 log_info("%s: Journal file from other machine, rotating.", f->path);
470 else if (r == -EBUSY)
471 log_info("%s: Unclean shutdown, rotating.", f->path);
472 else if (r == -EPROTONOSUPPORT)
473 log_info("%s: Unsupported feature, rotating.", f->path);
474 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
475 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Append an entry consisting of the n fields in iovec to the journal that
 * find_journal(s, uid) selects.
 *
 * When journal_file_rotate_suggested() fires, the journal is looked up again
 * before writing (the rotation/vacuum calls in between are not visible in
 * this listing). After the first append a sync is scheduled via
 * server_schedule_sync() (presumably on success only). If the append failed,
 * the entry is given up with an error message when a vacuum already took
 * place or shall_try_append_again() declines; otherwise the journal is
 * looked up once more and the write is retried a single time. */
482 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
484 bool vacuumed = false;
491 f = find_journal(s, uid);
495 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
496 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
501 f = find_journal(s, uid);
506 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
508 server_schedule_sync(s);
512 if (vacuumed || !shall_try_append_again(f, r)) {
513 log_error("Failed to write entry, ignoring: %s", strerror(-r));
520 f = find_journal(s, uid);
524 log_debug("Retrying write.");
525 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
527 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* Append the metadata fields to an entry and write it to the journal.
 *
 * iovec holds n fields on entry and has room for m elements; the assert
 * below requires space for N_IOVEC_META_FIELDS additional ones. Fields are
 * derived from the sender credentials (_PID, _UID, _GID, _COMM, _EXE,
 * _CMDLINE, _AUDIT_SESSION, _AUDIT_LOGINUID, _SYSTEMD_CGROUP,
 * _SYSTEMD_SESSION, _SYSTEMD_OWNER_UID, _SYSTEMD_UNIT or _SYSTEMD_USER_UNIT,
 * _SELINUX_CONTEXT), followed by _SOURCE_REALTIME_TIMESTAMP, _BOOT_ID,
 * _MACHINE_ID and _HOSTNAME. A field whose string cannot be built is left
 * out. Finally the journal UID is chosen according to s->split_mode and the
 * entry is handed to write_to_journal().
 *
 * The strings referenced by the added iovec elements are the _cleanup_free_
 * locals declared below.
 *
 * NOTE(review): parts of the parameter list and most of the guarding
 * conditions (e.g. whether ucred, label or tv is set) are not visible in
 * this listing. */
530 static void dispatch_message_real(
532 struct iovec *iovec, unsigned n, unsigned m,
535 const char *label, size_t label_len,
536 const char *unit_id) {
538 char _cleanup_free_ *pid = NULL, *uid = NULL, *gid = NULL,
539 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
540 *comm = NULL, *cmdline = NULL, *hostname = NULL,
541 *audit_session = NULL, *audit_loginuid = NULL,
542 *exe = NULL, *cgroup = NULL, *session = NULL,
543 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
549 uid_t realuid = 0, owner = 0, journal_uid;
550 bool owner_valid = false;
555 assert(n + N_IOVEC_META_FIELDS <= m);
561 realuid = ucred->uid;
563 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
564 IOVEC_SET_STRING(iovec[n++], pid);
566 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
567 IOVEC_SET_STRING(iovec[n++], uid);
569 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
570 IOVEC_SET_STRING(iovec[n++], gid);
572 r = get_process_comm(ucred->pid, &t);
574 comm = strappend("_COMM=", t);
578 IOVEC_SET_STRING(iovec[n++], comm);
581 r = get_process_exe(ucred->pid, &t);
583 exe = strappend("_EXE=", t);
587 IOVEC_SET_STRING(iovec[n++], exe);
590 r = get_process_cmdline(ucred->pid, 0, false, &t);
592 cmdline = strappend("_CMDLINE=", t);
596 IOVEC_SET_STRING(iovec[n++], cmdline);
599 r = audit_session_from_pid(ucred->pid, &audit);
601 if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
602 IOVEC_SET_STRING(iovec[n++], audit_session);
604 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
606 if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
607 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
609 t = shortened_cgroup_path(ucred->pid);
611 cgroup = strappend("_SYSTEMD_CGROUP=", t);
615 IOVEC_SET_STRING(iovec[n++], cgroup);
619 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
620 session = strappend("_SYSTEMD_SESSION=", t);
624 IOVEC_SET_STRING(iovec[n++], session);
627 if (sd_pid_get_owner_uid(ucred->pid, &owner) >= 0) {
629 if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
630 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* Unit name: taken from the cgroup first, with the caller-supplied unit_id
 * as the last resort. */
634 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
635 unit = strappend("_SYSTEMD_UNIT=", t);
637 } else if (cg_pid_get_user_unit(ucred->pid, &t) >= 0) {
638 unit = strappend("_SYSTEMD_USER_UNIT=", t);
640 } else if (unit_id) {
642 unit = strappend("_SYSTEMD_USER_UNIT=", unit_id);
644 unit = strappend("_SYSTEMD_UNIT=", unit_id);
648 IOVEC_SET_STRING(iovec[n++], unit);
/* sizeof of the literal includes its NUL, so this leaves room for the prefix,
 * label_len label bytes and the terminator written below. */
652 selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
653 if (selinux_context) {
654 *((char*) mempcpy(stpcpy(selinux_context, "_SELINUX_CONTEXT="), label, label_len)) = 0;
655 IOVEC_SET_STRING(iovec[n++], selinux_context);
658 security_context_t con;
660 if (getpidcon(ucred->pid, &con) >= 0) {
661 selinux_context = strappend("_SELINUX_CONTEXT=", con);
663 IOVEC_SET_STRING(iovec[n++], selinux_context);
671 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
672 (unsigned long long) timeval_load(tv)) >= 0)
673 IOVEC_SET_STRING(iovec[n++], source_time);
676 /* Note that strictly speaking storing the boot id here is
677 * redundant since the entry includes this in-line
678 * anyway. However, we need this indexed, too. */
679 r = sd_id128_get_boot(&id);
681 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
682 IOVEC_SET_STRING(iovec[n++], boot_id);
684 r = sd_id128_get_machine(&id);
686 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
687 IOVEC_SET_STRING(iovec[n++], machine_id);
689 t = gethostname_malloc();
691 hostname = strappend("_HOSTNAME=", t);
694 IOVEC_SET_STRING(iovec[n++], hostname);
699 if (s->split_mode == SPLIT_UID && realuid > 0)
700 /* Split up strictly by any UID */
701 journal_uid = realuid;
702 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
703 /* Split up by login UIDs, this avoids creation of
704 * individual journals for system UIDs. We do this
705 * only if the realuid is not root, in order not to
706 * accidentally leak privileged information to the
707 * user that is logged by a privileged process that is
708 * part of an unprivileged session.*/
713 write_to_journal(s, journal_uid, iovec, n);
/* Log a message originating from journald itself.
 *
 * Builds an entry with PRIORITY=6, _TRANSPORT=driver and a MESSAGE= field
 * formatted printf-style from format and the variadic arguments (truncated
 * to the size of the local buffer). Unless message_id equals SD_ID128_NULL a
 * message-ID field is added as well. The entry is dispatched with the
 * daemon's own pid, uid and gid as credentials and without timestamp, label
 * or unit id. */
716 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
717 char mid[11 + 32 + 1];
718 char buffer[16 + LINE_MAX + 1];
/* Room for the (at most 4) fields set here plus the metadata fields that
 * dispatch_message_real() appends. */
719 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
727 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
728 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* 8 is the length of "MESSAGE="; the formatted text follows directly. */
730 memcpy(buffer, "MESSAGE=", 8);
731 va_start(ap, format);
732 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
734 char_array_0(buffer);
735 IOVEC_SET_STRING(iovec[n++], buffer);
737 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
738 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
740 IOVEC_SET_STRING(iovec[n++], mid);
744 ucred.pid = getpid();
745 ucred.uid = getuid();
746 ucred.gid = getgid();
748 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Entry point for client messages: filter by level, rate-limit, then store.
 *
 * The level part of priority is compared against s->max_level_store
 * (presumably messages above it are dropped -- the guarded statement is not
 * visible here). The sender's shortened cgroup path, truncated as described
 * in the comment below, is the key passed to journal_rate_limit_test()
 * together with the priority and the currently available space. When the
 * rate limiter reports suppressed messages, a driver message announcing
 * their number is emitted. The entry itself goes to dispatch_message_real().
 *
 * NOTE(review): parts of the parameter list and the branches that skip rate
 * limiting are not visible in this listing. */
751 void server_dispatch_message(
753 struct iovec *iovec, unsigned n, unsigned m,
756 const char *label, size_t label_len,
761 char _cleanup_free_ *path = NULL;
765 assert(iovec || n == 0);
770 if (LOG_PRI(priority) > s->max_level_store)
776 path = shortened_cgroup_path(ucred->pid);
780 /* example: /user/lennart/3/foobar
781 * /system/dbus.service/foobar
783 * So let's cut of everything past the third /, since that is
784 * where user directories start */
786 c = strchr(path, '/');
788 c = strchr(c+1, '/');
790 c = strchr(c+1, '/');
796 rl = journal_rate_limit_test(s->rate_limit, path,
797 priority & LOG_PRIMASK, available_space(s));
802 /* Write a suppression message if we suppressed something */
804 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
805 "Suppressed %u messages from %s", rl - 1, path);
808 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
/* Open the persistent and/or the runtime journal as the configuration and
 * current state require.
 *
 * Persistent journal (/var/log/journal/<machine-id>/system.journal): opened
 * when none is open yet, s->storage is STORAGE_PERSISTENT or STORAGE_AUTO,
 * and /run/systemd/journal/flushed exists. Open errors other than -ENOENT
 * and -EROFS are logged as warnings.
 *
 * Runtime journal (/run/log/journal/<machine-id>/system.journal): handled
 * when none is open yet and s->storage is not STORAGE_NONE. With a system
 * journal present it is opened without O_CREAT, i.e. only if it already
 * exists; otherwise its parent directories are created and it is opened
 * with O_CREAT.
 *
 * After a journal has been opened its permissions are fixed and a driver
 * message announces its max_use limit.
 *
 * NOTE(review): the return statements are not visible in this listing. */
812 static int system_journal_open(Server *s) {
818 r = sd_id128_get_machine(&machine);
822 sd_id128_to_string(machine, ids);
824 if (!s->system_journal &&
825 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
826 access("/run/systemd/journal/flushed", F_OK) >= 0) {
828 /* If in auto mode: first try to create the machine
829 * path, but not the prefix.
831 * If in persistent mode: create /var/log/journal and
832 * the machine path */
834 if (s->storage == STORAGE_PERSISTENT)
835 (void) mkdir("/var/log/journal/", 0755);
837 fn = strappend("/var/log/journal/", ids);
841 (void) mkdir(fn, 0755);
844 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
848 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
852 char fb[FORMAT_BYTES_MAX];
854 server_fix_perms(s, s->system_journal, 0);
855 server_driver_message(s, SD_ID128_NULL, "Allowing system journal files to grow to %s.",
856 format_bytes(fb, sizeof(fb), s->system_metrics.max_use));
860 if (r != -ENOENT && r != -EROFS)
861 log_warning("Failed to open system journal: %s", strerror(-r));
867 if (!s->runtime_journal &&
868 (s->storage != STORAGE_NONE)) {
870 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
874 if (s->system_journal) {
876 /* Try to open the runtime journal, but only
877 * if it already exists, so that we can flush
878 * it into the system journal */
880 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
885 log_warning("Failed to open runtime journal: %s", strerror(-r));
892 /* OK, we really need the runtime journal, so create
893 * it if necessary. */
895 (void) mkdir_parents(fn, 0755);
896 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
900 log_error("Failed to open runtime journal: %s", strerror(-r));
905 if (s->runtime_journal) {
906 char fb[FORMAT_BYTES_MAX];
908 server_fix_perms(s, s->runtime_journal, 0);
909 server_driver_message(s, SD_ID128_NULL, "Allowing runtime journal files to grow to %s.",
910 format_bytes(fb, sizeof(fb), s->runtime_metrics.max_use));
/* Copy the contents of the runtime journal into the system journal, then
 * close the runtime journal and remove /run/log/journal.
 *
 * Preconditions checked first: s->storage is STORAGE_AUTO or
 * STORAGE_PERSISTENT, a runtime journal is open, and (after calling
 * system_journal_open()) a system journal is open. The runtime journal is
 * read through the sd-journal API with the data threshold disabled, and
 * each entry is copied with journal_file_copy_entry(); a failed copy is
 * retried once when shall_try_append_again() approves.
 *
 * NOTE(review): the return values, the rotation/vacuum between the two copy
 * attempts and the cleanup of j are not visible in this listing. */
917 int server_flush_to_var(Server *s) {
920 sd_journal *j = NULL;
924 if (s->storage != STORAGE_AUTO &&
925 s->storage != STORAGE_PERSISTENT)
928 if (!s->runtime_journal)
931 system_journal_open(s);
933 if (!s->system_journal)
936 log_debug("Flushing to /var...");
938 r = sd_id128_get_machine(&machine);
940 log_error("Failed to get machine id: %s", strerror(-r));
944 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
946 log_error("Failed to read runtime journal: %s", strerror(-r));
950 sd_journal_set_data_threshold(j, 0);
952 SD_JOURNAL_FOREACH(j) {
957 assert(f && f->current_offset > 0);
959 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
961 log_error("Can't read entry: %s", strerror(-r));
965 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
969 if (!shall_try_append_again(s->system_journal, r)) {
970 log_error("Can't write entry: %s", strerror(-r));
977 log_debug("Retrying write.");
978 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
980 log_error("Can't write entry: %s", strerror(-r));
986 journal_file_post_change(s->system_journal);
988 journal_file_close(s->runtime_journal);
989 s->runtime_journal = NULL;
992 rm_rf("/run/log/journal", false, true, false);
/* Handle a single epoll event, dispatching on the descriptor it refers to:
 *
 *   s->signal_fd      read one signalfd_siginfo; SIGUSR1 touches
 *                     /run/systemd/journal/flushed and flushes to /var,
 *                     SIGUSR2 has its own branch, other signals are logged
 *   s->sync_timer_fd  read the timer's expiration counter (sync request)
 *   s->dev_kmsg_fd    server_read_dev_kmsg()
 *   s->native_fd,
 *   s->syslog_fd      size the receive buffer via SIOCINQ, recvmsg() with
 *                     control data, extract credentials, SELinux label,
 *                     timestamp and passed file descriptors, then hand the
 *                     payload to the syslog or native handlers
 *   s->stdout_fd      accept a new stdout stream
 *   anything else     ev->data.ptr is taken to be a StdoutStream
 *
 * Except for stream events, any event mask other than plain EPOLLIN is
 * reported as invalid.
 *
 * NOTE(review): return values, the receive loop structure and the bodies of
 * several branches are not visible in this listing. */
999 int process_event(Server *s, struct epoll_event *ev) {
1003 if (ev->data.fd == s->signal_fd) {
1004 struct signalfd_siginfo sfsi;
1007 if (ev->events != EPOLLIN) {
1008 log_error("Got invalid event from epoll.");
1012 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1013 if (n != sizeof(sfsi)) {
1018 if (errno == EINTR || errno == EAGAIN)
1024 if (sfsi.ssi_signo == SIGUSR1) {
1025 touch("/run/systemd/journal/flushed");
1026 server_flush_to_var(s);
1031 if (sfsi.ssi_signo == SIGUSR2) {
1037 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1041 } else if (ev->data.fd == s->sync_timer_fd) {
1045 log_debug("Got sync request from epoll.");
1047 r = read(ev->data.fd, (void *)&t, sizeof(t));
1054 } else if (ev->data.fd == s->dev_kmsg_fd) {
1057 if (ev->events != EPOLLIN) {
1058 log_error("Got invalid event from epoll.");
1062 r = server_read_dev_kmsg(s);
1068 } else if (ev->data.fd == s->native_fd ||
1069 ev->data.fd == s->syslog_fd) {
1071 if (ev->events != EPOLLIN) {
1072 log_error("Got invalid event from epoll.");
1077 struct msghdr msghdr;
1079 struct ucred *ucred = NULL;
1080 struct timeval *tv = NULL;
1081 struct cmsghdr *cmsg;
1083 size_t label_len = 0;
1085 struct cmsghdr cmsghdr;
1087 /* We use NAME_MAX space for the
1088 * SELinux label here. The kernel
1089 * currently enforces no limit, but
1090 * according to suggestions from the
1091 * SELinux people this will change and
1092 * it will probably be identical to
1093 * NAME_MAX. For now we use that, but
1094 * this should be updated one day when
1095 * the final limit is known.*/
1096 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1097 CMSG_SPACE(sizeof(struct timeval)) +
1098 CMSG_SPACE(sizeof(int)) + /* fd */
1099 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask the socket how many bytes are pending so the buffer can be grown
 * first. */
1106 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1107 log_error("SIOCINQ failed: %m");
1111 if (s->buffer_size < (size_t) v) {
1115 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1116 b = realloc(s->buffer, l+1);
1119 log_error("Couldn't increase buffer.");
1128 iovec.iov_base = s->buffer;
1129 iovec.iov_len = s->buffer_size;
1133 msghdr.msg_iov = &iovec;
1134 msghdr.msg_iovlen = 1;
1135 msghdr.msg_control = &control;
1136 msghdr.msg_controllen = sizeof(control);
1138 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1141 if (errno == EINTR || errno == EAGAIN)
1144 log_error("recvmsg() failed: %m");
/* Walk the ancillary data; credentials and timestamp are only accepted
 * when the control message has exactly the expected length. */
1148 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1150 if (cmsg->cmsg_level == SOL_SOCKET &&
1151 cmsg->cmsg_type == SCM_CREDENTIALS &&
1152 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1153 ucred = (struct ucred*) CMSG_DATA(cmsg);
1154 else if (cmsg->cmsg_level == SOL_SOCKET &&
1155 cmsg->cmsg_type == SCM_SECURITY) {
1156 label = (char*) CMSG_DATA(cmsg);
1157 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1158 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1159 cmsg->cmsg_type == SO_TIMESTAMP &&
1160 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1161 tv = (struct timeval*) CMSG_DATA(cmsg);
1162 else if (cmsg->cmsg_level == SOL_SOCKET &&
1163 cmsg->cmsg_type == SCM_RIGHTS) {
1164 fds = (int*) CMSG_DATA(cmsg);
1165 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1169 if (ev->data.fd == s->syslog_fd) {
1172 if (n > 0 && n_fds == 0) {
1173 e = memchr(s->buffer, '\n', n);
1179 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1180 } else if (n_fds > 0)
1181 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native protocol: either an inline payload without fds, or an empty
 * datagram carrying exactly one fd. */
1184 if (n > 0 && n_fds == 0)
1185 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1186 else if (n == 0 && n_fds == 1)
1187 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1189 log_warning("Got too many file descriptors via native socket. Ignoring.");
1192 close_many(fds, n_fds);
1197 } else if (ev->data.fd == s->stdout_fd) {
1199 if (ev->events != EPOLLIN) {
1200 log_error("Got invalid event from epoll.");
1204 stdout_stream_new(s);
1208 StdoutStream *stream;
/* Only EPOLLIN and/or EPOLLHUP bits may be set for stream events. */
1210 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1211 log_error("Got invalid event from epoll.");
1215 /* If it is none of the well-known fds, it must be an
1216 * stdout stream fd. Note that this is a bit ugly here
1217 * (since we rely that none of the well-known fds
1218 * could be interpreted as pointer), but nonetheless
1219 * safe, since the well-known fds would never get an
1220 * fd > 4096, i.e. beyond the first memory page */
1222 stream = ev->data.ptr;
1224 if (stdout_stream_process(stream) <= 0)
1225 stdout_stream_free(stream);
1230 log_error("Unknown event.");
/* Set the process signal mask to SIGINT, SIGTERM, SIGUSR1 and SIGUSR2,
 * create a non-blocking, close-on-exec signalfd for that set in
 * s->signal_fd, and register it with s->epoll_fd for EPOLLIN.
 *
 * Failures of signalfd() and epoll_ctl() are logged.
 * NOTE(review): the return statements are not visible in this listing. */
1234 static int open_signalfd(Server *s) {
1236 struct epoll_event ev;
1240 assert_se(sigemptyset(&mask) == 0);
1241 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1242 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1244 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1245 if (s->signal_fd < 0) {
1246 log_error("signalfd(): %m");
1251 ev.events = EPOLLIN;
1252 ev.data.fd = s->signal_fd;
1254 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1255 log_error("epoll_ctl(): %m");
/* Apply journald switches found on the kernel command line.
 *
 * Reads /proc/cmdline and, word by word, recognises
 *   systemd.journald.forward_to_syslog=
 *   systemd.journald.forward_to_kmsg=
 *   systemd.journald.forward_to_console=
 * whose boolean value is stored in the matching Server field. Values that
 * do not parse, and any other word starting with "systemd.journald", only
 * produce a warning. The container check at the top presumably skips the
 * whole function inside containers (the guarded statement is not visible).
 *
 * The numeric offsets below (35, 33, 36) are the lengths of the respective
 * option prefixes. */
1262 static int server_parse_proc_cmdline(Server *s) {
1263 char _cleanup_free_ *line = NULL;
1268 if (detect_container(NULL) > 0)
1271 r = read_one_line_file("/proc/cmdline", &line);
1273 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1277 FOREACH_WORD_QUOTED(w, l, line, state) {
1278 char _cleanup_free_ *word;
1280 word = strndup(w, l);
1284 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1285 r = parse_boolean(word + 35);
1287 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1289 s->forward_to_syslog = r;
1290 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1291 r = parse_boolean(word + 33);
1293 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1295 s->forward_to_kmsg = r;
1296 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1297 r = parse_boolean(word + 36);
1299 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1301 s->forward_to_console = r;
1302 } else if (startswith(word, "systemd.journald"))
1303 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Load /etc/systemd/journald.conf into s.
 *
 * The file is parsed with config_parse() for the "Journal" section, using
 * journald_gperf_lookup to resolve settings. ENOENT from fopen() is treated
 * separately from other open errors (presumably a missing file is not an
 * error); other open failures and parse failures are logged as warnings.
 * NOTE(review): the return statements are not visible in this listing. */
1309 static int server_parse_config_file(Server *s) {
1310 static const char *fn = "/etc/systemd/journald.conf";
1311 FILE _cleanup_fclose_ *f = NULL;
/* "e" in the mode string requests close-on-exec. */
1316 f = fopen(fn, "re");
1318 if (errno == ENOENT)
1321 log_warning("Failed to open configuration file %s: %m", fn);
1325 r = config_parse(fn, f, "Journal\0", config_item_perf_lookup,
1326 (void*) journald_gperf_lookup, false, s);
1328 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Create the CLOCK_MONOTONIC, close-on-exec timerfd stored in
 * s->sync_timer_fd and add it to s->epoll_fd, watching for EPOLLIN.
 *
 * A failing epoll_ctl() is logged.
 * NOTE(review): the return statements are not visible in this listing. */
1333 static int server_open_sync_timer(Server *s) {
1335 struct epoll_event ev;
1339 s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1340 if (s->sync_timer_fd < 0)
1344 ev.events = EPOLLIN;
1345 ev.data.fd = s->sync_timer_fd;
1347 r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1349 log_error("Failed to add idle timer fd to epoll object: %m");
1356 int server_schedule_sync(Server *s) {
1359 struct itimerspec sync_timer_enable;
1363 if (s->sync_scheduled)
1366 if (s->sync_interval_usec) {
1367 zero(sync_timer_enable);
1368 sync_timer_enable.it_value.tv_sec = s->sync_interval_usec / USEC_PER_SEC;
1369 sync_timer_enable.it_value.tv_nsec = s->sync_interval_usec % MSEC_PER_SEC;
1371 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1376 s->sync_scheduled = true;
/* Initialise a Server and open everything it needs to run.
 *
 * In order: mark all file descriptors as unset (-1); install defaults for
 * sync interval, rate limiting, syslog forwarding and the per-target maximum
 * log levels; fill both metrics structures with 0xFF bytes; read the
 * configuration file and the kernel command line; normalise the rate-limit
 * settings; create /run/systemd/journal, the user-journal map, the mmap
 * cache and the epoll instance; classify the descriptors obtained from
 * sd_listen_fds() as native, stdout or /dev/log socket; open the syslog,
 * native and stdout sockets, /dev/kmsg, the kernel sequence number, the
 * sync timer and the signalfd; create the udev context and the rate
 * limiter; finally open the journal files.
 *
 * NOTE(review): the error checks after the individual steps, the
 * assignments of the passed-in sockets and the return statements are not
 * visible in this listing. */
1381 int server_init(Server *s) {
1387 s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1388 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1392 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1393 s->sync_scheduled = false;
1395 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1396 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1398 s->forward_to_syslog = true;
1400 s->max_level_store = LOG_DEBUG;
1401 s->max_level_syslog = LOG_DEBUG;
1402 s->max_level_kmsg = LOG_NOTICE;
1403 s->max_level_console = LOG_INFO;
/* All-ones in every metrics field; presumably the marker for "not
 * configured" -- TODO confirm against the journal-file code. */
1405 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1406 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1408 server_parse_config_file(s);
1409 server_parse_proc_cmdline(s);
/* Exactly one of interval/burst is zero: treat that as rate limiting
 * disabled and zero both. */
1410 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1411 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1412 (long long unsigned) s->rate_limit_interval,
1413 s->rate_limit_burst);
1414 s->rate_limit_interval = s->rate_limit_burst = 0;
1417 mkdir_p("/run/systemd/journal", 0755);
1419 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1420 if (!s->user_journals)
1423 s->mmap = mmap_cache_new();
1427 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1428 if (s->epoll_fd < 0) {
1429 log_error("Failed to create epoll object: %m");
1433 n = sd_listen_fds(true);
1435 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
/* Identify each passed-in descriptor by socket type and bound path; at
 * most one descriptor of each kind is accepted. */
1439 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1441 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1443 if (s->native_fd >= 0) {
1444 log_error("Too many native sockets passed.");
1450 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1452 if (s->stdout_fd >= 0) {
1453 log_error("Too many stdout sockets passed.");
1459 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1461 if (s->syslog_fd >= 0) {
1462 log_error("Too many /dev/log sockets passed.");
1469 log_error("Unknown socket passed.");
1474 r = server_open_syslog_socket(s);
1478 r = server_open_native_socket(s);
1482 r = server_open_stdout_socket(s);
1486 r = server_open_dev_kmsg(s);
1490 r = server_open_kernel_seqnum(s);
1494 r = server_open_sync_timer(s);
1498 r = open_signalfd(s);
1502 s->udev = udev_new();
1506 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1507 s->rate_limit_burst);
1511 r = system_journal_open(s);
/* Call journal_file_maybe_append_tag() with the current CLOCK_REALTIME time
 * on the system journal (if open) and on every user journal.
 * NOTE(review): any conditional compilation around this body is not visible
 * in this listing. */
1518 void server_maybe_append_tags(Server *s) {
1524 n = now(CLOCK_REALTIME);
1526 if (s->system_journal)
1527 journal_file_maybe_append_tag(s->system_journal, n);
1529 HASHMAP_FOREACH(f, s->user_journals, i)
1530 journal_file_maybe_append_tag(f, n);
/* Release the resources held by a Server.
 *
 * Frees all stdout streams, closes the system, runtime and user journals,
 * frees the user-journal map, closes every file descriptor that is set
 * (>= 0), frees the rate limiter, unmaps the kernel sequence number mapping
 * and drops the references on the mmap cache and the udev context.
 * The Server structure itself is not freed here. */
1534 void server_done(Server *s) {
/* stdout_stream_free() is expected to unlink the stream from the list
 * head, otherwise this loop would not terminate -- TODO confirm. */
1538 while (s->stdout_streams)
1539 stdout_stream_free(s->stdout_streams);
1541 if (s->system_journal)
1542 journal_file_close(s->system_journal);
1544 if (s->runtime_journal)
1545 journal_file_close(s->runtime_journal);
1547 while ((f = hashmap_steal_first(s->user_journals)))
1548 journal_file_close(f);
1550 hashmap_free(s->user_journals);
1552 if (s->epoll_fd >= 0)
1553 close_nointr_nofail(s->epoll_fd);
1555 if (s->signal_fd >= 0)
1556 close_nointr_nofail(s->signal_fd);
1558 if (s->syslog_fd >= 0)
1559 close_nointr_nofail(s->syslog_fd);
1561 if (s->native_fd >= 0)
1562 close_nointr_nofail(s->native_fd);
1564 if (s->stdout_fd >= 0)
1565 close_nointr_nofail(s->stdout_fd);
1567 if (s->dev_kmsg_fd >= 0)
1568 close_nointr_nofail(s->dev_kmsg_fd);
1570 if (s->sync_timer_fd >= 0)
1571 close_nointr_nofail(s->sync_timer_fd);
1574 journal_rate_limit_free(s->rate_limit);
1576 if (s->kernel_seqnum)
1577 munmap(s->kernel_seqnum, sizeof(uint64_t));
1583 mmap_cache_unref(s->mmap);
1586 udev_unref(s->udev);