1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
29 #include <systemd/sd-journal.h>
30 #include <systemd/sd-messages.h>
31 #include <systemd/sd-daemon.h>
34 #include <systemd/sd-login.h>
39 #include "journal-file.h"
40 #include "socket-util.h"
41 #include "cgroup-util.h"
45 #include "conf-parser.h"
46 #include "journal-internal.h"
47 #include "journal-vacuum.h"
48 #include "journal-authenticate.h"
49 #include "journald-server.h"
50 #include "journald-rate-limit.h"
51 #include "journald-kmsg.h"
52 #include "journald-syslog.h"
53 #include "journald-stream.h"
54 #include "journald-console.h"
55 #include "journald-native.h"
59 #include <acl/libacl.h>
64 #include <selinux/selinux.h>
67 #define USER_JOURNALS_MAX 1024
69 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
70 #define DEFAULT_RATE_LIMIT_BURST 200
72 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* String names for the Storage enum, indexed by enum constant. The two
 * macros below consume this table: one for the storage string lookup,
 * the other for the config_parse_storage() config parser. */
74 static const char* const storage_table[] = {
75 [STORAGE_AUTO] = "auto",
76 [STORAGE_VOLATILE] = "volatile",
77 [STORAGE_PERSISTENT] = "persistent",
78 [STORAGE_NONE] = "none"
81 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
82 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* String names for the SplitMode enum, indexed by enum constant. The two
 * macros below consume this table: one for the split_mode string lookup,
 * the other for the config_parse_split_mode() config parser. */
84 static const char* const split_mode_table[] = {
85 [SPLIT_NONE] = "none",
87 [SPLIT_LOGIN] = "login"
90 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
91 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Computes how many more bytes the journal may use in its current directory
 * and caches the result in s->cached_available_space.
 *
 * The figure is derived from two limits of the applicable metrics: max_use
 * minus the disk usage of the existing journal files, and the free space
 * reported by fstatvfs() minus keep_free. */
93 static uint64_t available_space(Server *s) {
95 char _cleanup_free_ *p = NULL;
99 uint64_t sum = 0, avail = 0, ss_avail = 0;
101 DIR _cleanup_closedir_ *d = NULL;
105 ts = now(CLOCK_MONOTONIC);
/* Serve the cached value while it is younger than RECHECK_AVAILABLE_SPACE_USEC. */
107 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
108 return s->cached_available_space;
110 r = sd_id128_get_machine(&machine);
/* /var/log/journal with the system metrics when the system journal is
 * open, /run/log/journal with the runtime metrics otherwise. */
114 if (s->system_journal) {
115 f = "/var/log/journal/";
116 m = &s->system_metrics;
118 f = "/run/log/journal/";
119 m = &s->runtime_metrics;
/* The directory inspected is <prefix><machine id>. */
124 p = strappend(f, sd_id128_to_string(machine, ids));
132 if (fstatvfs(dirfd(d), &ss) < 0)
138 union dirent_storage buf;
140 r = readdir_r(d, &buf.de, &de);
/* Only regular files named *.journal or *.journal~ count towards usage. */
147 if (!endswith(de->d_name, ".journal") &&
148 !endswith(de->d_name, ".journal~"))
151 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
154 if (!S_ISREG(st.st_mode))
/* st_blocks is counted in 512-byte units. */
157 sum += (uint64_t) st.st_blocks * 512UL;
/* Both subtractions below are clamped at zero. */
160 avail = sum >= m->max_use ? 0 : m->max_use - sum;
162 ss_avail = ss.f_bsize * ss.f_bavail;
164 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
/* NOTE(review): presumably the smaller of the two limits wins here; the
 * statement guarded by this condition is not visible in this view. */
166 if (ss_avail < avail)
169 s->cached_available_space = avail;
170 s->cached_available_space_timestamp = ts;
/* Resolves the "adm" group into s->file_gid. The attempt is latched through
 * s->file_gid_valid, which is set even when resolution fails. */
175 static void server_read_file_gid(Server *s) {
176 const char *adm = "adm";
181 if (s->file_gid_valid)
184 r = get_group_creds(&adm, &s->file_gid);
186 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
188 /* if we couldn't read the gid, then it will be 0, but that's
189 * fine and we shouldn't try to resolve the group again, so
190 * let's just pretend it worked right-away. */
191 s->file_gid_valid = true;
/* Adjusts ownership, mode and ACL of a journal file.
 *
 * The file gets mode 0640 and group s->file_gid. Then the file's ACL is
 * patched so that it carries a user entry for uid with read permission.
 * Every failure is logged as a warning and otherwise ignored.
 *
 * NOTE(review): the conditions under which the ACL part is skipped are not
 * visible in this view. */
194 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
199 acl_permset_t permset;
204 server_read_file_gid(s);
206 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
208 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
214 acl = acl_get_fd(f->fd);
216 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an existing entry for uid; a new ACL_USER entry is created below. */
220 r = acl_find_uid(acl, uid, &entry);
223 if (acl_create_entry(&acl, &entry) < 0 ||
224 acl_set_tag_type(entry, ACL_USER) < 0 ||
225 acl_set_qualifier(entry, &uid) < 0) {
226 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Add read permission to the entry and recompute the ACL mask. */
231 if (acl_get_permset(entry, &permset) < 0 ||
232 acl_add_perm(permset, ACL_READ) < 0 ||
233 acl_calc_mask(&acl) < 0) {
234 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
238 if (acl_set_fd(f->fd, acl) < 0)
239 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Picks the journal file that entries for uid should be written to.
 *
 * Returns the runtime journal when one is open; otherwise a per-user file
 * /var/log/journal/<machine id>/user-<uid>.journal, opened on demand and
 * kept in s->user_journals. Several paths fall back to s->system_journal.
 *
 * NOTE(review): the conditions guarding the individual fallbacks are not
 * all visible in this view. */
246 static JournalFile* find_journal(Server *s, uid_t uid) {
254 /* We split up user logs only on /var, not on /run. If the
255 * runtime file is open, we write to it exclusively, in order
256 * to guarantee proper order as soon as we flush /run to
257 * /var and close the runtime file. */
259 if (s->runtime_journal)
260 return s->runtime_journal;
263 return s->system_journal;
265 r = sd_id128_get_machine(&machine);
267 return s->system_journal;
/* Look for an already open per-user journal first. */
269 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
273 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
274 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
275 return s->system_journal;
/* Keep the number of open user journals below USER_JOURNALS_MAX. */
277 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
278 /* Too many open? Then let's close one */
279 f = hashmap_steal_first(s->user_journals);
281 journal_file_close(f);
284 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
288 return s->system_journal;
290 server_fix_perms(s, f, uid);
/* Remember the file; the lines below close it again and fall back to the
 * system journal (presumably when the insert fails). */
292 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
294 journal_file_close(f);
295 return s->system_journal;
/* Rotates the runtime journal, the system journal and every open per-user
 * journal. Errors are logged; after rotation server_fix_perms() is applied
 * to the resulting file. */
301 void server_rotate(Server *s) {
307 log_debug("Rotating...");
309 if (s->runtime_journal) {
/* The runtime journal is rotated with the last argument fixed to false,
 * where the other journals pass s->seal. */
310 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
312 if (s->runtime_journal)
313 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
315 log_error("Failed to create new runtime journal: %s", strerror(-r));
317 server_fix_perms(s, s->runtime_journal, 0);
320 if (s->system_journal) {
321 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
323 if (s->system_journal)
324 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
326 log_error("Failed to create new system journal: %s", strerror(-r));
329 server_fix_perms(s, s->system_journal, 0);
/* The key of each user journal is the uid it belongs to. */
332 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
333 r = journal_file_rotate(&f, s->compress, s->seal);
336 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
338 log_error("Failed to create user journal: %s", strerror(-r));
/* Store the file pointer produced by the rotation under the same key. */
340 hashmap_replace(s->user_journals, k, f);
341 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Vacuums the journal directories of this machine.
 *
 * /var/log/journal/<machine id> is vacuumed when the system journal is
 * open, /run/log/journal/<machine id> when the runtime journal is open,
 * each with its own max_use/keep_free metrics and s->max_retention_usec.
 * s->oldest_file_usec is reset first and passed to both vacuum calls. */
346 void server_vacuum(Server *s) {
352 log_debug("Vacuuming...");
354 s->oldest_file_usec = 0;
356 r = sd_id128_get_machine(&machine);
358 log_error("Failed to get machine ID: %s", strerror(-r));
362 sd_id128_to_string(machine, ids);
364 if (s->system_journal) {
365 p = strappend("/var/log/journal/", ids);
371 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
/* A missing directory (-ENOENT) is not reported. */
372 if (r < 0 && r != -ENOENT)
373 log_error("Failed to vacuum %s: %s", p, strerror(-r));
377 if (s->runtime_journal) {
378 p = strappend("/run/log/journal/", ids);
384 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
385 if (r < 0 && r != -ENOENT)
386 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* Zero the timestamp so available_space() recomputes on its next call. */
390 s->cached_available_space_timestamp = 0;
/* Returns a newly allocated copy of the cgroup path of pid (in the
 * SYSTEMD_CGROUP_CONTROLLER hierarchy) with the cgroup path of PID 1
 * stripped from its front, when the former starts with the latter.
 *
 * NOTE(review): the result for a non-matching prefix and the error returns
 * are not visible in this view. */
393 static char *shortened_cgroup_path(pid_t pid) {
395 char _cleanup_free_ *process_path = NULL, *init_path = NULL;
400 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
404 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
/* Cut a trailing "/system" (7 characters) off the path of PID 1. */
408 if (endswith(init_path, "/system"))
409 init_path[strlen(init_path) - 7] = 0;
410 else if (streq(init_path, "/"))
413 if (startswith(process_path, init_path)) {
414 path = strdup(process_path + strlen(init_path));
423 bool shall_try_append_again(JournalFile *f, int r) {
425 /* -E2BIG Hit configured limit
427 -EDQUOT Quota limit hit
429 -EHOSTDOWN Other machine
430 -EBUSY Unclean shutdown
431 -EPROTONOSUPPORT Unsupported feature
434 -ESHUTDOWN Already archived */
436 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
437 log_debug("%s: Allocation limit reached, rotating.", f->path);
438 else if (r == -EHOSTDOWN)
439 log_info("%s: Journal file from other machine, rotating.", f->path);
440 else if (r == -EBUSY)
441 log_info("%s: Unclean shutdown, rotating.", f->path);
442 else if (r == -EPROTONOSUPPORT)
443 log_info("%s: Unsupported feature, rotating.", f->path);
444 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
445 log_warning("%s: Journal file corrupted, rotating.", f->path);
/* Appends one entry, given as n iovec fields, to the journal selected by
 * find_journal() for uid.
 *
 * If the first append fails with an error that shall_try_append_again()
 * accepts, the journal is looked up again and the write is retried.
 * Failures are logged and otherwise ignored.
 *
 * NOTE(review): the rotate/vacuum calls between these steps are not visible
 * in this view. */
452 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
454 bool vacuumed = false;
461 f = find_journal(s, uid);
/* Rotation may be suggested before writing; the journal is then looked up again. */
465 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
466 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
471 f = find_journal(s, uid);
476 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
/* No retry when vacuumed is already set or the error is not a retryable one. */
480 if (vacuumed || !shall_try_append_again(f, r)) {
481 log_error("Failed to write entry, ignoring: %s", strerror(-r));
488 f = find_journal(s, uid);
492 log_debug("Retrying write.");
493 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
495 log_error("Failed to write entry, ignoring: %s", strerror(-r));
498 static void dispatch_message_real(
500 struct iovec *iovec, unsigned n, unsigned m,
503 const char *label, size_t label_len,
504 const char *unit_id) {
506 char _cleanup_free_ *pid = NULL, *uid = NULL, *gid = NULL,
507 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
508 *comm = NULL, *cmdline = NULL, *hostname = NULL,
509 *audit_session = NULL, *audit_loginuid = NULL,
510 *exe = NULL, *cgroup = NULL, *session = NULL,
511 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
517 uid_t loginuid = 0, realuid = 0;
522 assert(n + N_IOVEC_META_FIELDS <= m);
530 realuid = ucred->uid;
532 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
533 IOVEC_SET_STRING(iovec[n++], pid);
535 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
536 IOVEC_SET_STRING(iovec[n++], uid);
538 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
539 IOVEC_SET_STRING(iovec[n++], gid);
541 r = get_process_comm(ucred->pid, &t);
543 comm = strappend("_COMM=", t);
547 IOVEC_SET_STRING(iovec[n++], comm);
550 r = get_process_exe(ucred->pid, &t);
552 exe = strappend("_EXE=", t);
556 IOVEC_SET_STRING(iovec[n++], exe);
559 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
561 cmdline = strappend("_CMDLINE=", t);
565 IOVEC_SET_STRING(iovec[n++], cmdline);
568 r = audit_session_from_pid(ucred->pid, &audit);
570 if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
571 IOVEC_SET_STRING(iovec[n++], audit_session);
573 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
575 if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
576 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
578 t = shortened_cgroup_path(ucred->pid);
580 cgroup = strappend("_SYSTEMD_CGROUP=", t);
584 IOVEC_SET_STRING(iovec[n++], cgroup);
588 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
589 session = strappend("_SYSTEMD_SESSION=", t);
593 IOVEC_SET_STRING(iovec[n++], session);
596 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
597 if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
598 IOVEC_SET_STRING(iovec[n++], owner_uid);
601 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
602 unit = strappend("_SYSTEMD_UNIT=", t);
605 unit = strappend("_SYSTEMD_UNIT=", unit_id);
608 IOVEC_SET_STRING(iovec[n++], unit);
612 selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
613 if (selinux_context) {
614 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
615 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
616 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
617 IOVEC_SET_STRING(iovec[n++], selinux_context);
620 security_context_t con;
622 if (getpidcon(ucred->pid, &con) >= 0) {
623 selinux_context = strappend("_SELINUX_CONTEXT=", con);
625 IOVEC_SET_STRING(iovec[n++], selinux_context);
634 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
635 (unsigned long long) timeval_load(tv)) >= 0)
636 IOVEC_SET_STRING(iovec[n++], source_time);
639 /* Note that strictly speaking storing the boot id here is
640 * redundant since the entry includes this in-line
641 * anyway. However, we need this indexed, too. */
642 r = sd_id128_get_boot(&id);
644 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
645 IOVEC_SET_STRING(iovec[n++], boot_id);
647 r = sd_id128_get_machine(&id);
649 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
650 IOVEC_SET_STRING(iovec[n++], machine_id);
652 t = gethostname_malloc();
654 hostname = strappend("_HOSTNAME=", t);
657 IOVEC_SET_STRING(iovec[n++], hostname);
663 s->split_mode == SPLIT_NONE ? 0 :
664 (s->split_mode == SPLIT_UID ? realuid :
665 (realuid == 0 ? 0 : loginuid)), iovec, n);
/* Logs a message generated by the journal daemon itself.
 *
 * The entry carries PRIORITY=6, _TRANSPORT=driver and a MESSAGE= field
 * formatted printf-style from format and the variadic arguments. A message
 * ID field is added unless message_id equals SD_ID128_NULL. The entry is
 * dispatched with the credentials of the calling process. */
668 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
669 char mid[11 + 32 + 1];
670 char buffer[16 + LINE_MAX + 1];
671 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
679 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
680 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* "MESSAGE=" takes the first 8 bytes; the formatted text follows it. */
682 memcpy(buffer, "MESSAGE=", 8);
683 va_start(ap, format);
684 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
686 char_array_0(buffer);
687 IOVEC_SET_STRING(iovec[n++], buffer);
689 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
690 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
692 IOVEC_SET_STRING(iovec[n++], mid);
696 ucred.pid = getpid();
697 ucred.uid = getuid();
698 ucred.gid = getgid();
/* No timestamp, SELinux label or unit name is supplied. */
700 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Entry point for storing a received message: compares the priority with
 * s->max_level_store, applies rate limiting keyed by the sender's shortened
 * cgroup path, and hands the message to dispatch_message_real().
 *
 * NOTE(review): part of the parameter list and the early-exit statements are
 * not visible in this view. */
703 void server_dispatch_message(
705 struct iovec *iovec, unsigned n, unsigned m,
708 const char *label, size_t label_len,
713 char _cleanup_free_ *path = NULL;
717 assert(iovec || n == 0);
722 if (LOG_PRI(priority) > s->max_level_store)
728 path = shortened_cgroup_path(ucred->pid);
732 /* example: /user/lennart/3/foobar
733 * /system/dbus.service/foobar
735 * So let's cut off everything past the third /, since that is
736 * where user directories start */
738 c = strchr(path, '/');
740 c = strchr(c+1, '/');
742 c = strchr(c+1, '/');
/* The rate limiter is also given the priority and the available disk space. */
748 rl = journal_rate_limit_test(s->rate_limit, path,
749 priority & LOG_PRIMASK, available_space(s));
754 /* Write a suppression message if we suppressed something */
756 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
757 "Suppressed %u messages from %s", rl - 1, path);
760 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
/* Opens the journal files of this machine, as far as they are not open yet.
 *
 * The system journal /var/log/journal/<machine id>/system.journal is only
 * attempted for persistent or auto storage and once the flag file
 * /run/systemd/journal/flushed exists. The runtime journal
 * /run/log/journal/<machine id>/system.journal is handled for every storage
 * mode except "none". A driver message announcing the size limit is logged
 * for each journal that got opened. */
764 static int system_journal_open(Server *s) {
770 r = sd_id128_get_machine(&machine);
774 sd_id128_to_string(machine, ids);
776 if (!s->system_journal &&
777 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
778 access("/run/systemd/journal/flushed", F_OK) >= 0) {
780 /* If in auto mode: first try to create the machine
781 * path, but not the prefix.
783 * If in persistent mode: create /var/log/journal and
784 * the machine path */
786 if (s->storage == STORAGE_PERSISTENT)
787 (void) mkdir("/var/log/journal/", 0755);
789 fn = strappend("/var/log/journal/", ids);
793 (void) mkdir(fn, 0755);
796 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
800 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
804 char fb[FORMAT_BYTES_MAX];
806 server_fix_perms(s, s->system_journal, 0);
807 server_driver_message(s, SD_ID128_NULL, "Allowing system journal files to grow to %s.",
808 format_bytes(fb, sizeof(fb), s->system_metrics.max_use));
/* -ENOENT and -EROFS are not worth a warning. */
812 if (r != -ENOENT && r != -EROFS)
813 log_warning("Failed to open system journal: %s", strerror(-r));
819 if (!s->runtime_journal &&
820 (s->storage != STORAGE_NONE)) {
822 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
826 if (s->system_journal) {
828 /* Try to open the runtime journal, but only
829 * if it already exists, so that we can flush
830 * it into the system journal */
832 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
837 log_warning("Failed to open runtime journal: %s", strerror(-r));
844 /* OK, we really need the runtime journal, so create
845 * it if necessary. */
847 (void) mkdir_parents(fn, 0755);
848 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
852 log_error("Failed to open runtime journal: %s", strerror(-r));
857 if (s->runtime_journal) {
858 char fb[FORMAT_BYTES_MAX];
860 server_fix_perms(s, s->runtime_journal, 0);
861 server_driver_message(s, SD_ID128_NULL, "Allowing runtime journal files to grow to %s.",
862 format_bytes(fb, sizeof(fb), s->runtime_metrics.max_use));
/* Copies the entries of the runtime journal into the system journal, then
 * closes the runtime journal and removes /run/log/journal.
 *
 * Only relevant for auto or persistent storage, while a runtime journal is
 * open and after the system journal could be opened.
 *
 * NOTE(review): the return values and the error/cleanup paths are not
 * visible in this view. */
869 int server_flush_to_var(Server *s) {
872 sd_journal *j = NULL;
876 if (s->storage != STORAGE_AUTO &&
877 s->storage != STORAGE_PERSISTENT)
880 if (!s->runtime_journal)
883 system_journal_open(s);
885 if (!s->system_journal)
888 log_debug("Flushing to /var...");
890 r = sd_id128_get_machine(&machine);
892 log_error("Failed to get machine id: %s", strerror(-r));
/* Read back only the runtime journal files. */
896 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
898 log_error("Failed to read runtime journal: %s", strerror(-r));
902 sd_journal_set_data_threshold(j, 0);
904 SD_JOURNAL_FOREACH(j) {
909 assert(f && f->current_offset > 0);
911 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
913 log_error("Can't read entry: %s", strerror(-r));
917 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
/* A failed copy is retried once when the error is a retryable one. */
921 if (!shall_try_append_again(s->system_journal, r)) {
922 log_error("Can't write entry: %s", strerror(-r));
929 log_debug("Retrying write.");
930 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
932 log_error("Can't write entry: %s", strerror(-r));
938 journal_file_post_change(s->system_journal);
940 journal_file_close(s->runtime_journal);
941 s->runtime_journal = NULL;
944 rm_rf("/run/log/journal", false, true, false);
/* Handles one epoll event, dispatching on the file descriptor it refers to:
 * the signal fd, /dev/kmsg, the native and syslog datagram sockets, the
 * stdout listening socket, or (any other value) an stdout stream object.
 *
 * NOTE(review): the return values of the individual branches are not visible
 * in this view. */
952 int process_event(Server *s, struct epoll_event *ev) {
956 if (ev->data.fd == s->signal_fd) {
957 struct signalfd_siginfo sfsi;
960 if (ev->events != EPOLLIN) {
961 log_error("Got invalid event from epoll.");
965 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
966 if (n != sizeof(sfsi)) {
971 if (errno == EINTR || errno == EAGAIN)
977 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
/* SIGUSR1: create the "flushed" flag file and flush the runtime journal to /var. */
979 if (sfsi.ssi_signo == SIGUSR1) {
980 touch("/run/systemd/journal/flushed");
981 server_flush_to_var(s);
985 if (sfsi.ssi_signo == SIGUSR2) {
993 } else if (ev->data.fd == s->dev_kmsg_fd) {
996 if (ev->events != EPOLLIN) {
997 log_error("Got invalid event from epoll.");
1001 r = server_read_dev_kmsg(s);
1007 } else if (ev->data.fd == s->native_fd ||
1008 ev->data.fd == s->syslog_fd) {
1010 if (ev->events != EPOLLIN) {
1011 log_error("Got invalid event from epoll.");
1016 struct msghdr msghdr;
1018 struct ucred *ucred = NULL;
1019 struct timeval *tv = NULL;
1020 struct cmsghdr *cmsg;
1022 size_t label_len = 0;
1024 struct cmsghdr cmsghdr;
1026 /* We use NAME_MAX space for the
1027 * SELinux label here. The kernel
1028 * currently enforces no limit, but
1029 * according to suggestions from the
1030 * SELinux people this will change and
1031 * it will probably be identical to
1032 * NAME_MAX. For now we use that, but
1033 * this should be updated one day when
1034 * the final limit is known.*/
1035 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1036 CMSG_SPACE(sizeof(struct timeval)) +
1037 CMSG_SPACE(sizeof(int)) + /* fd */
1038 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask for the size of the pending datagram so the receive buffer can be grown first. */
1045 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1046 log_error("SIOCINQ failed: %m");
1050 if (s->buffer_size < (size_t) v) {
1054 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
/* One extra byte is allocated beyond the new buffer size l. */
1055 b = realloc(s->buffer, l+1);
1058 log_error("Couldn't increase buffer.");
1067 iovec.iov_base = s->buffer;
1068 iovec.iov_len = s->buffer_size;
1072 msghdr.msg_iov = &iovec;
1073 msghdr.msg_iovlen = 1;
1074 msghdr.msg_control = &control;
1075 msghdr.msg_controllen = sizeof(control);
1077 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1080 if (errno == EINTR || errno == EAGAIN)
1083 log_error("recvmsg() failed: %m");
/* Pick credentials, SELinux label, timestamp and passed fds out of the control data. */
1087 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1089 if (cmsg->cmsg_level == SOL_SOCKET &&
1090 cmsg->cmsg_type == SCM_CREDENTIALS &&
1091 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1092 ucred = (struct ucred*) CMSG_DATA(cmsg);
1093 else if (cmsg->cmsg_level == SOL_SOCKET &&
1094 cmsg->cmsg_type == SCM_SECURITY) {
1095 label = (char*) CMSG_DATA(cmsg);
1096 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1097 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1098 cmsg->cmsg_type == SO_TIMESTAMP &&
1099 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1100 tv = (struct timeval*) CMSG_DATA(cmsg);
1101 else if (cmsg->cmsg_level == SOL_SOCKET &&
1102 cmsg->cmsg_type == SCM_RIGHTS) {
1103 fds = (int*) CMSG_DATA(cmsg);
1104 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* The syslog socket takes text payloads only; file descriptors are not accepted on it. */
1108 if (ev->data.fd == s->syslog_fd) {
1111 if (n > 0 && n_fds == 0) {
1112 e = memchr(s->buffer, '\n', n);
1118 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1119 } else if (n_fds > 0)
1120 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* The native socket takes either a payload or exactly one file descriptor. */
1123 if (n > 0 && n_fds == 0)
1124 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1125 else if (n == 0 && n_fds == 1)
1126 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1128 log_warning("Got too many file descriptors via native socket. Ignoring.");
1131 close_many(fds, n_fds);
1136 } else if (ev->data.fd == s->stdout_fd) {
1138 if (ev->events != EPOLLIN) {
1139 log_error("Got invalid event from epoll.");
1143 stdout_stream_new(s);
1147 StdoutStream *stream;
/* Stream events may consist of EPOLLIN and/or EPOLLHUP only. */
1149 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1150 log_error("Got invalid event from epoll.");
1154 /* If it is none of the well-known fds, it must be an
1155 * stdout stream fd. Note that this is a bit ugly here
1156 * (since we rely that none of the well-known fds
1157 * could be interpreted as pointer), but nonetheless
1158 * safe, since the well-known fds would never get an
1159 * fd > 4096, i.e. beyond the first memory page */
1161 stream = ev->data.ptr;
/* A result <= 0 from processing frees the stream. */
1163 if (stdout_stream_process(stream) <= 0)
1164 stdout_stream_free(stream);
1169 log_error("Unknown event.");
/* Sets the process signal mask to SIGINT, SIGTERM, SIGUSR1 and SIGUSR2,
 * creates a non-blocking, close-on-exec signalfd for the same set in
 * s->signal_fd, and registers it with the server's epoll instance for
 * EPOLLIN. */
1173 static int open_signalfd(Server *s) {
1175 struct epoll_event ev;
1179 assert_se(sigemptyset(&mask) == 0);
1180 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1181 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1183 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1184 if (s->signal_fd < 0) {
1185 log_error("signalfd(): %m");
1190 ev.events = EPOLLIN;
1191 ev.data.fd = s->signal_fd;
1193 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1194 log_error("epoll_ctl(): %m");
/* Applies journald settings given on the kernel command line.
 *
 * Reads /proc/cmdline and evaluates the boolean switches
 * systemd.journald.forward_to_syslog=, systemd.journald.forward_to_kmsg= and
 * systemd.journald.forward_to_console=. Unparsable values and any other
 * word starting with "systemd.journald" are warned about and ignored.
 *
 * NOTE(review): what happens when detect_container() reports a container is
 * not visible in this view. */
1201 static int server_parse_proc_cmdline(Server *s) {
1202 char _cleanup_free_ *line = NULL;
1207 if (detect_container(NULL) > 0)
1210 r = read_one_line_file("/proc/cmdline", &line);
1212 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1216 FOREACH_WORD_QUOTED(w, l, line, state) {
1217 char _cleanup_free_ *word;
1219 word = strndup(w, l);
/* The numeric offsets below equal the length of the respective option prefix. */
1223 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1224 r = parse_boolean(word + 35);
1226 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1228 s->forward_to_syslog = r;
1229 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1230 r = parse_boolean(word + 33);
1232 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1234 s->forward_to_kmsg = r;
1235 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1236 r = parse_boolean(word + 36);
1238 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1240 s->forward_to_console = r;
1241 } else if (startswith(word, "systemd.journald"))
1242 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Parses the [Journal] section of /etc/systemd/journald.conf into the
 * server object, using the journald_gperf_lookup table. Failures to open or
 * parse the file are logged as warnings; the ENOENT case is checked
 * separately from the warning path. */
1248 static int server_parse_config_file(Server *s) {
1249 static const char *fn = "/etc/systemd/journald.conf";
1250 FILE _cleanup_fclose_ *f = NULL;
1255 f = fopen(fn, "re");
1257 if (errno == ENOENT)
1260 log_warning("Failed to open configuration file %s: %m", fn);
1264 r = config_parse(fn, f, "Journal\0", config_item_perf_lookup,
1265 (void*) journald_gperf_lookup, false, s);
1267 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Initializes the server object: sets defaults, reads the configuration
 * file and the kernel command line, takes over sockets passed in by the
 * service manager, opens the remaining input sources and finally opens the
 * journal files.
 *
 * NOTE(review): the error returns after the individual steps are not visible
 * in this view. */
1272 int server_init(Server *s) {
/* All file descriptors start out as -1, i.e. "not open". */
1278 s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1282 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1283 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1285 s->forward_to_syslog = true;
1287 s->max_level_store = LOG_DEBUG;
1288 s->max_level_syslog = LOG_DEBUG;
1289 s->max_level_kmsg = LOG_NOTICE;
1290 s->max_level_console = LOG_INFO;
/* Both metrics structures are filled with 0xFF bytes before configuration is read. */
1292 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1293 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* The kernel command line is parsed after the configuration file. */
1295 server_parse_config_file(s);
1296 server_parse_proc_cmdline(s);
1298 mkdir_p("/run/systemd/journal", 0755);
1300 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1301 if (!s->user_journals)
1304 s->mmap = mmap_cache_new();
1308 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1309 if (s->epoll_fd < 0) {
1310 log_error("Failed to create epoll object: %m");
1314 n = sd_listen_fds(true);
1316 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
/* Classify each passed socket by type and path; at most one of each kind is accepted. */
1320 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1322 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1324 if (s->native_fd >= 0) {
1325 log_error("Too many native sockets passed.");
1331 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1333 if (s->stdout_fd >= 0) {
1334 log_error("Too many stdout sockets passed.");
1340 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1342 if (s->syslog_fd >= 0) {
1343 log_error("Too many /dev/log sockets passed.");
1350 log_error("Unknown socket passed.");
1355 r = server_open_syslog_socket(s);
1359 r = server_open_native_socket(s);
1363 r = server_open_stdout_socket(s);
1367 r = server_open_dev_kmsg(s);
1371 r = server_open_kernel_seqnum(s);
1375 r = open_signalfd(s);
1379 s->udev = udev_new();
1383 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1387 r = system_journal_open(s);
/* Calls journal_file_maybe_append_tag() with the current CLOCK_REALTIME
 * time on the system journal (if open) and on every open user journal. */
1394 void server_maybe_append_tags(Server *s) {
1400 n = now(CLOCK_REALTIME);
1402 if (s->system_journal)
1403 journal_file_maybe_append_tag(s->system_journal, n);
1405 HASHMAP_FOREACH(f, s->user_journals, i)
1406 journal_file_maybe_append_tag(f, n);
/* Releases everything the server object holds: stdout streams, the system,
 * runtime and user journals, all file descriptors, the rate limiter, the
 * kernel sequence number mapping, the mmap cache and the udev context. */
1410 void server_done(Server *s) {
/* The loop relies on stdout_stream_free() advancing s->stdout_streams. */
1414 while (s->stdout_streams)
1415 stdout_stream_free(s->stdout_streams);
1417 if (s->system_journal)
1418 journal_file_close(s->system_journal);
1420 if (s->runtime_journal)
1421 journal_file_close(s->runtime_journal);
/* Empty the user journal map, closing each file, before freeing the map itself. */
1423 while ((f = hashmap_steal_first(s->user_journals)))
1424 journal_file_close(f);
1426 hashmap_free(s->user_journals);
/* A negative value means the descriptor is not open. */
1428 if (s->epoll_fd >= 0)
1429 close_nointr_nofail(s->epoll_fd);
1431 if (s->signal_fd >= 0)
1432 close_nointr_nofail(s->signal_fd);
1434 if (s->syslog_fd >= 0)
1435 close_nointr_nofail(s->syslog_fd);
1437 if (s->native_fd >= 0)
1438 close_nointr_nofail(s->native_fd);
1440 if (s->stdout_fd >= 0)
1441 close_nointr_nofail(s->stdout_fd);
1443 if (s->dev_kmsg_fd >= 0)
1444 close_nointr_nofail(s->dev_kmsg_fd);
1447 journal_rate_limit_free(s->rate_limit);
/* The kernel sequence number is unmapped with the size of one uint64_t. */
1449 if (s->kernel_seqnum)
1450 munmap(s->kernel_seqnum, sizeof(uint64_t));
1456 mmap_cache_unref(s->mmap);
1459 udev_unref(s->udev);