1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
25 #include <sys/signalfd.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
35 #include <systemd/sd-journal.h>
36 #include <systemd/sd-messages.h>
37 #include <systemd/sd-daemon.h>
40 #include <systemd/sd-login.h>
45 #include "journal-file.h"
46 #include "socket-util.h"
47 #include "cgroup-util.h"
51 #include "conf-parser.h"
52 #include "journal-internal.h"
53 #include "journal-vacuum.h"
54 #include "journal-authenticate.h"
56 #include "journald-rate-limit.h"
57 #include "journald-kmsg.h"
58 #include "journald-syslog.h"
59 #include "journald-stream.h"
60 #include "journald-console.h"
61 #include "journald-native.h"
65 #include <acl/libacl.h>
70 #include <selinux/selinux.h>
/* Upper bound on the number of per-user journal files kept open at once;
 * find_journal() closes entries of s->user_journals while this count is
 * reached. */
73 #define USER_JOURNALS_MAX 1024
/* Defaults that server_init() stores in s->rate_limit_interval and
 * s->rate_limit_burst before the configuration is parsed. */
75 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
76 #define DEFAULT_RATE_LIMIT_BURST 200
/* available_space() returns its cached value while the last measurement is
 * younger than this interval. */
78 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Configuration strings for the Storage enum, indexed by enum value. */
80 static const char* const storage_table[] = {
81 [STORAGE_AUTO] = "auto",
82 [STORAGE_VOLATILE] = "volatile",
83 [STORAGE_PERSISTENT] = "persistent",
84 [STORAGE_NONE] = "none"
/* Instantiate the string-table lookup and the config_parse_storage() parser
 * for the table above. NOTE(review): the exact helpers generated depend on
 * the macro definitions, which are outside this view. */
87 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
88 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Configuration strings for the SplitMode enum, indexed by enum value.
 * NOTE(review): SPLIT_UID is used by dispatch_message_real(), but its table
 * entry (source line 92) is not visible in this excerpt -- confirm it is
 * present in the full file. */
90 static const char* const split_mode_table[] = {
91 [SPLIT_NONE] = "none",
93 [SPLIT_LOGIN] = "login"
/* Instantiate the string-table lookup and the config_parse_split_mode()
 * parser for the table above. */
96 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
97 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Estimates how many bytes may still be written to the active journal
 * directory and caches the result in s->cached_available_space.
 *
 * The cached value is returned as long as it is younger than
 * RECHECK_AVAILABLE_SPACE_USEC. Otherwise the per-machine journal directory
 * is scanned: /var/log/journal/<machine-id> together with s->system_metrics
 * when a system journal is open, /run/log/journal/<machine-id> together with
 * s->runtime_metrics in the other branch.
 *
 * Two limits are derived: max_use minus the space already occupied by
 * journal files, and the file system's free space minus keep_free. */
99 static uint64_t available_space(Server *s) {
104 uint64_t sum = 0, avail = 0, ss_avail = 0;
110 ts = now(CLOCK_MONOTONIC);
/* Serve from the cache while the previous measurement is recent enough. */
112 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
113 return s->cached_available_space;
115 r = sd_id128_get_machine(&machine);
/* Pick directory prefix and metrics depending on which journal is in use. */
119 if (s->system_journal) {
120 f = "/var/log/journal/";
121 m = &s->system_metrics;
123 f = "/run/log/journal/";
124 m = &s->runtime_metrics;
129 p = strappend(f, sd_id128_to_string(machine, ids));
/* File system statistics of the directory, used for ss_avail below. */
139 if (fstatvfs(dirfd(d), &ss) < 0)
144 struct dirent buf, *de;
146 r = readdir_r(d, &buf, &de);
/* Only names ending in ".journal" or ".journal~" are of interest. */
153 if (!endswith(de->d_name, ".journal") &&
154 !endswith(de->d_name, ".journal~"))
157 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
160 if (!S_ISREG(st.st_mode))
/* Account allocated blocks; st_blocks is multiplied by 512 bytes here. */
163 sum += (uint64_t) st.st_blocks * 512UL;
/* Remaining budget under max_use, clamped at zero. */
166 avail = sum >= m->max_use ? 0 : m->max_use - sum;
/* NOTE(review): POSIX defines f_bavail in units of f_frsize; f_bsize is used
 * here -- confirm both are equal on the targeted file systems. */
168 ss_avail = ss.f_bsize * ss.f_bavail;
/* Free space on the file system after reserving keep_free, clamped at zero. */
170 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
172 if (ss_avail < avail)
/* Remember the result and when it was computed. */
175 s->cached_available_space = avail;
176 s->cached_available_space_timestamp = ts;
/* Resolves the gid of the "adm" group into s->file_gid. s->file_gid_valid is
 * set afterwards so that the lookup is attempted only once; a failed lookup
 * is logged as a warning and not retried. */
184 static void server_read_file_gid(Server *s) {
185 const char *adm = "adm";
/* Already resolved (or already attempted) earlier. */
190 if (s->file_gid_valid)
193 r = get_group_creds(&adm, &s->file_gid);
195 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
197 /* if we couldn't read the gid, then it will be 0, but that's
198 * fine and we shouldn't try to resolve the group again, so
199 * let's just pretend it worked right-away. */
200 s->file_gid_valid = true;
/* Adjusts ownership, mode and ACL of journal file f.
 *
 * The file is set to mode 0640 with owner uid 0 and group s->file_gid. Then
 * the file's ACL is read, an ACL_USER entry for uid is looked up (and created
 * if necessary), ACL_READ is added to it, the mask is recalculated and the
 * ACL is written back. Every failure is logged as a warning and otherwise
 * ignored. */
203 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
208 acl_permset_t permset;
/* Make sure s->file_gid has been resolved before it is used below. */
213 server_read_file_gid(s);
215 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
217 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
223 acl = acl_get_fd(f->fd);
225 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an existing entry for this user before creating a new one. */
229 r = acl_find_uid(acl, uid, &entry);
232 if (acl_create_entry(&acl, &entry) < 0 ||
233 acl_set_tag_type(entry, ACL_USER) < 0 ||
234 acl_set_qualifier(entry, &uid) < 0) {
235 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Grant read access on the entry and recompute the ACL mask. */
240 if (acl_get_permset(entry, &permset) < 0 ||
241 acl_add_perm(permset, ACL_READ) < 0 ||
242 acl_calc_mask(&acl) < 0) {
243 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
247 if (acl_set_fd(f->fd, acl) < 0)
248 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Returns the journal file that entries of user uid should be written to.
 *
 * The runtime journal takes precedence whenever it is open. Otherwise a
 * per-user file /var/log/journal/<machine-id>/user-<uid>.journal is looked up
 * in s->user_journals, or opened, given fixed permissions and inserted into
 * that hashmap. s->system_journal is returned as fallback on the failure
 * paths visible here. */
255 static JournalFile* find_journal(Server *s, uid_t uid) {
263 /* We split up user logs only on /var, not on /run. If the
264 * runtime file is open, we write to it exclusively, in order
265 * to guarantee proper order as soon as we flush /run to
266 * /var and close the runtime file. */
268 if (s->runtime_journal)
269 return s->runtime_journal;
272 return s->system_journal;
274 r = sd_id128_get_machine(&machine);
276 return s->system_journal;
/* Per-user files are cached in a hashmap keyed by uid. */
278 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
282 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
283 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
284 return s->system_journal;
/* Keep the number of open user journals below USER_JOURNALS_MAX. */
286 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
287 /* Too many open? Then let's close one */
288 f = hashmap_steal_first(s->user_journals);
290 journal_file_close(f);
/* The system journal is passed as template argument for the new file. */
293 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
297 return s->system_journal;
/* Give the owning user read access to their own journal file. */
299 server_fix_perms(s, f, uid);
301 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
303 journal_file_close(f);
304 return s->system_journal;
/* Rotates the runtime journal, the system journal and every per-user journal
 * in s->user_journals. Rotation errors are logged; the freshly created files
 * get their permissions fixed via server_fix_perms(). Sealing is requested
 * for the system and user journals (s->seal) but not for the runtime
 * journal. */
310 static void server_rotate(Server *s) {
316 log_debug("Rotating...");
318 if (s->runtime_journal) {
319 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
/* After a failed rotation the pointer may or may not still be set; the two
 * messages distinguish these cases. */
321 if (s->runtime_journal)
322 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
324 log_error("Failed to create new runtime journal: %s", strerror(-r));
326 server_fix_perms(s, s->runtime_journal, 0);
329 if (s->system_journal) {
330 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
332 if (s->system_journal)
333 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
335 log_error("Failed to create new system journal: %s", strerror(-r));
338 server_fix_perms(s, s->system_journal, 0);
/* Per-user journals: the hashmap key k encodes the uid. */
341 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
342 r = journal_file_rotate(&f, s->compress, s->seal);
345 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
347 log_error("Failed to create user journal: %s", strerror(-r));
/* Store the new file object under the same key and fix its permissions. */
349 hashmap_replace(s->user_journals, k, f);
350 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Vacuums the journal directories of this machine: the one below
 * /var/log/journal/ when a system journal is open and the one below
 * /run/log/journal/ when a runtime journal is open, each with the max_use and
 * keep_free limits of the matching metrics. Errors other than -ENOENT are
 * logged. Finally the available_space() cache is invalidated. */
355 static void server_vacuum(Server *s) {
361 log_debug("Vacuuming...");
363 r = sd_id128_get_machine(&machine);
365 log_error("Failed to get machine ID: %s", strerror(-r));
369 sd_id128_to_string(machine, ids);
371 if (s->system_journal) {
372 p = strappend("/var/log/journal/", ids);
378 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
379 if (r < 0 && r != -ENOENT)
380 log_error("Failed to vacuum %s: %s", p, strerror(-r));
384 if (s->runtime_journal) {
385 p = strappend("/run/log/journal/", ids);
391 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
392 if (r < 0 && r != -ENOENT)
393 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* A zero timestamp forces available_space() to measure again. */
397 s->cached_available_space_timestamp = 0;
/* Computes the cgroup path of pid relative to the cgroup of PID 1.
 *
 * Both paths are queried in the SYSTEMD_CGROUP_CONTROLLER hierarchy. A
 * trailing "/system" is cut off the init path; when the process path starts
 * with the resulting prefix, a newly allocated copy of the remainder is
 * produced. Handling of the other cases is on lines not visible in this
 * excerpt. */
400 static char *shortened_cgroup_path(pid_t pid) {
402 char *process_path, *init_path, *path;
406 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
410 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
/* 7 == strlen("/system"): truncate the suffix in place. */
416 if (endswith(init_path, "/system"))
417 init_path[strlen(init_path) - 7] = 0;
418 else if (streq(init_path, "/"))
421 if (startswith(process_path, init_path)) {
424 p = strdup(process_path + strlen(init_path));
/* Appends one entry, made of the n fields in iovec, to the journal file
 * selected for uid by find_journal().
 *
 * If the file suggests rotation, or the append fails with one of the
 * recoverable errors listed below, the journals are rotated and the write is
 * retried on the file returned by a fresh find_journal() call. The
 * "vacuumed" flag records that this has already been done, in which case a
 * further failure is logged and the entry is dropped.
 *
 * All error codes handled here are negative errno values, as returned by
 * journal_file_append_entry(). */
442 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
444 bool vacuumed = false;
451 f = find_journal(s, uid);
455 if (journal_file_rotate_suggested(f)) {
456 log_debug("Journal header limits reached or header out-of-date, rotating.");
461 f = find_journal(s, uid);
467 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
/* Give up on unknown errors; the listed ones can be cured by rotating. */
472 (r != -E2BIG && /* hit limit */
473 r != -EFBIG && /* hit fs limit */
474 r != -EDQUOT && /* quota hit */
475 r != -ENOSPC && /* disk full */
476 r != -EBADMSG && /* corrupted */
477 r != -ENODATA && /* truncated */
478 r != -EHOSTDOWN && /* other machine */
479 r != -EPROTONOSUPPORT && /* unsupported feature */
480 r != -EBUSY && /* unclean shutdown */
481 r != -ESHUTDOWN /* already archived */)) {
482 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* r is a negative errno, so every comparison needs the minus sign. The
 * quota and disk-full cases used to be compared against the positive
 * constants and were therefore reported as "Journal file corrupted". */
486 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
487 log_debug("Allocation limit reached, rotating.");
488 else if (r == -EHOSTDOWN)
489 log_info("Journal file from other machine, rotating.");
490 else if (r == -EBUSY)
491 log_info("Unclean shutdown, rotating.");
493 log_warning("Journal file corrupted, rotating.");
/* Rotation may have replaced the file object, look it up again. */
499 f = find_journal(s, uid);
503 log_debug("Retrying write.");
/* Adds the trusted metadata fields (names starting with "_") to an entry and
 * passes it on to write_to_journal().
 *
 * iovec:  array holding the entry; n slots are filled by the caller, m slots
 *         are allocated. At least N_IOVEC_META_FIELDS slots must be free.
 * ucred:  credentials of the sender; used for _PID, _UID, _GID and for all
 *         fields looked up by PID (_COMM, _EXE, _CMDLINE, audit, cgroup,
 *         session, unit, SELinux context).
 * tv:     source timestamp, stored as _SOURCE_REALTIME_TIMESTAMP.
 * label:  SELinux label of label_len bytes; it is copied with memcpy() and
 *         NUL-terminated explicitly below.
 * unit_id: alternative source for _SYSTEMD_UNIT next to cg_pid_get_unit().
 *
 * Fields whose string cannot be allocated or looked up are left out. The
 * target journal is chosen by s->split_mode: uid 0 for SPLIT_NONE, the
 * sender's uid for SPLIT_UID, otherwise the audit login uid unless the
 * sender is root. */
507 static void dispatch_message_real(
509 struct iovec *iovec, unsigned n, unsigned m,
512 const char *label, size_t label_len,
513 const char *unit_id) {
515 char *pid = NULL, *uid = NULL, *gid = NULL,
516 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
517 *comm = NULL, *cmdline = NULL, *hostname = NULL,
518 *audit_session = NULL, *audit_loginuid = NULL,
519 *exe = NULL, *cgroup = NULL, *session = NULL,
520 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
526 uid_t loginuid = 0, realuid = 0;
/* The caller must have reserved room for every field added here. */
531 assert(n + N_IOVEC_META_FIELDS <= m);
539 realuid = ucred->uid;
541 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
542 IOVEC_SET_STRING(iovec[n++], pid);
544 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
545 IOVEC_SET_STRING(iovec[n++], uid);
547 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
548 IOVEC_SET_STRING(iovec[n++], gid);
550 r = get_process_comm(ucred->pid, &t);
552 comm = strappend("_COMM=", t);
556 IOVEC_SET_STRING(iovec[n++], comm);
559 r = get_process_exe(ucred->pid, &t);
561 exe = strappend("_EXE=", t);
565 IOVEC_SET_STRING(iovec[n++], exe);
568 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
570 cmdline = strappend("_CMDLINE=", t);
574 IOVEC_SET_STRING(iovec[n++], cmdline);
577 r = audit_session_from_pid(ucred->pid, &audit);
579 if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
580 IOVEC_SET_STRING(iovec[n++], audit_session);
/* loginuid is also used further down to select the journal file. */
582 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
584 if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
585 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
587 t = shortened_cgroup_path(ucred->pid);
589 cgroup = strappend("_SYSTEMD_CGROUP=", t);
593 IOVEC_SET_STRING(iovec[n++], cgroup);
597 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
598 session = strappend("_SYSTEMD_SESSION=", t);
602 IOVEC_SET_STRING(iovec[n++], session);
/* sd_pid_get_owner_uid() expects a PID as first argument; the sender's uid
 * was passed here before, which looked up the owner of an unrelated
 * process. */
605 if (sd_pid_get_owner_uid(ucred->pid, &owner) >= 0)
606 if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
607 IOVEC_SET_STRING(iovec[n++], owner_uid);
610 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
611 unit = strappend("_SYSTEMD_UNIT=", t);
614 unit = strappend("_SYSTEMD_UNIT=", unit_id);
617 IOVEC_SET_STRING(iovec[n++], unit);
/* sizeof() of the literal includes its NUL byte, which leaves room for the
 * terminator written after the label bytes. */
621 selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
622 if (selinux_context) {
623 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
624 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
625 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
626 IOVEC_SET_STRING(iovec[n++], selinux_context);
629 security_context_t con;
/* Alternative source of the context: query it by PID. */
631 if (getpidcon(ucred->pid, &con) >= 0) {
632 selinux_context = strappend("_SELINUX_CONTEXT=", con);
634 IOVEC_SET_STRING(iovec[n++], selinux_context);
643 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
644 (unsigned long long) timeval_load(tv)) >= 0)
645 IOVEC_SET_STRING(iovec[n++], source_time);
648 /* Note that strictly speaking storing the boot id here is
649 * redundant since the entry includes this in-line
650 * anyway. However, we need this indexed, too. */
651 r = sd_id128_get_boot(&id);
653 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
654 IOVEC_SET_STRING(iovec[n++], boot_id);
656 r = sd_id128_get_machine(&id);
658 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
659 IOVEC_SET_STRING(iovec[n++], machine_id);
661 t = gethostname_malloc();
663 hostname = strappend("_HOSTNAME=", t);
666 IOVEC_SET_STRING(iovec[n++], hostname);
/* Select the uid whose journal receives the entry, see s->split_mode. */
672 s->split_mode == SPLIT_NONE ? 0 :
673 (s->split_mode == SPLIT_UID ? realuid :
674 (realuid == 0 ? 0 : loginuid)), iovec, n);
/* The field strings are owned by this function and released after the
 * write. */
687 free(audit_loginuid);
692 free(selinux_context);
/* Logs a message generated by journald itself.
 *
 * The entry consists of PRIORITY=6, _TRANSPORT=driver, a MESSAGE= field
 * formatted printf-style from format and the variable arguments, and a
 * MESSAGE_ID= field built from message_id. It is dispatched with the
 * credentials of the calling process. */
695 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
/* Room for "MESSAGE_ID=" (11 bytes), 32 more characters and the NUL. */
696 char mid[11 + 32 + 1];
697 char buffer[16 + LINE_MAX + 1];
/* Four fields are set here; the rest is reserved for the metadata fields
 * added by dispatch_message_real(). */
698 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
706 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
707 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The formatted text is written right behind the 8-byte "MESSAGE=" prefix. */
709 memcpy(buffer, "MESSAGE=", 8);
710 va_start(ap, format);
711 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
713 char_array_0(buffer);
714 IOVEC_SET_STRING(iovec[n++], buffer);
716 snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
718 IOVEC_SET_STRING(iovec[n++], mid);
/* Use our own process credentials as the sender. */
721 ucred.pid = getpid();
722 ucred.uid = getuid();
723 ucred.gid = getgid();
725 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Entry point for messages received from clients.
 *
 * The message priority is compared against s->max_level_store, the sender's
 * shortened cgroup path is cut down to a rate-limiting key and tested with
 * journal_rate_limit_test(), a "Suppressed ..." driver message is generated
 * for previously dropped messages, and the entry is passed on to
 * dispatch_message_real(). iovec may be NULL only when n is 0. */
728 void server_dispatch_message(
730 struct iovec *iovec, unsigned n, unsigned m,
733 const char *label, size_t label_len,
738 char *path = NULL, *c;
741 assert(iovec || n == 0);
746 if (LOG_PRI(priority) > s->max_level_store)
752 path = shortened_cgroup_path(ucred->pid);
756 /* example: /user/lennart/3/foobar
757 * /system/dbus.service/foobar
759 * So let's cut off everything past the third /, since that is
760 * where user directories start */
762 c = strchr(path, '/');
764 c = strchr(c+1, '/');
766 c = strchr(c+1, '/');
/* The currently available disk space is an input to the rate limiter. */
772 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
779 /* Write a suppression message if we suppressed something */
781 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
786 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
/* Opens the journal files of this machine as far as they are not open yet.
 *
 * The persistent file /var/log/journal/<machine-id>/system.journal is opened
 * when storage is "persistent" or "auto" and the flag file
 * /run/systemd/journal/flushed exists. The runtime file
 * /run/log/journal/<machine-id>/system.journal is handled unless storage is
 * "none": when a system journal is open it is only opened if it already
 * exists, otherwise it is created. Newly opened files get their permissions
 * fixed. */
790 static int system_journal_open(Server *s) {
796 r = sd_id128_get_machine(&machine);
800 sd_id128_to_string(machine, ids);
802 if (!s->system_journal &&
803 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
804 access("/run/systemd/journal/flushed", F_OK) >= 0) {
806 /* If in auto mode: first try to create the machine
807 * path, but not the prefix.
809 * If in persistent mode: create /var/log/journal and
810 * the machine path */
812 if (s->storage == STORAGE_PERSISTENT)
813 (void) mkdir("/var/log/journal/", 0755);
815 fn = strappend("/var/log/journal/", ids);
819 (void) mkdir(fn, 0755);
822 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
826 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
830 server_fix_perms(s, s->system_journal, 0);
/* -ENOENT and -EROFS are not reported as warnings. */
833 if (r != -ENOENT && r != -EROFS)
834 log_warning("Failed to open system journal: %s", strerror(-r));
840 if (!s->runtime_journal &&
841 (s->storage != STORAGE_NONE)) {
843 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
847 if (s->system_journal) {
849 /* Try to open the runtime journal, but only
850 * if it already exists, so that we can flush
851 * it into the system journal */
/* No O_CREAT here, and sealing is disabled for the runtime journal. */
853 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
858 log_warning("Failed to open runtime journal: %s", strerror(-r));
865 /* OK, we really need the runtime journal, so create
866 * it if necessary. */
868 (void) mkdir_parents(fn, 0755);
869 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
873 log_error("Failed to open runtime journal: %s", strerror(-r));
878 if (s->runtime_journal)
879 server_fix_perms(s, s->runtime_journal, 0);
/* Copies the entries of the runtime journal into the system journal.
 *
 * This applies only when storage is "auto" or "persistent" and a runtime
 * journal is open. The system journal is opened on demand. All runtime
 * entries are iterated via sd_journal and copied with
 * journal_file_copy_entry(); a second copy attempt follows the "Allocation
 * limit reached." message. Afterwards the runtime journal is closed and
 * /run/log/journal is removed. */
885 static int server_flush_to_var(Server *s) {
893 if (s->storage != STORAGE_AUTO &&
894 s->storage != STORAGE_PERSISTENT)
897 if (!s->runtime_journal)
/* Try to get hold of the system journal; proceed only if that worked. */
900 system_journal_open(s);
902 if (!s->system_journal)
905 log_debug("Flushing to /var...");
907 r = sd_id128_get_machine(&machine);
909 log_error("Failed to get machine id: %s", strerror(-r));
913 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
915 log_error("Failed to read runtime journal: %s", strerror(-r));
919 SD_JOURNAL_FOREACH(j) {
/* f is the file holding the current entry of the iteration. */
923 assert(f && f->current_offset > 0);
925 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
927 log_error("Can't read entry: %s", strerror(-r));
931 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
933 log_debug("Allocation limit reached.");
935 journal_file_post_change(s->system_journal);
/* Second attempt for the same entry. */
939 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
943 log_error("Can't write entry: %s", strerror(-r));
949 journal_file_post_change(s->system_journal);
/* The runtime journal is not needed any longer. */
951 journal_file_close(s->runtime_journal);
952 s->runtime_journal = NULL;
955 rm_rf("/run/log/journal", false, true, false);
/* Handles a single epoll event.
 *
 * The event is dispatched on the file descriptor it refers to:
 *  - s->signal_fd: a signalfd_siginfo record is read; SIGUSR1 creates the
 *    flag file /run/systemd/journal/flushed and flushes to /var, SIGUSR2 is
 *    handled separately.
 *  - s->dev_kmsg_fd: kernel messages are read via server_read_dev_kmsg().
 *  - s->native_fd / s->syslog_fd: a datagram is received together with its
 *    control messages and passed to the matching message parser.
 *  - s->stdout_fd: a new stdout stream is set up.
 *  - anything else: ev->data.ptr is taken as a StdoutStream object.
 *
 * The return statements are on lines not visible in this excerpt. */
960 static int process_event(Server *s, struct epoll_event *ev) {
964 if (ev->data.fd == s->signal_fd) {
965 struct signalfd_siginfo sfsi;
968 if (ev->events != EPOLLIN) {
969 log_error("Got invalid event from epoll.");
/* Only complete records are accepted. */
973 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
974 if (n != sizeof(sfsi)) {
979 if (errno == EINTR || errno == EAGAIN)
985 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
987 if (sfsi.ssi_signo == SIGUSR1) {
/* system_journal_open() checks for this flag file before it opens the
 * persistent journal. */
988 touch("/run/systemd/journal/flushed");
989 server_flush_to_var(s);
993 if (sfsi.ssi_signo == SIGUSR2) {
1001 } else if (ev->data.fd == s->dev_kmsg_fd) {
1004 if (ev->events != EPOLLIN) {
1005 log_error("Got invalid event from epoll.");
1009 r = server_read_dev_kmsg(s);
1015 } else if (ev->data.fd == s->native_fd ||
1016 ev->data.fd == s->syslog_fd) {
1018 if (ev->events != EPOLLIN) {
1019 log_error("Got invalid event from epoll.");
1024 struct msghdr msghdr;
1026 struct ucred *ucred = NULL;
1027 struct timeval *tv = NULL;
1028 struct cmsghdr *cmsg;
1030 size_t label_len = 0;
1032 struct cmsghdr cmsghdr;
1034 /* We use NAME_MAX space for the
1035 * SELinux label here. The kernel
1036 * currently enforces no limit, but
1037 * according to suggestions from the
1038 * SELinux people this will change and
1039 * it will probably be identical to
1040 * NAME_MAX. For now we use that, but
1041 * this should be updated one day when
1042 * the final limit is known.*/
1043 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1044 CMSG_SPACE(sizeof(struct timeval)) +
1045 CMSG_SPACE(sizeof(int)) + /* fd */
1046 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask the socket how many bytes are pending (stored in v) so that the
 * receive buffer can be sized before the datagram is read. */
1053 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1054 log_error("SIOCINQ failed: %m");
1058 if (s->buffer_size < (size_t) v) {
/* Grow to at least twice the old size; one extra byte is allocated on top
 * of the computed length. */
1062 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1063 b = realloc(s->buffer, l+1);
1066 log_error("Couldn't increase buffer.");
1075 iovec.iov_base = s->buffer;
1076 iovec.iov_len = s->buffer_size;
1080 msghdr.msg_iov = &iovec;
1081 msghdr.msg_iovlen = 1;
1082 msghdr.msg_control = &control;
1083 msghdr.msg_controllen = sizeof(control);
/* Non-blocking receive; received descriptors get close-on-exec set. */
1085 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1088 if (errno == EINTR || errno == EAGAIN)
1091 log_error("recvmsg() failed: %m");
/* Pick up credentials, security label, timestamp and passed descriptors
 * from the control messages. */
1095 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1097 if (cmsg->cmsg_level == SOL_SOCKET &&
1098 cmsg->cmsg_type == SCM_CREDENTIALS &&
1099 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1100 ucred = (struct ucred*) CMSG_DATA(cmsg);
1101 else if (cmsg->cmsg_level == SOL_SOCKET &&
1102 cmsg->cmsg_type == SCM_SECURITY) {
1103 label = (char*) CMSG_DATA(cmsg);
1104 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1105 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1106 cmsg->cmsg_type == SO_TIMESTAMP &&
1107 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1108 tv = (struct timeval*) CMSG_DATA(cmsg);
1109 else if (cmsg->cmsg_level == SOL_SOCKET &&
1110 cmsg->cmsg_type == SCM_RIGHTS) {
1111 fds = (int*) CMSG_DATA(cmsg);
1112 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* The syslog socket accepts plain datagrams only. */
1116 if (ev->data.fd == s->syslog_fd) {
1119 if (n > 0 && n_fds == 0) {
1120 e = memchr(s->buffer, '\n', n);
1126 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1127 } else if (n_fds > 0)
1128 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* The native socket takes either a payload without descriptors or exactly
 * one descriptor without payload. */
1131 if (n > 0 && n_fds == 0)
1132 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1133 else if (n == 0 && n_fds == 1)
1134 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1136 log_warning("Got too many file descriptors via native socket. Ignoring.");
/* Close whatever descriptors were received. */
1139 close_many(fds, n_fds);
1144 } else if (ev->data.fd == s->stdout_fd) {
1146 if (ev->events != EPOLLIN) {
1147 log_error("Got invalid event from epoll.");
1151 stdout_stream_new(s);
1155 StdoutStream *stream;
/* Any event bit other than EPOLLIN and EPOLLHUP is rejected. */
1157 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1158 log_error("Got invalid event from epoll.");
1162 /* If it is none of the well-known fds, it must be an
1163 * stdout stream fd. Note that this is a bit ugly here
1164 * (since we rely that none of the well-known fds
1165 * could be interpreted as pointer), but nonetheless
1166 * safe, since the well-known fds would never get an
1167 * fd > 4096, i.e. beyond the first memory page */
1169 stream = ev->data.ptr;
/* A result <= 0 from the stream handler ends the stream. */
1171 if (stdout_stream_process(stream) <= 0)
1172 stdout_stream_free(stream);
1177 log_error("Unknown event.");
/* Sets the process signal mask to SIGINT, SIGTERM, SIGUSR1 and SIGUSR2,
 * creates a non-blocking, close-on-exec signalfd for these signals in
 * s->signal_fd and registers it for EPOLLIN with s->epoll_fd. Failures of
 * signalfd() and epoll_ctl() are logged. */
1181 static int open_signalfd(Server *s) {
1183 struct epoll_event ev;
1187 assert_se(sigemptyset(&mask) == 0);
1188 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
/* The signals are delivered through the signalfd below. */
1189 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1191 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1192 if (s->signal_fd < 0) {
1193 log_error("signalfd(): %m");
1198 ev.events = EPOLLIN;
1199 ev.data.fd = s->signal_fd;
1201 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1202 log_error("epoll_ctl(): %m");
/* Applies journald switches given on the kernel command line.
 *
 * /proc/cmdline is read and split into words. The boolean switches
 * systemd.journald.forward_to_syslog=, systemd.journald.forward_to_kmsg= and
 * systemd.journald.forward_to_console= update the matching Server fields;
 * values that fail to parse and other words starting with "systemd.journald"
 * produce a warning. The detect_container() check at the top guards the
 * whole procedure. */
1209 static int server_parse_proc_cmdline(Server *s) {
1210 char *line, *w, *state;
1214 if (detect_container(NULL) > 0)
1217 r = read_one_line_file("/proc/cmdline", &line);
1219 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1223 FOREACH_WORD_QUOTED(w, l, line, state) {
1226 word = strndup(w, l);
/* The numeric offsets below (35, 33, 36) are the lengths of the respective
 * prefixes, i.e. they point at the value behind the '='. */
1232 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1233 r = parse_boolean(word + 35);
1235 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1237 s->forward_to_syslog = r;
1238 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1239 r = parse_boolean(word + 33);
1241 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1243 s->forward_to_kmsg = r;
1244 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1245 r = parse_boolean(word + 36);
1247 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1249 s->forward_to_console = r;
1250 } else if (startswith(word, "systemd.journald"))
1251 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Parses the "Journal" section of /etc/systemd/journald.conf into the Server
 * object, using the gperf-generated lookup journald_gperf_lookup. ENOENT on
 * open is handled separately from other open errors, which are logged as a
 * warning; parse errors are logged as a warning as well. */
1263 static int server_parse_config_file(Server *s) {
1270 fn = "/etc/systemd/journald.conf";
/* "re": read-only; 'e' is the glibc flag for close-on-exec. */
1271 f = fopen(fn, "re");
1273 if (errno == ENOENT)
1276 log_warning("Failed to open configuration file %s: %m", fn);
1280 r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
1282 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Initializes the Server object.
 *
 * Sets defaults, applies the configuration file and the kernel command line,
 * creates /run/systemd/journal, allocates the user journal hashmap, the mmap
 * cache and the epoll instance, takes over sockets passed in by the service
 * manager, opens the remaining sockets and /dev/kmsg, sets up signal
 * handling, udev and rate limiting, and finally opens the journal files. */
1289 static int server_init(Server *s) {
/* -1 marks a file descriptor as not open. */
1295 s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1299 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1300 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1302 s->forward_to_syslog = true;
1304 s->max_level_store = LOG_DEBUG;
1305 s->max_level_syslog = LOG_DEBUG;
1306 s->max_level_kmsg = LOG_NOTICE;
1307 s->max_level_console = LOG_INFO;
/* Fill both metrics structures with 0xFF bytes. NOTE(review): presumably an
 * all-ones field means "not configured" -- confirm against the journal file
 * code. */
1309 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1310 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* The kernel command line is applied after the configuration file. */
1312 server_parse_config_file(s);
1313 server_parse_proc_cmdline(s);
1315 mkdir_p("/run/systemd/journal", 0755);
1317 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1318 if (!s->user_journals)
1321 s->mmap = mmap_cache_new();
1325 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1326 if (s->epoll_fd < 0) {
1327 log_error("Failed to create epoll object: %m");
/* Take over the sockets passed in via socket activation. */
1331 n = sd_listen_fds(true);
1333 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
/* Classify every passed descriptor by socket type and path; at most one
 * socket of each kind is accepted. */
1337 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1339 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1341 if (s->native_fd >= 0) {
1342 log_error("Too many native sockets passed.");
1348 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1350 if (s->stdout_fd >= 0) {
1351 log_error("Too many stdout sockets passed.");
1357 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1359 if (s->syslog_fd >= 0) {
1360 log_error("Too many /dev/log sockets passed.");
1367 log_error("Unknown socket passed.");
/* Set up the individual input sources. */
1372 r = server_open_syslog_socket(s);
1376 r = server_open_native_socket(s);
1380 r = server_open_stdout_socket(s);
1384 r = server_open_dev_kmsg(s);
1388 r = server_open_kernel_seqnum(s);
1392 r = open_signalfd(s);
1396 s->udev = udev_new();
1400 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1404 r = system_journal_open(s);
/* Calls journal_file_maybe_append_tag() with the current realtime clock for
 * the system journal (if open) and for every per-user journal. */
1411 static void server_maybe_append_tags(Server *s) {
1417 n = now(CLOCK_REALTIME);
1419 if (s->system_journal)
1420 journal_file_maybe_append_tag(s->system_journal, n);
1422 HASHMAP_FOREACH(f, s->user_journals, i)
1423 journal_file_maybe_append_tag(f, n);
/* Releases everything held by the Server object: stdout streams, journal
 * files, the user journal hashmap, all open file descriptors, the rate
 * limiter, the kernel sequence number mapping, the mmap cache and the udev
 * context. */
1427 static void server_done(Server *s) {
/* Each stdout_stream_free() call is expected to advance the list head. */
1431 while (s->stdout_streams)
1432 stdout_stream_free(s->stdout_streams);
1434 if (s->system_journal)
1435 journal_file_close(s->system_journal);
1437 if (s->runtime_journal)
1438 journal_file_close(s->runtime_journal);
/* Empty the hashmap entry by entry, closing each user journal. */
1440 while ((f = hashmap_steal_first(s->user_journals)))
1441 journal_file_close(f);
1443 hashmap_free(s->user_journals);
/* Descriptors that were never opened still hold -1, see server_init(). */
1445 if (s->epoll_fd >= 0)
1446 close_nointr_nofail(s->epoll_fd);
1448 if (s->signal_fd >= 0)
1449 close_nointr_nofail(s->signal_fd);
1451 if (s->syslog_fd >= 0)
1452 close_nointr_nofail(s->syslog_fd);
1454 if (s->native_fd >= 0)
1455 close_nointr_nofail(s->native_fd);
1457 if (s->stdout_fd >= 0)
1458 close_nointr_nofail(s->stdout_fd);
1460 if (s->dev_kmsg_fd >= 0)
1461 close_nointr_nofail(s->dev_kmsg_fd);
1464 journal_rate_limit_free(s->rate_limit);
/* The kernel sequence number is a mapping of sizeof(uint64_t) bytes. */
1466 if (s->kernel_seqnum)
1467 munmap(s->kernel_seqnum, sizeof(uint64_t));
1473 mmap_cache_unref(s->mmap);
1476 udev_unref(s->udev);
/* Entry point of systemd-journald.
 *
 * The program takes no arguments. After logging is set up the server is
 * initialized, the journal directories are vacuumed, the runtime journal is
 * flushed to /var and pending kernel messages are processed. The main loop
 * waits for one epoll event at a time, with a timeout derived from
 * journal_file_next_evolve_usec() when a system journal is open, and hands it
 * to process_event(). On exit a stop message is logged and the server is torn
 * down. The exit status is EXIT_FAILURE if the last result r is negative,
 * EXIT_SUCCESS otherwise. */
1479 int main(int argc, char *argv[]) {
1483 /* if (getppid() != 1) { */
1484 /* log_error("This program should be invoked by init only."); */
1485 /* return EXIT_FAILURE; */
1489 log_error("This program does not take arguments.");
1490 return EXIT_FAILURE;
1493 log_set_target(LOG_TARGET_SAFE);
1494 log_set_facility(LOG_SYSLOG);
1495 log_parse_environment();
1500 r = server_init(&server);
/* Start-up housekeeping before the first event is processed. */
1504 server_vacuum(&server);
1505 server_flush_to_var(&server);
1506 server_flush_dev_kmsg(&server);
1508 log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
1509 server_driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
1513 "STATUS=Processing requests...");
1516 struct epoll_event event;
1522 if (server.system_journal &&
1523 journal_file_next_evolve_usec(server.system_journal, &u)) {
1526 n = now(CLOCK_REALTIME);
/* Convert the remaining microseconds to milliseconds, rounding up. */
1531 t = (int) ((u - n + USEC_PER_MSEC - 1) / USEC_PER_MSEC);
1536 r = epoll_wait(server.epoll_fd, &event, 1, t);
1542 log_error("epoll_wait() failed: %m");
1548 r = process_event(&server, &event);
/* Periodic work done on every loop iteration. */
1555 server_maybe_append_tags(&server);
1556 server_maybe_warn_forward_syslog_missed(&server);
1559 log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
1560 server_driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
1564 "STATUS=Shutting down...");
1566 server_done(&server);
1568 return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;