1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
25 #include <sys/signalfd.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
35 #include <systemd/sd-journal.h>
36 #include <systemd/sd-messages.h>
37 #include <systemd/sd-daemon.h>
40 #include <systemd/sd-login.h>
45 #include "journal-file.h"
46 #include "socket-util.h"
47 #include "cgroup-util.h"
51 #include "conf-parser.h"
52 #include "journal-internal.h"
53 #include "journal-vacuum.h"
54 #include "journal-authenticate.h"
56 #include "journald-rate-limit.h"
57 #include "journald-kmsg.h"
58 #include "journald-syslog.h"
59 #include "journald-stream.h"
60 #include "journald-console.h"
61 #include "journald-native.h"
65 #include <acl/libacl.h>
70 #include <selinux/selinux.h>
/* Upper bound on the number of per-user journal files kept open at once
 * (find_journal() closes entries once the hashmap reaches this size). */
73 #define USER_JOURNALS_MAX 1024
/* Defaults copied into Server.rate_limit_interval / rate_limit_burst by
 * server_init() before the configuration is parsed. */
75 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
76 #define DEFAULT_RATE_LIMIT_BURST 200
/* How long a value computed by available_space() is reused before it is
 * recomputed. */
78 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Configuration-file spellings of the Storage enum values.
 * NOTE(review): DEFINE_STRING_TABLE_LOOKUP / DEFINE_CONFIG_PARSE_ENUM are
 * project macros defined elsewhere; presumably they generate the
 * string<->enum helpers and the config_parse_storage() parser - confirm. */
80 static const char* const storage_table[] = {
81 [STORAGE_AUTO] = "auto",
82 [STORAGE_VOLATILE] = "volatile",
83 [STORAGE_PERSISTENT] = "persistent",
84 [STORAGE_NONE] = "none"
87 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
88 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Configuration-file spellings of the SplitMode enum values.
 * NOTE(review): not every table entry may be visible in this excerpt;
 * the lookup/parser macros are project macros defined elsewhere. */
90 static const char* const split_mode_table[] = {
91 [SPLIT_NONE] = "none",
93 [SPLIT_LOGIN] = "login"
96 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
97 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
/* Estimate how many more bytes the journal may consume.
 *
 * The result is cached in s->cached_available_space together with a
 * CLOCK_MONOTONIC timestamp and reused until RECHECK_AVAILABLE_SPACE_USEC
 * has passed.
 *
 * The directory scanned is <prefix><machine-id>, where the prefix and the
 * metrics are the /var/log/journal/ + system_metrics pair or the
 * /run/log/journal/ + runtime_metrics pair (the branch separating the two
 * assignments is not fully visible here; presumably the /run pair is the
 * fallback when no system journal is open).
 *
 * Two limits are computed: max_use minus the disk usage of the existing
 * journal files, and the file system's free space minus keep_free; both are
 * clamped at 0. */
99 static uint64_t available_space(Server *s) {
104 uint64_t sum = 0, avail = 0, ss_avail = 0;
110 ts = now(CLOCK_MONOTONIC);
/* Serve the cached value while it is still fresh. */
112 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
113 return s->cached_available_space;
115 r = sd_id128_get_machine(&machine);
119 if (s->system_journal) {
120 f = "/var/log/journal/";
121 m = &s->system_metrics;
123 f = "/run/log/journal/";
124 m = &s->runtime_metrics;
129 p = strappend(f, sd_id128_to_string(machine, ids));
139 if (fstatvfs(dirfd(d), &ss) < 0)
145 union dirent_storage buf;
147 r = readdir_r(d, &buf.de, &de);
/* Only regular files named *.journal or *.journal~ count towards usage. */
154 if (!endswith(de->d_name, ".journal") &&
155 !endswith(de->d_name, ".journal~"))
158 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
161 if (!S_ISREG(st.st_mode))
/* st_blocks is counted in 512-byte units. */
164 sum += (uint64_t) st.st_blocks * 512UL;
167 avail = sum >= m->max_use ? 0 : m->max_use - sum;
/* NOTE(review): POSIX specifies f_bavail in units of f_frsize, not
 * f_bsize - confirm the two are equal on all supported file systems. */
169 ss_avail = ss.f_bsize * ss.f_bavail;
171 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
173 if (ss_avail < avail)
176 s->cached_available_space = avail;
177 s->cached_available_space_timestamp = ts;
/* Resolve the gid of the "adm" group into s->file_gid, at most once.
 *
 * s->file_gid_valid is set even when the lookup fails, so a failed
 * resolution is only warned about and never retried. */
185 static void server_read_file_gid(Server *s) {
186 const char *adm = "adm";
191 if (s->file_gid_valid)
194 r = get_group_creds(&adm, &s->file_gid);
196 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
198 /* if we couldn't read the gid, then it will be 0, but that's
199 * fine and we shouldn't try to resolve the group again, so
200 * let's just pretend it worked right-away. */
201 s->file_gid_valid = true;
/* Fix ownership, mode and ACL of journal file f.
 *
 * The file is set to mode 0640, owner 0 and group s->file_gid. Afterwards a
 * POSIX ACL user entry for `uid` is looked up (and created if needed), given
 * ACL_READ, the mask is recalculated and the ACL is written back to the fd.
 *
 * Every failure is logged as a warning and otherwise ignored.
 * NOTE(review): the conditions deciding when the ACL part is skipped are
 * not visible in this excerpt. */
204 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
209 acl_permset_t permset;
214 server_read_file_gid(s);
216 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
218 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
224 acl = acl_get_fd(f->fd);
226 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
230 r = acl_find_uid(acl, uid, &entry);
/* Add a new ACL_USER entry qualified with uid. */
233 if (acl_create_entry(&acl, &entry) < 0 ||
234 acl_set_tag_type(entry, ACL_USER) < 0 ||
235 acl_set_qualifier(entry, &uid) < 0) {
236 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Grant read permission on the entry and recompute the ACL mask. */
241 if (acl_get_permset(entry, &permset) < 0 ||
242 acl_add_perm(permset, ACL_READ) < 0 ||
243 acl_calc_mask(&acl) < 0) {
244 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
248 if (acl_set_fd(f->fd, acl) < 0)
249 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Select the journal file an entry attributed to `uid` is written to.
 *
 * While the runtime journal is open it is always returned. Otherwise a
 * per-user file /var/log/journal/<machine-id>/user-<uid>.journal is looked
 * up in s->user_journals, or opened, permission-fixed and inserted there.
 * Before opening a new file, entries are removed from the hashmap and closed
 * until fewer than USER_JOURNALS_MAX remain.
 *
 * s->system_journal is returned as the fallback on the visible failure
 * paths (machine id lookup, path allocation, open, hashmap insertion).
 * NOTE(review): the condition guarding the first system_journal return is
 * not visible in this excerpt. */
256 static JournalFile* find_journal(Server *s, uid_t uid) {
264 /* We split up user logs only on /var, not on /run. If the
265 * runtime file is open, we write to it exclusively, in order
266 * to guarantee proper order as soon as we flush /run to
267 * /var and close the runtime file. */
269 if (s->runtime_journal)
270 return s->runtime_journal;
273 return s->system_journal;
275 r = sd_id128_get_machine(&machine);
277 return s->system_journal;
279 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
283 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
284 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
285 return s->system_journal;
287 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
288 /* Too many open? Then let's close one */
289 f = hashmap_steal_first(s->user_journals);
291 journal_file_close(f);
294 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
298 return s->system_journal;
300 server_fix_perms(s, f, uid);
302 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
/* Could not register the file: close it again and use the fallback. */
304 journal_file_close(f);
305 return s->system_journal;
/* Rotate all open journal files: the runtime journal, the system journal
 * and every per-user journal in s->user_journals.
 *
 * journal_file_rotate() takes the file pointer by reference; after a failed
 * rotation the code distinguishes "pointer still set" (log "Failed to
 * rotate") from "pointer cleared" (log "Failed to create new ..."). After
 * rotation, permissions are re-applied with server_fix_perms(); rotated user
 * journals are put back into the hashmap under their key.
 *
 * The runtime journal is rotated with sealing disabled (third argument
 * false); the others use s->seal. */
311 static void server_rotate(Server *s) {
317 log_debug("Rotating...");
319 if (s->runtime_journal) {
320 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
322 if (s->runtime_journal)
323 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
325 log_error("Failed to create new runtime journal: %s", strerror(-r));
327 server_fix_perms(s, s->runtime_journal, 0);
330 if (s->system_journal) {
331 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
333 if (s->system_journal)
334 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
336 log_error("Failed to create new system journal: %s", strerror(-r));
339 server_fix_perms(s, s->system_journal, 0);
/* The hashmap key k is the uid the user journal belongs to. */
342 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
343 r = journal_file_rotate(&f, s->compress, s->seal);
346 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
348 log_error("Failed to create user journal: %s", strerror(-r));
350 hashmap_replace(s->user_journals, k, f);
351 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Vacuum the journal directories of this machine.
 *
 * /var/log/journal/<machine-id> is vacuumed when the system journal is open,
 * /run/log/journal/<machine-id> when the runtime journal is open, each with
 * the max_use / keep_free values of the matching metrics. -ENOENT from the
 * vacuum call is not reported as an error.
 *
 * Finally the cached available-space timestamp is reset to 0, so the next
 * available_space() call recomputes instead of using its cache. */
356 static void server_vacuum(Server *s) {
362 log_debug("Vacuuming...");
364 r = sd_id128_get_machine(&machine);
366 log_error("Failed to get machine ID: %s", strerror(-r));
370 sd_id128_to_string(machine, ids);
372 if (s->system_journal) {
373 p = strappend("/var/log/journal/", ids);
379 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
380 if (r < 0 && r != -ENOENT)
381 log_error("Failed to vacuum %s: %s", p, strerror(-r));
385 if (s->runtime_journal) {
386 p = strappend("/run/log/journal/", ids);
392 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
393 if (r < 0 && r != -ENOENT)
394 log_error("Failed to vacuum %s: %s", p, strerror(-r));
398 s->cached_available_space_timestamp = 0;
/* Compute the cgroup path of `pid` relative to the cgroup of PID 1.
 *
 * Both paths are queried in the SYSTEMD_CGROUP_CONTROLLER hierarchy. A
 * trailing "/system" is cut off PID 1's path; if the process path then
 * starts with that prefix, the remainder is duplicated with strdup().
 * NOTE(review): the handling of an init path of "/" and the exact return
 * values are not visible in this excerpt; presumably the caller owns the
 * returned string - confirm. */
401 static char *shortened_cgroup_path(pid_t pid) {
403 char *process_path, *init_path, *path;
407 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
411 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
/* 7 == strlen("/system") */
417 if (endswith(init_path, "/system"))
418 init_path[strlen(init_path) - 7] = 0;
419 else if (streq(init_path, "/"))
422 if (startswith(process_path, init_path)) {
425 p = strdup(process_path + strlen(init_path));
/* Append one entry, given as n iovec fields, to the journal file chosen for
 * `uid` by find_journal().
 *
 * If the file suggests rotation, the journal file is looked up again before
 * writing. When journal_file_append_entry() fails with one of the error
 * codes listed below, a reason is logged, the file is looked up again and
 * the write is retried; other errors are logged as "Failed to write entry,
 * ignoring".
 *
 * journal_file_append_entry() reports failures as negative errno values, as
 * every comparison in this function assumes. */
443 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
445 bool vacuumed = false;
452 f = find_journal(s, uid);
456 if (journal_file_rotate_suggested(f)) {
457 log_debug("Journal header limits reached or header out-of-date, rotating.");
462 f = find_journal(s, uid);
468 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
473 (r != -E2BIG && /* hit limit */
474 r != -EFBIG && /* hit fs limit */
475 r != -EDQUOT && /* quota hit */
476 r != -ENOSPC && /* disk full */
477 r != -EBADMSG && /* corrupted */
478 r != -ENODATA && /* truncated */
479 r != -EHOSTDOWN && /* other machine */
480 r != -EPROTONOSUPPORT && /* unsupported feature */
481 r != -EBUSY && /* unclean shutdown */
482 r != -ESHUTDOWN /* already archived */)) {
483 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* All four codes must be compared negated: the previous positive EDQUOT and
 * ENOSPC could never equal a negative r, so quota-hit and disk-full errors
 * were not reported as "Allocation limit reached" and fell through to the
 * later branches. */
487 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
488 log_debug("Allocation limit reached, rotating.");
489 else if (r == -EHOSTDOWN)
490 log_info("Journal file from other machine, rotating.");
491 else if (r == -EBUSY)
492 log_info("Unclean shutdown, rotating.");
494 log_warning("Journal file corrupted, rotating.");
500 f = find_journal(s, uid);
504 log_debug("Retrying write.");
/* Augment a message with trusted metadata fields and write it out.
 *
 * iovec holds n caller-supplied fields and has room for m entries in total;
 * the assertion requires space for N_IOVEC_META_FIELDS more. Each metadata
 * field is a freshly allocated "KEY=value" string that is appended to the
 * iovec only when it could be built:
 *
 *   from ucred: _PID, _UID, _GID, _COMM, _EXE, _CMDLINE, _AUDIT_SESSION,
 *               _AUDIT_LOGINUID, _SYSTEMD_CGROUP, _SYSTEMD_SESSION,
 *               _SYSTEMD_OWNER_UID, _SYSTEMD_UNIT, _SELINUX_CONTEXT
 *   from tv:    _SOURCE_REALTIME_TIMESTAMP
 *   always:     _BOOT_ID, _MACHINE_ID, _HOSTNAME
 *
 * The target journal is selected by s->split_mode: uid 0 for SPLIT_NONE,
 * the sender's uid for SPLIT_UID, otherwise the audit login uid (0 when
 * the sender's uid is 0).
 * NOTE(review): the guards around the ucred/tv/label sections and most of
 * the free() calls are not visible in this excerpt. */
508 static void dispatch_message_real(
510 struct iovec *iovec, unsigned n, unsigned m,
513 const char *label, size_t label_len,
514 const char *unit_id) {
516 char *pid = NULL, *uid = NULL, *gid = NULL,
517 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
518 *comm = NULL, *cmdline = NULL, *hostname = NULL,
519 *audit_session = NULL, *audit_loginuid = NULL,
520 *exe = NULL, *cgroup = NULL, *session = NULL,
521 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
527 uid_t loginuid = 0, realuid = 0;
532 assert(n + N_IOVEC_META_FIELDS <= m);
540 realuid = ucred->uid;
542 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
543 IOVEC_SET_STRING(iovec[n++], pid);
545 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
546 IOVEC_SET_STRING(iovec[n++], uid);
548 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
549 IOVEC_SET_STRING(iovec[n++], gid);
551 r = get_process_comm(ucred->pid, &t);
553 comm = strappend("_COMM=", t);
557 IOVEC_SET_STRING(iovec[n++], comm);
560 r = get_process_exe(ucred->pid, &t);
562 exe = strappend("_EXE=", t);
566 IOVEC_SET_STRING(iovec[n++], exe);
569 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
571 cmdline = strappend("_CMDLINE=", t);
575 IOVEC_SET_STRING(iovec[n++], cmdline);
578 r = audit_session_from_pid(ucred->pid, &audit);
580 if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
581 IOVEC_SET_STRING(iovec[n++], audit_session);
583 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
585 if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
586 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
588 t = shortened_cgroup_path(ucred->pid);
590 cgroup = strappend("_SYSTEMD_CGROUP=", t);
594 IOVEC_SET_STRING(iovec[n++], cgroup);
598 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
599 session = strappend("_SYSTEMD_SESSION=", t);
603 IOVEC_SET_STRING(iovec[n++], session);
/* sd_pid_get_owner_uid() expects a process id, like every other per-process
 * lookup here; passing the sender's uid looked up an unrelated process. */
606 if (sd_pid_get_owner_uid(ucred->pid, &owner) >= 0)
607 if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
608 IOVEC_SET_STRING(iovec[n++], owner_uid);
611 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
612 unit = strappend("_SYSTEMD_UNIT=", t);
615 unit = strappend("_SYSTEMD_UNIT=", unit_id);
618 IOVEC_SET_STRING(iovec[n++], unit);
/* The label is given with an explicit length, so it is copied with memcpy
 * and NUL-terminated by hand; sizeof() of the literal already includes
 * the room for the terminating NUL. */
622 selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
623 if (selinux_context) {
624 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
625 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
626 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
627 IOVEC_SET_STRING(iovec[n++], selinux_context);
630 security_context_t con;
632 if (getpidcon(ucred->pid, &con) >= 0) {
633 selinux_context = strappend("_SELINUX_CONTEXT=", con);
635 IOVEC_SET_STRING(iovec[n++], selinux_context);
644 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
645 (unsigned long long) timeval_load(tv)) >= 0)
646 IOVEC_SET_STRING(iovec[n++], source_time);
649 /* Note that strictly speaking storing the boot id here is
650 * redundant since the entry includes this in-line
651 * anyway. However, we need this indexed, too. */
652 r = sd_id128_get_boot(&id);
654 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
655 IOVEC_SET_STRING(iovec[n++], boot_id);
657 r = sd_id128_get_machine(&id);
659 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
660 IOVEC_SET_STRING(iovec[n++], machine_id);
662 t = gethostname_malloc();
664 hostname = strappend("_HOSTNAME=", t);
667 IOVEC_SET_STRING(iovec[n++], hostname);
673 s->split_mode == SPLIT_NONE ? 0 :
674 (s->split_mode == SPLIT_UID ? realuid :
675 (realuid == 0 ? 0 : loginuid)), iovec, n);
688 free(audit_loginuid);
693 free(selinux_context);
/* Log a message that originates from journald itself.
 *
 * Builds the fields PRIORITY=6, _TRANSPORT=driver and MESSAGE=<printf-style
 * formatted text>, plus MESSAGE_ID=<id> unless message_id equals
 * SD_ID128_NULL, and dispatches them with this process' own pid/uid/gid as
 * credentials. The formatted text is truncated to fit `buffer`. */
696 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
/* strlen("MESSAGE_ID=") == 11; presumably 32 hex digits for the id; NUL. */
697 char mid[11 + 32 + 1];
698 char buffer[16 + LINE_MAX + 1];
/* The four fields set below plus the metadata added on dispatch. */
699 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
707 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
708 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* 8 == strlen("MESSAGE="); the formatted text is written right after it. */
710 memcpy(buffer, "MESSAGE=", 8);
711 va_start(ap, format);
712 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
714 char_array_0(buffer);
715 IOVEC_SET_STRING(iovec[n++], buffer);
717 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
718 snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
720 IOVEC_SET_STRING(iovec[n++], mid);
724 ucred.pid = getpid();
725 ucred.uid = getuid();
726 ucred.gid = getgid();
728 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Entry point for incoming messages: filter by level, rate-limit, then
 * hand over to dispatch_message_real().
 *
 * The priority is compared against s->max_level_store. The sender's
 * shortened cgroup path, cut after its third '/', is used as the
 * rate-limiting key together with the priority and the currently available
 * space. When messages were suppressed, a driver message with
 * SD_MESSAGE_JOURNAL_DROPPED reports how many.
 * NOTE(review): the branches taken on each check (drop / skip rate
 * limiting) are not visible in this excerpt. */
731 void server_dispatch_message(
733 struct iovec *iovec, unsigned n, unsigned m,
736 const char *label, size_t label_len,
741 char *path = NULL, *c;
744 assert(iovec || n == 0);
749 if (LOG_PRI(priority) > s->max_level_store)
755 path = shortened_cgroup_path(ucred->pid);
759 /* example: /user/lennart/3/foobar
760 * /system/dbus.service/foobar
762 * So let's cut off everything past the third /, since that is
763 * where user directories start */
765 c = strchr(path, '/');
767 c = strchr(c+1, '/');
769 c = strchr(c+1, '/');
775 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
782 /* Write a suppression message if we suppressed something */
784 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
789 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
/* Open the system (/var) and/or runtime (/run) journal as configured.
 *
 * System journal: only attempted when it is not open yet, storage is
 * "persistent" or "auto", and the flag file /run/systemd/journal/flushed
 * exists. In persistent mode /var/log/journal/ itself is created first; the
 * per-machine directory is created in both modes. On success, permissions
 * are fixed and the size limit is announced. -ENOENT and -EROFS on open are
 * not warned about.
 *
 * Runtime journal: attempted when it is not open yet and storage is not
 * "none". If a system journal is open, the runtime journal is only opened
 * when it already exists (no O_CREAT); otherwise it is created, including
 * its parent directories. Sealing is always off for the runtime journal. */
793 static int system_journal_open(Server *s) {
799 r = sd_id128_get_machine(&machine);
803 sd_id128_to_string(machine, ids);
805 if (!s->system_journal &&
806 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
807 access("/run/systemd/journal/flushed", F_OK) >= 0) {
809 /* If in auto mode: first try to create the machine
810 * path, but not the prefix.
812 * If in persistent mode: create /var/log/journal and
813 * the machine path */
815 if (s->storage == STORAGE_PERSISTENT)
816 (void) mkdir("/var/log/journal/", 0755);
818 fn = strappend("/var/log/journal/", ids);
822 (void) mkdir(fn, 0755);
825 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
829 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
833 char fb[FORMAT_BYTES_MAX];
835 server_fix_perms(s, s->system_journal, 0);
836 server_driver_message(s, SD_ID128_NULL, "Allowing system journal files to grow to %s.",
837 format_bytes(fb, sizeof(fb), s->system_metrics.max_use));
841 if (r != -ENOENT && r != -EROFS)
842 log_warning("Failed to open system journal: %s", strerror(-r));
848 if (!s->runtime_journal &&
849 (s->storage != STORAGE_NONE)) {
851 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
855 if (s->system_journal) {
857 /* Try to open the runtime journal, but only
858 * if it already exists, so that we can flush
859 * it into the system journal */
861 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
866 log_warning("Failed to open runtime journal: %s", strerror(-r));
873 /* OK, we really need the runtime journal, so create
874 * it if necessary. */
876 (void) mkdir_parents(fn, 0755);
877 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
881 log_error("Failed to open runtime journal: %s", strerror(-r));
886 if (s->runtime_journal) {
887 char fb[FORMAT_BYTES_MAX];
889 server_fix_perms(s, s->runtime_journal, 0);
890 server_driver_message(s, SD_ID128_NULL, "Allowing runtime journal files to grow to %s.",
891 format_bytes(fb, sizeof(fb), s->runtime_metrics.max_use));
/* Copy all entries of the runtime journal into the system journal.
 *
 * Only relevant for storage "auto" or "persistent" with a runtime journal
 * open. The system journal is (re)opened first; without one nothing is
 * flushed. Entries are read through an sd_journal handle restricted to
 * runtime journals and copied one by one with journal_file_copy_entry();
 * the visible code retries a copy after logging "Allocation limit
 * reached.". Afterwards the runtime journal is closed, its pointer cleared
 * and /run/log/journal removed.
 * NOTE(review): the early-return values and the exact retry/error
 * conditions are not visible in this excerpt. */
898 static int server_flush_to_var(Server *s) {
901 sd_journal *j = NULL;
905 if (s->storage != STORAGE_AUTO &&
906 s->storage != STORAGE_PERSISTENT)
909 if (!s->runtime_journal)
912 system_journal_open(s);
914 if (!s->system_journal)
917 log_debug("Flushing to /var...");
919 r = sd_id128_get_machine(&machine);
921 log_error("Failed to get machine id: %s", strerror(-r));
925 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
927 log_error("Failed to read runtime journal: %s", strerror(-r));
931 SD_JOURNAL_FOREACH(j) {
936 assert(f && f->current_offset > 0);
938 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
940 log_error("Can't read entry: %s", strerror(-r));
944 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
946 log_debug("Allocation limit reached.");
948 journal_file_post_change(s->system_journal);
952 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
956 log_error("Can't write entry: %s", strerror(-r));
962 journal_file_post_change(s->system_journal);
964 journal_file_close(s->runtime_journal);
965 s->runtime_journal = NULL;
968 rm_rf("/run/log/journal", false, true, false);
/* Handle a single epoll event.
 *
 * The event is dispatched on the fd it belongs to:
 *   - signal_fd:   read one signalfd_siginfo; SIGUSR1 touches the "flushed"
 *                  flag file and flushes the runtime journal to /var;
 *                  SIGUSR2 has a separate branch.
 *   - dev_kmsg_fd: read kernel messages via server_read_dev_kmsg().
 *   - native_fd / syslog_fd: receive one datagram with recvmsg(), pick up
 *                  credentials, SELinux label, timestamp and passed fds
 *                  from the control messages, and process the payload.
 *   - stdout_fd:   accept a new stdout stream.
 *   - otherwise:   the event data is a StdoutStream pointer.
 *
 * For the well-known fds any event mask other than exactly EPOLLIN is
 * logged as invalid.
 * NOTE(review): return values, loop structure and the SIGUSR2 action are
 * not visible in this excerpt. */
976 static int process_event(Server *s, struct epoll_event *ev) {
980 if (ev->data.fd == s->signal_fd) {
981 struct signalfd_siginfo sfsi;
984 if (ev->events != EPOLLIN) {
985 log_error("Got invalid event from epoll.");
989 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
990 if (n != sizeof(sfsi)) {
995 if (errno == EINTR || errno == EAGAIN)
1001 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1003 if (sfsi.ssi_signo == SIGUSR1) {
1004 touch("/run/systemd/journal/flushed");
1005 server_flush_to_var(s);
1009 if (sfsi.ssi_signo == SIGUSR2) {
1017 } else if (ev->data.fd == s->dev_kmsg_fd) {
1020 if (ev->events != EPOLLIN) {
1021 log_error("Got invalid event from epoll.");
1025 r = server_read_dev_kmsg(s);
1031 } else if (ev->data.fd == s->native_fd ||
1032 ev->data.fd == s->syslog_fd) {
1034 if (ev->events != EPOLLIN) {
1035 log_error("Got invalid event from epoll.");
1040 struct msghdr msghdr;
1042 struct ucred *ucred = NULL;
1043 struct timeval *tv = NULL;
1044 struct cmsghdr *cmsg;
1046 size_t label_len = 0;
1048 struct cmsghdr cmsghdr;
1050 /* We use NAME_MAX space for the
1051 * SELinux label here. The kernel
1052 * currently enforces no limit, but
1053 * according to suggestions from the
1054 * SELinux people this will change and
1055 * it will probably be identical to
1056 * NAME_MAX. For now we use that, but
1057 * this should be updated one day when
1058 * the final limit is known.*/
1059 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1060 CMSG_SPACE(sizeof(struct timeval)) +
1061 CMSG_SPACE(sizeof(int)) + /* fd */
1062 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask the socket for the size of the pending data (SIOCINQ) so the receive
 * buffer can be grown before reading. */
1069 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1070 log_error("SIOCINQ failed: %m");
1074 if (s->buffer_size < (size_t) v) {
/* Grow to at least double the old size; one extra byte is allocated beyond
 * the recorded size l. */
1078 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1079 b = realloc(s->buffer, l+1);
1082 log_error("Couldn't increase buffer.");
1091 iovec.iov_base = s->buffer;
1092 iovec.iov_len = s->buffer_size;
1096 msghdr.msg_iov = &iovec;
1097 msghdr.msg_iovlen = 1;
1098 msghdr.msg_control = &control;
1099 msghdr.msg_controllen = sizeof(control);
1101 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1104 if (errno == EINTR || errno == EAGAIN)
1107 log_error("recvmsg() failed: %m");
/* Walk the control messages; credentials and timestamp are only accepted
 * when their length matches the expected structure exactly. */
1111 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1113 if (cmsg->cmsg_level == SOL_SOCKET &&
1114 cmsg->cmsg_type == SCM_CREDENTIALS &&
1115 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1116 ucred = (struct ucred*) CMSG_DATA(cmsg);
1117 else if (cmsg->cmsg_level == SOL_SOCKET &&
1118 cmsg->cmsg_type == SCM_SECURITY) {
1119 label = (char*) CMSG_DATA(cmsg);
1120 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1121 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1122 cmsg->cmsg_type == SO_TIMESTAMP &&
1123 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1124 tv = (struct timeval*) CMSG_DATA(cmsg);
1125 else if (cmsg->cmsg_level == SOL_SOCKET &&
1126 cmsg->cmsg_type == SCM_RIGHTS) {
1127 fds = (int*) CMSG_DATA(cmsg);
1128 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1132 if (ev->data.fd == s->syslog_fd) {
1135 if (n > 0 && n_fds == 0) {
1136 e = memchr(s->buffer, '\n', n);
1142 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1143 } else if (n_fds > 0)
1144 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native protocol: either an inline payload without fds, or an empty
 * datagram carrying exactly one fd that holds the payload. */
1147 if (n > 0 && n_fds == 0)
1148 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1149 else if (n == 0 && n_fds == 1)
1150 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1152 log_warning("Got too many file descriptors via native socket. Ignoring.");
1155 close_many(fds, n_fds);
1160 } else if (ev->data.fd == s->stdout_fd) {
1162 if (ev->events != EPOLLIN) {
1163 log_error("Got invalid event from epoll.");
1167 stdout_stream_new(s);
1171 StdoutStream *stream;
/* True iff any bit other than EPOLLIN/EPOLLHUP is set in the event mask. */
1173 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1174 log_error("Got invalid event from epoll.");
1178 /* If it is none of the well-known fds, it must be an
1179 * stdout stream fd. Note that this is a bit ugly here
1180 * (since we rely that none of the well-known fds
1181 * could be interpreted as pointer), but nonetheless
1182 * safe, since the well-known fds would never get an
1183 * fd > 4096, i.e. beyond the first memory page */
1185 stream = ev->data.ptr;
1187 if (stdout_stream_process(stream) <= 0)
1188 stdout_stream_free(stream);
1193 log_error("Unknown event.");
/* Route SIGINT, SIGTERM, SIGUSR1 and SIGUSR2 through a signalfd.
 *
 * The process signal mask is replaced (SIG_SETMASK) with exactly these four
 * signals, a non-blocking close-on-exec signalfd is created for them in
 * s->signal_fd, and the fd is registered for EPOLLIN on s->epoll_fd.
 * Failures of signalfd() and epoll_ctl() are logged. */
1197 static int open_signalfd(Server *s) {
1199 struct epoll_event ev;
1203 assert_se(sigemptyset(&mask) == 0);
1204 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1205 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1207 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1208 if (s->signal_fd < 0) {
1209 log_error("signalfd(): %m");
1214 ev.events = EPOLLIN;
1215 ev.data.fd = s->signal_fd;
1217 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1218 log_error("epoll_ctl(): %m");
/* Apply journald switches given on the kernel command line.
 *
 * /proc/cmdline is split into (quoted) words. The boolean options
 *   systemd.journald.forward_to_syslog=
 *   systemd.journald.forward_to_kmsg=
 *   systemd.journald.forward_to_console=
 * set the matching Server flags; unparsable values and any other word
 * starting with "systemd.journald" only produce a warning. The numeric
 * offsets 35, 33 and 36 are the lengths of the respective option prefixes.
 * NOTE(review): what happens when detect_container() reports a container is
 * not visible here; presumably the command line is skipped - confirm. */
1225 static int server_parse_proc_cmdline(Server *s) {
1226 char *line, *w, *state;
1230 if (detect_container(NULL) > 0)
1233 r = read_one_line_file("/proc/cmdline", &line);
1235 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1239 FOREACH_WORD_QUOTED(w, l, line, state) {
1242 word = strndup(w, l);
1248 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1249 r = parse_boolean(word + 35);
1251 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1253 s->forward_to_syslog = r;
1254 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1255 r = parse_boolean(word + 33);
1257 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1259 s->forward_to_kmsg = r;
1260 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1261 r = parse_boolean(word + 36);
1263 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1265 s->forward_to_console = r;
1266 } else if (startswith(word, "systemd.journald"))
1267 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Load /etc/systemd/journald.conf into the Server structure.
 *
 * The file is parsed with config_parse() for the "Journal" section, using
 * the gperf-generated lookup table. A missing file (ENOENT) is handled
 * separately from other open errors, which are logged as warnings; parse
 * errors are logged as warnings as well. */
1279 static int server_parse_config_file(Server *s) {
1286 fn = "/etc/systemd/journald.conf";
1287 f = fopen(fn, "re");
1289 if (errno == ENOENT)
1292 log_warning("Failed to open configuration file %s: %m", fn);
1296 r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
1298 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Initialise a Server: defaults, configuration, sockets and journals.
 *
 * Order of the visible steps:
 *   1. mark all fds as unset (-1) and install built-in defaults;
 *   2. parse the configuration file, then the kernel command line;
 *   3. create /run/systemd/journal, the user-journal hashmap, the mmap
 *      cache and the epoll instance;
 *   4. adopt sockets passed in via socket activation, identified by type
 *      and path (native, stdout, /dev/log); duplicates and unknown sockets
 *      are logged as errors;
 *   5. open the syslog, native and stdout sockets, /dev/kmsg, the kernel
 *      sequence number, and the signalfd;
 *   6. create the udev context and the rate limiter, and open the journals.
 * NOTE(review): the error returns between the steps are not visible in this
 * excerpt. */
1305 static int server_init(Server *s) {
1311 s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1315 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1316 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1318 s->forward_to_syslog = true;
1320 s->max_level_store = LOG_DEBUG;
1321 s->max_level_syslog = LOG_DEBUG;
1322 s->max_level_kmsg = LOG_NOTICE;
1323 s->max_level_console = LOG_INFO;
/* Fill both metrics structures with 0xFF bytes; presumably all-ones marks a
 * value as "not configured" - confirm against the JournalMetrics users. */
1325 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1326 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1328 server_parse_config_file(s);
1329 server_parse_proc_cmdline(s);
1331 mkdir_p("/run/systemd/journal", 0755);
1333 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1334 if (!s->user_journals)
1337 s->mmap = mmap_cache_new();
1341 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1342 if (s->epoll_fd < 0) {
1343 log_error("Failed to create epoll object: %m");
1347 n = sd_listen_fds(true);
1349 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1353 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1355 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1357 if (s->native_fd >= 0) {
1358 log_error("Too many native sockets passed.");
1364 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1366 if (s->stdout_fd >= 0) {
1367 log_error("Too many stdout sockets passed.");
1373 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1375 if (s->syslog_fd >= 0) {
1376 log_error("Too many /dev/log sockets passed.");
1383 log_error("Unknown socket passed.");
1388 r = server_open_syslog_socket(s);
1392 r = server_open_native_socket(s);
1396 r = server_open_stdout_socket(s);
1400 r = server_open_dev_kmsg(s);
1404 r = server_open_kernel_seqnum(s);
1408 r = open_signalfd(s);
1412 s->udev = udev_new();
1416 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1420 r = system_journal_open(s);
/* Give the system journal and every user journal the chance to append a
 * tag, using one common CLOCK_REALTIME timestamp for all of them. The
 * runtime journal is not touched here. */
1427 static void server_maybe_append_tags(Server *s) {
1433 n = now(CLOCK_REALTIME);
1435 if (s->system_journal)
1436 journal_file_maybe_append_tag(s->system_journal, n);
1438 HASHMAP_FOREACH(f, s->user_journals, i)
1439 journal_file_maybe_append_tag(f, n);
/* Release everything a Server owns.
 *
 * Frees all stdout streams, closes the system, runtime and user journals
 * and the user-journal hashmap, closes every fd that is set (>= 0), frees
 * the rate limiter, unmaps the kernel sequence number if mapped, and drops
 * the references on the mmap cache and the udev context. */
1443 static void server_done(Server *s) {
/* stdout_stream_free() is expected to unlink the stream from
 * s->stdout_streams, otherwise this loop would not terminate - confirm. */
1447 while (s->stdout_streams)
1448 stdout_stream_free(s->stdout_streams);
1450 if (s->system_journal)
1451 journal_file_close(s->system_journal);
1453 if (s->runtime_journal)
1454 journal_file_close(s->runtime_journal);
1456 while ((f = hashmap_steal_first(s->user_journals)))
1457 journal_file_close(f);
1459 hashmap_free(s->user_journals);
1461 if (s->epoll_fd >= 0)
1462 close_nointr_nofail(s->epoll_fd);
1464 if (s->signal_fd >= 0)
1465 close_nointr_nofail(s->signal_fd);
1467 if (s->syslog_fd >= 0)
1468 close_nointr_nofail(s->syslog_fd);
1470 if (s->native_fd >= 0)
1471 close_nointr_nofail(s->native_fd);
1473 if (s->stdout_fd >= 0)
1474 close_nointr_nofail(s->stdout_fd);
1476 if (s->dev_kmsg_fd >= 0)
1477 close_nointr_nofail(s->dev_kmsg_fd);
1480 journal_rate_limit_free(s->rate_limit);
1482 if (s->kernel_seqnum)
1483 munmap(s->kernel_seqnum, sizeof(uint64_t));
1489 mmap_cache_unref(s->mmap);
1492 udev_unref(s->udev);
/* Daemon entry point.
 *
 * Sets up logging, initialises the server, vacuums old journal files,
 * flushes the runtime journal to /var and drains /dev/kmsg, then announces
 * startup. The main loop waits on the epoll fd for one event at a time;
 * when the system journal reports a pending "evolve" time, the epoll
 * timeout is derived from it. After each processed event tags may be
 * appended and a missed-syslog-forward warning may be issued. On exit a
 * stop message is logged and the server is torn down.
 *
 * Returns EXIT_FAILURE when the last status code r is negative or when
 * arguments were passed, EXIT_SUCCESS otherwise. */
1495 int main(int argc, char *argv[]) {
1499 /* if (getppid() != 1) { */
1500 /* log_error("This program should be invoked by init only."); */
1501 /* return EXIT_FAILURE; */
1505 log_error("This program does not take arguments.");
1506 return EXIT_FAILURE;
1509 log_set_target(LOG_TARGET_SAFE);
1510 log_set_facility(LOG_SYSLOG);
1511 log_parse_environment();
1516 r = server_init(&server);
1520 server_vacuum(&server);
1521 server_flush_to_var(&server);
1522 server_flush_dev_kmsg(&server);
1524 log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
1525 server_driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
1529 "STATUS=Processing requests...");
1532 struct epoll_event event;
1538 if (server.system_journal &&
1539 journal_file_next_evolve_usec(server.system_journal, &u)) {
1542 n = now(CLOCK_REALTIME);
/* Convert the remaining microseconds to milliseconds, rounding up. */
1547 t = (int) ((u - n + USEC_PER_MSEC - 1) / USEC_PER_MSEC);
1552 r = epoll_wait(server.epoll_fd, &event, 1, t);
1558 log_error("epoll_wait() failed: %m");
1564 r = process_event(&server, &event);
1571 server_maybe_append_tags(&server);
1572 server_maybe_warn_forward_syslog_missed(&server);
1575 log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
1576 server_driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
1580 "STATUS=Shutting down...");
1582 server_done(&server);
1584 return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;