#include <sys/ioctl.h>
#include <linux/sockios.h>
#include <sys/statvfs.h>
+#include <sys/mman.h>
#include <systemd/sd-journal.h>
#include <systemd/sd-messages.h>
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
#define N_IOVEC_META_FIELDS 17
+#define N_IOVEC_KERNEL_FIELDS 64
#define ENTRY_SIZE_MAX (1024*1024*32)
if (s->system_journal) {
if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
- log_error("Out of memory.");
+ log_oom();
return;
}
if (s->runtime_journal) {
if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
- log_error("Out of memory.");
+ log_oom();
return;
}
server_rotate(s);
server_vacuum(s);
vacuumed = true;
+
+ f = find_journal(s, uid);
+ if (!f)
+ return;
}
for (;;) {
assert(s);
assert(format);
- IOVEC_SET_STRING(iovec[n++], "PRIORITY=5");
+ IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
memcpy(buffer, "MESSAGE=", 8);
return;
}
- log_debug("Failed to forward syslog message: %m");
+ if (errno != ENOENT)
+ log_debug("Failed to forward syslog message: %m");
}
static void forward_syslog_raw(Server *s, int priority, const char *buffer, struct ucred *ucred, struct timeval *tv) {
char header_priority[6], header_pid[16];
int n = 0;
char *ident_buf = NULL;
- int fd;
assert(s);
assert(priority >= 0);
assert(priority <= 999);
assert(message);
- if (LOG_PRI(priority) > s->max_level_kmsg)
+ if (_unlikely_(LOG_PRI(priority) > s->max_level_kmsg))
+ return;
+
+ if (_unlikely_(s->dev_kmsg_fd < 0))
return;
/* Never allow messages with kernel facility to be written to
IOVEC_SET_STRING(iovec[n++], message);
IOVEC_SET_STRING(iovec[n++], "\n");
- fd = open("/dev/kmsg", O_WRONLY|O_NOCTTY|O_CLOEXEC);
- if (fd < 0) {
- log_debug("Failed to open /dev/kmsg for logging: %s", strerror(errno));
- goto finish;
- }
-
- if (writev(fd, iovec, n) < 0)
+ if (writev(s->dev_kmsg_fd, iovec, n) < 0)
log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
- close_nointr_nofail(fd);
-
-finish:
free(ident_buf);
}
free(syslog_priority);
free(syslog_facility);
free(syslog_identifier);
+ free(syslog_pid);
}
static bool valid_user_field(const char *p, size_t l) {
u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
c = realloc(iovec, u * sizeof(struct iovec));
if (!c) {
- log_error("Out of memory");
+ log_oom();
break;
}
k = malloc((e - p) + 1 + l);
if (!k) {
- log_error("Out of memory");
+ log_oom();
break;
}
p = malloc(st.st_size);
if (!p) {
- log_error("Out of memory");
+ log_oom();
return;
}
s->identifier = NULL;
else {
s->identifier = strdup(p);
- if (!s->identifier) {
- log_error("Out of memory");
- return -ENOMEM;
- }
+ if (!s->identifier)
+ return log_oom();
}
s->state = STDOUT_STREAM_UNIT_ID;
s->unit_id = NULL;
else {
s->unit_id = strdup(p);
- if (!s->unit_id) {
- log_error("Out of memory");
- return -ENOMEM;
- }
+ if (!s->unit_id)
+ return log_oom();
}
}
stream = new0(StdoutStream, 1);
if (!stream) {
- log_error("Out of memory.");
close_nointr_nofail(fd);
- return -ENOMEM;
+ return log_oom();
}
stream->fd = fd;
return r;
}
-static int parse_kernel_timestamp(char **_p, usec_t *t) {
- usec_t r;
- int k, i;
- char *p;
+static bool is_us(const char *pid) {
+ pid_t t;
- assert(_p);
- assert(*_p);
- assert(t);
+ assert(pid);
- p = *_p;
+ if (parse_pid(pid, &t) < 0)
+ return false;
- if (strlen(p) < 14 || p[0] != '[' || p[13] != ']' || p[6] != '.')
- return 0;
+ return t == getpid();
+}
- r = 0;
+static void dev_kmsg_record(Server *s, char *p, size_t l) {
+ struct iovec iovec[N_IOVEC_META_FIELDS + 7 + N_IOVEC_KERNEL_FIELDS];
+ char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
+ int priority, r;
+ unsigned n = 0, z = 0, j;
+ usec_t usec;
+ char *identifier = NULL, *pid = NULL, *e, *f, *k;
+ uint64_t serial;
+ size_t pl;
- for (i = 1; i <= 5; i++) {
- r *= 10;
+ assert(s);
+ assert(p);
- if (p[i] == ' ')
- continue;
+ if (l <= 0)
+ return;
- k = undecchar(p[i]);
- if (k < 0)
- return 0;
+ e = memchr(p, ',', l);
+ if (!e)
+ return;
+ *e = 0;
- r += k;
- }
+ r = safe_atoi(p, &priority);
+ if (r < 0 || priority < 0 || priority > 999)
+ return;
- for (i = 7; i <= 12; i++) {
- r *= 10;
+ if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
+ return;
- k = undecchar(p[i]);
- if (k < 0)
- return 0;
+ l -= (e - p) + 1;
+ p = e + 1;
+ e = memchr(p, ',', l);
+ if (!e)
+ return;
+ *e = 0;
+
+ r = safe_atou64(p, &serial);
+ if (r < 0)
+ return;
+
+ if (s->kernel_seqnum) {
+ /* We already read this one? */
+ if (serial < *s->kernel_seqnum)
+ return;
- r += k;
+ /* Did we lose any? */
+ if (serial > *s->kernel_seqnum)
+ driver_message(s, SD_MESSAGE_JOURNAL_MISSED, "Missed %llu kernel messages", (unsigned long long) serial - *s->kernel_seqnum - 1);
+
+ /* Make sure we never read this one again. Note that
+ * we always store the next message serial we expect
+ * here, simply because this makes handling the first
+ * message with serial 0 easy. */
+ *s->kernel_seqnum = serial + 1;
}
- *t = r;
- *_p += 14;
- *_p += strspn(*_p, WHITESPACE);
+ l -= (e - p) + 1;
+ p = e + 1;
+ f = memchr(p, ';', l);
+ if (!f)
+ return;
+ /* Kernel 3.6 has the flags field, kernel 3.5 lacks that */
+ e = memchr(p, ',', l);
+ if (!e || f < e)
+ e = f;
+ *e = 0;
- return 1;
-}
+ r = parse_usec(p, &usec);
+ if (r < 0)
+ return;
-static bool is_us(const char *pid) {
- pid_t t;
+ l -= (f - p) + 1;
+ p = f + 1;
+ e = memchr(p, '\n', l);
+ if (!e)
+ return;
+ *e = 0;
- assert(pid);
+ pl = e - p;
+ l -= (e - p) + 1;
+ k = e + 1;
- if (parse_pid(pid, &t) < 0)
- return false;
+ for (j = 0; l > 0 && j < N_IOVEC_KERNEL_FIELDS; j++) {
+ char *m;
+ /* Meta data fields attached */
- return t == getpid();
-}
+ if (*k != ' ')
+ break;
-static void proc_kmsg_line(Server *s, const char *p) {
- struct iovec iovec[N_IOVEC_META_FIELDS + 7];
- char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
- int priority = LOG_KERN | LOG_INFO;
- unsigned n = 0;
- usec_t usec;
- char *identifier = NULL, *pid = NULL;
+ k ++, l --;
- assert(s);
- assert(p);
+ e = memchr(k, '\n', l);
+ if (!e)
+ return;
- if (isempty(p))
- return;
+ *e = 0;
- parse_syslog_priority((char **) &p, &priority);
+ m = cunescape_length_with_prefix(k, e - k, "_KERNEL_");
+ if (!m)
+ break;
- if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
- return;
+ IOVEC_SET_STRING(iovec[n++], m);
+ z++;
- if (parse_kernel_timestamp((char **) &p, &usec) > 0) {
- if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
- (unsigned long long) usec) >= 0)
- IOVEC_SET_STRING(iovec[n++], source_time);
+ l -= (e - k) + 1;
+ k = e + 1;
}
+ if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
+ (unsigned long long) usec) >= 0)
+ IOVEC_SET_STRING(iovec[n++], source_time);
+
IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
IOVEC_SET_STRING(iovec[n++], syslog_priority);
- if ((priority & LOG_FACMASK) == LOG_KERN) {
-
- if (s->forward_to_syslog)
- forward_syslog(s, priority, "kernel", p, NULL, NULL);
-
+ if ((priority & LOG_FACMASK) == LOG_KERN)
IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
- } else {
- read_identifier(&p, &identifier, &pid);
+ else {
+ read_identifier((const char**) &p, &identifier, &pid);
/* Avoid any messages we generated ourselves via
* log_info() and friends. */
if (pid && is_us(pid))
goto finish;
- if (s->forward_to_syslog)
- forward_syslog(s, priority, identifier, p, NULL, NULL);
-
if (identifier) {
syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
if (syslog_identifier)
IOVEC_SET_STRING(iovec[n++], syslog_facility);
}
- message = strappend("MESSAGE=", p);
+ message = cunescape_length_with_prefix(p, pl, "MESSAGE=");
if (message)
IOVEC_SET_STRING(iovec[n++], message);
dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, NULL, priority);
finish:
+ for (j = 0; j < z; j++)
+ free(iovec[j].iov_base);
+
free(message);
free(syslog_priority);
free(syslog_identifier);
free(pid);
}
-static void proc_kmsg_scan(Server *s) {
- char *p;
- size_t remaining;
-
- assert(s);
-
- p = s->proc_kmsg_buffer;
- remaining = s->proc_kmsg_length;
- for (;;) {
- char *end;
- size_t skip;
-
- end = memchr(p, '\n', remaining);
- if (end)
- skip = end - p + 1;
- else if (remaining >= sizeof(s->proc_kmsg_buffer) - 1) {
- end = p + sizeof(s->proc_kmsg_buffer) - 1;
- skip = remaining;
- } else
- break;
-
- *end = 0;
-
- proc_kmsg_line(s, p);
-
- remaining -= skip;
- p += skip;
- }
-
- if (p > s->proc_kmsg_buffer) {
- memmove(s->proc_kmsg_buffer, p, remaining);
- s->proc_kmsg_length = remaining;
- }
-}
-
static int system_journal_open(Server *s) {
int r;
char *fn;
journal_file_close(s->runtime_journal);
s->runtime_journal = NULL;
- if (r >= 0) {
- char path[] = "/run/log/journal/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
- sd_id128_to_string(machine, path + 17);
- rm_rf(path, false, true, false);
- }
+ if (r >= 0)
+ rm_rf("/run/log/journal", false, true, false);
return r;
}
-static int server_read_proc_kmsg(Server *s) {
+static int server_read_dev_kmsg(Server *s) {
+ char buffer[8192+1]; /* the kernel-side limit per record is 8K currently */
ssize_t l;
+
assert(s);
- assert(s->proc_kmsg_fd >= 0);
+ assert(s->dev_kmsg_fd >= 0);
- l = read(s->proc_kmsg_fd, s->proc_kmsg_buffer + s->proc_kmsg_length, sizeof(s->proc_kmsg_buffer) - 1 - s->proc_kmsg_length);
- if (l == 0) /* the kernel is stupid and in some race
- * conditions returns 0 in the middle of the
- * stream. */
+ l = read(s->dev_kmsg_fd, buffer, sizeof(buffer) - 1);
+ if (l == 0)
return 0;
if (l < 0) {
+ /* Old kernels who don't allow reading from /dev/kmsg
+ * return EINVAL when we try. So handle this cleanly,
+ * but don' try to ever read from it again. */
+ if (errno == EINVAL) {
+ epoll_ctl(s->epoll_fd, EPOLL_CTL_DEL, s->dev_kmsg_fd, NULL);
+ return 0;
+ }
if (errno == EAGAIN || errno == EINTR)
return 0;
return -errno;
}
- s->proc_kmsg_length += l;
-
- proc_kmsg_scan(s);
+ dev_kmsg_record(s, buffer, l);
return 1;
}
-static int server_flush_proc_kmsg(Server *s) {
+static int server_flush_dev_kmsg(Server *s) {
int r;
assert(s);
- if (s->proc_kmsg_fd < 0)
+ if (s->dev_kmsg_fd < 0)
+ return 0;
+
+ if (!s->dev_kmsg_readable)
return 0;
- log_info("Flushing /proc/kmsg...");
+ log_info("Flushing /dev/kmsg...");
for (;;) {
- r = server_read_proc_kmsg(s);
+ r = server_read_dev_kmsg(s);
if (r < 0)
return r;
return -errno;
}
+ log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
+
if (sfsi.ssi_signo == SIGUSR1) {
touch("/run/systemd/journal/flushed");
server_flush_to_var(s);
return 1;
}
- log_debug("Received SIG%s", signal_to_string(sfsi.ssi_signo));
return 0;
- } else if (ev->data.fd == s->proc_kmsg_fd) {
+ } else if (ev->data.fd == s->dev_kmsg_fd) {
int r;
if (ev->events != EPOLLIN) {
return -EIO;
}
- r = server_read_proc_kmsg(s);
+ r = server_read_dev_kmsg(s);
if (r < 0)
return r;
return 0;
}
-static int open_proc_kmsg(Server *s) {
+static int open_dev_kmsg(Server *s) {
struct epoll_event ev;
assert(s);
- if (!s->import_proc_kmsg)
- return 0;
-
- s->proc_kmsg_fd = open("/proc/kmsg", O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
- if (s->proc_kmsg_fd < 0) {
- log_warning("Failed to open /proc/kmsg, ignoring: %m");
+ s->dev_kmsg_fd = open("/dev/kmsg", O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ if (s->dev_kmsg_fd < 0) {
+ log_warning("Failed to open /dev/kmsg, ignoring: %m");
return 0;
}
zero(ev);
ev.events = EPOLLIN;
- ev.data.fd = s->proc_kmsg_fd;
- if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->proc_kmsg_fd, &ev) < 0) {
- log_error("Failed to add /proc/kmsg fd to epoll object: %m");
+ ev.data.fd = s->dev_kmsg_fd;
+ if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->dev_kmsg_fd, &ev) < 0) {
+
+ /* This will fail with EPERM on older kernels where
+ * /dev/kmsg is not readable. */
+ if (errno == EPERM)
+ return 0;
+
+ log_error("Failed to add /dev/kmsg fd to epoll object: %m");
return -errno;
}
+ s->dev_kmsg_readable = true;
+
+ return 0;
+}
+
+static int open_kernel_seqnum(Server *s) {
+ int fd;
+ uint64_t *p;
+
+ assert(s);
+
+ /* We store the seqnum we last read in an mmaped file. That
+ * way we can just use it like a variable, but it is
+ * persistant and automatically flushed at reboot. */
+
+ fd = open("/run/systemd/journal/kernel-seqnum", O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0644);
+ if (fd < 0) {
+ log_error("Failed to open /run/systemd/journal/kernel-seqnum, ignoring: %m");
+ return 0;
+ }
+
+ if (posix_fallocate(fd, 0, sizeof(uint64_t)) < 0) {
+ log_error("Failed to allocate sequential number file, ignoring: %m");
+ close_nointr_nofail(fd);
+ return 0;
+ }
+
+ p = mmap(NULL, sizeof(uint64_t), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+ if (p == MAP_FAILED) {
+ log_error("Failed to map sequential number file, ignoring: %m");
+ close_nointr_nofail(fd);
+ return 0;
+ }
+
+ close_nointr_nofail(fd);
+ s->kernel_seqnum = p;
+
return 0;
}
assert(s);
zero(*s);
- s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->proc_kmsg_fd = -1;
+ s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
s->compress = true;
s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
server_parse_config_file(s);
server_parse_proc_cmdline(s);
+ mkdir_p("/run/systemd/journal", 0755);
+
s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
- if (!s->user_journals) {
- log_error("Out of memory.");
- return -ENOMEM;
- }
+ if (!s->user_journals)
+ return log_oom();
s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
if (s->epoll_fd < 0) {
if (r < 0)
return r;
- r = open_proc_kmsg(s);
+ r = open_dev_kmsg(s);
+ if (r < 0)
+ return r;
+
+ r = open_kernel_seqnum(s);
if (r < 0)
return r;
if (s->stdout_fd >= 0)
close_nointr_nofail(s->stdout_fd);
- if (s->proc_kmsg_fd >= 0)
- close_nointr_nofail(s->proc_kmsg_fd);
+ if (s->dev_kmsg_fd >= 0)
+ close_nointr_nofail(s->dev_kmsg_fd);
if (s->rate_limit)
journal_rate_limit_free(s->rate_limit);
+ if (s->kernel_seqnum)
+ munmap(s->kernel_seqnum, sizeof(uint64_t));
+
free(s->buffer);
free(s->tty_path);
}
server_vacuum(&server);
server_flush_to_var(&server);
- server_flush_proc_kmsg(&server);
+ server_flush_dev_kmsg(&server);
log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");