1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
25 #include <sys/signalfd.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
33 #include <systemd/sd-journal.h>
34 #include <systemd/sd-login.h>
35 #include <systemd/sd-messages.h>
36 #include <systemd/sd-daemon.h>
40 #include "journal-file.h"
41 #include "socket-util.h"
42 #include "cgroup-util.h"
44 #include "journal-rate-limit.h"
45 #include "journal-internal.h"
46 #include "conf-parser.h"
53 #include <acl/libacl.h>
58 #include <selinux/selinux.h>
/* Upper bounds on simultaneously open per-user journal files (see
 * find_journal()) and connected stdout streams (see stdout_stream_new()). */
61 #define USER_JOURNALS_MAX 1024
62 #define STDOUT_STREAMS_MAX 4096
/* Default rate-limiting window and burst size. */
64 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
65 #define DEFAULT_RATE_LIMIT_BURST 200
/* Lifetime of the value cached by available_space(). */
67 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
69 #define RECHECK_VAR_AVAILABLE_USEC (30*USEC_PER_SEC)
/* Number of iovec slots every caller must keep free for the trusted
 * metadata fields that dispatch_message_real() appends. */
71 #define N_IOVEC_META_FIELDS 17
/* Largest file-passed entry accepted by process_native_file() (32 MiB). */
73 #define ENTRY_SIZE_MAX (1024*1024*32)
/* Parser states for a stdout stream connection. stdout_stream_line()
 * consumes one header line per state, in the order listed here, before
 * it starts treating lines as log messages. */
75 typedef enum StdoutStreamState {
76 STDOUT_STREAM_IDENTIFIER,
77 STDOUT_STREAM_PRIORITY,
78 STDOUT_STREAM_LEVEL_PREFIX,
79 STDOUT_STREAM_FORWARD_TO_SYSLOG,
80 STDOUT_STREAM_FORWARD_TO_KMSG,
81 STDOUT_STREAM_FORWARD_TO_CONSOLE,
/* Fields of the per-connection StdoutStream object follow. */
87 StdoutStreamState state;
/* Peer label obtained with getpeercon() in stdout_stream_new() and
 * released with freecon() in stdout_stream_free(). */
93 security_context_t security_context;
/* Per-stream forwarding switches, set from the stream's header lines. */
99 bool forward_to_syslog:1;
100 bool forward_to_kmsg:1;
101 bool forward_to_console:1;
/* Unprocessed stream data; stdout_stream_process() reads at most
 * sizeof(buffer)-1 bytes into it. */
103 char buffer[LINE_MAX+1];
/* Linkage for the server's stdout_streams list. */
106 LIST_FIELDS(StdoutStream, stdout_stream);
/* Forward declaration; called from dispatch_message_real(). */
109 static int server_flush_to_var(Server *s);
/* Computes how many more bytes the journal may consume and caches the
 * result in s->cached_available_space.
 *
 * The value is derived from two limits of the active metrics set:
 * max_use minus the disk usage of the existing journal files, and the
 * free space of the file system minus keep_free. */
111 static uint64_t available_space(Server *s) {
116 uint64_t sum = 0, avail = 0, ss_avail = 0;
122 ts = now(CLOCK_MONOTONIC);
/* Serve the cached value while it is younger than
 * RECHECK_AVAILABLE_SPACE_USEC. */
124 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
125 return s->cached_available_space;
127 r = sd_id128_get_machine(&machine);
/* Choose directory prefix and metrics depending on whether the system
 * journal is open. */
131 if (s->system_journal) {
132 f = "/var/log/journal/";
133 m = &s->system_metrics;
135 f = "/run/log/journal/";
136 m = &s->runtime_metrics;
/* The journal directory is the prefix followed by the machine ID. */
141 p = strappend(f, sd_id128_to_string(machine, ids));
151 if (fstatvfs(dirfd(d), &ss) < 0)
156 struct dirent buf, *de;
158 r = readdir_r(d, &buf, &de);
/* Only regular files named *.journal or *.journal~ are counted. */
165 if (!endswith(de->d_name, ".journal") &&
166 !endswith(de->d_name, ".journal~"))
169 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
172 if (!S_ISREG(st.st_mode))
/* st_blocks is multiplied by 512 to get the allocated size in bytes. */
175 sum += (uint64_t) st.st_blocks * 512UL;
/* Both differences are clamped at zero instead of wrapping around. */
178 avail = sum >= m->max_use ? 0 : m->max_use - sum;
180 ss_avail = ss.f_bsize * ss.f_bavail;
182 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
/* NOTE(review): presumably the smaller of the two limits is taken here;
 * the assignment that follows this test is not visible in this listing. */
184 if (ss_avail < avail)
187 s->cached_available_space = avail;
188 s->cached_available_space_timestamp = ts;
/* Resolves the GID of the "adm" group into s->file_gid.
 *
 * s->file_gid_valid is set afterwards, so a later call finds the flag
 * set; a failed lookup is logged as a warning. */
196 static void server_read_file_gid(Server *s) {
197 const char *adm = "adm";
202 if (s->file_gid_valid)
205 r = get_group_creds(&adm, &s->file_gid);
207 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
209 /* if we couldn't read the gid, then it will be 0, but that's
210 * fine and we shouldn't try to resolve the group again, so
211 * let's just pretend it worked right-away. */
212 s->file_gid_valid = true;
/* Adjusts ownership, mode and ACL of journal file f.
 *
 * The file is set to mode 0640, owner 0 and group s->file_gid. The ACL
 * is then patched so that it contains a user entry for uid with read
 * permission. Every failure is logged as a warning and otherwise
 * ignored. */
215 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
220 acl_permset_t permset;
/* Make sure s->file_gid has been looked up before it is used below. */
225 server_read_file_gid(s);
227 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
229 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
235 acl = acl_get_fd(f->fd);
237 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an existing entry for uid; a new ACL_USER entry qualified
 * with uid is created below (presumably only if none was found). */
241 r = acl_find_uid(acl, uid, &entry);
244 if (acl_create_entry(&acl, &entry) < 0 ||
245 acl_set_tag_type(entry, ACL_USER) < 0 ||
246 acl_set_qualifier(entry, &uid) < 0) {
247 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Add read permission to the entry and recalculate the ACL mask. */
252 if (acl_get_permset(entry, &permset) < 0 ||
253 acl_add_perm(permset, ACL_READ) < 0 ||
254 acl_calc_mask(&acl) < 0) {
255 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
259 if (acl_set_fd(f->fd, acl) < 0)
260 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Selects the journal file an entry for the given uid is written to.
 *
 * An open runtime journal always wins. Otherwise the per-user journal
 * kept in s->user_journals is used, and it is opened on demand as
 * /var/log/journal/<machine-id>/user-<uid>.journal. On the failure
 * paths visible here the function falls back to s->system_journal. */
267 static JournalFile* find_journal(Server *s, uid_t uid) {
276 /* We split up user logs only on /var, not on /run. If the
277 * runtime file is open, we write to it exclusively, in order
278 * to guarantee proper order as soon as we flush /run to
279 * /var and close the runtime file. */
281 if (s->runtime_journal)
282 return s->runtime_journal;
285 return s->system_journal;
287 r = sd_id128_get_machine(&machine);
289 return s->system_journal;
/* Per-user journals are cached in a hashmap keyed by the uid. */
291 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
295 if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0)
296 return s->system_journal;
/* Keep the number of open user journals below USER_JOURNALS_MAX by
 * closing entries taken from the map. */
298 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
299 /* Too many open? Then let's close one */
300 f = hashmap_steal_first(s->user_journals);
302 journal_file_close(f);
305 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->system_journal, &f);
309 return s->system_journal;
/* Give the owning user read access to the file. */
311 server_fix_perms(s, f, uid);
313 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
315 journal_file_close(f);
316 return s->system_journal;
/* Rotates the runtime journal, the system journal and every per-user
 * journal, and calls server_fix_perms() after each rotation. Rotation
 * failures are logged as errors. */
322 static void server_rotate(Server *s) {
328 log_info("Rotating...");
330 if (s->runtime_journal) {
331 r = journal_file_rotate(&s->runtime_journal);
/* The pointer is checked again after the rotation call to choose
 * between the two error messages. */
333 if (s->runtime_journal)
334 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
336 log_error("Failed to create new runtime journal: %s", strerror(-r));
338 server_fix_perms(s, s->runtime_journal, 0);
341 if (s->system_journal) {
342 r = journal_file_rotate(&s->system_journal);
344 if (s->system_journal)
345 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
347 log_error("Failed to create new system journal: %s", strerror(-r));
350 server_fix_perms(s, s->system_journal, 0);
/* Rotate each user journal and store the resulting file object back
 * under the same key. */
353 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
354 r = journal_file_rotate(&f);
357 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
359 log_error("Failed to create user journal: %s", strerror(-r));
361 hashmap_replace(s->user_journals, k, f);
/* NOTE(review): this passes s->system_journal, not the user journal f
 * that was just rotated, so the per-user ACL entry ends up on the
 * system journal. This looks like it should be f; confirm. */
362 server_fix_perms(s, s->system_journal, PTR_TO_UINT32(k));
/* Vacuums the journal directories of the journals that are open:
 * /var/log/journal/<machine-id> for the system journal and
 * /run/log/journal/<machine-id> for the runtime journal, each with the
 * max_use and keep_free values of its metrics set. Afterwards the
 * cached available-space value is invalidated. */
367 static void server_vacuum(Server *s) {
373 log_info("Vacuuming...");
375 r = sd_id128_get_machine(&machine);
377 log_error("Failed to get machine ID: %s", strerror(-r));
381 sd_id128_to_string(machine, ids);
383 if (s->system_journal) {
384 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
385 log_error("Out of memory.");
389 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
/* A missing directory (-ENOENT) is not reported as an error. */
390 if (r < 0 && r != -ENOENT)
391 log_error("Failed to vacuum %s: %s", p, strerror(-r));
396 if (s->runtime_journal) {
397 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
398 log_error("Out of memory.");
402 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
403 if (r < 0 && r != -ENOENT)
404 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* A zero timestamp forces available_space() to recompute its value. */
408 s->cached_available_space_timestamp = 0;
/* Produces the cgroup path of process pid with the cgroup path of PID 1
 * removed from its front. Both paths are looked up in the
 * SYSTEMD_CGROUP_CONTROLLER hierarchy.
 *
 * NOTE(review): the result is presumably a newly allocated string owned
 * by the caller; the return statements are not visible in this listing. */
411 static char *shortened_cgroup_path(pid_t pid) {
413 char *process_path, *init_path, *path;
417 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
421 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
/* Drop a trailing "/system" (7 characters) from PID 1's path. */
427 if (endswith(init_path, "/system"))
428 init_path[strlen(init_path) - 7] = 0;
429 else if (streq(init_path, "/"))
/* If the process path starts with PID 1's path, keep only the rest. */
432 if (startswith(process_path, init_path)) {
435 p = strdup(process_path + strlen(init_path));
/* Appends the trusted metadata fields to an entry and writes it to the
 * journal.
 *
 * iovec holds n caller-supplied fields and has room for m elements; the
 * caller must leave at least N_IOVEC_META_FIELDS elements unused. The
 * fields added here are derived from the sender credentials (ucred),
 * the source timestamp (tv), the security label (label/label_len) and
 * the local machine. A field whose string cannot be allocated is simply
 * left out. */
453 static void dispatch_message_real(
455 struct iovec *iovec, unsigned n, unsigned m,
458 const char *label, size_t label_len) {
/* One pointer per generated field string. */
460 char *pid = NULL, *uid = NULL, *gid = NULL,
461 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
462 *comm = NULL, *cmdline = NULL, *hostname = NULL,
463 *audit_session = NULL, *audit_loginuid = NULL,
464 *exe = NULL, *cgroup = NULL, *session = NULL,
465 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
471 uid_t loginuid = 0, realuid = 0;
473 bool vacuumed = false;
478 assert(n + N_IOVEC_META_FIELDS <= m);
484 realuid = ucred->uid;
/* Fields taken directly from the socket credentials. */
486 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
487 IOVEC_SET_STRING(iovec[n++], pid);
489 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
490 IOVEC_SET_STRING(iovec[n++], uid);
492 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
493 IOVEC_SET_STRING(iovec[n++], gid);
/* Fields looked up for the sending process by its PID. */
495 r = get_process_comm(ucred->pid, &t);
497 comm = strappend("_COMM=", t);
501 IOVEC_SET_STRING(iovec[n++], comm);
504 r = get_process_exe(ucred->pid, &t);
506 exe = strappend("_EXE=", t);
510 IOVEC_SET_STRING(iovec[n++], exe);
513 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
515 cmdline = strappend("_CMDLINE=", t);
519 IOVEC_SET_STRING(iovec[n++], cmdline);
522 r = audit_session_from_pid(ucred->pid, &audit);
524 if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
525 IOVEC_SET_STRING(iovec[n++], audit_session);
/* loginuid is also used further down to select the target journal. */
527 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
529 if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
530 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
532 t = shortened_cgroup_path(ucred->pid);
534 cgroup = strappend("_SYSTEMD_CGROUP=", t);
538 IOVEC_SET_STRING(iovec[n++], cgroup);
541 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
542 session = strappend("_SYSTEMD_SESSION=", t);
546 IOVEC_SET_STRING(iovec[n++], session);
549 if (sd_pid_get_unit(ucred->pid, &t) >= 0) {
550 unit = strappend("_SYSTEMD_UNIT=", t);
554 IOVEC_SET_STRING(iovec[n++], unit);
/* NOTE(review): sd_pid_get_owner_uid() is given ucred->uid here, while
 * the neighbouring sd_pid_get_*() calls are given ucred->pid. By its
 * name the function takes a PID, so this looks like a uid/pid mix-up;
 * confirm against the sd-login documentation. */
557 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
558 if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
559 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* Security label: the label passed in by the caller is copied by length
 * and NUL-terminated by hand; the getpidcon() lookup below is
 * presumably the fallback when no label was passed. */
563 selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
564 if (selinux_context) {
565 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
566 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
567 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
568 IOVEC_SET_STRING(iovec[n++], selinux_context);
571 security_context_t con;
573 if (getpidcon(ucred->pid, &con) >= 0) {
574 selinux_context = strappend("_SELINUX_CONTEXT=", con);
576 IOVEC_SET_STRING(iovec[n++], selinux_context);
/* Timestamp supplied by the sender, converted with timeval_load(). */
585 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
586 (unsigned long long) timeval_load(tv)) >= 0)
587 IOVEC_SET_STRING(iovec[n++], source_time);
590 /* Note that strictly speaking storing the boot id here is
591 * redundant since the entry includes this in-line
592 * anyway. However, we need this indexed, too. */
593 r = sd_id128_get_boot(&id);
595 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
596 IOVEC_SET_STRING(iovec[n++], boot_id);
598 r = sd_id128_get_machine(&id);
600 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
601 IOVEC_SET_STRING(iovec[n++], machine_id);
603 t = gethostname_malloc();
605 hostname = strappend("_HOSTNAME=", t);
608 IOVEC_SET_STRING(iovec[n++], hostname);
613 server_flush_to_var(s);
/* Senders with real uid 0 are stored under uid 0; everyone else is
 * stored under their audit login uid. */
616 f = find_journal(s, realuid == 0 ? 0 : loginuid);
618 log_warning("Dropping message, as we can't find a place to store the data.");
620 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
/* The error codes below lead to the rotation messages and the retry
 * logged further down. */
622 if ((r == -E2BIG || /* hit limit */
623 r == -EFBIG || /* hit fs limit */
624 r == -EDQUOT || /* quota hit */
625 r == -ENOSPC || /* disk full */
626 r == -EBADMSG || /* corrupted */
627 r == -ENODATA || /* truncated */
628 r == -EHOSTDOWN || /* other machine */
629 r == -EPROTONOSUPPORT) && /* unsupported feature */
633 log_info("Allocation limit reached, rotating.");
635 log_warning("Journal file corrupted, rotating.");
641 log_info("Retrying write.");
646 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* Release the field strings allocated above. */
660 free(audit_loginuid);
665 free(selinux_context);
/* Writes a message generated by the journal daemon itself.
 *
 * The entry carries PRIORITY=5, _TRANSPORT=driver, a printf-style
 * formatted MESSAGE= and a MESSAGE_ID= built from message_id. It is
 * attributed to the daemon's own PID, UID and GID and handed straight
 * to dispatch_message_real(). */
668 static void driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
/* "MESSAGE_ID=" is 11 characters, followed by the 32 characters of the
 * formatted ID and a terminating NUL. */
669 char mid[11 + 32 + 1];
670 char buffer[16 + LINE_MAX + 1];
/* The four fields set here plus the metadata added downstream. */
671 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
679 IOVEC_SET_STRING(iovec[n++], "PRIORITY=5");
680 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The formatted text is written right after the 8-byte "MESSAGE="
 * prefix. */
682 memcpy(buffer, "MESSAGE=", 8);
683 va_start(ap, format);
684 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
686 char_array_0(buffer);
687 IOVEC_SET_STRING(iovec[n++], buffer);
689 snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
691 IOVEC_SET_STRING(iovec[n++], mid);
/* Use the daemon's own credentials as the sender. */
694 ucred.pid = getpid();
695 ucred.uid = getuid();
696 ucred.gid = getgid();
698 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0);
/* Rate-limiting front end for dispatch_message_real().
 *
 * The sender's shortened cgroup path, truncated as described below, is
 * passed to journal_rate_limit_test() together with the priority level
 * and the currently available space. The parameters are forwarded to
 * dispatch_message_real() unchanged. */
701 static void dispatch_message(Server *s,
702 struct iovec *iovec, unsigned n, unsigned m,
705 const char *label, size_t label_len,
708 char *path = NULL, *c;
711 assert(iovec || n == 0);
719 path = shortened_cgroup_path(ucred->pid);
723 /* example: /user/lennart/3/foobar
724 * /system/dbus.service/foobar
726 * So let's cut off everything past the third /, since that is
727 * where user directories start */
729 c = strchr(path, '/');
731 c = strchr(c+1, '/');
733 c = strchr(c+1, '/');
/* Only the level part of the priority (LOG_PRIMASK) is passed on. */
739 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
746 /* Write a suppression message if we suppressed something */
/* NOTE(review): rl - 1 is reported as the number of suppressed
 * messages; the condition guarding this call is not visible here. */
748 driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
753 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len);
/* Sends the given iovec array as one datagram over s->syslog_fd to the
 * socket /run/systemd/journal/syslog.
 *
 * The sender credentials are attached as SCM_CREDENTIALS ancillary
 * data. If sending fails with ESRCH while credentials were supplied,
 * the credentials are replaced and the send is tried once more. A
 * final failure is only logged at debug level. */
756 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
757 struct msghdr msghdr;
758 struct cmsghdr *cmsg;
/* Control buffer sized for exactly one struct ucred message. */
760 struct cmsghdr cmsghdr;
761 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
763 union sockaddr_union sa;
770 msghdr.msg_iov = (struct iovec*) iovec;
771 msghdr.msg_iovlen = n_iovec;
/* The destination path is a short literal that fits into sun_path;
 * the address length covers the path without a trailing NUL. */
774 sa.un.sun_family = AF_UNIX;
775 strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
776 msghdr.msg_name = &sa;
777 msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
781 msghdr.msg_control = &control;
782 msghdr.msg_controllen = sizeof(control);
784 cmsg = CMSG_FIRSTHDR(&msghdr);
785 cmsg->cmsg_level = SOL_SOCKET;
786 cmsg->cmsg_type = SCM_CREDENTIALS;
787 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
788 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
789 msghdr.msg_controllen = cmsg->cmsg_len;
792 /* Forward the syslog message we received via /dev/log to
793 * /run/systemd/journal/syslog. Unfortunately we currently can't set
794 * the SO_TIMESTAMP auxiliary data, and hence we don't. */
796 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
799 /* The socket is full? I guess the syslog implementation is
800 * too slow, and we shouldn't wait for that... */
804 if (ucred && errno == ESRCH) {
807 /* Hmm, presumably the sender process vanished
808 * by now, so let's fix it as good as we
813 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
815 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
822 log_debug("Failed to forward syslog message: %m");
/* Forwards an unparsed syslog line: buffer is wrapped into a single
 * iovec and handed to forward_syslog_iovec(). */
825 static void forward_syslog_raw(Server *s, const char *buffer, struct ucred *ucred, struct timeval *tv) {
831 IOVEC_SET_STRING(iovec, buffer);
832 forward_syslog_iovec(s, &iovec, 1, ucred, tv);
/* Builds a syslog-formatted line from its parts and forwards it with
 * forward_syslog_iovec().
 *
 * The line consists of "<priority>", a timestamp, the identifier with
 * an optional "[pid]: " suffix, and the message text. The timestamp is
 * taken from tv if given, otherwise from the current real time. */
835 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
/* At most five parts: priority, time, identifier, pid or ": ", and
 * the message. */
836 struct iovec iovec[5];
837 char header_priority[6], header_time[64], header_pid[16];
841 char *ident_buf = NULL;
/* A priority of at most three digits fits header_priority as "<999>"
 * plus the terminating NUL. */
844 assert(priority >= 0);
845 assert(priority <= 999);
848 /* First: priority field */
849 snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
850 char_array_0(header_priority);
851 IOVEC_SET_STRING(iovec[n++], header_priority);
853 /* Second: timestamp */
854 t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
858 if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
860 IOVEC_SET_STRING(iovec[n++], header_time);
862 /* Third: identifier and PID */
/* The process name is looked up by PID and used as the identifier
 * (presumably only when the caller passed none; the guarding condition
 * is not visible here). */
865 get_process_comm(ucred->pid, &ident_buf);
866 identifier = ident_buf;
869 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
870 char_array_0(header_pid);
873 IOVEC_SET_STRING(iovec[n++], identifier);
875 IOVEC_SET_STRING(iovec[n++], header_pid);
876 } else if (identifier) {
877 IOVEC_SET_STRING(iovec[n++], identifier);
878 IOVEC_SET_STRING(iovec[n++], ": ");
881 /* Fourth: message */
882 IOVEC_SET_STRING(iovec[n++], message);
884 forward_syslog_iovec(s, iovec, n, ucred, tv);
/* Replaces a zero facility in priority with LOG_USER and keeps the
 * level bits.
 *
 * NOTE(review): a priority with a non-zero facility is presumably
 * returned unchanged; that return statement is not visible here. */
889 static int fixup_priority(int priority) {
891 if ((priority & LOG_FACMASK) == 0)
892 return (priority & LOG_PRIMASK) | LOG_USER;
/* Writes a message to /dev/kmsg as "<priority>identifier[pid]: message"
 * followed by a newline, using a single writev() call. Failures to open
 * or write the device are logged at debug level only. */
897 static void forward_kmsg(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
/* At most five parts: priority, identifier, pid or ": ", message and
 * the newline. */
898 struct iovec iovec[5];
899 char header_priority[6], header_pid[16];
901 char *ident_buf = NULL;
905 assert(priority >= 0);
906 assert(priority <= 999);
909 /* Never allow messages with kernel facility to be written to
910 * kmsg, regardless where the data comes from. */
911 priority = fixup_priority(priority);
913 /* First: priority field */
914 snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
915 char_array_0(header_priority);
916 IOVEC_SET_STRING(iovec[n++], header_priority);
918 /* Second: identifier and PID */
/* The process name looked up by PID becomes the identifier. */
921 get_process_comm(ucred->pid, &ident_buf);
922 identifier = ident_buf;
925 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
926 char_array_0(header_pid);
929 IOVEC_SET_STRING(iovec[n++], identifier);
931 IOVEC_SET_STRING(iovec[n++], header_pid);
932 } else if (identifier) {
933 IOVEC_SET_STRING(iovec[n++], identifier);
934 IOVEC_SET_STRING(iovec[n++], ": ");
937 /* Fourth: message */
938 IOVEC_SET_STRING(iovec[n++], message);
939 IOVEC_SET_STRING(iovec[n++], "\n");
/* The device is opened for this one write and closed again below. */
941 fd = open("/dev/kmsg", O_WRONLY|O_NOCTTY|O_CLOEXEC);
943 log_debug("Failed to open /dev/kmsg for logging: %s", strerror(errno));
947 if (writev(fd, iovec, n) < 0)
948 log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
950 close_nointr_nofail(fd);
/* Writes a message to /dev/console as "identifier[pid]: message"
 * followed by a newline, using a single writev() call. Failures to open
 * or write the console are logged at debug level only. */
956 static void forward_console(Server *s, const char *identifier, const char *message, struct ucred *ucred) {
/* At most four parts: identifier, pid or ": ", message and the
 * newline. */
957 struct iovec iovec[4];
960 char *ident_buf = NULL;
965 /* First: identifier and PID */
/* The process name looked up by PID becomes the identifier. */
968 get_process_comm(ucred->pid, &ident_buf);
969 identifier = ident_buf;
972 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
973 char_array_0(header_pid);
976 IOVEC_SET_STRING(iovec[n++], identifier);
978 IOVEC_SET_STRING(iovec[n++], header_pid);
979 } else if (identifier) {
980 IOVEC_SET_STRING(iovec[n++], identifier);
981 IOVEC_SET_STRING(iovec[n++], ": ");
/* Message text and trailing newline. */
985 IOVEC_SET_STRING(iovec[n++], message);
986 IOVEC_SET_STRING(iovec[n++], "\n");
988 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
990 log_debug("Failed to open /dev/console for logging: %s", strerror(errno));
994 if (writev(fd, iovec, n) < 0)
995 log_debug("Failed to write to /dev/console for logging: %s", strerror(errno));
997 close_nointr_nofail(fd);
/* Examines the first whitespace-delimited token of *buf for a syslog
 * tag, optionally ending in a bracketed PID, and advances *buf past
 * the whitespace that follows.
 *
 * NOTE(review): the results are presumably returned as newly allocated
 * strings in *identifier and *pid; the assignments are not visible in
 * this listing. */
1003 static void read_identifier(const char **buf, char **identifier, char **pid) {
/* Skip leading whitespace, then measure the token. */
1014 p += strspn(p, WHITESPACE);
1015 l = strcspn(p, WHITESPACE);
/* A closing bracket at the end of the token marks a PID part; the
 * text between index k and that bracket is duplicated. */
1024 if (p[l-1] == ']') {
1030 t = strndup(p+k+1, l-k-2);
1050 *buf += strspn(*buf, WHITESPACE);
/* Handles one syslog message.
 *
 * The raw line is first forwarded to the syslog socket if enabled. It
 * is then parsed for priority, date and identifier/PID, forwarded to
 * kmsg and the console if enabled, and finally dispatched to the
 * journal with _TRANSPORT=syslog. */
1053 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv, const char *label, size_t label_len) {
1054 char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
/* Up to six fields are set here; the rest is reserved for metadata. */
1055 struct iovec iovec[N_IOVEC_META_FIELDS + 6];
/* Default used when the message carries no priority prefix. */
1057 int priority = LOG_USER | LOG_INFO;
1058 char *identifier = NULL, *pid = NULL;
/* Forward the line unmodified, before any parsing advances buf. */
1063 if (s->forward_to_syslog)
1064 forward_syslog_raw(s, buf, ucred, tv);
/* Each parser receives the address of buf so it can advance it. */
1066 parse_syslog_priority((char**) &buf, &priority);
1067 skip_syslog_date((char**) &buf);
1068 read_identifier(&buf, &identifier, &pid);
1070 if (s->forward_to_kmsg)
1071 forward_kmsg(s, priority, identifier, buf, ucred);
1073 if (s->forward_to_console)
1074 forward_console(s, identifier, buf, ucred);
1076 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
/* Level and facility are stored as separate fields; the facility is
 * only stored when it is non-zero. */
1078 if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1079 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1081 if (priority & LOG_FACMASK)
1082 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1083 IOVEC_SET_STRING(iovec[n++], syslog_facility);
1086 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1087 if (syslog_identifier)
1088 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1092 syslog_pid = strappend("SYSLOG_PID=", pid);
1094 IOVEC_SET_STRING(iovec[n++], syslog_pid);
/* Whatever remains of buf after parsing is the message text. */
1097 message = strappend("MESSAGE=", buf);
1099 IOVEC_SET_STRING(iovec[n++], message);
1101 dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, label, label_len, priority);
/* Release the field strings allocated above. */
1106 free(syslog_priority);
1107 free(syslog_facility);
1108 free(syslog_identifier);
/* Checks whether the l bytes at p form a field name that clients are
 * allowed to set. p does not need to be NUL-terminated. The individual
 * rules are listed in the comments below. */
1111 static bool valid_user_field(const char *p, size_t l) {
1114 /* We kinda enforce POSIX syntax recommendations for
1115 environment variables here, but make a couple of additional
1118 http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html */
1120 /* No empty field names */
1124 /* Don't allow names longer than 64 chars */
1128 /* Variables starting with an underscore are protected */
1132 /* Don't allow digits as first character */
1133 if (p[0] >= '0' && p[0] <= '9')
1136 /* Only allow A-Z0-9 and '_' */
1137 for (a = p; a < p + l; a++)
1138 if (!((*a >= 'A' && *a <= 'Z') ||
1139 (*a >= '0' && *a <= '9') ||
/* Parses a buffer in the native journal protocol and dispatches the
 * entries it contains.
 *
 * The buffer is processed line by line. Lines starting with '.' or '#'
 * are skipped. A line containing '=' is a text field that is referenced
 * in place inside buffer. A line without '=' introduces a binary field:
 * the name is followed by a 64-bit length, the data and a newline, and
 * such a field is copied into a freshly allocated block. Field names
 * are checked with valid_user_field(). PRIORITY=, SYSLOG_FACILITY=,
 * SYSLOG_IDENTIFIER= and MESSAGE= are additionally picked up for rate
 * limiting and forwarding. During cleanup only those iovec entries
 * whose data lies outside of buffer are freed.
 *
 * NOTE(review): in the binary-field path the length l read from the
 * message takes part in the bounds check and in the malloc() size;
 * verify that a very large value cannot make these sums wrap. */
1146 static void process_native_message(
1148 const void *buffer, size_t buffer_size,
1149 struct ucred *ucred,
1151 const char *label, size_t label_len) {
1153 struct iovec *iovec = NULL;
/* tn is later used as the index of the _TRANSPORT= field. */
1154 unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1157 int priority = LOG_INFO;
1158 char *identifier = NULL, *message = NULL;
1161 assert(buffer || buffer_size == 0);
1164 remaining = buffer_size;
1166 while (remaining > 0) {
/* e marks the end of the current line. */
1169 e = memchr(p, '\n', remaining);
1172 /* Trailing noise, let's ignore it, and flush what we collected */
1173 log_debug("Received message with trailing noise, ignoring.");
1178 /* Entry separator */
1179 dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, priority);
1181 priority = LOG_INFO;
1188 if (*p == '.' || *p == '#') {
1189 /* Ignore control commands for now, and
1191 remaining -= (e - p) + 1;
1196 /* A property follows */
1198 if (n+N_IOVEC_META_FIELDS >= m) {
1202 u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1203 c = realloc(iovec, u * sizeof(struct iovec));
1205 log_error("Out of memory");
1213 q = memchr(p, '=', e - p);
1215 if (valid_user_field(p, q - p)) {
1220 /* If the field name starts with an
1221 * underscore, skip the variable,
1222 * since that indicates a trusted
1224 iovec[n].iov_base = (char*) p;
1225 iovec[n].iov_len = l;
1228 /* We need to determine the priority
1229 * of this entry for the rate limiting
1232 memcmp(p, "PRIORITY=", 9) == 0 &&
1233 p[9] >= '0' && p[9] <= '9')
1234 priority = (priority & LOG_FACMASK) | (p[9] - '0');
1237 memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1238 p[16] >= '0' && p[16] <= '9')
1239 priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1242 memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1243 p[16] >= '0' && p[16] <= '9' &&
1244 p[17] >= '0' && p[17] <= '9')
1245 priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1248 memcmp(p, "SYSLOG_IDENTIFIER=", 18) == 0) {
1251 t = strndup(p + 18, l - 18);
1256 } else if (l >= 8 &&
1257 memcmp(p, "MESSAGE=", 8) == 0) {
1260 t = strndup(p + 8, l - 8);
1268 remaining -= (e - p) + 1;
1276 if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1277 log_debug("Failed to parse message, ignoring.");
1281 memcpy(&l_le, e + 1, sizeof(uint64_t));
1284 if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1285 e[1+sizeof(uint64_t)+l] != '\n') {
1286 log_debug("Failed to parse message, ignoring.");
1290 k = malloc((e - p) + 1 + l);
1292 log_error("Out of memory");
1296 memcpy(k, p, e - p);
1298 memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1300 if (valid_user_field(p, e - p)) {
1301 iovec[n].iov_base = k;
1302 iovec[n].iov_len = (e - p) + 1 + l;
1307 remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1308 p = e + 1 + sizeof(uint64_t) + l + 1;
1316 IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1319 if (s->forward_to_syslog)
1320 forward_syslog(s, priority, identifier, message, ucred, tv);
1322 if (s->forward_to_kmsg)
1323 forward_kmsg(s, priority, identifier, message, ucred);
1325 if (s->forward_to_console)
1326 forward_console(s, identifier, message, ucred);
1329 dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, priority);
1332 for (j = 0; j < n; j++) {
1336 if (iovec[j].iov_base < buffer ||
1337 (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1338 free(iovec[j].iov_base);
/* Handles a native-protocol message that was passed as a file
 * descriptor instead of inline datagram data.
 *
 * The descriptor must refer to a regular file of at most
 * ENTRY_SIZE_MAX bytes. Its contents are read into a heap buffer with
 * pread() and passed to process_native_message(). */
1346 static void process_native_file(
1349 struct ucred *ucred,
1351 const char *label, size_t label_len) {
1360 /* Data is in the passed file, since it didn't fit in a
1361 * datagram. We can't map the file here, since clients might
1362 * then truncate it and trigger a SIGBUS for us. So let's
1363 * stupidly read it */
1365 if (fstat(fd, &st) < 0) {
1366 log_error("Failed to stat passed file, ignoring: %m");
1370 if (!S_ISREG(st.st_mode)) {
1371 log_error("File passed is not regular. Ignoring.");
1375 if (st.st_size <= 0)
1378 if (st.st_size > ENTRY_SIZE_MAX) {
1379 log_error("File passed too large. Ignoring.");
1383 p = malloc(st.st_size);
1385 log_error("Out of memory");
/* The number of bytes actually read, n, is what gets parsed below. */
1389 n = pread(fd, p, st.st_size, 0);
/* NOTE(review): assuming this is the n < 0 branch, n is the -1 returned
 * by pread() and the real error is in errno, so strerror(-n) would
 * print the text for error number 1. This presumably should use errno
 * (or %m); confirm. */
1391 log_error("Failed to read file, ignoring: %s", strerror(-n));
1393 process_native_message(s, p, n, ucred, tv, label, label_len);
/* Logs one line p received on a stdout stream.
 *
 * The stream's default priority is used unless level prefixes are
 * enabled and p carries one. The line is forwarded to syslog, kmsg and
 * the console when either the stream or the server asks for it, and
 * then dispatched to the journal with _TRANSPORT=stdout, the stream's
 * credentials and its security context as label. */
1398 static int stdout_stream_log(StdoutStream *s, const char *p) {
/* Up to five fields are set here; the rest is reserved for metadata. */
1399 struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1400 char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1404 size_t label_len = 0;
1412 priority = s->priority;
/* The parser receives the address of p so it can advance past the
 * prefix. */
1414 if (s->level_prefix)
1415 parse_syslog_priority((char**) &p, &priority);
/* Only the syslog path passes the priority through fixup_priority()
 * here; forward_kmsg() applies it internally. */
1417 if (s->forward_to_syslog || s->server->forward_to_syslog)
1418 forward_syslog(s->server, fixup_priority(priority), s->identifier, p, &s->ucred, NULL);
1420 if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1421 forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1423 if (s->forward_to_console || s->server->forward_to_console)
1424 forward_console(s->server, s->identifier, p, &s->ucred);
1426 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1428 if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1429 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1431 if (priority & LOG_FACMASK)
1432 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1433 IOVEC_SET_STRING(iovec[n++], syslog_facility);
1435 if (s->identifier) {
1436 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1437 if (syslog_identifier)
1438 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1441 message = strappend("MESSAGE=", p);
1443 IOVEC_SET_STRING(iovec[n++], message);
/* Pass the peer's security context on as the label, if there is one. */
1446 if (s->security_context) {
1447 label = (char*) s->security_context;
1448 label_len = strlen((char*) s->security_context);
1452 dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, label, label_len, priority);
/* Release the field strings allocated above. */
1455 free(syslog_priority);
1456 free(syslog_facility);
1457 free(syslog_identifier);
/* Processes one complete line p from a stdout stream according to the
 * stream's current state.
 *
 * The header lines are, in order: identifier, priority, level-prefix
 * flag, forward-to-syslog flag, forward-to-kmsg flag and
 * forward-to-console flag. Each one advances s->state. Once the state
 * is STDOUT_STREAM_RUNNING every line is logged with
 * stdout_stream_log(). */
1462 static int stdout_stream_line(StdoutStream *s, char *p) {
1472 case STDOUT_STREAM_IDENTIFIER:
/* The identifier is either cleared or set to a copy of the line
 * (presumably cleared for an empty line; the condition is not
 * visible here). */
1474 s->identifier = NULL;
1476 s->identifier = strdup(p);
1477 if (!s->identifier) {
1478 log_error("Out of memory");
1483 s->state = STDOUT_STREAM_PRIORITY;
1486 case STDOUT_STREAM_PRIORITY:
1487 r = safe_atoi(p, &s->priority);
/* NOTE(review): this rejects the values 0 and 999, although
 * forward_syslog() and forward_kmsg() assert the inclusive range
 * 0..999. Confirm whether "<= 0" and ">= 999" are intended. */
1488 if (r < 0 || s->priority <= 0 || s->priority >= 999) {
1489 log_warning("Failed to parse log priority line.");
1493 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1496 case STDOUT_STREAM_LEVEL_PREFIX:
1497 r = parse_boolean(p);
1499 log_warning("Failed to parse level prefix line.");
/* !!r normalizes the parsed value to 0 or 1. */
1503 s->level_prefix = !!r;
1504 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1507 case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1508 r = parse_boolean(p);
1510 log_warning("Failed to parse forward to syslog line.");
1514 s->forward_to_syslog = !!r;
1515 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1518 case STDOUT_STREAM_FORWARD_TO_KMSG:
1519 r = parse_boolean(p);
1521 log_warning("Failed to parse copy to kmsg line.");
1525 s->forward_to_kmsg = !!r;
1526 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1529 case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1530 r = parse_boolean(p);
1532 log_warning("Failed to parse copy to console line.");
1536 s->forward_to_console = !!r;
1537 s->state = STDOUT_STREAM_RUNNING;
/* Header complete: from here on lines are log messages. */
1540 case STDOUT_STREAM_RUNNING:
1541 return stdout_stream_log(s, p);
1544 assert_not_reached("Unknown stream state");
/* Splits the buffered stream data into lines and passes each one to
 * stdout_stream_line().
 *
 * A line ends at a newline, or at the end of the buffer once the
 * buffer is full. With force_flush set, remaining data without a
 * newline is processed as a final line. Unprocessed data is moved to
 * the start of the buffer. */
1547 static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
1555 remaining = s->length;
1560 end = memchr(p, '\n', remaining);
/* No newline, but the buffer is full: treat its whole content as one
 * line. */
1563 else if (remaining >= sizeof(s->buffer) - 1) {
1564 end = p + sizeof(s->buffer) - 1;
1571 r = stdout_stream_line(s, p);
1579 if (force_flush && remaining > 0) {
1581 r = stdout_stream_line(s, p);
/* Move the unconsumed tail to the front of the buffer. */
1589 if (p > s->buffer) {
1590 memmove(s->buffer, p, remaining);
1591 s->length = remaining;
/* Reads newly arrived data from the stream's socket into its buffer
 * and scans it for lines.
 *
 * NOTE(review): the forced scan presumably runs at end of stream and
 * the normal scan after a successful read; the conditions separating
 * the two calls are not visible in this listing. */
1597 static int stdout_stream_process(StdoutStream *s) {
/* Append after the data already buffered, leaving one byte spare. */
1603 l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1606 if (errno == EAGAIN)
1609 log_warning("Failed to read from stream: %m");
1614 r = stdout_stream_scan(s, true);
1622 r = stdout_stream_scan(s, false);
/* Tears down a stdout stream: removes it from the server's stream list
 * and counter, removes its descriptor from the epoll set and closes it,
 * and releases the security context and the identifier string. */
1630 static void stdout_stream_free(StdoutStream *s) {
1634 assert(s->server->n_stdout_streams > 0);
1635 s->server->n_stdout_streams --;
1636 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
/* Unregister the descriptor from the event loop before closing it. */
1641 epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1643 close_nointr_nofail(s->fd);
1647 if (s->security_context)
1648 freecon(s->security_context);
1651 free(s->identifier);
/* Accepts a pending connection on s->stdout_fd and sets up a
 * StdoutStream object for it.
 *
 * The connection is refused once STDOUT_STREAMS_MAX streams exist. For
 * an accepted connection the peer credentials and the peer security
 * context are recorded, the writing side of the socket is shut down,
 * and the descriptor is added to the epoll set for input events. */
1655 static int stdout_stream_new(Server *s) {
1656 StdoutStream *stream;
1659 struct epoll_event ev;
/* The new descriptor is non-blocking and close-on-exec. */
1663 fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1665 if (errno == EAGAIN)
1668 log_error("Failed to accept stdout connection: %m");
1672 if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1673 log_warning("Too many stdout streams, refusing connection.");
1674 close_nointr_nofail(fd);
1678 stream = new0(StdoutStream, 1);
1680 log_error("Out of memory.");
1681 close_nointr_nofail(fd);
1687 len = sizeof(stream->ucred);
1688 if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1689 log_error("Failed to determine peer credentials: %m");
/* ENOPROTOOPT from getpeercon() is not reported as an error. */
1695 if (getpeercon(fd, &stream->security_context) < 0 && errno != ENOPROTOOPT)
1696 log_error("Failed to determine peer security context: %m");
1699 if (shutdown(fd, SHUT_WR) < 0) {
1700 log_error("Failed to shutdown writing side of socket: %m");
/* The stream object itself is the epoll user data. */
1706 ev.data.ptr = stream;
1707 ev.events = EPOLLIN;
1708 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1709 log_error("Failed to add stream to event loop: %m");
1715 LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1716 s->n_stdout_streams ++;
/* The stream is released again with stdout_stream_free() (presumably
 * on the failure path). */
1721 stdout_stream_free(stream);
/* Parses a kernel timestamp of the fixed form "[sssss.uuuuuu]" at the
 * start of *_p and skips the whitespace after it.
 *
 * NOTE(review): the parsed time is presumably stored in *t in
 * microseconds and a positive value returned on success; those
 * statements are not visible in this listing. */
1725 static int parse_kernel_timestamp(char **_p, usec_t *t) {
/* 14 characters: '[' at 0, '.' at 6 and ']' at 13. */
1736 if (strlen(p) < 14 || p[0] != '[' || p[13] != ']' || p[6] != '.')
/* The five positions before the dot. */
1741 for (i = 1; i <= 5; i++) {
1747 k = undecchar(p[i]);
/* The six positions after the dot. */
1754 for (i = 7; i <= 12; i++) {
1757 k = undecchar(p[i]);
1766 *_p += strspn(*_p, WHITESPACE);
/* Returns whether the PID given as a string is the PID of this process.
 * The string is parsed with parse_pid(); the handling of a parse failure is
 * not visible in this excerpt. */
1771 static bool is_us(const char *pid) {
1776 if (parse_pid(pid, &t) < 0)
1779 return t == getpid();
/* Turns one line read from the kernel log into a journal entry.
 * The line's syslog priority prefix and optional kernel timestamp are parsed
 * off the front, a set of fields (_TRANSPORT, PRIORITY, SYSLOG_IDENTIFIER,
 * SYSLOG_PID, SYSLOG_FACILITY, MESSAGE, and _SOURCE_MONOTONIC_TIMESTAMP when
 * a timestamp was found) is collected in an iovec array, and the result is
 * passed to dispatch_message(). When forwarding to syslog is enabled the
 * message is also handed to forward_syslog().
 * NOTE(review): some conditions and the frees of a few strings are not
 * visible in this excerpt. */
1782 static void proc_kmsg_line(Server *s, const char *p) {
/* Room for the fields added here plus the metadata fields the dispatcher may
 * append (the array's full capacity is passed to dispatch_message() below). */
1783 struct iovec iovec[N_IOVEC_META_FIELDS + 7];
1784 char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
/* Default used when the line carries no priority prefix. */
1785 int priority = LOG_KERN | LOG_INFO;
1788 char *identifier = NULL, *pid = NULL;
1796 parse_syslog_priority((char **) &p, &priority);
/* With kmsg forwarding enabled, non-kernel-facility lines are singled out
 * here; presumably they are dropped because they were written to kmsg by the
 * forwarding itself -- TODO confirm, the statement body is not visible. */
1798 if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
1801 if (parse_kernel_timestamp((char **) &p, &usec) > 0) {
1802 if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
1803 (unsigned long long) usec) >= 0)
1804 IOVEC_SET_STRING(iovec[n++], source_time);
1807 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
1809 if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1810 IOVEC_SET_STRING(iovec[n++], syslog_priority);
/* Messages with the kernel facility get the fixed identifier "kernel". */
1812 if ((priority & LOG_FACMASK) == LOG_KERN) {
1814 if (s->forward_to_syslog)
1815 forward_syslog(s, priority, "kernel", p, NULL, NULL);
1817 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
/* Other facilities: extract identifier and PID from the message text. */
1819 read_identifier(&p, &identifier, &pid);
1821 /* Avoid any messages we generated ourselves via
1822 * log_info() and friends. */
1826 if (s->forward_to_syslog)
1827 forward_syslog(s, priority, identifier, p, NULL, NULL);
1830 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1831 if (syslog_identifier)
1832 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1836 syslog_pid = strappend("SYSLOG_PID=", pid);
1838 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1841 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1842 IOVEC_SET_STRING(iovec[n++], syslog_facility);
1845 message = strappend("MESSAGE=", p);
1847 IOVEC_SET_STRING(iovec[n++], message);
/* n fields are filled in; the array capacity is passed alongside. */
1849 dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, priority);
/* Release the heap-allocated field strings. */
1853 free(syslog_priority);
1854 free(syslog_identifier);
1856 free(syslog_facility);
/* Splits the data accumulated in s->proc_kmsg_buffer into lines and feeds each
 * one to proc_kmsg_line(). Whatever remains after the last processed line is
 * moved to the front of the buffer and s->proc_kmsg_length is updated
 * accordingly.
 * NOTE(review): the loop structure and line termination are not visible in
 * this excerpt. */
1862 static void proc_kmsg_scan(Server *s) {
1868 p = s->proc_kmsg_buffer;
1869 remaining = s->proc_kmsg_length;
/* A line normally ends at the next newline ... */
1874 end = memchr(p, '\n', remaining);
/* ... but if the remaining data fills the buffer without one, the end is
 * placed at the buffer's capacity so that processing can make progress. */
1877 else if (remaining >= sizeof(s->proc_kmsg_buffer) - 1) {
1878 end = p + sizeof(s->proc_kmsg_buffer) - 1;
1885 proc_kmsg_line(s, p);
/* Only shuffle data if something was consumed. */
1891 if (p > s->proc_kmsg_buffer) {
1892 memmove(s->proc_kmsg_buffer, p, remaining);
1893 s->proc_kmsg_length = remaining;
/* Opens the persistent journal under /var/log/journal/<machine-id>/ and/or
 * the volatile journal under /run/log/journal/<machine-id>/, storing them in
 * s->system_journal and s->runtime_journal. Journals that are already open
 * are left alone. For each newly opened journal the default metrics are
 * computed, metrics and the compression setting are copied into the file
 * object, and server_fix_perms() is applied.
 * NOTE(review): error returns and frees of the path strings are not visible
 * in this excerpt. */
1897 static int system_journal_open(Server *s) {
1903 r = sd_id128_get_machine(&machine);
1907 sd_id128_to_string(machine, ids);
1909 if (!s->system_journal) {
1911 /* First try to create the machine path, but not the prefix */
1912 fn = strappend("/var/log/journal/", ids);
/* Best effort: the result of mkdir() is deliberately ignored. */
1915 (void) mkdir(fn, 0755);
1918 /* Then create the system journal file */
1919 fn = join("/var/log/journal/", ids, "/system.journal", NULL);
1923 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, NULL, &s->system_journal);
1927 journal_default_metrics(&s->system_metrics, s->system_journal->fd);
1929 s->system_journal->metrics = s->system_metrics;
1930 s->system_journal->compress = s->compress;
1932 server_fix_perms(s, s->system_journal, 0);
/* ENOENT and EROFS are not worth a warning; any other failure is logged. */
1935 if (r != -ENOENT && r != -EROFS)
1936 log_warning("Failed to open system journal: %s", strerror(-r));
1942 if (!s->runtime_journal) {
1944 fn = join("/run/log/journal/", ids, "/system.journal", NULL);
1948 if (s->system_journal) {
1950 /* Try to open the runtime journal, but only
1951 * if it already exists, so that we can flush
1952 * it into the system journal */
1954 r = journal_file_open(fn, O_RDWR, 0640, NULL, &s->runtime_journal);
1959 log_warning("Failed to open runtime journal: %s", strerror(-r));
1966 /* OK, we really need the runtime journal, so create
1967 * it if necessary. */
1969 (void) mkdir_parents(fn, 0755);
1970 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, NULL, &s->runtime_journal);
1974 log_error("Failed to open runtime journal: %s", strerror(-r));
1979 if (s->runtime_journal) {
1980 journal_default_metrics(&s->runtime_metrics, s->runtime_journal->fd);
1982 s->runtime_journal->metrics = s->runtime_metrics;
1983 s->runtime_journal->compress = s->compress;
1985 server_fix_perms(s, s->runtime_journal, 0);
/* Copies the entries of the runtime journal into the system journal once the
 * latter can be opened, then closes the runtime journal and removes its
 * directory under /run/log/journal/.
 * The attempt is rate limited: it is skipped while less than
 * RECHECK_VAR_AVAILABLE_USEC has passed since the previous attempt.
 * NOTE(review): the return values and the cleanup label are not visible in
 * this excerpt. */
1992 static int server_flush_to_var(Server *s) {
/* The run of 'x' is a placeholder that is overwritten with the machine id
 * string further down (at offset 17, right after the directory prefix). */
1993 char path[] = "/run/log/journal/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
/* Without a runtime journal there is nothing to flush. */
2002 if (!s->runtime_journal)
2005 ts = now(CLOCK_MONOTONIC);
2006 if (s->var_available_timestamp + RECHECK_VAR_AVAILABLE_USEC > ts)
2009 s->var_available_timestamp = ts;
/* Try (again) to open the system journal; its result is checked through
 * s->system_journal rather than through the return value. */
2011 system_journal_open(s);
2013 if (!s->system_journal)
2016 log_info("Flushing to /var...");
2018 r = sd_id128_get_machine(&machine);
2020 log_error("Failed to get machine id: %s", strerror(-r));
2024 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
2026 log_error("Failed to read runtime journal: %s", strerror(-r));
/* Walk all runtime entries and copy each into the system journal. */
2030 SD_JOURNAL_FOREACH(j) {
2033 f = j->current_file;
2034 assert(f && f->current_offset > 0);
2036 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2038 log_error("Can't read entry: %s", strerror(-r));
2042 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
/* The copy is retried after the "Allocation limit reached." case; presumably
 * space is reclaimed in between -- TODO confirm, those lines are not visible. */
2044 log_info("Allocation limit reached.");
2046 journal_file_post_change(s->system_journal);
2050 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2054 log_error("Can't write entry: %s", strerror(-r));
2060 journal_file_post_change(s->system_journal);
2062 journal_file_close(s->runtime_journal);
2063 s->runtime_journal = NULL;
/* 17 is the length of "/run/log/journal/": the machine id is written over
 * the placeholder and the resulting directory is removed. */
2066 sd_id128_to_string(machine, path + 17);
2067 rm_rf(path, false, true, false);
/* Reads available kernel log data from s->proc_kmsg_fd, appending it to
 * s->proc_kmsg_buffer after the bytes already buffered (one byte of the
 * buffer is always left unused), and advances s->proc_kmsg_length.
 * NOTE(review): the return values and the subsequent scan of the buffer are
 * not visible in this excerpt. */
2073 static int server_read_proc_kmsg(Server *s) {
2076 assert(s->proc_kmsg_fd >= 0);
2078 l = read(s->proc_kmsg_fd, s->proc_kmsg_buffer + s->proc_kmsg_length, sizeof(s->proc_kmsg_buffer) - 1 - s->proc_kmsg_length);
/* EAGAIN and EINTR are handled separately from real read errors, which are
 * logged. */
2081 if (errno == EAGAIN || errno == EINTR)
2084 log_error("Failed to read from kernel: %m");
2088 s->proc_kmsg_length += l;
/* Reads kernel log data via server_read_proc_kmsg() if a /proc/kmsg fd is
 * open. Called from main() before the event loop starts.
 * NOTE(review): the loop or termination condition around the read call is
 * not visible in this excerpt. */
2094 static int server_flush_proc_kmsg(Server *s) {
/* Nothing to do when /proc/kmsg was not opened. */
2099 if (s->proc_kmsg_fd < 0)
2102 log_info("Flushing /proc/kmsg...");
2105 r = server_read_proc_kmsg(s);
/* Dispatches a single epoll event. The event's data is compared against the
 * server's well-known fds in turn:
 *   - signal fd:            read a signalfd_siginfo; SIGUSR1 triggers a flush
 *                           to /var, other signals are logged
 *   - /proc/kmsg fd:        read kernel log data
 *   - native/syslog socket: receive one datagram with its ancillary data and
 *                           hand it to the matching message processor
 *   - stdout listen socket: accept a new stream connection
 *   - anything else:        treated as a StdoutStream pointer
 * NOTE(review): return statements and several declarations are not visible
 * in this excerpt, so the return contract cannot be documented from here. */
2116 static int process_event(Server *s, struct epoll_event *ev) {
2119 if (ev->data.fd == s->signal_fd) {
2120 struct signalfd_siginfo sfsi;
2123 if (ev->events != EPOLLIN) {
2124 log_info("Got invalid event from epoll.");
2128 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
/* Anything but a complete siginfo record is treated specially; EINTR/EAGAIN
 * are singled out among the failures. */
2129 if (n != sizeof(sfsi)) {
2134 if (errno == EINTR || errno == EAGAIN)
2140 if (sfsi.ssi_signo == SIGUSR1) {
2141 server_flush_to_var(s);
2145 log_debug("Received SIG%s", signal_to_string(sfsi.ssi_signo));
2148 } else if (ev->data.fd == s->proc_kmsg_fd) {
2151 if (ev->events != EPOLLIN) {
2152 log_info("Got invalid event from epoll.");
2156 r = server_read_proc_kmsg(s);
2162 } else if (ev->data.fd == s->native_fd ||
2163 ev->data.fd == s->syslog_fd) {
2165 if (ev->events != EPOLLIN) {
2166 log_info("Got invalid event from epoll.");
2171 struct msghdr msghdr;
2173 struct ucred *ucred = NULL;
2174 struct timeval *tv = NULL;
2175 struct cmsghdr *cmsg;
2177 size_t label_len = 0;
2179 struct cmsghdr cmsghdr;
2181 /* We use NAME_MAX space for the
2182 * SELinux label here. The kernel
2183 * currently enforces no limit, but
2184 * according to suggestions from the
2185 * SELinux people this will change and
2186 * it will probably be identical to
2187 * NAME_MAX. For now we use that, but
2188 * this should be updated one day when
2189 * the final limit is known.*/
2190 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
2191 CMSG_SPACE(sizeof(struct timeval)) +
2192 CMSG_SPACE(sizeof(int)) + /* fd */
2193 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask the socket how much data is pending so the receive buffer can be
 * sized before the datagram is read. */
2200 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
2201 log_error("SIOCINQ failed: %m");
2205 if (s->buffer_size < (size_t) v) {
/* Grow to at least the pending size plus LINE_MAX, or double the current
 * size, whichever is larger; one extra byte is allocated beyond that. The
 * realloc() result goes into a temporary so s->buffer stays valid on
 * failure. */
2209 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
2210 b = realloc(s->buffer, l+1);
2213 log_error("Couldn't increase buffer.");
2222 iovec.iov_base = s->buffer;
2223 iovec.iov_len = s->buffer_size;
2227 msghdr.msg_iov = &iovec;
2228 msghdr.msg_iovlen = 1;
2229 msghdr.msg_control = &control;
2230 msghdr.msg_controllen = sizeof(control);
/* Received fds are marked close-on-exec right away. */
2232 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
2235 if (errno == EINTR || errno == EAGAIN)
2238 log_error("recvmsg() failed: %m");
/* Pick the sender credentials, security label, receive timestamp and any
 * passed file descriptors out of the ancillary data. Fixed-size payloads are
 * only accepted when the control message has exactly the expected length. */
2242 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
2244 if (cmsg->cmsg_level == SOL_SOCKET &&
2245 cmsg->cmsg_type == SCM_CREDENTIALS &&
2246 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
2247 ucred = (struct ucred*) CMSG_DATA(cmsg);
2248 else if (cmsg->cmsg_level == SOL_SOCKET &&
2249 cmsg->cmsg_type == SCM_SECURITY) {
2250 label = (char*) CMSG_DATA(cmsg);
/* Payload length = total control message length minus the header. */
2251 label_len = cmsg->cmsg_len - CMSG_LEN(0);
2252 } else if (cmsg->cmsg_level == SOL_SOCKET &&
2253 cmsg->cmsg_type == SO_TIMESTAMP &&
2254 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
2255 tv = (struct timeval*) CMSG_DATA(cmsg);
2256 else if (cmsg->cmsg_level == SOL_SOCKET &&
2257 cmsg->cmsg_type == SCM_RIGHTS) {
2258 fds = (int*) CMSG_DATA(cmsg);
2259 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* syslog socket: only plain datagrams without fds are processed. */
2263 if (ev->data.fd == s->syslog_fd) {
2266 if (n > 0 && n_fds == 0) {
/* Look for the first newline in the datagram; what is done with it is not
 * visible in this excerpt. */
2267 e = memchr(s->buffer, '\n', n);
2273 process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
2274 } else if (n_fds > 0)
2275 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* native socket: either an inline payload without fds, or an empty datagram
 * carrying exactly one fd that holds the payload. */
2278 if (n > 0 && n_fds == 0)
2279 process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
2280 else if (n == 0 && n_fds == 1)
2281 process_native_file(s, fds[0], ucred, tv, label, label_len);
2283 log_warning("Got too many file descriptors via native socket. Ignoring.");
/* Close the fds that arrived with the datagram. */
2286 close_many(fds, n_fds);
2291 } else if (ev->data.fd == s->stdout_fd) {
2293 if (ev->events != EPOLLIN) {
2294 log_info("Got invalid event from epoll.");
2298 stdout_stream_new(s);
2302 StdoutStream *stream;
/* For streams, any combination of EPOLLIN and EPOLLHUP is acceptable;
 * every other event bit is rejected. */
2304 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
2305 log_info("Got invalid event from epoll.");
2309 /* If it is none of the well-known fds, it must be an
2310 * stdout stream fd. Note that this is a bit ugly here
2311 * (since we rely that none of the well-known fds
2312 * could be interpreted as pointer), but nonetheless
2313 * safe, since the well-known fds would never get an
2314 * fd > 4096, i.e. beyond the first memory page */
2316 stream = ev->data.ptr;
/* A result <= 0 from the stream processor ends the stream's life. */
2318 if (stdout_stream_process(stream) <= 0)
2319 stdout_stream_free(stream);
2324 log_error("Unknown event.");
/* Prepares the /dev/log datagram socket. If no fd was handed in
 * (s->syslog_fd < 0) a new AF_UNIX datagram socket is created, any stale
 * socket node is unlinked, the socket is bound to /dev/log and made
 * world-accessible. The socket is then configured to deliver sender
 * credentials, security labels and receive timestamps as ancillary data, and
 * is added to the server's epoll set.
 * NOTE(review): return statements and the assignments to 'one' are not
 * visible in this excerpt. */
2328 static int open_syslog_socket(Server *s) {
2329 union sockaddr_union sa;
2331 struct epoll_event ev;
2335 if (s->syslog_fd < 0) {
2337 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2338 if (s->syslog_fd < 0) {
2339 log_error("socket() failed: %m");
2344 sa.un.sun_family = AF_UNIX;
2345 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
/* Remove a leftover socket node so bind() does not fail on it. */
2347 unlink(sa.un.sun_path);
2349 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2351 log_error("bind() failed: %m");
/* Every local process must be able to log. */
2355 chmod(sa.un.sun_path, 0666);
/* Presumably the branch for an fd that was passed in: make sure it is
 * non-blocking -- TODO confirm, the 'else' is not visible here. */
2357 fd_nonblock(s->syslog_fd, 1);
2360 r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2362 log_error("SO_PASSCRED failed: %m");
/* A failure to enable security labels only produces a warning. */
2368 r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2370 log_warning("SO_PASSSEC failed: %m");
2374 r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2376 log_error("SO_TIMESTAMP failed: %m");
2381 ev.events = EPOLLIN;
2382 ev.data.fd = s->syslog_fd;
2383 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2384 log_error("Failed to add syslog server fd to epoll object: %m");
/* Prepares the native journal datagram socket. If no fd was handed in
 * (s->native_fd < 0) a new AF_UNIX datagram socket is created, any stale
 * socket node is unlinked, the socket is bound to
 * /run/systemd/journal/socket and made world-accessible. The socket is then
 * configured to deliver sender credentials, security labels and receive
 * timestamps as ancillary data, and is added to the server's epoll set.
 * NOTE(review): return statements and the assignments to 'one' are not
 * visible in this excerpt. */
2391 static int open_native_socket(Server*s) {
2392 union sockaddr_union sa;
2394 struct epoll_event ev;
2398 if (s->native_fd < 0) {
2400 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2401 if (s->native_fd < 0) {
2402 log_error("socket() failed: %m");
2407 sa.un.sun_family = AF_UNIX;
2408 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
/* Remove a leftover socket node so bind() does not fail on it. */
2410 unlink(sa.un.sun_path);
2412 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2414 log_error("bind() failed: %m");
/* Every local process must be able to log. */
2418 chmod(sa.un.sun_path, 0666);
/* Presumably the branch for an fd that was passed in: make sure it is
 * non-blocking -- TODO confirm, the 'else' is not visible here. */
2420 fd_nonblock(s->native_fd, 1);
2423 r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2425 log_error("SO_PASSCRED failed: %m");
/* Security labels must be enabled on the native socket itself, like the
 * other options in this function; a failure only produces a warning. */
2431 r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2433 log_warning("SO_PASSSEC failed: %m");
2437 r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2439 log_error("SO_TIMESTAMP failed: %m");
2444 ev.events = EPOLLIN;
2445 ev.data.fd = s->native_fd;
2446 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2447 log_error("Failed to add native server fd to epoll object: %m");
/* Prepares the stream socket that stdout/stderr connections are accepted on.
 * If no fd was handed in (s->stdout_fd < 0) a new AF_UNIX stream socket is
 * created, any stale socket node is unlinked, the socket is bound to
 * /run/systemd/journal/stdout, made world-accessible and put into listening
 * state. The socket is then added to the server's epoll set.
 * NOTE(review): return statements are not visible in this excerpt. */
2454 static int open_stdout_socket(Server *s) {
2455 union sockaddr_union sa;
2457 struct epoll_event ev;
2461 if (s->stdout_fd < 0) {
2463 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2464 if (s->stdout_fd < 0) {
2465 log_error("socket() failed: %m");
2470 sa.un.sun_family = AF_UNIX;
2471 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
/* Remove a leftover socket node so bind() does not fail on it. */
2473 unlink(sa.un.sun_path);
2475 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2477 log_error("bind() failed: %m");
/* Every local process must be able to connect. */
2481 chmod(sa.un.sun_path, 0666);
2483 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2484 log_error("listen() failed: %m");
/* Presumably the branch for an fd that was passed in: make sure it is
 * non-blocking -- TODO confirm, the 'else' is not visible here. */
2488 fd_nonblock(s->stdout_fd, 1);
2491 ev.events = EPOLLIN;
2492 ev.data.fd = s->stdout_fd;
2493 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2494 log_error("Failed to add stdout server fd to epoll object: %m");
/* Opens /proc/kmsg for non-blocking reading and adds it to the server's epoll
 * set, unless importing the kernel log is disabled (s->import_proc_kmsg).
 * A failure to open the file is only warned about ("ignoring").
 * NOTE(review): return statements are not visible in this excerpt. */
2501 static int open_proc_kmsg(Server *s) {
2502 struct epoll_event ev;
2506 if (!s->import_proc_kmsg)
2509 s->proc_kmsg_fd = open("/proc/kmsg", O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2510 if (s->proc_kmsg_fd < 0) {
2511 log_warning("Failed to open /proc/kmsg, ignoring: %m");
2516 ev.events = EPOLLIN;
2517 ev.data.fd = s->proc_kmsg_fd;
2518 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->proc_kmsg_fd, &ev) < 0) {
2519 log_error("Failed to add /proc/kmsg fd to epoll object: %m");
/* Routes SIGINT, SIGTERM and SIGUSR1 through a signalfd: the three signals
 * are installed as the process signal mask, a non-blocking close-on-exec
 * signalfd is created for the same set, and the fd is added to the server's
 * epoll set.
 * NOTE(review): return statements are not visible in this excerpt. */
2526 static int open_signalfd(Server *s) {
2528 struct epoll_event ev;
2532 assert_se(sigemptyset(&mask) == 0);
2533 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, -1);
/* SIG_SETMASK replaces the whole mask, so exactly these signals are blocked
 * from normal delivery and are read from the fd instead. */
2534 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2536 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2537 if (s->signal_fd < 0) {
2538 log_error("signalfd(): %m");
2543 ev.events = EPOLLIN;
2544 ev.data.fd = s->signal_fd;
2546 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2547 log_error("epoll_ctl(): %m");
/* Applies forwarding switches given on the kernel command line. Each word of
 * /proc/cmdline is checked for the prefixes
 *   systemd_journald.forward_to_syslog=
 *   systemd_journald.forward_to_kmsg=
 *   systemd_journald.forward_to_console=
 * and the remainder of the word is parsed as a boolean into the matching
 * server flag. Unparsable values are warned about and ignored.
 * NOTE(review): return statements and frees are not visible in this
 * excerpt. */
2554 static int server_parse_proc_cmdline(Server *s) {
2555 char *line, *w, *state;
/* The check for running inside a container comes first; presumably the
 * host's kernel command line is not meant for us then -- TODO confirm, the
 * statement body is not visible. */
2559 if (detect_container(NULL) > 0)
2562 r = read_one_line_file("/proc/cmdline", &line);
2564 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2568 FOREACH_WORD_QUOTED(w, l, line, state) {
2571 word = strndup(w, l);
/* The numeric offsets below are the lengths of the respective prefix
 * strings, i.e. they point at the value after the '='. */
2577 if (startswith(word, "systemd_journald.forward_to_syslog=")) {
2578 r = parse_boolean(word + 35);
2580 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2582 s->forward_to_syslog = r;
2583 } else if (startswith(word, "systemd_journald.forward_to_kmsg=")) {
2584 r = parse_boolean(word + 33);
2586 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2588 s->forward_to_kmsg = r;
2589 } else if (startswith(word, "systemd_journald.forward_to_console=")) {
2590 r = parse_boolean(word + 36);
2592 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2594 s->forward_to_console = r;
/* Loads /etc/systemd/journald.conf into the server object via config_parse(),
 * using the "Journal" section and the gperf-generated lookup table.
 * A missing file (ENOENT) is distinguished from other open failures, which
 * are warned about; parse failures are warned about as well.
 * NOTE(review): return statements and the fclose() are not visible in this
 * excerpt. */
2607 static int server_parse_config_file(Server *s) {
2614 fn = "/etc/systemd/journald.conf";
/* "e" opens the file close-on-exec. */
2615 f = fopen(fn, "re");
2617 if (errno == ENOENT)
2620 log_warning("Failed to open configuration file %s: %m", fn);
2624 r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2626 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Initializes a Server: sets defaults, applies the configuration file and the
 * kernel command line, creates the user-journal map and the epoll object,
 * takes over sockets passed in by the service manager, opens whatever
 * sockets/fds are still missing, creates the rate limiter and opens the
 * journal files.
 * NOTE(review): return statements and the fd assignments inside the socket
 * activation loop are not visible in this excerpt. */
2633 static int server_init(Server *s) {
/* All fds start out as "not open". */
2639 s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->proc_kmsg_fd = -1;
2642 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
2643 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
2645 s->forward_to_syslog = true;
2646 s->import_proc_kmsg = true;
/* Fill the metrics with all-ones bytes; presumably this marks every field as
 * "unset" for journal_default_metrics() -- TODO confirm. */
2648 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
2649 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* The command line is parsed after the configuration file, so its settings
 * are applied last. */
2651 server_parse_config_file(s);
2652 server_parse_proc_cmdline(s);
2654 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
2655 if (!s->user_journals) {
2656 log_error("Out of memory.");
2660 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2661 if (s->epoll_fd < 0) {
2662 log_error("Failed to create epoll object: %m");
/* Classify each passed-in fd by socket type and path; at most one socket of
 * each kind is accepted, and unrecognised sockets are an error. */
2666 n = sd_listen_fds(true);
2668 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
2672 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2674 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
2676 if (s->native_fd >= 0) {
2677 log_error("Too many native sockets passed.");
2683 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
2685 if (s->stdout_fd >= 0) {
2686 log_error("Too many stdout sockets passed.");
2692 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
2694 if (s->syslog_fd >= 0) {
2695 log_error("Too many /dev/log sockets passed.");
2702 log_error("Unknown socket passed.");
/* The open_*() helpers create a socket only when none was passed in, and
 * configure it and register it with epoll either way. */
2707 r = open_syslog_socket(s);
2711 r = open_native_socket(s);
2715 r = open_stdout_socket(s);
2719 r = open_proc_kmsg(s);
2723 r = open_signalfd(s);
2727 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2731 r = system_journal_open(s);
/* Releases everything a Server owns: all stdout streams, the system, runtime
 * and per-user journal files, the user-journal map, every open fd and the
 * rate limiter.
 * NOTE(review): a few lines of this function are not visible in this
 * excerpt. */
2738 static void server_done(Server *s) {
/* stdout_stream_free() unlinks the stream from the list, so the head changes
 * on every iteration until the list is empty. */
2742 while (s->stdout_streams)
2743 stdout_stream_free(s->stdout_streams);
2745 if (s->system_journal)
2746 journal_file_close(s->system_journal);
2748 if (s->runtime_journal)
2749 journal_file_close(s->runtime_journal);
/* Drain the map of per-user journals before freeing the map itself. */
2751 while ((f = hashmap_steal_first(s->user_journals)))
2752 journal_file_close(f);
2754 hashmap_free(s->user_journals);
/* Close only the fds that were actually opened (>= 0). */
2756 if (s->epoll_fd >= 0)
2757 close_nointr_nofail(s->epoll_fd);
2759 if (s->signal_fd >= 0)
2760 close_nointr_nofail(s->signal_fd);
2762 if (s->syslog_fd >= 0)
2763 close_nointr_nofail(s->syslog_fd);
2765 if (s->native_fd >= 0)
2766 close_nointr_nofail(s->native_fd);
2768 if (s->stdout_fd >= 0)
2769 close_nointr_nofail(s->stdout_fd);
2771 if (s->proc_kmsg_fd >= 0)
2772 close_nointr_nofail(s->proc_kmsg_fd);
2775 journal_rate_limit_free(s->rate_limit);
/* Entry point of the journal daemon: rejects command line arguments, sets up
 * logging, initializes the server, performs the initial vacuum and flushes,
 * then processes one epoll event at a time until the loop ends, and finally
 * tears the server down. The exit status is EXIT_FAILURE when the last
 * result code is negative, EXIT_SUCCESS otherwise.
 * NOTE(review): the loop header, its exit conditions and the status
 * notification calls are only partly visible in this excerpt. */
2780 int main(int argc, char *argv[]) {
2784 /* if (getppid() != 1) { */
2785 /* log_error("This program should be invoked by init only."); */
2786 /* return EXIT_FAILURE; */
2790 log_error("This program does not take arguments.");
2791 return EXIT_FAILURE;
2794 log_set_target(LOG_TARGET_SAFE);
2795 log_set_facility(LOG_SYSLOG);
2796 log_parse_environment();
2801 r = server_init(&server);
/* Start-up housekeeping before entering the event loop. */
2805 server_vacuum(&server);
2806 server_flush_to_var(&server);
2807 server_flush_proc_kmsg(&server);
2809 log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
2810 driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
2814 "STATUS=Processing requests...");
2817 struct epoll_event event;
/* Block without timeout (-1) for a single event at a time. */
2819 r = epoll_wait(server.epoll_fd, &event, 1, -1);
2825 log_error("epoll_wait() failed: %m");
2831 r = process_event(&server, &event);
2838 log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
2839 driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
2843 "STATUS=Shutting down...");
2845 server_done(&server);
2847 return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;