1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
25 #include <sys/signalfd.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
34 #include <systemd/sd-journal.h>
35 #include <systemd/sd-messages.h>
36 #include <systemd/sd-daemon.h>
39 #include <systemd/sd-login.h>
44 #include "journal-file.h"
45 #include "socket-util.h"
46 #include "cgroup-util.h"
48 #include "journal-rate-limit.h"
49 #include "journal-internal.h"
50 #include "conf-parser.h"
57 #include <acl/libacl.h>
62 #include <selinux/selinux.h>
/* Cap on simultaneously open per-user journal files; find_journal()
 * closes one when the hashmap has reached this size. */
65 #define USER_JOURNALS_MAX 1024
/* Cap on concurrent stdout stream connections; stdout_stream_new()
 * refuses further connections once it is reached. */
66 #define STDOUT_STREAMS_MAX 4096
/* Default rate limiting parameters.
 * NOTE(review): their consumer is not visible in this part of the file. */
68 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
69 #define DEFAULT_RATE_LIMIT_BURST 200
/* How long available_space() keeps returning its cached result. */
71 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* iovec slots callers must keep free for the fields added by
 * dispatch_message_real() (see the assert there). */
73 #define N_IOVEC_META_FIELDS 17
/* Additional iovec slots reserved by dev_kmsg_record(). */
74 #define N_IOVEC_KERNEL_FIELDS 64
/* Largest passed file (32 MiB) that process_native_file() accepts. */
76 #define ENTRY_SIZE_MAX (1024*1024*32)
/* Parser state of a stdout stream connection. The client sends one
 * header line per state, in this order, and stdout_stream_line()
 * advances the state after each line. */
78 typedef enum StdoutStreamState {
79 STDOUT_STREAM_IDENTIFIER,
80 STDOUT_STREAM_UNIT_ID,
81 STDOUT_STREAM_PRIORITY,
82 STDOUT_STREAM_LEVEL_PREFIX,
83 STDOUT_STREAM_FORWARD_TO_SYSLOG,
84 STDOUT_STREAM_FORWARD_TO_KMSG,
85 STDOUT_STREAM_FORWARD_TO_CONSOLE,
/* Fields of the per-connection stdout stream object.
 * NOTE(review): the struct header and several members are not visible here. */
/* Which header line is expected next; see stdout_stream_line(). */
91 StdoutStreamState state;
/* Peer SELinux context, obtained with getpeercon() in stdout_stream_new()
 * and released with freecon() in stdout_stream_free(). */
97 security_context_t security_context;
/* Per-stream forwarding flags, parsed from the header lines. */
104 bool forward_to_syslog:1;
105 bool forward_to_kmsg:1;
106 bool forward_to_console:1;
/* Bytes read from the socket that have not been consumed as lines yet;
 * filled by stdout_stream_process(), drained by stdout_stream_scan(). */
108 char buffer[LINE_MAX+1];
/* Links the stream into the server's stdout_streams list. */
111 LIST_FIELDS(StdoutStream, stdout_stream);
/* String names of the Storage enum values, indexed by enum value. The two
 * macros below generate the lookup helpers and the config parser
 * config_parse_storage() from this table. */
114 static const char* const storage_table[] = {
115 [STORAGE_AUTO] = "auto",
116 [STORAGE_VOLATILE] = "volatile",
117 [STORAGE_PERSISTENT] = "persistent",
118 [STORAGE_NONE] = "none"
121 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
122 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Computes how many more bytes the journal may use and caches the result
 * in the Server for RECHECK_AVAILABLE_SPACE_USEC. Two limits are
 * evaluated: the configured max_use minus the disk usage of the existing
 * journal files, and the free space of the file system minus keep_free.
 * NOTE(review): error paths and the final return are not visible here. */
124 static uint64_t available_space(Server *s) {
129 uint64_t sum = 0, avail = 0, ss_avail = 0;
135 ts = now(CLOCK_MONOTONIC);
/* Reuse the cached value while it is still fresh. */
137 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
138 return s->cached_available_space;
140 r = sd_id128_get_machine(&machine);
/* Select the directory prefix and the metrics to apply: the persistent
 * ones if the system journal is open, the runtime ones otherwise
 * (presumably in an else branch whose line is not visible). */
144 if (s->system_journal) {
145 f = "/var/log/journal/";
146 m = &s->system_metrics;
148 f = "/run/log/journal/";
149 m = &s->runtime_metrics;
/* The journal directory is <prefix><machine-id>. */
154 p = strappend(f, sd_id128_to_string(machine, ids));
164 if (fstatvfs(dirfd(d), &ss) < 0)
169 struct dirent buf, *de;
171 r = readdir_r(d, &buf, &de);
/* Only journal files (active or archived-as-corrupt "~" ones) count. */
178 if (!endswith(de->d_name, ".journal") &&
179 !endswith(de->d_name, ".journal~"))
182 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
185 if (!S_ISREG(st.st_mode))
/* st_blocks is in 512-byte units, so this adds the allocated size. */
188 sum += (uint64_t) st.st_blocks * 512UL;
/* Headroom below max_use, clamped at zero. */
191 avail = sum >= m->max_use ? 0 : m->max_use - sum;
/* Free space of the file system ... */
193 ss_avail = ss.f_bsize * ss.f_bavail;
/* ... minus the reserve that must stay free, clamped at zero. */
195 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
197 if (ss_avail < avail)
/* Remember the result and the time it was computed. */
200 s->cached_available_space = avail;
201 s->cached_available_space_timestamp = ts;
/* Resolves the GID of the "adm" group into s->file_gid. The lookup is
 * attempted only once per Server: s->file_gid_valid is set afterwards
 * regardless of the outcome. */
209 static void server_read_file_gid(Server *s) {
210 const char *adm = "adm";
/* Already resolved (or already tried) earlier. */
215 if (s->file_gid_valid)
218 r = get_group_creds(&adm, &s->file_gid);
220 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
222 /* if we couldn't read the gid, then it will be 0, but that's
223 * fine and we shouldn't try to resolve the group again, so
224 * let's just pretend it worked right-away. */
225 s->file_gid_valid = true;
/* Adjusts ownership, mode and ACL of journal file f: the file is set to
 * mode 0640, owner root (uid 0) and group s->file_gid, and an ACL entry
 * granting read access to uid is added. All failures are only logged.
 * NOTE(review): early-return conditions (e.g. for uid 0) are not visible
 * in this view. */
228 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
233 acl_permset_t permset;
/* Make sure s->file_gid has been resolved. */
238 server_read_file_gid(s);
240 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
242 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
248 acl = acl_get_fd(f->fd);
250 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an existing ACL entry for this user ... */
254 r = acl_find_uid(acl, uid, &entry);
/* ... and create a new ACL_USER entry for uid (presumably only when none
 * was found; the guarding condition is not visible). */
257 if (acl_create_entry(&acl, &entry) < 0 ||
258 acl_set_tag_type(entry, ACL_USER) < 0 ||
259 acl_set_qualifier(entry, &uid) < 0) {
260 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Grant read permission on the entry and recompute the ACL mask. */
265 if (acl_get_permset(entry, &permset) < 0 ||
266 acl_add_perm(permset, ACL_READ) < 0 ||
267 acl_calc_mask(&acl) < 0) {
268 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Write the modified ACL back to the file. */
272 if (acl_set_fd(f->fd, acl) < 0)
273 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Returns the journal file an entry belonging to uid should be written
 * to. The runtime journal takes precedence when it is open. Otherwise a
 * per-user file /var/log/journal/<machine-id>/user-<uid>.journal is taken
 * from s->user_journals or opened and registered there. The visible
 * error paths fall back to s->system_journal.
 * NOTE(review): the conditions guarding the individual returns are not
 * visible in this view. */
280 static JournalFile* find_journal(Server *s, uid_t uid) {
289 /* We split up user logs only on /var, not on /run. If the
290 * runtime file is open, we write to it exclusively, in order
291 * to guarantee proper order as soon as we flush /run to
292 * /var and close the runtime file. */
294 if (s->runtime_journal)
295 return s->runtime_journal;
298 return s->system_journal;
300 r = sd_id128_get_machine(&machine);
302 return s->system_journal;
/* Is the user's journal open already? */
304 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
308 if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0)
309 return s->system_journal;
/* Keep the number of open user journals below USER_JOURNALS_MAX. */
311 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
312 /* Too many open? Then let's close one */
313 f = hashmap_steal_first(s->user_journals);
315 journal_file_close(f);
/* Open (or create) the file, using the system journal as template. */
318 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, &s->system_metrics, s->system_journal, &f);
322 return s->system_journal;
/* Give the owning user read access to the file. */
324 server_fix_perms(s, f, uid);
326 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
/* Could not register the file: close it again and fall back. */
328 journal_file_close(f);
329 return s->system_journal;
/* Rotates the runtime journal, the system journal and every open
 * per-user journal. After a successful rotation the permissions of the
 * newly created file are fixed up with server_fix_perms(). Failures are
 * only logged.
 * NOTE(review): the "if (r < 0) ... else" lines that select between the
 * log_error() and server_fix_perms() calls are not visible in this view. */
335 static void server_rotate(Server *s) {
341 log_info("Rotating...");
343 if (s->runtime_journal) {
344 r = journal_file_rotate(&s->runtime_journal);
346 if (s->runtime_journal)
347 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
349 log_error("Failed to create new runtime journal: %s", strerror(-r));
351 server_fix_perms(s, s->runtime_journal, 0);
354 if (s->system_journal) {
355 r = journal_file_rotate(&s->system_journal);
357 if (s->system_journal)
358 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
360 log_error("Failed to create new system journal: %s", strerror(-r));
363 server_fix_perms(s, s->system_journal, 0);
/* The hashmap key k is the uid the journal f belongs to. */
366 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
367 r = journal_file_rotate(&f);
370 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
372 log_error("Failed to create user journal: %s", strerror(-r));
/* journal_file_rotate() replaced f with the new file: store it ... */
374 hashmap_replace(s->user_journals, k, f);
/* ... and grant the user access to that new file. This used to pass
 * s->system_journal, which added the user's ACL entry to the system
 * journal and left the rotated user journal without it. */
375 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Deletes old journal files: runs journal_directory_vacuum() on the
 * persistent and/or runtime journal directory of this machine with the
 * respective max_use/keep_free limits. A missing directory (-ENOENT) is
 * not reported. Afterwards the cached available-space value is
 * invalidated. */
380 static void server_vacuum(Server *s) {
386 log_info("Vacuuming...");
388 r = sd_id128_get_machine(&machine);
390 log_error("Failed to get machine ID: %s", strerror(-r));
/* The journal directories are named after the machine ID. */
394 sd_id128_to_string(machine, ids);
396 if (s->system_journal) {
397 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
402 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
403 if (r < 0 && r != -ENOENT)
404 log_error("Failed to vacuum %s: %s", p, strerror(-r));
408 if (s->runtime_journal) {
409 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
414 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
415 if (r < 0 && r != -ENOENT)
416 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* Force available_space() to recompute on its next call. */
420 s->cached_available_space_timestamp = 0;
/* Returns a newly allocated cgroup path of pid in the systemd hierarchy,
 * made relative to the cgroup PID 1 runs in: if the process path starts
 * with PID 1's path, that prefix is cut off.
 * NOTE(review): the fallback branch, the cleanup and the return are not
 * visible in this view. */
423 static char *shortened_cgroup_path(pid_t pid) {
425 char *process_path, *init_path, *path;
429 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
433 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
/* Strip a trailing "/system" (7 characters) from PID 1's path. */
439 if (endswith(init_path, "/system"))
440 init_path[strlen(init_path) - 7] = 0;
441 else if (streq(init_path, "/"))
444 if (startswith(process_path, init_path)) {
/* Keep only the part below PID 1's cgroup. */
447 p = strdup(process_path + strlen(init_path));
/* Appends one entry, given as n iovec fields, to the journal selected by
 * find_journal() for uid. If the file suggests rotation, or if appending
 * fails with one of the errors listed below, the journals are rotated
 * and the write is retried; any other error is logged and the entry is
 * dropped.
 * NOTE(review): the rotate/vacuum calls and the retry jump themselves
 * are on lines not visible in this view. */
465 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
467 bool vacuumed = false;
474 f = find_journal(s, uid);
478 if (journal_file_rotate_suggested(f)) {
479 log_info("Journal header limits reached or header out-of-date, rotating.");
484 f = find_journal(s, uid);
490 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
495 (r != -E2BIG && /* hit limit */
496 r != -EFBIG && /* hit fs limit */
497 r != -EDQUOT && /* quota hit */
498 r != -ENOSPC && /* disk full */
499 r != -EBADMSG && /* corrupted */
500 r != -ENODATA && /* truncated */
501 r != -EHOSTDOWN && /* other machine */
502 r != -EPROTONOSUPPORT && /* unsupported feature */
503 r != -EBUSY && /* unclean shutdown */
504 r != -ESHUTDOWN /* already archived */)) {
505 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* r holds a negative errno value, so all four codes must be negated;
 * with the positive EDQUOT/ENOSPC the quota and disk-full cases were
 * misreported as "Journal file corrupted". */
509 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
510 log_info("Allocation limit reached, rotating.");
511 else if (r == -EHOSTDOWN)
512 log_info("Journal file from other machine, rotating.");
513 else if (r == -EBUSY)
514 log_info("Unclean shutdown, rotating.");
516 log_warning("Journal file corrupted, rotating.");
522 f = find_journal(s, uid);
526 log_info("Retrying write.");
/* Appends the trusted metadata fields to an entry and writes it to the
 * journal.
 *
 * iovec/n     fields collected so far; n is the number of used slots
 * m           capacity of iovec; at least N_IOVEC_META_FIELDS slots beyond
 *             n must be free (asserted)
 * ucred       credentials of the sender; source of the _PID=, _UID=,
 *             _GID= fields and of everything looked up by PID
 * tv          sender timestamp, stored as _SOURCE_REALTIME_TIMESTAMP=
 * label       SELinux label of the sender (label_len bytes, need not be
 *             NUL-terminated); if none is used, getpidcon() is queried
 * unit_id     unit name to record when cg_pid_get_unit() does not yield
 *             one
 *
 * Every field is optional: if building it fails, it is simply left out.
 * The entry goes to the journal of the sender's login uid, or to the
 * system journal (uid 0) if the sender's real uid is 0.
 * NOTE(review): several conditions and free() calls are on lines not
 * visible in this view. */
530 static void dispatch_message_real(
532 struct iovec *iovec, unsigned n, unsigned m,
535 const char *label, size_t label_len,
536 const char *unit_id) {
538 char *pid = NULL, *uid = NULL, *gid = NULL,
539 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
540 *comm = NULL, *cmdline = NULL, *hostname = NULL,
541 *audit_session = NULL, *audit_loginuid = NULL,
542 *exe = NULL, *cgroup = NULL, *session = NULL,
543 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
549 uid_t loginuid = 0, realuid = 0;
554 assert(n + N_IOVEC_META_FIELDS <= m);
562 realuid = ucred->uid;
564 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
565 IOVEC_SET_STRING(iovec[n++], pid);
567 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
568 IOVEC_SET_STRING(iovec[n++], uid);
570 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
571 IOVEC_SET_STRING(iovec[n++], gid);
573 r = get_process_comm(ucred->pid, &t);
575 comm = strappend("_COMM=", t);
579 IOVEC_SET_STRING(iovec[n++], comm);
582 r = get_process_exe(ucred->pid, &t);
584 exe = strappend("_EXE=", t);
588 IOVEC_SET_STRING(iovec[n++], exe);
591 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
593 cmdline = strappend("_CMDLINE=", t);
597 IOVEC_SET_STRING(iovec[n++], cmdline);
600 r = audit_session_from_pid(ucred->pid, &audit);
602 if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
603 IOVEC_SET_STRING(iovec[n++], audit_session);
605 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
607 if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
608 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
610 t = shortened_cgroup_path(ucred->pid);
612 cgroup = strappend("_SYSTEMD_CGROUP=", t);
616 IOVEC_SET_STRING(iovec[n++], cgroup);
620 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
621 session = strappend("_SYSTEMD_SESSION=", t);
625 IOVEC_SET_STRING(iovec[n++], session);
/* sd_pid_get_owner_uid() takes a process ID like the other sd_pid_*()
 * calls; passing ucred->uid looked up an unrelated process. */
628 if (sd_pid_get_owner_uid(ucred->pid, &owner) >= 0)
629 if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
630 IOVEC_SET_STRING(iovec[n++], owner_uid);
633 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
634 unit = strappend("_SYSTEMD_UNIT=", t);
637 unit = strappend("_SYSTEMD_UNIT=", unit_id);
640 IOVEC_SET_STRING(iovec[n++], unit);
/* sizeof() of the literal includes its NUL, which provides the room for
 * the terminator written after the label below. */
644 selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
645 if (selinux_context) {
646 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
647 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
648 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
649 IOVEC_SET_STRING(iovec[n++], selinux_context);
652 security_context_t con;
654 if (getpidcon(ucred->pid, &con) >= 0) {
655 selinux_context = strappend("_SELINUX_CONTEXT=", con);
657 IOVEC_SET_STRING(iovec[n++], selinux_context);
666 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
667 (unsigned long long) timeval_load(tv)) >= 0)
668 IOVEC_SET_STRING(iovec[n++], source_time);
671 /* Note that strictly speaking storing the boot id here is
672 * redundant since the entry includes this in-line
673 * anyway. However, we need this indexed, too. */
674 r = sd_id128_get_boot(&id);
676 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
677 IOVEC_SET_STRING(iovec[n++], boot_id);
679 r = sd_id128_get_machine(&id);
681 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
682 IOVEC_SET_STRING(iovec[n++], machine_id);
684 t = gethostname_malloc();
686 hostname = strappend("_HOSTNAME=", t);
689 IOVEC_SET_STRING(iovec[n++], hostname);
/* Messages from root go to the system journal, everything else to the
 * journal of the login uid. */
694 write_to_journal(s, realuid == 0 ? 0 : loginuid, iovec, n);
707 free(audit_loginuid);
712 free(selinux_context);
/* Writes a journal entry generated by journald itself. The entry
 * consists of PRIORITY=6, _TRANSPORT=driver, a MESSAGE= field formatted
 * printf-style from format and the following arguments, and a
 * MESSAGE_ID= field for message_id. It is dispatched with the
 * credentials of the journald process. */
715 static void driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
/* "MESSAGE_ID=" (11) + 32 hex digits + NUL */
716 char mid[11 + 32 + 1];
717 char buffer[16 + LINE_MAX + 1];
/* Four fields are set here, the rest is for dispatch_message_real(). */
718 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
726 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
727 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The formatted text is placed right after the 8-byte "MESSAGE=" prefix. */
729 memcpy(buffer, "MESSAGE=", 8);
730 va_start(ap, format);
731 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
733 char_array_0(buffer);
734 IOVEC_SET_STRING(iovec[n++], buffer);
736 snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
738 IOVEC_SET_STRING(iovec[n++], mid);
/* The sender of this entry is journald itself. */
741 ucred.pid = getpid();
742 ucred.uid = getuid();
743 ucred.gid = getgid();
745 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Filters an entry before it is stored: entries above the
 * max_level_store log level are handled by the visible level check, and
 * the rest is rate limited per (shortened) cgroup path of the sender.
 * If the rate limiter reports suppressed messages, a driver message
 * saying so is logged. Entries that pass are handed to
 * dispatch_message_real().
 * NOTE(review): the statements executed when a check fails are not
 * visible in this view. */
748 static void dispatch_message(Server *s,
749 struct iovec *iovec, unsigned n, unsigned m,
752 const char *label, size_t label_len,
756 char *path = NULL, *c;
759 assert(iovec || n == 0);
764 if (LOG_PRI(priority) > s->max_level_store)
770 path = shortened_cgroup_path(ucred->pid);
774 /* example: /user/lennart/3/foobar
775 * /system/dbus.service/foobar
777 * So let's cut off everything past the third /, since that is
778 * where user directories start */
780 c = strchr(path, '/');
782 c = strchr(c+1, '/');
784 c = strchr(c+1, '/');
/* The rate limiter also takes the currently available space as input. */
790 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
797 /* Write a suppression message if we suppressed something */
799 driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
804 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
/* Sends the n_iovec buffers in iovec as a single datagram from
 * s->syslog_fd to the socket /run/systemd/journal/syslog. The sender's
 * ucred is attached as SCM_CREDENTIALS control data. If sending fails
 * with ESRCH, the credentials are replaced and the send is retried. A
 * final failure is only logged at debug level. The tv argument is not
 * used by any visible line. */
807 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
808 struct msghdr msghdr;
809 struct cmsghdr *cmsg;
811 struct cmsghdr cmsghdr;
812 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
814 union sockaddr_union sa;
821 msghdr.msg_iov = (struct iovec*) iovec;
822 msghdr.msg_iovlen = n_iovec;
/* Destination address of the datagram. */
825 sa.un.sun_family = AF_UNIX;
826 strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
827 msghdr.msg_name = &sa;
828 msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
/* Control message carrying the credentials of the original sender. */
832 msghdr.msg_control = &control;
833 msghdr.msg_controllen = sizeof(control);
835 cmsg = CMSG_FIRSTHDR(&msghdr);
836 cmsg->cmsg_level = SOL_SOCKET;
837 cmsg->cmsg_type = SCM_CREDENTIALS;
838 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
839 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
840 msghdr.msg_controllen = cmsg->cmsg_len;
843 /* Forward the syslog message we received via /dev/log to
844 * /run/systemd/journal/syslog. Unfortunately we currently can't set
845 * the SO_TIMESTAMP auxiliary data, and hence we don't. */
847 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
850 /* The socket is full? I guess the syslog implementation is
851 * too slow, and we shouldn't wait for that... */
855 if (ucred && errno == ESRCH) {
858 /* Hmm, presumably the sender process vanished
859 * by now, so let's fix it as good as we
864 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
866 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
874 log_debug("Failed to forward syslog message: %m");
/* Forwards an unmodified syslog datagram (buffer) to the syslog socket,
 * subject to the max_level_syslog level check. */
877 static void forward_syslog_raw(Server *s, int priority, const char *buffer, struct ucred *ucred, struct timeval *tv) {
883 if (LOG_PRI(priority) > s->max_level_syslog)
/* The whole message is sent as a single buffer. */
886 IOVEC_SET_STRING(iovec, buffer);
887 forward_syslog_iovec(s, &iovec, 1, ucred, tv);
/* Builds a classic syslog line from its parts and forwards it to the
 * syslog socket: "<priority>", a timestamp (taken from tv, or the current
 * time if tv is NULL), the identifier followed by "[pid]: " or ": ", and
 * the message. If no identifier is passed, the process name of the
 * sender is used (when ucred is available). At most five iovec entries
 * are used, matching the array size. */
890 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
891 struct iovec iovec[5];
/* header_priority holds "<" + up to three digits + ">" + NUL, which is
 * why priority is asserted to be within 0..999 below. */
892 char header_priority[6], header_time[64], header_pid[16];
896 char *ident_buf = NULL;
899 assert(priority >= 0);
900 assert(priority <= 999);
903 if (LOG_PRI(priority) > s->max_level_syslog)
906 /* First: priority field */
907 snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
908 char_array_0(header_priority);
909 IOVEC_SET_STRING(iovec[n++], header_priority);
911 /* Second: timestamp */
912 t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
916 if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
918 IOVEC_SET_STRING(iovec[n++], header_time);
920 /* Third: identifier and PID */
923 get_process_comm(ucred->pid, &ident_buf);
924 identifier = ident_buf;
927 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
928 char_array_0(header_pid);
931 IOVEC_SET_STRING(iovec[n++], identifier);
933 IOVEC_SET_STRING(iovec[n++], header_pid);
934 } else if (identifier) {
935 IOVEC_SET_STRING(iovec[n++], identifier);
936 IOVEC_SET_STRING(iovec[n++], ": ");
939 /* Fourth: message */
940 IOVEC_SET_STRING(iovec[n++], message);
942 forward_syslog_iovec(s, iovec, n, ucred, tv);
/* Replaces an unset (zero) facility in a syslog priority value by
 * LOG_USER, keeping the level bits.
 * NOTE(review): the return for priorities that already carry a facility
 * is not visible in this view. */
947 static int fixup_priority(int priority) {
949 if ((priority & LOG_FACMASK) == 0)
950 return (priority & LOG_PRIMASK) | LOG_USER;
/* Writes a message to the kernel log buffer through s->dev_kmsg_fd as
 * "<priority>identifier[pid]: message\n" (or "identifier: " without
 * credentials). Nothing is written when the level exceeds max_level_kmsg
 * or /dev/kmsg is not open. At most five iovec entries are used,
 * matching the array size. */
955 static void forward_kmsg(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
956 struct iovec iovec[5];
/* header_priority holds "<" + up to three digits + ">" + NUL. */
957 char header_priority[6], header_pid[16];
959 char *ident_buf = NULL;
962 assert(priority >= 0);
963 assert(priority <= 999);
966 if (_unlikely_(LOG_PRI(priority) > s->max_level_kmsg))
969 if (_unlikely_(s->dev_kmsg_fd < 0))
972 /* Never allow messages with kernel facility to be written to
973 * kmsg, regardless where the data comes from. */
974 priority = fixup_priority(priority);
976 /* First: priority field */
977 snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
978 char_array_0(header_priority);
979 IOVEC_SET_STRING(iovec[n++], header_priority);
981 /* Second: identifier and PID */
984 get_process_comm(ucred->pid, &ident_buf);
985 identifier = ident_buf;
988 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
989 char_array_0(header_pid);
992 IOVEC_SET_STRING(iovec[n++], identifier);
994 IOVEC_SET_STRING(iovec[n++], header_pid);
995 } else if (identifier) {
996 IOVEC_SET_STRING(iovec[n++], identifier);
997 IOVEC_SET_STRING(iovec[n++], ": ");
1000 /* Third: message */
1001 IOVEC_SET_STRING(iovec[n++], message);
1002 IOVEC_SET_STRING(iovec[n++], "\n");
/* A single writev() so that the record is written in one piece. */
1004 if (writev(s->dev_kmsg_fd, iovec, n) < 0)
1005 log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
/* Prints a message to the console as "identifier[pid]: message\n" (or
 * "identifier: " without credentials). The terminal is s->tty_path, or
 * /dev/console if that is unset; it is opened for each message and
 * closed again afterwards. Messages above max_level_console are not
 * printed; failures are logged at debug level. At most four iovec
 * entries are used, matching the array size. */
1010 static void forward_console(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
1011 struct iovec iovec[4];
1012 char header_pid[16];
1014 char *ident_buf = NULL;
1020 if (LOG_PRI(priority) > s->max_level_console)
1023 /* First: identifier and PID */
1026 get_process_comm(ucred->pid, &ident_buf);
1027 identifier = ident_buf;
1030 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
1031 char_array_0(header_pid);
1034 IOVEC_SET_STRING(iovec[n++], identifier);
1036 IOVEC_SET_STRING(iovec[n++], header_pid);
1037 } else if (identifier) {
1038 IOVEC_SET_STRING(iovec[n++], identifier);
1039 IOVEC_SET_STRING(iovec[n++], ": ");
1042 /* Second: message */
1043 IOVEC_SET_STRING(iovec[n++], message);
1044 IOVEC_SET_STRING(iovec[n++], "\n");
1046 tty = s->tty_path ? s->tty_path : "/dev/console";
/* O_NOCTTY: the terminal must not become our controlling terminal. */
1048 fd = open_terminal(tty, O_WRONLY|O_NOCTTY|O_CLOEXEC);
1050 log_debug("Failed to open %s for logging: %s", tty, strerror(errno));
1054 if (writev(fd, iovec, n) < 0)
1055 log_debug("Failed to write to %s for logging: %s", tty, strerror(errno));
1057 close_nointr_nofail(fd);
/* Parses the leading identifier token of a syslog message at *buf and
 * advances *buf past it and the following whitespace. The results are
 * returned in *identifier and *pid.
 * NOTE(review): most of this function is not visible in this view; the
 * token presumably has the form "identifier[pid]:" — confirm against
 * the full source. */
1063 static void read_identifier(const char **buf, char **identifier, char **pid) {
/* Skip leading whitespace, then measure the first token. */
1074 p += strspn(p, WHITESPACE);
1075 l = strcspn(p, WHITESPACE);
/* The token ends in ']': copy what lies between position k (presumably
 * the matching '[') and the closing bracket. */
1084 if (p[l-1] == ']') {
1090 t = strndup(p+k+1, l-k-2);
1110 *buf += strspn(*buf, WHITESPACE);
/* Handles one message received in syslog format. The message is
 * forwarded unmodified to the syslog socket and, after the priority,
 * date and identifier have been parsed off, to kmsg and the console,
 * each if enabled in the server. It is then stored in the journal with
 * the fields _TRANSPORT=syslog, PRIORITY=, SYSLOG_FACILITY= (only if a
 * facility is set), SYSLOG_IDENTIFIER=, SYSLOG_PID= and MESSAGE=. */
1113 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv, const char *label, size_t label_len) {
1114 char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
/* Up to six fields are set here, the rest is for dispatch_message_real(). */
1115 struct iovec iovec[N_IOVEC_META_FIELDS + 6];
/* Used when the message carries no <priority> prefix. */
1117 int priority = LOG_USER | LOG_INFO;
1118 char *identifier = NULL, *pid = NULL;
1125 parse_syslog_priority((char**) &buf, &priority);
/* orig is the message as received, including its header. */
1127 if (s->forward_to_syslog)
1128 forward_syslog_raw(s, priority, orig, ucred, tv);
1130 skip_syslog_date((char**) &buf);
1131 read_identifier(&buf, &identifier, &pid);
/* From here on buf points at the message text only. */
1133 if (s->forward_to_kmsg)
1134 forward_kmsg(s, priority, identifier, buf, ucred);
1136 if (s->forward_to_console)
1137 forward_console(s, priority, identifier, buf, ucred);
1139 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
1141 if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1142 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1144 if (priority & LOG_FACMASK)
1145 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1146 IOVEC_SET_STRING(iovec[n++], syslog_facility);
1149 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1150 if (syslog_identifier)
1151 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1155 syslog_pid = strappend("SYSLOG_PID=", pid);
1157 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1160 message = strappend("MESSAGE=", buf);
1162 IOVEC_SET_STRING(iovec[n++], message);
1164 dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, label, label_len, NULL, priority);
1169 free(syslog_priority);
1170 free(syslog_facility);
1171 free(syslog_identifier);
/* Checks whether the l bytes at p are a field name that clients may set
 * themselves. The rules are listed in the comments below.
 * NOTE(review): most of the checks and the returns are on lines not
 * visible in this view. */
1175 static bool valid_user_field(const char *p, size_t l) {
1178 /* We kinda enforce POSIX syntax recommendations for
1179 environment variables here, but make a couple of additional
1182 http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html */
1184 /* No empty field names */
1188 /* Don't allow names longer than 64 chars */
1192 /* Variables starting with an underscore are protected */
1196 /* Don't allow digits as first character */
1197 if (p[0] >= '0' && p[0] <= '9')
1200 /* Only allow A-Z0-9 and '_' */
1201 for (a = p; a < p + l; a++)
1202 if (!((*a >= 'A' && *a <= 'Z') ||
1203 (*a >= '0' && *a <= '9') ||
/* Parses a datagram in the native journal protocol and stores the
 * entries it contains.
 *
 * The buffer is processed line by line. A line "FIELD=value" is a
 * field; a line "FIELD" without '=' is followed by a 64-bit length
 * (read into l_le), that many bytes of data and a newline; an entry
 * separator line flushes the fields collected so far as one entry.
 * Field names are checked with valid_user_field(). PRIORITY=,
 * SYSLOG_FACILITY=, SYSLOG_IDENTIFIER= and MESSAGE= are additionally
 * picked up for rate limiting and forwarding.
 *
 * Field values in "FIELD=value" form point into buffer; binary fields
 * are copied to the heap. The cleanup loop at the end tells the two
 * apart by address and frees only the latter.
 *
 * NOTE(review): many lines of this function (declarations, loop control,
 * closing braces) are not visible in this view. */
1210 static void process_native_message(
1212 const void *buffer, size_t buffer_size,
1213 struct ucred *ucred,
1215 const char *label, size_t label_len) {
1217 struct iovec *iovec = NULL;
1218 unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1221 int priority = LOG_INFO;
1222 char *identifier = NULL, *message = NULL;
1225 assert(buffer || buffer_size == 0);
1228 remaining = buffer_size;
1230 while (remaining > 0) {
1233 e = memchr(p, '\n', remaining);
1236 /* Trailing noise, let's ignore it, and flush what we collected */
1237 log_debug("Received message with trailing noise, ignoring.");
1242 /* Entry separator */
1243 dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
1245 priority = LOG_INFO;
1252 if (*p == '.' || *p == '#') {
1253 /* Ignore control commands for now, and
1255 remaining -= (e - p) + 1;
1260 /* A property follows */
1262 if (n+N_IOVEC_META_FIELDS >= m) {
1266 u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1267 c = realloc(iovec, u * sizeof(struct iovec));
1277 q = memchr(p, '=', e - p);
1279 if (valid_user_field(p, q - p)) {
1284 /* If the field name starts with an
1285 * underscore, skip the variable,
1286 * since that indicates a trusted
1288 iovec[n].iov_base = (char*) p;
1289 iovec[n].iov_len = l;
1292 /* We need to determine the priority
1293 * of this entry for the rate limiting
1296 memcmp(p, "PRIORITY=", 9) == 0 &&
1297 p[9] >= '0' && p[9] <= '9')
1298 priority = (priority & LOG_FACMASK) | (p[9] - '0');
1301 memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1302 p[16] >= '0' && p[16] <= '9')
1303 priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1306 memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1307 p[16] >= '0' && p[16] <= '9' &&
1308 p[17] >= '0' && p[17] <= '9')
1309 priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1312 memcmp(p, "SYSLOG_IDENTIFIER=", 18) == 0) {
1315 t = strndup(p + 18, l - 18);
1320 } else if (l >= 8 &&
1321 memcmp(p, "MESSAGE=", 8) == 0) {
1324 t = strndup(p + 8, l - 8);
1332 remaining -= (e - p) + 1;
1340 if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1341 log_debug("Failed to parse message, ignoring.");
1345 memcpy(&l_le, e + 1, sizeof(uint64_t));
/* l comes from the client. Compare it against the space that is left
 * instead of adding it to the other terms: the sum could wrap around
 * for a huge l and let the check pass, leading to an out-of-bounds read
 * and a wrapped allocation size below. The subtraction cannot underflow
 * because the previous check established that remaining is large enough
 * (NOTE(review): this assumes that check leaves the loop on failure, on
 * a line not visible here). */
1348 if (l > remaining - ((e - p) + 1 + sizeof(uint64_t) + 1) ||
1349 e[1+sizeof(uint64_t)+l] != '\n') {
1350 log_debug("Failed to parse message, ignoring.");
1354 k = malloc((e - p) + 1 + l);
1360 memcpy(k, p, e - p);
1362 memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1364 if (valid_user_field(p, e - p)) {
1365 iovec[n].iov_base = k;
1366 iovec[n].iov_len = (e - p) + 1 + l;
1371 remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1372 p = e + 1 + sizeof(uint64_t) + l + 1;
1380 IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1383 if (s->forward_to_syslog)
1384 forward_syslog(s, priority, identifier, message, ucred, tv);
1386 if (s->forward_to_kmsg)
1387 forward_kmsg(s, priority, identifier, message, ucred);
1389 if (s->forward_to_console)
1390 forward_console(s, priority, identifier, message, ucred);
1393 dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
1396 for (j = 0; j < n; j++) {
1400 if (iovec[j].iov_base < buffer ||
1401 (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1402 free(iovec[j].iov_base);
/* Handles a native-protocol message that was passed as a file descriptor
 * because it did not fit into a datagram. The file must be a regular
 * file of at most ENTRY_SIZE_MAX bytes; empty files are ignored. Its
 * contents are read into a heap buffer and handed to
 * process_native_message(). All failures are logged and the message is
 * dropped. */
1410 static void process_native_file(
1413 struct ucred *ucred,
1415 const char *label, size_t label_len) {
1424 /* Data is in the passed file, since it didn't fit in a
1425 * datagram. We can't map the file here, since clients might
1426 * then truncate it and trigger a SIGBUS for us. So let's
1427 * stupidly read it */
1429 if (fstat(fd, &st) < 0) {
1430 log_error("Failed to stat passed file, ignoring: %m");
1434 if (!S_ISREG(st.st_mode)) {
1435 log_error("File passed is not regular. Ignoring.");
1439 if (st.st_size <= 0)
1442 if (st.st_size > ENTRY_SIZE_MAX) {
1443 log_error("File passed too large. Ignoring.");
1447 p = malloc(st.st_size);
1453 n = pread(fd, p, st.st_size, 0);
/* pread() returns -1 and reports the cause in errno, so strerror(-n)
 * always printed the text for error 1 (EPERM). %m prints errno, as in
 * the fstat() error path above. */
1455 log_error("Failed to read file, ignoring: %m");
/* Only the n bytes actually read are parsed. */
1457 process_native_message(s, p, n, ucred, tv, label, label_len);
/* Logs one line p received on a stdout stream. The priority is the
 * stream's default, or the one parsed from a "<N>" prefix of the line if
 * the stream has level_prefix enabled. The line is forwarded to syslog,
 * kmsg and the console if enabled for the stream or for the server, and
 * stored with the fields _TRANSPORT=stdout, PRIORITY=, SYSLOG_FACILITY=
 * (only if a facility is set), SYSLOG_IDENTIFIER= (only if the stream
 * has an identifier) and MESSAGE=. */
1462 static int stdout_stream_log(StdoutStream *s, const char *p) {
/* Up to five fields are set here, the rest is for dispatch_message_real(). */
1463 struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1464 char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1468 size_t label_len = 0;
1476 priority = s->priority;
1478 if (s->level_prefix)
1479 parse_syslog_priority((char**) &p, &priority);
1481 if (s->forward_to_syslog || s->server->forward_to_syslog)
1482 forward_syslog(s->server, fixup_priority(priority), s->identifier, p, &s->ucred, NULL);
1484 if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1485 forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1487 if (s->forward_to_console || s->server->forward_to_console)
1488 forward_console(s->server, priority, s->identifier, p, &s->ucred);
1490 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1492 if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1493 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1495 if (priority & LOG_FACMASK)
1496 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1497 IOVEC_SET_STRING(iovec[n++], syslog_facility);
1499 if (s->identifier) {
1500 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1501 if (syslog_identifier)
1502 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1505 message = strappend("MESSAGE=", p);
1507 IOVEC_SET_STRING(iovec[n++], message);
/* Pass on the SELinux context recorded when the stream connected. */
1510 if (s->security_context) {
1511 label = (char*) s->security_context;
1512 label_len = strlen((char*) s->security_context);
1516 dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, label, label_len, s->unit_id, priority);
1519 free(syslog_priority);
1520 free(syslog_facility);
1521 free(syslog_identifier);
/* Processes one line p received on a stdout stream according to the
 * stream's state. The first lines form a header and are consumed one per
 * state: identifier, unit id (looked at only if the peer is root),
 * default priority, and four booleans (level prefix, forward to syslog,
 * kmsg, console). Once the state is STDOUT_STREAM_RUNNING every line is
 * logged through stdout_stream_log() and its result is returned.
 * NOTE(review): the switch statement itself and the return values of the
 * header states are on lines not visible in this view. */
1526 static int stdout_stream_line(StdoutStream *s, char *p) {
1536 case STDOUT_STREAM_IDENTIFIER:
1538 s->identifier = NULL;
1540 s->identifier = strdup(p);
1545 s->state = STDOUT_STREAM_UNIT_ID;
1548 case STDOUT_STREAM_UNIT_ID:
/* Only a peer running as root may name the unit it logs for. */
1549 if (s->ucred.uid == 0) {
1553 s->unit_id = strdup(p);
1559 s->state = STDOUT_STREAM_PRIORITY;
1562 case STDOUT_STREAM_PRIORITY:
1563 r = safe_atoi(p, &s->priority);
/* Accept the whole range 0..999 that the forward_*() functions assert
 * on and that dev_kmsg_record() accepts; the bounds used to be
 * exclusive, rejecting priority 0 and 999. */
1564 if (r < 0 || s->priority < 0 || s->priority > 999) {
1565 log_warning("Failed to parse log priority line.");
1569 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1572 case STDOUT_STREAM_LEVEL_PREFIX:
1573 r = parse_boolean(p);
1575 log_warning("Failed to parse level prefix line.");
1579 s->level_prefix = !!r;
1580 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1583 case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1584 r = parse_boolean(p);
1586 log_warning("Failed to parse forward to syslog line.");
1590 s->forward_to_syslog = !!r;
1591 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1594 case STDOUT_STREAM_FORWARD_TO_KMSG:
1595 r = parse_boolean(p);
1597 log_warning("Failed to parse copy to kmsg line.");
1601 s->forward_to_kmsg = !!r;
1602 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1605 case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1606 r = parse_boolean(p);
1608 log_warning("Failed to parse copy to console line.");
1612 s->forward_to_console = !!r;
1613 s->state = STDOUT_STREAM_RUNNING;
1616 case STDOUT_STREAM_RUNNING:
1617 return stdout_stream_log(s, p);
1620 assert_not_reached("Unknown stream state");
/* Splits the data buffered in s->buffer into lines and passes each to
 * stdout_stream_line(). A line ends at '\n', or where the buffer is full
 * without containing one. With force_flush set, a trailing partial line
 * is processed as well. Unprocessed data is moved to the start of the
 * buffer and s->length is updated accordingly.
 * NOTE(review): the loop structure, line termination and return values
 * are on lines not visible in this view. */
1623 static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
1631 remaining = s->length;
1636 end = memchr(p, '\n', remaining);
/* No newline, but the buffer is full: treat the buffer as one line. */
1639 else if (remaining >= sizeof(s->buffer) - 1) {
1640 end = p + sizeof(s->buffer) - 1;
1647 r = stdout_stream_line(s, p);
1655 if (force_flush && remaining > 0) {
1657 r = stdout_stream_line(s, p);
/* Something was consumed: move the rest to the front of the buffer. */
1665 if (p > s->buffer) {
1666 memmove(s->buffer, p, remaining);
1667 s->length = remaining;
/* Reads newly arrived data of a stdout stream into its buffer and scans
 * it for lines. EAGAIN from read() is tested separately from other read
 * errors, which are logged as a warning.
 * NOTE(review): the conditions choosing between the two scan calls and
 * the return values are not visible in this view; the forced scan is
 * presumably used when the peer has closed the stream. */
1673 static int stdout_stream_process(StdoutStream *s) {
/* Append after the buffered data, leaving one byte of the buffer unused. */
1679 l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1682 if (errno == EAGAIN)
1685 log_warning("Failed to read from stream: %m");
1690 r = stdout_stream_scan(s, true);
1698 r = stdout_stream_scan(s, false);
/* Releases a stdout stream: unlinks it from its server's stream list and
 * decrements the stream counter, removes its fd from the epoll set and
 * closes it, and frees the SELinux context and the identifier.
 * NOTE(review): the guards around these steps and the remaining free()
 * calls are not visible in this view. */
1706 static void stdout_stream_free(StdoutStream *s) {
1710 assert(s->server->n_stdout_streams > 0);
1711 s->server->n_stdout_streams --;
1712 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
1717 epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1719 close_nointr_nofail(s->fd);
1723 if (s->security_context)
1724 freecon(s->security_context);
1727 free(s->identifier);
/* Accepts a new connection on the stdout socket and sets up a
 * StdoutStream for it: the peer credentials and SELinux context are
 * recorded, the sending direction of the socket is shut down, the fd is
 * registered with the epoll loop and the stream is added to the server's
 * list. Connections beyond STDOUT_STREAMS_MAX are refused.
 * NOTE(review): return values and the jumps to the failure path are on
 * lines not visible in this view. */
1731 static int stdout_stream_new(Server *s) {
1732 StdoutStream *stream;
1735 struct epoll_event ev;
1739 fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1741 if (errno == EAGAIN)
1744 log_error("Failed to accept stdout connection: %m");
1748 if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1749 log_warning("Too many stdout streams, refusing connection.");
1750 close_nointr_nofail(fd);
1754 stream = new0(StdoutStream, 1);
1756 close_nointr_nofail(fd);
1762 len = sizeof(stream->ucred);
1763 if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1764 log_error("Failed to determine peer credentials: %m");
/* ENOPROTOOPT is not reported as an error here. */
1770 if (getpeercon(fd, &stream->security_context) < 0 && errno != ENOPROTOOPT)
1771 log_error("Failed to determine peer security context: %m");
/* Data only flows from the client to us. */
1774 if (shutdown(fd, SHUT_WR) < 0) {
1775 log_error("Failed to shutdown writing side of socket: %m");
/* The event loop gets the stream object back via ev.data.ptr. */
1781 ev.data.ptr = stream;
1782 ev.events = EPOLLIN;
1783 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1784 log_error("Failed to add stream to event loop: %m");
1790 LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1791 s->n_stdout_streams ++;
/* Failure path: release the partially set up stream. */
1796 stdout_stream_free(stream);
/* Checks whether the textual PID in `pid` refers to this very process:
 * the string is parsed with parse_pid() and compared against getpid(). */
1800 static bool is_us(const char *pid) {
1805 if (parse_pid(pid, &t) < 0)
1808 return t == getpid();
/* Parses one record read from /dev/kmsg (p points to l bytes) and
 * dispatches it as a journal entry via dispatch_message().
 *
 * As far as visible here, the record is parsed as comma-separated header
 * fields (priority, sequence number, optionally flags, timestamp) ending
 * at a ';', followed by the message text up to the first newline and then
 * further newline-separated metadata lines, which are stored with a
 * "_KERNEL_" prefix.
 *
 * The sequence number is tracked in *s->kernel_seqnum so that records are
 * not processed twice and gaps are reported. */
1811 static void dev_kmsg_record(Server *s, char *p, size_t l) {
1812 struct iovec iovec[N_IOVEC_META_FIELDS + 7 + N_IOVEC_KERNEL_FIELDS];
1813 char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
1815 unsigned n = 0, z = 0, j;
1817 char *identifier = NULL, *pid = NULL, *e, *f, *k;
/* First header field: syslog priority (facility and level combined). */
1827 e = memchr(p, ',', l);
1832 r = safe_atoi(p, &priority);
1833 if (r < 0 || priority < 0 || priority > 999)
/* Non-kernel-facility records while forwarding to kmsg is enabled.
 * NOTE(review): presumably our own forwarded messages coming back; the
 * action taken is on a line not visible here. */
1836 if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
/* Second header field: kernel sequence number. */
1841 e = memchr(p, ',', l);
1846 r = safe_atou64(p, &serial);
1850 if (s->kernel_seqnum) {
1851 /* We already read this one? */
1852 if (serial < *s->kernel_seqnum)
1855 /* Did we lose any? */
1856 if (serial > *s->kernel_seqnum)
/* *s->kernel_seqnum is the NEXT serial we expected, so the number of
 * records lost is exactly the difference (no extra -1). */
1857 driver_message(s, SD_MESSAGE_JOURNAL_MISSED, "Missed %llu kernel messages", (unsigned long long) (serial - *s->kernel_seqnum));
1859 /* Make sure we never read this one again. Note that
1860 * we always store the next message serial we expect
1861 * here, simply because this makes handling the first
1862 * message with serial 0 easy. */
1863 *s->kernel_seqnum = serial + 1;
/* The header ends at the first ';'. */
1868 f = memchr(p, ';', l);
1871 /* Kernel 3.6 has the flags field, kernel 3.5 lacks that */
1872 e = memchr(p, ',', l);
/* Timestamp field, parsed into usec. */
1877 r = parse_usec(p, &usec);
/* The message text runs up to the first newline. */
1883 e = memchr(p, '\n', l);
/* Collect at most N_IOVEC_KERNEL_FIELDS trailing metadata lines. */
1892 for (j = 0; l > 0 && j < N_IOVEC_KERNEL_FIELDS; j++) {
1894 /* Meta data fields attached */
1901 e = memchr(k, '\n', l);
1907 m = cunescape_length_with_prefix(k, e - k, "_KERNEL_");
1911 IOVEC_SET_STRING(iovec[n++], m);
1918 if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
1919 (unsigned long long) usec) >= 0)
1920 IOVEC_SET_STRING(iovec[n++], source_time);
1922 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
1924 if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1925 IOVEC_SET_STRING(iovec[n++], syslog_priority);
/* Records with kernel facility get a fixed identifier; for the others
 * identifier and PID are extracted from the message text. */
1927 if ((priority & LOG_FACMASK) == LOG_KERN)
1928 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
1930 read_identifier((const char**) &p, &identifier, &pid);
1932 /* Avoid any messages we generated ourselves via
1933 * log_info() and friends. */
1934 if (pid && is_us(pid))
1938 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1939 if (syslog_identifier)
1940 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1944 syslog_pid = strappend("SYSLOG_PID=", pid);
1946 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1949 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1950 IOVEC_SET_STRING(iovec[n++], syslog_facility);
1953 message = cunescape_length_with_prefix(p, pl, "MESSAGE=");
1955 IOVEC_SET_STRING(iovec[n++], message);
1957 dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, NULL, priority);
/* Free the first z iovec entries, then the individually allocated
 * field strings.
 * NOTE(review): z presumably counts the _KERNEL_ metadata entries, which
 * are the first ones added to iovec; its increment is not visible here. */
1960 for (j = 0; j < z; j++)
1961 free(iovec[j].iov_base);
1964 free(syslog_priority);
1965 free(syslog_identifier);
1967 free(syslog_facility);
/* Opens the journal files the server writes to, as far as they are not
 * open yet: the persistent system journal below /var/log/journal/<machine
 * id>/ and the runtime journal below /run/log/journal/<machine id>/.
 * Which of the two is used depends on s->storage and on whether the
 * "flushed" flag file exists. */
1973 static int system_journal_open(Server *s) {
1979 r = sd_id128_get_machine(&machine);
1983 sd_id128_to_string(machine, ids);
/* The persistent journal is only considered in persistent/auto storage
 * mode and once /run/systemd/journal/flushed exists (process_event()
 * creates it on SIGUSR1). */
1985 if (!s->system_journal &&
1986 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
1987 access("/run/systemd/journal/flushed", F_OK) >= 0) {
1989 /* If in auto mode: first try to create the machine
1990 * path, but not the prefix.
1992 * If in persistent mode: create /var/log/journal and
1993 * the machine path */
1995 if (s->storage == STORAGE_PERSISTENT)
1996 (void) mkdir("/var/log/journal/", 0755);
1998 fn = strappend("/var/log/journal/", ids);
2002 (void) mkdir(fn, 0755);
2005 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
2009 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, &s->system_metrics, NULL, &s->system_journal);
2013 s->system_journal->compress = s->compress;
2015 server_fix_perms(s, s->system_journal, 0);
/* ENOENT and EROFS are not worth a warning. */
2018 if (r != -ENOENT && r != -EROFS)
2019 log_warning("Failed to open system journal: %s", strerror(-r));
/* Runtime journal: used unless storage is disabled entirely. */
2025 if (!s->runtime_journal &&
2026 (s->storage != STORAGE_NONE)) {
2028 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
2032 if (s->system_journal) {
2034 /* Try to open the runtime journal, but only
2035 * if it already exists, so that we can flush
2036 * it into the system journal */
2038 r = journal_file_open(fn, O_RDWR, 0640, &s->runtime_metrics, NULL, &s->runtime_journal);
2043 log_warning("Failed to open runtime journal: %s", strerror(-r));
2050 /* OK, we really need the runtime journal, so create
2051 * it if necessary. */
2053 (void) mkdir_parents(fn, 0755);
2054 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, &s->runtime_metrics, NULL, &s->runtime_journal);
2058 log_error("Failed to open runtime journal: %s", strerror(-r));
2063 if (s->runtime_journal) {
2064 s->runtime_journal->compress = s->compress;
2066 server_fix_perms(s, s->runtime_journal, 0);
/* Copies all entries of the runtime journal into the persistent system
 * journal, then closes the runtime journal and removes /run/log/journal.
 * Only applies in auto/persistent storage mode and when both a runtime
 * journal and a system journal are available. */
2073 static int server_flush_to_var(Server *s) {
2081 if (s->storage != STORAGE_AUTO &&
2082 s->storage != STORAGE_PERSISTENT)
2085 if (!s->runtime_journal)
/* Try to open the system journal now; its result is checked via
 * s->system_journal below rather than via the return value. */
2088 system_journal_open(s);
2090 if (!s->system_journal)
2093 log_info("Flushing to /var...");
2095 r = sd_id128_get_machine(&machine);
2097 log_error("Failed to get machine id: %s", strerror(-r));
/* Read the runtime journal back through the public reader API. */
2101 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
2103 log_error("Failed to read runtime journal: %s", strerror(-r));
2107 SD_JOURNAL_FOREACH(j) {
2110 f = j->current_file;
2111 assert(f && f->current_offset > 0);
2113 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2115 log_error("Can't read entry: %s", strerror(-r));
/* First attempt to copy the entry into the system journal. */
2119 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2121 log_info("Allocation limit reached.");
2123 journal_file_post_change(s->system_journal);
/* Second attempt for the same entry.
 * NOTE(review): whatever frees up space between the two attempts is not
 * visible here. */
2127 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2131 log_error("Can't write entry: %s", strerror(-r));
2137 journal_file_post_change(s->system_journal);
/* The runtime journal is no longer needed once flushed. */
2139 journal_file_close(s->runtime_journal);
2140 s->runtime_journal = NULL;
2143 rm_rf("/run/log/journal", false, true, false);
/* Reads a single record from /dev/kmsg and passes it on to
 * dev_kmsg_record(). */
2148 static int server_read_dev_kmsg(Server *s) {
2149 char buffer[8192+1]; /* the kernel-side limit per record is 8K currently */
2153 assert(s->dev_kmsg_fd >= 0);
/* One byte of the buffer is kept in reserve. */
2155 l = read(s->dev_kmsg_fd, buffer, sizeof(buffer) - 1);
2159 /* Old kernels who don't allow reading from /dev/kmsg
2160 * return EINVAL when we try. So handle this cleanly,
2161 * but don't try to ever read from it again. */
2162 if (errno == EINVAL) {
2163 epoll_ctl(s->epoll_fd, EPOLL_CTL_DEL, s->dev_kmsg_fd, NULL);
/* EAGAIN and EINTR are handled separately from real read errors. */
2167 if (errno == EAGAIN || errno == EINTR)
2170 log_error("Failed to read from kernel: %m");
2174 dev_kmsg_record(s, buffer, l);
/* Processes records pending in /dev/kmsg via server_read_dev_kmsg().
 * Checks first that /dev/kmsg is open and was found to be readable. */
2178 static int server_flush_dev_kmsg(Server *s) {
2183 if (s->dev_kmsg_fd < 0)
2186 if (!s->dev_kmsg_readable)
2189 log_info("Flushing /dev/kmsg...");
2192 r = server_read_dev_kmsg(s);
/* Dispatches a single epoll event. The event source is identified by
 * comparing ev->data.fd against the server's well-known fds (signalfd,
 * /dev/kmsg, native and syslog datagram sockets, stdout listening
 * socket); anything else is treated as a StdoutStream pointer stored in
 * ev->data.ptr. */
2203 static int process_event(Server *s, struct epoll_event *ev) {
/* --- Signals --- */
2207 if (ev->data.fd == s->signal_fd) {
2208 struct signalfd_siginfo sfsi;
2211 if (ev->events != EPOLLIN) {
2212 log_info("Got invalid event from epoll.");
2216 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
2217 if (n != sizeof(sfsi)) {
2222 if (errno == EINTR || errno == EAGAIN)
2228 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
/* SIGUSR1: create the "flushed" flag file and move the runtime journal
 * to persistent storage. */
2230 if (sfsi.ssi_signo == SIGUSR1) {
2231 touch("/run/systemd/journal/flushed");
2232 server_flush_to_var(s);
2236 if (sfsi.ssi_signo == SIGUSR2) {
/* --- Kernel log --- */
2244 } else if (ev->data.fd == s->dev_kmsg_fd) {
2247 if (ev->events != EPOLLIN) {
2248 log_info("Got invalid event from epoll.");
2252 r = server_read_dev_kmsg(s);
/* --- Datagram sockets: native protocol and syslog --- */
2258 } else if (ev->data.fd == s->native_fd ||
2259 ev->data.fd == s->syslog_fd) {
2261 if (ev->events != EPOLLIN) {
2262 log_info("Got invalid event from epoll.");
2267 struct msghdr msghdr;
2269 struct ucred *ucred = NULL;
2270 struct timeval *tv = NULL;
2271 struct cmsghdr *cmsg;
2273 size_t label_len = 0;
2275 struct cmsghdr cmsghdr;
2277 /* We use NAME_MAX space for the
2278 * SELinux label here. The kernel
2279 * currently enforces no limit, but
2280 * according to suggestions from the
2281 * SELinux people this will change and
2282 * it will probably be identical to
2283 * NAME_MAX. For now we use that, but
2284 * this should be updated one day when
2285 * the final limit is known.*/
2286 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
2287 CMSG_SPACE(sizeof(struct timeval)) +
2288 CMSG_SPACE(sizeof(int)) + /* fd */
2289 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask how many bytes are queued, and grow the receive buffer if it is
 * smaller than that. */
2296 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
2297 log_error("SIOCINQ failed: %m");
2301 if (s->buffer_size < (size_t) v) {
2305 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
2306 b = realloc(s->buffer, l+1);
2309 log_error("Couldn't increase buffer.");
2318 iovec.iov_base = s->buffer;
2319 iovec.iov_len = s->buffer_size;
2323 msghdr.msg_iov = &iovec;
2324 msghdr.msg_iovlen = 1;
2325 msghdr.msg_control = &control;
2326 msghdr.msg_controllen = sizeof(control);
2328 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
2331 if (errno == EINTR || errno == EAGAIN)
2334 log_error("recvmsg() failed: %m");
/* Pick up the ancillary data: sender credentials, SELinux label,
 * receive timestamp and passed file descriptors. */
2338 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
2340 if (cmsg->cmsg_level == SOL_SOCKET &&
2341 cmsg->cmsg_type == SCM_CREDENTIALS &&
2342 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
2343 ucred = (struct ucred*) CMSG_DATA(cmsg);
2344 else if (cmsg->cmsg_level == SOL_SOCKET &&
2345 cmsg->cmsg_type == SCM_SECURITY) {
2346 label = (char*) CMSG_DATA(cmsg);
2347 label_len = cmsg->cmsg_len - CMSG_LEN(0);
2348 } else if (cmsg->cmsg_level == SOL_SOCKET &&
2349 cmsg->cmsg_type == SO_TIMESTAMP &&
2350 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
2351 tv = (struct timeval*) CMSG_DATA(cmsg);
2352 else if (cmsg->cmsg_level == SOL_SOCKET &&
2353 cmsg->cmsg_type == SCM_RIGHTS) {
2354 fds = (int*) CMSG_DATA(cmsg);
2355 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* syslog socket: plain text payload, file descriptors are not accepted. */
2359 if (ev->data.fd == s->syslog_fd) {
2362 if (n > 0 && n_fds == 0) {
2363 e = memchr(s->buffer, '\n', n);
2369 process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
2370 } else if (n_fds > 0)
2371 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* native socket: either an inline payload, or an empty datagram carrying
 * exactly one file descriptor that holds the payload. */
2374 if (n > 0 && n_fds == 0)
2375 process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
2376 else if (n == 0 && n_fds == 1)
2377 process_native_file(s, fds[0], ucred, tv, label, label_len);
2379 log_warning("Got too many file descriptors via native socket. Ignoring.");
/* Received descriptors are always closed again here. */
2382 close_many(fds, n_fds);
/* --- New stdout stream connection --- */
2387 } else if (ev->data.fd == s->stdout_fd) {
2389 if (ev->events != EPOLLIN) {
2390 log_info("Got invalid event from epoll.");
2394 stdout_stream_new(s);
/* --- Data or hangup on an existing stdout stream --- */
2398 StdoutStream *stream;
/* Only EPOLLIN and EPOLLHUP are acceptable here. */
2400 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
2401 log_info("Got invalid event from epoll.");
2405 /* If it is none of the well-known fds, it must be an
2406 * stdout stream fd. Note that this is a bit ugly here
2407 * (since we rely that none of the well-known fds
2408 * could be interpreted as pointer), but nonetheless
2409 * safe, since the well-known fds would never get an
2410 * fd > 4096, i.e. beyond the first memory page */
2412 stream = ev->data.ptr;
/* A result <= 0 means the stream is finished or failed: drop it. */
2414 if (stdout_stream_process(stream) <= 0)
2415 stdout_stream_free(stream);
2420 log_error("Unknown event.");
/* Sets up the /dev/log datagram socket: creates and binds it if
 * s->syslog_fd is not set yet, enables credential, security label and
 * timestamp passing on it, and registers it with the server's epoll
 * instance. */
2424 static int open_syslog_socket(Server *s) {
2425 union sockaddr_union sa;
2427 struct epoll_event ev;
/* No socket yet (server_init() may already have filled in s->syslog_fd
 * from the passed-in listening fds): create our own. */
2431 if (s->syslog_fd < 0) {
2433 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2434 if (s->syslog_fd < 0) {
2435 log_error("socket() failed: %m");
2440 sa.un.sun_family = AF_UNIX;
2441 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
/* Remove a stale socket node before binding. */
2443 unlink(sa.un.sun_path);
2445 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2447 log_error("bind() failed: %m");
/* Every local process must be able to log. */
2451 chmod(sa.un.sun_path, 0666);
2453 fd_nonblock(s->syslog_fd, 1);
/* Ask the kernel to attach sender credentials to each datagram. */
2456 r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2458 log_error("SO_PASSCRED failed: %m");
/* Security labels are optional: failure only produces a warning. */
2464 r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2466 log_warning("SO_PASSSEC failed: %m");
2470 r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2472 log_error("SO_TIMESTAMP failed: %m");
2477 ev.events = EPOLLIN;
2478 ev.data.fd = s->syslog_fd;
2479 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2480 log_error("Failed to add syslog server fd to epoll object: %m");
/* Sets up the native-protocol datagram socket
 * (/run/systemd/journal/socket): creates and binds it if s->native_fd is
 * not set yet, enables credential, security label and timestamp passing
 * on it, and registers it with the server's epoll instance. */
2487 static int open_native_socket(Server*s) {
2488 union sockaddr_union sa;
2490 struct epoll_event ev;
/* No socket yet (server_init() may already have filled in s->native_fd
 * from the passed-in listening fds): create our own. */
2494 if (s->native_fd < 0) {
2496 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2497 if (s->native_fd < 0) {
2498 log_error("socket() failed: %m");
2503 sa.un.sun_family = AF_UNIX;
2504 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
/* Remove a stale socket node before binding. */
2506 unlink(sa.un.sun_path);
2508 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2510 log_error("bind() failed: %m");
/* Every local process must be able to log. */
2514 chmod(sa.un.sun_path, 0666);
2516 fd_nonblock(s->native_fd, 1);
/* Ask the kernel to attach sender credentials to each datagram. */
2519 r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2521 log_error("SO_PASSCRED failed: %m");
/* Security labels must be requested on the native socket itself (not on
 * the syslog socket); they are optional, so failure is only a warning. */
2527 r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2529 log_warning("SO_PASSSEC failed: %m");
2533 r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2535 log_error("SO_TIMESTAMP failed: %m");
2540 ev.events = EPOLLIN;
2541 ev.data.fd = s->native_fd;
2542 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2543 log_error("Failed to add native server fd to epoll object: %m");
/* Sets up the stream socket on which stdout/stderr connections are
 * accepted (/run/systemd/journal/stdout): creates, binds and listens on
 * it if s->stdout_fd is not set yet, and registers it with the server's
 * epoll instance. */
2550 static int open_stdout_socket(Server *s) {
2551 union sockaddr_union sa;
2553 struct epoll_event ev;
/* No socket yet (server_init() may already have filled in s->stdout_fd
 * from the passed-in listening fds): create our own. */
2557 if (s->stdout_fd < 0) {
2559 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2560 if (s->stdout_fd < 0) {
2561 log_error("socket() failed: %m");
2566 sa.un.sun_family = AF_UNIX;
2567 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
/* Remove a stale socket node before binding. */
2569 unlink(sa.un.sun_path);
2571 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2573 log_error("bind() failed: %m");
/* Every local process must be able to connect. */
2577 chmod(sa.un.sun_path, 0666);
2579 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2580 log_error("listen() failed: %m");
2584 fd_nonblock(s->stdout_fd, 1);
2587 ev.events = EPOLLIN;
2588 ev.data.fd = s->stdout_fd;
2589 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2590 log_error("Failed to add stdout server fd to epoll object: %m");
/* Opens /dev/kmsg and tries to register it with the server's epoll
 * instance. On success s->dev_kmsg_readable is set, which
 * server_flush_dev_kmsg() checks before reading. */
2597 static int open_dev_kmsg(Server *s) {
2598 struct epoll_event ev;
2602 s->dev_kmsg_fd = open("/dev/kmsg", O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
/* A missing or inaccessible /dev/kmsg only produces a warning. */
2603 if (s->dev_kmsg_fd < 0) {
2604 log_warning("Failed to open /dev/kmsg, ignoring: %m");
2609 ev.events = EPOLLIN;
2610 ev.data.fd = s->dev_kmsg_fd;
2611 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->dev_kmsg_fd, &ev) < 0) {
2613 /* This will fail with EPERM on older kernels where
2614 * /dev/kmsg is not readable. */
2618 log_error("Failed to add /dev/kmsg fd to epoll object: %m");
2622 s->dev_kmsg_readable = true;
/* Maps the file /run/systemd/journal/kernel-seqnum (one uint64_t) into
 * memory and stores the mapping in s->kernel_seqnum, where
 * dev_kmsg_record() keeps the next expected kernel message serial.
 * Failures are logged with "ignoring". */
2627 static int open_kernel_seqnum(Server *s) {
2633 /* We store the seqnum we last read in an mmaped file. That
2634 * way we can just use it like a variable, but it is
2635 * persistent and automatically flushed at reboot. */
2637 fd = open("/run/systemd/journal/kernel-seqnum", O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0644);
2639 log_error("Failed to open /run/systemd/journal/kernel-seqnum, ignoring: %m");
/* posix_fallocate() reports failure by RETURNING a positive error number
 * and does not set errno, so a "< 0" check can never trigger. Store the
 * result in errno so that %m below prints the right message. */
2643 if ((errno = posix_fallocate(fd, 0, sizeof(uint64_t))) != 0) {
2644 log_error("Failed to allocate sequential number file, ignoring: %m");
2645 close_nointr_nofail(fd);
2649 p = mmap(NULL, sizeof(uint64_t), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2650 if (p == MAP_FAILED) {
2651 log_error("Failed to map sequential number file, ignoring: %m");
2652 close_nointr_nofail(fd);
/* The mapping stays valid after the descriptor is closed. */
2656 close_nointr_nofail(fd);
2657 s->kernel_seqnum = p;
/* Blocks SIGINT, SIGTERM, SIGUSR1 and SIGUSR2 for normal delivery and
 * creates a signalfd for them instead, which is registered with the
 * server's epoll instance so that signals are handled in process_event(). */
2662 static int open_signalfd(Server *s) {
2664 struct epoll_event ev;
2668 assert_se(sigemptyset(&mask) == 0);
2669 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
/* SIG_SETMASK replaces the whole signal mask with this set. */
2670 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2672 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2673 if (s->signal_fd < 0) {
2674 log_error("signalfd(): %m");
2679 ev.events = EPOLLIN;
2680 ev.data.fd = s->signal_fd;
2682 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2683 log_error("epoll_ctl(): %m");
/* Applies journald settings given on the kernel command line. Recognised
 * switches are systemd.journald.forward_to_syslog=,
 * systemd.journald.forward_to_kmsg= and
 * systemd.journald.forward_to_console=, each taking a boolean. */
2690 static int server_parse_proc_cmdline(Server *s) {
2691 char *line, *w, *state;
/* Checked before /proc/cmdline is read at all.
 * NOTE(review): presumably returns early inside a container; the branch
 * body is not visible here. */
2695 if (detect_container(NULL) > 0)
2698 r = read_one_line_file("/proc/cmdline", &line);
2700 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2704 FOREACH_WORD_QUOTED(w, l, line, state) {
2707 word = strndup(w, l);
/* The numeric offsets below are the lengths of the respective option
 * prefixes, i.e. word + N points at the value. */
2713 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
2714 r = parse_boolean(word + 35);
2716 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2718 s->forward_to_syslog = r;
2719 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
2720 r = parse_boolean(word + 33);
2722 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2724 s->forward_to_kmsg = r;
2725 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
2726 r = parse_boolean(word + 36);
2728 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2730 s->forward_to_console = r;
/* Anything else in our namespace is unknown. */
2731 } else if (startswith(word, "systemd.journald"))
2732 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Loads /etc/systemd/journald.conf into the server object, parsing the
 * [Journal] section with the gperf-generated lookup table. */
2744 static int server_parse_config_file(Server *s) {
2751 fn = "/etc/systemd/journald.conf";
/* "e" opens the file with O_CLOEXEC (glibc extension). */
2752 f = fopen(fn, "re");
/* A missing configuration file is handled separately from other open
 * failures, which are logged as warnings. */
2754 if (errno == ENOENT)
2757 log_warning("Failed to open configuration file %s: %m", fn);
2761 r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2763 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Initialises the server object: sets defaults, applies the configuration
 * file and kernel command line, takes over listening sockets passed in
 * via sd_listen_fds(), opens the remaining event sources and finally the
 * journal files. */
2770 static int server_init(Server *s) {
/* All descriptors start out invalid so server_done() can tell what was
 * actually opened. */
2776 s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
/* Built-in defaults, possibly overridden by the parsers called below. */
2779 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
2780 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
2782 s->forward_to_syslog = true;
2784 s->max_level_store = LOG_DEBUG;
2785 s->max_level_syslog = LOG_DEBUG;
2786 s->max_level_kmsg = LOG_NOTICE;
2787 s->max_level_console = LOG_INFO;
/* Fill both metrics structures with 0xFF bytes.
 * NOTE(review): presumably an all-ones value marks a metric as unset. */
2789 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
2790 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* The kernel command line is parsed after the configuration file, so its
 * assignments are applied last. */
2792 server_parse_config_file(s);
2793 server_parse_proc_cmdline(s);
2795 mkdir_p("/run/systemd/journal", 0755);
2797 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
2798 if (!s->user_journals)
2801 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2802 if (s->epoll_fd < 0) {
2803 log_error("Failed to create epoll object: %m");
/* Classify the sockets handed to us by path and type; at most one of
 * each kind is accepted. */
2807 n = sd_listen_fds(true);
2809 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
2813 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2815 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
2817 if (s->native_fd >= 0) {
2818 log_error("Too many native sockets passed.");
2824 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
2826 if (s->stdout_fd >= 0) {
2827 log_error("Too many stdout sockets passed.");
2833 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
2835 if (s->syslog_fd >= 0) {
2836 log_error("Too many /dev/log sockets passed.");
2843 log_error("Unknown socket passed.");
/* The open_*_socket() helpers create any socket that was not passed in
 * and hook all of them up to epoll. */
2848 r = open_syslog_socket(s);
2852 r = open_native_socket(s);
2856 r = open_stdout_socket(s);
2860 r = open_dev_kmsg(s);
2864 r = open_kernel_seqnum(s);
2868 r = open_signalfd(s);
2872 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2876 r = system_journal_open(s);
/* Releases everything the server object holds: stdout streams, journal
 * files, the user journal map, all file descriptors, the rate limiter
 * and the kernel sequence number mapping. */
2883 static void server_done(Server *s) {
/* stdout_stream_free() unlinks the stream from the list, so this loop
 * terminates once the list is empty. */
2887 while (s->stdout_streams)
2888 stdout_stream_free(s->stdout_streams);
2890 if (s->system_journal)
2891 journal_file_close(s->system_journal);
2893 if (s->runtime_journal)
2894 journal_file_close(s->runtime_journal);
/* Close each per-user journal before freeing the map itself. */
2896 while ((f = hashmap_steal_first(s->user_journals)))
2897 journal_file_close(f);
2899 hashmap_free(s->user_journals);
/* Descriptors still at their initial -1 were never opened. */
2901 if (s->epoll_fd >= 0)
2902 close_nointr_nofail(s->epoll_fd);
2904 if (s->signal_fd >= 0)
2905 close_nointr_nofail(s->signal_fd);
2907 if (s->syslog_fd >= 0)
2908 close_nointr_nofail(s->syslog_fd);
2910 if (s->native_fd >= 0)
2911 close_nointr_nofail(s->native_fd);
2913 if (s->stdout_fd >= 0)
2914 close_nointr_nofail(s->stdout_fd);
2916 if (s->dev_kmsg_fd >= 0)
2917 close_nointr_nofail(s->dev_kmsg_fd);
2920 journal_rate_limit_free(s->rate_limit);
2922 if (s->kernel_seqnum)
2923 munmap(s->kernel_seqnum, sizeof(uint64_t));
/* Entry point of the journal daemon: initialises the server, flushes
 * pending data (runtime journal to /var, /dev/kmsg backlog), then
 * processes epoll events one at a time until the loop ends, and finally
 * tears the server down. Takes no command line arguments. */
2929 int main(int argc, char *argv[]) {
2933 /* if (getppid() != 1) { */
2934 /* log_error("This program should be invoked by init only."); */
2935 /* return EXIT_FAILURE; */
2939 log_error("This program does not take arguments.");
2940 return EXIT_FAILURE;
2943 log_set_target(LOG_TARGET_SAFE);
2944 log_set_facility(LOG_SYSLOG);
2945 log_parse_environment();
2950 r = server_init(&server);
/* Start-up housekeeping before the first event is handled. */
2954 server_vacuum(&server);
2955 server_flush_to_var(&server);
2956 server_flush_dev_kmsg(&server);
2958 log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
2959 driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
2963 "STATUS=Processing requests...");
/* Main loop: wait without timeout for a single event and dispatch it. */
2966 struct epoll_event event;
2968 r = epoll_wait(server.epoll_fd, &event, 1, -1);
2974 log_error("epoll_wait() failed: %m");
2980 r = process_event(&server, &event);
2987 log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
2988 driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
2992 "STATUS=Shutting down...");
2994 server_done(&server);
2996 return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;