1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
25 #include <sys/signalfd.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
33 #include <systemd/sd-journal.h>
34 #include <systemd/sd-messages.h>
35 #include <systemd/sd-daemon.h>
38 #include <systemd/sd-login.h>
43 #include "journal-file.h"
44 #include "socket-util.h"
45 #include "cgroup-util.h"
47 #include "journal-rate-limit.h"
48 #include "journal-internal.h"
49 #include "conf-parser.h"
56 #include <acl/libacl.h>
61 #include <selinux/selinux.h>
/* Upper bound on simultaneously open per-user journal files: find_journal()
 * closes entries of the user_journals hashmap once this count is reached. */
64 #define USER_JOURNALS_MAX 1024
/* Upper bound on concurrently connected stdout streams: stdout_stream_new()
 * refuses further connections beyond it. */
65 #define STDOUT_STREAMS_MAX 4096
/* Presumably the defaults for the server's rate limiter; their use is not
 * visible in this part of the file. */
67 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
68 #define DEFAULT_RATE_LIMIT_BURST 200
/* available_space() reuses its cached result for this long before rescanning. */
70 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Number of iovec slots callers must keep free for the fields that
 * dispatch_message_real() appends (it asserts n + N_IOVEC_META_FIELDS <= m). */
72 #define N_IOVEC_META_FIELDS 17
/* Sizes the iovec array of dev_kmsg_record() and bounds its loop over the
 * metadata fields attached to a kernel record. */
73 #define N_IOVEC_KERNEL_FIELDS 64
/* Largest file process_native_file() accepts (32 MiB). */
75 #define ENTRY_SIZE_MAX (1024*1024*32)
/* Parse states of a stdout stream connection. stdout_stream_line() consumes
 * one header line per state, in the order listed here, and finally switches to
 * STDOUT_STREAM_RUNNING, in which every further line is logged. */
77 typedef enum StdoutStreamState {
78 STDOUT_STREAM_IDENTIFIER,
79 STDOUT_STREAM_UNIT_ID,
80 STDOUT_STREAM_PRIORITY,
81 STDOUT_STREAM_LEVEL_PREFIX,
82 STDOUT_STREAM_FORWARD_TO_SYSLOG,
83 STDOUT_STREAM_FORWARD_TO_KMSG,
84 STDOUT_STREAM_FORWARD_TO_CONSOLE,
/* Per-connection state of a stdout stream (fields of StdoutStream). */
/* Header line expected next, see stdout_stream_line(). */
90 StdoutStreamState state;
/* Peer security context, filled in by getpeercon() in stdout_stream_new() and
 * released with freecon() in stdout_stream_free(). */
96 security_context_t security_context;
/* Per-stream forwarding switches parsed from the stream header; they are
 * OR-ed with the server-wide switches in stdout_stream_log(). */
103 bool forward_to_syslog:1;
104 bool forward_to_kmsg:1;
105 bool forward_to_console:1;
/* Receive buffer; stdout_stream_process() never reads more than
 * sizeof(buffer)-1 bytes into it in total. */
107 char buffer[LINE_MAX+1];
/* Links the stream into the server's stdout_streams list. */
110 LIST_FIELDS(StdoutStream, stdout_stream);
/* Configuration-file spelling of each Storage enum value. */
113 static const char* const storage_table[] = {
114 [STORAGE_AUTO] = "auto",
115 [STORAGE_VOLATILE] = "volatile",
116 [STORAGE_PERSISTENT] = "persistent",
117 [STORAGE_NONE] = "none"
/* Presumably generates the string<->enum lookup helpers for the table above
 * and the config_parse_storage() parser built on them; the macro definitions
 * are not visible here. */
120 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
121 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Estimates how many more bytes the journal may use on disk.
 *
 * The machine's journal directory is scanned (below /var/log/journal/ when the
 * system journal is open, otherwise below /run/log/journal/) and the allocated
 * size of all regular *.journal and *.journal~ files is summed up. That sum is
 * weighed against the max_use limit of the matching metrics, and the free
 * space of the file system against keep_free. The result is cached in
 * s->cached_available_space and reused for RECHECK_AVAILABLE_SPACE_USEC. */
123 static uint64_t available_space(Server *s) {
128 uint64_t sum = 0, avail = 0, ss_avail = 0;
134 ts = now(CLOCK_MONOTONIC);
/* Serve the cached value while it is still fresh enough. */
136 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
137 return s->cached_available_space;
139 r = sd_id128_get_machine(&machine);
143 if (s->system_journal) {
144 f = "/var/log/journal/";
145 m = &s->system_metrics;
147 f = "/run/log/journal/";
148 m = &s->runtime_metrics;
153 p = strappend(f, sd_id128_to_string(machine, ids));
163 if (fstatvfs(dirfd(d), &ss) < 0)
168 struct dirent buf, *de;
170 r = readdir_r(d, &buf, &de);
177 if (!endswith(de->d_name, ".journal") &&
178 !endswith(de->d_name, ".journal~"))
181 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
184 if (!S_ISREG(st.st_mode))
/* st_blocks counts 512-byte units, i.e. this adds the space actually allocated. */
187 sum += (uint64_t) st.st_blocks * 512UL;
/* Room left below the configured usage limit, clamped at 0. */
190 avail = sum >= m->max_use ? 0 : m->max_use - sum;
/* Free space of the file system, minus the reserve to keep free, clamped at 0. */
192 ss_avail = ss.f_bsize * ss.f_bavail;
194 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
196 if (ss_avail < avail)
199 s->cached_available_space = avail;
200 s->cached_available_space_timestamp = ts;
/* Resolves the gid of the "adm" group into s->file_gid, at most once per
 * server: s->file_gid_valid is set even when the lookup failed (a warning is
 * logged in that case), so the lookup is never retried. */
208 static void server_read_file_gid(Server *s) {
209 const char *adm = "adm";
214 if (s->file_gid_valid)
217 r = get_group_creds(&adm, &s->file_gid);
219 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
221 /* if we couldn't read the gid, then it will be 0, but that's
222 * fine and we shouldn't try to resolve the group again, so
223 * let's just pretend it worked right-away. */
224 s->file_gid_valid = true;
/* Adjusts ownership and access rights of journal file f: the file is set to
 * mode 0640 with owner 0 and group s->file_gid, then its ACL is patched so
 * that it contains a user entry for uid carrying read permission. Every
 * failure is only logged as a warning and otherwise ignored. */
227 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
232 acl_permset_t permset;
237 server_read_file_gid(s);
239 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
241 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
247 acl = acl_get_fd(f->fd);
249 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an existing ACL entry of this user; a new one is created below. */
253 r = acl_find_uid(acl, uid, &entry);
256 if (acl_create_entry(&acl, &entry) < 0 ||
257 acl_set_tag_type(entry, ACL_USER) < 0 ||
258 acl_set_qualifier(entry, &uid) < 0) {
259 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Grant read access and recalculate the ACL mask accordingly. */
264 if (acl_get_permset(entry, &permset) < 0 ||
265 acl_add_perm(permset, ACL_READ) < 0 ||
266 acl_calc_mask(&acl) < 0) {
267 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
271 if (acl_set_fd(f->fd, acl) < 0)
272 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Picks the journal file that entries of user uid are written to.
 *
 * While the runtime journal is open it is used exclusively (see the comment
 * below). Otherwise the per-user file
 * /var/log/journal/<machine-id>/user-<uid>.journal is looked up in
 * s->user_journals, or opened, fixed up with server_fix_perms() and added to
 * that hashmap. At most USER_JOURNALS_MAX user journals are kept open. On the
 * failures visible here the system journal is returned instead. */
279 static JournalFile* find_journal(Server *s, uid_t uid) {
288 /* We split up user logs only on /var, not on /run. If the
289 * runtime file is open, we write to it exclusively, in order
290 * to guarantee proper order as soon as we flush /run to
291 * /var and close the runtime file. */
293 if (s->runtime_journal)
294 return s->runtime_journal;
297 return s->system_journal;
299 r = sd_id128_get_machine(&machine);
301 return s->system_journal;
303 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
307 if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0)
308 return s->system_journal;
310 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
311 /* Too many open? Then let's close one */
312 f = hashmap_steal_first(s->user_journals);
314 journal_file_close(f);
317 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, &s->system_metrics, s->system_journal, &f);
321 return s->system_journal;
323 server_fix_perms(s, f, uid);
325 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
/* Could not remember the file in the hashmap: close it again and fall back. */
327 journal_file_close(f);
328 return s->system_journal;
/* Rotates every journal file that is currently open: the runtime journal, the
 * system journal and all per-user journals in s->user_journals. After a
 * rotation the access rights of the new file are fixed up with
 * server_fix_perms(); failures are logged as errors. */
334 static void server_rotate(Server *s) {
340 log_info("Rotating...");
342 if (s->runtime_journal) {
343 r = journal_file_rotate(&s->runtime_journal);
345 if (s->runtime_journal)
346 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
348 log_error("Failed to create new runtime journal: %s", strerror(-r));
350 server_fix_perms(s, s->runtime_journal, 0);
353 if (s->system_journal) {
354 r = journal_file_rotate(&s->system_journal);
356 if (s->system_journal)
357 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
359 log_error("Failed to create new system journal: %s", strerror(-r));
362 server_fix_perms(s, s->system_journal, 0);
365 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
366 r = journal_file_rotate(&f);
369 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
371 log_error("Failed to create user journal: %s", strerror(-r));
373 hashmap_replace(s->user_journals, k, f);
/* Fix the rights of the freshly rotated user journal itself. Passing the
 * system journal here would grant the user read access to the system journal
 * and leave the new user journal without its ACL entry. */
374 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Vacuums the journal directories of this machine: /var/log/journal/<machine-id>
 * when the system journal is open and /run/log/journal/<machine-id> when the
 * runtime journal is open, each with the max_use/keep_free limits of the
 * matching metrics. A missing directory (-ENOENT) is not reported. Finally the
 * timestamp of the cached available space is reset to 0, presumably so that
 * available_space() recalculates. */
379 static void server_vacuum(Server *s) {
385 log_info("Vacuuming...");
387 r = sd_id128_get_machine(&machine);
389 log_error("Failed to get machine ID: %s", strerror(-r));
393 sd_id128_to_string(machine, ids);
395 if (s->system_journal) {
396 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
401 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
402 if (r < 0 && r != -ENOENT)
403 log_error("Failed to vacuum %s: %s", p, strerror(-r));
407 if (s->runtime_journal) {
408 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
413 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
414 if (r < 0 && r != -ENOENT)
415 log_error("Failed to vacuum %s: %s", p, strerror(-r));
419 s->cached_available_space_timestamp = 0;
/* Determines the cgroup path of pid in the SYSTEMD_CGROUP_CONTROLLER hierarchy
 * and shortens it: when the cgroup path of PID 1 (with a trailing "/system"
 * cut off) is a prefix of it, a strdup()ed copy of the remainder is made. The
 * handling of the other cases is not visible here. */
422 static char *shortened_cgroup_path(pid_t pid) {
424 char *process_path, *init_path, *path;
428 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
432 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
/* 7 == strlen("/system"): drop that suffix from PID 1's path. */
438 if (endswith(init_path, "/system"))
439 init_path[strlen(init_path) - 7] = 0;
440 else if (streq(init_path, "/"))
443 if (startswith(process_path, init_path)) {
446 p = strdup(process_path + strlen(init_path));
</full_update_placeholder_removed>
/* Appends one entry, given as n iovec fields, to the journal file selected by
 * find_journal() for uid.
 *
 * The file is rotated up front when journal_file_rotate_suggested() says so.
 * When appending fails with one of the errors listed below (size or space
 * limits, corruption, foreign or archived file, ...) the reason is logged, the
 * journal is looked up again and the write is retried; any other error is
 * logged and the entry is dropped. */
464 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
466 bool vacuumed = false;
473 f = find_journal(s, uid);
477 if (journal_file_rotate_suggested(f)) {
478 log_info("Journal header limits reached or header out-of-date, rotating.");
483 f = find_journal(s, uid);
489 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
494 (r != -E2BIG && /* hit limit */
495 r != -EFBIG && /* hit fs limit */
496 r != -EDQUOT && /* quota hit */
497 r != -ENOSPC && /* disk full */
498 r != -EBADMSG && /* corrupted */
499 r != -ENODATA && /* truncated */
500 r != -EHOSTDOWN && /* other machine */
501 r != -EPROTONOSUPPORT && /* unsupported feature */
502 r != -EBUSY && /* unclean shutdown */
503 r != -ESHUTDOWN /* already archived */)) {
504 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* r holds a negative errno value, so all four codes must be compared negated;
 * otherwise a hit quota or a full disk is reported as file corruption. */
508 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
509 log_info("Allocation limit reached, rotating.");
510 else if (r == -EHOSTDOWN)
511 log_info("Journal file from other machine, rotating.");
512 else if (r == -EBUSY)
513 log_info("Unclean shutdown, rotating.");
515 log_warning("Journal file corrupted, rotating.");
521 f = find_journal(s, uid);
525 log_info("Retrying write.");
/* Completes an entry with the fields journald determines itself and writes it
 * to the journal.
 *
 * iovec holds n fields already and has room for m elements; the caller must
 * leave at least N_IOVEC_META_FIELDS slots free (asserted below). From the
 * sender's credentials the fields _PID, _UID, _GID, _COMM, _EXE, _CMDLINE,
 * _AUDIT_SESSION, _AUDIT_LOGINUID, _SYSTEMD_CGROUP, _SYSTEMD_SESSION,
 * _SYSTEMD_OWNER_UID, _SYSTEMD_UNIT and _SELINUX_CONTEXT are derived, and
 * _SOURCE_REALTIME_TIMESTAMP, _BOOT_ID, _MACHINE_ID and _HOSTNAME are added.
 * A field whose value cannot be determined or allocated is left out. The entry
 * goes to the journal of the sender's login uid, or to the journal of uid 0
 * when the sender's real uid is 0. */
529 static void dispatch_message_real(
531 struct iovec *iovec, unsigned n, unsigned m,
534 const char *label, size_t label_len,
535 const char *unit_id) {
537 char *pid = NULL, *uid = NULL, *gid = NULL,
538 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
539 *comm = NULL, *cmdline = NULL, *hostname = NULL,
540 *audit_session = NULL, *audit_loginuid = NULL,
541 *exe = NULL, *cgroup = NULL, *session = NULL,
542 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
548 uid_t loginuid = 0, realuid = 0;
553 assert(n + N_IOVEC_META_FIELDS <= m);
561 realuid = ucred->uid;
563 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
564 IOVEC_SET_STRING(iovec[n++], pid);
566 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
567 IOVEC_SET_STRING(iovec[n++], uid);
569 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
570 IOVEC_SET_STRING(iovec[n++], gid);
572 r = get_process_comm(ucred->pid, &t);
574 comm = strappend("_COMM=", t);
578 IOVEC_SET_STRING(iovec[n++], comm);
581 r = get_process_exe(ucred->pid, &t);
583 exe = strappend("_EXE=", t);
587 IOVEC_SET_STRING(iovec[n++], exe);
590 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
592 cmdline = strappend("_CMDLINE=", t);
596 IOVEC_SET_STRING(iovec[n++], cmdline);
599 r = audit_session_from_pid(ucred->pid, &audit);
601 if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
602 IOVEC_SET_STRING(iovec[n++], audit_session);
604 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
606 if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
607 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
609 t = shortened_cgroup_path(ucred->pid);
611 cgroup = strappend("_SYSTEMD_CGROUP=", t);
615 IOVEC_SET_STRING(iovec[n++], cgroup);
619 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
620 session = strappend("_SYSTEMD_SESSION=", t);
624 IOVEC_SET_STRING(iovec[n++], session);
/* sd_pid_get_owner_uid() expects a process ID; handing it the sender's uid
 * would look up the session owner of an unrelated process. */
627 if (sd_pid_get_owner_uid(ucred->pid, &owner) >= 0)
628 if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
629 IOVEC_SET_STRING(iovec[n++], owner_uid);
632 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
633 unit = strappend("_SYSTEMD_UNIT=", t);
636 unit = strappend("_SYSTEMD_UNIT=", unit_id);
639 IOVEC_SET_STRING(iovec[n++], unit);
/* label is not necessarily NUL-terminated, hence the copy by explicit length. */
643 selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
644 if (selinux_context) {
645 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
646 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
647 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
648 IOVEC_SET_STRING(iovec[n++], selinux_context);
651 security_context_t con;
653 if (getpidcon(ucred->pid, &con) >= 0) {
654 selinux_context = strappend("_SELINUX_CONTEXT=", con);
656 IOVEC_SET_STRING(iovec[n++], selinux_context);
665 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
666 (unsigned long long) timeval_load(tv)) >= 0)
667 IOVEC_SET_STRING(iovec[n++], source_time);
670 /* Note that strictly speaking storing the boot id here is
671 * redundant since the entry includes this in-line
672 * anyway. However, we need this indexed, too. */
673 r = sd_id128_get_boot(&id);
675 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
676 IOVEC_SET_STRING(iovec[n++], boot_id);
678 r = sd_id128_get_machine(&id);
680 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
681 IOVEC_SET_STRING(iovec[n++], machine_id);
683 t = gethostname_malloc();
685 hostname = strappend("_HOSTNAME=", t);
688 IOVEC_SET_STRING(iovec[n++], hostname);
693 write_to_journal(s, realuid == 0 ? 0 : loginuid, iovec, n);
706 free(audit_loginuid);
711 free(selinux_context);
/* Logs a message generated by journald itself. The printf-style format and its
 * arguments become the MESSAGE= field (cut off at the buffer size), and the
 * entry also carries PRIORITY=6, _TRANSPORT=driver and MESSAGE_ID=<message_id>.
 * It is handed to dispatch_message_real() with journald's own pid/uid/gid as
 * credentials. */
714 static void driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
/* 11 == strlen("MESSAGE_ID="), followed by 32 hex digits and the NUL byte. */
715 char mid[11 + 32 + 1];
716 char buffer[16 + LINE_MAX + 1];
717 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
725 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
726 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* 8 == strlen("MESSAGE="); the formatted text follows right after it. */
728 memcpy(buffer, "MESSAGE=", 8);
729 va_start(ap, format);
730 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
732 char_array_0(buffer);
733 IOVEC_SET_STRING(iovec[n++], buffer);
735 snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
737 IOVEC_SET_STRING(iovec[n++], mid);
740 ucred.pid = getpid();
741 ucred.uid = getuid();
742 ucred.gid = getgid();
744 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Filters and rate-limits an entry before it is stored.
 *
 * Entries whose level exceeds s->max_level_store are checked first. The rate
 * limit is then applied per shortened cgroup path of the sender, cut off after
 * its third '/', taking the entry's level and the available disk space into
 * account. When messages were suppressed, a driver message reports how many.
 * Finally the entry is passed on to dispatch_message_real(). */
747 static void dispatch_message(Server *s,
748 struct iovec *iovec, unsigned n, unsigned m,
751 const char *label, size_t label_len,
755 char *path = NULL, *c;
758 assert(iovec || n == 0);
763 if (LOG_PRI(priority) > s->max_level_store)
769 path = shortened_cgroup_path(ucred->pid);
773 /* example: /user/lennart/3/foobar
774 * /system/dbus.service/foobar
776 * So let's cut off everything past the third /, since that is
777 * where user directories start */
779 c = strchr(path, '/');
781 c = strchr(c+1, '/');
783 c = strchr(c+1, '/');
789 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
796 /* Write a suppression message if we suppressed something */
798 driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
803 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
/* Sends the message made up of the n_iovec parts in iovec as one datagram to
 * the socket /run/systemd/journal/syslog, using s->syslog_fd. The sender's
 * credentials (ucred) are attached as SCM_CREDENTIALS control data. When
 * sending fails with ESRCH the send is repeated once with a different set of
 * credentials (u). A failure to forward is only logged at debug level. */
806 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
807 struct msghdr msghdr;
808 struct cmsghdr *cmsg;
810 struct cmsghdr cmsghdr;
811 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
813 union sockaddr_union sa;
820 msghdr.msg_iov = (struct iovec*) iovec;
821 msghdr.msg_iovlen = n_iovec;
824 sa.un.sun_family = AF_UNIX;
825 strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
826 msghdr.msg_name = &sa;
827 msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
831 msghdr.msg_control = &control;
832 msghdr.msg_controllen = sizeof(control);
834 cmsg = CMSG_FIRSTHDR(&msghdr);
835 cmsg->cmsg_level = SOL_SOCKET;
836 cmsg->cmsg_type = SCM_CREDENTIALS;
837 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
838 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
839 msghdr.msg_controllen = cmsg->cmsg_len;
842 /* Forward the syslog message we received via /dev/log to
843 * /run/systemd/journal/syslog. Unfortunately we currently can't set
844 * the SO_TIMESTAMP auxiliary data, and hence we don't. */
846 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
849 /* The socket is full? I guess the syslog implementation is
850 * too slow, and we shouldn't wait for that... */
854 if (ucred && errno == ESRCH) {
857 /* Hmm, presumably the sender process vanished
858 * by now, so let's fix it as good as we
863 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
865 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
873 log_debug("Failed to forward syslog message: %m");
/* Forwards an already formatted syslog line (buffer) unchanged, as a single
 * iovec, through forward_syslog_iovec(). The level of priority is checked
 * against s->max_level_syslog first. */
876 static void forward_syslog_raw(Server *s, int priority, const char *buffer, struct ucred *ucred, struct timeval *tv) {
882 if (LOG_PRI(priority) > s->max_level_syslog)
885 IOVEC_SET_STRING(iovec, buffer);
886 forward_syslog_iovec(s, &iovec, 1, ucred, tv);
/* Formats a syslog line from its parts and forwards it through
 * forward_syslog_iovec(). The line consists of "<priority>", a timestamp in
 * "%h %e %T " format (taken from tv, or the current time when tv is NULL), the
 * identifier followed by "[pid]: " or ": ", and the message. On one path the
 * identifier is taken from the sender's process name. priority must be within
 * 0..999 (asserted); its level is checked against s->max_level_syslog. */
889 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
890 struct iovec iovec[5];
/* "<999>" plus the NUL byte fits into 6 bytes. */
891 char header_priority[6], header_time[64], header_pid[16];
895 char *ident_buf = NULL;
898 assert(priority >= 0);
899 assert(priority <= 999);
902 if (LOG_PRI(priority) > s->max_level_syslog)
905 /* First: priority field */
906 snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
907 char_array_0(header_priority);
908 IOVEC_SET_STRING(iovec[n++], header_priority);
910 /* Second: timestamp */
911 t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
915 if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
917 IOVEC_SET_STRING(iovec[n++], header_time);
919 /* Third: identifier and PID */
922 get_process_comm(ucred->pid, &ident_buf);
923 identifier = ident_buf;
926 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
927 char_array_0(header_pid);
930 IOVEC_SET_STRING(iovec[n++], identifier);
932 IOVEC_SET_STRING(iovec[n++], header_pid);
933 } else if (identifier) {
934 IOVEC_SET_STRING(iovec[n++], identifier);
935 IOVEC_SET_STRING(iovec[n++], ": ");
938 /* Fourth: message */
939 IOVEC_SET_STRING(iovec[n++], message);
941 forward_syslog_iovec(s, iovec, n, ucred, tv);
/* Replaces a facility of 0 in priority by LOG_USER while keeping the level.
 * forward_kmsg() uses this so that nothing is written to kmsg with the kernel
 * facility. The handling of priorities with a non-zero facility is not visible
 * here (presumably they are returned unchanged). */
946 static int fixup_priority(int priority) {
948 if ((priority & LOG_FACMASK) == 0)
949 return (priority & LOG_PRIMASK) | LOG_USER;
/* Writes a message to the kernel log buffer through s->dev_kmsg_fd, as
 * "<priority>", the identifier followed by "[pid]: " or ": ", the message and
 * a newline, all in a single writev(). The level is checked against
 * s->max_level_kmsg, the fd against being unset, and the facility is fixed up
 * with fixup_priority(). priority must be within 0..999 (asserted). A failed
 * write is only logged at debug level. */
954 static void forward_kmsg(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
955 struct iovec iovec[5];
956 char header_priority[6], header_pid[16];
958 char *ident_buf = NULL;
961 assert(priority >= 0);
962 assert(priority <= 999);
965 if (_unlikely_(LOG_PRI(priority) > s->max_level_kmsg))
968 if (_unlikely_(s->dev_kmsg_fd < 0))
971 /* Never allow messages with kernel facility to be written to
972 * kmsg, regardless where the data comes from. */
973 priority = fixup_priority(priority);
975 /* First: priority field */
976 snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
977 char_array_0(header_priority);
978 IOVEC_SET_STRING(iovec[n++], header_priority);
980 /* Second: identifier and PID */
983 get_process_comm(ucred->pid, &ident_buf);
984 identifier = ident_buf;
987 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
988 char_array_0(header_pid);
991 IOVEC_SET_STRING(iovec[n++], identifier);
993 IOVEC_SET_STRING(iovec[n++], header_pid);
994 } else if (identifier) {
995 IOVEC_SET_STRING(iovec[n++], identifier);
996 IOVEC_SET_STRING(iovec[n++], ": ");
999 /* Third: message */
1000 IOVEC_SET_STRING(iovec[n++], message);
1001 IOVEC_SET_STRING(iovec[n++], "\n");
1003 if (writev(s->dev_kmsg_fd, iovec, n) < 0)
1004 log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
/* Writes a message to the console: the identifier followed by "[pid]: " or
 * ": ", the message and a newline. The target is s->tty_path, or /dev/console
 * when that is unset; it is opened for every message and closed again
 * afterwards. The level is checked against s->max_level_console. Failures to
 * open or write are only logged at debug level. */
1009 static void forward_console(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
1010 struct iovec iovec[4];
1011 char header_pid[16];
1013 char *ident_buf = NULL;
1019 if (LOG_PRI(priority) > s->max_level_console)
1022 /* First: identifier and PID */
1025 get_process_comm(ucred->pid, &ident_buf);
1026 identifier = ident_buf;
1029 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
1030 char_array_0(header_pid);
1033 IOVEC_SET_STRING(iovec[n++], identifier);
1035 IOVEC_SET_STRING(iovec[n++], header_pid);
1036 } else if (identifier) {
1037 IOVEC_SET_STRING(iovec[n++], identifier);
1038 IOVEC_SET_STRING(iovec[n++], ": ");
1041 /* Third: message */
1042 IOVEC_SET_STRING(iovec[n++], message);
1043 IOVEC_SET_STRING(iovec[n++], "\n");
1045 tty = s->tty_path ? s->tty_path : "/dev/console";
1047 fd = open_terminal(tty, O_WRONLY|O_NOCTTY|O_CLOEXEC);
1049 log_debug("Failed to open %s for logging: %s", tty, strerror(errno));
1053 if (writev(fd, iovec, n) < 0)
1054 log_debug("Failed to write to %s for logging: %s", tty, strerror(errno));
1056 close_nointr_nofail(fd);
/* Splits the leading tag off a syslog message. Leading whitespace is skipped
 * and the first whitespace-delimited token is examined; when it ends in ']' a
 * bracketed part is copied out of it with strndup(). *buf is advanced past the
 * whitespace that follows. NOTE(review): presumably the token has the form
 * "identifier[pid]:" and the results are returned through *identifier and
 * *pid; those lines are not visible here. */
1062 static void read_identifier(const char **buf, char **identifier, char **pid) {
1073 p += strspn(p, WHITESPACE);
1074 l = strcspn(p, WHITESPACE);
1083 if (p[l-1] == ']') {
1089 t = strndup(p+k+1, l-k-2);
1109 *buf += strspn(*buf, WHITESPACE);
/* Handles one message received in syslog format.
 *
 * The priority prefix is parsed (the default is LOG_USER|LOG_INFO). When
 * forwarding to syslog is enabled the original line (orig) is forwarded
 * unchanged. Then the date is skipped and the identifier/pid tag is split off;
 * the remaining text is forwarded to kmsg and the console as configured. The
 * journal entry is built from _TRANSPORT=syslog, PRIORITY=, SYSLOG_FACILITY=
 * (only for a non-zero facility), SYSLOG_IDENTIFIER=, SYSLOG_PID= and MESSAGE=
 * and passed to dispatch_message(). */
1112 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv, const char *label, size_t label_len) {
1113 char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
/* Room for the up to six fields set here plus those dispatch_message_real() adds. */
1114 struct iovec iovec[N_IOVEC_META_FIELDS + 6];
1116 int priority = LOG_USER | LOG_INFO;
1117 char *identifier = NULL, *pid = NULL;
1124 parse_syslog_priority((char**) &buf, &priority);
1126 if (s->forward_to_syslog)
1127 forward_syslog_raw(s, priority, orig, ucred, tv);
1129 skip_syslog_date((char**) &buf);
1130 read_identifier(&buf, &identifier, &pid);
1132 if (s->forward_to_kmsg)
1133 forward_kmsg(s, priority, identifier, buf, ucred);
1135 if (s->forward_to_console)
1136 forward_console(s, priority, identifier, buf, ucred);
1138 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
1140 if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1141 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1143 if (priority & LOG_FACMASK)
1144 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1145 IOVEC_SET_STRING(iovec[n++], syslog_facility);
1148 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1149 if (syslog_identifier)
1150 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1154 syslog_pid = strappend("SYSLOG_PID=", pid);
1156 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1159 message = strappend("MESSAGE=", buf);
1161 IOVEC_SET_STRING(iovec[n++], message);
1163 dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, label, label_len, NULL, priority);
1168 free(syslog_priority);
1169 free(syslog_facility);
1170 free(syslog_identifier);
/* Checks whether the l bytes at p form a field name that clients may set.
 * According to the checks commented below, a name must not be empty, must not
 * be longer than 64 characters, must not start with an underscore (those are
 * protected) or a digit, and may consist of A-Z, 0-9 and '_' only. p need not
 * be NUL-terminated. */
1174 static bool valid_user_field(const char *p, size_t l) {
1177 /* We kinda enforce POSIX syntax recommendations for
1178 environment variables here, but make a couple of additional
1181 http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html */
1183 /* No empty field names */
1187 /* Don't allow names longer than 64 chars */
1191 /* Variables starting with an underscore are protected */
1195 /* Don't allow digits as first character */
1196 if (p[0] >= '0' && p[0] <= '9')
1199 /* Only allow A-Z0-9 and '_' */
1200 for (a = p; a < p + l; a++)
1201 if (!((*a >= 'A' && *a <= 'Z') ||
1202 (*a >= '0' && *a <= '9') ||
/* Parses a buffer in the native journal protocol and dispatches the entries
 * it contains.
 *
 * The buffer is processed line by line. A line "FIELD=value" is a text field
 * and is referenced in place. A line without '=' is a field name that is
 * followed by a 64-bit length, that many bytes of data and a newline; such a
 * field is copied into a newly allocated "FIELD=data" block. Lines starting
 * with '.' or '#' are skipped. Only fields accepted by valid_user_field() are
 * added to the entry. PRIORITY=, SYSLOG_FACILITY=, SYSLOG_IDENTIFIER= and
 * MESSAGE= are additionally evaluated for rate limiting and forwarding. The
 * iovec array grows on demand so that N_IOVEC_META_FIELDS slots always stay
 * free. Entries are dispatched with _TRANSPORT=journal, and forwarded to
 * syslog/kmsg/console as configured. */
1209 static void process_native_message(
1211 const void *buffer, size_t buffer_size,
1212 struct ucred *ucred,
1214 const char *label, size_t label_len) {
1216 struct iovec *iovec = NULL;
1217 unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1220 int priority = LOG_INFO;
1221 char *identifier = NULL, *message = NULL;
1224 assert(buffer || buffer_size == 0);
1227 remaining = buffer_size;
1229 while (remaining > 0) {
1232 e = memchr(p, '\n', remaining);
1235 /* Trailing noise, let's ignore it, and flush what we collected */
1236 log_debug("Received message with trailing noise, ignoring.");
1241 /* Entry separator */
1242 dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
1244 priority = LOG_INFO;
1251 if (*p == '.' || *p == '#') {
1252 /* Ignore control commands for now, and
1254 remaining -= (e - p) + 1;
1259 /* A property follows */
1261 if (n+N_IOVEC_META_FIELDS >= m) {
1265 u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1266 c = realloc(iovec, u * sizeof(struct iovec));
1276 q = memchr(p, '=', e - p);
1278 if (valid_user_field(p, q - p)) {
1283 /* If the field name starts with an
1284 * underscore, skip the variable,
1285 * since that indicates a trusted
1287 iovec[n].iov_base = (char*) p;
1288 iovec[n].iov_len = l;
1291 /* We need to determine the priority
1292 * of this entry for the rate limiting
1295 memcmp(p, "PRIORITY=", 9) == 0 &&
1296 p[9] >= '0' && p[9] <= '9')
1297 priority = (priority & LOG_FACMASK) | (p[9] - '0');
1300 memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1301 p[16] >= '0' && p[16] <= '9')
1302 priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1305 memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1306 p[16] >= '0' && p[16] <= '9' &&
1307 p[17] >= '0' && p[17] <= '9')
1308 priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1311 memcmp(p, "SYSLOG_IDENTIFIER=", 18) == 0) {
1314 t = strndup(p + 18, l - 18);
1319 } else if (l >= 8 &&
1320 memcmp(p, "MESSAGE=", 8) == 0) {
1323 t = strndup(p + 8, l - 8);
1331 remaining -= (e - p) + 1;
1339 if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1340 log_debug("Failed to parse message, ignoring.");
1344 memcpy(&l_le, e + 1, sizeof(uint64_t));
/* NOTE(review): l is taken from the message itself. Confirm that an upper
 * bound on l is enforced before the size arithmetic and the malloc() below,
 * which could otherwise wrap around for huge values. */
1347 if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1348 e[1+sizeof(uint64_t)+l] != '\n') {
1349 log_debug("Failed to parse message, ignoring.");
1353 k = malloc((e - p) + 1 + l);
1359 memcpy(k, p, e - p);
1361 memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1363 if (valid_user_field(p, e - p)) {
1364 iovec[n].iov_base = k;
1365 iovec[n].iov_len = (e - p) + 1 + l;
1370 remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1371 p = e + 1 + sizeof(uint64_t) + l + 1;
1379 IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1382 if (s->forward_to_syslog)
1383 forward_syslog(s, priority, identifier, message, ucred, tv);
1385 if (s->forward_to_kmsg)
1386 forward_kmsg(s, priority, identifier, message, ucred);
1388 if (s->forward_to_console)
1389 forward_console(s, priority, identifier, message, ucred);
1392 dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
/* Free only the fields that were allocated here, i.e. those whose data does
 * not point into the caller's buffer. */
1395 for (j = 0; j < n; j++) {
1399 if (iovec[j].iov_base < buffer ||
1400 (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1401 free(iovec[j].iov_base);
/* Handles a native-protocol message that was passed as a file descriptor
 * because it did not fit into a datagram. The file must be a regular file of
 * at most ENTRY_SIZE_MAX bytes; its contents are read into a heap buffer and
 * handed to process_native_message(). Problems are logged and the message is
 * ignored. */
1409 static void process_native_file(
1412 struct ucred *ucred,
1414 const char *label, size_t label_len) {
1423 /* Data is in the passed file, since it didn't fit in a
1424 * datagram. We can't map the file here, since clients might
1425 * then truncate it and trigger a SIGBUS for us. So let's
1426 * stupidly read it */
1428 if (fstat(fd, &st) < 0) {
1429 log_error("Failed to stat passed file, ignoring: %m");
1433 if (!S_ISREG(st.st_mode)) {
1434 log_error("File passed is not regular. Ignoring.");
1438 if (st.st_size <= 0)
1441 if (st.st_size > ENTRY_SIZE_MAX) {
1442 log_error("File passed too large. Ignoring.");
1446 p = malloc(st.st_size);
1452 n = pread(fd, p, st.st_size, 0);
/* pread() returns -1 and reports the reason in errno, so the error text has
 * to come from errno (%m) and not from the return value. */
1454 log_error("Failed to read file, ignoring: %m");
1456 process_native_message(s, p, n, ucred, tv, label, label_len);
/* Logs one line p received on stdout stream s.
 *
 * The priority is the stream's default, optionally overridden by a syslog-style
 * prefix in the line when level_prefix is set. The line is forwarded to
 * syslog, kmsg and the console when either the stream or the server asks for
 * it. The journal entry consists of _TRANSPORT=stdout, PRIORITY=,
 * SYSLOG_FACILITY= (only for a non-zero facility), SYSLOG_IDENTIFIER= (only
 * when the stream has an identifier) and MESSAGE=, and is dispatched with the
 * stream's credentials, security context and unit id. */
1461 static int stdout_stream_log(StdoutStream *s, const char *p) {
1462 struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1463 char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1467 size_t label_len = 0;
1475 priority = s->priority;
1477 if (s->level_prefix)
1478 parse_syslog_priority((char**) &p, &priority);
1480 if (s->forward_to_syslog || s->server->forward_to_syslog)
1481 forward_syslog(s->server, fixup_priority(priority), s->identifier, p, &s->ucred, NULL);
1483 if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1484 forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1486 if (s->forward_to_console || s->server->forward_to_console)
1487 forward_console(s->server, priority, s->identifier, p, &s->ucred);
1489 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1491 if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1492 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1494 if (priority & LOG_FACMASK)
1495 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1496 IOVEC_SET_STRING(iovec[n++], syslog_facility);
1498 if (s->identifier) {
1499 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1500 if (syslog_identifier)
1501 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1504 message = strappend("MESSAGE=", p);
1506 IOVEC_SET_STRING(iovec[n++], message);
1509 if (s->security_context) {
1510 label = (char*) s->security_context;
1511 label_len = strlen((char*) s->security_context);
1515 dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, label, label_len, s->unit_id, priority);
1518 free(syslog_priority);
1519 free(syslog_facility);
1520 free(syslog_identifier);
/* Processes one complete line p of stdout stream s according to the stream's
 * parse state.
 *
 * The first lines of a stream form its header and are consumed one per state:
 * identifier, unit id (evaluated only for streams of uid 0), default priority,
 * and four booleans (level prefix, forward to syslog, kmsg, console). After
 * that the stream is in STDOUT_STREAM_RUNNING and every line is logged with
 * stdout_stream_log(). A header line that cannot be parsed is reported with a
 * warning. */
1525 static int stdout_stream_line(StdoutStream *s, char *p) {
1535 case STDOUT_STREAM_IDENTIFIER:
1537 s->identifier = NULL;
1539 s->identifier = strdup(p);
1544 s->state = STDOUT_STREAM_UNIT_ID;
1547 case STDOUT_STREAM_UNIT_ID:
1548 if (s->ucred.uid == 0) {
1552 s->unit_id = strdup(p);
1558 s->state = STDOUT_STREAM_PRIORITY;
1561 case STDOUT_STREAM_PRIORITY:
1562 r = safe_atoi(p, &s->priority);
/* Accept the full range 0..999, boundaries included, the same range the
 * forwarders assert and the kmsg parser accepts. */
1563 if (r < 0 || s->priority < 0 || s->priority > 999) {
1564 log_warning("Failed to parse log priority line.");
1568 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1571 case STDOUT_STREAM_LEVEL_PREFIX:
1572 r = parse_boolean(p);
1574 log_warning("Failed to parse level prefix line.");
1578 s->level_prefix = !!r;
1579 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1582 case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1583 r = parse_boolean(p);
1585 log_warning("Failed to parse forward to syslog line.");
1589 s->forward_to_syslog = !!r;
1590 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1593 case STDOUT_STREAM_FORWARD_TO_KMSG:
1594 r = parse_boolean(p);
1596 log_warning("Failed to parse copy to kmsg line.");
1600 s->forward_to_kmsg = !!r;
1601 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1604 case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1605 r = parse_boolean(p);
1607 log_warning("Failed to parse copy to console line.");
1611 s->forward_to_console = !!r;
1612 s->state = STDOUT_STREAM_RUNNING;
1615 case STDOUT_STREAM_RUNNING:
1616 return stdout_stream_log(s, p);
1619 assert_not_reached("Unknown stream state");
/* Splits the data buffered in s->buffer into lines and passes each of them to
 * stdout_stream_line(). A line ends at '\n', or is cut when the buffer is full
 * without containing a newline. With force_flush set, remaining data that is
 * not terminated by a newline is processed as a line as well. Unprocessed data
 * is moved to the start of the buffer and s->length is updated. */
1622 static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
1630 remaining = s->length;
1635 end = memchr(p, '\n', remaining);
1638 else if (remaining >= sizeof(s->buffer) - 1) {
1639 end = p + sizeof(s->buffer) - 1;
1646 r = stdout_stream_line(s, p);
1654 if (force_flush && remaining > 0) {
1656 r = stdout_stream_line(s, p);
1664 if (p > s->buffer) {
1665 memmove(s->buffer, p, remaining);
1666 s->length = remaining;
/* Reads pending data from the stream's socket into the free part of s->buffer
 * (one byte is always left unused) and scans it for lines. EAGAIN is handled
 * separately from other read errors, which are logged with a warning. The
 * scan is run with force_flush=true on one path and with false on the other;
 * NOTE(review): presumably true at end-of-stream, the condition is not
 * visible here. */
1672 static int stdout_stream_process(StdoutStream *s) {
1678 l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1681 if (errno == EAGAIN)
1684 log_warning("Failed to read from stream: %m");
1689 r = stdout_stream_scan(s, true);
1697 r = stdout_stream_scan(s, false);
/* Releases a stdout stream: it is removed from the server's stream list and
 * counter, its fd is removed from the server's epoll set and closed, and its
 * security context and identifier are freed. */
1705 static void stdout_stream_free(StdoutStream *s) {
1709 assert(s->server->n_stdout_streams > 0);
1710 s->server->n_stdout_streams --;
1711 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
1716 epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1718 close_nointr_nofail(s->fd);
1722 if (s->security_context)
1723 freecon(s->security_context);
1726 free(s->identifier);
/* Accepts a new connection on the server's stdout socket and sets up a
 * StdoutStream for it.
 *
 * The connection is refused when STDOUT_STREAMS_MAX streams exist already. For
 * an accepted connection the peer credentials (SO_PEERCRED) and the peer's
 * security context are queried, the writing side of the socket is shut down,
 * and the fd is added to the server's epoll set for EPOLLIN with the stream as
 * event data. The stream is then linked into s->stdout_streams. On the error
 * path at the end the stream is released with stdout_stream_free(). */
1730 static int stdout_stream_new(Server *s) {
1731 StdoutStream *stream;
1734 struct epoll_event ev;
1738 fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1740 if (errno == EAGAIN)
1743 log_error("Failed to accept stdout connection: %m");
1747 if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1748 log_warning("Too many stdout streams, refusing connection.");
1749 close_nointr_nofail(fd);
1753 stream = new0(StdoutStream, 1);
1755 close_nointr_nofail(fd);
1761 len = sizeof(stream->ucred);
1762 if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1763 log_error("Failed to determine peer credentials: %m");
/* A missing security context is not fatal; ENOPROTOOPT is not even logged. */
1769 if (getpeercon(fd, &stream->security_context) < 0 && errno != ENOPROTOOPT)
1770 log_error("Failed to determine peer security context: %m");
1773 if (shutdown(fd, SHUT_WR) < 0) {
1774 log_error("Failed to shutdown writing side of socket: %m");
1780 ev.data.ptr = stream;
1781 ev.events = EPOLLIN;
1782 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1783 log_error("Failed to add stream to event loop: %m");
1789 LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1790 s->n_stdout_streams ++;
1795 stdout_stream_free(stream);
/* Tells whether the PID given as string refers to this very process: the
 * string is parsed with parse_pid() and the result compared to getpid(). */
1799 static bool is_us(const char *pid) {
1804 if (parse_pid(pid, &t) < 0)
1807 return t == getpid();
/* Converts one /dev/kmsg record (p, l bytes) into a journal entry. The
 * comma-separated header is parsed (priority, sequence number, timestamp),
 * attached metadata lines become _KERNEL_-prefixed fields, syslog-style fields
 * and the unescaped MESSAGE= are added, and the resulting iovec array is
 * handed to dispatch_message(). */
1810 static void dev_kmsg_record(Server *s, char *p, size_t l) {
/* Room for the metadata fields, the 7 fields added below and the kernel fields. */
1811 struct iovec iovec[N_IOVEC_META_FIELDS + 7 + N_IOVEC_KERNEL_FIELDS];
1812 char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
1814 unsigned n = 0, z = 0, j;
1816 char *identifier = NULL, *pid = NULL, *e, *f, *k;
/* First header field: the syslog priority (facility and level combined). */
1826 e = memchr(p, ',', l);
1831 r = safe_atoi(p, &priority);
1832 if (r < 0 || priority < 0 || priority > 999)
/* NOTE(review): non-kernel-facility records while forward_to_kmsg is on are
 * presumably skipped so our own forwarded messages are not read back — confirm. */
1835 if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
/* Second header field: parsed into serial. */
1840 e = memchr(p, ',', l);
1845 r = safe_atou64(p, &serial);
/* The ';' is searched for here in addition to the ',' field separator. */
1851 f = memchr(p, ';', l);
1854 /* Kernel 3.6 has the flags field, kernel 3.5 lacks that */
1855 e = memchr(p, ',', l);
/* Timestamp field, parsed into usec. */
1860 r = parse_usec(p, &usec);
1866 e = memchr(p, '\n', l);
/* Pick up at most N_IOVEC_KERNEL_FIELDS newline-separated metadata fields. */
1875 for (j = 0; l > 0 && j < N_IOVEC_KERNEL_FIELDS; j++) {
1877 /* Meta data fields attached */
1884 e = memchr(k, '\n', l);
/* Unescape the field and prefix it with _KERNEL_. */
1890 m = cunescape_length_with_prefix(k, e - k, "_KERNEL_");
1894 IOVEC_SET_STRING(iovec[n++], m);
/* Fields that could not be formatted or allocated are simply left out. */
1901 if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
1902 (unsigned long long) usec) >= 0)
1903 IOVEC_SET_STRING(iovec[n++], source_time);
1905 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
1907 if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1908 IOVEC_SET_STRING(iovec[n++], syslog_priority);
/* Kernel-facility messages get the fixed identifier "kernel". */
1910 if ((priority & LOG_FACMASK) == LOG_KERN)
1911 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
/* Extract identifier and PID from the message text via read_identifier(). */
1913 read_identifier((const char**) &p, &identifier, &pid);
1915 /* Avoid any messages we generated ourselves via
1916 * log_info() and friends. */
1917 if (pid && is_us(pid))
1921 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1922 if (syslog_identifier)
1923 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1927 syslog_pid = strappend("SYSLOG_PID=", pid);
1929 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1932 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1933 IOVEC_SET_STRING(iovec[n++], syslog_facility);
/* The message text itself, unescaped and prefixed with MESSAGE=. */
1936 message = cunescape_length_with_prefix(p, pl, "MESSAGE=");
1938 IOVEC_SET_STRING(iovec[n++], message);
1940 dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, NULL, priority);
/* Free the first z iovec entries.
 * NOTE(review): z presumably counts the heap-allocated _KERNEL_ fields — confirm. */
1943 for (j = 0; j < z; j++)
1944 free(iovec[j].iov_base);
1947 free(syslog_priority);
1948 free(syslog_identifier);
1950 free(syslog_facility);
/* Opens the journal files the server writes to. The persistent system journal
 * lives in /var/log/journal/<machine-id>/system.journal and is considered
 * only for persistent/auto storage once /run/systemd/journal/flushed exists.
 * The runtime journal lives in /run/log/journal/<machine-id>/system.journal.
 * Opened journals inherit s->compress and get their permissions fixed via
 * server_fix_perms(). */
1956 static int system_journal_open(Server *s) {
/* ids holds the machine ID formatted as string; used as directory name. */
1962 r = sd_id128_get_machine(&machine);
1966 sd_id128_to_string(machine, ids);
1968 if (!s->system_journal &&
1969 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
1970 access("/run/systemd/journal/flushed", F_OK) >= 0) {
1972 /* If in auto mode: first try to create the machine
1973 * path, but not the prefix.
1975 * If in persistent mode: create /var/log/journal and
1976 * the machine path */
1978 if (s->storage == STORAGE_PERSISTENT)
1979 (void) mkdir("/var/log/journal/", 0755);
1981 fn = strappend("/var/log/journal/", ids);
1985 (void) mkdir(fn, 0755);
1988 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
1992 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, &s->system_metrics, NULL, &s->system_journal);
1996 s->system_journal->compress = s->compress;
1998 server_fix_perms(s, s->system_journal, 0);
/* -ENOENT and -EROFS are not worth a warning. */
2001 if (r != -ENOENT && r != -EROFS)
2002 log_warning("Failed to open system journal: %s", strerror(-r));
/* The runtime journal is only of interest if storage is not disabled. */
2008 if (!s->runtime_journal &&
2009 (s->storage != STORAGE_NONE)) {
2011 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
2015 if (s->system_journal) {
2017 /* Try to open the runtime journal, but only
2018 * if it already exists, so that we can flush
2019 * it into the system journal */
2021 r = journal_file_open(fn, O_RDWR, 0640, &s->runtime_metrics, NULL, &s->runtime_journal);
2026 log_warning("Failed to open runtime journal: %s", strerror(-r));
2033 /* OK, we really need the runtime journal, so create
2034 * it if necessary. */
2036 (void) mkdir_parents(fn, 0755);
2037 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, &s->runtime_metrics, NULL, &s->runtime_journal);
2041 log_error("Failed to open runtime journal: %s", strerror(-r));
2046 if (s->runtime_journal) {
2047 s->runtime_journal->compress = s->compress;
2049 server_fix_perms(s, s->runtime_journal, 0);
/* Flushes the runtime journal to persistent storage: iterates over all
 * runtime entries with sd-journal, copies each into the system journal, then
 * closes the runtime journal and cleans up /run/log/journal via rm_rf(). */
2056 static int server_flush_to_var(Server *s) {
/* Flushing only concerns auto/persistent storage, and needs a runtime
 * journal as source as well as a system journal as destination. */
2064 if (s->storage != STORAGE_AUTO &&
2065 s->storage != STORAGE_PERSISTENT)
2068 if (!s->runtime_journal)
2071 system_journal_open(s);
2073 if (!s->system_journal)
2076 log_info("Flushing to /var...");
2078 r = sd_id128_get_machine(&machine);
2080 log_error("Failed to get machine id: %s", strerror(-r));
/* Read back the runtime files only. */
2084 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
2086 log_error("Failed to read runtime journal: %s", strerror(-r));
2090 SD_JOURNAL_FOREACH(j) {
/* Locate the entry object the iterator currently points at. */
2093 f = j->current_file;
2094 assert(f && f->current_offset > 0);
2096 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2098 log_error("Can't read entry: %s", strerror(-r));
2102 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2104 log_info("Allocation limit reached.");
2106 journal_file_post_change(s->system_journal);
/* Second attempt at copying the same entry. */
2110 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2114 log_error("Can't write entry: %s", strerror(-r));
2120 journal_file_post_change(s->system_journal);
/* The runtime journal is no longer needed. */
2122 journal_file_close(s->runtime_journal);
2123 s->runtime_journal = NULL;
2126 rm_rf("/run/log/journal", false, true, false);
/* Performs a single read() on /dev/kmsg into a stack buffer and passes the
 * data to dev_kmsg_record(). */
2131 static int server_read_dev_kmsg(Server *s) {
2132 char buffer[8192+1]; /* the kernel-side limit per record is 8K currently */
2136 assert(s->dev_kmsg_fd >= 0);
/* One byte of the buffer is kept in reserve. */
2138 l = read(s->dev_kmsg_fd, buffer, sizeof(buffer) - 1);
2142 /* Old kernels that don't allow reading from /dev/kmsg
2143 * return EINVAL when we try. So handle this cleanly,
2144 * but don't try to ever read from it again. */
2145 if (errno == EINVAL) {
2146 epoll_ctl(s->epoll_fd, EPOLL_CTL_DEL, s->dev_kmsg_fd, NULL);
2150 if (errno == EAGAIN || errno == EINTR)
2153 log_error("Failed to read from kernel: %m");
2157 dev_kmsg_record(s, buffer, l);
/* Reads pending /dev/kmsg data via server_read_dev_kmsg(), after checking
 * that the device is open (dev_kmsg_fd >= 0) and marked readable. */
2161 static int server_flush_dev_kmsg(Server *s) {
2166 if (s->dev_kmsg_fd < 0)
2169 if (!s->dev_kmsg_readable)
2172 log_info("Flushing /dev/kmsg...");
2175 r = server_read_dev_kmsg(s);
/* Dispatches one epoll event. ev->data.fd is compared against the server's
 * well-known fds (signalfd, /dev/kmsg, native socket, syslog socket, stdout
 * listening socket); everything else is taken to be a stdout stream whose
 * StdoutStream pointer is stored in ev->data.ptr. */
2186 static int process_event(Server *s, struct epoll_event *ev) {
/* --- Signals --- */
2190 if (ev->data.fd == s->signal_fd) {
2191 struct signalfd_siginfo sfsi;
2194 if (ev->events != EPOLLIN) {
2195 log_info("Got invalid event from epoll.");
/* Only a complete signalfd_siginfo record is acceptable. */
2199 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
2200 if (n != sizeof(sfsi)) {
2205 if (errno == EINTR || errno == EAGAIN)
2211 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
/* SIGUSR1: create the "flushed" flag file and flush the runtime journal. */
2213 if (sfsi.ssi_signo == SIGUSR1) {
2214 touch("/run/systemd/journal/flushed");
2215 server_flush_to_var(s);
2219 if (sfsi.ssi_signo == SIGUSR2) {
/* --- Kernel log --- */
2227 } else if (ev->data.fd == s->dev_kmsg_fd) {
2230 if (ev->events != EPOLLIN) {
2231 log_info("Got invalid event from epoll.");
2235 r = server_read_dev_kmsg(s);
/* --- Datagram sockets (native protocol and syslog) --- */
2241 } else if (ev->data.fd == s->native_fd ||
2242 ev->data.fd == s->syslog_fd) {
2244 if (ev->events != EPOLLIN) {
2245 log_info("Got invalid event from epoll.");
2250 struct msghdr msghdr;
2252 struct ucred *ucred = NULL;
2253 struct timeval *tv = NULL;
2254 struct cmsghdr *cmsg;
2256 size_t label_len = 0;
2258 struct cmsghdr cmsghdr;
2260 /* We use NAME_MAX space for the
2261 * SELinux label here. The kernel
2262 * currently enforces no limit, but
2263 * according to suggestions from the
2264 * SELinux people this will change and
2265 * it will probably be identical to
2266 * NAME_MAX. For now we use that, but
2267 * this should be updated one day when
2268 * the final limit is known.*/
2269 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
2270 CMSG_SPACE(sizeof(struct timeval)) +
2271 CMSG_SPACE(sizeof(int)) + /* fd */
2272 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask the socket for a byte count (v) and make sure s->buffer can hold it. */
2279 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
2280 log_error("SIOCINQ failed: %m");
2284 if (s->buffer_size < (size_t) v) {
/* Grow to at least LINE_MAX beyond v, or double the current size. */
2288 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
2289 b = realloc(s->buffer, l+1);
2292 log_error("Couldn't increase buffer.");
2301 iovec.iov_base = s->buffer;
2302 iovec.iov_len = s->buffer_size;
2306 msghdr.msg_iov = &iovec;
2307 msghdr.msg_iovlen = 1;
2308 msghdr.msg_control = &control;
2309 msghdr.msg_controllen = sizeof(control);
/* Received fds are marked close-on-exec right away. */
2311 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
2314 if (errno == EINTR || errno == EAGAIN)
2317 log_error("recvmsg() failed: %m");
/* Pick sender credentials, SELinux label, receive timestamp and passed
 * fds out of the control messages. */
2321 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
2323 if (cmsg->cmsg_level == SOL_SOCKET &&
2324 cmsg->cmsg_type == SCM_CREDENTIALS &&
2325 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
2326 ucred = (struct ucred*) CMSG_DATA(cmsg);
2327 else if (cmsg->cmsg_level == SOL_SOCKET &&
2328 cmsg->cmsg_type == SCM_SECURITY) {
2329 label = (char*) CMSG_DATA(cmsg);
2330 label_len = cmsg->cmsg_len - CMSG_LEN(0);
2331 } else if (cmsg->cmsg_level == SOL_SOCKET &&
2332 cmsg->cmsg_type == SO_TIMESTAMP &&
2333 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
2334 tv = (struct timeval*) CMSG_DATA(cmsg);
2335 else if (cmsg->cmsg_level == SOL_SOCKET &&
2336 cmsg->cmsg_type == SCM_RIGHTS) {
2337 fds = (int*) CMSG_DATA(cmsg);
2338 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* syslog: only plain payload is accepted; fds are warned about. */
2342 if (ev->data.fd == s->syslog_fd) {
2345 if (n > 0 && n_fds == 0) {
2346 e = memchr(s->buffer, '\n', n);
2352 process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
2353 } else if (n_fds > 0)
2354 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* native: either an inline payload, or an empty datagram carrying
 * exactly one fd that holds the message. */
2357 if (n > 0 && n_fds == 0)
2358 process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
2359 else if (n == 0 && n_fds == 1)
2360 process_native_file(s, fds[0], ucred, tv, label, label_len);
2362 log_warning("Got too many file descriptors via native socket. Ignoring.");
/* Close every fd that was passed to us. */
2365 close_many(fds, n_fds);
/* --- New stdout stream connection --- */
2370 } else if (ev->data.fd == s->stdout_fd) {
2372 if (ev->events != EPOLLIN) {
2373 log_info("Got invalid event from epoll.");
2377 stdout_stream_new(s);
/* --- Existing stdout stream --- */
2381 StdoutStream *stream;
/* Only EPOLLIN and EPOLLHUP bits may be set for streams. */
2383 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
2384 log_info("Got invalid event from epoll.");
2388 /* If it is none of the well-known fds, it must be an
2389 * stdout stream fd. Note that this is a bit ugly here
2390 * (since we rely that none of the well-known fds
2391 * could be interpreted as pointer), but nonetheless
2392 * safe, since the well-known fds would never get an
2393 * fd > 4096, i.e. beyond the first memory page */
2395 stream = ev->data.ptr;
/* A result <= 0 from processing ends the stream's life. */
2397 if (stdout_stream_process(stream) <= 0)
2398 stdout_stream_free(stream);
2403 log_error("Unknown event.");
/* Sets up the syslog datagram socket at /dev/log. If s->syslog_fd is not set
 * yet, the socket is created and bound here. SO_PASSCRED, SO_PASSSEC and
 * SO_TIMESTAMP are then enabled on it and it is registered with the epoll
 * instance. */
2407 static int open_syslog_socket(Server *s) {
2408 union sockaddr_union sa;
2410 struct epoll_event ev;
2414 if (s->syslog_fd < 0) {
2416 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2417 if (s->syslog_fd < 0) {
2418 log_error("socket() failed: %m");
2423 sa.un.sun_family = AF_UNIX;
2424 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
/* Remove a stale socket node before binding. */
2426 unlink(sa.un.sun_path);
2428 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2430 log_error("bind() failed: %m");
/* Make the socket node accessible to all users. */
2434 chmod(sa.un.sun_path, 0666);
2436 fd_nonblock(s->syslog_fd, 1);
/* Request sender credentials with each datagram. */
2439 r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2441 log_error("SO_PASSCRED failed: %m");
/* Request the sender's security label; failure only produces a warning. */
2447 r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2449 log_warning("SO_PASSSEC failed: %m");
/* Request receive timestamps. */
2453 r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2455 log_error("SO_TIMESTAMP failed: %m");
2460 ev.events = EPOLLIN;
2461 ev.data.fd = s->syslog_fd;
2462 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2463 log_error("Failed to add syslog server fd to epoll object: %m");
/* Sets up the native journal datagram socket at /run/systemd/journal/socket.
 * If s->native_fd is not set yet, the socket is created and bound here.
 * SO_PASSCRED, SO_PASSSEC and SO_TIMESTAMP are then enabled on it and it is
 * registered with the epoll instance. */
2470 static int open_native_socket(Server*s) {
2471 union sockaddr_union sa;
2473 struct epoll_event ev;
2477 if (s->native_fd < 0) {
2479 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2480 if (s->native_fd < 0) {
2481 log_error("socket() failed: %m");
2486 sa.un.sun_family = AF_UNIX;
2487 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
/* Remove a stale socket node before binding. */
2489 unlink(sa.un.sun_path);
2491 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2493 log_error("bind() failed: %m");
/* Make the socket node accessible to all users. */
2497 chmod(sa.un.sun_path, 0666);
2499 fd_nonblock(s->native_fd, 1);
/* Request sender credentials with each datagram. */
2502 r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2504 log_error("SO_PASSCRED failed: %m");
/* Request the sender's security label on the native socket itself. This
 * used to be applied to s->syslog_fd by mistake, which left the native
 * socket without SO_PASSSEC. Failure only produces a warning. */
2510 r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2512 log_warning("SO_PASSSEC failed: %m");
/* Request receive timestamps. */
2516 r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2518 log_error("SO_TIMESTAMP failed: %m");
2523 ev.events = EPOLLIN;
2524 ev.data.fd = s->native_fd;
2525 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2526 log_error("Failed to add native server fd to epoll object: %m");
/* Sets up the stream socket at /run/systemd/journal/stdout on which stdout
 * stream connections are accepted. If s->stdout_fd is not set yet, the socket
 * is created, bound and put into listening state here. It is then registered
 * with the epoll instance. */
2533 static int open_stdout_socket(Server *s) {
2534 union sockaddr_union sa;
2536 struct epoll_event ev;
2540 if (s->stdout_fd < 0) {
2542 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2543 if (s->stdout_fd < 0) {
2544 log_error("socket() failed: %m");
2549 sa.un.sun_family = AF_UNIX;
2550 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
/* Remove a stale socket node before binding. */
2552 unlink(sa.un.sun_path);
2554 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2556 log_error("bind() failed: %m");
/* Make the socket node accessible to all users. */
2560 chmod(sa.un.sun_path, 0666);
/* The log message now names the failing call correctly (was "liste()"). */
2562 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2563 log_error("listen() failed: %m");
2567 fd_nonblock(s->stdout_fd, 1);
2570 ev.events = EPOLLIN;
2571 ev.data.fd = s->stdout_fd;
2572 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2573 log_error("Failed to add stdout server fd to epoll object: %m");
/* Opens /dev/kmsg (read/write, non-blocking, close-on-exec) and adds it to
 * the epoll set. s->dev_kmsg_readable is set once the fd is registered. */
2580 static int open_dev_kmsg(Server *s) {
2581 struct epoll_event ev;
2585 s->dev_kmsg_fd = open("/dev/kmsg", O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
/* Not being able to open the device is only worth a warning. */
2586 if (s->dev_kmsg_fd < 0) {
2587 log_warning("Failed to open /dev/kmsg, ignoring: %m");
2592 ev.events = EPOLLIN;
2593 ev.data.fd = s->dev_kmsg_fd;
2594 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->dev_kmsg_fd, &ev) < 0) {
2596 /* This will fail with EPERM on older kernels where
2597 * /dev/kmsg is not readable. */
2601 log_error("Failed to add /dev/kmsg fd to epoll object: %m");
2605 s->dev_kmsg_readable = true;
/* Sets the process signal mask to SIGINT, SIGTERM, SIGUSR1 and SIGUSR2,
 * creates a non-blocking, close-on-exec signalfd for the same set and
 * registers it with the epoll instance. */
2610 static int open_signalfd(Server *s) {
2612 struct epoll_event ev;
2616 assert_se(sigemptyset(&mask) == 0);
2617 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
/* SIG_SETMASK replaces the blocked set with exactly this mask. */
2618 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2620 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2621 if (s->signal_fd < 0) {
2622 log_error("signalfd(): %m");
2627 ev.events = EPOLLIN;
2628 ev.data.fd = s->signal_fd;
2630 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2631 log_error("epoll_ctl(): %m");
/* Applies journald switches from the kernel command line. /proc/cmdline is
 * split into words; systemd.journald.forward_to_syslog=, forward_to_kmsg= and
 * forward_to_console= are parsed as booleans and stored in the Server. Other
 * words starting with "systemd.journald" trigger a warning. */
2638 static int server_parse_proc_cmdline(Server *s) {
2639 char *line, *w, *state;
/* Container detection comes first. */
2643 if (detect_container(NULL) > 0)
2646 r = read_one_line_file("/proc/cmdline", &line);
2648 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2652 FOREACH_WORD_QUOTED(w, l, line, state) {
/* Work on a NUL-terminated copy of the current word. */
2655 word = strndup(w, l);
/* The numeric offsets below (35, 33, 36) are the lengths of the respective
 * prefixes, i.e. they point at the value following the '='. */
2661 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
2662 r = parse_boolean(word + 35);
2664 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2666 s->forward_to_syslog = r;
2667 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
2668 r = parse_boolean(word + 33);
2670 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2672 s->forward_to_kmsg = r;
2673 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
2674 r = parse_boolean(word + 36);
2676 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2678 s->forward_to_console = r;
2679 } else if (startswith(word, "systemd.journald"))
2680 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Loads /etc/systemd/journald.conf: the file is opened and its "Journal"
 * section parsed into the Server via config_parse(), using the gperf-based
 * lookup table journald_gperf_lookup. */
2692 static int server_parse_config_file(Server *s) {
2699 fn = "/etc/systemd/journald.conf";
/* "e" in the mode string requests close-on-exec (glibc extension). */
2700 f = fopen(fn, "re");
/* A missing file (ENOENT) is told apart from other open failures. */
2702 if (errno == ENOENT)
2705 log_warning("Failed to open configuration file %s: %m", fn);
2709 r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2711 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Initializes the Server: sets defaults, reads configuration file and kernel
 * command line, creates the epoll instance, takes over sockets passed in by
 * the service manager, opens whatever sockets/devices are still missing, and
 * finally sets up rate limiting and the journal files. */
2718 static int server_init(Server *s) {
/* Mark all fds as not opened yet. */
2724 s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
/* Built-in defaults; configuration parsed below may override them. */
2727 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
2728 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
2730 s->forward_to_syslog = true;
2732 s->max_level_store = LOG_DEBUG;
2733 s->max_level_syslog = LOG_DEBUG;
2734 s->max_level_kmsg = LOG_NOTICE;
2735 s->max_level_console = LOG_INFO;
/* Fill both metrics structures with 0xFF bytes.
 * NOTE(review): presumably "all bits set" means "unset, use default" — confirm. */
2737 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
2738 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* The kernel command line is parsed after the configuration file. */
2740 server_parse_config_file(s);
2741 server_parse_proc_cmdline(s);
2743 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
2744 if (!s->user_journals)
2747 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2748 if (s->epoll_fd < 0) {
2749 log_error("Failed to create epoll object: %m");
/* Classify the fds handed to us via socket activation by type and path;
 * at most one socket of each kind is accepted. */
2753 n = sd_listen_fds(true);
2755 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
2759 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2761 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
2763 if (s->native_fd >= 0) {
2764 log_error("Too many native sockets passed.");
2770 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
2772 if (s->stdout_fd >= 0) {
2773 log_error("Too many stdout sockets passed.");
2779 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
2781 if (s->syslog_fd >= 0) {
2782 log_error("Too many /dev/log sockets passed.");
2789 log_error("Unknown socket passed.");
/* Each open_*() helper creates its socket only if the fd is still unset,
 * and in any case registers it with epoll. */
2794 r = open_syslog_socket(s);
2798 r = open_native_socket(s);
2802 r = open_stdout_socket(s);
2806 r = open_dev_kmsg(s);
2810 r = open_signalfd(s);
2814 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2818 r = system_journal_open(s);
/* Releases everything the Server holds: stdout streams, the system, runtime
 * and per-user journal files, all opened file descriptors and the rate
 * limiter. */
2825 static void server_done(Server *s) {
/* stdout_stream_free() unlinks the stream from the list, so this terminates. */
2829 while (s->stdout_streams)
2830 stdout_stream_free(s->stdout_streams);
2832 if (s->system_journal)
2833 journal_file_close(s->system_journal);
2835 if (s->runtime_journal)
2836 journal_file_close(s->runtime_journal);
/* Close every user journal, then free the (now empty) hashmap. */
2838 while ((f = hashmap_steal_first(s->user_journals)))
2839 journal_file_close(f);
2841 hashmap_free(s->user_journals);
/* fds that were never opened are still -1 and are skipped. */
2843 if (s->epoll_fd >= 0)
2844 close_nointr_nofail(s->epoll_fd);
2846 if (s->signal_fd >= 0)
2847 close_nointr_nofail(s->signal_fd);
2849 if (s->syslog_fd >= 0)
2850 close_nointr_nofail(s->syslog_fd);
2852 if (s->native_fd >= 0)
2853 close_nointr_nofail(s->native_fd);
2855 if (s->stdout_fd >= 0)
2856 close_nointr_nofail(s->stdout_fd);
2858 if (s->dev_kmsg_fd >= 0)
2859 close_nointr_nofail(s->dev_kmsg_fd);
2862 journal_rate_limit_free(s->rate_limit);
/* Entry point of the journal daemon: sets up logging, initializes the server,
 * vacuums and flushes pending data, then processes epoll events one at a time
 * in a loop. Afterwards the server is torn down. The exit status is
 * EXIT_FAILURE if the last result r was negative, EXIT_SUCCESS otherwise. */
2868 int main(int argc, char *argv[]) {
2872 /* if (getppid() != 1) { */
2873 /* log_error("This program should be invoked by init only."); */
2874 /* return EXIT_FAILURE; */
2878 log_error("This program does not take arguments.");
2879 return EXIT_FAILURE;
2882 log_set_target(LOG_TARGET_SAFE);
2883 log_set_facility(LOG_SYSLOG);
2884 log_parse_environment();
2889 r = server_init(&server);
/* Start-up housekeeping: vacuum, flush the runtime journal to /var, and
 * flush /dev/kmsg. */
2893 server_vacuum(&server);
2894 server_flush_to_var(&server);
2895 server_flush_dev_kmsg(&server);
2897 log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
2898 driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
2902 "STATUS=Processing requests...");
2905 struct epoll_event event;
/* Wait without timeout for a single event and dispatch it. */
2907 r = epoll_wait(server.epoll_fd, &event, 1, -1);
2913 log_error("epoll_wait() failed: %m");
2919 r = process_event(&server, &event);
2926 log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
2927 driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
2931 "STATUS=Shutting down...");
2933 server_done(&server);
2935 return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;