1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
25 #include <sys/signalfd.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
33 #include <systemd/sd-journal.h>
34 #include <systemd/sd-messages.h>
35 #include <systemd/sd-daemon.h>
38 #include <systemd/sd-login.h>
43 #include "journal-file.h"
44 #include "socket-util.h"
45 #include "cgroup-util.h"
47 #include "journal-rate-limit.h"
48 #include "journal-internal.h"
49 #include "conf-parser.h"
56 #include <acl/libacl.h>
61 #include <selinux/selinux.h>
/* find_journal() closes already open per-user journal files while this many are open. */
64 #define USER_JOURNALS_MAX 1024
/* stdout_stream_new() refuses further connections once this many streams exist. */
65 #define STDOUT_STREAMS_MAX 4096
67 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
68 #define DEFAULT_RATE_LIMIT_BURST 200
/* available_space() reuses its cached result for this long before recomputing. */
70 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Number of iovec slots dispatch_message_real() asserts are still free for
 * the metadata fields it appends; callers size their arrays with it. */
72 #define N_IOVEC_META_FIELDS 17
/* Loop bound for the attached metadata fields collected in dev_kmsg_record(). */
73 #define N_IOVEC_KERNEL_FIELDS 64
/* process_native_file() ignores passed files larger than this many bytes. */
75 #define ENTRY_SIZE_MAX (1024*1024*32)
/* Parser state of a stdout stream connection. Each value names the header
 * line stdout_stream_line() expects next; the states are visited in the
 * order listed, and after the last header line the stream switches to
 * STDOUT_STREAM_RUNNING, where every further line is logged. */
77 typedef enum StdoutStreamState {
78 STDOUT_STREAM_IDENTIFIER,
79 STDOUT_STREAM_UNIT_ID,
80 STDOUT_STREAM_PRIORITY,
81 STDOUT_STREAM_LEVEL_PREFIX,
82 STDOUT_STREAM_FORWARD_TO_SYSLOG,
83 STDOUT_STREAM_FORWARD_TO_KMSG,
84 STDOUT_STREAM_FORWARD_TO_CONSOLE,
/* Header line that stdout_stream_line() expects next */
90 StdoutStreamState state;
/* Peer label obtained with getpeercon() in stdout_stream_new() and released
 * with freecon() in stdout_stream_free() */
96 security_context_t security_context;
/* Per-stream forwarding switches, parsed from the stream header; they are
 * OR-ed with the server-wide settings in stdout_stream_log() */
103 bool forward_to_syslog:1;
104 bool forward_to_kmsg:1;
105 bool forward_to_console:1;
/* Receive buffer; stdout_stream_process() reads at most sizeof(buffer)-1
 * bytes into it */
107 char buffer[LINE_MAX+1];
/* Linkage for the server-wide list of streams (s->stdout_streams) */
110 LIST_FIELDS(StdoutStream, stdout_stream);
/* Configuration names of the Storage setting, indexed by the Storage enum value. */
113 static const char* const storage_table[] = {
114 [STORAGE_AUTO] = "auto",
115 [STORAGE_VOLATILE] = "volatile",
116 [STORAGE_PERSISTENT] = "persistent",
117 [STORAGE_NONE] = "none"
/* NOTE(review): both macros are defined elsewhere; presumably they generate
 * the string/enum lookup helpers for storage_table and the config parser
 * callback config_parse_storage -- confirm against their definitions. */
120 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
121 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Computes how many more bytes may be written to the journal directory and
 * caches the answer in the Server object.
 *
 * The cached value is returned as long as it is younger than
 * RECHECK_AVAILABLE_SPACE_USEC. Otherwise the allocated size of all regular
 * "*.journal" and "*.journal~" files in the per-machine directory is summed
 * and checked against two limits taken from the matching metrics: max_use
 * (cap on journal usage) and keep_free (space to leave on the file system). */
123 static uint64_t available_space(Server *s) {
128 uint64_t sum = 0, avail = 0, ss_avail = 0;
134 ts = now(CLOCK_MONOTONIC);
/* Serve from the cache while it is still fresh */
136 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
137 return s->cached_available_space;
139 r = sd_id128_get_machine(&machine);
/* Pick directory and limits: /var if the system journal is open, /run otherwise */
143 if (s->system_journal) {
144 f = "/var/log/journal/";
145 m = &s->system_metrics;
147 f = "/run/log/journal/";
148 m = &s->runtime_metrics;
153 p = strappend(f, sd_id128_to_string(machine, ids));
163 if (fstatvfs(dirfd(d), &ss) < 0)
168 struct dirent buf, *de;
170 r = readdir_r(d, &buf, &de);
/* Only journal files are accounted for */
177 if (!endswith(de->d_name, ".journal") &&
178 !endswith(de->d_name, ".journal~"))
181 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
184 if (!S_ISREG(st.st_mode))
/* st_blocks counts 512-byte blocks, so this measures allocated space
 * rather than the nominal file length */
187 sum += (uint64_t) st.st_blocks * 512UL;
/* Room left below the configured usage cap, floored at zero */
190 avail = sum >= m->max_use ? 0 : m->max_use - sum;
/* Room left on the file system after reserving keep_free, floored at zero */
192 ss_avail = ss.f_bsize * ss.f_bavail;
194 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
196 if (ss_avail < avail)
/* Remember the result together with the time it was computed */
199 s->cached_available_space = avail;
200 s->cached_available_space_timestamp = ts;
/* Resolves the GID of the "adm" group once and stores it in s->file_gid.
 * Does nothing if s->file_gid_valid is already set; a lookup failure is only
 * logged and the result is still marked valid so it is not retried. */
208 static void server_read_file_gid(Server *s) {
209 const char *adm = "adm";
214 if (s->file_gid_valid)
217 r = get_group_creds(&adm, &s->file_gid);
219 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
221 /* if we couldn't read the gid, then it will be 0, but that's
222 * fine and we shouldn't try to resolve the group again, so
223 * let's just pretend it worked right-away. */
224 s->file_gid_valid = true;
/* Adjusts ownership, mode and ACL of journal file f.
 *
 * The file is set to mode 0640, owner 0 and group s->file_gid. Then an
 * ACL_USER entry granting ACL_READ to uid is looked up or created on the
 * file and the ACL mask recalculated. Every failure is logged as a warning
 * and otherwise ignored. */
227 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
232 acl_permset_t permset;
/* Make sure s->file_gid has been resolved before it is used below */
237 server_read_file_gid(s);
239 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
241 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
247 acl = acl_get_fd(f->fd);
249 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Reuse an existing entry for this uid, or add a new ACL_USER entry */
253 r = acl_find_uid(acl, uid, &entry);
256 if (acl_create_entry(&acl, &entry) < 0 ||
257 acl_set_tag_type(entry, ACL_USER) < 0 ||
258 acl_set_qualifier(entry, &uid) < 0) {
259 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Grant read access and recompute the mask so the grant is effective */
264 if (acl_get_permset(entry, &permset) < 0 ||
265 acl_add_perm(permset, ACL_READ) < 0 ||
266 acl_calc_mask(&acl) < 0) {
267 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
271 if (acl_set_fd(f->fd, acl) < 0)
272 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Returns the journal file that entries of user uid are written to.
 *
 * The runtime journal is used whenever it is open. Otherwise the per-user
 * file "/var/log/journal/<machine-id>/user-<uid>.journal" is looked up in
 * s->user_journals, or opened and registered there on first use. On every
 * visible error path the system journal is returned instead. */
279 static JournalFile* find_journal(Server *s, uid_t uid) {
288 /* We split up user logs only on /var, not on /run. If the
289 * runtime file is open, we write to it exclusively, in order
290 * to guarantee proper order as soon as we flush /run to
291 * /var and close the runtime file. */
293 if (s->runtime_journal)
294 return s->runtime_journal;
297 return s->system_journal;
299 r = sd_id128_get_machine(&machine);
301 return s->system_journal;
/* Already open for this user? */
303 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
307 if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0)
308 return s->system_journal;
310 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
311 /* Too many open? Then let's close one */
312 f = hashmap_steal_first(s->user_journals);
314 journal_file_close(f);
317 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, &s->system_metrics, s->system_journal, &f);
321 return s->system_journal;
/* Give the owning user read access to the freshly opened file */
323 server_fix_perms(s, f, uid);
325 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
327 journal_file_close(f);
328 return s->system_journal;
/* Rotates every open journal file: the runtime journal, the system journal
 * and all per-user journals in s->user_journals.
 *
 * journal_file_rotate() is handed the address of the file pointer; the code
 * below tells "rotation failed" from "could not create the new file" by
 * whether that pointer is still set afterwards. After a successful rotation
 * the permissions of the new file are fixed up with server_fix_perms(). */
334 static void server_rotate(Server *s) {
340 log_info("Rotating...");
342 if (s->runtime_journal) {
343 r = journal_file_rotate(&s->runtime_journal);
345 if (s->runtime_journal)
346 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
348 log_error("Failed to create new runtime journal: %s", strerror(-r));
350 server_fix_perms(s, s->runtime_journal, 0);
353 if (s->system_journal) {
354 r = journal_file_rotate(&s->system_journal);
356 if (s->system_journal)
357 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
359 log_error("Failed to create new system journal: %s", strerror(-r));
362 server_fix_perms(s, s->system_journal, 0);
365 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
366 r = journal_file_rotate(&f);
369 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
371 log_error("Failed to create user journal: %s", strerror(-r));
/* Store the new file under the same uid key and grant that user access to
 * it. The permissions must be fixed on the rotated user file f itself, not
 * on the system journal. */
373 hashmap_replace(s->user_journals, k, f);
374 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Removes old journal files so that the configured limits are respected.
 *
 * For each of the system and runtime journals that is open, the matching
 * per-machine directory is handed to journal_directory_vacuum() together
 * with its max_use and keep_free settings. A missing directory (-ENOENT) is
 * not reported. Finally the available_space() cache is invalidated. */
379 static void server_vacuum(Server *s) {
385 log_info("Vacuuming...");
387 r = sd_id128_get_machine(&machine);
389 log_error("Failed to get machine ID: %s", strerror(-r));
393 sd_id128_to_string(machine, ids);
395 if (s->system_journal) {
396 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
401 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
402 if (r < 0 && r != -ENOENT)
403 log_error("Failed to vacuum %s: %s", p, strerror(-r));
407 if (s->runtime_journal) {
408 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
413 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
414 if (r < 0 && r != -ENOENT)
415 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* Disk usage changed: force available_space() to recompute on its next call */
419 s->cached_available_space_timestamp = 0;
/* Returns a newly allocated cgroup path of process pid, made relative to the
 * cgroup of PID 1 where possible.
 *
 * The cgroup paths of pid and of PID 1 are both queried in the systemd
 * hierarchy. A trailing "/system" is cut off the PID 1 path, and if the
 * process path starts with the result, that prefix is dropped from the
 * returned copy. NOTE(review): the other branches and the error returns are
 * not visible in this excerpt. */
422 static char *shortened_cgroup_path(pid_t pid) {
424 char *process_path, *init_path, *path;
428 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
432 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
/* 7 == strlen("/system") */
438 if (endswith(init_path, "/system"))
439 init_path[strlen(init_path) - 7] = 0;
440 else if (streq(init_path, "/"))
443 if (startswith(process_path, init_path)) {
446 p = strdup(process_path + strlen(init_path));
/* Appends one entry, given as n iovec fields, to the journal file selected
 * for uid by find_journal().
 *
 * If the file suggests rotation this is done before writing. When appending
 * fails with one of the error codes listed below, the reason is logged, the
 * journal is looked up again via find_journal() and the write is retried;
 * any other error is logged and the entry is dropped. */
464 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
466 bool vacuumed = false;
473 f = find_journal(s, uid);
477 if (journal_file_rotate_suggested(f)) {
478 log_info("Journal header limits reached or header out-of-date, rotating.");
483 f = find_journal(s, uid);
489 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
494 (r != -E2BIG && /* hit limit */
495 r != -EFBIG && /* hit fs limit */
496 r != -EDQUOT && /* quota hit */
497 r != -ENOSPC && /* disk full */
498 r != -EBADMSG && /* corrupted */
499 r != -ENODATA && /* truncated */
500 r != -EHOSTDOWN && /* other machine */
501 r != -EPROTONOSUPPORT && /* unsupported feature */
502 r != -EBUSY && /* unclean shutdown */
503 r != -ESHUTDOWN /* already archived */)) {
504 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* r holds a negative errno value here, so every constant must be negated;
 * otherwise quota and disk-full errors are misreported as corruption. */
508 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
509 log_info("Allocation limit reached, rotating.");
510 else if (r == -EHOSTDOWN)
511 log_info("Journal file from other machine, rotating.");
512 else if (r == -EBUSY)
513 log_info("Unclean shutdown, rotating.");
515 log_warning("Journal file corrupted, rotating.");
521 f = find_journal(s, uid);
525 log_info("Retrying write.");
/* Appends the trusted metadata fields (those whose names start with an
 * underscore) to an entry and writes it to the journal.
 *
 * iovec holds n fields already and has room for m; the caller must leave at
 * least N_IOVEC_META_FIELDS free slots. Every field is best effort: if a
 * lookup or allocation fails, that field is simply left out. The entry goes
 * to the journal of UID 0 when the sender's real UID is 0, and to the
 * journal of the sender's audit login UID otherwise. */
529 static void dispatch_message_real(
531 struct iovec *iovec, unsigned n, unsigned m,
534 const char *label, size_t label_len,
535 const char *unit_id) {
537 char *pid = NULL, *uid = NULL, *gid = NULL,
538 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
539 *comm = NULL, *cmdline = NULL, *hostname = NULL,
540 *audit_session = NULL, *audit_loginuid = NULL,
541 *exe = NULL, *cgroup = NULL, *session = NULL,
542 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
548 uid_t loginuid = 0, realuid = 0;
553 assert(n + N_IOVEC_META_FIELDS <= m);
561 realuid = ucred->uid;
/* Credentials as delivered with the message */
563 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
564 IOVEC_SET_STRING(iovec[n++], pid);
566 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
567 IOVEC_SET_STRING(iovec[n++], uid);
569 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
570 IOVEC_SET_STRING(iovec[n++], gid);
/* Details looked up for the sending process by its PID */
572 r = get_process_comm(ucred->pid, &t);
574 comm = strappend("_COMM=", t);
578 IOVEC_SET_STRING(iovec[n++], comm);
581 r = get_process_exe(ucred->pid, &t);
583 exe = strappend("_EXE=", t);
587 IOVEC_SET_STRING(iovec[n++], exe);
590 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
592 cmdline = strappend("_CMDLINE=", t);
596 IOVEC_SET_STRING(iovec[n++], cmdline);
599 r = audit_session_from_pid(ucred->pid, &audit);
601 if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
602 IOVEC_SET_STRING(iovec[n++], audit_session);
604 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
606 if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
607 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
609 t = shortened_cgroup_path(ucred->pid);
611 cgroup = strappend("_SYSTEMD_CGROUP=", t);
615 IOVEC_SET_STRING(iovec[n++], cgroup);
619 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
620 session = strappend("_SYSTEMD_SESSION=", t);
624 IOVEC_SET_STRING(iovec[n++], session);
/* sd_pid_get_owner_uid() takes a process ID as its first argument, so the
 * sender's PID must be passed here, not its UID. */
627 if (sd_pid_get_owner_uid(ucred->pid, &owner) >= 0)
628 if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
629 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* Unit from the process cgroup, or the unit_id supplied by the caller */
632 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
633 unit = strappend("_SYSTEMD_UNIT=", t);
636 unit = strappend("_SYSTEMD_UNIT=", unit_id);
639 IOVEC_SET_STRING(iovec[n++], unit);
/* label is given with an explicit length and is copied with memcpy; the
 * terminating NUL is written by hand */
643 selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
644 if (selinux_context) {
645 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
646 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
647 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
648 IOVEC_SET_STRING(iovec[n++], selinux_context);
651 security_context_t con;
653 if (getpidcon(ucred->pid, &con) >= 0) {
654 selinux_context = strappend("_SELINUX_CONTEXT=", con);
656 IOVEC_SET_STRING(iovec[n++], selinux_context);
665 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
666 (unsigned long long) timeval_load(tv)) >= 0)
667 IOVEC_SET_STRING(iovec[n++], source_time);
670 /* Note that strictly speaking storing the boot id here is
671 * redundant since the entry includes this in-line
672 * anyway. However, we need this indexed, too. */
673 r = sd_id128_get_boot(&id);
675 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
676 IOVEC_SET_STRING(iovec[n++], boot_id);
678 r = sd_id128_get_machine(&id);
680 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
681 IOVEC_SET_STRING(iovec[n++], machine_id);
683 t = gethostname_malloc();
685 hostname = strappend("_HOSTNAME=", t);
688 IOVEC_SET_STRING(iovec[n++], hostname);
/* Root's messages go to the journal of UID 0, everything else to the
 * journal of the audit login UID */
693 write_to_journal(s, realuid == 0 ? 0 : loginuid, iovec, n);
706 free(audit_loginuid);
711 free(selinux_context);
/* Logs a message generated by the journal daemon itself.
 *
 * format and the variadic arguments are printf-style and form the MESSAGE=
 * field; message_id becomes the MESSAGE_ID= field. The entry is written with
 * priority 6, transport "driver" and the credentials of the calling process,
 * and goes straight to dispatch_message_real(). */
714 static void driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
/* 11 == strlen("MESSAGE_ID="), followed by 32 characters for the ID and a NUL */
715 char mid[11 + 32 + 1];
716 char buffer[16 + LINE_MAX + 1];
/* The four fields set below plus room for the metadata added later */
717 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
725 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
726 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* 8 == strlen("MESSAGE="); the formatted text is placed right behind it */
728 memcpy(buffer, "MESSAGE=", 8);
729 va_start(ap, format);
730 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
732 char_array_0(buffer);
733 IOVEC_SET_STRING(iovec[n++], buffer);
735 snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
737 IOVEC_SET_STRING(iovec[n++], mid);
740 ucred.pid = getpid();
741 ucred.uid = getuid();
742 ucred.gid = getgid();
744 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Applies the storage level filter and rate limiting to an entry, then hands
 * it to dispatch_message_real().
 *
 * The priority is compared against s->max_level_store. Rate limiting is
 * keyed by the sender's shortened cgroup path, truncated as described below,
 * and also takes the currently available disk space into account. When
 * messages were dropped before, a "Suppressed ..." driver message is
 * written. */
747 static void dispatch_message(Server *s,
748 struct iovec *iovec, unsigned n, unsigned m,
751 const char *label, size_t label_len,
755 char *path = NULL, *c;
758 assert(iovec || n == 0);
763 if (LOG_PRI(priority) > s->max_level_store)
769 path = shortened_cgroup_path(ucred->pid);
773 /* example: /user/lennart/3/foobar
774 * /system/dbus.service/foobar
776 * So let's cut off everything past the third /, since that is
777 * where user directories start */
779 c = strchr(path, '/');
781 c = strchr(c+1, '/');
783 c = strchr(c+1, '/');
789 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
796 /* Write a suppression message if we suppressed something */
798 driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
803 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
/* Sends a syslog message, given as n_iovec pieces, as one datagram to the
 * socket /run/systemd/journal/syslog via s->syslog_fd.
 *
 * The sender credentials in ucred are attached as SCM_CREDENTIALS control
 * data. If sending fails with ESRCH the credentials are replaced and the
 * send is retried once. A failure is only logged at debug level. tv is
 * currently not transmitted (see the comment before the first sendmsg()). */
806 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
807 struct msghdr msghdr;
808 struct cmsghdr *cmsg;
/* Control buffer sized for exactly one struct ucred */
810 struct cmsghdr cmsghdr;
811 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
813 union sockaddr_union sa;
820 msghdr.msg_iov = (struct iovec*) iovec;
821 msghdr.msg_iovlen = n_iovec;
824 sa.un.sun_family = AF_UNIX;
825 strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
826 msghdr.msg_name = &sa;
827 msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
831 msghdr.msg_control = &control;
832 msghdr.msg_controllen = sizeof(control);
834 cmsg = CMSG_FIRSTHDR(&msghdr);
835 cmsg->cmsg_level = SOL_SOCKET;
836 cmsg->cmsg_type = SCM_CREDENTIALS;
837 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
838 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
839 msghdr.msg_controllen = cmsg->cmsg_len;
842 /* Forward the syslog message we received via /dev/log to
843 * /run/systemd/journal/syslog. Unfortunately we currently can't set
844 * the SO_TIMESTAMP auxiliary data, and hence we don't. */
846 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
849 /* The socket is full? I guess the syslog implementation is
850 * too slow, and we shouldn't wait for that... */
854 if (ucred && errno == ESRCH) {
857 /* Hmm, presumably the sender process vanished
858 * by now, so let's fix it as good as we
863 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
865 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
873 log_debug("Failed to forward syslog message: %m");
/* Forwards an unmodified syslog line to the syslog socket as a single
 * iovec. The level part of priority is compared against
 * s->max_level_syslog first. */
876 static void forward_syslog_raw(Server *s, int priority, const char *buffer, struct ucred *ucred, struct timeval *tv) {
882 if (LOG_PRI(priority) > s->max_level_syslog)
885 IOVEC_SET_STRING(iovec, buffer);
886 forward_syslog_iovec(s, &iovec, 1, ucred, tv);
/* Builds a syslog line "<prio>timestamp identifier[pid]: message" from its
 * parts and forwards it to the syslog socket.
 *
 * priority must be within 0..999. The timestamp is taken from tv if given
 * and from the current realtime clock otherwise. When the identifier has to
 * be looked up, the process name of ucred->pid is used. */
889 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
/* One slot each for priority, timestamp, identifier, PID and message */
890 struct iovec iovec[5];
/* header_priority holds at most "<999>" plus the terminating NUL */
891 char header_priority[6], header_time[64], header_pid[16];
895 char *ident_buf = NULL;
898 assert(priority >= 0);
899 assert(priority <= 999);
902 if (LOG_PRI(priority) > s->max_level_syslog)
905 /* First: priority field */
906 snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
907 char_array_0(header_priority);
908 IOVEC_SET_STRING(iovec[n++], header_priority);
910 /* Second: timestamp */
911 t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
915 if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
917 IOVEC_SET_STRING(iovec[n++], header_time);
919 /* Third: identifier and PID */
922 get_process_comm(ucred->pid, &ident_buf);
923 identifier = ident_buf;
926 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
927 char_array_0(header_pid);
930 IOVEC_SET_STRING(iovec[n++], identifier);
932 IOVEC_SET_STRING(iovec[n++], header_pid);
933 } else if (identifier) {
934 IOVEC_SET_STRING(iovec[n++], identifier);
935 IOVEC_SET_STRING(iovec[n++], ": ");
938 /* Fourth: message */
939 IOVEC_SET_STRING(iovec[n++], message);
941 forward_syslog_iovec(s, iovec, n, ucred, tv);
/* Replaces facility 0 in a syslog priority value with LOG_USER, keeping the
 * level bits. forward_kmsg() relies on this so that no message is written to
 * kmsg with the kernel facility. */
946 static int fixup_priority(int priority) {
948 if ((priority & LOG_FACMASK) == 0)
949 return (priority & LOG_PRIMASK) | LOG_USER;
/* Writes a message as "<prio>identifier[pid]: message\n" to the kernel log
 * buffer via s->dev_kmsg_fd.
 *
 * priority must be within 0..999. Nothing is written if the level exceeds
 * s->max_level_kmsg or if the kmsg device is not open. A write failure is
 * only logged at debug level. */
954 static void forward_kmsg(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
/* One slot each for priority, identifier, PID, message and the newline */
955 struct iovec iovec[5];
956 char header_priority[6], header_pid[16];
958 char *ident_buf = NULL;
961 assert(priority >= 0);
962 assert(priority <= 999);
965 if (_unlikely_(LOG_PRI(priority) > s->max_level_kmsg))
968 if (_unlikely_(s->dev_kmsg_fd < 0))
971 /* Never allow messages with kernel facility to be written to
972 * kmsg, regardless where the data comes from. */
973 priority = fixup_priority(priority);
975 /* First: priority field */
976 snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
977 char_array_0(header_priority);
978 IOVEC_SET_STRING(iovec[n++], header_priority);
980 /* Second: identifier and PID */
983 get_process_comm(ucred->pid, &ident_buf);
984 identifier = ident_buf;
987 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
988 char_array_0(header_pid);
991 IOVEC_SET_STRING(iovec[n++], identifier);
993 IOVEC_SET_STRING(iovec[n++], header_pid);
994 } else if (identifier) {
995 IOVEC_SET_STRING(iovec[n++], identifier);
996 IOVEC_SET_STRING(iovec[n++], ": ");
999 /* Fourth: message */
1000 IOVEC_SET_STRING(iovec[n++], message);
1001 IOVEC_SET_STRING(iovec[n++], "\n");
/* A single writev() call hands all pieces over in one go */
1003 if (writev(s->dev_kmsg_fd, iovec, n) < 0)
1004 log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
/* Writes a message as "identifier[pid]: message\n" to the console.
 *
 * The target is s->tty_path if set and /dev/console otherwise; it is opened
 * for every message and closed again afterwards. Nothing is written if the
 * level exceeds s->max_level_console. Failures are only logged at debug
 * level. */
1009 static void forward_console(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
/* One slot each for identifier, PID, message and the newline */
1010 struct iovec iovec[4];
1011 char header_pid[16];
1013 char *ident_buf = NULL;
1019 if (LOG_PRI(priority) > s->max_level_console)
1022 /* First: identifier and PID */
1025 get_process_comm(ucred->pid, &ident_buf);
1026 identifier = ident_buf;
1029 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
1030 char_array_0(header_pid);
1033 IOVEC_SET_STRING(iovec[n++], identifier);
1035 IOVEC_SET_STRING(iovec[n++], header_pid);
1036 } else if (identifier) {
1037 IOVEC_SET_STRING(iovec[n++], identifier);
1038 IOVEC_SET_STRING(iovec[n++], ": ");
1041 /* Third: message */
1042 IOVEC_SET_STRING(iovec[n++], message);
1043 IOVEC_SET_STRING(iovec[n++], "\n");
1045 tty = s->tty_path ? s->tty_path : "/dev/console";
/* O_NOCTTY: the terminal must not become our controlling terminal */
1047 fd = open_terminal(tty, O_WRONLY|O_NOCTTY|O_CLOEXEC);
1049 log_debug("Failed to open %s for logging: %s", tty, strerror(errno));
1053 if (writev(fd, iovec, n) < 0)
1054 log_debug("Failed to write to %s for logging: %s", tty, strerror(errno));
1056 close_nointr_nofail(fd);
/* Splits the "identifier[pid]:" prefix off a syslog message.
 *
 * *buf points at the message text and is advanced past the prefix and the
 * whitespace following it. The first whitespace-delimited word is inspected;
 * if it contains a bracketed part ending in ']', the text between the
 * brackets is duplicated as the PID. NOTE(review): the conditions under
 * which *identifier and *pid are assigned are not visible in this excerpt. */
1062 static void read_identifier(const char **buf, char **identifier, char **pid) {
1073 p += strspn(p, WHITESPACE);
1074 l = strcspn(p, WHITESPACE);
1083 if (p[l-1] == ']') {
/* Copies what lies between '[' at offset k and the closing ']' */
1089 t = strndup(p+k+1, l-k-2);
1109 *buf += strspn(*buf, WHITESPACE);
/* Handles one message received in classic syslog format.
 *
 * The priority prefix is parsed (default: LOG_USER|LOG_INFO), the raw line
 * is forwarded to the syslog socket if enabled, then date and
 * "identifier[pid]:" prefix are stripped and the remaining text is
 * forwarded to kmsg and console if enabled. Finally a journal entry with
 * transport "syslog" is assembled and passed to dispatch_message(). */
1112 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv, const char *label, size_t label_len) {
1113 char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
/* Up to six fields are set here, the rest is reserved for metadata */
1114 struct iovec iovec[N_IOVEC_META_FIELDS + 6];
1116 int priority = LOG_USER | LOG_INFO;
1117 char *identifier = NULL, *pid = NULL;
1124 parse_syslog_priority((char**) &buf, &priority);
/* The syslog socket receives the line exactly as it came in */
1126 if (s->forward_to_syslog)
1127 forward_syslog_raw(s, priority, orig, ucred, tv);
1129 skip_syslog_date((char**) &buf);
1130 read_identifier(&buf, &identifier, &pid);
1132 if (s->forward_to_kmsg)
1133 forward_kmsg(s, priority, identifier, buf, ucred);
1135 if (s->forward_to_console)
1136 forward_console(s, priority, identifier, buf, ucred);
1138 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
1140 if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1141 IOVEC_SET_STRING(iovec[n++], syslog_priority);
/* The facility is only recorded when it is non-zero */
1143 if (priority & LOG_FACMASK)
1144 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1145 IOVEC_SET_STRING(iovec[n++], syslog_facility);
1148 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1149 if (syslog_identifier)
1150 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1154 syslog_pid = strappend("SYSLOG_PID=", pid);
1156 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1159 message = strappend("MESSAGE=", buf);
1161 IOVEC_SET_STRING(iovec[n++], message);
1163 dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, label, label_len, NULL, priority);
1168 free(syslog_priority);
1169 free(syslog_facility);
1170 free(syslog_identifier);
/* Checks whether the l bytes at p form a field name that clients may set.
 * p does not need to be NUL-terminated. The individual rules are given by
 * the comments below. */
1174 static bool valid_user_field(const char *p, size_t l) {
1177 /* We kinda enforce POSIX syntax recommendations for
1178 environment variables here, but make a couple of additional
1181 http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html */
1183 /* No empty field names */
1187 /* Don't allow names longer than 64 chars */
1191 /* Variables starting with an underscore are protected */
1195 /* Don't allow digits as first character */
1196 if (p[0] >= '0' && p[0] <= '9')
1199 /* Only allow A-Z0-9 and '_' */
1200 for (a = p; a < p + l; a++)
1201 if (!((*a >= 'A' && *a <= 'Z') ||
1202 (*a >= '0' && *a <= '9') ||
/* Parses a buffer in the native journal protocol and dispatches the entries
 * found in it.
 *
 * The buffer is processed line by line. A line containing '=' is a text
 * field "NAME=value". A line without '=' is the name of a binary field and
 * is followed by a 64-bit length, that many bytes of data and a newline;
 * such fields are copied into newly allocated memory. Lines starting with
 * '.' or '#' are skipped. Field names are checked with valid_user_field().
 * PRIORITY, SYSLOG_FACILITY, SYSLOG_IDENTIFIER and MESSAGE are additionally
 * picked up for rate limiting and forwarding. iovec entries pointing into
 * buffer are not freed at the end; all others are. */
1209 static void process_native_message(
1211 const void *buffer, size_t buffer_size,
1212 struct ucred *ucred,
1214 const char *label, size_t label_len) {
1216 struct iovec *iovec = NULL;
/* n: fields used, m: fields allocated, tn: slot index used for the
 * _TRANSPORT field ((unsigned) -1 as initial value) */
1217 unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1220 int priority = LOG_INFO;
1221 char *identifier = NULL, *message = NULL;
1224 assert(buffer || buffer_size == 0);
1227 remaining = buffer_size;
1229 while (remaining > 0) {
1232 e = memchr(p, '\n', remaining);
1235 /* Trailing noise, let's ignore it, and flush what we collected */
1236 log_debug("Received message with trailing noise, ignoring.");
1241 /* Entry separator */
1242 dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
1244 priority = LOG_INFO;
1251 if (*p == '.' || *p == '#') {
1252 /* Ignore control commands for now, and
1254 remaining -= (e - p) + 1;
1259 /* A property follows */
1261 if (n+N_IOVEC_META_FIELDS >= m) {
1265 u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1266 c = realloc(iovec, u * sizeof(struct iovec));
1276 q = memchr(p, '=', e - p);
1278 if (valid_user_field(p, q - p)) {
1283 /* If the field name starts with an
1284 * underscore, skip the variable,
1285 * since that indicates a trusted
1287 iovec[n].iov_base = (char*) p;
1288 iovec[n].iov_len = l;
1291 /* We need to determine the priority
1292 * of this entry for the rate limiting
1295 memcmp(p, "PRIORITY=", 9) == 0 &&
1296 p[9] >= '0' && p[9] <= '9')
1297 priority = (priority & LOG_FACMASK) | (p[9] - '0');
1300 memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1301 p[16] >= '0' && p[16] <= '9')
1302 priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1305 memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1306 p[16] >= '0' && p[16] <= '9' &&
1307 p[17] >= '0' && p[17] <= '9')
1308 priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1311 memcmp(p, "SYSLOG_IDENTIFIER=", 18) == 0) {
1314 t = strndup(p + 18, l - 18);
1319 } else if (l >= 8 &&
1320 memcmp(p, "MESSAGE=", 8) == 0) {
1323 t = strndup(p + 8, l - 8);
1331 remaining -= (e - p) + 1;
1339 if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1340 log_debug("Failed to parse message, ignoring.");
1344 memcpy(&l_le, e + 1, sizeof(uint64_t));
1347 if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1348 e[1+sizeof(uint64_t)+l] != '\n') {
1349 log_debug("Failed to parse message, ignoring.");
1353 k = malloc((e - p) + 1 + l);
1359 memcpy(k, p, e - p);
1361 memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1363 if (valid_user_field(p, e - p)) {
1364 iovec[n].iov_base = k;
1365 iovec[n].iov_len = (e - p) + 1 + l;
1370 remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1371 p = e + 1 + sizeof(uint64_t) + l + 1;
1379 IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1382 if (s->forward_to_syslog)
1383 forward_syslog(s, priority, identifier, message, ucred, tv);
1385 if (s->forward_to_kmsg)
1386 forward_kmsg(s, priority, identifier, message, ucred);
1388 if (s->forward_to_console)
1389 forward_console(s, priority, identifier, message, ucred);
1392 dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
1395 for (j = 0; j < n; j++) {
1399 if (iovec[j].iov_base < buffer ||
1400 (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1401 free(iovec[j].iov_base);
/* Handles a native protocol message that was passed as a file descriptor
 * because it did not fit into a datagram.
 *
 * The descriptor must refer to a regular file of at most ENTRY_SIZE_MAX
 * bytes; an empty file is ignored. The contents are read into a heap buffer
 * and handed to process_native_message(). All problems are logged and the
 * message is dropped. */
1409 static void process_native_file(
1412 struct ucred *ucred,
1414 const char *label, size_t label_len) {
1423 /* Data is in the passed file, since it didn't fit in a
1424 * datagram. We can't map the file here, since clients might
1425 * then truncate it and trigger a SIGBUS for us. So let's
1426 * stupidly read it */
1428 if (fstat(fd, &st) < 0) {
1429 log_error("Failed to stat passed file, ignoring: %m");
1433 if (!S_ISREG(st.st_mode)) {
1434 log_error("File passed is not regular. Ignoring.");
1438 if (st.st_size <= 0)
1441 if (st.st_size > ENTRY_SIZE_MAX) {
1442 log_error("File passed too large. Ignoring.");
1446 p = malloc(st.st_size);
/* Read from offset 0 regardless of the current file position */
1452 n = pread(fd, p, st.st_size, 0);
/* pread() reports errors as -1 with the cause in errno, so the message is
 * built from errno via %m rather than from the return value */
1454 log_error("Failed to read file, ignoring: %m");
1456 process_native_message(s, p, n, ucred, tv, label, label_len);
/* Logs one line p received on stdout stream s.
 *
 * The priority is the stream default, optionally overridden by a "<N>"
 * prefix on the line when level_prefix is enabled. The line is forwarded to
 * syslog, kmsg and console if enabled for the stream or for the server, and
 * then written to the journal with transport "stdout" using the credentials,
 * security context and unit ID stored for the stream. */
1461 static int stdout_stream_log(StdoutStream *s, const char *p) {
/* Up to five fields are set here, the rest is reserved for metadata */
1462 struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1463 char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1467 size_t label_len = 0;
1475 priority = s->priority;
1477 if (s->level_prefix)
1478 parse_syslog_priority((char**) &p, &priority);
/* Only the syslog path gets the priority fixed up here; forward_kmsg()
 * applies fixup_priority() itself */
1480 if (s->forward_to_syslog || s->server->forward_to_syslog)
1481 forward_syslog(s->server, fixup_priority(priority), s->identifier, p, &s->ucred, NULL);
1483 if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1484 forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1486 if (s->forward_to_console || s->server->forward_to_console)
1487 forward_console(s->server, priority, s->identifier, p, &s->ucred);
1489 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1491 if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1492 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1494 if (priority & LOG_FACMASK)
1495 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1496 IOVEC_SET_STRING(iovec[n++], syslog_facility);
1498 if (s->identifier) {
1499 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1500 if (syslog_identifier)
1501 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1504 message = strappend("MESSAGE=", p);
1506 IOVEC_SET_STRING(iovec[n++], message);
/* Pass the peer's security context on as label, if one was recorded */
1509 if (s->security_context) {
1510 label = (char*) s->security_context;
1511 label_len = strlen((char*) s->security_context);
1515 dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, label, label_len, s->unit_id, priority);
1518 free(syslog_priority);
1519 free(syslog_facility);
1520 free(syslog_identifier);
/* Processes one complete line p received on stdout stream s.
 *
 * The first lines of a stream form a header, one setting per line, in this
 * order: identifier, unit ID, priority, level prefix flag, forward to
 * syslog flag, forward to kmsg flag, forward to console flag. s->state
 * records which of them is expected next. Once the header is complete the
 * stream is in STDOUT_STREAM_RUNNING and each line is logged through
 * stdout_stream_log(). A header line that cannot be parsed is logged as a
 * warning. */
1525 static int stdout_stream_line(StdoutStream *s, char *p) {
1535 case STDOUT_STREAM_IDENTIFIER:
1537 s->identifier = NULL;
1539 s->identifier = strdup(p);
1544 s->state = STDOUT_STREAM_UNIT_ID;
1547 case STDOUT_STREAM_UNIT_ID:
/* The unit ID line is only looked at for connections made by UID 0 */
1548 if (s->ucred.uid == 0) {
1552 s->unit_id = strdup(p);
1558 s->state = STDOUT_STREAM_PRIORITY;
1561 case STDOUT_STREAM_PRIORITY:
1562 r = safe_atoi(p, &s->priority);
/* Accept the full range 0..999, matching the assertions in forward_syslog()
 * and forward_kmsg() and the check in dev_kmsg_record(). Priority 0 is a
 * valid value and must not be rejected. */
1563 if (r < 0 || s->priority < 0 || s->priority > 999) {
1564 log_warning("Failed to parse log priority line.");
1568 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1571 case STDOUT_STREAM_LEVEL_PREFIX:
1572 r = parse_boolean(p);
1574 log_warning("Failed to parse level prefix line.");
1578 s->level_prefix = !!r;
1579 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1582 case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1583 r = parse_boolean(p);
1585 log_warning("Failed to parse forward to syslog line.");
1589 s->forward_to_syslog = !!r;
1590 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1593 case STDOUT_STREAM_FORWARD_TO_KMSG:
1594 r = parse_boolean(p);
1596 log_warning("Failed to parse copy to kmsg line.");
1600 s->forward_to_kmsg = !!r;
1601 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1604 case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1605 r = parse_boolean(p);
1607 log_warning("Failed to parse copy to console line.");
1611 s->forward_to_console = !!r;
1612 s->state = STDOUT_STREAM_RUNNING;
1615 case STDOUT_STREAM_RUNNING:
1616 return stdout_stream_log(s, p);
1619 assert_not_reached("Unknown stream state");
/* Splits the data buffered in s->buffer into lines and feeds each one to
 * stdout_stream_line().
 *
 * A line ends at a newline, or is cut once the buffer is completely filled
 * without one. With force_flush set, data left over without a newline is
 * processed as a final line as well. Unprocessed data is moved to the start
 * of the buffer and s->length updated to match. */
1622 static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
1630 remaining = s->length;
1635 end = memchr(p, '\n', remaining);
/* No newline although the buffer is full: treat the whole buffer as a line */
1638 else if (remaining >= sizeof(s->buffer) - 1) {
1639 end = p + sizeof(s->buffer) - 1;
1646 r = stdout_stream_line(s, p);
1654 if (force_flush && remaining > 0) {
1656 r = stdout_stream_line(s, p);
/* Keep the unconsumed tail for the next read */
1664 if (p > s->buffer) {
1665 memmove(s->buffer, p, remaining);
1666 s->length = remaining;
/* Reads newly arrived data from the stream socket into s->buffer and scans
 * it for lines.
 *
 * EAGAIN from read() is handled separately from other errors, which are
 * logged as a warning. The buffer is scanned with force_flush set in one
 * case and without it in the other. NOTE(review): the condition selecting
 * between the two is not visible here; presumably the forced flush happens
 * at end of stream -- confirm. */
1672 static int stdout_stream_process(StdoutStream *s) {
/* Leave one byte of the buffer unused */
1678 l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1681 if (errno == EAGAIN)
1684 log_warning("Failed to read from stream: %m");
1689 r = stdout_stream_scan(s, true);
1697 r = stdout_stream_scan(s, false);
/* Releases a stdout stream: unlinks it from the server's stream list and
 * decrements the stream counter, removes its descriptor from the epoll set,
 * closes the descriptor, and frees the security context and identifier. */
1705 static void stdout_stream_free(StdoutStream *s) {
1709 assert(s->server->n_stdout_streams > 0);
1710 s->server->n_stdout_streams --;
1711 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
1716 epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1718 close_nointr_nofail(s->fd);
1722 if (s->security_context)
1723 freecon(s->security_context);
1726 free(s->identifier);
/* Accepts a new connection on the stdout socket and sets up a StdoutStream
 * object for it.
 *
 * The connection is refused when STDOUT_STREAMS_MAX streams exist already.
 * Peer credentials and, where available, the peer's security context are
 * recorded. The write side of the socket is shut down, the descriptor is
 * registered with the epoll loop for input, and the stream is added to the
 * server's list. On failure the partially set up stream is released with
 * stdout_stream_free(). */
1730 static int stdout_stream_new(Server *s) {
1731 StdoutStream *stream;
1734 struct epoll_event ev;
1738 fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1740 if (errno == EAGAIN)
1743 log_error("Failed to accept stdout connection: %m");
1747 if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1748 log_warning("Too many stdout streams, refusing connection.");
1749 close_nointr_nofail(fd);
1753 stream = new0(StdoutStream, 1);
1755 close_nointr_nofail(fd);
1761 len = sizeof(stream->ucred);
1762 if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1763 log_error("Failed to determine peer credentials: %m");
/* A missing security context is logged but not treated as fatal here;
 * ENOPROTOOPT is not even logged */
1769 if (getpeercon(fd, &stream->security_context) < 0 && errno != ENOPROTOOPT)
1770 log_error("Failed to determine peer security context: %m");
/* Data only flows from the client to us */
1773 if (shutdown(fd, SHUT_WR) < 0) {
1774 log_error("Failed to shutdown writing side of socket: %m");
/* The stream object itself is what the event loop gets back on input */
1780 ev.data.ptr = stream;
1781 ev.events = EPOLLIN;
1782 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1783 log_error("Failed to add stream to event loop: %m");
1789 LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1790 s->n_stdout_streams ++;
1795 stdout_stream_free(stream);
/* Checks whether the PID given as a string refers to this very process.
 * The string is parsed with parse_pid() and compared against getpid(). */
1799 static bool is_us(const char *pid) {
1804 if (parse_pid(pid, &t) < 0)
1807 return t == getpid();
/* Parses one record read from /dev/kmsg (p, l bytes) and dispatches it as a
 * journal entry. The record header is a comma-separated list (priority,
 * serial, timestamp, ...) terminated by ';', followed by the message text
 * and optional attached meta data lines. Meta data lines are stored with a
 * "_KERNEL_" prefix; in addition the entry carries the source timestamp,
 * transport, priority, facility, identifier/PID and the message itself. */
1810 static void dev_kmsg_record(Server *s, char *p, size_t l) {
/* Room for the generic meta fields, the fields added below, and the
 * kernel meta data fields. */
1811 struct iovec iovec[N_IOVEC_META_FIELDS + 7 + N_IOVEC_KERNEL_FIELDS];
1812 char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
1814 unsigned n = 0, z = 0, j;
1816 char *identifier = NULL, *pid = NULL, *e, *f, *k;
/* First header field: the priority (facility and level combined). */
1825 e = memchr(p, ',', l);
1830 r = safe_atoi(p, &priority);
1831 if (r < 0 || priority < 0 || priority > 999)
/* Special case for non-kernel facilities while forwarding to kmsg is
 * enabled. NOTE(review): the action is not visible in this listing;
 * presumably the record is dropped to avoid reading back what was
 * forwarded - confirm. */
1834 if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
/* Second header field: the record serial number. */
1839 e = memchr(p, ',', l);
1844 r = safe_atou64(p, &serial);
/* Locate the ';' that ends the header. */
1850 f = memchr(p, ';', l);
1853 /* Kernel 3.6 has the flags field, kernel 3.5 lacks that */
1854 e = memchr(p, ',', l);
/* Timestamp field of the header. */
1859 r = parse_usec(p, &usec);
/* The message text ends at the first newline. */
1865 e = memchr(p, '\n', l);
/* Collect at most N_IOVEC_KERNEL_FIELDS attached meta data lines. */
1873 for (j = 0; l > 0 && j < N_IOVEC_KERNEL_FIELDS; j++) {
1875 /* Meta data fields attached */
1882 e = memchr(k, '\n', l);
/* Build "_KERNEL_" + the field text; the result is not NUL terminated,
 * its length is carried in iov_len. */
1888 m = new(char, sizeof("_KERNEL_") - 1 + e - k);
1892 memcpy(m, "_KERNEL_", sizeof("_KERNEL_") - 1);
1893 memcpy(m + sizeof("_KERNEL_") - 1, k, e - k);
1895 iovec[n].iov_base = m;
1896 iovec[n].iov_len = sizeof("_KERNEL_") - 1 + e - k;
/* Fields that fail to allocate are simply left out of the entry. */
1903 if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
1904 (unsigned long long) usec) >= 0)
1905 IOVEC_SET_STRING(iovec[n++], source_time);
1907 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
1909 if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1910 IOVEC_SET_STRING(iovec[n++], syslog_priority);
/* Messages of the kernel facility get a fixed identifier; for the others
 * the identifier and PID are extracted from the message text. */
1912 if ((priority & LOG_FACMASK) == LOG_KERN)
1913 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
1915 read_identifier((const char**) &p, &identifier, &pid);
1917 /* Avoid any messages we generated ourselves via
1918 * log_info() and friends. */
1919 if (pid && is_us(pid))
1923 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1924 if (syslog_identifier)
1925 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1929 syslog_pid = strappend("SYSLOG_PID=", pid);
1931 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1934 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1935 IOVEC_SET_STRING(iovec[n++], syslog_facility);
1938 message = strappend("MESSAGE=", p);
1940 IOVEC_SET_STRING(iovec[n++], message);
/* Hand the assembled entry over to the journal. */
1942 dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, NULL, priority);
/* Release the first z iovec entries (z counts heap-allocated fields;
 * presumably the kernel meta data fields built in the loop above). */
1945 for (j = 0; j < z; j++)
1946 free(iovec[j].iov_base);
1949 free(syslog_priority);
1950 free(syslog_identifier);
1952 free(syslog_facility);
/* Opens the journal files the server writes to, if they are not open yet:
 * the persistent system journal below /var/log/journal/<machine-id>/ and
 * the runtime journal below /run/log/journal/<machine-id>/. Which of the
 * two is opened or created depends on s->storage and on whether the
 * "flushed" flag file exists. */
1958 static int system_journal_open(Server *s) {
/* The journal directories are named after the machine ID. */
1964 r = sd_id128_get_machine(&machine);
1968 sd_id128_to_string(machine, ids);
/* The persistent journal is only considered for persistent/auto storage
 * and once the flag file /run/systemd/journal/flushed is present. */
1970 if (!s->system_journal &&
1971 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
1972 access("/run/systemd/journal/flushed", F_OK) >= 0) {
1974 /* If in auto mode: first try to create the machine
1975 * path, but not the prefix.
1977 * If in persistent mode: create /var/log/journal and
1978 * the machine path */
1980 if (s->storage == STORAGE_PERSISTENT)
1981 (void) mkdir("/var/log/journal/", 0755);
1983 fn = strappend("/var/log/journal/", ids);
1987 (void) mkdir(fn, 0755);
1990 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
1994 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, &s->system_metrics, NULL, &s->system_journal);
/* Apply the configured compression setting and fix up file permissions. */
1998 s->system_journal->compress = s->compress;
2000 server_fix_perms(s, s->system_journal, 0);
/* ENOENT and EROFS are not reported. */
2003 if (r != -ENOENT && r != -EROFS)
2004 log_warning("Failed to open system journal: %s", strerror(-r));
/* The runtime journal is used unless storage is disabled altogether. */
2010 if (!s->runtime_journal &&
2011 (s->storage != STORAGE_NONE)) {
2013 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
2017 if (s->system_journal) {
2019 /* Try to open the runtime journal, but only
2020 * if it already exists, so that we can flush
2021 * it into the system journal */
2023 r = journal_file_open(fn, O_RDWR, 0640, &s->runtime_metrics, NULL, &s->runtime_journal);
2028 log_warning("Failed to open runtime journal: %s", strerror(-r));
2035 /* OK, we really need the runtime journal, so create
2036 * it if necessary. */
2038 (void) mkdir_parents(fn, 0755);
2039 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, &s->runtime_metrics, NULL, &s->runtime_journal);
2043 log_error("Failed to open runtime journal: %s", strerror(-r));
2048 if (s->runtime_journal) {
2049 s->runtime_journal->compress = s->compress;
2051 server_fix_perms(s, s->runtime_journal, 0);
/* Copies all entries of the runtime journal into the persistent system
 * journal, then closes the runtime journal and removes /run/log/journal.
 * Only applies to auto/persistent storage, and only if both a runtime
 * journal and (after trying to open it) a system journal exist. */
2058 static int server_flush_to_var(Server *s) {
2066 if (s->storage != STORAGE_AUTO &&
2067 s->storage != STORAGE_PERSISTENT)
/* Without a runtime journal there is nothing to flush. */
2070 if (!s->runtime_journal)
/* Make sure the system journal is open (return value is not checked;
 * the pointer is tested instead). */
2073 system_journal_open(s);
2075 if (!s->system_journal)
2078 log_info("Flushing to /var...");
2080 r = sd_id128_get_machine(&machine);
2082 log_error("Failed to get machine id: %s", strerror(-r));
/* Iterate over the runtime journal files only. */
2086 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
2088 log_error("Failed to read runtime journal: %s", strerror(-r));
2092 SD_JOURNAL_FOREACH(j) {
/* Resolve the entry object the iterator currently points at. */
2095 f = j->current_file;
2096 assert(f && f->current_offset > 0);
2098 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2100 log_error("Can't read entry: %s", strerror(-r));
/* First attempt to copy the entry into the system journal. */
2104 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2106 log_info("Allocation limit reached.");
2108 journal_file_post_change(s->system_journal);
/* Second attempt. NOTE(review): the steps between the two attempts are
 * not visible in this listing. */
2112 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2116 log_error("Can't write entry: %s", strerror(-r));
2122 journal_file_post_change(s->system_journal);
/* The runtime journal is no longer needed: close it and remove its
 * directory. */
2124 journal_file_close(s->runtime_journal);
2125 s->runtime_journal = NULL;
2128 rm_rf("/run/log/journal", false, true, false);
/* Reads a single record from the /dev/kmsg fd and passes it on to
 * dev_kmsg_record(). EINVAL, EAGAIN and EINTR from read() get special
 * treatment; other read errors are logged. */
2133 static int server_read_dev_kmsg(Server *s) {
2134 char buffer[8192+1]; /* the kernel-side limit per record is 8K currently */
2138 assert(s->dev_kmsg_fd >= 0);
/* One byte of the buffer is kept in reserve (the read asks for
 * sizeof(buffer) - 1 bytes). */
2140 l = read(s->dev_kmsg_fd, buffer, sizeof(buffer) - 1);
2144 /* Old kernels who don't allow reading from /dev/kmsg
2145 * return EINVAL when we try. So handle this cleanly,
2146 * but don't try to ever read from it again. */
2147 if (errno == EINVAL) {
2148 epoll_ctl(s->epoll_fd, EPOLL_CTL_DEL, s->dev_kmsg_fd, NULL);
/* Transient conditions, singled out from the errors logged below. */
2152 if (errno == EAGAIN || errno == EINTR)
2155 log_error("Failed to read from kernel: %m");
2159 dev_kmsg_record(s, buffer, l);
/* Processes records from /dev/kmsg via server_read_dev_kmsg(). Nothing is
 * read unless the device has been opened and found readable.
 * NOTE(review): the loop around the read call is not visible in this
 * listing; presumably it reads until no more records are pending. */
2163 static int server_flush_dev_kmsg(Server *s) {
2168 if (s->dev_kmsg_fd < 0)
2171 if (!s->dev_kmsg_readable)
2174 log_info("Flushing /dev/kmsg...");
2177 r = server_read_dev_kmsg(s);
/* Dispatches a single epoll event. The event source is identified by
 * comparing ev->data.fd against the well-known server fds (signal fd,
 * /dev/kmsg, native and syslog datagram sockets, stdout listening socket);
 * anything else is taken to be a stdout stream, whose epoll user data is
 * a StdoutStream pointer. */
2188 static int process_event(Server *s, struct epoll_event *ev) {
/* --- Signals --- */
2192 if (ev->data.fd == s->signal_fd) {
2193 struct signalfd_siginfo sfsi;
2196 if (ev->events != EPOLLIN) {
2197 log_info("Got invalid event from epoll.");
2201 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
2202 if (n != sizeof(sfsi)) {
2207 if (errno == EINTR || errno == EAGAIN)
2213 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
/* SIGUSR1: create the "flushed" flag file and move the runtime journal
 * to persistent storage. */
2215 if (sfsi.ssi_signo == SIGUSR1) {
2216 touch("/run/systemd/journal/flushed");
2217 server_flush_to_var(s);
/* SIGUSR2: handling is not visible in this listing. */
2221 if (sfsi.ssi_signo == SIGUSR2) {
/* --- Kernel log --- */
2229 } else if (ev->data.fd == s->dev_kmsg_fd) {
2232 if (ev->events != EPOLLIN) {
2233 log_info("Got invalid event from epoll.");
2237 r = server_read_dev_kmsg(s);
/* --- Native and syslog datagram sockets --- */
2243 } else if (ev->data.fd == s->native_fd ||
2244 ev->data.fd == s->syslog_fd) {
2246 if (ev->events != EPOLLIN) {
2247 log_info("Got invalid event from epoll.");
2252 struct msghdr msghdr;
2254 struct ucred *ucred = NULL;
2255 struct timeval *tv = NULL;
2256 struct cmsghdr *cmsg;
2258 size_t label_len = 0;
2260 struct cmsghdr cmsghdr;
2262 /* We use NAME_MAX space for the
2263 * SELinux label here. The kernel
2264 * currently enforces no limit, but
2265 * according to suggestions from the
2266 * SELinux people this will change and
2267 * it will probably be identical to
2268 * NAME_MAX. For now we use that, but
2269 * this should be updated one day when
2270 * the final limit is known.*/
2271 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
2272 CMSG_SPACE(sizeof(struct timeval)) +
2273 CMSG_SPACE(sizeof(int)) + /* fd */
2274 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask the socket how many bytes are queued, so the receive buffer can
 * be grown before receiving. */
2281 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
2282 log_error("SIOCINQ failed: %m");
2286 if (s->buffer_size < (size_t) v) {
/* Grow to at least the queued size plus LINE_MAX, or double the
 * current size, whichever is larger; one extra byte is allocated. */
2290 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
2291 b = realloc(s->buffer, l+1);
2294 log_error("Couldn't increase buffer.");
2303 iovec.iov_base = s->buffer;
2304 iovec.iov_len = s->buffer_size;
2308 msghdr.msg_iov = &iovec;
2309 msghdr.msg_iovlen = 1;
2310 msghdr.msg_control = &control;
2311 msghdr.msg_controllen = sizeof(control);
/* Received fds are marked close-on-exec. */
2313 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
2316 if (errno == EINTR || errno == EAGAIN)
2319 log_error("recvmsg() failed: %m");
/* Pick up the ancillary data: sender credentials, security label,
 * receive timestamp and passed file descriptors. */
2323 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
2325 if (cmsg->cmsg_level == SOL_SOCKET &&
2326 cmsg->cmsg_type == SCM_CREDENTIALS &&
2327 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
2328 ucred = (struct ucred*) CMSG_DATA(cmsg);
2329 else if (cmsg->cmsg_level == SOL_SOCKET &&
2330 cmsg->cmsg_type == SCM_SECURITY) {
2331 label = (char*) CMSG_DATA(cmsg);
2332 label_len = cmsg->cmsg_len - CMSG_LEN(0);
2333 } else if (cmsg->cmsg_level == SOL_SOCKET &&
2334 cmsg->cmsg_type == SO_TIMESTAMP &&
2335 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
2336 tv = (struct timeval*) CMSG_DATA(cmsg);
2337 else if (cmsg->cmsg_level == SOL_SOCKET &&
2338 cmsg->cmsg_type == SCM_RIGHTS) {
2339 fds = (int*) CMSG_DATA(cmsg);
2340 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* Syslog socket: only plain datagrams are accepted; passed fds are
 * ignored with a warning. */
2344 if (ev->data.fd == s->syslog_fd) {
2347 if (n > 0 && n_fds == 0) {
2348 e = memchr(s->buffer, '\n', n);
2354 process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
2355 } else if (n_fds > 0)
2356 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native socket: either a datagram with payload and no fds, or an
 * empty datagram carrying exactly one fd to read the message from. */
2359 if (n > 0 && n_fds == 0)
2360 process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
2361 else if (n == 0 && n_fds == 1)
2362 process_native_file(s, fds[0], ucred, tv, label, label_len);
2364 log_warning("Got too many file descriptors via native socket. Ignoring.");
/* Close the received fds. */
2367 close_many(fds, n_fds);
/* --- New stdout stream connection --- */
2372 } else if (ev->data.fd == s->stdout_fd) {
2374 if (ev->events != EPOLLIN) {
2375 log_info("Got invalid event from epoll.");
2379 stdout_stream_new(s);
/* --- Existing stdout stream --- */
2383 StdoutStream *stream;
/* Only EPOLLIN and EPOLLHUP are acceptable here. */
2385 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
2386 log_info("Got invalid event from epoll.");
2390 /* If it is none of the well-known fds, it must be an
2391 * stdout stream fd. Note that this is a bit ugly here
2392 * (since we rely that none of the well-known fds
2393 * could be interpreted as pointer), but nonetheless
2394 * safe, since the well-known fds would never get an
2395 * fd > 4096, i.e. beyond the first memory page */
2397 stream = ev->data.ptr;
/* A result <= 0 from processing ends the stream. */
2399 if (stdout_stream_process(stream) <= 0)
2400 stdout_stream_free(stream);
2405 log_error("Unknown event.");
/* Prepares the /dev/log datagram socket. If no fd is set in s->syslog_fd
 * yet, the socket is created, bound to /dev/log (replacing a stale node)
 * and made world-accessible. Credential passing, security label passing
 * and receive timestamps are then enabled and the fd is added to the
 * epoll set. */
2409 static int open_syslog_socket(Server *s) {
2410 union sockaddr_union sa;
2412 struct epoll_event ev;
/* No socket set yet: create and bind our own. */
2416 if (s->syslog_fd < 0) {
2418 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2419 if (s->syslog_fd < 0) {
2420 log_error("socket() failed: %m");
2425 sa.un.sun_family = AF_UNIX;
2426 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
/* Remove a left-over socket node so that bind() can succeed. */
2428 unlink(sa.un.sun_path);
2430 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2432 log_error("bind() failed: %m");
2436 chmod(sa.un.sun_path, 0666);
/* Presumably the branch for an fd that was already set: just make it
 * non-blocking - the else is not visible in this listing. */
2438 fd_nonblock(s->syslog_fd, 1);
/* Have the kernel attach sender credentials to each datagram. */
2441 r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2443 log_error("SO_PASSCRED failed: %m");
/* Security labels are optional: failure only yields a warning. */
2449 r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2451 log_warning("SO_PASSSEC failed: %m");
2455 r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2457 log_error("SO_TIMESTAMP failed: %m");
2462 ev.events = EPOLLIN;
2463 ev.data.fd = s->syslog_fd;
2464 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2465 log_error("Failed to add syslog server fd to epoll object: %m");
/* Prepares the native journal datagram socket. If no fd is set in
 * s->native_fd yet, the socket is created, bound to
 * /run/systemd/journal/socket (replacing a stale node) and made
 * world-accessible. Credential passing, security label passing and receive
 * timestamps are then enabled on the native socket and the fd is added to
 * the epoll set. */
2472 static int open_native_socket(Server*s) {
2473 union sockaddr_union sa;
2475 struct epoll_event ev;
/* No socket set yet: create and bind our own. */
2479 if (s->native_fd < 0) {
2481 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2482 if (s->native_fd < 0) {
2483 log_error("socket() failed: %m");
2488 sa.un.sun_family = AF_UNIX;
2489 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
/* Remove a left-over socket node so that bind() can succeed. */
2491 unlink(sa.un.sun_path);
2493 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2495 log_error("bind() failed: %m");
2499 chmod(sa.un.sun_path, 0666);
/* Presumably the branch for an fd that was already set: just make it
 * non-blocking - the else is not visible in this listing. */
2501 fd_nonblock(s->native_fd, 1);
/* Have the kernel attach sender credentials to each datagram. */
2504 r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2506 log_error("SO_PASSCRED failed: %m");
/* Security labels are optional: failure only yields a warning. The option
 * must be set on the native socket itself, not on the syslog socket. */
2512 r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2514 log_warning("SO_PASSSEC failed: %m");
2518 r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2520 log_error("SO_TIMESTAMP failed: %m");
2525 ev.events = EPOLLIN;
2526 ev.data.fd = s->native_fd;
2527 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2528 log_error("Failed to add native server fd to epoll object: %m");
/* Prepares the stream socket that stdout/stderr connections are accepted
 * on. If no fd is set in s->stdout_fd yet, the socket is created, bound to
 * /run/systemd/journal/stdout (replacing a stale node), made
 * world-accessible and put into listening state. The fd is then added to
 * the epoll set. */
2535 static int open_stdout_socket(Server *s) {
2536 union sockaddr_union sa;
2538 struct epoll_event ev;
/* No socket set yet: create, bind and listen on our own. */
2542 if (s->stdout_fd < 0) {
2544 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2545 if (s->stdout_fd < 0) {
2546 log_error("socket() failed: %m");
2551 sa.un.sun_family = AF_UNIX;
2552 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
/* Remove a left-over socket node so that bind() can succeed. */
2554 unlink(sa.un.sun_path);
2556 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2558 log_error("bind() failed: %m");
2562 chmod(sa.un.sun_path, 0666);
2564 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2565 log_error("listen() failed: %m");
/* Presumably the branch for an fd that was already set: just make it
 * non-blocking - the else is not visible in this listing. */
2569 fd_nonblock(s->stdout_fd, 1);
2572 ev.events = EPOLLIN;
2573 ev.data.fd = s->stdout_fd;
2574 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2575 log_error("Failed to add stdout server fd to epoll object: %m");
/* Opens /dev/kmsg and tries to add it to the epoll set. Failure to open
 * the device is only warned about. s->dev_kmsg_readable is set once the
 * fd has been registered for reading. */
2582 static int open_dev_kmsg(Server *s) {
2583 struct epoll_event ev;
/* Opened read-write, non-blocking and close-on-exec. */
2587 s->dev_kmsg_fd = open("/dev/kmsg", O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2588 if (s->dev_kmsg_fd < 0) {
2589 log_warning("Failed to open /dev/kmsg, ignoring: %m");
2594 ev.events = EPOLLIN;
2595 ev.data.fd = s->dev_kmsg_fd;
2596 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->dev_kmsg_fd, &ev) < 0) {
2598 /* This will fail with EPERM on older kernels where
2599 * /dev/kmsg is not readable. */
2603 log_error("Failed to add /dev/kmsg fd to epoll object: %m");
2607 s->dev_kmsg_readable = true;
/* Sets the process signal mask to SIGINT, SIGTERM, SIGUSR1 and SIGUSR2 and
 * creates a signalfd for these signals, which is then added to the epoll
 * set so that signals are handled from the main event loop. */
2612 static int open_signalfd(Server *s) {
2614 struct epoll_event ev;
2618 assert_se(sigemptyset(&mask) == 0);
2619 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
/* SIG_SETMASK replaces the whole signal mask with this set. */
2620 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2622 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2623 if (s->signal_fd < 0) {
2624 log_error("signalfd(): %m");
2629 ev.events = EPOLLIN;
2630 ev.data.fd = s->signal_fd;
2632 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2633 log_error("epoll_ctl(): %m");
/* Applies journald settings given on the kernel command line. The words of
 * /proc/cmdline are scanned for the options
 * systemd.journald.forward_to_syslog=, systemd.journald.forward_to_kmsg=
 * and systemd.journald.forward_to_console=, each taking a boolean value.
 * Unparsable values and unknown systemd.journald options are warned about. */
2640 static int server_parse_proc_cmdline(Server *s) {
2641 char *line, *w, *state;
/* Special case when running in a container; the action is not visible in
 * this listing (presumably the kernel command line is skipped). */
2645 if (detect_container(NULL) > 0)
2648 r = read_one_line_file("/proc/cmdline", &line);
2650 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2654 FOREACH_WORD_QUOTED(w, l, line, state) {
/* Work on a NUL-terminated copy of the current word. */
2657 word = strndup(w, l);
/* The numeric offsets below equal the length of the matched option
 * prefix, i.e. they point at the value after the '='. */
2663 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
2664 r = parse_boolean(word + 35);
2666 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2668 s->forward_to_syslog = r;
2669 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
2670 r = parse_boolean(word + 33);
2672 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2674 s->forward_to_kmsg = r;
2675 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
2676 r = parse_boolean(word + 36);
2678 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2680 s->forward_to_console = r;
/* Anything else in our namespace is not a known option. */
2681 } else if (startswith(word, "systemd.journald"))
2682 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Loads /etc/systemd/journald.conf into the server object, parsing the
 * [Journal] section with the gperf-generated lookup table. A missing file
 * (ENOENT) is singled out from the other open errors, which are warned
 * about. */
2694 static int server_parse_config_file(Server *s) {
2701 fn = "/etc/systemd/journald.conf";
2702 f = fopen(fn, "re");
2704 if (errno == ENOENT)
2707 log_warning("Failed to open configuration file %s: %m", fn);
2711 r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2713 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Initializes the server object: sets defaults, applies the configuration
 * file and kernel command line, creates the epoll object, takes over
 * sockets passed in by the service manager, opens all input sources
 * (syslog, native and stdout sockets, /dev/kmsg, signalfd), sets up rate
 * limiting and opens the journal files. */
2720 static int server_init(Server *s) {
/* Mark all fds as not yet opened. */
2726 s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
2729 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
2730 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
2732 s->forward_to_syslog = true;
/* Default maximum log levels per destination. */
2734 s->max_level_store = LOG_DEBUG;
2735 s->max_level_syslog = LOG_DEBUG;
2736 s->max_level_kmsg = LOG_NOTICE;
2737 s->max_level_console = LOG_INFO;
/* Fill the metrics with 0xFF bytes; presumably this marks every limit as
 * unset so that defaults are picked later - confirm. */
2739 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
2740 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* The kernel command line is parsed after the configuration file, so
 * its settings are applied last. */
2742 server_parse_config_file(s);
2743 server_parse_proc_cmdline(s);
2745 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
2746 if (!s->user_journals)
2749 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2750 if (s->epoll_fd < 0) {
2751 log_error("Failed to create epoll object: %m");
/* Take over file descriptors passed in via the environment. */
2755 n = sd_listen_fds(true);
2757 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
/* Sort each passed fd into its role by socket type and path; at most one
 * socket per role is accepted. */
2761 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2763 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
2765 if (s->native_fd >= 0) {
2766 log_error("Too many native sockets passed.");
2772 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
2774 if (s->stdout_fd >= 0) {
2775 log_error("Too many stdout sockets passed.");
2781 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
2783 if (s->syslog_fd >= 0) {
2784 log_error("Too many /dev/log sockets passed.");
2791 log_error("Unknown socket passed.");
/* Open (or finish setting up) all input sources. */
2796 r = open_syslog_socket(s);
2800 r = open_native_socket(s);
2804 r = open_stdout_socket(s);
2808 r = open_dev_kmsg(s);
2812 r = open_signalfd(s);
2816 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2820 r = system_journal_open(s);
/* Releases everything owned by the server object: all stdout streams, the
 * system, runtime and per-user journal files, the user journal hashmap,
 * every open fd and the rate limiter. */
2827 static void server_done(Server *s) {
/* stdout_stream_free() unlinks the stream from the list, so the list
 * head advances on every iteration. */
2831 while (s->stdout_streams)
2832 stdout_stream_free(s->stdout_streams);
2834 if (s->system_journal)
2835 journal_file_close(s->system_journal);
2837 if (s->runtime_journal)
2838 journal_file_close(s->runtime_journal);
/* Close each per-user journal before freeing the map itself. */
2840 while ((f = hashmap_steal_first(s->user_journals)))
2841 journal_file_close(f);
2843 hashmap_free(s->user_journals);
/* Close whatever fds were opened; -1 marks an unused slot. */
2845 if (s->epoll_fd >= 0)
2846 close_nointr_nofail(s->epoll_fd);
2848 if (s->signal_fd >= 0)
2849 close_nointr_nofail(s->signal_fd);
2851 if (s->syslog_fd >= 0)
2852 close_nointr_nofail(s->syslog_fd);
2854 if (s->native_fd >= 0)
2855 close_nointr_nofail(s->native_fd);
2857 if (s->stdout_fd >= 0)
2858 close_nointr_nofail(s->stdout_fd);
2860 if (s->dev_kmsg_fd >= 0)
2861 close_nointr_nofail(s->dev_kmsg_fd);
2864 journal_rate_limit_free(s->rate_limit);
/* Entry point of the journal daemon. Takes no command line arguments.
 * Sets up logging, initializes the server, vacuums and flushes existing
 * data, then processes epoll events one at a time until the loop ends, and
 * finally tears the server down. Exits with EXIT_FAILURE if the last
 * result code is negative, EXIT_SUCCESS otherwise. */
2870 int main(int argc, char *argv[]) {
2874 /* if (getppid() != 1) { */
2875 /* log_error("This program should be invoked by init only."); */
2876 /* return EXIT_FAILURE; */
2880 log_error("This program does not take arguments.");
2881 return EXIT_FAILURE;
2884 log_set_target(LOG_TARGET_SAFE);
2885 log_set_facility(LOG_SYSLOG);
2886 log_parse_environment();
2891 r = server_init(&server);
/* Start-up housekeeping: vacuum old journal data, move the runtime
 * journal to persistent storage and process pending kernel messages. */
2895 server_vacuum(&server);
2896 server_flush_to_var(&server);
2897 server_flush_dev_kmsg(&server);
2899 log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
2900 driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
2904 "STATUS=Processing requests...");
2907 struct epoll_event event;
/* Block without timeout until a single event is available. */
2909 r = epoll_wait(server.epoll_fd, &event, 1, -1);
2915 log_error("epoll_wait() failed: %m");
2921 r = process_event(&server, &event);
2928 log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
2929 driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
2933 "STATUS=Shutting down...");
2935 server_done(&server);
2937 return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;