1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
25 #include <sys/signalfd.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
35 #include <systemd/sd-journal.h>
36 #include <systemd/sd-messages.h>
37 #include <systemd/sd-daemon.h>
40 #include <systemd/sd-login.h>
45 #include "journal-file.h"
46 #include "socket-util.h"
47 #include "cgroup-util.h"
51 #include "conf-parser.h"
52 #include "journal-rate-limit.h"
53 #include "journal-internal.h"
54 #include "journal-vacuum.h"
55 #include "journal-authenticate.h"
57 #include "journald-kmsg.h"
58 #include "journald-syslog.h"
59 #include "journald-stream.h"
63 #include <acl/libacl.h>
68 #include <selinux/selinux.h>
/* Upper bound on the number of per-user journal files kept open at once;
 * find_journal() closes entries until the hashmap is below this size. */
71 #define USER_JOURNALS_MAX 1024
/* Default rate limiter settings, copied into the Server by server_init(). */
73 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
74 #define DEFAULT_RATE_LIMIT_BURST 200
/* Lifetime of the value cached by available_space(). */
76 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Largest st_size (32 MiB) that process_native_file() accepts. */
78 #define ENTRY_SIZE_MAX (1024*1024*32)
/* String names for the Storage enum values, indexed by enum value. */
80 static const char* const storage_table[] = {
81 [STORAGE_AUTO] = "auto",
82 [STORAGE_VOLATILE] = "volatile",
83 [STORAGE_PERSISTENT] = "persistent",
84 [STORAGE_NONE] = "none"
/* Macro-generated helpers built on storage_table: a string lookup pair and
 * the config_parse_storage callback (presumably used for the Storage=
 * setting; the gperf table is not visible here). */
87 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
88 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Returns how many more bytes the journal may consume right now.
 *
 * The result is the smaller of (max_use - bytes currently used by journal
 * files) and (free file system space - keep_free). It is cached in the
 * Server and only recomputed once RECHECK_AVAILABLE_SPACE_USEC has passed. */
90 static uint64_t available_space(Server *s) {
95 uint64_t sum = 0, avail = 0, ss_avail = 0;
101 ts = now(CLOCK_MONOTONIC);
/* Serve the cached value while it is still fresh. */
103 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
104 return s->cached_available_space;
106 r = sd_id128_get_machine(&machine);
/* Persistent directory and metrics if the system journal is open,
 * otherwise the runtime ones. */
110 if (s->system_journal) {
111 f = "/var/log/journal/";
112 m = &s->system_metrics;
114 f = "/run/log/journal/";
115 m = &s->runtime_metrics;
/* Journal directory is <prefix><machine-id>. */
120 p = strappend(f, sd_id128_to_string(machine, ids));
130 if (fstatvfs(dirfd(d), &ss) < 0)
135 struct dirent buf, *de;
137 r = readdir_r(d, &buf, &de);
/* Only regular files named *.journal or *.journal~ are counted. */
144 if (!endswith(de->d_name, ".journal") &&
145 !endswith(de->d_name, ".journal~"))
148 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
151 if (!S_ISREG(st.st_mode))
/* st_blocks is counted in 512-byte units. */
154 sum += (uint64_t) st.st_blocks * 512UL;
/* Room left below the configured maximum usage. */
157 avail = sum >= m->max_use ? 0 : m->max_use - sum;
/* NOTE(review): the product is computed in the operand types before being
 * widened; confirm both are 64-bit on every supported target. */
159 ss_avail = ss.f_bsize * ss.f_bavail;
/* Free space on the file system beyond the reserve that must stay free. */
161 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
163 if (ss_avail < avail)
/* Remember the result and when it was computed. */
166 s->cached_available_space = avail;
167 s->cached_available_space_timestamp = ts;
/* Resolves the GID of the adm group once and stores it in s->file_gid.
 * The file_gid_valid flag makes later calls a no-op, whether or not the
 * lookup succeeded. */
175 static void server_read_file_gid(Server *s) {
176 const char *adm = "adm";
181 if (s->file_gid_valid)
184 r = get_group_creds(&adm, &s->file_gid);
186 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
188 /* If we could not read the gid, then it will be 0, but that is
189 * fine and we should not try to resolve the group again, so
190 * let us just pretend it worked right away. */
191 s->file_gid_valid = true;
/* Applies the expected ownership, mode and ACL to journal file f.
 *
 * The file is set to mode 0640, owner 0 and group s->file_gid; then an
 * ACL_USER entry with read permission is added for uid. Every failure is
 * logged as a warning and otherwise ignored.
 * NOTE(review): the condition under which the ACL part is skipped (for
 * example for uid 0) is not visible here. */
194 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
199 acl_permset_t permset;
/* Make sure s->file_gid has been looked up. */
204 server_read_file_gid(s);
206 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
208 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
214 acl = acl_get_fd(f->fd);
216 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an existing entry for this user; create one if needed. */
220 r = acl_find_uid(acl, uid, &entry);
223 if (acl_create_entry(&acl, &entry) < 0 ||
224 acl_set_tag_type(entry, ACL_USER) < 0 ||
225 acl_set_qualifier(entry, &uid) < 0) {
226 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Grant read access and recompute the ACL mask. */
231 if (acl_get_permset(entry, &permset) < 0 ||
232 acl_add_perm(permset, ACL_READ) < 0 ||
233 acl_calc_mask(&acl) < 0) {
234 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
238 if (acl_set_fd(f->fd, acl) < 0)
239 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Picks the journal file that entries for uid should be written to.
 *
 * Returns the runtime journal whenever it is open. Otherwise it looks up,
 * or opens and caches, the per-user file
 * /var/log/journal/<machine-id>/user-<uid>.journal, and falls back to the
 * system journal on any failure.
 * NOTE(review): the guard in front of the first bare system-journal return
 * is not visible here. */
246 static JournalFile* find_journal(Server *s, uid_t uid) {
254 /* We split up user logs only on /var, not on /run. If the
255 * runtime file is open, we write to it exclusively, in order
256 * to guarantee proper order as soon as we flush /run to
257 * /var and close the runtime file. */
259 if (s->runtime_journal)
260 return s->runtime_journal;
263 return s->system_journal;
265 r = sd_id128_get_machine(&machine);
267 return s->system_journal;
/* Already open for this user? */
269 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
273 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
274 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
275 return s->system_journal;
/* Keep the number of open user journals below USER_JOURNALS_MAX. */
277 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
278 /* Too many open? Then let's close one */
279 f = hashmap_steal_first(s->user_journals);
281 journal_file_close(f);
284 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
288 return s->system_journal;
/* Give the owning user read access to the new file. */
290 server_fix_perms(s, f, uid);
/* Cache the open file; if that fails, close it and fall back. */
292 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
294 journal_file_close(f);
295 return s->system_journal;
301 static void server_rotate(Server *s) {
307 log_info("Rotating...");
309 if (s->runtime_journal) {
310 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
312 if (s->runtime_journal)
313 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
315 log_error("Failed to create new runtime journal: %s", strerror(-r));
317 server_fix_perms(s, s->runtime_journal, 0);
320 if (s->system_journal) {
321 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
323 if (s->system_journal)
324 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
326 log_error("Failed to create new system journal: %s", strerror(-r));
329 server_fix_perms(s, s->system_journal, 0);
332 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
333 r = journal_file_rotate(&f, s->compress, s->seal);
336 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
338 log_error("Failed to create user journal: %s", strerror(-r));
340 hashmap_replace(s->user_journals, k, f);
341 server_fix_perms(s, s->system_journal, PTR_TO_UINT32(k));
/* Removes old journal files so that the journal directories respect their
 * max_use and keep_free limits.
 *
 * The persistent directory is vacuumed when the system journal is open and
 * the runtime directory when the runtime journal is open. A missing
 * directory (-ENOENT) is not reported. The available_space() cache is
 * invalidated at the end. */
346 static void server_vacuum(Server *s) {
352 log_info("Vacuuming...");
354 r = sd_id128_get_machine(&machine);
356 log_error("Failed to get machine ID: %s", strerror(-r));
360 sd_id128_to_string(machine, ids);
362 if (s->system_journal) {
363 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
368 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
369 if (r < 0 && r != -ENOENT)
370 log_error("Failed to vacuum %s: %s", p, strerror(-r));
374 if (s->runtime_journal) {
375 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
380 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
381 if (r < 0 && r != -ENOENT)
382 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* Force the next available_space() call to recompute. */
386 s->cached_available_space_timestamp = 0;
/* Returns a newly allocated cgroup path for pid, expressed relative to the
 * cgroup of PID 1 when the process lives below it.
 * NOTE(review): the error returns and the branch taken when the process is
 * not below the init cgroup are not visible here. */
389 static char *shortened_cgroup_path(pid_t pid) {
391 char *process_path, *init_path, *path;
395 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
399 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
/* Strip a trailing /system (7 characters) from the init path. */
405 if (endswith(init_path, "/system"))
406 init_path[strlen(init_path) - 7] = 0;
407 else if (streq(init_path, "/"))
/* Drop the init prefix from the process path. */
410 if (startswith(process_path, init_path)) {
413 p = strdup(process_path + strlen(init_path));
431 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
433 bool vacuumed = false;
440 f = find_journal(s, uid);
444 if (journal_file_rotate_suggested(f)) {
445 log_info("Journal header limits reached or header out-of-date, rotating.");
450 f = find_journal(s, uid);
456 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
461 (r != -E2BIG && /* hit limit */
462 r != -EFBIG && /* hit fs limit */
463 r != -EDQUOT && /* quota hit */
464 r != -ENOSPC && /* disk full */
465 r != -EBADMSG && /* corrupted */
466 r != -ENODATA && /* truncated */
467 r != -EHOSTDOWN && /* other machine */
468 r != -EPROTONOSUPPORT && /* unsupported feature */
469 r != -EBUSY && /* unclean shutdown */
470 r != -ESHUTDOWN /* already archived */)) {
471 log_error("Failed to write entry, ignoring: %s", strerror(-r));
475 if (r == -E2BIG || r == -EFBIG || r == EDQUOT || r == ENOSPC)
476 log_info("Allocation limit reached, rotating.");
477 else if (r == -EHOSTDOWN)
478 log_info("Journal file from other machine, rotating.");
479 else if (r == -EBUSY)
480 log_info("Unlcean shutdown, rotating.");
482 log_warning("Journal file corrupted, rotating.");
488 f = find_journal(s, uid);
492 log_info("Retrying write.");
496 static void dispatch_message_real(
498 struct iovec *iovec, unsigned n, unsigned m,
501 const char *label, size_t label_len,
502 const char *unit_id) {
504 char *pid = NULL, *uid = NULL, *gid = NULL,
505 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
506 *comm = NULL, *cmdline = NULL, *hostname = NULL,
507 *audit_session = NULL, *audit_loginuid = NULL,
508 *exe = NULL, *cgroup = NULL, *session = NULL,
509 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
515 uid_t loginuid = 0, realuid = 0;
520 assert(n + N_IOVEC_META_FIELDS <= m);
528 realuid = ucred->uid;
530 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
531 IOVEC_SET_STRING(iovec[n++], pid);
533 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
534 IOVEC_SET_STRING(iovec[n++], uid);
536 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
537 IOVEC_SET_STRING(iovec[n++], gid);
539 r = get_process_comm(ucred->pid, &t);
541 comm = strappend("_COMM=", t);
545 IOVEC_SET_STRING(iovec[n++], comm);
548 r = get_process_exe(ucred->pid, &t);
550 exe = strappend("_EXE=", t);
554 IOVEC_SET_STRING(iovec[n++], exe);
557 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
559 cmdline = strappend("_CMDLINE=", t);
563 IOVEC_SET_STRING(iovec[n++], cmdline);
566 r = audit_session_from_pid(ucred->pid, &audit);
568 if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
569 IOVEC_SET_STRING(iovec[n++], audit_session);
571 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
573 if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
574 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
576 t = shortened_cgroup_path(ucred->pid);
578 cgroup = strappend("_SYSTEMD_CGROUP=", t);
582 IOVEC_SET_STRING(iovec[n++], cgroup);
586 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
587 session = strappend("_SYSTEMD_SESSION=", t);
591 IOVEC_SET_STRING(iovec[n++], session);
594 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
595 if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
596 IOVEC_SET_STRING(iovec[n++], owner_uid);
599 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
600 unit = strappend("_SYSTEMD_UNIT=", t);
603 unit = strappend("_SYSTEMD_UNIT=", unit_id);
606 IOVEC_SET_STRING(iovec[n++], unit);
610 selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
611 if (selinux_context) {
612 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
613 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
614 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
615 IOVEC_SET_STRING(iovec[n++], selinux_context);
618 security_context_t con;
620 if (getpidcon(ucred->pid, &con) >= 0) {
621 selinux_context = strappend("_SELINUX_CONTEXT=", con);
623 IOVEC_SET_STRING(iovec[n++], selinux_context);
632 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
633 (unsigned long long) timeval_load(tv)) >= 0)
634 IOVEC_SET_STRING(iovec[n++], source_time);
637 /* Note that strictly speaking storing the boot id here is
638 * redundant since the entry includes this in-line
639 * anyway. However, we need this indexed, too. */
640 r = sd_id128_get_boot(&id);
642 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
643 IOVEC_SET_STRING(iovec[n++], boot_id);
645 r = sd_id128_get_machine(&id);
647 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
648 IOVEC_SET_STRING(iovec[n++], machine_id);
650 t = gethostname_malloc();
652 hostname = strappend("_HOSTNAME=", t);
655 IOVEC_SET_STRING(iovec[n++], hostname);
660 write_to_journal(s, realuid == 0 ? 0 : loginuid, iovec, n);
673 free(audit_loginuid);
678 free(selinux_context);
/* Writes a message generated by journald itself into the journal.
 *
 * The entry carries PRIORITY=6, _TRANSPORT=driver, the printf-style
 * formatted MESSAGE= text and a MESSAGE_ID= built from message_id, and is
 * attributed to the credentials of the journald process. No rate limiting
 * is applied since dispatch_message_real() is called directly. */
681 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
/* 11 bytes for the MESSAGE_ID= prefix, 32 hex digits, trailing NUL. */
682 char mid[11 + 32 + 1];
683 char buffer[16 + LINE_MAX + 1];
/* Four fields are added here; the rest is reserved for metadata. */
684 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
692 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
693 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The formatted text is placed right after the 8-byte MESSAGE= prefix. */
695 memcpy(buffer, "MESSAGE=", 8);
696 va_start(ap, format);
697 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
699 char_array_0(buffer);
700 IOVEC_SET_STRING(iovec[n++], buffer);
702 snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
704 IOVEC_SET_STRING(iovec[n++], mid);
/* Attribute the entry to this process. */
707 ucred.pid = getpid();
708 ucred.uid = getuid();
709 ucred.gid = getgid();
711 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Rate-limits an incoming entry and hands it to dispatch_message_real().
 *
 * Entries whose priority is above s->max_level_store are not stored. The
 * rate limit key is the shortened cgroup path of the sender, cut after its
 * third slash. When earlier messages were suppressed, a driver message
 * reporting the count is written first.
 * NOTE(review): the branches for a missing ucred and for a zero result of
 * the rate limit test are not visible here. */
714 void server_dispatch_message(
716 struct iovec *iovec, unsigned n, unsigned m,
719 const char *label, size_t label_len,
724 char *path = NULL, *c;
727 assert(iovec || n == 0);
732 if (LOG_PRI(priority) > s->max_level_store)
738 path = shortened_cgroup_path(ucred->pid);
742 /* example: /user/lennart/3/foobar
743 * /system/dbus.service/foobar
745 * So let's cut off everything past the third /, since that is
746 * where user directories start */
748 c = strchr(path, '/');
750 c = strchr(c+1, '/');
752 c = strchr(c+1, '/');
/* The amount of free space is passed along to the rate limiter. */
758 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
765 /* Write a suppression message if we suppressed something */
767 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
772 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
/* Writes one message to the console TTY, formatted as
 * identifier[pid]: message, or identifier: message without credentials.
 *
 * Messages with a priority above s->max_level_console are skipped. When no
 * identifier is given but credentials are, the process name of the sender
 * is used. The target is s->tty_path, defaulting to /dev/console. Errors
 * are only logged at debug level. */
775 void server_forward_console(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
/* At most four parts: identifier, PID header or separator, message, newline. */
776 struct iovec iovec[4];
779 char *ident_buf = NULL;
785 if (LOG_PRI(priority) > s->max_level_console)
788 /* First: identifier and PID */
/* ident_buf stays NULL if the lookup fails, leaving identifier NULL. */
791 get_process_comm(ucred->pid, &ident_buf);
792 identifier = ident_buf;
795 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
796 char_array_0(header_pid);
799 IOVEC_SET_STRING(iovec[n++], identifier);
801 IOVEC_SET_STRING(iovec[n++], header_pid);
802 } else if (identifier) {
803 IOVEC_SET_STRING(iovec[n++], identifier);
804 IOVEC_SET_STRING(iovec[n++], ": ");
/* Then the message text followed by a newline. */
808 IOVEC_SET_STRING(iovec[n++], message);
809 IOVEC_SET_STRING(iovec[n++], "\n");
811 tty = s->tty_path ? s->tty_path : "/dev/console";
813 fd = open_terminal(tty, O_WRONLY|O_NOCTTY|O_CLOEXEC);
/* NOTE(review): if open_terminal() reports failures as a negative errno
 * return value, errno may no longer describe the error here; confirm. */
815 log_debug("Failed to open %s for logging: %s", tty, strerror(errno));
819 if (writev(fd, iovec, n) < 0)
820 log_debug("Failed to write to %s for logging: %s", tty, strerror(errno));
822 close_nointr_nofail(fd);
/* Checks whether the first l bytes of p form a field name that clients may
 * set themselves.
 *
 * A valid name is 1 to 64 bytes long, consists only of A-Z, 0-9 and
 * underscore, and starts with neither a digit nor an underscore (names with
 * a leading underscore are reserved for trusted fields). p does not need to
 * be NUL terminated. */
static bool valid_user_field(const char *p, size_t l) {
        const char *a;

        /* We kinda enforce POSIX syntax recommendations for
           environment variables here, but make a couple of additional
           requirements.

           http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html */

        /* No empty field names */
        if (l <= 0)
                return false;

        /* Don't allow names longer than 64 chars */
        if (l > 64)
                return false;

        /* Variables starting with an underscore are protected */
        if (p[0] == '_')
                return false;

        /* Don't allow digits as first character */
        if (p[0] >= '0' && p[0] <= '9')
                return false;

        /* Only allow A-Z0-9 and '_' */
        for (a = p; a < p + l; a++)
                if (!((*a >= 'A' && *a <= 'Z') ||
                      (*a >= '0' && *a <= '9') ||
                      *a == '_'))
                        return false;

        return true;
}
/* Parses a datagram in the native journal protocol and dispatches the
 * entries it contains.
 *
 * The buffer is a sequence of newline-terminated lines. A line that starts
 * with a newline separates two entries. Lines starting with a dot or a hash
 * are skipped. A line containing an equals sign is a NAME=value field. A
 * line without one is a field name followed by a 64-bit length, that many
 * bytes of value and a newline; such fields are copied into a freshly
 * allocated NAME=value buffer.
 *
 * Only names accepted by valid_user_field() are stored. PRIORITY and
 * SYSLOG_FACILITY update the priority used for rate limiting and
 * forwarding; SYSLOG_IDENTIFIER and MESSAGE are duplicated for forwarding.
 * At the end a _TRANSPORT=journal field is filled in, the message is
 * forwarded to syslog, kmsg and console as configured, and the entry is
 * dispatched. iovec entries that do not point into buffer were allocated
 * here and are freed afterwards. */
865 static void process_native_message(
867 const void *buffer, size_t buffer_size,
870 const char *label, size_t label_len) {
872 struct iovec *iovec = NULL;
873 unsigned n = 0, m = 0, j, tn = (unsigned) -1;
876 int priority = LOG_INFO;
877 char *identifier = NULL, *message = NULL;
880 assert(buffer || buffer_size == 0);
883 remaining = buffer_size;
885 while (remaining > 0) {
/* Every line has to be terminated by a newline. */
888 e = memchr(p, '\n', remaining);
891 /* Trailing noise, let's ignore it, and flush what we collected */
892 log_debug("Received message with trailing noise, ignoring.");
897 /* Entry separator */
898 server_dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
907 if (*p == '.' || *p == '#') {
908 /* Ignore control commands for now, and
910 remaining -= (e - p) + 1;
915 /* A property follows */
/* Grow the iovec array so that the metadata fields always fit as well. */
917 if (n+N_IOVEC_META_FIELDS >= m) {
921 u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
922 c = realloc(iovec, u * sizeof(struct iovec));
932 q = memchr(p, '=', e - p);
934 if (valid_user_field(p, q - p)) {
939 /* If the field name starts with an
940 * underscore, skip the variable,
941 * since that indicates a trusted
943 iovec[n].iov_base = (char*) p;
944 iovec[n].iov_len = l;
947 /* We need to determine the priority
948 * of this entry for the rate limiting
951 memcmp(p, "PRIORITY=", 9) == 0 &&
952 p[9] >= '0' && p[9] <= '9')
953 priority = (priority & LOG_FACMASK) | (p[9] - '0');
956 memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
957 p[16] >= '0' && p[16] <= '9')
958 priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
961 memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
962 p[16] >= '0' && p[16] <= '9' &&
963 p[17] >= '0' && p[17] <= '9')
964 priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
967 memcmp(p, "SYSLOG_IDENTIFIER=", 18) == 0) {
970 t = strndup(p + 18, l - 18);
976 memcmp(p, "MESSAGE=", 8) == 0) {
979 t = strndup(p + 8, l - 8);
987 remaining -= (e - p) + 1;
995 if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
996 log_debug("Failed to parse message, ignoring.");
1000 memcpy(&l_le, e + 1, sizeof(uint64_t));
1003 if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1004 e[1+sizeof(uint64_t)+l] != '\n') {
1005 log_debug("Failed to parse message, ignoring.");
1009 k = malloc((e - p) + 1 + l);
1015 memcpy(k, p, e - p);
1017 memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1019 if (valid_user_field(p, e - p)) {
1020 iovec[n].iov_base = k;
1021 iovec[n].iov_len = (e - p) + 1 + l;
1026 remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1027 p = e + 1 + sizeof(uint64_t) + l + 1;
1035 IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1038 if (s->forward_to_syslog)
1039 server_forward_syslog(s, priority, identifier, message, ucred, tv);
1041 if (s->forward_to_kmsg)
1042 server_forward_kmsg(s, priority, identifier, message, ucred);
1044 if (s->forward_to_console)
1045 server_forward_console(s, priority, identifier, message, ucred);
1048 server_dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
1051 for (j = 0; j < n; j++) {
1055 if (iovec[j].iov_base < buffer ||
1056 (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1057 free(iovec[j].iov_base);
1065 static void process_native_file(
1068 struct ucred *ucred,
1070 const char *label, size_t label_len) {
1079 /* Data is in the passed file, since it didn't fit in a
1080 * datagram. We can't map the file here, since clients might
1081 * then truncate it and trigger a SIGBUS for us. So let's
1082 * stupidly read it */
1084 if (fstat(fd, &st) < 0) {
1085 log_error("Failed to stat passed file, ignoring: %m");
1089 if (!S_ISREG(st.st_mode)) {
1090 log_error("File passed is not regular. Ignoring.");
1094 if (st.st_size <= 0)
1097 if (st.st_size > ENTRY_SIZE_MAX) {
1098 log_error("File passed too large. Ignoring.");
1102 p = malloc(st.st_size);
1108 n = pread(fd, p, st.st_size, 0);
1110 log_error("Failed to read file, ignoring: %s", strerror(-n));
1112 process_native_message(s, p, n, ucred, tv, label, label_len);
/* Opens the persistent system journal and/or the runtime journal, depending
 * on s->storage.
 *
 * The persistent journal is only opened for persistent or auto storage and
 * only once /run/systemd/journal/flushed exists. The runtime journal is
 * opened for every storage mode except none: if the system journal is open
 * it is only opened when it already exists, otherwise it is created. */
1117 static int system_journal_open(Server *s) {
1123 r = sd_id128_get_machine(&machine);
1127 sd_id128_to_string(machine, ids);
1129 if (!s->system_journal &&
1130 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
1131 access("/run/systemd/journal/flushed", F_OK) >= 0) {
1133 /* If in auto mode: first try to create the machine
1134 * path, but not the prefix.
1136 * If in persistent mode: create /var/log/journal and
1137 * the machine path */
1139 if (s->storage == STORAGE_PERSISTENT)
1140 (void) mkdir("/var/log/journal/", 0755);
1142 fn = strappend("/var/log/journal/", ids);
1146 (void) mkdir(fn, 0755);
1149 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
1153 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
1157 server_fix_perms(s, s->system_journal, 0);
/* A missing directory or a read-only file system is not worth a warning. */
1160 if (r != -ENOENT && r != -EROFS)
1161 log_warning("Failed to open system journal: %s", strerror(-r));
1167 if (!s->runtime_journal &&
1168 (s->storage != STORAGE_NONE)) {
1170 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
1174 if (s->system_journal) {
1176 /* Try to open the runtime journal, but only
1177 * if it already exists, so that we can flush
1178 * it into the system journal */
1180 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1185 log_warning("Failed to open runtime journal: %s", strerror(-r));
1192 /* OK, we really need the runtime journal, so create
1193 * it if necessary. */
1195 (void) mkdir_parents(fn, 0755);
1196 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1200 log_error("Failed to open runtime journal: %s", strerror(-r));
1205 if (s->runtime_journal)
1206 server_fix_perms(s, s->runtime_journal, 0);
/* Copies all entries from the runtime journal into the persistent system
 * journal, then closes the runtime journal and removes /run/log/journal.
 *
 * Nothing is done unless storage is auto or persistent, a runtime journal
 * is open and the system journal can be opened. If copying an entry fails,
 * the copy is attempted a second time.
 * NOTE(review): the statements between the two copy attempts (presumably
 * rotation and vacuuming) are not visible here. */
1212 static int server_flush_to_var(Server *s) {
1220 if (s->storage != STORAGE_AUTO &&
1221 s->storage != STORAGE_PERSISTENT)
1224 if (!s->runtime_journal)
1227 system_journal_open(s);
1229 if (!s->system_journal)
1232 log_info("Flushing to /var...");
1234 r = sd_id128_get_machine(&machine);
1236 log_error("Failed to get machine id: %s", strerror(-r));
/* Read the runtime journal back through the public reader API. */
1240 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1242 log_error("Failed to read runtime journal: %s", strerror(-r));
1246 SD_JOURNAL_FOREACH(j) {
1249 f = j->current_file;
1250 assert(f && f->current_offset > 0);
1252 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1254 log_error("Can't read entry: %s", strerror(-r));
1258 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1260 log_info("Allocation limit reached.");
1262 journal_file_post_change(s->system_journal);
/* Second attempt after the first copy failed. */
1266 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1270 log_error("Can't write entry: %s", strerror(-r));
1276 journal_file_post_change(s->system_journal);
/* The runtime journal is no longer needed. */
1278 journal_file_close(s->runtime_journal);
1279 s->runtime_journal = NULL;
1282 rm_rf("/run/log/journal", false, true, false);
/* Handles one epoll event by dispatching on the file descriptor it is for.
 *
 * - signal fd: reads the signal; SIGUSR1 flushes the runtime journal to
 *   /var (the SIGUSR2 and termination handling is not visible here).
 * - /dev/kmsg fd: reads kernel messages.
 * - native and syslog sockets: receives one datagram together with its
 *   control data (credentials, SELinux label, timestamp, passed fds) and
 *   processes it according to the socket it arrived on.
 * - stdout listening socket: accepts a new stream connection.
 * - anything else: the event data is treated as a StdoutStream pointer.
 *
 * NOTE(review): the return value convention is not visible here; main()
 * stores the result in r and uses it for the exit status. */
1287 static int process_event(Server *s, struct epoll_event *ev) {
1291 if (ev->data.fd == s->signal_fd) {
1292 struct signalfd_siginfo sfsi;
1295 if (ev->events != EPOLLIN) {
1296 log_info("Got invalid event from epoll.");
1300 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1301 if (n != sizeof(sfsi)) {
1306 if (errno == EINTR || errno == EAGAIN)
1312 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
/* SIGUSR1: create the flag file, then move the runtime journal to /var. */
1314 if (sfsi.ssi_signo == SIGUSR1) {
1315 touch("/run/systemd/journal/flushed");
1316 server_flush_to_var(s);
1320 if (sfsi.ssi_signo == SIGUSR2) {
1328 } else if (ev->data.fd == s->dev_kmsg_fd) {
1331 if (ev->events != EPOLLIN) {
1332 log_info("Got invalid event from epoll.");
1336 r = server_read_dev_kmsg(s);
1342 } else if (ev->data.fd == s->native_fd ||
1343 ev->data.fd == s->syslog_fd) {
1345 if (ev->events != EPOLLIN) {
1346 log_info("Got invalid event from epoll.");
1351 struct msghdr msghdr;
1353 struct ucred *ucred = NULL;
1354 struct timeval *tv = NULL;
1355 struct cmsghdr *cmsg;
1357 size_t label_len = 0;
1359 struct cmsghdr cmsghdr;
1361 /* We use NAME_MAX space for the
1362 * SELinux label here. The kernel
1363 * currently enforces no limit, but
1364 * according to suggestions from the
1365 * SELinux people this will change and
1366 * it will probably be identical to
1367 * NAME_MAX. For now we use that, but
1368 * this should be updated one day when
1369 * the final limit is known.*/
1370 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1371 CMSG_SPACE(sizeof(struct timeval)) +
1372 CMSG_SPACE(sizeof(int)) + /* fd */
1373 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask for the size of the pending datagram so the buffer can be grown. */
1380 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1381 log_error("SIOCINQ failed: %m");
1385 if (s->buffer_size < (size_t) v) {
/* Grow to at least twice the old size; one extra byte is allocated. */
1389 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1390 b = realloc(s->buffer, l+1);
1393 log_error("Couldn't increase buffer.");
1402 iovec.iov_base = s->buffer;
1403 iovec.iov_len = s->buffer_size;
1407 msghdr.msg_iov = &iovec;
1408 msghdr.msg_iovlen = 1;
1409 msghdr.msg_control = &control;
1410 msghdr.msg_controllen = sizeof(control);
1412 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1415 if (errno == EINTR || errno == EAGAIN)
1418 log_error("recvmsg() failed: %m");
/* Pick credentials, label, timestamp and passed fds out of the control data. */
1422 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1424 if (cmsg->cmsg_level == SOL_SOCKET &&
1425 cmsg->cmsg_type == SCM_CREDENTIALS &&
1426 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1427 ucred = (struct ucred*) CMSG_DATA(cmsg);
1428 else if (cmsg->cmsg_level == SOL_SOCKET &&
1429 cmsg->cmsg_type == SCM_SECURITY) {
1430 label = (char*) CMSG_DATA(cmsg);
1431 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1432 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1433 cmsg->cmsg_type == SO_TIMESTAMP &&
1434 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1435 tv = (struct timeval*) CMSG_DATA(cmsg);
1436 else if (cmsg->cmsg_level == SOL_SOCKET &&
1437 cmsg->cmsg_type == SCM_RIGHTS) {
1438 fds = (int*) CMSG_DATA(cmsg);
1439 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* Syslog socket: plain text only, file descriptors are not accepted. */
1443 if (ev->data.fd == s->syslog_fd) {
1446 if (n > 0 && n_fds == 0) {
1447 e = memchr(s->buffer, '\n', n);
1453 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1454 } else if (n_fds > 0)
1455 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native socket: either inline data, or exactly one fd carrying the data. */
1458 if (n > 0 && n_fds == 0)
1459 process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1460 else if (n == 0 && n_fds == 1)
1461 process_native_file(s, fds[0], ucred, tv, label, label_len);
1463 log_warning("Got too many file descriptors via native socket. Ignoring.");
/* Received descriptors are always closed again. */
1466 close_many(fds, n_fds);
1471 } else if (ev->data.fd == s->stdout_fd) {
1473 if (ev->events != EPOLLIN) {
1474 log_info("Got invalid event from epoll.");
1478 stdout_stream_new(s);
1482 StdoutStream *stream;
/* Only EPOLLIN and EPOLLHUP are expected on stream connections. */
1484 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1485 log_info("Got invalid event from epoll.");
1489 /* If it is none of the well-known fds, it must be an
1490 * stdout stream fd. Note that this is a bit ugly here
1491 * (since we rely that none of the well-known fds
1492 * could be interpreted as pointer), but nonetheless
1493 * safe, since the well-known fds would never get an
1494 * fd > 4096, i.e. beyond the first memory page */
1496 stream = ev->data.ptr;
/* A result of zero or below ends the stream. */
1498 if (stdout_stream_process(stream) <= 0)
1499 stdout_stream_free(stream);
1504 log_error("Unknown event.");
1509 static int open_native_socket(Server*s) {
1510 union sockaddr_union sa;
1512 struct epoll_event ev;
1516 if (s->native_fd < 0) {
1518 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1519 if (s->native_fd < 0) {
1520 log_error("socket() failed: %m");
1525 sa.un.sun_family = AF_UNIX;
1526 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
1528 unlink(sa.un.sun_path);
1530 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
1532 log_error("bind() failed: %m");
1536 chmod(sa.un.sun_path, 0666);
1538 fd_nonblock(s->native_fd, 1);
1541 r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
1543 log_error("SO_PASSCRED failed: %m");
1549 r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
1551 log_warning("SO_PASSSEC failed: %m");
1555 r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
1557 log_error("SO_TIMESTAMP failed: %m");
1562 ev.events = EPOLLIN;
1563 ev.data.fd = s->native_fd;
1564 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
1565 log_error("Failed to add native server fd to epoll object: %m");
/* Sets the process signal mask to SIGINT, SIGTERM, SIGUSR1 and SIGUSR2,
 * creates a non-blocking signalfd for them and registers it with the epoll
 * instance of the server. */
1573 static int open_signalfd(Server *s) {
1575 struct epoll_event ev;
1579 assert_se(sigemptyset(&mask) == 0);
1580 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
/* SIG_SETMASK replaces the whole mask, so exactly these four are blocked. */
1581 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1583 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1584 if (s->signal_fd < 0) {
1585 log_error("signalfd(): %m");
1590 ev.events = EPOLLIN;
1591 ev.data.fd = s->signal_fd;
1593 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1594 log_error("epoll_ctl(): %m");
/* Applies journald switches found on the kernel command line.
 *
 * Recognized words are systemd.journald.forward_to_syslog=,
 * systemd.journald.forward_to_kmsg= and systemd.journald.forward_to_console=,
 * each taking a boolean. Other words with the systemd.journald prefix
 * produce a warning. The offsets 35, 33 and 36 are the lengths of the
 * respective prefixes. A container check precedes the parsing (the action
 * taken when inside one is not visible here). */
1601 static int server_parse_proc_cmdline(Server *s) {
1602 char *line, *w, *state;
1606 if (detect_container(NULL) > 0)
1609 r = read_one_line_file("/proc/cmdline", &line);
1611 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1615 FOREACH_WORD_QUOTED(w, l, line, state) {
1618 word = strndup(w, l);
1624 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1625 r = parse_boolean(word + 35);
1627 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1629 s->forward_to_syslog = r;
1630 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1631 r = parse_boolean(word + 33);
1633 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1635 s->forward_to_kmsg = r;
1636 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1637 r = parse_boolean(word + 36);
1639 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1641 s->forward_to_console = r;
1642 } else if (startswith(word, "systemd.journald"))
1643 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Loads /etc/systemd/journald.conf into the Server, using the Journal
 * section and the gperf-generated lookup table. A missing file is checked
 * for separately (ENOENT); other open or parse failures are logged as
 * warnings. */
1655 static int server_parse_config_file(Server *s) {
1662 fn = "/etc/systemd/journald.conf";
1663 f = fopen(fn, "re");
1665 if (errno == ENOENT)
1668 log_warning("Failed to open configuration file %s: %m", fn);
1672 r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
1674 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Initializes the Server: sets defaults, reads the configuration file and
 * kernel command line, takes over sockets passed by the service manager,
 * opens the remaining sockets and input sources, and finally opens the
 * journal files. */
1681 static int server_init(Server *s) {
/* Mark all descriptors as not yet open. */
1687 s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1691 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1692 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1694 s->forward_to_syslog = true;
1696 s->max_level_store = LOG_DEBUG;
1697 s->max_level_syslog = LOG_DEBUG;
1698 s->max_level_kmsg = LOG_NOTICE;
1699 s->max_level_console = LOG_INFO;
/* Fill the metrics with all-ones bytes; presumably this marks every limit
 * as unset so that defaults are picked later (TODO confirm). */
1701 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1702 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* The kernel command line is parsed after the file, so it wins. */
1704 server_parse_config_file(s);
1705 server_parse_proc_cmdline(s);
1707 mkdir_p("/run/systemd/journal", 0755);
1709 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1710 if (!s->user_journals)
1713 s->mmap = mmap_cache_new();
1717 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1718 if (s->epoll_fd < 0) {
1719 log_error("Failed to create epoll object: %m");
/* Classify each passed socket by type and path; at most one of each kind. */
1723 n = sd_listen_fds(true);
1725 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1729 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1731 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1733 if (s->native_fd >= 0) {
1734 log_error("Too many native sockets passed.");
1740 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1742 if (s->stdout_fd >= 0) {
1743 log_error("Too many stdout sockets passed.");
1749 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1751 if (s->syslog_fd >= 0) {
1752 log_error("Too many /dev/log sockets passed.");
1759 log_error("Unknown socket passed.");
/* Open or finish setting up every input source. */
1764 r = server_open_syslog_socket(s);
1768 r = open_native_socket(s);
1772 r = server_open_stdout_socket(s);
1776 r = server_open_dev_kmsg(s);
1780 r = server_open_kernel_seqnum(s);
1784 r = open_signalfd(s);
1788 s->udev = udev_new();
1792 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1796 r = system_journal_open(s);
/* Passes the current realtime clock value to
 * journal_file_maybe_append_tag() for the system journal and for every open
 * user journal. The runtime journal is not included. */
1803 static void maybe_append_tags(Server *s) {
1809 n = now(CLOCK_REALTIME);
1811 if (s->system_journal)
1812 journal_file_maybe_append_tag(s->system_journal, n);
1814 HASHMAP_FOREACH(f, s->user_journals, i)
1815 journal_file_maybe_append_tag(f, n);
/* Releases everything owned by the Server: stdout streams, journal files,
 * the user journal hashmap, all file descriptors, the rate limiter, the
 * kernel sequence number mapping, the mmap cache and the udev context. */
1819 static void server_done(Server *s) {
/* Loops until the list head is empty, so freeing must unlink the stream. */
1823 while (s->stdout_streams)
1824 stdout_stream_free(s->stdout_streams);
1826 if (s->system_journal)
1827 journal_file_close(s->system_journal);
1829 if (s->runtime_journal)
1830 journal_file_close(s->runtime_journal);
1832 while ((f = hashmap_steal_first(s->user_journals)))
1833 journal_file_close(f);
1835 hashmap_free(s->user_journals);
/* Descriptors still set to -1 were never opened and are skipped. */
1837 if (s->epoll_fd >= 0)
1838 close_nointr_nofail(s->epoll_fd);
1840 if (s->signal_fd >= 0)
1841 close_nointr_nofail(s->signal_fd);
1843 if (s->syslog_fd >= 0)
1844 close_nointr_nofail(s->syslog_fd);
1846 if (s->native_fd >= 0)
1847 close_nointr_nofail(s->native_fd);
1849 if (s->stdout_fd >= 0)
1850 close_nointr_nofail(s->stdout_fd);
1852 if (s->dev_kmsg_fd >= 0)
1853 close_nointr_nofail(s->dev_kmsg_fd);
1856 journal_rate_limit_free(s->rate_limit);
1858 if (s->kernel_seqnum)
1859 munmap(s->kernel_seqnum, sizeof(uint64_t));
1865 mmap_cache_unref(s->mmap);
1868 udev_unref(s->udev);
/* Entry point of systemd-journald.
 *
 * Sets up logging, initializes the server, vacuums old files, flushes the
 * runtime journal and pending kernel messages, and then runs the epoll
 * loop. While a system journal is open, the epoll timeout is derived from
 * journal_file_next_evolve_usec(). Start and stop are recorded as driver
 * messages. The exit status reflects whether the last result was negative. */
1871 int main(int argc, char *argv[]) {
1875 /* if (getppid() != 1) { */
1876 /* log_error("This program should be invoked by init only."); */
1877 /* return EXIT_FAILURE; */
1881 log_error("This program does not take arguments.");
1882 return EXIT_FAILURE;
1885 log_set_target(LOG_TARGET_SAFE);
1886 log_set_facility(LOG_SYSLOG);
1887 log_set_max_level(LOG_DEBUG);
1888 log_parse_environment();
1893 r = server_init(&server);
1897 server_vacuum(&server);
1898 server_flush_to_var(&server);
1899 server_flush_dev_kmsg(&server);
1901 log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
1902 server_driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
1906 "STATUS=Processing requests...");
1909 struct epoll_event event;
1915 if (server.system_journal &&
1916 journal_file_next_evolve_usec(server.system_journal, &u)) {
1919 n = now(CLOCK_REALTIME);
/* Time left until u, converted from microseconds to milliseconds, rounded up. */
1924 t = (int) ((u - n + USEC_PER_MSEC - 1) / USEC_PER_MSEC);
/* One event is handled per iteration. */
1929 r = epoll_wait(server.epoll_fd, &event, 1, t);
1935 log_error("epoll_wait() failed: %m");
1941 r = process_event(&server, &event);
1948 maybe_append_tags(&server);
1951 log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
1952 server_driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
1956 "STATUS=Shutting down...");
1958 server_done(&server);
1960 return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;