1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
25 #include <sys/signalfd.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
35 #include <systemd/sd-journal.h>
36 #include <systemd/sd-messages.h>
37 #include <systemd/sd-daemon.h>
40 #include <systemd/sd-login.h>
45 #include "journal-file.h"
46 #include "socket-util.h"
47 #include "cgroup-util.h"
51 #include "conf-parser.h"
52 #include "journal-rate-limit.h"
53 #include "journal-internal.h"
54 #include "journal-vacuum.h"
55 #include "journal-authenticate.h"
57 #include "journald-kmsg.h"
58 #include "journald-syslog.h"
59 #include "journald-stream.h"
60 #include "journald-console.h"
64 #include <acl/libacl.h>
69 #include <selinux/selinux.h>
/* Upper bound on the number of per-user journal files kept open at once;
 * find_journal() closes files until the count is below this limit. */
72 #define USER_JOURNALS_MAX 1024
/* Defaults that server_init() assigns to the rate limiter settings. */
74 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
75 #define DEFAULT_RATE_LIMIT_BURST 200
/* Maximum age of the value cached by available_space() before it is
 * recomputed. */
77 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* Largest passed file (32 MiB) that process_native_file() accepts. */
79 #define ENTRY_SIZE_MAX (1024*1024*32)
/* Textual names of the Storage enum values, indexed by the enum constant.
 * The two macro invocations below are instantiated over this table.
 * NOTE(review): presumably they generate the string<->enum lookup helpers
 * and the config_parse_storage() callback; confirm against the macro
 * definitions, which are not part of this file. */
81 static const char* const storage_table[] = {
82 [STORAGE_AUTO] = "auto",
83 [STORAGE_VOLATILE] = "volatile",
84 [STORAGE_PERSISTENT] = "persistent",
85 [STORAGE_NONE] = "none"
88 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
89 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
/* Computes how many more bytes the journal may consume and caches the
 * result in the Server for RECHECK_AVAILABLE_SPACE_USEC.
 *
 * Two limits are derived from the active metrics (system metrics when a
 * system journal is open, runtime metrics otherwise):
 *   - max_use minus the allocated size of all "*.journal" and
 *     "*.journal~" regular files in the machine's journal directory;
 *   - the free space of the backing file system minus keep_free.
 * Both are clamped at zero and the smaller one wins. */
91 static uint64_t available_space(Server *s) {
96 uint64_t sum = 0, avail = 0, ss_avail = 0;
102 ts = now(CLOCK_MONOTONIC);
/* Cached value still fresh? Then skip the directory scan. */
104 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
105 return s->cached_available_space;
107 r = sd_id128_get_machine(&machine);
/* Select the directory prefix and limits of the journal being written. */
111 if (s->system_journal) {
112 f = "/var/log/journal/";
113 m = &s->system_metrics;
115 f = "/run/log/journal/";
116 m = &s->runtime_metrics;
121 p = strappend(f, sd_id128_to_string(machine, ids));
131 if (fstatvfs(dirfd(d), &ss) < 0)
136 struct dirent buf, *de;
138 r = readdir_r(d, &buf, &de);
/* Only entries named like journal files are counted. */
145 if (!endswith(de->d_name, ".journal") &&
146 !endswith(de->d_name, ".journal~"))
149 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
152 if (!S_ISREG(st.st_mode))
/* st_blocks is converted to bytes assuming 512-byte units. */
155 sum += (uint64_t) st.st_blocks * 512UL;
158 avail = sum >= m->max_use ? 0 : m->max_use - sum;
/* f_bavail is a count of f_frsize-sized blocks. Widen both operands so
 * the product cannot wrap where the statvfs fields are only 32 bit. */
160 ss_avail = (uint64_t) ss.f_frsize * (uint64_t) ss.f_bavail;
162 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
164 if (ss_avail < avail)
167 s->cached_available_space = avail;
168 s->cached_available_space_timestamp = ts;
/* Resolves the GID of the "adm" group into s->file_gid, at most once per
 * Server. A failed lookup only produces a warning; the result is marked
 * valid either way so the lookup is not repeated. */
176 static void server_read_file_gid(Server *s) {
177 const char *adm = "adm";
/* Already resolved (or already attempted) earlier. */
182 if (s->file_gid_valid)
185 r = get_group_creds(&adm, &s->file_gid);
187 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
189 /* if we couldn't read the gid, then it will be 0, but that's
190 * fine and we shouldn't try to resolve the group again, so
191 * let's just pretend it worked right-away. */
192 s->file_gid_valid = true;
/* Applies the standard ownership and access rights to journal file f:
 * mode 0640, owner 0, group s->file_gid. It then patches the file's ACL
 * so that an ACL_USER entry for uid carries read permission.
 * Every failure is logged as a warning and otherwise ignored.
 * NOTE(review): whether the ACL part is skipped for some uid values
 * cannot be told from the lines visible here. */
195 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
200 acl_permset_t permset;
/* Make sure s->file_gid has been looked up before it is used below. */
205 server_read_file_gid(s);
207 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
209 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
215 acl = acl_get_fd(f->fd);
217 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
/* Look for an existing entry for this user; a new ACL_USER entry
 * qualified with uid is created by the calls that follow. */
221 r = acl_find_uid(acl, uid, &entry);
224 if (acl_create_entry(&acl, &entry) < 0 ||
225 acl_set_tag_type(entry, ACL_USER) < 0 ||
226 acl_set_qualifier(entry, &uid) < 0) {
227 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
/* Grant read access on the entry and recompute the ACL mask. */
232 if (acl_get_permset(entry, &permset) < 0 ||
233 acl_add_perm(permset, ACL_READ) < 0 ||
234 acl_calc_mask(&acl) < 0) {
235 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
239 if (acl_set_fd(f->fd, acl) < 0)
240 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
/* Returns the journal file that entries for uid should be written to.
 *
 * The runtime journal is returned whenever it is open. Otherwise a
 * per-user file "/var/log/journal/<machine-id>/user-<uid>.journal" is
 * looked up in, or opened and added to, s->user_journals. Every failure
 * along the way falls back to s->system_journal. */
247 static JournalFile* find_journal(Server *s, uid_t uid) {
255 /* We split up user logs only on /var, not on /run. If the
256 * runtime file is open, we write to it exclusively, in order
257 * to guarantee proper order as soon as we flush /run to
258 * /var and close the runtime file. */
260 if (s->runtime_journal)
261 return s->runtime_journal;
264 return s->system_journal;
266 r = sd_id128_get_machine(&machine);
268 return s->system_journal;
/* Reuse an already open per-user journal if there is one. */
270 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
274 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
275 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
276 return s->system_journal;
278 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
279 /* Too many open? Then let's close one */
280 f = hashmap_steal_first(s->user_journals);
282 journal_file_close(f);
285 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
289 return s->system_journal;
/* Give the new file the standard mode/owner plus a read ACL for uid. */
291 server_fix_perms(s, f, uid);
293 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
/* Could not be tracked in the map: close it again and fall back. */
295 journal_file_close(f);
296 return s->system_journal;
/* Rotates all open journal files: the runtime journal (never sealed),
 * the system journal, and every per-user journal in s->user_journals.
 * Failures are logged. After each rotation the permissions of the
 * resulting file are fixed up via server_fix_perms(). */
302 static void server_rotate(Server *s) {
308 log_info("Rotating...");
310 if (s->runtime_journal) {
311 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
313 if (s->runtime_journal)
314 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
316 log_error("Failed to create new runtime journal: %s", strerror(-r));
318 server_fix_perms(s, s->runtime_journal, 0);
321 if (s->system_journal) {
322 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
324 if (s->system_journal)
325 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
327 log_error("Failed to create new system journal: %s", strerror(-r));
330 server_fix_perms(s, s->system_journal, 0);
/* The map key k is the owning uid, see find_journal(). */
333 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
334 r = journal_file_rotate(&f, s->compress, s->seal);
337 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
339 log_error("Failed to create user journal: %s", strerror(-r));
/* journal_file_rotate() updated f; store the new file in the map and
 * fix the permissions of that file, not of the system journal, so the
 * owning user keeps read access to the freshly created journal. */
341 hashmap_replace(s->user_journals, k, f);
342 server_fix_perms(s, f, PTR_TO_UINT32(k));
/* Vacuums the journal directory of this machine under /var/log/journal
 * (if a system journal is open) and under /run/log/journal (if a runtime
 * journal is open), using max_use and keep_free of the matching metrics.
 * Errors other than -ENOENT are logged. */
347 static void server_vacuum(Server *s) {
353 log_info("Vacuuming...");
355 r = sd_id128_get_machine(&machine);
357 log_error("Failed to get machine ID: %s", strerror(-r));
361 sd_id128_to_string(machine, ids);
363 if (s->system_journal) {
364 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
369 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
370 if (r < 0 && r != -ENOENT)
371 log_error("Failed to vacuum %s: %s", p, strerror(-r));
375 if (s->runtime_journal) {
376 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
381 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
382 if (r < 0 && r != -ENOENT)
383 log_error("Failed to vacuum %s: %s", p, strerror(-r));
/* A zero timestamp makes the next available_space() call recompute its
 * value instead of returning the cached one. */
387 s->cached_available_space_timestamp = 0;
/* Returns a newly allocated cgroup path of pid in the systemd hierarchy,
 * expressed relative to the cgroup of PID 1: when the process path starts
 * with the (trimmed) init path, that prefix is cut off.
 * NOTE(review): the result for non-matching prefixes and for errors is
 * decided on lines not visible here. */
390 static char *shortened_cgroup_path(pid_t pid) {
392 char *process_path, *init_path, *path;
396 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
400 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
/* Drop a trailing "/system" (7 characters) from the init path. */
406 if (endswith(init_path, "/system"))
407 init_path[strlen(init_path) - 7] = 0;
408 else if (streq(init_path, "/"))
411 if (startswith(process_path, init_path)) {
414 p = strdup(process_path + strlen(init_path));
/* Appends one entry, given as n iovec fields, to the journal file that
 * find_journal() selects for uid.
 *
 * The file is rotated first when journal_file_rotate_suggested() says so.
 * If appending fails with an error outside the list below, the entry is
 * dropped with an error message. For the listed errors the reason is
 * logged, the journal is looked up again and the write is retried. */
432 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
434 bool vacuumed = false;
441 f = find_journal(s, uid);
445 if (journal_file_rotate_suggested(f)) {
446 log_info("Journal header limits reached or header out-of-date, rotating.");
451 f = find_journal(s, uid);
457 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
462 (r != -E2BIG && /* hit limit */
463 r != -EFBIG && /* hit fs limit */
464 r != -EDQUOT && /* quota hit */
465 r != -ENOSPC && /* disk full */
466 r != -EBADMSG && /* corrupted */
467 r != -ENODATA && /* truncated */
468 r != -EHOSTDOWN && /* other machine */
469 r != -EPROTONOSUPPORT && /* unsupported feature */
470 r != -EBUSY && /* unclean shutdown */
471 r != -ESHUTDOWN /* already archived */)) {
472 log_error("Failed to write entry, ignoring: %s", strerror(-r));
/* r holds a negative errno value, so every constant needs the minus
 * sign; otherwise quota and disk-full errors are reported as
 * corruption by the final branch. */
476 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
477 log_info("Allocation limit reached, rotating.");
478 else if (r == -EHOSTDOWN)
479 log_info("Journal file from other machine, rotating.");
480 else if (r == -EBUSY)
481 log_info("Unclean shutdown, rotating.");
483 log_warning("Journal file corrupted, rotating.");
489 f = find_journal(s, uid);
493 log_info("Retrying write.");
/* Completes an entry with trusted metadata fields and writes it out.
 *
 * iovec holds n caller-supplied fields and has room for m elements; the
 * caller must leave at least N_IOVEC_META_FIELDS free slots (asserted).
 * Fields derived from the sender's credentials (_PID, _UID, _GID, _COMM,
 * _EXE, _CMDLINE, audit, cgroup, session, unit and SELinux data) are
 * appended, followed by _SOURCE_REALTIME_TIMESTAMP, _BOOT_ID,
 * _MACHINE_ID and _HOSTNAME. A field whose value cannot be built is
 * left out. The entry goes to the journal of the sender's audit login
 * uid, or to uid 0's journal when the sender's real uid is 0. */
497 static void dispatch_message_real(
499 struct iovec *iovec, unsigned n, unsigned m,
502 const char *label, size_t label_len,
503 const char *unit_id) {
505 char *pid = NULL, *uid = NULL, *gid = NULL,
506 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
507 *comm = NULL, *cmdline = NULL, *hostname = NULL,
508 *audit_session = NULL, *audit_loginuid = NULL,
509 *exe = NULL, *cgroup = NULL, *session = NULL,
510 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
516 uid_t loginuid = 0, realuid = 0;
521 assert(n + N_IOVEC_META_FIELDS <= m);
529 realuid = ucred->uid;
531 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
532 IOVEC_SET_STRING(iovec[n++], pid);
534 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
535 IOVEC_SET_STRING(iovec[n++], uid);
537 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
538 IOVEC_SET_STRING(iovec[n++], gid);
540 r = get_process_comm(ucred->pid, &t);
542 comm = strappend("_COMM=", t);
546 IOVEC_SET_STRING(iovec[n++], comm);
549 r = get_process_exe(ucred->pid, &t);
551 exe = strappend("_EXE=", t);
555 IOVEC_SET_STRING(iovec[n++], exe);
558 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
560 cmdline = strappend("_CMDLINE=", t);
564 IOVEC_SET_STRING(iovec[n++], cmdline);
567 r = audit_session_from_pid(ucred->pid, &audit);
569 if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
570 IOVEC_SET_STRING(iovec[n++], audit_session);
572 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
574 if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
575 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
577 t = shortened_cgroup_path(ucred->pid);
579 cgroup = strappend("_SYSTEMD_CGROUP=", t);
583 IOVEC_SET_STRING(iovec[n++], cgroup);
587 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
588 session = strappend("_SYSTEMD_SESSION=", t);
592 IOVEC_SET_STRING(iovec[n++], session);
/* sd_pid_get_owner_uid() is keyed by process ID, like the session
 * lookup above; passing the uid would query an unrelated process. */
595 if (sd_pid_get_owner_uid(ucred->pid, &owner) >= 0)
596 if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
597 IOVEC_SET_STRING(iovec[n++], owner_uid);
/* The unit is taken from the sender's cgroup or from the caller-supplied
 * unit_id. */
600 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
601 unit = strappend("_SYSTEMD_UNIT=", t);
604 unit = strappend("_SYSTEMD_UNIT=", unit_id);
607 IOVEC_SET_STRING(iovec[n++], unit);
/* label is length-delimited, so the field is assembled by hand and
 * NUL-terminated explicitly. */
611 selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
612 if (selinux_context) {
613 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
614 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
615 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
616 IOVEC_SET_STRING(iovec[n++], selinux_context);
619 security_context_t con;
621 if (getpidcon(ucred->pid, &con) >= 0) {
622 selinux_context = strappend("_SELINUX_CONTEXT=", con);
624 IOVEC_SET_STRING(iovec[n++], selinux_context);
633 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
634 (unsigned long long) timeval_load(tv)) >= 0)
635 IOVEC_SET_STRING(iovec[n++], source_time);
638 /* Note that strictly speaking storing the boot id here is
639 * redundant since the entry includes this in-line
640 * anyway. However, we need this indexed, too. */
641 r = sd_id128_get_boot(&id);
643 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
644 IOVEC_SET_STRING(iovec[n++], boot_id);
646 r = sd_id128_get_machine(&id);
648 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
649 IOVEC_SET_STRING(iovec[n++], machine_id);
651 t = gethostname_malloc();
653 hostname = strappend("_HOSTNAME=", t);
656 IOVEC_SET_STRING(iovec[n++], hostname);
/* Messages from root go to uid 0's journal, everything else to the
 * journal of the audit login uid. */
661 write_to_journal(s, realuid == 0 ? 0 : loginuid, iovec, n);
674 free(audit_loginuid);
679 free(selinux_context);
/* Logs a message originating from the journal daemon itself.
 *
 * Builds an entry with PRIORITY=6, _TRANSPORT=driver, a printf-style
 * formatted MESSAGE= field and a MESSAGE_ID= field for message_id, then
 * dispatches it with the daemon's own pid/uid/gid as credentials. */
682 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
/* Room for "MESSAGE_ID=" (11), 32 characters of ID and the NUL. */
683 char mid[11 + 32 + 1];
684 char buffer[16 + LINE_MAX + 1];
/* Four fields are set here; the rest is reserved for metadata added by
 * dispatch_message_real(). */
685 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
693 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
694 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
/* The formatted text is written right behind the 8-byte "MESSAGE=". */
696 memcpy(buffer, "MESSAGE=", 8);
697 va_start(ap, format);
698 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
700 char_array_0(buffer);
701 IOVEC_SET_STRING(iovec[n++], buffer);
703 snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
705 IOVEC_SET_STRING(iovec[n++], mid);
708 ucred.pid = getpid();
709 ucred.uid = getuid();
710 ucred.gid = getgid();
712 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
/* Front end for storing a client message: applies the storage level
 * filter and per-cgroup rate limiting before handing the entry to
 * dispatch_message_real().
 *
 * Messages with a priority level above s->max_level_store are not
 * processed further. The rate limit bucket is the sender's shortened
 * cgroup path, cut after its third '/'. When earlier messages were
 * suppressed, a driver message reporting their number is written
 * first. */
715 void server_dispatch_message(
717 struct iovec *iovec, unsigned n, unsigned m,
720 const char *label, size_t label_len,
725 char *path = NULL, *c;
728 assert(iovec || n == 0);
733 if (LOG_PRI(priority) > s->max_level_store)
739 path = shortened_cgroup_path(ucred->pid);
743 /* example: /user/lennart/3/foobar
744 * /system/dbus.service/foobar
746 * So let's cut off everything past the third /, since that is
747 * where user directories start */
749 c = strchr(path, '/');
751 c = strchr(c+1, '/');
753 c = strchr(c+1, '/');
/* The free space figure is passed along to the rate limiter. */
759 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
766 /* Write a suppression message if we suppressed something */
768 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
773 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
/* Checks whether the l bytes at p form a field name that clients may
 * set. The individual rules are listed in the comments below; p does
 * not need to be NUL-terminated since only p[0..l-1] is examined by the
 * loop. */
776 static bool valid_user_field(const char *p, size_t l) {
779 /* We kinda enforce POSIX syntax recommendations for
780 environment variables here, but make a couple of additional
783 http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html */
785 /* No empty field names */
789 /* Don't allow names longer than 64 chars */
793 /* Variables starting with an underscore are protected */
797 /* Don't allow digits as first character */
798 if (p[0] >= '0' && p[0] <= '9')
801 /* Only allow A-Z0-9 and '_' */
802 for (a = p; a < p + l; a++)
803 if (!((*a >= 'A' && *a <= 'Z') ||
804 (*a >= '0' && *a <= '9') ||
/* Parses a datagram in the native journal protocol and dispatches the
 * entries it contains.
 *
 * The buffer is consumed line by line. A line containing '=' is a
 * "NAME=value" field. Otherwise the line is a field name followed by a
 * 64-bit length, that many bytes of binary data and a newline. Fields
 * are collected in an iovec array; text fields point into buffer, while
 * binary fields are copied into freshly allocated memory. Only names
 * accepted by valid_user_field() are stored. PRIORITY, SYSLOG_FACILITY,
 * SYSLOG_IDENTIFIER and MESSAGE are additionally decoded for rate
 * limiting and forwarding. At the end "_TRANSPORT=journal" is set, the
 * message is optionally forwarded to syslog, kmsg and console, and the
 * entry is dispatched. Finally every iovec element that does not point
 * into buffer is freed.
 *
 * NOTE(review): in the binary-field path the client-supplied length l
 * enters the bounds check and the malloc() size through plain
 * additions. A very large l could wrap those sums; an explicit upper
 * bound on l before they are evaluated looks necessary. Confirm
 * against the lines not visible here. */
811 static void process_native_message(
813 const void *buffer, size_t buffer_size,
816 const char *label, size_t label_len) {
818 struct iovec *iovec = NULL;
/* n = fields in use, m = allocated slots, tn = slot index used for the
 * _TRANSPORT field set near the end. */
819 unsigned n = 0, m = 0, j, tn = (unsigned) -1;
822 int priority = LOG_INFO;
823 char *identifier = NULL, *message = NULL;
826 assert(buffer || buffer_size == 0);
829 remaining = buffer_size;
831 while (remaining > 0) {
/* Each line ends at the next newline within the remaining bytes. */
834 e = memchr(p, '\n', remaining);
837 /* Trailing noise, let's ignore it, and flush what we collected */
838 log_debug("Received message with trailing noise, ignoring.");
843 /* Entry separator */
844 server_dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
853 if (*p == '.' || *p == '#') {
854 /* Ignore control commands for now, and
856 remaining -= (e - p) + 1;
861 /* A property follows */
863 if (n+N_IOVEC_META_FIELDS >= m) {
867 u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
868 c = realloc(iovec, u * sizeof(struct iovec));
878 q = memchr(p, '=', e - p);
880 if (valid_user_field(p, q - p)) {
885 /* If the field name starts with an
886 * underscore, skip the variable,
887 * since that indidates a trusted
889 iovec[n].iov_base = (char*) p;
890 iovec[n].iov_len = l;
893 /* We need to determine the priority
894 * of this entry for the rate limiting
897 memcmp(p, "PRIORITY=", 9) == 0 &&
898 p[9] >= '0' && p[9] <= '9')
899 priority = (priority & LOG_FACMASK) | (p[9] - '0');
902 memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
903 p[16] >= '0' && p[16] <= '9')
904 priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
907 memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
908 p[16] >= '0' && p[16] <= '9' &&
909 p[17] >= '0' && p[17] <= '9')
910 priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
913 memcmp(p, "SYSLOG_IDENTIFIER=", 18) == 0) {
916 t = strndup(p + 18, l - 18);
922 memcmp(p, "MESSAGE=", 8) == 0) {
925 t = strndup(p + 8, l - 8);
933 remaining -= (e - p) + 1;
941 if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
942 log_debug("Failed to parse message, ignoring.");
946 memcpy(&l_le, e + 1, sizeof(uint64_t));
949 if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
950 e[1+sizeof(uint64_t)+l] != '\n') {
951 log_debug("Failed to parse message, ignoring.");
955 k = malloc((e - p) + 1 + l);
963 memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
965 if (valid_user_field(p, e - p)) {
966 iovec[n].iov_base = k;
967 iovec[n].iov_len = (e - p) + 1 + l;
972 remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
973 p = e + 1 + sizeof(uint64_t) + l + 1;
981 IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
984 if (s->forward_to_syslog)
985 server_forward_syslog(s, priority, identifier, message, ucred, tv);
987 if (s->forward_to_kmsg)
988 server_forward_kmsg(s, priority, identifier, message, ucred);
990 if (s->forward_to_console)
991 server_forward_console(s, priority, identifier, message, ucred);
994 server_dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
997 for (j = 0; j < n; j++) {
1001 if (iovec[j].iov_base < buffer ||
1002 (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1003 free(iovec[j].iov_base);
/* Handles a native-protocol message that the client passed as a file
 * descriptor instead of as datagram payload.
 *
 * The descriptor must refer to a non-empty regular file of at most
 * ENTRY_SIZE_MAX bytes. Its content is read into a heap buffer and
 * handed to process_native_message(). Failures are logged and the
 * message is ignored. */
1011 static void process_native_file(
1014 struct ucred *ucred,
1016 const char *label, size_t label_len) {
1025 /* Data is in the passed file, since it didn't fit in a
1026 * datagram. We can't map the file here, since clients might
1027 * then truncate it and trigger a SIGBUS for us. So let's
1028 * stupidly read it */
1030 if (fstat(fd, &st) < 0) {
1031 log_error("Failed to stat passed file, ignoring: %m");
1035 if (!S_ISREG(st.st_mode)) {
1036 log_error("File passed is not regular. Ignoring.");
1040 if (st.st_size <= 0)
1043 if (st.st_size > ENTRY_SIZE_MAX) {
1044 log_error("File passed too large. Ignoring.");
1048 p = malloc(st.st_size);
1054 n = pread(fd, p, st.st_size, 0);
/* pread() signals failure by returning -1 and setting errno, so the
 * reason is formatted with %m; strerror(-n) would always describe
 * errno 1. */
1056 log_error("Failed to read file, ignoring: %m");
1058 process_native_message(s, p, n, ucred, tv, label, label_len);
/* Opens the journal files the server writes to, as far as they are not
 * open yet.
 *
 * The persistent system journal under /var/log/journal/<machine-id>/ is
 * opened when storage is "persistent" or "auto" and the flag file
 * /run/systemd/journal/flushed exists. The runtime journal under
 * /run/log/journal/<machine-id>/ is handled for every storage mode
 * except "none": with a system journal present it is opened only if it
 * already exists, otherwise it is created. Opened files get their
 * permissions fixed via server_fix_perms(). */
1063 static int system_journal_open(Server *s) {
1069 r = sd_id128_get_machine(&machine);
1073 sd_id128_to_string(machine, ids);
1075 if (!s->system_journal &&
1076 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
1077 access("/run/systemd/journal/flushed", F_OK) >= 0) {
1079 /* If in auto mode: first try to create the machine
1080 * path, but not the prefix.
1082 * If in persistent mode: create /var/log/journal and
1083 * the machine path */
1085 if (s->storage == STORAGE_PERSISTENT)
1086 (void) mkdir("/var/log/journal/", 0755);
1088 fn = strappend("/var/log/journal/", ids);
1092 (void) mkdir(fn, 0755);
1095 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
1099 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
1103 server_fix_perms(s, s->system_journal, 0);
/* A missing directory or a read-only file system is not worth a
 * warning. */
1106 if (r != -ENOENT && r != -EROFS)
1107 log_warning("Failed to open system journal: %s", strerror(-r));
1113 if (!s->runtime_journal &&
1114 (s->storage != STORAGE_NONE)) {
1116 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
1120 if (s->system_journal) {
1122 /* Try to open the runtime journal, but only
1123 * if it already exists, so that we can flush
1124 * it into the system journal */
1126 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1131 log_warning("Failed to open runtime journal: %s", strerror(-r));
1138 /* OK, we really need the runtime journal, so create
1139 * it if necessary. */
1141 (void) mkdir_parents(fn, 0755);
1142 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1146 log_error("Failed to open runtime journal: %s", strerror(-r));
1151 if (s->runtime_journal)
1152 server_fix_perms(s, s->runtime_journal, 0);
/* Copies all entries of the runtime journal into the system journal and
 * then removes the runtime journal.
 *
 * Only acts when storage is "auto" or "persistent", a runtime journal
 * is open and the system journal could be opened. Entries are read via
 * an sd_journal handle opened with SD_JOURNAL_RUNTIME_ONLY and copied
 * one by one; a failed copy is attempted a second time, as visible
 * below. Afterwards the runtime journal is closed and /run/log/journal
 * is deleted. */
1158 static int server_flush_to_var(Server *s) {
1166 if (s->storage != STORAGE_AUTO &&
1167 s->storage != STORAGE_PERSISTENT)
1170 if (!s->runtime_journal)
/* Make sure the system journal is open; it is the copy target. */
1173 system_journal_open(s);
1175 if (!s->system_journal)
1178 log_info("Flushing to /var...");
1180 r = sd_id128_get_machine(&machine);
1182 log_error("Failed to get machine id: %s", strerror(-r));
1186 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1188 log_error("Failed to read runtime journal: %s", strerror(-r));
1192 SD_JOURNAL_FOREACH(j) {
/* Locate the entry object the iterator currently points at. */
1195 f = j->current_file;
1196 assert(f && f->current_offset > 0);
1198 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1200 log_error("Can't read entry: %s", strerror(-r));
1204 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1206 log_info("Allocation limit reached.");
1208 journal_file_post_change(s->system_journal);
/* Second attempt at copying the same entry. */
1212 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1216 log_error("Can't write entry: %s", strerror(-r));
1222 journal_file_post_change(s->system_journal);
1224 journal_file_close(s->runtime_journal);
1225 s->runtime_journal = NULL;
1228 rm_rf("/run/log/journal", false, true, false);
/* Handles one event returned by epoll_wait().
 *
 * The event source is identified by comparing ev->data.fd with the
 * server's well-known descriptors:
 *   - signal_fd: reads one signalfd_siginfo record and reacts to the
 *     signal (SIGUSR1 triggers a flush to /var);
 *   - dev_kmsg_fd: reads kernel messages;
 *   - native_fd / syslog_fd: receives one datagram with its control
 *     messages (credentials, SELinux label, timestamp, passed fds) and
 *     feeds it to the matching parser;
 *   - stdout_fd: accepts a new stdout stream connection;
 *   - anything else: ev->data.ptr is treated as a StdoutStream.
 * For the well-known descriptors any event mask other than EPOLLIN is
 * reported as invalid. */
1233 static int process_event(Server *s, struct epoll_event *ev) {
1237 if (ev->data.fd == s->signal_fd) {
1238 struct signalfd_siginfo sfsi;
1241 if (ev->events != EPOLLIN) {
1242 log_info("Got invalid event from epoll.");
1246 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
/* Anything but a complete record is a short read or an error. */
1247 if (n != sizeof(sfsi)) {
1252 if (errno == EINTR || errno == EAGAIN)
1258 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
/* SIGUSR1: create the flag file that system_journal_open() checks, then
 * move the runtime journal to /var. */
1260 if (sfsi.ssi_signo == SIGUSR1) {
1261 touch("/run/systemd/journal/flushed");
1262 server_flush_to_var(s);
1266 if (sfsi.ssi_signo == SIGUSR2) {
1274 } else if (ev->data.fd == s->dev_kmsg_fd) {
1277 if (ev->events != EPOLLIN) {
1278 log_info("Got invalid event from epoll.");
1282 r = server_read_dev_kmsg(s);
1288 } else if (ev->data.fd == s->native_fd ||
1289 ev->data.fd == s->syslog_fd) {
1291 if (ev->events != EPOLLIN) {
1292 log_info("Got invalid event from epoll.");
1297 struct msghdr msghdr;
1299 struct ucred *ucred = NULL;
1300 struct timeval *tv = NULL;
1301 struct cmsghdr *cmsg;
1303 size_t label_len = 0;
1305 struct cmsghdr cmsghdr;
1307 /* We use NAME_MAX space for the
1308 * SELinux label here. The kernel
1309 * currently enforces no limit, but
1310 * according to suggestions from the
1311 * SELinux people this will change and
1312 * it will probably be identical to
1313 * NAME_MAX. For now we use that, but
1314 * this should be updated one day when
1315 * the final limit is known.*/
1316 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1317 CMSG_SPACE(sizeof(struct timeval)) +
1318 CMSG_SPACE(sizeof(int)) + /* fd */
1319 CMSG_SPACE(NAME_MAX)]; /* selinux label */
/* Ask the socket for the number of pending bytes so the receive buffer
 * can be enlarged first if needed. */
1326 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1327 log_error("SIOCINQ failed: %m");
1331 if (s->buffer_size < (size_t) v) {
/* Grow to at least twice the old size; one extra byte is allocated
 * beyond the recorded size. */
1335 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1336 b = realloc(s->buffer, l+1);
1339 log_error("Couldn't increase buffer.");
1348 iovec.iov_base = s->buffer;
1349 iovec.iov_len = s->buffer_size;
1353 msghdr.msg_iov = &iovec;
1354 msghdr.msg_iovlen = 1;
1355 msghdr.msg_control = &control;
1356 msghdr.msg_controllen = sizeof(control);
1358 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1361 if (errno == EINTR || errno == EAGAIN)
1364 log_error("recvmsg() failed: %m");
/* Pick up the ancillary data: sender credentials, SELinux label,
 * receive timestamp and passed file descriptors. Credentials and
 * timestamp are only accepted when the payload has the expected size. */
1368 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1370 if (cmsg->cmsg_level == SOL_SOCKET &&
1371 cmsg->cmsg_type == SCM_CREDENTIALS &&
1372 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1373 ucred = (struct ucred*) CMSG_DATA(cmsg);
1374 else if (cmsg->cmsg_level == SOL_SOCKET &&
1375 cmsg->cmsg_type == SCM_SECURITY) {
1376 label = (char*) CMSG_DATA(cmsg);
1377 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1378 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1379 cmsg->cmsg_type == SO_TIMESTAMP &&
1380 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1381 tv = (struct timeval*) CMSG_DATA(cmsg);
1382 else if (cmsg->cmsg_level == SOL_SOCKET &&
1383 cmsg->cmsg_type == SCM_RIGHTS) {
1384 fds = (int*) CMSG_DATA(cmsg);
1385 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/* syslog datagrams must not carry file descriptors. */
1389 if (ev->data.fd == s->syslog_fd) {
1392 if (n > 0 && n_fds == 0) {
1393 e = memchr(s->buffer, '\n', n);
1399 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1400 } else if (n_fds > 0)
1401 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native protocol: either inline payload without fds, or an empty
 * datagram with exactly one fd that contains the message. */
1404 if (n > 0 && n_fds == 0)
1405 process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1406 else if (n == 0 && n_fds == 1)
1407 process_native_file(s, fds[0], ucred, tv, label, label_len);
1409 log_warning("Got too many file descriptors via native socket. Ignoring.");
/* Received descriptors are always closed again. */
1412 close_many(fds, n_fds);
1417 } else if (ev->data.fd == s->stdout_fd) {
1419 if (ev->events != EPOLLIN) {
1420 log_info("Got invalid event from epoll.");
1424 stdout_stream_new(s);
1428 StdoutStream *stream;
/* Stream connections may report EPOLLIN and/or EPOLLHUP; any other bit
 * makes the event invalid. */
1430 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1431 log_info("Got invalid event from epoll.");
1435 /* If it is none of the well-known fds, it must be an
1436 * stdout stream fd. Note that this is a bit ugly here
1437 * (since we rely that none of the well-known fds
1438 * could be interpreted as pointer), but nonetheless
1439 * safe, since the well-known fds would never get an
1440 * fd > 4096, i.e. beyond the first memory page */
1442 stream = ev->data.ptr;
/* A result <= 0 from the stream handler leads to the stream being
 * freed. */
1444 if (stdout_stream_process(stream) <= 0)
1445 stdout_stream_free(stream);
1450 log_error("Unknown event.");
/* Sets up the native protocol datagram socket and registers it with the
 * server's epoll instance.
 *
 * If no descriptor was passed in (s->native_fd < 0), an AF_UNIX datagram
 * socket is created and bound to /run/systemd/journal/socket with mode
 * 0666; a passed-in descriptor is switched to non-blocking mode. The
 * socket is then configured to deliver sender credentials, the SELinux
 * label and receive timestamps with each datagram. */
1455 static int open_native_socket(Server*s) {
1456 union sockaddr_union sa;
1458 struct epoll_event ev;
1462 if (s->native_fd < 0) {
1464 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1465 if (s->native_fd < 0) {
1466 log_error("socket() failed: %m");
1471 sa.un.sun_family = AF_UNIX;
1472 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
/* Remove a stale socket node left over from an earlier run. */
1474 unlink(sa.un.sun_path);
1476 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
1478 log_error("bind() failed: %m");
1482 chmod(sa.un.sun_path, 0666);
1484 fd_nonblock(s->native_fd, 1);
1487 r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
1489 log_error("SO_PASSCRED failed: %m");
/* The option must be enabled on the native socket configured here, not
 * on the syslog socket, or native datagrams arrive without a label. */
1495 r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
1497 log_warning("SO_PASSSEC failed: %m");
1501 r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
1503 log_error("SO_TIMESTAMP failed: %m");
1508 ev.events = EPOLLIN;
1509 ev.data.fd = s->native_fd;
1510 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
1511 log_error("Failed to add native server fd to epoll object: %m");
/* Routes SIGINT, SIGTERM, SIGUSR1 and SIGUSR2 through a signalfd.
 *
 * The process signal mask is set to exactly these four signals, a
 * non-blocking close-on-exec signalfd is created for them and stored in
 * s->signal_fd, and the descriptor is added to the epoll instance. */
1519 static int open_signalfd(Server *s) {
1521 struct epoll_event ev;
1525 assert_se(sigemptyset(&mask) == 0);
1526 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
/* Block normal delivery so the signals are only seen via the fd. */
1527 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1529 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1530 if (s->signal_fd < 0) {
1531 log_error("signalfd(): %m");
1536 ev.events = EPOLLIN;
1537 ev.data.fd = s->signal_fd;
1539 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1540 log_error("epoll_ctl(): %m");
/* Applies journald settings given on the kernel command line.
 *
 * Reads /proc/cmdline and handles the boolean switches
 * systemd.journald.forward_to_syslog=, forward_to_kmsg= and
 * forward_to_console=. Values that fail to parse and other words
 * starting with "systemd.journald" are reported with a warning. The
 * detect_container() result is checked first, before the file is
 * read. */
1547 static int server_parse_proc_cmdline(Server *s) {
1548 char *line, *w, *state;
1552 if (detect_container(NULL) > 0)
1555 r = read_one_line_file("/proc/cmdline", &line);
1557 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1561 FOREACH_WORD_QUOTED(w, l, line, state) {
1564 word = strndup(w, l);
/* The offsets 35, 33 and 36 below are the lengths of the respective
 * option prefixes, i.e. they skip to the value after '='. */
1570 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1571 r = parse_boolean(word + 35);
1573 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1575 s->forward_to_syslog = r;
1576 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1577 r = parse_boolean(word + 33);
1579 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1581 s->forward_to_kmsg = r;
1582 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1583 r = parse_boolean(word + 36);
1585 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1587 s->forward_to_console = r;
1588 } else if (startswith(word, "systemd.journald"))
1589 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Loads the [Journal] section of /etc/systemd/journald.conf into the
 * Server, using the gperf-generated lookup table. A missing file is
 * checked for separately (ENOENT); other open or parse failures are
 * logged as warnings. */
1601 static int server_parse_config_file(Server *s) {
1608 fn = "/etc/systemd/journald.conf";
1609 f = fopen(fn, "re");
1611 if (errno == ENOENT)
1614 log_warning("Failed to open configuration file %s: %m", fn);
1618 r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
1620 log_warning("Failed to parse configuration file: %s", strerror(-r));
/* Initializes a Server: sets defaults, reads configuration, takes over
 * passed sockets, opens the remaining event sources and finally opens
 * the journal files.
 *
 * Defaults: all descriptors are -1, rate limiting uses the
 * DEFAULT_RATE_LIMIT_* values, forwarding to syslog is on, and the
 * maximum levels are debug (store, syslog), notice (kmsg) and info
 * (console). The configuration file and the kernel command line may
 * override these. Sockets received via socket activation are matched
 * by type and path; more than one socket of a kind or an unknown socket
 * is an error. */
1627 static int server_init(Server *s) {
1633 s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1637 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1638 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1640 s->forward_to_syslog = true;
1642 s->max_level_store = LOG_DEBUG;
1643 s->max_level_syslog = LOG_DEBUG;
1644 s->max_level_kmsg = LOG_NOTICE;
1645 s->max_level_console = LOG_INFO;
/* Fill both metrics structures with 0xFF bytes.
 * NOTE(review): presumably all-ones marks a limit as "not configured";
 * confirm against the JournalMetrics handling. */
1647 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1648 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
/* The command line is parsed after the file, so it is applied last. */
1650 server_parse_config_file(s);
1651 server_parse_proc_cmdline(s);
1653 mkdir_p("/run/systemd/journal", 0755);
1655 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1656 if (!s->user_journals)
1659 s->mmap = mmap_cache_new();
1663 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1664 if (s->epoll_fd < 0) {
1665 log_error("Failed to create epoll object: %m");
1669 n = sd_listen_fds(true);
1671 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
/* Classify every passed descriptor by socket type and bound path. */
1675 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1677 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1679 if (s->native_fd >= 0) {
1680 log_error("Too many native sockets passed.");
1686 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1688 if (s->stdout_fd >= 0) {
1689 log_error("Too many stdout sockets passed.");
1695 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1697 if (s->syslog_fd >= 0) {
1698 log_error("Too many /dev/log sockets passed.");
1705 log_error("Unknown socket passed.");
/* Open or finish setting up each event source in turn. */
1710 r = server_open_syslog_socket(s);
1714 r = open_native_socket(s);
1718 r = server_open_stdout_socket(s);
1722 r = server_open_dev_kmsg(s);
1726 r = server_open_kernel_seqnum(s);
1730 r = open_signalfd(s);
1734 s->udev = udev_new();
1738 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1742 r = system_journal_open(s);
/* Gives the system journal and every open per-user journal the chance
 * to append a tag for the current realtime clock value; whether a tag
 * is written is decided by journal_file_maybe_append_tag(). */
1749 static void maybe_append_tags(Server *s) {
1755 n = now(CLOCK_REALTIME);
1757 if (s->system_journal)
1758 journal_file_maybe_append_tag(s->system_journal, n);
1760 HASHMAP_FOREACH(f, s->user_journals, i)
1761 journal_file_maybe_append_tag(f, n);
/* Releases everything a Server owns: stdout streams, the system,
 * runtime and per-user journal files, the user journal map, all open
 * descriptors, the rate limiter, the kernel sequence number mapping,
 * the mmap cache and the udev context. */
1765 static void server_done(Server *s) {
/* Loops until the stream list head is empty. */
1769 while (s->stdout_streams)
1770 stdout_stream_free(s->stdout_streams);
1772 if (s->system_journal)
1773 journal_file_close(s->system_journal);
1775 if (s->runtime_journal)
1776 journal_file_close(s->runtime_journal);
1778 while ((f = hashmap_steal_first(s->user_journals)))
1779 journal_file_close(f);
1781 hashmap_free(s->user_journals);
/* Descriptors that were never opened are still -1 and are skipped. */
1783 if (s->epoll_fd >= 0)
1784 close_nointr_nofail(s->epoll_fd);
1786 if (s->signal_fd >= 0)
1787 close_nointr_nofail(s->signal_fd);
1789 if (s->syslog_fd >= 0)
1790 close_nointr_nofail(s->syslog_fd);
1792 if (s->native_fd >= 0)
1793 close_nointr_nofail(s->native_fd);
1795 if (s->stdout_fd >= 0)
1796 close_nointr_nofail(s->stdout_fd);
1798 if (s->dev_kmsg_fd >= 0)
1799 close_nointr_nofail(s->dev_kmsg_fd);
1802 journal_rate_limit_free(s->rate_limit);
1804 if (s->kernel_seqnum)
1805 munmap(s->kernel_seqnum, sizeof(uint64_t));
1811 mmap_cache_unref(s->mmap);
1814 udev_unref(s->udev);
/* Entry point of the journal daemon.
 *
 * Sets up logging, initializes the server, vacuums old journal files,
 * flushes the runtime journal to /var if applicable, imports pending
 * kernel messages and then runs the epoll event loop. The epoll timeout
 * is derived from journal_file_next_evolve_usec() on the system journal
 * when that call yields a deadline. On exit a "Journal stopped" entry
 * is written and the server is torn down. The exit status reflects the
 * last value of r. */
1817 int main(int argc, char *argv[]) {
1821 /* if (getppid() != 1) { */
1822 /* log_error("This program should be invoked by init only."); */
1823 /* return EXIT_FAILURE; */
1827 log_error("This program does not take arguments.");
1828 return EXIT_FAILURE;
1831 log_set_target(LOG_TARGET_SAFE);
1832 log_set_facility(LOG_SYSLOG);
1833 log_set_max_level(LOG_DEBUG);
1834 log_parse_environment();
1839 r = server_init(&server);
1843 server_vacuum(&server);
1844 server_flush_to_var(&server);
1845 server_flush_dev_kmsg(&server);
1847 log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
1848 server_driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
1852 "STATUS=Processing requests...");
1855 struct epoll_event event;
1861 if (server.system_journal &&
1862 journal_file_next_evolve_usec(server.system_journal, &u)) {
1865 n = now(CLOCK_REALTIME);
/* Convert the remaining microseconds to milliseconds, rounding up. */
1870 t = (int) ((u - n + USEC_PER_MSEC - 1) / USEC_PER_MSEC);
/* Wait for a single event, at most t milliseconds. */
1875 r = epoll_wait(server.epoll_fd, &event, 1, t);
1881 log_error("epoll_wait() failed: %m");
1887 r = process_event(&server, &event);
1894 maybe_append_tags(&server);
1897 log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
1898 server_driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
1902 "STATUS=Shutting down...");
1904 server_done(&server);
1906 return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;