chiark / gitweb /
journald: bump the journal per-unit ratelimit defaults
[elogind.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
26 #include <sys/mman.h>
27 #include <sys/timerfd.h>
28
29 #include <libudev.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
33
34 #include "fileio.h"
35 #include "mkdir.h"
36 #include "hashmap.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
40 #include "list.h"
41 #include "virt.h"
42 #include "missing.h"
43 #include "conf-parser.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-server.h"
48 #include "journald-rate-limit.h"
49 #include "journald-kmsg.h"
50 #include "journald-syslog.h"
51 #include "journald-stream.h"
52 #include "journald-console.h"
53 #include "journald-native.h"
54
55 #ifdef HAVE_ACL
56 #include <sys/acl.h>
57 #include <acl/libacl.h>
58 #include "acl-util.h"
59 #endif
60
61 #ifdef HAVE_SELINUX
62 #include <selinux/selinux.h>
63 #endif
64
/* Upper bound on the number of per-user journal files kept open at once;
 * find_journal() closes cached ones until the count drops below this. */
#define USER_JOURNALS_MAX 1024

/* Built-in defaults. Their users are outside this excerpt; judging by the
 * names they seed the sync interval and the rate-limit window/burst. */
#define DEFAULT_SYNC_INTERVAL_USEC (5 * USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30 * USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000

/* For how long available_space() keeps returning its cached result. */
#define RECHECK_AVAILABLE_SPACE_USEC (30 * USEC_PER_SEC)
72
73 static const char* const storage_table[] = {
74         [STORAGE_AUTO] = "auto",
75         [STORAGE_VOLATILE] = "volatile",
76         [STORAGE_PERSISTENT] = "persistent",
77         [STORAGE_NONE] = "none"
78 };
79
80 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
81 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
82
83 static const char* const split_mode_table[] = {
84         [SPLIT_NONE] = "none",
85         [SPLIT_UID] = "uid",
86         [SPLIT_LOGIN] = "login"
87 };
88
89 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
90 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
91
92 static uint64_t available_space(Server *s) {
93         char ids[33];
94         _cleanup_free_ char *p = NULL;
95         const char *f;
96         sd_id128_t machine;
97         struct statvfs ss;
98         uint64_t sum = 0, avail = 0, ss_avail = 0;
99         int r;
100         _cleanup_closedir_ DIR *d = NULL;
101         usec_t ts;
102         JournalMetrics *m;
103
104         ts = now(CLOCK_MONOTONIC);
105
106         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
107                 return s->cached_available_space;
108
109         r = sd_id128_get_machine(&machine);
110         if (r < 0)
111                 return 0;
112
113         if (s->system_journal) {
114                 f = "/var/log/journal/";
115                 m = &s->system_metrics;
116         } else {
117                 f = "/run/log/journal/";
118                 m = &s->runtime_metrics;
119         }
120
121         assert(m);
122
123         p = strappend(f, sd_id128_to_string(machine, ids));
124         if (!p)
125                 return 0;
126
127         d = opendir(p);
128         if (!d)
129                 return 0;
130
131         if (fstatvfs(dirfd(d), &ss) < 0)
132                 return 0;
133
134         for (;;) {
135                 struct stat st;
136                 struct dirent *de;
137                 union dirent_storage buf;
138
139                 r = readdir_r(d, &buf.de, &de);
140                 if (r != 0)
141                         break;
142
143                 if (!de)
144                         break;
145
146                 if (!endswith(de->d_name, ".journal") &&
147                     !endswith(de->d_name, ".journal~"))
148                         continue;
149
150                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
151                         continue;
152
153                 if (!S_ISREG(st.st_mode))
154                         continue;
155
156                 sum += (uint64_t) st.st_blocks * 512UL;
157         }
158
159         avail = sum >= m->max_use ? 0 : m->max_use - sum;
160
161         ss_avail = ss.f_bsize * ss.f_bavail;
162
163         ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
164
165         if (ss_avail < avail)
166                 avail = ss_avail;
167
168         s->cached_available_space = avail;
169         s->cached_available_space_timestamp = ts;
170
171         return avail;
172 }
173
174 static void server_read_file_gid(Server *s) {
175         const char *g = "systemd-journal";
176         int r;
177
178         assert(s);
179
180         if (s->file_gid_valid)
181                 return;
182
183         r = get_group_creds(&g, &s->file_gid);
184         if (r < 0)
185                 log_warning("Failed to resolve '%s' group: %s", g, strerror(-r));
186
187         /* if we couldn't read the gid, then it will be 0, but that's
188          * fine and we shouldn't try to resolve the group again, so
189          * let's just pretend it worked right-away. */
190         s->file_gid_valid = true;
191 }
192
193 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
194         int r;
195 #ifdef HAVE_ACL
196         acl_t acl;
197         acl_entry_t entry;
198         acl_permset_t permset;
199 #endif
200
201         assert(f);
202
203         server_read_file_gid(s);
204
205         r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
206         if (r < 0)
207                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
208
209 #ifdef HAVE_ACL
210         if (uid <= 0)
211                 return;
212
213         acl = acl_get_fd(f->fd);
214         if (!acl) {
215                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
216                 return;
217         }
218
219         r = acl_find_uid(acl, uid, &entry);
220         if (r <= 0) {
221
222                 if (acl_create_entry(&acl, &entry) < 0 ||
223                     acl_set_tag_type(entry, ACL_USER) < 0 ||
224                     acl_set_qualifier(entry, &uid) < 0) {
225                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
226                         goto finish;
227                 }
228         }
229
230         /* We do not recalculate the mask unconditionally here,
231          * so that the fchmod() mask above stays intact. */
232         if (acl_get_permset(entry, &permset) < 0 ||
233             acl_add_perm(permset, ACL_READ) < 0 ||
234             calc_acl_mask_if_needed(&acl) < 0) {
235                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
236                 goto finish;
237         }
238
239         if (acl_set_fd(f->fd, acl) < 0)
240                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
241
242 finish:
243         acl_free(acl);
244 #endif
245 }
246
247 static JournalFile* find_journal(Server *s, uid_t uid) {
248         _cleanup_free_ char *p = NULL;
249         int r;
250         JournalFile *f;
251         sd_id128_t machine;
252
253         assert(s);
254
255         /* We split up user logs only on /var, not on /run. If the
256          * runtime file is open, we write to it exclusively, in order
257          * to guarantee proper order as soon as we flush /run to
258          * /var and close the runtime file. */
259
260         if (s->runtime_journal)
261                 return s->runtime_journal;
262
263         if (uid <= 0)
264                 return s->system_journal;
265
266         r = sd_id128_get_machine(&machine);
267         if (r < 0)
268                 return s->system_journal;
269
270         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
271         if (f)
272                 return f;
273
274         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
275                      SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
276                 return s->system_journal;
277
278         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
279                 /* Too many open? Then let's close one */
280                 f = hashmap_steal_first(s->user_journals);
281                 assert(f);
282                 journal_file_close(f);
283         }
284
285         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
286         if (r < 0)
287                 return s->system_journal;
288
289         server_fix_perms(s, f, uid);
290
291         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
292         if (r < 0) {
293                 journal_file_close(f);
294                 return s->system_journal;
295         }
296
297         return f;
298 }
299
300 void server_rotate(Server *s) {
301         JournalFile *f;
302         void *k;
303         Iterator i;
304         int r;
305
306         log_debug("Rotating...");
307
308         if (s->runtime_journal) {
309                 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
310                 if (r < 0)
311                         if (s->runtime_journal)
312                                 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
313                         else
314                                 log_error("Failed to create new runtime journal: %s", strerror(-r));
315                 else
316                         server_fix_perms(s, s->runtime_journal, 0);
317         }
318
319         if (s->system_journal) {
320                 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
321                 if (r < 0)
322                         if (s->system_journal)
323                                 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
324                         else
325                                 log_error("Failed to create new system journal: %s", strerror(-r));
326
327                 else
328                         server_fix_perms(s, s->system_journal, 0);
329         }
330
331         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
332                 r = journal_file_rotate(&f, s->compress, s->seal);
333                 if (r < 0)
334                         if (f)
335                                 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
336                         else
337                                 log_error("Failed to create user journal: %s", strerror(-r));
338                 else {
339                         hashmap_replace(s->user_journals, k, f);
340                         server_fix_perms(s, f, PTR_TO_UINT32(k));
341                 }
342         }
343 }
344
345 void server_sync(Server *s) {
346         JournalFile *f;
347         void *k;
348         Iterator i;
349         int r;
350
351         static const struct itimerspec sync_timer_disable = {};
352
353         if (s->system_journal) {
354                 r = journal_file_set_offline(s->system_journal);
355                 if (r < 0)
356                         log_error("Failed to sync system journal: %s", strerror(-r));
357         }
358
359         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
360                 r = journal_file_set_offline(f);
361                 if (r < 0)
362                         log_error("Failed to sync user journal: %s", strerror(-r));
363         }
364
365         r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
366         if (r < 0)
367                 log_error("Failed to disable max timer: %m");
368
369         s->sync_scheduled = false;
370 }
371
372 void server_vacuum(Server *s) {
373         char ids[33];
374         sd_id128_t machine;
375         int r;
376
377         log_debug("Vacuuming...");
378
379         s->oldest_file_usec = 0;
380
381         r = sd_id128_get_machine(&machine);
382         if (r < 0) {
383                 log_error("Failed to get machine ID: %s", strerror(-r));
384                 return;
385         }
386
387         sd_id128_to_string(machine, ids);
388
389         if (s->system_journal) {
390                 char *p = strappenda("/var/log/journal/", ids);
391
392                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
393                 if (r < 0 && r != -ENOENT)
394                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
395         }
396
397         if (s->runtime_journal) {
398                 char *p = strappenda("/run/log/journal/", ids);
399
400                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
401                 if (r < 0 && r != -ENOENT)
402                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
403         }
404
405         s->cached_available_space_timestamp = 0;
406 }
407
408 bool shall_try_append_again(JournalFile *f, int r) {
409
410         /* -E2BIG            Hit configured limit
411            -EFBIG            Hit fs limit
412            -EDQUOT           Quota limit hit
413            -ENOSPC           Disk full
414            -EHOSTDOWN        Other machine
415            -EBUSY            Unclean shutdown
416            -EPROTONOSUPPORT  Unsupported feature
417            -EBADMSG          Corrupted
418            -ENODATA          Truncated
419            -ESHUTDOWN        Already archived */
420
421         if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
422                 log_debug("%s: Allocation limit reached, rotating.", f->path);
423         else if (r == -EHOSTDOWN)
424                 log_info("%s: Journal file from other machine, rotating.", f->path);
425         else if (r == -EBUSY)
426                 log_info("%s: Unclean shutdown, rotating.", f->path);
427         else if (r == -EPROTONOSUPPORT)
428                 log_info("%s: Unsupported feature, rotating.", f->path);
429         else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
430                 log_warning("%s: Journal file corrupted, rotating.", f->path);
431         else
432                 return false;
433
434         return true;
435 }
436
437 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
438         JournalFile *f;
439         bool vacuumed = false;
440         int r;
441
442         assert(s);
443         assert(iovec);
444         assert(n > 0);
445
446         f = find_journal(s, uid);
447         if (!f)
448                 return;
449
450         if (journal_file_rotate_suggested(f, s->max_file_usec)) {
451                 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
452                 server_rotate(s);
453                 server_vacuum(s);
454                 vacuumed = true;
455
456                 f = find_journal(s, uid);
457                 if (!f)
458                         return;
459         }
460
461         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
462         if (r >= 0) {
463                 server_schedule_sync(s);
464                 return;
465         }
466
467         if (vacuumed || !shall_try_append_again(f, r)) {
468                 size_t size = 0;
469                 unsigned i;
470                 for (i = 0; i < n; i++)
471                         size += iovec[i].iov_len;
472
473                 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
474                 return;
475         }
476
477         server_rotate(s);
478         server_vacuum(s);
479
480         f = find_journal(s, uid);
481         if (!f)
482                 return;
483
484         log_debug("Retrying write.");
485         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
486         if (r < 0) {
487                 size_t size = 0;
488                 unsigned i;
489                 for (i = 0; i < n; i++)
490                         size += iovec[i].iov_len;
491
492                 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
493         }
494 }
495
496 static void dispatch_message_real(
497                 Server *s,
498                 struct iovec *iovec, unsigned n, unsigned m,
499                 struct ucred *ucred,
500                 struct timeval *tv,
501                 const char *label, size_t label_len,
502                 const char *unit_id,
503                 pid_t object_pid) {
504
505         char    pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
506                 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
507                 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
508                 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
509                 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
510                 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
511                 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=",
512                 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
513                 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
514                 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
515         uid_t object_uid;
516         gid_t object_gid;
517
518         char *x;
519         sd_id128_t id;
520         int r;
521         char *t, *c;
522         uid_t realuid = 0, owner = 0, journal_uid;
523         bool owner_valid = false;
524 #ifdef HAVE_AUDIT
525         char    audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
526                 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
527                 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
528                 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
529
530         uint32_t audit;
531         uid_t loginuid;
532 #endif
533
534         assert(s);
535         assert(iovec);
536         assert(n > 0);
537         assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
538
539         if (ucred) {
540                 realuid = ucred->uid;
541
542                 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
543                 IOVEC_SET_STRING(iovec[n++], pid);
544
545                 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
546                 IOVEC_SET_STRING(iovec[n++], uid);
547
548                 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
549                 IOVEC_SET_STRING(iovec[n++], gid);
550
551                 r = get_process_comm(ucred->pid, &t);
552                 if (r >= 0) {
553                         x = strappenda("_COMM=", t);
554                         free(t);
555                         IOVEC_SET_STRING(iovec[n++], x);
556                 }
557
558                 r = get_process_exe(ucred->pid, &t);
559                 if (r >= 0) {
560                         x = strappenda("_EXE=", t);
561                         free(t);
562                         IOVEC_SET_STRING(iovec[n++], x);
563                 }
564
565                 r = get_process_cmdline(ucred->pid, 0, false, &t);
566                 if (r >= 0) {
567                         x = strappenda("_CMDLINE=", t);
568                         free(t);
569                         IOVEC_SET_STRING(iovec[n++], x);
570                 }
571
572 #ifdef HAVE_AUDIT
573                 r = audit_session_from_pid(ucred->pid, &audit);
574                 if (r >= 0) {
575                         sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
576                         IOVEC_SET_STRING(iovec[n++], audit_session);
577                 }
578
579                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
580                 if (r >= 0) {
581                         sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
582                         IOVEC_SET_STRING(iovec[n++], audit_loginuid);
583                 }
584 #endif
585
586                 r = cg_pid_get_path_shifted(ucred->pid, NULL, &c);
587                 if (r >= 0) {
588                         char *session = NULL;
589
590                         x = strappenda("_SYSTEMD_CGROUP=", c);
591                         IOVEC_SET_STRING(iovec[n++], x);
592
593                         r = cg_path_get_session(c, &t);
594                         if (r >= 0) {
595                                 session = strappenda("_SYSTEMD_SESSION=", t);
596                                 free(t);
597                                 IOVEC_SET_STRING(iovec[n++], session);
598                         }
599
600                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
601                                 owner_valid = true;
602
603                                 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
604                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
605                         }
606
607                         if (cg_path_get_unit(c, &t) >= 0) {
608                                 x = strappenda("_SYSTEMD_UNIT=", t);
609                                 free(t);
610                         } else if (cg_path_get_user_unit(c, &t) >= 0) {
611                                 x = strappenda("_SYSTEMD_USER_UNIT=", t);
612                                 free(t);
613                         } else if (unit_id) {
614                                 if (session)
615                                         x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
616                                 else
617                                         x = strappenda("_SYSTEMD_UNIT=", unit_id);
618                         } else
619                                 x = NULL;
620
621                         if (x)
622                                 IOVEC_SET_STRING(iovec[n++], x);
623
624                         free(c);
625                 }
626
627 #ifdef HAVE_SELINUX
628                 if (label) {
629                         x = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);
630
631                         *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
632                         IOVEC_SET_STRING(iovec[n++], x);
633                 } else {
634                         security_context_t con;
635
636                         if (getpidcon(ucred->pid, &con) >= 0) {
637                                 x = strappenda("_SELINUX_CONTEXT=", con);
638
639                                 freecon(con);
640                                 IOVEC_SET_STRING(iovec[n++], x);
641                         }
642                 }
643 #endif
644         }
645         assert(n <= m);
646
647         if (object_pid) {
648                 r = get_process_uid(object_pid, &object_uid);
649                 if (r >= 0) {
650                         sprintf(o_uid, "OBJECT_UID=%lu", (unsigned long) object_uid);
651                         IOVEC_SET_STRING(iovec[n++], o_uid);
652                 }
653
654                 r = get_process_gid(object_pid, &object_gid);
655                 if (r >= 0) {
656                         sprintf(o_gid, "OBJECT_GID=%lu", (unsigned long) object_gid);
657                         IOVEC_SET_STRING(iovec[n++], o_gid);
658                 }
659
660                 r = get_process_comm(object_pid, &t);
661                 if (r >= 0) {
662                         x = strappenda("OBJECT_COMM=", t);
663                         free(t);
664                         IOVEC_SET_STRING(iovec[n++], x);
665                 }
666
667                 r = get_process_exe(object_pid, &t);
668                 if (r >= 0) {
669                         x = strappenda("OBJECT_EXE=", t);
670                         free(t);
671                         IOVEC_SET_STRING(iovec[n++], x);
672                 }
673
674                 r = get_process_cmdline(object_pid, 0, false, &t);
675                 if (r >= 0) {
676                         x = strappenda("OBJECT_CMDLINE=", t);
677                         free(t);
678                         IOVEC_SET_STRING(iovec[n++], x);
679                 }
680
681 #ifdef HAVE_AUDIT
682                 r = audit_session_from_pid(object_pid, &audit);
683                 if (r >= 0) {
684                         sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%lu", (unsigned long) audit);
685                         IOVEC_SET_STRING(iovec[n++], o_audit_session);
686                 }
687
688                 r = audit_loginuid_from_pid(object_pid, &loginuid);
689                 if (r >= 0) {
690                         sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
691                         IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
692                 }
693 #endif
694
695                 r = cg_pid_get_path_shifted(object_pid, NULL, &c);
696                 if (r >= 0) {
697                         x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
698                         IOVEC_SET_STRING(iovec[n++], x);
699
700                         r = cg_path_get_session(c, &t);
701                         if (r >= 0) {
702                                 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
703                                 free(t);
704                                 IOVEC_SET_STRING(iovec[n++], x);
705                         }
706
707                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
708                                 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
709                                 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
710                         }
711
712                         if (cg_path_get_unit(c, &t) >= 0) {
713                                 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
714                                 free(t);
715                         } else if (cg_path_get_user_unit(c, &t) >= 0) {
716                                 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
717                                 free(t);
718                         } else
719                                 x = NULL;
720
721                         if (x)
722                                 IOVEC_SET_STRING(iovec[n++], x);
723
724                         free(c);
725                 }
726         }
727         assert(n <= m);
728
729         if (tv) {
730                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
731                 IOVEC_SET_STRING(iovec[n++], source_time);
732         }
733
734         /* Note that strictly speaking storing the boot id here is
735          * redundant since the entry includes this in-line
736          * anyway. However, we need this indexed, too. */
737         r = sd_id128_get_boot(&id);
738         if (r >= 0) {
739                 sd_id128_to_string(id, boot_id + strlen("_BOOT_ID="));
740                 IOVEC_SET_STRING(iovec[n++], boot_id);
741         }
742
743         r = sd_id128_get_machine(&id);
744         if (r >= 0) {
745                 sd_id128_to_string(id, machine_id + strlen("_MACHINE_ID="));
746                 IOVEC_SET_STRING(iovec[n++], machine_id);
747         }
748
749         t = gethostname_malloc();
750         if (t) {
751                 x = strappenda("_HOSTNAME=", t);
752                 free(t);
753                 IOVEC_SET_STRING(iovec[n++], x);
754         }
755
756         assert(n <= m);
757
758         if (s->split_mode == SPLIT_UID && realuid > 0)
759                 /* Split up strictly by any UID */
760                 journal_uid = realuid;
761         else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
762                 /* Split up by login UIDs, this avoids creation of
763                  * individual journals for system UIDs.  We do this
764                  * only if the realuid is not root, in order not to
765                  * accidentally leak privileged information to the
766                  * user that is logged by a privileged process that is
767                  * part of an unprivileged session.*/
768                 journal_uid = owner;
769         else
770                 journal_uid = 0;
771
772         write_to_journal(s, journal_uid, iovec, n);
773 }
774
775 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
776         char mid[11 + 32 + 1];
777         char buffer[16 + LINE_MAX + 1];
778         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
779         int n = 0;
780         va_list ap;
781         struct ucred ucred = {};
782
783         assert(s);
784         assert(format);
785
786         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
787         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
788
789         memcpy(buffer, "MESSAGE=", 8);
790         va_start(ap, format);
791         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
792         va_end(ap);
793         char_array_0(buffer);
794         IOVEC_SET_STRING(iovec[n++], buffer);
795
796         if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
797                 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
798                 char_array_0(mid);
799                 IOVEC_SET_STRING(iovec[n++], mid);
800         }
801
802         ucred.pid = getpid();
803         ucred.uid = getuid();
804         ucred.gid = getgid();
805
806         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, 0);
807 }
808
809 void server_dispatch_message(
810                 Server *s,
811                 struct iovec *iovec, unsigned n, unsigned m,
812                 struct ucred *ucred,
813                 struct timeval *tv,
814                 const char *label, size_t label_len,
815                 const char *unit_id,
816                 int priority,
817                 pid_t object_pid) {
818
819         int rl, r;
820         _cleanup_free_ char *path = NULL;
821         char *c;
822
823         assert(s);
824         assert(iovec || n == 0);
825
826         if (n == 0)
827                 return;
828
829         if (LOG_PRI(priority) > s->max_level_store)
830                 return;
831
832         if (!ucred)
833                 goto finish;
834
835         r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
836         if (r < 0)
837                 goto finish;
838
839         /* example: /user/lennart/3/foobar
840          *          /system/dbus.service/foobar
841          *
842          * So let's cut of everything past the third /, since that is
843          * where user directories start */
844
845         c = strchr(path, '/');
846         if (c) {
847                 c = strchr(c+1, '/');
848                 if (c) {
849                         c = strchr(c+1, '/');
850                         if (c)
851                                 *c = 0;
852                 }
853         }
854
855         rl = journal_rate_limit_test(s->rate_limit, path,
856                                      priority & LOG_PRIMASK, available_space(s));
857
858         if (rl == 0)
859                 return;
860
861         /* Write a suppression message if we suppressed something */
862         if (rl > 1)
863                 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
864                                       "Suppressed %u messages from %s", rl - 1, path);
865
866 finish:
867         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, object_pid);
868 }
869
870
871 static int system_journal_open(Server *s) {
872         int r;
873         char *fn;
874         sd_id128_t machine;
875         char ids[33];
876
877         r = sd_id128_get_machine(&machine);
878         if (r < 0)
879                 return r;
880
881         sd_id128_to_string(machine, ids);
882
883         if (!s->system_journal &&
884             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
885             access("/run/systemd/journal/flushed", F_OK) >= 0) {
886
887                 /* If in auto mode: first try to create the machine
888                  * path, but not the prefix.
889                  *
890                  * If in persistent mode: create /var/log/journal and
891                  * the machine path */
892
893                 if (s->storage == STORAGE_PERSISTENT)
894                         (void) mkdir("/var/log/journal/", 0755);
895
896                 fn = strappenda("/var/log/journal/", ids);
897                 (void) mkdir(fn, 0755);
898
899                 fn = strappenda(fn, "/system.journal");
900                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
901
902                 if (r >= 0) {
903                         char fb[FORMAT_BYTES_MAX];
904                         uint64_t avail;
905
906                         server_fix_perms(s, s->system_journal, 0);
907
908                         server_driver_message(s, SD_ID128_NULL, "Allowing system journal files to grow to %s.",
909                                               format_bytes(fb, sizeof(fb), s->system_metrics.max_use));
910
911                         avail = available_space(s);
912
913                         if (s->system_metrics.max_use > avail)
914                                server_driver_message(s, SD_ID128_NULL, "Journal size currently limited to %s due to SystemKeepFree.",
915                                                      format_bytes(fb, sizeof(fb), avail));
916
917                 } else if (r < 0) {
918
919                         if (r != -ENOENT && r != -EROFS)
920                                 log_warning("Failed to open system journal: %s", strerror(-r));
921
922                         r = 0;
923                 }
924         }
925
926         if (!s->runtime_journal &&
927             (s->storage != STORAGE_NONE)) {
928
929                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
930                 if (!fn)
931                         return -ENOMEM;
932
933                 if (s->system_journal) {
934
935                         /* Try to open the runtime journal, but only
936                          * if it already exists, so that we can flush
937                          * it into the system journal */
938
939                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
940                         free(fn);
941
942                         if (r < 0) {
943                                 if (r != -ENOENT)
944                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
945
946                                 r = 0;
947                         }
948
949                 } else {
950
951                         /* OK, we really need the runtime journal, so create
952                          * it if necessary. */
953
954                         (void) mkdir_parents(fn, 0755);
955                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
956                         free(fn);
957
958                         if (r < 0) {
959                                 log_error("Failed to open runtime journal: %s", strerror(-r));
960                                 return r;
961                         }
962                 }
963
964                 if (s->runtime_journal) {
965                         char fb[FORMAT_BYTES_MAX];
966                         uint64_t avail;
967
968                         server_fix_perms(s, s->runtime_journal, 0);
969                         server_driver_message(s, SD_ID128_NULL, "Allowing runtime journal files to grow to %s.",
970                                               format_bytes(fb, sizeof(fb), s->runtime_metrics.max_use));
971
972                         avail = available_space(s);
973
974                         if (s->system_metrics.max_use > avail)
975                                server_driver_message(s, SD_ID128_NULL, "Journal size currently limited to %s due to RuntimeKeepFree.",
976                                                      format_bytes(fb, sizeof(fb), avail));
977                 }
978         }
979
980         return r;
981 }
982
983 int server_flush_to_var(Server *s) {
984         int r;
985         sd_id128_t machine;
986         sd_journal *j = NULL;
987
988         assert(s);
989
990         if (s->storage != STORAGE_AUTO &&
991             s->storage != STORAGE_PERSISTENT)
992                 return 0;
993
994         if (!s->runtime_journal)
995                 return 0;
996
997         system_journal_open(s);
998
999         if (!s->system_journal)
1000                 return 0;
1001
1002         log_debug("Flushing to /var...");
1003
1004         r = sd_id128_get_machine(&machine);
1005         if (r < 0) {
1006                 log_error("Failed to get machine id: %s", strerror(-r));
1007                 return r;
1008         }
1009
1010         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1011         if (r < 0) {
1012                 log_error("Failed to read runtime journal: %s", strerror(-r));
1013                 return r;
1014         }
1015
1016         sd_journal_set_data_threshold(j, 0);
1017
1018         SD_JOURNAL_FOREACH(j) {
1019                 Object *o = NULL;
1020                 JournalFile *f;
1021
1022                 f = j->current_file;
1023                 assert(f && f->current_offset > 0);
1024
1025                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1026                 if (r < 0) {
1027                         log_error("Can't read entry: %s", strerror(-r));
1028                         goto finish;
1029                 }
1030
1031                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1032                 if (r >= 0)
1033                         continue;
1034
1035                 if (!shall_try_append_again(s->system_journal, r)) {
1036                         log_error("Can't write entry: %s", strerror(-r));
1037                         goto finish;
1038                 }
1039
1040                 server_rotate(s);
1041                 server_vacuum(s);
1042
1043                 if (!s->system_journal) {
1044                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1045                         r = -EIO;
1046                         goto finish;
1047                 }
1048
1049                 log_debug("Retrying write.");
1050                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1051                 if (r < 0) {
1052                         log_error("Can't write entry: %s", strerror(-r));
1053                         goto finish;
1054                 }
1055         }
1056
1057 finish:
1058         journal_file_post_change(s->system_journal);
1059
1060         journal_file_close(s->runtime_journal);
1061         s->runtime_journal = NULL;
1062
1063         if (r >= 0)
1064                 rm_rf("/run/log/journal", false, true, false);
1065
1066         sd_journal_close(j);
1067
1068         return r;
1069 }
1070
1071 int process_event(Server *s, struct epoll_event *ev) {
1072         assert(s);
1073         assert(ev);
1074
1075         if (ev->data.fd == s->signal_fd) {
1076                 struct signalfd_siginfo sfsi;
1077                 ssize_t n;
1078
1079                 if (ev->events != EPOLLIN) {
1080                         log_error("Got invalid event from epoll.");
1081                         return -EIO;
1082                 }
1083
1084                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1085                 if (n != sizeof(sfsi)) {
1086
1087                         if (n >= 0)
1088                                 return -EIO;
1089
1090                         if (errno == EINTR || errno == EAGAIN)
1091                                 return 1;
1092
1093                         return -errno;
1094                 }
1095
1096                 if (sfsi.ssi_signo == SIGUSR1) {
1097                         touch("/run/systemd/journal/flushed");
1098                         server_flush_to_var(s);
1099                         server_sync(s);
1100                         return 1;
1101                 }
1102
1103                 if (sfsi.ssi_signo == SIGUSR2) {
1104                         server_rotate(s);
1105                         server_vacuum(s);
1106                         return 1;
1107                 }
1108
1109                 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1110
1111                 return 0;
1112
1113         } else if (ev->data.fd == s->sync_timer_fd) {
1114                 int r;
1115                 uint64_t t;
1116
1117                 log_debug("Got sync request from epoll.");
1118
1119                 r = read(ev->data.fd, (void *)&t, sizeof(t));
1120                 if (r < 0)
1121                         return 0;
1122
1123                 server_sync(s);
1124                 return 1;
1125
1126         } else if (ev->data.fd == s->dev_kmsg_fd) {
1127                 int r;
1128
1129                 if (ev->events != EPOLLIN) {
1130                         log_error("Got invalid event from epoll.");
1131                         return -EIO;
1132                 }
1133
1134                 r = server_read_dev_kmsg(s);
1135                 if (r < 0)
1136                         return r;
1137
1138                 return 1;
1139
1140         } else if (ev->data.fd == s->native_fd ||
1141                    ev->data.fd == s->syslog_fd) {
1142
1143                 if (ev->events != EPOLLIN) {
1144                         log_error("Got invalid event from epoll.");
1145                         return -EIO;
1146                 }
1147
1148                 for (;;) {
1149                         struct msghdr msghdr;
1150                         struct iovec iovec;
1151                         struct ucred *ucred = NULL;
1152                         struct timeval *tv = NULL;
1153                         struct cmsghdr *cmsg;
1154                         char *label = NULL;
1155                         size_t label_len = 0;
1156                         union {
1157                                 struct cmsghdr cmsghdr;
1158
1159                                 /* We use NAME_MAX space for the
1160                                  * SELinux label here. The kernel
1161                                  * currently enforces no limit, but
1162                                  * according to suggestions from the
1163                                  * SELinux people this will change and
1164                                  * it will probably be identical to
1165                                  * NAME_MAX. For now we use that, but
1166                                  * this should be updated one day when
1167                                  * the final limit is known.*/
1168                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1169                                             CMSG_SPACE(sizeof(struct timeval)) +
1170                                             CMSG_SPACE(sizeof(int)) + /* fd */
1171                                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
1172                         } control;
1173                         ssize_t n;
1174                         int v;
1175                         int *fds = NULL;
1176                         unsigned n_fds = 0;
1177
1178                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1179                                 log_error("SIOCINQ failed: %m");
1180                                 return -errno;
1181                         }
1182
1183                         if (s->buffer_size < (size_t) v) {
1184                                 void *b;
1185                                 size_t l;
1186
1187                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1188                                 b = realloc(s->buffer, l+1);
1189
1190                                 if (!b) {
1191                                         log_error("Couldn't increase buffer.");
1192                                         return -ENOMEM;
1193                                 }
1194
1195                                 s->buffer_size = l;
1196                                 s->buffer = b;
1197                         }
1198
1199                         zero(iovec);
1200                         iovec.iov_base = s->buffer;
1201                         iovec.iov_len = s->buffer_size;
1202
1203                         zero(control);
1204                         zero(msghdr);
1205                         msghdr.msg_iov = &iovec;
1206                         msghdr.msg_iovlen = 1;
1207                         msghdr.msg_control = &control;
1208                         msghdr.msg_controllen = sizeof(control);
1209
1210                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1211                         if (n < 0) {
1212
1213                                 if (errno == EINTR || errno == EAGAIN)
1214                                         return 1;
1215
1216                                 log_error("recvmsg() failed: %m");
1217                                 return -errno;
1218                         }
1219
1220                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1221
1222                                 if (cmsg->cmsg_level == SOL_SOCKET &&
1223                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
1224                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1225                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
1226                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1227                                          cmsg->cmsg_type == SCM_SECURITY) {
1228                                         label = (char*) CMSG_DATA(cmsg);
1229                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
1230                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1231                                          cmsg->cmsg_type == SO_TIMESTAMP &&
1232                                          cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1233                                         tv = (struct timeval*) CMSG_DATA(cmsg);
1234                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1235                                          cmsg->cmsg_type == SCM_RIGHTS) {
1236                                         fds = (int*) CMSG_DATA(cmsg);
1237                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1238                                 }
1239                         }
1240
1241                         if (ev->data.fd == s->syslog_fd) {
1242                                 char *e;
1243
1244                                 if (n > 0 && n_fds == 0) {
1245                                         e = memchr(s->buffer, '\n', n);
1246                                         if (e)
1247                                                 *e = 0;
1248                                         else
1249                                                 s->buffer[n] = 0;
1250
1251                                         server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1252                                 } else if (n_fds > 0)
1253                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
1254
1255                         } else {
1256                                 if (n > 0 && n_fds == 0)
1257                                         server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1258                                 else if (n == 0 && n_fds == 1)
1259                                         server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1260                                 else if (n_fds > 0)
1261                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
1262                         }
1263
1264                         close_many(fds, n_fds);
1265                 }
1266
1267                 return 1;
1268
1269         } else if (ev->data.fd == s->stdout_fd) {
1270
1271                 if (ev->events != EPOLLIN) {
1272                         log_error("Got invalid event from epoll.");
1273                         return -EIO;
1274                 }
1275
1276                 stdout_stream_new(s);
1277                 return 1;
1278
1279         } else {
1280                 StdoutStream *stream;
1281
1282                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1283                         log_error("Got invalid event from epoll.");
1284                         return -EIO;
1285                 }
1286
1287                 /* If it is none of the well-known fds, it must be an
1288                  * stdout stream fd. Note that this is a bit ugly here
1289                  * (since we rely that none of the well-known fds
1290                  * could be interpreted as pointer), but nonetheless
1291                  * safe, since the well-known fds would never get an
1292                  * fd > 4096, i.e. beyond the first memory page */
1293
1294                 stream = ev->data.ptr;
1295
1296                 if (stdout_stream_process(stream) <= 0)
1297                         stdout_stream_free(stream);
1298
1299                 return 1;
1300         }
1301
1302         log_error("Unknown event.");
1303         return 0;
1304 }
1305
1306 static int open_signalfd(Server *s) {
1307         sigset_t mask;
1308         struct epoll_event ev;
1309
1310         assert(s);
1311
1312         assert_se(sigemptyset(&mask) == 0);
1313         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1314         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1315
1316         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1317         if (s->signal_fd < 0) {
1318                 log_error("signalfd(): %m");
1319                 return -errno;
1320         }
1321
1322         zero(ev);
1323         ev.events = EPOLLIN;
1324         ev.data.fd = s->signal_fd;
1325
1326         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1327                 log_error("epoll_ctl(): %m");
1328                 return -errno;
1329         }
1330
1331         return 0;
1332 }
1333
1334 static int server_parse_proc_cmdline(Server *s) {
1335         _cleanup_free_ char *line = NULL;
1336         char *w, *state;
1337         int r;
1338         size_t l;
1339
1340         if (detect_container(NULL) > 0)
1341                 return 0;
1342
1343         r = read_one_line_file("/proc/cmdline", &line);
1344         if (r < 0) {
1345                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1346                 return 0;
1347         }
1348
1349         FOREACH_WORD_QUOTED(w, l, line, state) {
1350                 _cleanup_free_ char *word;
1351
1352                 word = strndup(w, l);
1353                 if (!word)
1354                         return -ENOMEM;
1355
1356                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1357                         r = parse_boolean(word + 35);
1358                         if (r < 0)
1359                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1360                         else
1361                                 s->forward_to_syslog = r;
1362                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1363                         r = parse_boolean(word + 33);
1364                         if (r < 0)
1365                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1366                         else
1367                                 s->forward_to_kmsg = r;
1368                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1369                         r = parse_boolean(word + 36);
1370                         if (r < 0)
1371                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1372                         else
1373                                 s->forward_to_console = r;
1374                 } else if (startswith(word, "systemd.journald"))
1375                         log_warning("Invalid systemd.journald parameter. Ignoring.");
1376         }
1377
1378         return 0;
1379 }
1380
1381 static int server_parse_config_file(Server *s) {
1382         static const char fn[] = "/etc/systemd/journald.conf";
1383         _cleanup_fclose_ FILE *f = NULL;
1384         int r;
1385
1386         assert(s);
1387
1388         f = fopen(fn, "re");
1389         if (!f) {
1390                 if (errno == ENOENT)
1391                         return 0;
1392
1393                 log_warning("Failed to open configuration file %s: %m", fn);
1394                 return -errno;
1395         }
1396
1397         r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1398                          (void*) journald_gperf_lookup, false, false, s);
1399         if (r < 0)
1400                 log_warning("Failed to parse configuration file: %s", strerror(-r));
1401
1402         return r;
1403 }
1404
1405 static int server_open_sync_timer(Server *s) {
1406         int r;
1407         struct epoll_event ev;
1408
1409         assert(s);
1410
1411         s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1412         if (s->sync_timer_fd < 0)
1413                 return -errno;
1414
1415         zero(ev);
1416         ev.events = EPOLLIN;
1417         ev.data.fd = s->sync_timer_fd;
1418
1419         r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1420         if (r < 0) {
1421                 log_error("Failed to add idle timer fd to epoll object: %m");
1422                 return -errno;
1423         }
1424
1425         return 0;
1426 }
1427
1428 int server_schedule_sync(Server *s) {
1429         int r;
1430
1431         assert(s);
1432
1433         if (s->sync_scheduled)
1434                 return 0;
1435
1436         if (s->sync_interval_usec) {
1437                 struct itimerspec sync_timer_enable = {};
1438
1439                 timespec_store(&sync_timer_enable.it_value, s->sync_interval_usec);
1440
1441                 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1442                 if (r < 0)
1443                         return -errno;
1444         }
1445
1446         s->sync_scheduled = true;
1447
1448         return 0;
1449 }
1450
1451 int server_init(Server *s) {
1452         int n, r, fd;
1453
1454         assert(s);
1455
1456         zero(*s);
1457         s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1458             s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1459         s->compress = true;
1460         s->seal = true;
1461
1462         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1463         s->sync_scheduled = false;
1464
1465         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1466         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1467
1468         s->forward_to_syslog = true;
1469
1470         s->max_level_store = LOG_DEBUG;
1471         s->max_level_syslog = LOG_DEBUG;
1472         s->max_level_kmsg = LOG_NOTICE;
1473         s->max_level_console = LOG_INFO;
1474
1475         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1476         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1477
1478         server_parse_config_file(s);
1479         server_parse_proc_cmdline(s);
1480         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1481                 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1482                           (long long unsigned) s->rate_limit_interval,
1483                           s->rate_limit_burst);
1484                 s->rate_limit_interval = s->rate_limit_burst = 0;
1485         }
1486
1487         mkdir_p("/run/systemd/journal", 0755);
1488
1489         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1490         if (!s->user_journals)
1491                 return log_oom();
1492
1493         s->mmap = mmap_cache_new();
1494         if (!s->mmap)
1495                 return log_oom();
1496
1497         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1498         if (s->epoll_fd < 0) {
1499                 log_error("Failed to create epoll object: %m");
1500                 return -errno;
1501         }
1502
1503         n = sd_listen_fds(true);
1504         if (n < 0) {
1505                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1506                 return n;
1507         }
1508
1509         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1510
1511                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1512
1513                         if (s->native_fd >= 0) {
1514                                 log_error("Too many native sockets passed.");
1515                                 return -EINVAL;
1516                         }
1517
1518                         s->native_fd = fd;
1519
1520                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1521
1522                         if (s->stdout_fd >= 0) {
1523                                 log_error("Too many stdout sockets passed.");
1524                                 return -EINVAL;
1525                         }
1526
1527                         s->stdout_fd = fd;
1528
1529                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1530
1531                         if (s->syslog_fd >= 0) {
1532                                 log_error("Too many /dev/log sockets passed.");
1533                                 return -EINVAL;
1534                         }
1535
1536                         s->syslog_fd = fd;
1537
1538                 } else {
1539                         log_error("Unknown socket passed.");
1540                         return -EINVAL;
1541                 }
1542         }
1543
1544         r = server_open_syslog_socket(s);
1545         if (r < 0)
1546                 return r;
1547
1548         r = server_open_native_socket(s);
1549         if (r < 0)
1550                 return r;
1551
1552         r = server_open_stdout_socket(s);
1553         if (r < 0)
1554                 return r;
1555
1556         r = server_open_dev_kmsg(s);
1557         if (r < 0)
1558                 return r;
1559
1560         r = server_open_kernel_seqnum(s);
1561         if (r < 0)
1562                 return r;
1563
1564         r = server_open_sync_timer(s);
1565         if (r < 0)
1566                 return r;
1567
1568         r = open_signalfd(s);
1569         if (r < 0)
1570                 return r;
1571
1572         s->udev = udev_new();
1573         if (!s->udev)
1574                 return -ENOMEM;
1575
1576         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1577                                                s->rate_limit_burst);
1578         if (!s->rate_limit)
1579                 return -ENOMEM;
1580
1581         r = system_journal_open(s);
1582         if (r < 0)
1583                 return r;
1584
1585         return 0;
1586 }
1587
1588 void server_maybe_append_tags(Server *s) {
1589 #ifdef HAVE_GCRYPT
1590         JournalFile *f;
1591         Iterator i;
1592         usec_t n;
1593
1594         n = now(CLOCK_REALTIME);
1595
1596         if (s->system_journal)
1597                 journal_file_maybe_append_tag(s->system_journal, n);
1598
1599         HASHMAP_FOREACH(f, s->user_journals, i)
1600                 journal_file_maybe_append_tag(f, n);
1601 #endif
1602 }
1603
1604 void server_done(Server *s) {
1605         JournalFile *f;
1606         assert(s);
1607
1608         while (s->stdout_streams)
1609                 stdout_stream_free(s->stdout_streams);
1610
1611         if (s->system_journal)
1612                 journal_file_close(s->system_journal);
1613
1614         if (s->runtime_journal)
1615                 journal_file_close(s->runtime_journal);
1616
1617         while ((f = hashmap_steal_first(s->user_journals)))
1618                 journal_file_close(f);
1619
1620         hashmap_free(s->user_journals);
1621
1622         if (s->epoll_fd >= 0)
1623                 close_nointr_nofail(s->epoll_fd);
1624
1625         if (s->signal_fd >= 0)
1626                 close_nointr_nofail(s->signal_fd);
1627
1628         if (s->syslog_fd >= 0)
1629                 close_nointr_nofail(s->syslog_fd);
1630
1631         if (s->native_fd >= 0)
1632                 close_nointr_nofail(s->native_fd);
1633
1634         if (s->stdout_fd >= 0)
1635                 close_nointr_nofail(s->stdout_fd);
1636
1637         if (s->dev_kmsg_fd >= 0)
1638                 close_nointr_nofail(s->dev_kmsg_fd);
1639
1640         if (s->sync_timer_fd >= 0)
1641                 close_nointr_nofail(s->sync_timer_fd);
1642
1643         if (s->rate_limit)
1644                 journal_rate_limit_free(s->rate_limit);
1645
1646         if (s->kernel_seqnum)
1647                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1648
1649         free(s->buffer);
1650         free(s->tty_path);
1651
1652         if (s->mmap)
1653                 mmap_cache_unref(s->mmap);
1654
1655         if (s->udev)
1656                 udev_unref(s->udev);
1657 }