chiark / gitweb /
journald: avoid NSS in journald
[elogind.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
26 #include <sys/mman.h>
27 #include <sys/timerfd.h>
28
29 #include <libudev.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
33
34 #include "fileio.h"
35 #include "mkdir.h"
36 #include "hashmap.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
40 #include "list.h"
41 #include "virt.h"
42 #include "missing.h"
43 #include "conf-parser.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-server.h"
48 #include "journald-rate-limit.h"
49 #include "journald-kmsg.h"
50 #include "journald-syslog.h"
51 #include "journald-stream.h"
52 #include "journald-console.h"
53 #include "journald-native.h"
54
55 #ifdef HAVE_ACL
56 #include <sys/acl.h>
57 #include <acl/libacl.h>
58 #include "acl-util.h"
59 #endif
60
61 #ifdef HAVE_SELINUX
62 #include <selinux/selinux.h>
63 #endif
64
65 #define USER_JOURNALS_MAX 1024
66
67 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
68 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
69 #define DEFAULT_RATE_LIMIT_BURST 1000
70
71 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
72
73 static const char* const storage_table[] = {
74         [STORAGE_AUTO] = "auto",
75         [STORAGE_VOLATILE] = "volatile",
76         [STORAGE_PERSISTENT] = "persistent",
77         [STORAGE_NONE] = "none"
78 };
79
80 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
81 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
82
83 static const char* const split_mode_table[] = {
84         [SPLIT_NONE] = "none",
85         [SPLIT_UID] = "uid",
86         [SPLIT_LOGIN] = "login"
87 };
88
89 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
90 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
91
92 static uint64_t available_space(Server *s, bool verbose) {
93         char ids[33];
94         _cleanup_free_ char *p = NULL;
95         sd_id128_t machine;
96         struct statvfs ss;
97         uint64_t sum = 0, ss_avail = 0, avail = 0;
98         int r;
99         _cleanup_closedir_ DIR *d = NULL;
100         usec_t ts;
101         const char *f;
102         JournalMetrics *m;
103
104         ts = now(CLOCK_MONOTONIC);
105
106         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
107             && !verbose)
108                 return s->cached_available_space;
109
110         r = sd_id128_get_machine(&machine);
111         if (r < 0)
112                 return 0;
113
114         if (s->system_journal) {
115                 f = "/var/log/journal/";
116                 m = &s->system_metrics;
117         } else {
118                 f = "/run/log/journal/";
119                 m = &s->runtime_metrics;
120         }
121
122         assert(m);
123
124         p = strappend(f, sd_id128_to_string(machine, ids));
125         if (!p)
126                 return 0;
127
128         d = opendir(p);
129         if (!d)
130                 return 0;
131
132         if (fstatvfs(dirfd(d), &ss) < 0)
133                 return 0;
134
135         for (;;) {
136                 struct stat st;
137                 struct dirent *de;
138                 union dirent_storage buf;
139
140                 r = readdir_r(d, &buf.de, &de);
141                 if (r != 0)
142                         break;
143
144                 if (!de)
145                         break;
146
147                 if (!endswith(de->d_name, ".journal") &&
148                     !endswith(de->d_name, ".journal~"))
149                         continue;
150
151                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
152                         continue;
153
154                 if (!S_ISREG(st.st_mode))
155                         continue;
156
157                 sum += (uint64_t) st.st_blocks * 512UL;
158         }
159
160         ss_avail = ss.f_bsize * ss.f_bavail;
161         avail = ss_avail > m->keep_free ? ss_avail - m->keep_free : 0;
162
163         s->cached_available_space = MIN(m->max_use, avail) > sum ? MIN(m->max_use, avail) - sum : 0;
164         s->cached_available_space_timestamp = ts;
165
166         if (verbose) {
167                 char    fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
168                         fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
169
170                 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
171                                       "%s journal is using %s (max %s, leaving %s of free %s, current limit %s).",
172                                       s->system_journal ? "Permanent" : "Runtime",
173                                       format_bytes(fb1, sizeof(fb1), sum),
174                                       format_bytes(fb2, sizeof(fb2), m->max_use),
175                                       format_bytes(fb3, sizeof(fb3), m->keep_free),
176                                       format_bytes(fb4, sizeof(fb4), ss_avail),
177                                       format_bytes(fb5, sizeof(fb5), MIN(m->max_use, avail)));
178         }
179
180         return s->cached_available_space;
181 }
182
183 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
184         int r;
185 #ifdef HAVE_ACL
186         acl_t acl;
187         acl_entry_t entry;
188         acl_permset_t permset;
189 #endif
190
191         assert(f);
192
193         r = fchmod(f->fd, 0640);
194         if (r < 0)
195                 log_warning("Failed to fix access mode on %s, ignoring: %s", f->path, strerror(-r));
196
197 #ifdef HAVE_ACL
198         if (uid <= 0)
199                 return;
200
201         acl = acl_get_fd(f->fd);
202         if (!acl) {
203                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
204                 return;
205         }
206
207         r = acl_find_uid(acl, uid, &entry);
208         if (r <= 0) {
209
210                 if (acl_create_entry(&acl, &entry) < 0 ||
211                     acl_set_tag_type(entry, ACL_USER) < 0 ||
212                     acl_set_qualifier(entry, &uid) < 0) {
213                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
214                         goto finish;
215                 }
216         }
217
218         /* We do not recalculate the mask unconditionally here,
219          * so that the fchmod() mask above stays intact. */
220         if (acl_get_permset(entry, &permset) < 0 ||
221             acl_add_perm(permset, ACL_READ) < 0 ||
222             calc_acl_mask_if_needed(&acl) < 0) {
223                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
224                 goto finish;
225         }
226
227         if (acl_set_fd(f->fd, acl) < 0)
228                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
229
230 finish:
231         acl_free(acl);
232 #endif
233 }
234
235 static JournalFile* find_journal(Server *s, uid_t uid) {
236         _cleanup_free_ char *p = NULL;
237         int r;
238         JournalFile *f;
239         sd_id128_t machine;
240
241         assert(s);
242
243         /* We split up user logs only on /var, not on /run. If the
244          * runtime file is open, we write to it exclusively, in order
245          * to guarantee proper order as soon as we flush /run to
246          * /var and close the runtime file. */
247
248         if (s->runtime_journal)
249                 return s->runtime_journal;
250
251         if (uid <= 0)
252                 return s->system_journal;
253
254         r = sd_id128_get_machine(&machine);
255         if (r < 0)
256                 return s->system_journal;
257
258         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
259         if (f)
260                 return f;
261
262         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
263                      SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
264                 return s->system_journal;
265
266         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
267                 /* Too many open? Then let's close one */
268                 f = hashmap_steal_first(s->user_journals);
269                 assert(f);
270                 journal_file_close(f);
271         }
272
273         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
274         if (r < 0)
275                 return s->system_journal;
276
277         server_fix_perms(s, f, uid);
278
279         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
280         if (r < 0) {
281                 journal_file_close(f);
282                 return s->system_journal;
283         }
284
285         return f;
286 }
287
288 void server_rotate(Server *s) {
289         JournalFile *f;
290         void *k;
291         Iterator i;
292         int r;
293
294         log_debug("Rotating...");
295
296         if (s->runtime_journal) {
297                 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
298                 if (r < 0)
299                         if (s->runtime_journal)
300                                 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
301                         else
302                                 log_error("Failed to create new runtime journal: %s", strerror(-r));
303                 else
304                         server_fix_perms(s, s->runtime_journal, 0);
305         }
306
307         if (s->system_journal) {
308                 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
309                 if (r < 0)
310                         if (s->system_journal)
311                                 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
312                         else
313                                 log_error("Failed to create new system journal: %s", strerror(-r));
314
315                 else
316                         server_fix_perms(s, s->system_journal, 0);
317         }
318
319         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
320                 r = journal_file_rotate(&f, s->compress, s->seal);
321                 if (r < 0)
322                         if (f)
323                                 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
324                         else
325                                 log_error("Failed to create user journal: %s", strerror(-r));
326                 else {
327                         hashmap_replace(s->user_journals, k, f);
328                         server_fix_perms(s, f, PTR_TO_UINT32(k));
329                 }
330         }
331 }
332
333 void server_sync(Server *s) {
334         static const struct itimerspec sync_timer_disable = {};
335         JournalFile *f;
336         void *k;
337         Iterator i;
338         int r;
339
340         if (s->system_journal) {
341                 r = journal_file_set_offline(s->system_journal);
342                 if (r < 0)
343                         log_error("Failed to sync system journal: %s", strerror(-r));
344         }
345
346         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
347                 r = journal_file_set_offline(f);
348                 if (r < 0)
349                         log_error("Failed to sync user journal: %s", strerror(-r));
350         }
351
352         r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
353         if (r < 0)
354                 log_error("Failed to disable max timer: %m");
355
356         s->sync_scheduled = false;
357 }
358
359 void server_vacuum(Server *s) {
360         char ids[33];
361         sd_id128_t machine;
362         int r;
363
364         log_debug("Vacuuming...");
365
366         s->oldest_file_usec = 0;
367
368         r = sd_id128_get_machine(&machine);
369         if (r < 0) {
370                 log_error("Failed to get machine ID: %s", strerror(-r));
371                 return;
372         }
373
374         sd_id128_to_string(machine, ids);
375
376         if (s->system_journal) {
377                 char *p = strappenda("/var/log/journal/", ids);
378
379                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
380                 if (r < 0 && r != -ENOENT)
381                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
382         }
383
384         if (s->runtime_journal) {
385                 char *p = strappenda("/run/log/journal/", ids);
386
387                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
388                 if (r < 0 && r != -ENOENT)
389                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
390         }
391
392         s->cached_available_space_timestamp = 0;
393 }
394
395 bool shall_try_append_again(JournalFile *f, int r) {
396
397         /* -E2BIG            Hit configured limit
398            -EFBIG            Hit fs limit
399            -EDQUOT           Quota limit hit
400            -ENOSPC           Disk full
401            -EHOSTDOWN        Other machine
402            -EBUSY            Unclean shutdown
403            -EPROTONOSUPPORT  Unsupported feature
404            -EBADMSG          Corrupted
405            -ENODATA          Truncated
406            -ESHUTDOWN        Already archived */
407
408         if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
409                 log_debug("%s: Allocation limit reached, rotating.", f->path);
410         else if (r == -EHOSTDOWN)
411                 log_info("%s: Journal file from other machine, rotating.", f->path);
412         else if (r == -EBUSY)
413                 log_info("%s: Unclean shutdown, rotating.", f->path);
414         else if (r == -EPROTONOSUPPORT)
415                 log_info("%s: Unsupported feature, rotating.", f->path);
416         else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
417                 log_warning("%s: Journal file corrupted, rotating.", f->path);
418         else
419                 return false;
420
421         return true;
422 }
423
424 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
425         JournalFile *f;
426         bool vacuumed = false;
427         int r;
428
429         assert(s);
430         assert(iovec);
431         assert(n > 0);
432
433         f = find_journal(s, uid);
434         if (!f)
435                 return;
436
437         if (journal_file_rotate_suggested(f, s->max_file_usec)) {
438                 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
439                 server_rotate(s);
440                 server_vacuum(s);
441                 vacuumed = true;
442
443                 f = find_journal(s, uid);
444                 if (!f)
445                         return;
446         }
447
448         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
449         if (r >= 0) {
450                 server_schedule_sync(s, priority);
451                 return;
452         }
453
454         if (vacuumed || !shall_try_append_again(f, r)) {
455                 size_t size = 0;
456                 unsigned i;
457                 for (i = 0; i < n; i++)
458                         size += iovec[i].iov_len;
459
460                 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
461                 return;
462         }
463
464         server_rotate(s);
465         server_vacuum(s);
466
467         f = find_journal(s, uid);
468         if (!f)
469                 return;
470
471         log_debug("Retrying write.");
472         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
473         if (r < 0) {
474                 size_t size = 0;
475                 unsigned i;
476                 for (i = 0; i < n; i++)
477                         size += iovec[i].iov_len;
478
479                 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
480         } else
481                 server_schedule_sync(s, priority);
482 }
483
484 static void dispatch_message_real(
485                 Server *s,
486                 struct iovec *iovec, unsigned n, unsigned m,
487                 struct ucred *ucred,
488                 struct timeval *tv,
489                 const char *label, size_t label_len,
490                 const char *unit_id,
491                 int priority,
492                 pid_t object_pid) {
493
494         char    pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
495                 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
496                 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
497                 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
498                 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
499                 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
500                 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=",
501                 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
502                 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
503                 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
504         uid_t object_uid;
505         gid_t object_gid;
506         char *x;
507         sd_id128_t id;
508         int r;
509         char *t, *c;
510         uid_t realuid = 0, owner = 0, journal_uid;
511         bool owner_valid = false;
512 #ifdef HAVE_AUDIT
513         char    audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
514                 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
515                 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
516                 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
517
518         uint32_t audit;
519         uid_t loginuid;
520 #endif
521
522         assert(s);
523         assert(iovec);
524         assert(n > 0);
525         assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
526
527         if (ucred) {
528                 realuid = ucred->uid;
529
530                 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
531                 IOVEC_SET_STRING(iovec[n++], pid);
532
533                 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
534                 IOVEC_SET_STRING(iovec[n++], uid);
535
536                 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
537                 IOVEC_SET_STRING(iovec[n++], gid);
538
539                 r = get_process_comm(ucred->pid, &t);
540                 if (r >= 0) {
541                         x = strappenda("_COMM=", t);
542                         free(t);
543                         IOVEC_SET_STRING(iovec[n++], x);
544                 }
545
546                 r = get_process_exe(ucred->pid, &t);
547                 if (r >= 0) {
548                         x = strappenda("_EXE=", t);
549                         free(t);
550                         IOVEC_SET_STRING(iovec[n++], x);
551                 }
552
553                 r = get_process_cmdline(ucred->pid, 0, false, &t);
554                 if (r >= 0) {
555                         x = strappenda("_CMDLINE=", t);
556                         free(t);
557                         IOVEC_SET_STRING(iovec[n++], x);
558                 }
559
560                 r = get_process_capeff(ucred->pid, &t);
561                 if (r >= 0) {
562                         x = strappenda("_CAP_EFFECTIVE=", t);
563                         free(t);
564                         IOVEC_SET_STRING(iovec[n++], x);
565                 }
566
567 #ifdef HAVE_AUDIT
568                 r = audit_session_from_pid(ucred->pid, &audit);
569                 if (r >= 0) {
570                         sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
571                         IOVEC_SET_STRING(iovec[n++], audit_session);
572                 }
573
574                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
575                 if (r >= 0) {
576                         sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
577                         IOVEC_SET_STRING(iovec[n++], audit_loginuid);
578                 }
579 #endif
580
581                 r = cg_pid_get_path_shifted(ucred->pid, NULL, &c);
582                 if (r >= 0) {
583                         char *session = NULL;
584
585                         x = strappenda("_SYSTEMD_CGROUP=", c);
586                         IOVEC_SET_STRING(iovec[n++], x);
587
588                         r = cg_path_get_session(c, &t);
589                         if (r >= 0) {
590                                 session = strappenda("_SYSTEMD_SESSION=", t);
591                                 free(t);
592                                 IOVEC_SET_STRING(iovec[n++], session);
593                         }
594
595                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
596                                 owner_valid = true;
597
598                                 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
599                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
600                         }
601
602                         if (cg_path_get_unit(c, &t) >= 0) {
603                                 x = strappenda("_SYSTEMD_UNIT=", t);
604                                 free(t);
605                                 IOVEC_SET_STRING(iovec[n++], x);
606                         } else if (unit_id && !session) {
607                                 x = strappenda("_SYSTEMD_UNIT=", unit_id);
608                                 IOVEC_SET_STRING(iovec[n++], x);
609                         }
610
611                         if (cg_path_get_user_unit(c, &t) >= 0) {
612                                 x = strappenda("_SYSTEMD_USER_UNIT=", t);
613                                 free(t);
614                                 IOVEC_SET_STRING(iovec[n++], x);
615                         } else if (unit_id && session) {
616                                 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
617                                 IOVEC_SET_STRING(iovec[n++], x);
618                         }
619
620                         if (cg_path_get_slice(c, &t) >= 0) {
621                                 x = strappenda("_SYSTEMD_SLICE=", t);
622                                 free(t);
623                                 IOVEC_SET_STRING(iovec[n++], x);
624                         }
625
626                         free(c);
627                 }
628
629 #ifdef HAVE_SELINUX
630                 if (label) {
631                         x = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);
632
633                         *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
634                         IOVEC_SET_STRING(iovec[n++], x);
635                 } else {
636                         security_context_t con;
637
638                         if (getpidcon(ucred->pid, &con) >= 0) {
639                                 x = strappenda("_SELINUX_CONTEXT=", con);
640
641                                 freecon(con);
642                                 IOVEC_SET_STRING(iovec[n++], x);
643                         }
644                 }
645 #endif
646         }
647         assert(n <= m);
648
649         if (object_pid) {
650                 r = get_process_uid(object_pid, &object_uid);
651                 if (r >= 0) {
652                         sprintf(o_uid, "OBJECT_UID=%lu", (unsigned long) object_uid);
653                         IOVEC_SET_STRING(iovec[n++], o_uid);
654                 }
655
656                 r = get_process_gid(object_pid, &object_gid);
657                 if (r >= 0) {
658                         sprintf(o_gid, "OBJECT_GID=%lu", (unsigned long) object_gid);
659                         IOVEC_SET_STRING(iovec[n++], o_gid);
660                 }
661
662                 r = get_process_comm(object_pid, &t);
663                 if (r >= 0) {
664                         x = strappenda("OBJECT_COMM=", t);
665                         free(t);
666                         IOVEC_SET_STRING(iovec[n++], x);
667                 }
668
669                 r = get_process_exe(object_pid, &t);
670                 if (r >= 0) {
671                         x = strappenda("OBJECT_EXE=", t);
672                         free(t);
673                         IOVEC_SET_STRING(iovec[n++], x);
674                 }
675
676                 r = get_process_cmdline(object_pid, 0, false, &t);
677                 if (r >= 0) {
678                         x = strappenda("OBJECT_CMDLINE=", t);
679                         free(t);
680                         IOVEC_SET_STRING(iovec[n++], x);
681                 }
682
683 #ifdef HAVE_AUDIT
684                 r = audit_session_from_pid(object_pid, &audit);
685                 if (r >= 0) {
686                         sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%lu", (unsigned long) audit);
687                         IOVEC_SET_STRING(iovec[n++], o_audit_session);
688                 }
689
690                 r = audit_loginuid_from_pid(object_pid, &loginuid);
691                 if (r >= 0) {
692                         sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
693                         IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
694                 }
695 #endif
696
697                 r = cg_pid_get_path_shifted(object_pid, NULL, &c);
698                 if (r >= 0) {
699                         x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
700                         IOVEC_SET_STRING(iovec[n++], x);
701
702                         r = cg_path_get_session(c, &t);
703                         if (r >= 0) {
704                                 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
705                                 free(t);
706                                 IOVEC_SET_STRING(iovec[n++], x);
707                         }
708
709                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
710                                 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
711                                 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
712                         }
713
714                         if (cg_path_get_unit(c, &t) >= 0) {
715                                 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
716                                 free(t);
717                                 IOVEC_SET_STRING(iovec[n++], x);
718                         }
719
720                         if (cg_path_get_user_unit(c, &t) >= 0) {
721                                 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
722                                 free(t);
723                                 IOVEC_SET_STRING(iovec[n++], x);
724                         }
725
726                         free(c);
727                 }
728         }
729         assert(n <= m);
730
731         if (tv) {
732                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
733                 IOVEC_SET_STRING(iovec[n++], source_time);
734         }
735
736         /* Note that strictly speaking storing the boot id here is
737          * redundant since the entry includes this in-line
738          * anyway. However, we need this indexed, too. */
739         r = sd_id128_get_boot(&id);
740         if (r >= 0) {
741                 sd_id128_to_string(id, boot_id + strlen("_BOOT_ID="));
742                 IOVEC_SET_STRING(iovec[n++], boot_id);
743         }
744
745         r = sd_id128_get_machine(&id);
746         if (r >= 0) {
747                 sd_id128_to_string(id, machine_id + strlen("_MACHINE_ID="));
748                 IOVEC_SET_STRING(iovec[n++], machine_id);
749         }
750
751         t = gethostname_malloc();
752         if (t) {
753                 x = strappenda("_HOSTNAME=", t);
754                 free(t);
755                 IOVEC_SET_STRING(iovec[n++], x);
756         }
757
758         assert(n <= m);
759
760         if (s->split_mode == SPLIT_UID && realuid > 0)
761                 /* Split up strictly by any UID */
762                 journal_uid = realuid;
763         else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
764                 /* Split up by login UIDs, this avoids creation of
765                  * individual journals for system UIDs.  We do this
766                  * only if the realuid is not root, in order not to
767                  * accidentally leak privileged information to the
768                  * user that is logged by a privileged process that is
769                  * part of an unprivileged session.*/
770                 journal_uid = owner;
771         else
772                 journal_uid = 0;
773
774         write_to_journal(s, journal_uid, iovec, n, priority);
775 }
776
777 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
778         char mid[11 + 32 + 1];
779         char buffer[16 + LINE_MAX + 1];
780         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
781         int n = 0;
782         va_list ap;
783         struct ucred ucred = {};
784
785         assert(s);
786         assert(format);
787
788         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
789         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
790
791         memcpy(buffer, "MESSAGE=", 8);
792         va_start(ap, format);
793         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
794         va_end(ap);
795         char_array_0(buffer);
796         IOVEC_SET_STRING(iovec[n++], buffer);
797
798         if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
799                 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
800                 char_array_0(mid);
801                 IOVEC_SET_STRING(iovec[n++], mid);
802         }
803
804         ucred.pid = getpid();
805         ucred.uid = getuid();
806         ucred.gid = getgid();
807
808         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
809 }
810
811 void server_dispatch_message(
812                 Server *s,
813                 struct iovec *iovec, unsigned n, unsigned m,
814                 struct ucred *ucred,
815                 struct timeval *tv,
816                 const char *label, size_t label_len,
817                 const char *unit_id,
818                 int priority,
819                 pid_t object_pid) {
820
821         int rl, r;
822         _cleanup_free_ char *path = NULL;
823         char *c;
824
825         assert(s);
826         assert(iovec || n == 0);
827
828         if (n == 0)
829                 return;
830
831         if (LOG_PRI(priority) > s->max_level_store)
832                 return;
833
834         /* Stop early in case the information will not be stored
835          * in a journal. */
836         if (s->storage == STORAGE_NONE)
837                 return;
838
839         if (!ucred)
840                 goto finish;
841
842         r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
843         if (r < 0)
844                 goto finish;
845
846         /* example: /user/lennart/3/foobar
847          *          /system/dbus.service/foobar
848          *
849          * So let's cut of everything past the third /, since that is
850          * where user directories start */
851
852         c = strchr(path, '/');
853         if (c) {
854                 c = strchr(c+1, '/');
855                 if (c) {
856                         c = strchr(c+1, '/');
857                         if (c)
858                                 *c = 0;
859                 }
860         }
861
862         rl = journal_rate_limit_test(s->rate_limit, path,
863                                      priority & LOG_PRIMASK, available_space(s, false));
864
865         if (rl == 0)
866                 return;
867
868         /* Write a suppression message if we suppressed something */
869         if (rl > 1)
870                 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
871                                       "Suppressed %u messages from %s", rl - 1, path);
872
873 finish:
874         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
875 }
876
877
878 static int system_journal_open(Server *s) {
879         int r;
880         char *fn;
881         sd_id128_t machine;
882         char ids[33];
883
884         r = sd_id128_get_machine(&machine);
885         if (r < 0) {
886                 log_error("Failed to get machine id: %s", strerror(-r));
887                 return r;
888         }
889
890         sd_id128_to_string(machine, ids);
891
892         if (!s->system_journal &&
893             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
894             access("/run/systemd/journal/flushed", F_OK) >= 0) {
895
896                 /* If in auto mode: first try to create the machine
897                  * path, but not the prefix.
898                  *
899                  * If in persistent mode: create /var/log/journal and
900                  * the machine path */
901
902                 if (s->storage == STORAGE_PERSISTENT)
903                         (void) mkdir("/var/log/journal/", 0755);
904
905                 fn = strappenda("/var/log/journal/", ids);
906                 (void) mkdir(fn, 0755);
907
908                 fn = strappenda(fn, "/system.journal");
909                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
910
911                 if (r >= 0)
912                         server_fix_perms(s, s->system_journal, 0);
913                 else if (r < 0) {
914                         if (r != -ENOENT && r != -EROFS)
915                                 log_warning("Failed to open system journal: %s", strerror(-r));
916
917                         r = 0;
918                 }
919         }
920
921         if (!s->runtime_journal &&
922             (s->storage != STORAGE_NONE)) {
923
924                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
925                 if (!fn)
926                         return -ENOMEM;
927
928                 if (s->system_journal) {
929
930                         /* Try to open the runtime journal, but only
931                          * if it already exists, so that we can flush
932                          * it into the system journal */
933
934                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
935                         free(fn);
936
937                         if (r < 0) {
938                                 if (r != -ENOENT)
939                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
940
941                                 r = 0;
942                         }
943
944                 } else {
945
946                         /* OK, we really need the runtime journal, so create
947                          * it if necessary. */
948
949                         (void) mkdir_parents(fn, 0755);
950                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
951                         free(fn);
952
953                         if (r < 0) {
954                                 log_error("Failed to open runtime journal: %s", strerror(-r));
955                                 return r;
956                         }
957                 }
958
959                 if (s->runtime_journal)
960                         server_fix_perms(s, s->runtime_journal, 0);
961         }
962
963         available_space(s, true);
964
965         return r;
966 }
967
968 int server_flush_to_var(Server *s) {
969         int r;
970         sd_id128_t machine;
971         sd_journal *j = NULL;
972
973         assert(s);
974
975         if (s->storage != STORAGE_AUTO &&
976             s->storage != STORAGE_PERSISTENT)
977                 return 0;
978
979         if (!s->runtime_journal)
980                 return 0;
981
982         system_journal_open(s);
983
984         if (!s->system_journal)
985                 return 0;
986
987         log_debug("Flushing to /var...");
988
989         r = sd_id128_get_machine(&machine);
990         if (r < 0)
991                 return r;
992
993         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
994         if (r < 0) {
995                 log_error("Failed to read runtime journal: %s", strerror(-r));
996                 return r;
997         }
998
999         sd_journal_set_data_threshold(j, 0);
1000
1001         SD_JOURNAL_FOREACH(j) {
1002                 Object *o = NULL;
1003                 JournalFile *f;
1004
1005                 f = j->current_file;
1006                 assert(f && f->current_offset > 0);
1007
1008                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1009                 if (r < 0) {
1010                         log_error("Can't read entry: %s", strerror(-r));
1011                         goto finish;
1012                 }
1013
1014                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1015                 if (r >= 0)
1016                         continue;
1017
1018                 if (!shall_try_append_again(s->system_journal, r)) {
1019                         log_error("Can't write entry: %s", strerror(-r));
1020                         goto finish;
1021                 }
1022
1023                 server_rotate(s);
1024                 server_vacuum(s);
1025
1026                 if (!s->system_journal) {
1027                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1028                         r = -EIO;
1029                         goto finish;
1030                 }
1031
1032                 log_debug("Retrying write.");
1033                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1034                 if (r < 0) {
1035                         log_error("Can't write entry: %s", strerror(-r));
1036                         goto finish;
1037                 }
1038         }
1039
1040 finish:
1041         journal_file_post_change(s->system_journal);
1042
1043         journal_file_close(s->runtime_journal);
1044         s->runtime_journal = NULL;
1045
1046         if (r >= 0)
1047                 rm_rf("/run/log/journal", false, true, false);
1048
1049         sd_journal_close(j);
1050
1051         return r;
1052 }
1053
1054 int process_event(Server *s, struct epoll_event *ev) {
1055         assert(s);
1056         assert(ev);
1057
1058         if (ev->data.fd == s->signal_fd) {
1059                 struct signalfd_siginfo sfsi;
1060                 ssize_t n;
1061
1062                 if (ev->events != EPOLLIN) {
1063                         log_error("Got invalid event from epoll.");
1064                         return -EIO;
1065                 }
1066
1067                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1068                 if (n != sizeof(sfsi)) {
1069
1070                         if (n >= 0)
1071                                 return -EIO;
1072
1073                         if (errno == EINTR || errno == EAGAIN)
1074                                 return 1;
1075
1076                         return -errno;
1077                 }
1078
1079                 if (sfsi.ssi_signo == SIGUSR1) {
1080                         log_info("Received request to flush runtime journal from PID %"PRIu32,
1081                                  sfsi.ssi_pid);
1082                         touch("/run/systemd/journal/flushed");
1083                         server_flush_to_var(s);
1084                         server_sync(s);
1085                         return 1;
1086                 }
1087
1088                 if (sfsi.ssi_signo == SIGUSR2) {
1089                         log_info("Received request to rotate journal from PID %"PRIu32,
1090                                  sfsi.ssi_pid);
1091                         server_rotate(s);
1092                         server_vacuum(s);
1093                         return 1;
1094                 }
1095
1096                 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1097
1098                 return 0;
1099
1100         } else if (ev->data.fd == s->sync_timer_fd) {
1101                 int r;
1102                 uint64_t t;
1103
1104                 log_debug("Got sync request from epoll.");
1105
1106                 r = read(ev->data.fd, (void *)&t, sizeof(t));
1107                 if (r < 0)
1108                         return 0;
1109
1110                 server_sync(s);
1111                 return 1;
1112
1113         } else if (ev->data.fd == s->dev_kmsg_fd) {
1114                 int r;
1115
1116                 if (ev->events != EPOLLIN) {
1117                         log_error("Got invalid event from epoll.");
1118                         return -EIO;
1119                 }
1120
1121                 r = server_read_dev_kmsg(s);
1122                 if (r < 0)
1123                         return r;
1124
1125                 return 1;
1126
1127         } else if (ev->data.fd == s->native_fd ||
1128                    ev->data.fd == s->syslog_fd) {
1129
1130                 if (ev->events != EPOLLIN) {
1131                         log_error("Got invalid event from epoll.");
1132                         return -EIO;
1133                 }
1134
1135                 for (;;) {
1136                         struct msghdr msghdr;
1137                         struct iovec iovec;
1138                         struct ucred *ucred = NULL;
1139                         struct timeval *tv = NULL;
1140                         struct cmsghdr *cmsg;
1141                         char *label = NULL;
1142                         size_t label_len = 0;
1143                         union {
1144                                 struct cmsghdr cmsghdr;
1145
1146                                 /* We use NAME_MAX space for the
1147                                  * SELinux label here. The kernel
1148                                  * currently enforces no limit, but
1149                                  * according to suggestions from the
1150                                  * SELinux people this will change and
1151                                  * it will probably be identical to
1152                                  * NAME_MAX. For now we use that, but
1153                                  * this should be updated one day when
1154                                  * the final limit is known.*/
1155                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1156                                             CMSG_SPACE(sizeof(struct timeval)) +
1157                                             CMSG_SPACE(sizeof(int)) + /* fd */
1158                                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
1159                         } control;
1160                         ssize_t n;
1161                         int v;
1162                         int *fds = NULL;
1163                         unsigned n_fds = 0;
1164
1165                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1166                                 log_error("SIOCINQ failed: %m");
1167                                 return -errno;
1168                         }
1169
1170                         if (s->buffer_size < (size_t) v) {
1171                                 void *b;
1172                                 size_t l;
1173
1174                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1175                                 b = realloc(s->buffer, l+1);
1176
1177                                 if (!b) {
1178                                         log_error("Couldn't increase buffer.");
1179                                         return -ENOMEM;
1180                                 }
1181
1182                                 s->buffer_size = l;
1183                                 s->buffer = b;
1184                         }
1185
1186                         zero(iovec);
1187                         iovec.iov_base = s->buffer;
1188                         iovec.iov_len = s->buffer_size;
1189
1190                         zero(control);
1191                         zero(msghdr);
1192                         msghdr.msg_iov = &iovec;
1193                         msghdr.msg_iovlen = 1;
1194                         msghdr.msg_control = &control;
1195                         msghdr.msg_controllen = sizeof(control);
1196
1197                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1198                         if (n < 0) {
1199
1200                                 if (errno == EINTR || errno == EAGAIN)
1201                                         return 1;
1202
1203                                 log_error("recvmsg() failed: %m");
1204                                 return -errno;
1205                         }
1206
1207                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1208
1209                                 if (cmsg->cmsg_level == SOL_SOCKET &&
1210                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
1211                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1212                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
1213                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1214                                          cmsg->cmsg_type == SCM_SECURITY) {
1215                                         label = (char*) CMSG_DATA(cmsg);
1216                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
1217                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1218                                            cmsg->cmsg_type == SO_TIMESTAMP &&
1219                                            cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1220                                         tv = (struct timeval*) CMSG_DATA(cmsg);
1221                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1222                                          cmsg->cmsg_type == SCM_RIGHTS) {
1223                                         fds = (int*) CMSG_DATA(cmsg);
1224                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1225                                 }
1226                         }
1227
1228                         if (ev->data.fd == s->syslog_fd) {
1229                                 if (n > 0 && n_fds == 0) {
1230                                         s->buffer[n] = 0;
1231                                         server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1232                                 } else if (n_fds > 0)
1233                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
1234
1235                         } else {
1236                                 if (n > 0 && n_fds == 0)
1237                                         server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1238                                 else if (n == 0 && n_fds == 1)
1239                                         server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1240                                 else if (n_fds > 0)
1241                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
1242                         }
1243
1244                         close_many(fds, n_fds);
1245                 }
1246
1247                 return 1;
1248
1249         } else if (ev->data.fd == s->stdout_fd) {
1250
1251                 if (ev->events != EPOLLIN) {
1252                         log_error("Got invalid event from epoll.");
1253                         return -EIO;
1254                 }
1255
1256                 stdout_stream_new(s);
1257                 return 1;
1258
1259         } else {
1260                 StdoutStream *stream;
1261
1262                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1263                         log_error("Got invalid event from epoll.");
1264                         return -EIO;
1265                 }
1266
1267                 /* If it is none of the well-known fds, it must be an
1268                  * stdout stream fd. Note that this is a bit ugly here
1269                  * (since we rely that none of the well-known fds
1270                  * could be interpreted as pointer), but nonetheless
1271                  * safe, since the well-known fds would never get an
1272                  * fd > 4096, i.e. beyond the first memory page */
1273
1274                 stream = ev->data.ptr;
1275
1276                 if (stdout_stream_process(stream) <= 0)
1277                         stdout_stream_free(stream);
1278
1279                 return 1;
1280         }
1281
1282         log_error("Unknown event.");
1283         return 0;
1284 }
1285
1286 static int open_signalfd(Server *s) {
1287         sigset_t mask;
1288         struct epoll_event ev;
1289
1290         assert(s);
1291
1292         assert_se(sigemptyset(&mask) == 0);
1293         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1294         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1295
1296         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1297         if (s->signal_fd < 0) {
1298                 log_error("signalfd(): %m");
1299                 return -errno;
1300         }
1301
1302         zero(ev);
1303         ev.events = EPOLLIN;
1304         ev.data.fd = s->signal_fd;
1305
1306         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1307                 log_error("epoll_ctl(): %m");
1308                 return -errno;
1309         }
1310
1311         return 0;
1312 }
1313
1314 static int server_parse_proc_cmdline(Server *s) {
1315         _cleanup_free_ char *line = NULL;
1316         char *w, *state;
1317         int r;
1318         size_t l;
1319
1320         if (detect_container(NULL) > 0)
1321                 return 0;
1322
1323         r = read_one_line_file("/proc/cmdline", &line);
1324         if (r < 0) {
1325                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1326                 return 0;
1327         }
1328
1329         FOREACH_WORD_QUOTED(w, l, line, state) {
1330                 _cleanup_free_ char *word;
1331
1332                 word = strndup(w, l);
1333                 if (!word)
1334                         return -ENOMEM;
1335
1336                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1337                         r = parse_boolean(word + 35);
1338                         if (r < 0)
1339                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1340                         else
1341                                 s->forward_to_syslog = r;
1342                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1343                         r = parse_boolean(word + 33);
1344                         if (r < 0)
1345                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1346                         else
1347                                 s->forward_to_kmsg = r;
1348                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1349                         r = parse_boolean(word + 36);
1350                         if (r < 0)
1351                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1352                         else
1353                                 s->forward_to_console = r;
1354                 } else if (startswith(word, "systemd.journald"))
1355                         log_warning("Invalid systemd.journald parameter. Ignoring.");
1356         }
1357
1358         return 0;
1359 }
1360
1361 static int server_parse_config_file(Server *s) {
1362         static const char fn[] = "/etc/systemd/journald.conf";
1363         _cleanup_fclose_ FILE *f = NULL;
1364         int r;
1365
1366         assert(s);
1367
1368         f = fopen(fn, "re");
1369         if (!f) {
1370                 if (errno == ENOENT)
1371                         return 0;
1372
1373                 log_warning("Failed to open configuration file %s: %m", fn);
1374                 return -errno;
1375         }
1376
1377         r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1378                          (void*) journald_gperf_lookup, false, false, s);
1379         if (r < 0)
1380                 log_warning("Failed to parse configuration file: %s", strerror(-r));
1381
1382         return r;
1383 }
1384
1385 static int server_open_sync_timer(Server *s) {
1386         int r;
1387         struct epoll_event ev;
1388
1389         assert(s);
1390
1391         s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1392         if (s->sync_timer_fd < 0)
1393                 return -errno;
1394
1395         zero(ev);
1396         ev.events = EPOLLIN;
1397         ev.data.fd = s->sync_timer_fd;
1398
1399         r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1400         if (r < 0) {
1401                 log_error("Failed to add idle timer fd to epoll object: %m");
1402                 return -errno;
1403         }
1404
1405         return 0;
1406 }
1407
1408 int server_schedule_sync(Server *s, int priority) {
1409         int r;
1410
1411         assert(s);
1412
1413         if (priority <= LOG_CRIT) {
1414                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1415                 server_sync(s);
1416                 return 0;
1417         }
1418
1419         if (s->sync_scheduled)
1420                 return 0;
1421
1422         if (s->sync_interval_usec) {
1423                 struct itimerspec sync_timer_enable = {};
1424
1425                 timespec_store(&sync_timer_enable.it_value, s->sync_interval_usec);
1426
1427                 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1428                 if (r < 0)
1429                         return -errno;
1430         }
1431
1432         s->sync_scheduled = true;
1433
1434         return 0;
1435 }
1436
1437 int server_init(Server *s) {
1438         int n, r, fd;
1439
1440         assert(s);
1441
1442         zero(*s);
1443         s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1444                 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1445         s->compress = true;
1446         s->seal = true;
1447
1448         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1449         s->sync_scheduled = false;
1450
1451         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1452         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1453
1454         s->forward_to_syslog = true;
1455
1456         s->max_level_store = LOG_DEBUG;
1457         s->max_level_syslog = LOG_DEBUG;
1458         s->max_level_kmsg = LOG_NOTICE;
1459         s->max_level_console = LOG_INFO;
1460
1461         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1462         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1463
1464         server_parse_config_file(s);
1465         server_parse_proc_cmdline(s);
1466         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1467                 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1468                           (long long unsigned) s->rate_limit_interval,
1469                           s->rate_limit_burst);
1470                 s->rate_limit_interval = s->rate_limit_burst = 0;
1471         }
1472
1473         mkdir_p("/run/systemd/journal", 0755);
1474
1475         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1476         if (!s->user_journals)
1477                 return log_oom();
1478
1479         s->mmap = mmap_cache_new();
1480         if (!s->mmap)
1481                 return log_oom();
1482
1483         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1484         if (s->epoll_fd < 0) {
1485                 log_error("Failed to create epoll object: %m");
1486                 return -errno;
1487         }
1488
1489         n = sd_listen_fds(true);
1490         if (n < 0) {
1491                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1492                 return n;
1493         }
1494
1495         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1496
1497                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1498
1499                         if (s->native_fd >= 0) {
1500                                 log_error("Too many native sockets passed.");
1501                                 return -EINVAL;
1502                         }
1503
1504                         s->native_fd = fd;
1505
1506                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1507
1508                         if (s->stdout_fd >= 0) {
1509                                 log_error("Too many stdout sockets passed.");
1510                                 return -EINVAL;
1511                         }
1512
1513                         s->stdout_fd = fd;
1514
1515                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1516
1517                         if (s->syslog_fd >= 0) {
1518                                 log_error("Too many /dev/log sockets passed.");
1519                                 return -EINVAL;
1520                         }
1521
1522                         s->syslog_fd = fd;
1523
1524                 } else {
1525                         log_error("Unknown socket passed.");
1526                         return -EINVAL;
1527                 }
1528         }
1529
1530         r = server_open_syslog_socket(s);
1531         if (r < 0)
1532                 return r;
1533
1534         r = server_open_native_socket(s);
1535         if (r < 0)
1536                 return r;
1537
1538         r = server_open_stdout_socket(s);
1539         if (r < 0)
1540                 return r;
1541
1542         r = server_open_dev_kmsg(s);
1543         if (r < 0)
1544                 return r;
1545
1546         r = server_open_kernel_seqnum(s);
1547         if (r < 0)
1548                 return r;
1549
1550         r = server_open_sync_timer(s);
1551         if (r < 0)
1552                 return r;
1553
1554         r = open_signalfd(s);
1555         if (r < 0)
1556                 return r;
1557
1558         s->udev = udev_new();
1559         if (!s->udev)
1560                 return -ENOMEM;
1561
1562         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1563                                                s->rate_limit_burst);
1564         if (!s->rate_limit)
1565                 return -ENOMEM;
1566
1567         r = system_journal_open(s);
1568         if (r < 0)
1569                 return r;
1570
1571         return 0;
1572 }
1573
1574 void server_maybe_append_tags(Server *s) {
1575 #ifdef HAVE_GCRYPT
1576         JournalFile *f;
1577         Iterator i;
1578         usec_t n;
1579
1580         n = now(CLOCK_REALTIME);
1581
1582         if (s->system_journal)
1583                 journal_file_maybe_append_tag(s->system_journal, n);
1584
1585         HASHMAP_FOREACH(f, s->user_journals, i)
1586                 journal_file_maybe_append_tag(f, n);
1587 #endif
1588 }
1589
1590 void server_done(Server *s) {
1591         JournalFile *f;
1592         assert(s);
1593
1594         while (s->stdout_streams)
1595                 stdout_stream_free(s->stdout_streams);
1596
1597         if (s->system_journal)
1598                 journal_file_close(s->system_journal);
1599
1600         if (s->runtime_journal)
1601                 journal_file_close(s->runtime_journal);
1602
1603         while ((f = hashmap_steal_first(s->user_journals)))
1604                 journal_file_close(f);
1605
1606         hashmap_free(s->user_journals);
1607
1608         if (s->epoll_fd >= 0)
1609                 close_nointr_nofail(s->epoll_fd);
1610
1611         if (s->signal_fd >= 0)
1612                 close_nointr_nofail(s->signal_fd);
1613
1614         if (s->syslog_fd >= 0)
1615                 close_nointr_nofail(s->syslog_fd);
1616
1617         if (s->native_fd >= 0)
1618                 close_nointr_nofail(s->native_fd);
1619
1620         if (s->stdout_fd >= 0)
1621                 close_nointr_nofail(s->stdout_fd);
1622
1623         if (s->dev_kmsg_fd >= 0)
1624                 close_nointr_nofail(s->dev_kmsg_fd);
1625
1626         if (s->sync_timer_fd >= 0)
1627                 close_nointr_nofail(s->sync_timer_fd);
1628
1629         if (s->rate_limit)
1630                 journal_rate_limit_free(s->rate_limit);
1631
1632         if (s->kernel_seqnum)
1633                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1634
1635         free(s->buffer);
1636         free(s->tty_path);
1637
1638         if (s->mmap)
1639                 mmap_cache_unref(s->mmap);
1640
1641         if (s->udev)
1642                 udev_unref(s->udev);
1643 }