chiark / gitweb /
journald: remove rotated file from hashmap when rotation fails
[elogind.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
26 #include <sys/mman.h>
27 #include <sys/timerfd.h>
28
29 #include <libudev.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
33
34 #include "fileio.h"
35 #include "mkdir.h"
36 #include "hashmap.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
40 #include "list.h"
41 #include "virt.h"
42 #include "missing.h"
43 #include "conf-parser.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-server.h"
48 #include "journald-rate-limit.h"
49 #include "journald-kmsg.h"
50 #include "journald-syslog.h"
51 #include "journald-stream.h"
52 #include "journald-console.h"
53 #include "journald-native.h"
54
55 #ifdef HAVE_ACL
56 #include <sys/acl.h>
57 #include <acl/libacl.h>
58 #include "acl-util.h"
59 #endif
60
61 #ifdef HAVE_SELINUX
62 #include <selinux/selinux.h>
63 #endif
64
/* Cap on the number of per-user journal files kept open at once;
 * find_journal() closes entries of the user_journals hashmap while the
 * hashmap size is at or above this value. */
#define USER_JOURNALS_MAX 1024

/* Default tunables. Their consumers are outside this part of the file;
 * presumably they seed the Server configuration -- TODO confirm. */
#define DEFAULT_SYNC_INTERVAL_USEC (5 * USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30 * USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000

/* available_space() reuses its cached result while it is younger than this
 * (unless called in verbose mode). */
#define RECHECK_AVAILABLE_SPACE_USEC (30 * USEC_PER_SEC)
72
73 static const char* const storage_table[] = {
74         [STORAGE_AUTO] = "auto",
75         [STORAGE_VOLATILE] = "volatile",
76         [STORAGE_PERSISTENT] = "persistent",
77         [STORAGE_NONE] = "none"
78 };
79
80 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
81 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
82
83 static const char* const split_mode_table[] = {
84         [SPLIT_NONE] = "none",
85         [SPLIT_UID] = "uid",
86         [SPLIT_LOGIN] = "login"
87 };
88
89 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
90 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
91
92 static uint64_t available_space(Server *s, bool verbose) {
93         char ids[33];
94         _cleanup_free_ char *p = NULL;
95         sd_id128_t machine;
96         struct statvfs ss;
97         uint64_t sum = 0, ss_avail = 0, avail = 0;
98         int r;
99         _cleanup_closedir_ DIR *d = NULL;
100         usec_t ts;
101         const char *f;
102         JournalMetrics *m;
103
104         ts = now(CLOCK_MONOTONIC);
105
106         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
107             && !verbose)
108                 return s->cached_available_space;
109
110         r = sd_id128_get_machine(&machine);
111         if (r < 0)
112                 return 0;
113
114         if (s->system_journal) {
115                 f = "/var/log/journal/";
116                 m = &s->system_metrics;
117         } else {
118                 f = "/run/log/journal/";
119                 m = &s->runtime_metrics;
120         }
121
122         assert(m);
123
124         p = strappend(f, sd_id128_to_string(machine, ids));
125         if (!p)
126                 return 0;
127
128         d = opendir(p);
129         if (!d)
130                 return 0;
131
132         if (fstatvfs(dirfd(d), &ss) < 0)
133                 return 0;
134
135         for (;;) {
136                 struct stat st;
137                 struct dirent *de;
138                 union dirent_storage buf;
139
140                 r = readdir_r(d, &buf.de, &de);
141                 if (r != 0)
142                         break;
143
144                 if (!de)
145                         break;
146
147                 if (!endswith(de->d_name, ".journal") &&
148                     !endswith(de->d_name, ".journal~"))
149                         continue;
150
151                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
152                         continue;
153
154                 if (!S_ISREG(st.st_mode))
155                         continue;
156
157                 sum += (uint64_t) st.st_blocks * 512UL;
158         }
159
160         ss_avail = ss.f_bsize * ss.f_bavail;
161         avail = ss_avail > m->keep_free ? ss_avail - m->keep_free : 0;
162
163         s->cached_available_space = MIN(m->max_use, avail) > sum ? MIN(m->max_use, avail) - sum : 0;
164         s->cached_available_space_timestamp = ts;
165
166         if (verbose) {
167                 char    fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
168                         fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
169
170                 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
171                                       "%s journal is using %s (max %s, leaving %s of free %s, current limit %s).",
172                                       s->system_journal ? "Permanent" : "Runtime",
173                                       format_bytes(fb1, sizeof(fb1), sum),
174                                       format_bytes(fb2, sizeof(fb2), m->max_use),
175                                       format_bytes(fb3, sizeof(fb3), m->keep_free),
176                                       format_bytes(fb4, sizeof(fb4), ss_avail),
177                                       format_bytes(fb5, sizeof(fb5), MIN(m->max_use, avail)));
178         }
179
180         return s->cached_available_space;
181 }
182
183 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
184         int r;
185 #ifdef HAVE_ACL
186         acl_t acl;
187         acl_entry_t entry;
188         acl_permset_t permset;
189 #endif
190
191         assert(f);
192
193         r = fchmod(f->fd, 0640);
194         if (r < 0)
195                 log_warning("Failed to fix access mode on %s, ignoring: %s", f->path, strerror(-r));
196
197 #ifdef HAVE_ACL
198         if (uid <= 0)
199                 return;
200
201         acl = acl_get_fd(f->fd);
202         if (!acl) {
203                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
204                 return;
205         }
206
207         r = acl_find_uid(acl, uid, &entry);
208         if (r <= 0) {
209
210                 if (acl_create_entry(&acl, &entry) < 0 ||
211                     acl_set_tag_type(entry, ACL_USER) < 0 ||
212                     acl_set_qualifier(entry, &uid) < 0) {
213                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
214                         goto finish;
215                 }
216         }
217
218         /* We do not recalculate the mask unconditionally here,
219          * so that the fchmod() mask above stays intact. */
220         if (acl_get_permset(entry, &permset) < 0 ||
221             acl_add_perm(permset, ACL_READ) < 0 ||
222             calc_acl_mask_if_needed(&acl) < 0) {
223                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
224                 goto finish;
225         }
226
227         if (acl_set_fd(f->fd, acl) < 0)
228                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
229
230 finish:
231         acl_free(acl);
232 #endif
233 }
234
235 static JournalFile* find_journal(Server *s, uid_t uid) {
236         _cleanup_free_ char *p = NULL;
237         int r;
238         JournalFile *f;
239         sd_id128_t machine;
240
241         assert(s);
242
243         /* We split up user logs only on /var, not on /run. If the
244          * runtime file is open, we write to it exclusively, in order
245          * to guarantee proper order as soon as we flush /run to
246          * /var and close the runtime file. */
247
248         if (s->runtime_journal)
249                 return s->runtime_journal;
250
251         if (uid <= 0)
252                 return s->system_journal;
253
254         r = sd_id128_get_machine(&machine);
255         if (r < 0)
256                 return s->system_journal;
257
258         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
259         if (f)
260                 return f;
261
262         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
263                      SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
264                 return s->system_journal;
265
266         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
267                 /* Too many open? Then let's close one */
268                 f = hashmap_steal_first(s->user_journals);
269                 assert(f);
270                 journal_file_close(f);
271         }
272
273         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
274         if (r < 0)
275                 return s->system_journal;
276
277         server_fix_perms(s, f, uid);
278
279         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
280         if (r < 0) {
281                 journal_file_close(f);
282                 return s->system_journal;
283         }
284
285         return f;
286 }
287
288 void server_rotate(Server *s) {
289         JournalFile *f;
290         void *k;
291         Iterator i;
292         int r;
293
294         log_debug("Rotating...");
295
296         if (s->runtime_journal) {
297                 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
298                 if (r < 0)
299                         if (s->runtime_journal)
300                                 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
301                         else
302                                 log_error("Failed to create new runtime journal: %s", strerror(-r));
303                 else
304                         server_fix_perms(s, s->runtime_journal, 0);
305         }
306
307         if (s->system_journal) {
308                 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
309                 if (r < 0)
310                         if (s->system_journal)
311                                 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
312                         else
313                                 log_error("Failed to create new system journal: %s", strerror(-r));
314
315                 else
316                         server_fix_perms(s, s->system_journal, 0);
317         }
318
319         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
320                 r = journal_file_rotate(&f, s->compress, s->seal);
321                 if (r < 0)
322                         if (f)
323                                 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
324                         else {
325                                 log_error("Failed to create user journal: %s", strerror(-r));
326                                 hashmap_remove(s->user_journals, k);
327                         }
328                 else {
329                         hashmap_replace(s->user_journals, k, f);
330                         server_fix_perms(s, f, PTR_TO_UINT32(k));
331                 }
332         }
333 }
334
335 void server_sync(Server *s) {
336         static const struct itimerspec sync_timer_disable = {};
337         JournalFile *f;
338         void *k;
339         Iterator i;
340         int r;
341
342         if (s->system_journal) {
343                 r = journal_file_set_offline(s->system_journal);
344                 if (r < 0)
345                         log_error("Failed to sync system journal: %s", strerror(-r));
346         }
347
348         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
349                 r = journal_file_set_offline(f);
350                 if (r < 0)
351                         log_error("Failed to sync user journal: %s", strerror(-r));
352         }
353
354         r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
355         if (r < 0)
356                 log_error("Failed to disable max timer: %m");
357
358         s->sync_scheduled = false;
359 }
360
361 void server_vacuum(Server *s) {
362         char ids[33];
363         sd_id128_t machine;
364         int r;
365
366         log_debug("Vacuuming...");
367
368         s->oldest_file_usec = 0;
369
370         r = sd_id128_get_machine(&machine);
371         if (r < 0) {
372                 log_error("Failed to get machine ID: %s", strerror(-r));
373                 return;
374         }
375
376         sd_id128_to_string(machine, ids);
377
378         if (s->system_journal) {
379                 char *p = strappenda("/var/log/journal/", ids);
380
381                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
382                 if (r < 0 && r != -ENOENT)
383                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
384         }
385
386         if (s->runtime_journal) {
387                 char *p = strappenda("/run/log/journal/", ids);
388
389                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
390                 if (r < 0 && r != -ENOENT)
391                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
392         }
393
394         s->cached_available_space_timestamp = 0;
395 }
396
397 bool shall_try_append_again(JournalFile *f, int r) {
398
399         /* -E2BIG            Hit configured limit
400            -EFBIG            Hit fs limit
401            -EDQUOT           Quota limit hit
402            -ENOSPC           Disk full
403            -EHOSTDOWN        Other machine
404            -EBUSY            Unclean shutdown
405            -EPROTONOSUPPORT  Unsupported feature
406            -EBADMSG          Corrupted
407            -ENODATA          Truncated
408            -ESHUTDOWN        Already archived */
409
410         if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
411                 log_debug("%s: Allocation limit reached, rotating.", f->path);
412         else if (r == -EHOSTDOWN)
413                 log_info("%s: Journal file from other machine, rotating.", f->path);
414         else if (r == -EBUSY)
415                 log_info("%s: Unclean shutdown, rotating.", f->path);
416         else if (r == -EPROTONOSUPPORT)
417                 log_info("%s: Unsupported feature, rotating.", f->path);
418         else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
419                 log_warning("%s: Journal file corrupted, rotating.", f->path);
420         else
421                 return false;
422
423         return true;
424 }
425
426 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
427         JournalFile *f;
428         bool vacuumed = false;
429         int r;
430
431         assert(s);
432         assert(iovec);
433         assert(n > 0);
434
435         f = find_journal(s, uid);
436         if (!f)
437                 return;
438
439         if (journal_file_rotate_suggested(f, s->max_file_usec)) {
440                 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
441                 server_rotate(s);
442                 server_vacuum(s);
443                 vacuumed = true;
444
445                 f = find_journal(s, uid);
446                 if (!f)
447                         return;
448         }
449
450         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
451         if (r >= 0) {
452                 server_schedule_sync(s, priority);
453                 return;
454         }
455
456         if (vacuumed || !shall_try_append_again(f, r)) {
457                 size_t size = 0;
458                 unsigned i;
459                 for (i = 0; i < n; i++)
460                         size += iovec[i].iov_len;
461
462                 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
463                 return;
464         }
465
466         server_rotate(s);
467         server_vacuum(s);
468
469         f = find_journal(s, uid);
470         if (!f)
471                 return;
472
473         log_debug("Retrying write.");
474         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
475         if (r < 0) {
476                 size_t size = 0;
477                 unsigned i;
478                 for (i = 0; i < n; i++)
479                         size += iovec[i].iov_len;
480
481                 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
482         } else
483                 server_schedule_sync(s, priority);
484 }
485
486 static void dispatch_message_real(
487                 Server *s,
488                 struct iovec *iovec, unsigned n, unsigned m,
489                 struct ucred *ucred,
490                 struct timeval *tv,
491                 const char *label, size_t label_len,
492                 const char *unit_id,
493                 int priority,
494                 pid_t object_pid) {
495
496         char    pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
497                 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
498                 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
499                 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
500                 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
501                 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
502                 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=",
503                 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
504                 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
505                 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
506         uid_t object_uid;
507         gid_t object_gid;
508         char *x;
509         sd_id128_t id;
510         int r;
511         char *t, *c;
512         uid_t realuid = 0, owner = 0, journal_uid;
513         bool owner_valid = false;
514 #ifdef HAVE_AUDIT
515         char    audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
516                 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
517                 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
518                 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
519
520         uint32_t audit;
521         uid_t loginuid;
522 #endif
523
524         assert(s);
525         assert(iovec);
526         assert(n > 0);
527         assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
528
529         if (ucred) {
530                 realuid = ucred->uid;
531
532                 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
533                 IOVEC_SET_STRING(iovec[n++], pid);
534
535                 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
536                 IOVEC_SET_STRING(iovec[n++], uid);
537
538                 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
539                 IOVEC_SET_STRING(iovec[n++], gid);
540
541                 r = get_process_comm(ucred->pid, &t);
542                 if (r >= 0) {
543                         x = strappenda("_COMM=", t);
544                         free(t);
545                         IOVEC_SET_STRING(iovec[n++], x);
546                 }
547
548                 r = get_process_exe(ucred->pid, &t);
549                 if (r >= 0) {
550                         x = strappenda("_EXE=", t);
551                         free(t);
552                         IOVEC_SET_STRING(iovec[n++], x);
553                 }
554
555                 r = get_process_cmdline(ucred->pid, 0, false, &t);
556                 if (r >= 0) {
557                         x = strappenda("_CMDLINE=", t);
558                         free(t);
559                         IOVEC_SET_STRING(iovec[n++], x);
560                 }
561
562                 r = get_process_capeff(ucred->pid, &t);
563                 if (r >= 0) {
564                         x = strappenda("_CAP_EFFECTIVE=", t);
565                         free(t);
566                         IOVEC_SET_STRING(iovec[n++], x);
567                 }
568
569 #ifdef HAVE_AUDIT
570                 r = audit_session_from_pid(ucred->pid, &audit);
571                 if (r >= 0) {
572                         sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
573                         IOVEC_SET_STRING(iovec[n++], audit_session);
574                 }
575
576                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
577                 if (r >= 0) {
578                         sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
579                         IOVEC_SET_STRING(iovec[n++], audit_loginuid);
580                 }
581 #endif
582
583                 r = cg_pid_get_path_shifted(ucred->pid, NULL, &c);
584                 if (r >= 0) {
585                         char *session = NULL;
586
587                         x = strappenda("_SYSTEMD_CGROUP=", c);
588                         IOVEC_SET_STRING(iovec[n++], x);
589
590                         r = cg_path_get_session(c, &t);
591                         if (r >= 0) {
592                                 session = strappenda("_SYSTEMD_SESSION=", t);
593                                 free(t);
594                                 IOVEC_SET_STRING(iovec[n++], session);
595                         }
596
597                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
598                                 owner_valid = true;
599
600                                 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
601                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
602                         }
603
604                         if (cg_path_get_unit(c, &t) >= 0) {
605                                 x = strappenda("_SYSTEMD_UNIT=", t);
606                                 free(t);
607                                 IOVEC_SET_STRING(iovec[n++], x);
608                         } else if (unit_id && !session) {
609                                 x = strappenda("_SYSTEMD_UNIT=", unit_id);
610                                 IOVEC_SET_STRING(iovec[n++], x);
611                         }
612
613                         if (cg_path_get_user_unit(c, &t) >= 0) {
614                                 x = strappenda("_SYSTEMD_USER_UNIT=", t);
615                                 free(t);
616                                 IOVEC_SET_STRING(iovec[n++], x);
617                         } else if (unit_id && session) {
618                                 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
619                                 IOVEC_SET_STRING(iovec[n++], x);
620                         }
621
622                         if (cg_path_get_slice(c, &t) >= 0) {
623                                 x = strappenda("_SYSTEMD_SLICE=", t);
624                                 free(t);
625                                 IOVEC_SET_STRING(iovec[n++], x);
626                         }
627
628                         free(c);
629                 }
630
631 #ifdef HAVE_SELINUX
632                 if (label) {
633                         x = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);
634
635                         *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
636                         IOVEC_SET_STRING(iovec[n++], x);
637                 } else {
638                         security_context_t con;
639
640                         if (getpidcon(ucred->pid, &con) >= 0) {
641                                 x = strappenda("_SELINUX_CONTEXT=", con);
642
643                                 freecon(con);
644                                 IOVEC_SET_STRING(iovec[n++], x);
645                         }
646                 }
647 #endif
648         }
649         assert(n <= m);
650
651         if (object_pid) {
652                 r = get_process_uid(object_pid, &object_uid);
653                 if (r >= 0) {
654                         sprintf(o_uid, "OBJECT_UID=%lu", (unsigned long) object_uid);
655                         IOVEC_SET_STRING(iovec[n++], o_uid);
656                 }
657
658                 r = get_process_gid(object_pid, &object_gid);
659                 if (r >= 0) {
660                         sprintf(o_gid, "OBJECT_GID=%lu", (unsigned long) object_gid);
661                         IOVEC_SET_STRING(iovec[n++], o_gid);
662                 }
663
664                 r = get_process_comm(object_pid, &t);
665                 if (r >= 0) {
666                         x = strappenda("OBJECT_COMM=", t);
667                         free(t);
668                         IOVEC_SET_STRING(iovec[n++], x);
669                 }
670
671                 r = get_process_exe(object_pid, &t);
672                 if (r >= 0) {
673                         x = strappenda("OBJECT_EXE=", t);
674                         free(t);
675                         IOVEC_SET_STRING(iovec[n++], x);
676                 }
677
678                 r = get_process_cmdline(object_pid, 0, false, &t);
679                 if (r >= 0) {
680                         x = strappenda("OBJECT_CMDLINE=", t);
681                         free(t);
682                         IOVEC_SET_STRING(iovec[n++], x);
683                 }
684
685 #ifdef HAVE_AUDIT
686                 r = audit_session_from_pid(object_pid, &audit);
687                 if (r >= 0) {
688                         sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%lu", (unsigned long) audit);
689                         IOVEC_SET_STRING(iovec[n++], o_audit_session);
690                 }
691
692                 r = audit_loginuid_from_pid(object_pid, &loginuid);
693                 if (r >= 0) {
694                         sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
695                         IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
696                 }
697 #endif
698
699                 r = cg_pid_get_path_shifted(object_pid, NULL, &c);
700                 if (r >= 0) {
701                         x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
702                         IOVEC_SET_STRING(iovec[n++], x);
703
704                         r = cg_path_get_session(c, &t);
705                         if (r >= 0) {
706                                 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
707                                 free(t);
708                                 IOVEC_SET_STRING(iovec[n++], x);
709                         }
710
711                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
712                                 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
713                                 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
714                         }
715
716                         if (cg_path_get_unit(c, &t) >= 0) {
717                                 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
718                                 free(t);
719                                 IOVEC_SET_STRING(iovec[n++], x);
720                         }
721
722                         if (cg_path_get_user_unit(c, &t) >= 0) {
723                                 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
724                                 free(t);
725                                 IOVEC_SET_STRING(iovec[n++], x);
726                         }
727
728                         free(c);
729                 }
730         }
731         assert(n <= m);
732
733         if (tv) {
734                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
735                 IOVEC_SET_STRING(iovec[n++], source_time);
736         }
737
738         /* Note that strictly speaking storing the boot id here is
739          * redundant since the entry includes this in-line
740          * anyway. However, we need this indexed, too. */
741         r = sd_id128_get_boot(&id);
742         if (r >= 0) {
743                 sd_id128_to_string(id, boot_id + strlen("_BOOT_ID="));
744                 IOVEC_SET_STRING(iovec[n++], boot_id);
745         }
746
747         r = sd_id128_get_machine(&id);
748         if (r >= 0) {
749                 sd_id128_to_string(id, machine_id + strlen("_MACHINE_ID="));
750                 IOVEC_SET_STRING(iovec[n++], machine_id);
751         }
752
753         t = gethostname_malloc();
754         if (t) {
755                 x = strappenda("_HOSTNAME=", t);
756                 free(t);
757                 IOVEC_SET_STRING(iovec[n++], x);
758         }
759
760         assert(n <= m);
761
762         if (s->split_mode == SPLIT_UID && realuid > 0)
763                 /* Split up strictly by any UID */
764                 journal_uid = realuid;
765         else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
766                 /* Split up by login UIDs, this avoids creation of
767                  * individual journals for system UIDs.  We do this
768                  * only if the realuid is not root, in order not to
769                  * accidentally leak privileged information to the
770                  * user that is logged by a privileged process that is
771                  * part of an unprivileged session.*/
772                 journal_uid = owner;
773         else
774                 journal_uid = 0;
775
776         write_to_journal(s, journal_uid, iovec, n, priority);
777 }
778
779 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
780         char mid[11 + 32 + 1];
781         char buffer[16 + LINE_MAX + 1];
782         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
783         int n = 0;
784         va_list ap;
785         struct ucred ucred = {};
786
787         assert(s);
788         assert(format);
789
790         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
791         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
792
793         memcpy(buffer, "MESSAGE=", 8);
794         va_start(ap, format);
795         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
796         va_end(ap);
797         char_array_0(buffer);
798         IOVEC_SET_STRING(iovec[n++], buffer);
799
800         if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
801                 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
802                 char_array_0(mid);
803                 IOVEC_SET_STRING(iovec[n++], mid);
804         }
805
806         ucred.pid = getpid();
807         ucred.uid = getuid();
808         ucred.gid = getgid();
809
810         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
811 }
812
813 void server_dispatch_message(
814                 Server *s,
815                 struct iovec *iovec, unsigned n, unsigned m,
816                 struct ucred *ucred,
817                 struct timeval *tv,
818                 const char *label, size_t label_len,
819                 const char *unit_id,
820                 int priority,
821                 pid_t object_pid) {
822
823         int rl, r;
824         _cleanup_free_ char *path = NULL;
825         char *c;
826
827         assert(s);
828         assert(iovec || n == 0);
829
830         if (n == 0)
831                 return;
832
833         if (LOG_PRI(priority) > s->max_level_store)
834                 return;
835
836         /* Stop early in case the information will not be stored
837          * in a journal. */
838         if (s->storage == STORAGE_NONE)
839                 return;
840
841         if (!ucred)
842                 goto finish;
843
844         r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
845         if (r < 0)
846                 goto finish;
847
848         /* example: /user/lennart/3/foobar
849          *          /system/dbus.service/foobar
850          *
851          * So let's cut of everything past the third /, since that is
852          * where user directories start */
853
854         c = strchr(path, '/');
855         if (c) {
856                 c = strchr(c+1, '/');
857                 if (c) {
858                         c = strchr(c+1, '/');
859                         if (c)
860                                 *c = 0;
861                 }
862         }
863
864         rl = journal_rate_limit_test(s->rate_limit, path,
865                                      priority & LOG_PRIMASK, available_space(s, false));
866
867         if (rl == 0)
868                 return;
869
870         /* Write a suppression message if we suppressed something */
871         if (rl > 1)
872                 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
873                                       "Suppressed %u messages from %s", rl - 1, path);
874
875 finish:
876         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
877 }
878
879
880 static int system_journal_open(Server *s) {
881         int r;
882         char *fn;
883         sd_id128_t machine;
884         char ids[33];
885
886         r = sd_id128_get_machine(&machine);
887         if (r < 0) {
888                 log_error("Failed to get machine id: %s", strerror(-r));
889                 return r;
890         }
891
892         sd_id128_to_string(machine, ids);
893
894         if (!s->system_journal &&
895             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
896             access("/run/systemd/journal/flushed", F_OK) >= 0) {
897
898                 /* If in auto mode: first try to create the machine
899                  * path, but not the prefix.
900                  *
901                  * If in persistent mode: create /var/log/journal and
902                  * the machine path */
903
904                 if (s->storage == STORAGE_PERSISTENT)
905                         (void) mkdir("/var/log/journal/", 0755);
906
907                 fn = strappenda("/var/log/journal/", ids);
908                 (void) mkdir(fn, 0755);
909
910                 fn = strappenda(fn, "/system.journal");
911                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
912
913                 if (r >= 0)
914                         server_fix_perms(s, s->system_journal, 0);
915                 else if (r < 0) {
916                         if (r != -ENOENT && r != -EROFS)
917                                 log_warning("Failed to open system journal: %s", strerror(-r));
918
919                         r = 0;
920                 }
921         }
922
923         if (!s->runtime_journal &&
924             (s->storage != STORAGE_NONE)) {
925
926                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
927                 if (!fn)
928                         return -ENOMEM;
929
930                 if (s->system_journal) {
931
932                         /* Try to open the runtime journal, but only
933                          * if it already exists, so that we can flush
934                          * it into the system journal */
935
936                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
937                         free(fn);
938
939                         if (r < 0) {
940                                 if (r != -ENOENT)
941                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
942
943                                 r = 0;
944                         }
945
946                 } else {
947
948                         /* OK, we really need the runtime journal, so create
949                          * it if necessary. */
950
951                         (void) mkdir_parents(fn, 0755);
952                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
953                         free(fn);
954
955                         if (r < 0) {
956                                 log_error("Failed to open runtime journal: %s", strerror(-r));
957                                 return r;
958                         }
959                 }
960
961                 if (s->runtime_journal)
962                         server_fix_perms(s, s->runtime_journal, 0);
963         }
964
965         available_space(s, true);
966
967         return r;
968 }
969
970 int server_flush_to_var(Server *s) {
971         int r;
972         sd_id128_t machine;
973         sd_journal *j = NULL;
974
975         assert(s);
976
977         if (s->storage != STORAGE_AUTO &&
978             s->storage != STORAGE_PERSISTENT)
979                 return 0;
980
981         if (!s->runtime_journal)
982                 return 0;
983
984         system_journal_open(s);
985
986         if (!s->system_journal)
987                 return 0;
988
989         log_debug("Flushing to /var...");
990
991         r = sd_id128_get_machine(&machine);
992         if (r < 0)
993                 return r;
994
995         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
996         if (r < 0) {
997                 log_error("Failed to read runtime journal: %s", strerror(-r));
998                 return r;
999         }
1000
1001         sd_journal_set_data_threshold(j, 0);
1002
1003         SD_JOURNAL_FOREACH(j) {
1004                 Object *o = NULL;
1005                 JournalFile *f;
1006
1007                 f = j->current_file;
1008                 assert(f && f->current_offset > 0);
1009
1010                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1011                 if (r < 0) {
1012                         log_error("Can't read entry: %s", strerror(-r));
1013                         goto finish;
1014                 }
1015
1016                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1017                 if (r >= 0)
1018                         continue;
1019
1020                 if (!shall_try_append_again(s->system_journal, r)) {
1021                         log_error("Can't write entry: %s", strerror(-r));
1022                         goto finish;
1023                 }
1024
1025                 server_rotate(s);
1026                 server_vacuum(s);
1027
1028                 if (!s->system_journal) {
1029                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1030                         r = -EIO;
1031                         goto finish;
1032                 }
1033
1034                 log_debug("Retrying write.");
1035                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1036                 if (r < 0) {
1037                         log_error("Can't write entry: %s", strerror(-r));
1038                         goto finish;
1039                 }
1040         }
1041
1042 finish:
1043         journal_file_post_change(s->system_journal);
1044
1045         journal_file_close(s->runtime_journal);
1046         s->runtime_journal = NULL;
1047
1048         if (r >= 0)
1049                 rm_rf("/run/log/journal", false, true, false);
1050
1051         sd_journal_close(j);
1052
1053         return r;
1054 }
1055
1056 int process_event(Server *s, struct epoll_event *ev) {
1057         assert(s);
1058         assert(ev);
1059
1060         if (ev->data.fd == s->signal_fd) {
1061                 struct signalfd_siginfo sfsi;
1062                 ssize_t n;
1063
1064                 if (ev->events != EPOLLIN) {
1065                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1066                                   "signal fd", ev->events);
1067                         return -EIO;
1068                 }
1069
1070                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1071                 if (n != sizeof(sfsi)) {
1072
1073                         if (n >= 0)
1074                                 return -EIO;
1075
1076                         if (errno == EINTR || errno == EAGAIN)
1077                                 return 1;
1078
1079                         return -errno;
1080                 }
1081
1082                 if (sfsi.ssi_signo == SIGUSR1) {
1083                         log_info("Received request to flush runtime journal from PID %"PRIu32,
1084                                  sfsi.ssi_pid);
1085                         touch("/run/systemd/journal/flushed");
1086                         server_flush_to_var(s);
1087                         server_sync(s);
1088                         return 1;
1089                 }
1090
1091                 if (sfsi.ssi_signo == SIGUSR2) {
1092                         log_info("Received request to rotate journal from PID %"PRIu32,
1093                                  sfsi.ssi_pid);
1094                         server_rotate(s);
1095                         server_vacuum(s);
1096                         return 1;
1097                 }
1098
1099                 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1100
1101                 return 0;
1102
1103         } else if (ev->data.fd == s->sync_timer_fd) {
1104                 int r;
1105                 uint64_t t;
1106
1107                 log_debug("Got sync request from epoll.");
1108
1109                 r = read(ev->data.fd, (void *)&t, sizeof(t));
1110                 if (r < 0)
1111                         return 0;
1112
1113                 server_sync(s);
1114                 return 1;
1115
1116         } else if (ev->data.fd == s->dev_kmsg_fd) {
1117                 int r;
1118
1119                 if (ev->events & EPOLLERR)
1120                         log_warning("/dev/kmsg buffer overrun, some messages lost.");
1121
1122                 if (!(ev->events & EPOLLIN)) {
1123                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1124                                   "/dev/kmsg", ev->events);
1125                         return -EIO;
1126                 }
1127
1128                 r = server_read_dev_kmsg(s);
1129                 if (r < 0)
1130                         return r;
1131
1132                 return 1;
1133
1134         } else if (ev->data.fd == s->native_fd ||
1135                    ev->data.fd == s->syslog_fd) {
1136
1137                 if (ev->events != EPOLLIN) {
1138                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1139                                   ev->data.fd == s->native_fd ? "native fd" : "syslog fd",
1140                                   ev->events);
1141                         return -EIO;
1142                 }
1143
1144                 for (;;) {
1145                         struct msghdr msghdr;
1146                         struct iovec iovec;
1147                         struct ucred *ucred = NULL;
1148                         struct timeval *tv = NULL;
1149                         struct cmsghdr *cmsg;
1150                         char *label = NULL;
1151                         size_t label_len = 0;
1152                         union {
1153                                 struct cmsghdr cmsghdr;
1154
1155                                 /* We use NAME_MAX space for the
1156                                  * SELinux label here. The kernel
1157                                  * currently enforces no limit, but
1158                                  * according to suggestions from the
1159                                  * SELinux people this will change and
1160                                  * it will probably be identical to
1161                                  * NAME_MAX. For now we use that, but
1162                                  * this should be updated one day when
1163                                  * the final limit is known.*/
1164                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1165                                             CMSG_SPACE(sizeof(struct timeval)) +
1166                                             CMSG_SPACE(sizeof(int)) + /* fd */
1167                                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
1168                         } control;
1169                         ssize_t n;
1170                         int v;
1171                         int *fds = NULL;
1172                         unsigned n_fds = 0;
1173
1174                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1175                                 log_error("SIOCINQ failed: %m");
1176                                 return -errno;
1177                         }
1178
1179                         if (s->buffer_size < (size_t) v) {
1180                                 void *b;
1181                                 size_t l;
1182
1183                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1184                                 b = realloc(s->buffer, l+1);
1185
1186                                 if (!b) {
1187                                         log_error("Couldn't increase buffer.");
1188                                         return -ENOMEM;
1189                                 }
1190
1191                                 s->buffer_size = l;
1192                                 s->buffer = b;
1193                         }
1194
1195                         zero(iovec);
1196                         iovec.iov_base = s->buffer;
1197                         iovec.iov_len = s->buffer_size;
1198
1199                         zero(control);
1200                         zero(msghdr);
1201                         msghdr.msg_iov = &iovec;
1202                         msghdr.msg_iovlen = 1;
1203                         msghdr.msg_control = &control;
1204                         msghdr.msg_controllen = sizeof(control);
1205
1206                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1207                         if (n < 0) {
1208
1209                                 if (errno == EINTR || errno == EAGAIN)
1210                                         return 1;
1211
1212                                 log_error("recvmsg() failed: %m");
1213                                 return -errno;
1214                         }
1215
1216                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1217
1218                                 if (cmsg->cmsg_level == SOL_SOCKET &&
1219                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
1220                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1221                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
1222                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1223                                          cmsg->cmsg_type == SCM_SECURITY) {
1224                                         label = (char*) CMSG_DATA(cmsg);
1225                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
1226                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1227                                            cmsg->cmsg_type == SO_TIMESTAMP &&
1228                                            cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1229                                         tv = (struct timeval*) CMSG_DATA(cmsg);
1230                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1231                                          cmsg->cmsg_type == SCM_RIGHTS) {
1232                                         fds = (int*) CMSG_DATA(cmsg);
1233                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1234                                 }
1235                         }
1236
1237                         if (ev->data.fd == s->syslog_fd) {
1238                                 if (n > 0 && n_fds == 0) {
1239                                         s->buffer[n] = 0;
1240                                         server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1241                                 } else if (n_fds > 0)
1242                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
1243
1244                         } else {
1245                                 if (n > 0 && n_fds == 0)
1246                                         server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1247                                 else if (n == 0 && n_fds == 1)
1248                                         server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1249                                 else if (n_fds > 0)
1250                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
1251                         }
1252
1253                         close_many(fds, n_fds);
1254                 }
1255
1256                 return 1;
1257
1258         } else if (ev->data.fd == s->stdout_fd) {
1259
1260                 if (ev->events != EPOLLIN) {
1261                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1262                                   "stdout fd", ev->events);
1263                         return -EIO;
1264                 }
1265
1266                 stdout_stream_new(s);
1267                 return 1;
1268
1269         } else {
1270                 StdoutStream *stream;
1271
1272                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1273                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1274                                   "stdout stream", ev->events);
1275                         return -EIO;
1276                 }
1277
1278                 /* If it is none of the well-known fds, it must be an
1279                  * stdout stream fd. Note that this is a bit ugly here
1280                  * (since we rely that none of the well-known fds
1281                  * could be interpreted as pointer), but nonetheless
1282                  * safe, since the well-known fds would never get an
1283                  * fd > 4096, i.e. beyond the first memory page */
1284
1285                 stream = ev->data.ptr;
1286
1287                 if (stdout_stream_process(stream) <= 0)
1288                         stdout_stream_free(stream);
1289
1290                 return 1;
1291         }
1292
1293         log_error("Unknown event.");
1294         return 0;
1295 }
1296
1297 static int open_signalfd(Server *s) {
1298         sigset_t mask;
1299         struct epoll_event ev;
1300
1301         assert(s);
1302
1303         assert_se(sigemptyset(&mask) == 0);
1304         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1305         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1306
1307         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1308         if (s->signal_fd < 0) {
1309                 log_error("signalfd(): %m");
1310                 return -errno;
1311         }
1312
1313         zero(ev);
1314         ev.events = EPOLLIN;
1315         ev.data.fd = s->signal_fd;
1316
1317         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1318                 log_error("epoll_ctl(): %m");
1319                 return -errno;
1320         }
1321
1322         return 0;
1323 }
1324
1325 static int server_parse_proc_cmdline(Server *s) {
1326         _cleanup_free_ char *line = NULL;
1327         char *w, *state;
1328         int r;
1329         size_t l;
1330
1331         if (detect_container(NULL) > 0)
1332                 return 0;
1333
1334         r = read_one_line_file("/proc/cmdline", &line);
1335         if (r < 0) {
1336                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1337                 return 0;
1338         }
1339
1340         FOREACH_WORD_QUOTED(w, l, line, state) {
1341                 _cleanup_free_ char *word;
1342
1343                 word = strndup(w, l);
1344                 if (!word)
1345                         return -ENOMEM;
1346
1347                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1348                         r = parse_boolean(word + 35);
1349                         if (r < 0)
1350                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1351                         else
1352                                 s->forward_to_syslog = r;
1353                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1354                         r = parse_boolean(word + 33);
1355                         if (r < 0)
1356                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1357                         else
1358                                 s->forward_to_kmsg = r;
1359                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1360                         r = parse_boolean(word + 36);
1361                         if (r < 0)
1362                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1363                         else
1364                                 s->forward_to_console = r;
1365                 } else if (startswith(word, "systemd.journald"))
1366                         log_warning("Invalid systemd.journald parameter. Ignoring.");
1367         }
1368
1369         return 0;
1370 }
1371
1372 static int server_parse_config_file(Server *s) {
1373         static const char fn[] = "/etc/systemd/journald.conf";
1374         _cleanup_fclose_ FILE *f = NULL;
1375         int r;
1376
1377         assert(s);
1378
1379         f = fopen(fn, "re");
1380         if (!f) {
1381                 if (errno == ENOENT)
1382                         return 0;
1383
1384                 log_warning("Failed to open configuration file %s: %m", fn);
1385                 return -errno;
1386         }
1387
1388         r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1389                          (void*) journald_gperf_lookup, false, false, s);
1390         if (r < 0)
1391                 log_warning("Failed to parse configuration file: %s", strerror(-r));
1392
1393         return r;
1394 }
1395
1396 static int server_open_sync_timer(Server *s) {
1397         int r;
1398         struct epoll_event ev;
1399
1400         assert(s);
1401
1402         s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1403         if (s->sync_timer_fd < 0)
1404                 return -errno;
1405
1406         zero(ev);
1407         ev.events = EPOLLIN;
1408         ev.data.fd = s->sync_timer_fd;
1409
1410         r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1411         if (r < 0) {
1412                 log_error("Failed to add idle timer fd to epoll object: %m");
1413                 return -errno;
1414         }
1415
1416         return 0;
1417 }
1418
1419 int server_schedule_sync(Server *s, int priority) {
1420         int r;
1421
1422         assert(s);
1423
1424         if (priority <= LOG_CRIT) {
1425                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1426                 server_sync(s);
1427                 return 0;
1428         }
1429
1430         if (s->sync_scheduled)
1431                 return 0;
1432
1433         if (s->sync_interval_usec) {
1434                 struct itimerspec sync_timer_enable = {};
1435
1436                 timespec_store(&sync_timer_enable.it_value, s->sync_interval_usec);
1437
1438                 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1439                 if (r < 0)
1440                         return -errno;
1441         }
1442
1443         s->sync_scheduled = true;
1444
1445         return 0;
1446 }
1447
1448 int server_init(Server *s) {
1449         int n, r, fd;
1450
1451         assert(s);
1452
1453         zero(*s);
1454         s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1455                 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1456         s->compress = true;
1457         s->seal = true;
1458
1459         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1460         s->sync_scheduled = false;
1461
1462         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1463         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1464
1465         s->forward_to_syslog = true;
1466
1467         s->max_level_store = LOG_DEBUG;
1468         s->max_level_syslog = LOG_DEBUG;
1469         s->max_level_kmsg = LOG_NOTICE;
1470         s->max_level_console = LOG_INFO;
1471
1472         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1473         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1474
1475         server_parse_config_file(s);
1476         server_parse_proc_cmdline(s);
1477         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1478                 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1479                           (long long unsigned) s->rate_limit_interval,
1480                           s->rate_limit_burst);
1481                 s->rate_limit_interval = s->rate_limit_burst = 0;
1482         }
1483
1484         mkdir_p("/run/systemd/journal", 0755);
1485
1486         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1487         if (!s->user_journals)
1488                 return log_oom();
1489
1490         s->mmap = mmap_cache_new();
1491         if (!s->mmap)
1492                 return log_oom();
1493
1494         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1495         if (s->epoll_fd < 0) {
1496                 log_error("Failed to create epoll object: %m");
1497                 return -errno;
1498         }
1499
1500         n = sd_listen_fds(true);
1501         if (n < 0) {
1502                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1503                 return n;
1504         }
1505
1506         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1507
1508                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1509
1510                         if (s->native_fd >= 0) {
1511                                 log_error("Too many native sockets passed.");
1512                                 return -EINVAL;
1513                         }
1514
1515                         s->native_fd = fd;
1516
1517                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1518
1519                         if (s->stdout_fd >= 0) {
1520                                 log_error("Too many stdout sockets passed.");
1521                                 return -EINVAL;
1522                         }
1523
1524                         s->stdout_fd = fd;
1525
1526                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1527
1528                         if (s->syslog_fd >= 0) {
1529                                 log_error("Too many /dev/log sockets passed.");
1530                                 return -EINVAL;
1531                         }
1532
1533                         s->syslog_fd = fd;
1534
1535                 } else {
1536                         log_error("Unknown socket passed.");
1537                         return -EINVAL;
1538                 }
1539         }
1540
1541         r = server_open_syslog_socket(s);
1542         if (r < 0)
1543                 return r;
1544
1545         r = server_open_native_socket(s);
1546         if (r < 0)
1547                 return r;
1548
1549         r = server_open_stdout_socket(s);
1550         if (r < 0)
1551                 return r;
1552
1553         r = server_open_dev_kmsg(s);
1554         if (r < 0)
1555                 return r;
1556
1557         r = server_open_kernel_seqnum(s);
1558         if (r < 0)
1559                 return r;
1560
1561         r = server_open_sync_timer(s);
1562         if (r < 0)
1563                 return r;
1564
1565         r = open_signalfd(s);
1566         if (r < 0)
1567                 return r;
1568
1569         s->udev = udev_new();
1570         if (!s->udev)
1571                 return -ENOMEM;
1572
1573         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1574                                                s->rate_limit_burst);
1575         if (!s->rate_limit)
1576                 return -ENOMEM;
1577
1578         r = system_journal_open(s);
1579         if (r < 0)
1580                 return r;
1581
1582         return 0;
1583 }
1584
1585 void server_maybe_append_tags(Server *s) {
1586 #ifdef HAVE_GCRYPT
1587         JournalFile *f;
1588         Iterator i;
1589         usec_t n;
1590
1591         n = now(CLOCK_REALTIME);
1592
1593         if (s->system_journal)
1594                 journal_file_maybe_append_tag(s->system_journal, n);
1595
1596         HASHMAP_FOREACH(f, s->user_journals, i)
1597                 journal_file_maybe_append_tag(f, n);
1598 #endif
1599 }
1600
1601 void server_done(Server *s) {
1602         JournalFile *f;
1603         assert(s);
1604
1605         while (s->stdout_streams)
1606                 stdout_stream_free(s->stdout_streams);
1607
1608         if (s->system_journal)
1609                 journal_file_close(s->system_journal);
1610
1611         if (s->runtime_journal)
1612                 journal_file_close(s->runtime_journal);
1613
1614         while ((f = hashmap_steal_first(s->user_journals)))
1615                 journal_file_close(f);
1616
1617         hashmap_free(s->user_journals);
1618
1619         if (s->epoll_fd >= 0)
1620                 close_nointr_nofail(s->epoll_fd);
1621
1622         if (s->signal_fd >= 0)
1623                 close_nointr_nofail(s->signal_fd);
1624
1625         if (s->syslog_fd >= 0)
1626                 close_nointr_nofail(s->syslog_fd);
1627
1628         if (s->native_fd >= 0)
1629                 close_nointr_nofail(s->native_fd);
1630
1631         if (s->stdout_fd >= 0)
1632                 close_nointr_nofail(s->stdout_fd);
1633
1634         if (s->dev_kmsg_fd >= 0)
1635                 close_nointr_nofail(s->dev_kmsg_fd);
1636
1637         if (s->sync_timer_fd >= 0)
1638                 close_nointr_nofail(s->sync_timer_fd);
1639
1640         if (s->rate_limit)
1641                 journal_rate_limit_free(s->rate_limit);
1642
1643         if (s->kernel_seqnum)
1644                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1645
1646         free(s->buffer);
1647         free(s->tty_path);
1648
1649         if (s->mmap)
1650                 mmap_cache_unref(s->mmap);
1651
1652         if (s->udev)
1653                 udev_unref(s->udev);
1654 }