chiark / gitweb /
Add a bit more explicit message, to help confused users
[elogind.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
26 #include <sys/mman.h>
27 #include <sys/timerfd.h>
28
29 #include <libudev.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
33
34 #include "fileio.h"
35 #include "mkdir.h"
36 #include "hashmap.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
40 #include "list.h"
41 #include "virt.h"
42 #include "missing.h"
43 #include "conf-parser.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-server.h"
48 #include "journald-rate-limit.h"
49 #include "journald-kmsg.h"
50 #include "journald-syslog.h"
51 #include "journald-stream.h"
52 #include "journald-console.h"
53 #include "journald-native.h"
54
55 #ifdef HAVE_ACL
56 #include <sys/acl.h>
57 #include <acl/libacl.h>
58 #include "acl-util.h"
59 #endif
60
61 #ifdef HAVE_SELINUX
62 #include <selinux/selinux.h>
63 #endif
64
/* Upper bound on the number of per-user journal files kept open
 * simultaneously; older ones are closed once it is reached. */
#define USER_JOURNALS_MAX 1024

/* Built-in defaults (presumably applied when the configuration does
 * not override them -- the code using them is not in this part). */
#define DEFAULT_SYNC_INTERVAL_USEC (5 * USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30 * USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000

/* For how long a computed "available space" value may be reused
 * before the journal directory is scanned again. */
#define RECHECK_AVAILABLE_SPACE_USEC (30 * USEC_PER_SEC)
72
73 static const char* const storage_table[] = {
74         [STORAGE_AUTO] = "auto",
75         [STORAGE_VOLATILE] = "volatile",
76         [STORAGE_PERSISTENT] = "persistent",
77         [STORAGE_NONE] = "none"
78 };
79
80 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
81 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
82
83 static const char* const split_mode_table[] = {
84         [SPLIT_NONE] = "none",
85         [SPLIT_UID] = "uid",
86         [SPLIT_LOGIN] = "login"
87 };
88
89 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
90 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
91
92 static uint64_t available_space(Server *s, bool verbose) {
93         char ids[33];
94         _cleanup_free_ char *p = NULL;
95         sd_id128_t machine;
96         struct statvfs ss;
97         uint64_t sum = 0, ss_avail = 0, avail = 0;
98         int r;
99         _cleanup_closedir_ DIR *d = NULL;
100         usec_t ts;
101         const char *f;
102         JournalMetrics *m;
103
104         ts = now(CLOCK_MONOTONIC);
105
106         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
107             && !verbose)
108                 return s->cached_available_space;
109
110         r = sd_id128_get_machine(&machine);
111         if (r < 0)
112                 return 0;
113
114         if (s->system_journal) {
115                 f = "/var/log/journal/";
116                 m = &s->system_metrics;
117         } else {
118                 f = "/run/log/journal/";
119                 m = &s->runtime_metrics;
120         }
121
122         assert(m);
123
124         p = strappend(f, sd_id128_to_string(machine, ids));
125         if (!p)
126                 return 0;
127
128         d = opendir(p);
129         if (!d)
130                 return 0;
131
132         if (fstatvfs(dirfd(d), &ss) < 0)
133                 return 0;
134
135         for (;;) {
136                 struct stat st;
137                 struct dirent *de;
138                 union dirent_storage buf;
139
140                 r = readdir_r(d, &buf.de, &de);
141                 if (r != 0)
142                         break;
143
144                 if (!de)
145                         break;
146
147                 if (!endswith(de->d_name, ".journal") &&
148                     !endswith(de->d_name, ".journal~"))
149                         continue;
150
151                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
152                         continue;
153
154                 if (!S_ISREG(st.st_mode))
155                         continue;
156
157                 sum += (uint64_t) st.st_blocks * 512UL;
158         }
159
160         ss_avail = ss.f_bsize * ss.f_bavail;
161         avail = ss_avail > m->keep_free ? ss_avail - m->keep_free : 0;
162
163         s->cached_available_space = MIN(m->max_use, avail) > sum ? MIN(m->max_use, avail) - sum : 0;
164         s->cached_available_space_timestamp = ts;
165
166         if (verbose) {
167                 char    fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
168                         fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
169
170                 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
171                                       "%s journal is using %s (max %s, leaving %s of free %s, current limit %s).",
172                                       s->system_journal ? "Permanent" : "Runtime",
173                                       format_bytes(fb1, sizeof(fb1), sum),
174                                       format_bytes(fb2, sizeof(fb2), m->max_use),
175                                       format_bytes(fb3, sizeof(fb3), m->keep_free),
176                                       format_bytes(fb4, sizeof(fb4), ss_avail),
177                                       format_bytes(fb5, sizeof(fb5), MIN(m->max_use, avail)));
178         }
179
180         return s->cached_available_space;
181 }
182
183 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
184         int r;
185 #ifdef HAVE_ACL
186         acl_t acl;
187         acl_entry_t entry;
188         acl_permset_t permset;
189 #endif
190
191         assert(f);
192
193         r = fchmod(f->fd, 0640);
194         if (r < 0)
195                 log_warning("Failed to fix access mode on %s, ignoring: %s", f->path, strerror(-r));
196
197 #ifdef HAVE_ACL
198         if (uid <= 0)
199                 return;
200
201         acl = acl_get_fd(f->fd);
202         if (!acl) {
203                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
204                 return;
205         }
206
207         r = acl_find_uid(acl, uid, &entry);
208         if (r <= 0) {
209
210                 if (acl_create_entry(&acl, &entry) < 0 ||
211                     acl_set_tag_type(entry, ACL_USER) < 0 ||
212                     acl_set_qualifier(entry, &uid) < 0) {
213                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
214                         goto finish;
215                 }
216         }
217
218         /* We do not recalculate the mask unconditionally here,
219          * so that the fchmod() mask above stays intact. */
220         if (acl_get_permset(entry, &permset) < 0 ||
221             acl_add_perm(permset, ACL_READ) < 0 ||
222             calc_acl_mask_if_needed(&acl) < 0) {
223                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
224                 goto finish;
225         }
226
227         if (acl_set_fd(f->fd, acl) < 0)
228                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
229
230 finish:
231         acl_free(acl);
232 #endif
233 }
234
235 static JournalFile* find_journal(Server *s, uid_t uid) {
236         _cleanup_free_ char *p = NULL;
237         int r;
238         JournalFile *f;
239         sd_id128_t machine;
240
241         assert(s);
242
243         /* We split up user logs only on /var, not on /run. If the
244          * runtime file is open, we write to it exclusively, in order
245          * to guarantee proper order as soon as we flush /run to
246          * /var and close the runtime file. */
247
248         if (s->runtime_journal)
249                 return s->runtime_journal;
250
251         if (uid <= 0)
252                 return s->system_journal;
253
254         r = sd_id128_get_machine(&machine);
255         if (r < 0)
256                 return s->system_journal;
257
258         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
259         if (f)
260                 return f;
261
262         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
263                      SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
264                 return s->system_journal;
265
266         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
267                 /* Too many open? Then let's close one */
268                 f = hashmap_steal_first(s->user_journals);
269                 assert(f);
270                 journal_file_close(f);
271         }
272
273         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
274         if (r < 0)
275                 return s->system_journal;
276
277         server_fix_perms(s, f, uid);
278
279         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
280         if (r < 0) {
281                 journal_file_close(f);
282                 return s->system_journal;
283         }
284
285         return f;
286 }
287
288 void server_rotate(Server *s) {
289         JournalFile *f;
290         void *k;
291         Iterator i;
292         int r;
293
294         log_debug("Rotating...");
295
296         if (s->runtime_journal) {
297                 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
298                 if (r < 0)
299                         if (s->runtime_journal)
300                                 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
301                         else
302                                 log_error("Failed to create new runtime journal: %s", strerror(-r));
303                 else
304                         server_fix_perms(s, s->runtime_journal, 0);
305         }
306
307         if (s->system_journal) {
308                 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
309                 if (r < 0)
310                         if (s->system_journal)
311                                 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
312                         else
313                                 log_error("Failed to create new system journal: %s", strerror(-r));
314
315                 else
316                         server_fix_perms(s, s->system_journal, 0);
317         }
318
319         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
320                 r = journal_file_rotate(&f, s->compress, s->seal);
321                 if (r < 0)
322                         if (f)
323                                 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
324                         else
325                                 log_error("Failed to create user journal: %s", strerror(-r));
326                 else {
327                         hashmap_replace(s->user_journals, k, f);
328                         server_fix_perms(s, f, PTR_TO_UINT32(k));
329                 }
330         }
331 }
332
333 void server_sync(Server *s) {
334         static const struct itimerspec sync_timer_disable = {};
335         JournalFile *f;
336         void *k;
337         Iterator i;
338         int r;
339
340         if (s->system_journal) {
341                 r = journal_file_set_offline(s->system_journal);
342                 if (r < 0)
343                         log_error("Failed to sync system journal: %s", strerror(-r));
344         }
345
346         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
347                 r = journal_file_set_offline(f);
348                 if (r < 0)
349                         log_error("Failed to sync user journal: %s", strerror(-r));
350         }
351
352         r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
353         if (r < 0)
354                 log_error("Failed to disable max timer: %m");
355
356         s->sync_scheduled = false;
357 }
358
359 void server_vacuum(Server *s) {
360         char ids[33];
361         sd_id128_t machine;
362         int r;
363
364         log_debug("Vacuuming...");
365
366         s->oldest_file_usec = 0;
367
368         r = sd_id128_get_machine(&machine);
369         if (r < 0) {
370                 log_error("Failed to get machine ID: %s", strerror(-r));
371                 return;
372         }
373
374         sd_id128_to_string(machine, ids);
375
376         if (s->system_journal) {
377                 char *p = strappenda("/var/log/journal/", ids);
378
379                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
380                 if (r < 0 && r != -ENOENT)
381                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
382         }
383
384         if (s->runtime_journal) {
385                 char *p = strappenda("/run/log/journal/", ids);
386
387                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
388                 if (r < 0 && r != -ENOENT)
389                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
390         }
391
392         s->cached_available_space_timestamp = 0;
393 }
394
395 bool shall_try_append_again(JournalFile *f, int r) {
396
397         /* -E2BIG            Hit configured limit
398            -EFBIG            Hit fs limit
399            -EDQUOT           Quota limit hit
400            -ENOSPC           Disk full
401            -EHOSTDOWN        Other machine
402            -EBUSY            Unclean shutdown
403            -EPROTONOSUPPORT  Unsupported feature
404            -EBADMSG          Corrupted
405            -ENODATA          Truncated
406            -ESHUTDOWN        Already archived */
407
408         if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
409                 log_debug("%s: Allocation limit reached, rotating.", f->path);
410         else if (r == -EHOSTDOWN)
411                 log_info("%s: Journal file from other machine, rotating.", f->path);
412         else if (r == -EBUSY)
413                 log_info("%s: Unclean shutdown, rotating.", f->path);
414         else if (r == -EPROTONOSUPPORT)
415                 log_info("%s: Unsupported feature, rotating.", f->path);
416         else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
417                 log_warning("%s: Journal file corrupted, rotating.", f->path);
418         else
419                 return false;
420
421         return true;
422 }
423
424 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
425         JournalFile *f;
426         bool vacuumed = false;
427         int r;
428
429         assert(s);
430         assert(iovec);
431         assert(n > 0);
432
433         f = find_journal(s, uid);
434         if (!f)
435                 return;
436
437         if (journal_file_rotate_suggested(f, s->max_file_usec)) {
438                 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
439                 server_rotate(s);
440                 server_vacuum(s);
441                 vacuumed = true;
442
443                 f = find_journal(s, uid);
444                 if (!f)
445                         return;
446         }
447
448         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
449         if (r >= 0) {
450                 server_schedule_sync(s, priority);
451                 return;
452         }
453
454         if (vacuumed || !shall_try_append_again(f, r)) {
455                 size_t size = 0;
456                 unsigned i;
457                 for (i = 0; i < n; i++)
458                         size += iovec[i].iov_len;
459
460                 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
461                 return;
462         }
463
464         server_rotate(s);
465         server_vacuum(s);
466
467         f = find_journal(s, uid);
468         if (!f)
469                 return;
470
471         log_debug("Retrying write.");
472         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
473         if (r < 0) {
474                 size_t size = 0;
475                 unsigned i;
476                 for (i = 0; i < n; i++)
477                         size += iovec[i].iov_len;
478
479                 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
480         } else
481                 server_schedule_sync(s, priority);
482 }
483
484 static void dispatch_message_real(
485                 Server *s,
486                 struct iovec *iovec, unsigned n, unsigned m,
487                 struct ucred *ucred,
488                 struct timeval *tv,
489                 const char *label, size_t label_len,
490                 const char *unit_id,
491                 int priority,
492                 pid_t object_pid) {
493
494         char    pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
495                 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
496                 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
497                 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
498                 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
499                 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
500                 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=",
501                 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
502                 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
503                 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
504         uid_t object_uid;
505         gid_t object_gid;
506         char *x;
507         sd_id128_t id;
508         int r;
509         char *t, *c;
510         uid_t realuid = 0, owner = 0, journal_uid;
511         bool owner_valid = false;
512 #ifdef HAVE_AUDIT
513         char    audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
514                 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
515                 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
516                 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
517
518         uint32_t audit;
519         uid_t loginuid;
520 #endif
521
522         assert(s);
523         assert(iovec);
524         assert(n > 0);
525         assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
526
527         if (ucred) {
528                 realuid = ucred->uid;
529
530                 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
531                 IOVEC_SET_STRING(iovec[n++], pid);
532
533                 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
534                 IOVEC_SET_STRING(iovec[n++], uid);
535
536                 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
537                 IOVEC_SET_STRING(iovec[n++], gid);
538
539                 r = get_process_comm(ucred->pid, &t);
540                 if (r >= 0) {
541                         x = strappenda("_COMM=", t);
542                         free(t);
543                         IOVEC_SET_STRING(iovec[n++], x);
544                 }
545
546                 r = get_process_exe(ucred->pid, &t);
547                 if (r >= 0) {
548                         x = strappenda("_EXE=", t);
549                         free(t);
550                         IOVEC_SET_STRING(iovec[n++], x);
551                 }
552
553                 r = get_process_cmdline(ucred->pid, 0, false, &t);
554                 if (r >= 0) {
555                         x = strappenda("_CMDLINE=", t);
556                         free(t);
557                         IOVEC_SET_STRING(iovec[n++], x);
558                 }
559
560                 r = get_process_capeff(ucred->pid, &t);
561                 if (r >= 0) {
562                         x = strappenda("_CAP_EFFECTIVE=", t);
563                         free(t);
564                         IOVEC_SET_STRING(iovec[n++], x);
565                 }
566
567 #ifdef HAVE_AUDIT
568                 r = audit_session_from_pid(ucred->pid, &audit);
569                 if (r >= 0) {
570                         sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
571                         IOVEC_SET_STRING(iovec[n++], audit_session);
572                 }
573
574                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
575                 if (r >= 0) {
576                         sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
577                         IOVEC_SET_STRING(iovec[n++], audit_loginuid);
578                 }
579 #endif
580
581                 r = cg_pid_get_path_shifted(ucred->pid, NULL, &c);
582                 if (r >= 0) {
583                         char *session = NULL;
584
585                         x = strappenda("_SYSTEMD_CGROUP=", c);
586                         IOVEC_SET_STRING(iovec[n++], x);
587
588                         r = cg_path_get_session(c, &t);
589                         if (r >= 0) {
590                                 session = strappenda("_SYSTEMD_SESSION=", t);
591                                 free(t);
592                                 IOVEC_SET_STRING(iovec[n++], session);
593                         }
594
595                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
596                                 owner_valid = true;
597
598                                 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
599                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
600                         }
601
602                         if (cg_path_get_unit(c, &t) >= 0) {
603                                 x = strappenda("_SYSTEMD_UNIT=", t);
604                                 free(t);
605                                 IOVEC_SET_STRING(iovec[n++], x);
606                         } else if (unit_id && !session) {
607                                 x = strappenda("_SYSTEMD_UNIT=", unit_id);
608                                 IOVEC_SET_STRING(iovec[n++], x);
609                         }
610
611                         if (cg_path_get_user_unit(c, &t) >= 0) {
612                                 x = strappenda("_SYSTEMD_USER_UNIT=", t);
613                                 free(t);
614                                 IOVEC_SET_STRING(iovec[n++], x);
615                         } else if (unit_id && session) {
616                                 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
617                                 IOVEC_SET_STRING(iovec[n++], x);
618                         }
619
620                         if (cg_path_get_slice(c, &t) >= 0) {
621                                 x = strappenda("_SYSTEMD_SLICE=", t);
622                                 free(t);
623                                 IOVEC_SET_STRING(iovec[n++], x);
624                         }
625
626                         free(c);
627                 }
628
629 #ifdef HAVE_SELINUX
630                 if (label) {
631                         x = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);
632
633                         *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
634                         IOVEC_SET_STRING(iovec[n++], x);
635                 } else {
636                         security_context_t con;
637
638                         if (getpidcon(ucred->pid, &con) >= 0) {
639                                 x = strappenda("_SELINUX_CONTEXT=", con);
640
641                                 freecon(con);
642                                 IOVEC_SET_STRING(iovec[n++], x);
643                         }
644                 }
645 #endif
646         }
647         assert(n <= m);
648
649         if (object_pid) {
650                 r = get_process_uid(object_pid, &object_uid);
651                 if (r >= 0) {
652                         sprintf(o_uid, "OBJECT_UID=%lu", (unsigned long) object_uid);
653                         IOVEC_SET_STRING(iovec[n++], o_uid);
654                 }
655
656                 r = get_process_gid(object_pid, &object_gid);
657                 if (r >= 0) {
658                         sprintf(o_gid, "OBJECT_GID=%lu", (unsigned long) object_gid);
659                         IOVEC_SET_STRING(iovec[n++], o_gid);
660                 }
661
662                 r = get_process_comm(object_pid, &t);
663                 if (r >= 0) {
664                         x = strappenda("OBJECT_COMM=", t);
665                         free(t);
666                         IOVEC_SET_STRING(iovec[n++], x);
667                 }
668
669                 r = get_process_exe(object_pid, &t);
670                 if (r >= 0) {
671                         x = strappenda("OBJECT_EXE=", t);
672                         free(t);
673                         IOVEC_SET_STRING(iovec[n++], x);
674                 }
675
676                 r = get_process_cmdline(object_pid, 0, false, &t);
677                 if (r >= 0) {
678                         x = strappenda("OBJECT_CMDLINE=", t);
679                         free(t);
680                         IOVEC_SET_STRING(iovec[n++], x);
681                 }
682
683 #ifdef HAVE_AUDIT
684                 r = audit_session_from_pid(object_pid, &audit);
685                 if (r >= 0) {
686                         sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%lu", (unsigned long) audit);
687                         IOVEC_SET_STRING(iovec[n++], o_audit_session);
688                 }
689
690                 r = audit_loginuid_from_pid(object_pid, &loginuid);
691                 if (r >= 0) {
692                         sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
693                         IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
694                 }
695 #endif
696
697                 r = cg_pid_get_path_shifted(object_pid, NULL, &c);
698                 if (r >= 0) {
699                         x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
700                         IOVEC_SET_STRING(iovec[n++], x);
701
702                         r = cg_path_get_session(c, &t);
703                         if (r >= 0) {
704                                 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
705                                 free(t);
706                                 IOVEC_SET_STRING(iovec[n++], x);
707                         }
708
709                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
710                                 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
711                                 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
712                         }
713
714                         if (cg_path_get_unit(c, &t) >= 0) {
715                                 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
716                                 free(t);
717                                 IOVEC_SET_STRING(iovec[n++], x);
718                         }
719
720                         if (cg_path_get_user_unit(c, &t) >= 0) {
721                                 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
722                                 free(t);
723                                 IOVEC_SET_STRING(iovec[n++], x);
724                         }
725
726                         free(c);
727                 }
728         }
729         assert(n <= m);
730
731         if (tv) {
732                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
733                 IOVEC_SET_STRING(iovec[n++], source_time);
734         }
735
736         /* Note that strictly speaking storing the boot id here is
737          * redundant since the entry includes this in-line
738          * anyway. However, we need this indexed, too. */
739         r = sd_id128_get_boot(&id);
740         if (r >= 0) {
741                 sd_id128_to_string(id, boot_id + strlen("_BOOT_ID="));
742                 IOVEC_SET_STRING(iovec[n++], boot_id);
743         }
744
745         r = sd_id128_get_machine(&id);
746         if (r >= 0) {
747                 sd_id128_to_string(id, machine_id + strlen("_MACHINE_ID="));
748                 IOVEC_SET_STRING(iovec[n++], machine_id);
749         }
750
751         t = gethostname_malloc();
752         if (t) {
753                 x = strappenda("_HOSTNAME=", t);
754                 free(t);
755                 IOVEC_SET_STRING(iovec[n++], x);
756         }
757
758         assert(n <= m);
759
760         if (s->split_mode == SPLIT_UID && realuid > 0)
761                 /* Split up strictly by any UID */
762                 journal_uid = realuid;
763         else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
764                 /* Split up by login UIDs, this avoids creation of
765                  * individual journals for system UIDs.  We do this
766                  * only if the realuid is not root, in order not to
767                  * accidentally leak privileged information to the
768                  * user that is logged by a privileged process that is
769                  * part of an unprivileged session.*/
770                 journal_uid = owner;
771         else
772                 journal_uid = 0;
773
774         write_to_journal(s, journal_uid, iovec, n, priority);
775 }
776
777 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
778         char mid[11 + 32 + 1];
779         char buffer[16 + LINE_MAX + 1];
780         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
781         int n = 0;
782         va_list ap;
783         struct ucred ucred = {};
784
785         assert(s);
786         assert(format);
787
788         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
789         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
790
791         memcpy(buffer, "MESSAGE=", 8);
792         va_start(ap, format);
793         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
794         va_end(ap);
795         char_array_0(buffer);
796         IOVEC_SET_STRING(iovec[n++], buffer);
797
798         if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
799                 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
800                 char_array_0(mid);
801                 IOVEC_SET_STRING(iovec[n++], mid);
802         }
803
804         ucred.pid = getpid();
805         ucred.uid = getuid();
806         ucred.gid = getgid();
807
808         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
809 }
810
811 void server_dispatch_message(
812                 Server *s,
813                 struct iovec *iovec, unsigned n, unsigned m,
814                 struct ucred *ucred,
815                 struct timeval *tv,
816                 const char *label, size_t label_len,
817                 const char *unit_id,
818                 int priority,
819                 pid_t object_pid) {
820
821         int rl, r;
822         _cleanup_free_ char *path = NULL;
823         char *c;
824
825         assert(s);
826         assert(iovec || n == 0);
827
828         if (n == 0)
829                 return;
830
831         if (LOG_PRI(priority) > s->max_level_store)
832                 return;
833
834         /* Stop early in case the information will not be stored
835          * in a journal. */
836         if (s->storage == STORAGE_NONE)
837                 return;
838
839         if (!ucred)
840                 goto finish;
841
842         r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
843         if (r < 0)
844                 goto finish;
845
846         /* example: /user/lennart/3/foobar
847          *          /system/dbus.service/foobar
848          *
849          * So let's cut of everything past the third /, since that is
850          * where user directories start */
851
852         c = strchr(path, '/');
853         if (c) {
854                 c = strchr(c+1, '/');
855                 if (c) {
856                         c = strchr(c+1, '/');
857                         if (c)
858                                 *c = 0;
859                 }
860         }
861
862         rl = journal_rate_limit_test(s->rate_limit, path,
863                                      priority & LOG_PRIMASK, available_space(s, false));
864
865         if (rl == 0)
866                 return;
867
868         /* Write a suppression message if we suppressed something */
869         if (rl > 1)
870                 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
871                                       "Suppressed %u messages from %s", rl - 1, path);
872
873 finish:
874         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
875 }
876
877
878 static int system_journal_open(Server *s) {
879         int r;
880         char *fn;
881         sd_id128_t machine;
882         char ids[33];
883
884         r = sd_id128_get_machine(&machine);
885         if (r < 0) {
886                 log_error("Failed to get machine id: %s", strerror(-r));
887                 return r;
888         }
889
890         sd_id128_to_string(machine, ids);
891
892         if (!s->system_journal &&
893             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
894             access("/run/systemd/journal/flushed", F_OK) >= 0) {
895
896                 /* If in auto mode: first try to create the machine
897                  * path, but not the prefix.
898                  *
899                  * If in persistent mode: create /var/log/journal and
900                  * the machine path */
901
902                 if (s->storage == STORAGE_PERSISTENT)
903                         (void) mkdir("/var/log/journal/", 0755);
904
905                 fn = strappenda("/var/log/journal/", ids);
906                 (void) mkdir(fn, 0755);
907
908                 fn = strappenda(fn, "/system.journal");
909                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
910
911                 if (r >= 0)
912                         server_fix_perms(s, s->system_journal, 0);
913                 else if (r < 0) {
914                         if (r != -ENOENT && r != -EROFS)
915                                 log_warning("Failed to open system journal: %s", strerror(-r));
916
917                         r = 0;
918                 }
919         }
920
921         if (!s->runtime_journal &&
922             (s->storage != STORAGE_NONE)) {
923
924                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
925                 if (!fn)
926                         return -ENOMEM;
927
928                 if (s->system_journal) {
929
930                         /* Try to open the runtime journal, but only
931                          * if it already exists, so that we can flush
932                          * it into the system journal */
933
934                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
935                         free(fn);
936
937                         if (r < 0) {
938                                 if (r != -ENOENT)
939                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
940
941                                 r = 0;
942                         }
943
944                 } else {
945
946                         /* OK, we really need the runtime journal, so create
947                          * it if necessary. */
948
949                         (void) mkdir_parents(fn, 0755);
950                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
951                         free(fn);
952
953                         if (r < 0) {
954                                 log_error("Failed to open runtime journal: %s", strerror(-r));
955                                 return r;
956                         }
957                 }
958
959                 if (s->runtime_journal)
960                         server_fix_perms(s, s->runtime_journal, 0);
961         }
962
963         available_space(s, true);
964
965         return r;
966 }
967
968 int server_flush_to_var(Server *s) {
969         int r;
970         sd_id128_t machine;
971         sd_journal *j = NULL;
972
973         assert(s);
974
975         if (s->storage != STORAGE_AUTO &&
976             s->storage != STORAGE_PERSISTENT)
977                 return 0;
978
979         if (!s->runtime_journal)
980                 return 0;
981
982         system_journal_open(s);
983
984         if (!s->system_journal)
985                 return 0;
986
987         log_debug("Flushing to /var...");
988
989         r = sd_id128_get_machine(&machine);
990         if (r < 0)
991                 return r;
992
993         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
994         if (r < 0) {
995                 log_error("Failed to read runtime journal: %s", strerror(-r));
996                 return r;
997         }
998
999         sd_journal_set_data_threshold(j, 0);
1000
1001         SD_JOURNAL_FOREACH(j) {
1002                 Object *o = NULL;
1003                 JournalFile *f;
1004
1005                 f = j->current_file;
1006                 assert(f && f->current_offset > 0);
1007
1008                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1009                 if (r < 0) {
1010                         log_error("Can't read entry: %s", strerror(-r));
1011                         goto finish;
1012                 }
1013
1014                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1015                 if (r >= 0)
1016                         continue;
1017
1018                 if (!shall_try_append_again(s->system_journal, r)) {
1019                         log_error("Can't write entry: %s", strerror(-r));
1020                         goto finish;
1021                 }
1022
1023                 server_rotate(s);
1024                 server_vacuum(s);
1025
1026                 if (!s->system_journal) {
1027                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1028                         r = -EIO;
1029                         goto finish;
1030                 }
1031
1032                 log_debug("Retrying write.");
1033                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1034                 if (r < 0) {
1035                         log_error("Can't write entry: %s", strerror(-r));
1036                         goto finish;
1037                 }
1038         }
1039
1040 finish:
1041         journal_file_post_change(s->system_journal);
1042
1043         journal_file_close(s->runtime_journal);
1044         s->runtime_journal = NULL;
1045
1046         if (r >= 0)
1047                 rm_rf("/run/log/journal", false, true, false);
1048
1049         sd_journal_close(j);
1050
1051         return r;
1052 }
1053
1054 int process_event(Server *s, struct epoll_event *ev) {
1055         assert(s);
1056         assert(ev);
1057
1058         if (ev->data.fd == s->signal_fd) {
1059                 struct signalfd_siginfo sfsi;
1060                 ssize_t n;
1061
1062                 if (ev->events != EPOLLIN) {
1063                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1064                                   "signal fd", ev->events);
1065                         return -EIO;
1066                 }
1067
1068                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1069                 if (n != sizeof(sfsi)) {
1070
1071                         if (n >= 0)
1072                                 return -EIO;
1073
1074                         if (errno == EINTR || errno == EAGAIN)
1075                                 return 1;
1076
1077                         return -errno;
1078                 }
1079
1080                 if (sfsi.ssi_signo == SIGUSR1) {
1081                         log_info("Received request to flush runtime journal from PID %"PRIu32,
1082                                  sfsi.ssi_pid);
1083                         touch("/run/systemd/journal/flushed");
1084                         server_flush_to_var(s);
1085                         server_sync(s);
1086                         return 1;
1087                 }
1088
1089                 if (sfsi.ssi_signo == SIGUSR2) {
1090                         log_info("Received request to rotate journal from PID %"PRIu32,
1091                                  sfsi.ssi_pid);
1092                         server_rotate(s);
1093                         server_vacuum(s);
1094                         return 1;
1095                 }
1096
1097                 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1098
1099                 return 0;
1100
1101         } else if (ev->data.fd == s->sync_timer_fd) {
1102                 int r;
1103                 uint64_t t;
1104
1105                 log_debug("Got sync request from epoll.");
1106
1107                 r = read(ev->data.fd, (void *)&t, sizeof(t));
1108                 if (r < 0)
1109                         return 0;
1110
1111                 server_sync(s);
1112                 return 1;
1113
1114         } else if (ev->data.fd == s->dev_kmsg_fd) {
1115                 int r;
1116
1117                 if (ev->events & EPOLLERR)
1118                         log_warning("/dev/kmsg buffer overrun, some messages lost.");
1119
1120                 if (!(ev->events & EPOLLIN)) {
1121                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1122                                   "/dev/kmsg", ev->events);
1123                         return -EIO;
1124                 }
1125
1126                 r = server_read_dev_kmsg(s);
1127                 if (r < 0)
1128                         return r;
1129
1130                 return 1;
1131
1132         } else if (ev->data.fd == s->native_fd ||
1133                    ev->data.fd == s->syslog_fd) {
1134
1135                 if (ev->events != EPOLLIN) {
1136                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1137                                   ev->data.fd == s->native_fd ? "native fd" : "syslog fd",
1138                                   ev->events);
1139                         return -EIO;
1140                 }
1141
1142                 for (;;) {
1143                         struct msghdr msghdr;
1144                         struct iovec iovec;
1145                         struct ucred *ucred = NULL;
1146                         struct timeval *tv = NULL;
1147                         struct cmsghdr *cmsg;
1148                         char *label = NULL;
1149                         size_t label_len = 0;
1150                         union {
1151                                 struct cmsghdr cmsghdr;
1152
1153                                 /* We use NAME_MAX space for the
1154                                  * SELinux label here. The kernel
1155                                  * currently enforces no limit, but
1156                                  * according to suggestions from the
1157                                  * SELinux people this will change and
1158                                  * it will probably be identical to
1159                                  * NAME_MAX. For now we use that, but
1160                                  * this should be updated one day when
1161                                  * the final limit is known.*/
1162                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1163                                             CMSG_SPACE(sizeof(struct timeval)) +
1164                                             CMSG_SPACE(sizeof(int)) + /* fd */
1165                                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
1166                         } control;
1167                         ssize_t n;
1168                         int v;
1169                         int *fds = NULL;
1170                         unsigned n_fds = 0;
1171
1172                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1173                                 log_error("SIOCINQ failed: %m");
1174                                 return -errno;
1175                         }
1176
1177                         if (s->buffer_size < (size_t) v) {
1178                                 void *b;
1179                                 size_t l;
1180
1181                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1182                                 b = realloc(s->buffer, l+1);
1183
1184                                 if (!b) {
1185                                         log_error("Couldn't increase buffer.");
1186                                         return -ENOMEM;
1187                                 }
1188
1189                                 s->buffer_size = l;
1190                                 s->buffer = b;
1191                         }
1192
1193                         zero(iovec);
1194                         iovec.iov_base = s->buffer;
1195                         iovec.iov_len = s->buffer_size;
1196
1197                         zero(control);
1198                         zero(msghdr);
1199                         msghdr.msg_iov = &iovec;
1200                         msghdr.msg_iovlen = 1;
1201                         msghdr.msg_control = &control;
1202                         msghdr.msg_controllen = sizeof(control);
1203
1204                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1205                         if (n < 0) {
1206
1207                                 if (errno == EINTR || errno == EAGAIN)
1208                                         return 1;
1209
1210                                 log_error("recvmsg() failed: %m");
1211                                 return -errno;
1212                         }
1213
1214                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1215
1216                                 if (cmsg->cmsg_level == SOL_SOCKET &&
1217                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
1218                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1219                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
1220                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1221                                          cmsg->cmsg_type == SCM_SECURITY) {
1222                                         label = (char*) CMSG_DATA(cmsg);
1223                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
1224                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1225                                            cmsg->cmsg_type == SO_TIMESTAMP &&
1226                                            cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1227                                         tv = (struct timeval*) CMSG_DATA(cmsg);
1228                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1229                                          cmsg->cmsg_type == SCM_RIGHTS) {
1230                                         fds = (int*) CMSG_DATA(cmsg);
1231                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1232                                 }
1233                         }
1234
1235                         if (ev->data.fd == s->syslog_fd) {
1236                                 if (n > 0 && n_fds == 0) {
1237                                         s->buffer[n] = 0;
1238                                         server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1239                                 } else if (n_fds > 0)
1240                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
1241
1242                         } else {
1243                                 if (n > 0 && n_fds == 0)
1244                                         server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1245                                 else if (n == 0 && n_fds == 1)
1246                                         server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1247                                 else if (n_fds > 0)
1248                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
1249                         }
1250
1251                         close_many(fds, n_fds);
1252                 }
1253
1254                 return 1;
1255
1256         } else if (ev->data.fd == s->stdout_fd) {
1257
1258                 if (ev->events != EPOLLIN) {
1259                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1260                                   "stdout fd", ev->events);
1261                         return -EIO;
1262                 }
1263
1264                 stdout_stream_new(s);
1265                 return 1;
1266
1267         } else {
1268                 StdoutStream *stream;
1269
1270                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1271                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1272                                   "stdout stream", ev->events);
1273                         return -EIO;
1274                 }
1275
1276                 /* If it is none of the well-known fds, it must be an
1277                  * stdout stream fd. Note that this is a bit ugly here
1278                  * (since we rely that none of the well-known fds
1279                  * could be interpreted as pointer), but nonetheless
1280                  * safe, since the well-known fds would never get an
1281                  * fd > 4096, i.e. beyond the first memory page */
1282
1283                 stream = ev->data.ptr;
1284
1285                 if (stdout_stream_process(stream) <= 0)
1286                         stdout_stream_free(stream);
1287
1288                 return 1;
1289         }
1290
1291         log_error("Unknown event.");
1292         return 0;
1293 }
1294
1295 static int open_signalfd(Server *s) {
1296         sigset_t mask;
1297         struct epoll_event ev;
1298
1299         assert(s);
1300
1301         assert_se(sigemptyset(&mask) == 0);
1302         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1303         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1304
1305         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1306         if (s->signal_fd < 0) {
1307                 log_error("signalfd(): %m");
1308                 return -errno;
1309         }
1310
1311         zero(ev);
1312         ev.events = EPOLLIN;
1313         ev.data.fd = s->signal_fd;
1314
1315         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1316                 log_error("epoll_ctl(): %m");
1317                 return -errno;
1318         }
1319
1320         return 0;
1321 }
1322
1323 static int server_parse_proc_cmdline(Server *s) {
1324         _cleanup_free_ char *line = NULL;
1325         char *w, *state;
1326         int r;
1327         size_t l;
1328
1329         if (detect_container(NULL) > 0)
1330                 return 0;
1331
1332         r = read_one_line_file("/proc/cmdline", &line);
1333         if (r < 0) {
1334                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1335                 return 0;
1336         }
1337
1338         FOREACH_WORD_QUOTED(w, l, line, state) {
1339                 _cleanup_free_ char *word;
1340
1341                 word = strndup(w, l);
1342                 if (!word)
1343                         return -ENOMEM;
1344
1345                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1346                         r = parse_boolean(word + 35);
1347                         if (r < 0)
1348                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1349                         else
1350                                 s->forward_to_syslog = r;
1351                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1352                         r = parse_boolean(word + 33);
1353                         if (r < 0)
1354                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1355                         else
1356                                 s->forward_to_kmsg = r;
1357                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1358                         r = parse_boolean(word + 36);
1359                         if (r < 0)
1360                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1361                         else
1362                                 s->forward_to_console = r;
1363                 } else if (startswith(word, "systemd.journald"))
1364                         log_warning("Invalid systemd.journald parameter. Ignoring.");
1365         }
1366
1367         return 0;
1368 }
1369
1370 static int server_parse_config_file(Server *s) {
1371         static const char fn[] = "/etc/systemd/journald.conf";
1372         _cleanup_fclose_ FILE *f = NULL;
1373         int r;
1374
1375         assert(s);
1376
1377         f = fopen(fn, "re");
1378         if (!f) {
1379                 if (errno == ENOENT)
1380                         return 0;
1381
1382                 log_warning("Failed to open configuration file %s: %m", fn);
1383                 return -errno;
1384         }
1385
1386         r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1387                          (void*) journald_gperf_lookup, false, false, s);
1388         if (r < 0)
1389                 log_warning("Failed to parse configuration file: %s", strerror(-r));
1390
1391         return r;
1392 }
1393
1394 static int server_open_sync_timer(Server *s) {
1395         int r;
1396         struct epoll_event ev;
1397
1398         assert(s);
1399
1400         s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1401         if (s->sync_timer_fd < 0)
1402                 return -errno;
1403
1404         zero(ev);
1405         ev.events = EPOLLIN;
1406         ev.data.fd = s->sync_timer_fd;
1407
1408         r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1409         if (r < 0) {
1410                 log_error("Failed to add idle timer fd to epoll object: %m");
1411                 return -errno;
1412         }
1413
1414         return 0;
1415 }
1416
1417 int server_schedule_sync(Server *s, int priority) {
1418         int r;
1419
1420         assert(s);
1421
1422         if (priority <= LOG_CRIT) {
1423                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1424                 server_sync(s);
1425                 return 0;
1426         }
1427
1428         if (s->sync_scheduled)
1429                 return 0;
1430
1431         if (s->sync_interval_usec) {
1432                 struct itimerspec sync_timer_enable = {};
1433
1434                 timespec_store(&sync_timer_enable.it_value, s->sync_interval_usec);
1435
1436                 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1437                 if (r < 0)
1438                         return -errno;
1439         }
1440
1441         s->sync_scheduled = true;
1442
1443         return 0;
1444 }
1445
1446 int server_init(Server *s) {
1447         int n, r, fd;
1448
1449         assert(s);
1450
1451         zero(*s);
1452         s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1453                 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1454         s->compress = true;
1455         s->seal = true;
1456
1457         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1458         s->sync_scheduled = false;
1459
1460         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1461         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1462
1463         s->forward_to_syslog = true;
1464
1465         s->max_level_store = LOG_DEBUG;
1466         s->max_level_syslog = LOG_DEBUG;
1467         s->max_level_kmsg = LOG_NOTICE;
1468         s->max_level_console = LOG_INFO;
1469
1470         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1471         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1472
1473         server_parse_config_file(s);
1474         server_parse_proc_cmdline(s);
1475         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1476                 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1477                           (long long unsigned) s->rate_limit_interval,
1478                           s->rate_limit_burst);
1479                 s->rate_limit_interval = s->rate_limit_burst = 0;
1480         }
1481
1482         mkdir_p("/run/systemd/journal", 0755);
1483
1484         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1485         if (!s->user_journals)
1486                 return log_oom();
1487
1488         s->mmap = mmap_cache_new();
1489         if (!s->mmap)
1490                 return log_oom();
1491
1492         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1493         if (s->epoll_fd < 0) {
1494                 log_error("Failed to create epoll object: %m");
1495                 return -errno;
1496         }
1497
1498         n = sd_listen_fds(true);
1499         if (n < 0) {
1500                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1501                 return n;
1502         }
1503
1504         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1505
1506                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1507
1508                         if (s->native_fd >= 0) {
1509                                 log_error("Too many native sockets passed.");
1510                                 return -EINVAL;
1511                         }
1512
1513                         s->native_fd = fd;
1514
1515                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1516
1517                         if (s->stdout_fd >= 0) {
1518                                 log_error("Too many stdout sockets passed.");
1519                                 return -EINVAL;
1520                         }
1521
1522                         s->stdout_fd = fd;
1523
1524                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1525
1526                         if (s->syslog_fd >= 0) {
1527                                 log_error("Too many /dev/log sockets passed.");
1528                                 return -EINVAL;
1529                         }
1530
1531                         s->syslog_fd = fd;
1532
1533                 } else {
1534                         log_error("Unknown socket passed.");
1535                         return -EINVAL;
1536                 }
1537         }
1538
1539         r = server_open_syslog_socket(s);
1540         if (r < 0)
1541                 return r;
1542
1543         r = server_open_native_socket(s);
1544         if (r < 0)
1545                 return r;
1546
1547         r = server_open_stdout_socket(s);
1548         if (r < 0)
1549                 return r;
1550
1551         r = server_open_dev_kmsg(s);
1552         if (r < 0)
1553                 return r;
1554
1555         r = server_open_kernel_seqnum(s);
1556         if (r < 0)
1557                 return r;
1558
1559         r = server_open_sync_timer(s);
1560         if (r < 0)
1561                 return r;
1562
1563         r = open_signalfd(s);
1564         if (r < 0)
1565                 return r;
1566
1567         s->udev = udev_new();
1568         if (!s->udev)
1569                 return -ENOMEM;
1570
1571         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1572                                                s->rate_limit_burst);
1573         if (!s->rate_limit)
1574                 return -ENOMEM;
1575
1576         r = system_journal_open(s);
1577         if (r < 0)
1578                 return r;
1579
1580         return 0;
1581 }
1582
1583 void server_maybe_append_tags(Server *s) {
1584 #ifdef HAVE_GCRYPT
1585         JournalFile *f;
1586         Iterator i;
1587         usec_t n;
1588
1589         n = now(CLOCK_REALTIME);
1590
1591         if (s->system_journal)
1592                 journal_file_maybe_append_tag(s->system_journal, n);
1593
1594         HASHMAP_FOREACH(f, s->user_journals, i)
1595                 journal_file_maybe_append_tag(f, n);
1596 #endif
1597 }
1598
1599 void server_done(Server *s) {
1600         JournalFile *f;
1601         assert(s);
1602
1603         while (s->stdout_streams)
1604                 stdout_stream_free(s->stdout_streams);
1605
1606         if (s->system_journal)
1607                 journal_file_close(s->system_journal);
1608
1609         if (s->runtime_journal)
1610                 journal_file_close(s->runtime_journal);
1611
1612         while ((f = hashmap_steal_first(s->user_journals)))
1613                 journal_file_close(f);
1614
1615         hashmap_free(s->user_journals);
1616
1617         if (s->epoll_fd >= 0)
1618                 close_nointr_nofail(s->epoll_fd);
1619
1620         if (s->signal_fd >= 0)
1621                 close_nointr_nofail(s->signal_fd);
1622
1623         if (s->syslog_fd >= 0)
1624                 close_nointr_nofail(s->syslog_fd);
1625
1626         if (s->native_fd >= 0)
1627                 close_nointr_nofail(s->native_fd);
1628
1629         if (s->stdout_fd >= 0)
1630                 close_nointr_nofail(s->stdout_fd);
1631
1632         if (s->dev_kmsg_fd >= 0)
1633                 close_nointr_nofail(s->dev_kmsg_fd);
1634
1635         if (s->sync_timer_fd >= 0)
1636                 close_nointr_nofail(s->sync_timer_fd);
1637
1638         if (s->rate_limit)
1639                 journal_rate_limit_free(s->rate_limit);
1640
1641         if (s->kernel_seqnum)
1642                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1643
1644         free(s->buffer);
1645         free(s->tty_path);
1646
1647         if (s->mmap)
1648                 mmap_cache_unref(s->mmap);
1649
1650         if (s->udev)
1651                 udev_unref(s->udev);
1652 }