chiark / gitweb /
019c3a649acae87f7dfd68292e479e3af701b4ab
[elogind.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
26 #include <sys/mman.h>
27 #include <sys/timerfd.h>
28
29 #include <libudev.h>
30
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33 #include "sd-daemon.h"
34 #include "fileio.h"
35 #include "mkdir.h"
36 #include "hashmap.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
40 #include "list.h"
41 #include "missing.h"
42 #include "conf-parser.h"
43 #include "selinux-util.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-rate-limit.h"
48 #include "journald-kmsg.h"
49 #include "journald-syslog.h"
50 #include "journald-stream.h"
51 #include "journald-console.h"
52 #include "journald-native.h"
53 #include "journald-audit.h"
54 #include "journald-server.h"
55
56 #ifdef HAVE_ACL
57 #include <sys/acl.h>
58 #include <acl/libacl.h>
59 #include "acl-util.h"
60 #endif
61
62 #ifdef HAVE_SELINUX
63 #include <selinux/selinux.h>
64 #endif
65
/* Upper bound on the number of per-user journal files kept open at
 * the same time; find_journal() closes files until below this. */
#define USER_JOURNALS_MAX               1024

/* Built-in defaults. NOTE(review): presumably applied when the server
 * is initialized and overridable from configuration -- the users of
 * these values are not visible in this part of the file. */
#define DEFAULT_SYNC_INTERVAL_USEC      (5 * USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL     (30 * USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST        1000
#define DEFAULT_MAX_FILE_USEC           USEC_PER_MONTH

/* How long a value computed by available_space() stays valid before
 * the journal directory is scanned again. */
#define RECHECK_AVAILABLE_SPACE_USEC    (30 * USEC_PER_SEC)
74
75 static const char* const storage_table[_STORAGE_MAX] = {
76         [STORAGE_AUTO] = "auto",
77         [STORAGE_VOLATILE] = "volatile",
78         [STORAGE_PERSISTENT] = "persistent",
79         [STORAGE_NONE] = "none"
80 };
81
82 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
83 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
84
85 static const char* const split_mode_table[_SPLIT_MAX] = {
86         [SPLIT_LOGIN] = "login",
87         [SPLIT_UID] = "uid",
88         [SPLIT_NONE] = "none",
89 };
90
91 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
92 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
93
94 static uint64_t available_space(Server *s, bool verbose) {
95         char ids[33];
96         _cleanup_free_ char *p = NULL;
97         sd_id128_t machine;
98         struct statvfs ss;
99         uint64_t sum = 0, ss_avail = 0, avail = 0;
100         int r;
101         _cleanup_closedir_ DIR *d = NULL;
102         usec_t ts;
103         const char *f;
104         JournalMetrics *m;
105
106         ts = now(CLOCK_MONOTONIC);
107
108         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
109             && !verbose)
110                 return s->cached_available_space;
111
112         r = sd_id128_get_machine(&machine);
113         if (r < 0)
114                 return 0;
115
116         if (s->system_journal) {
117                 f = "/var/log/journal/";
118                 m = &s->system_metrics;
119         } else {
120                 f = "/run/log/journal/";
121                 m = &s->runtime_metrics;
122         }
123
124         assert(m);
125
126         p = strappend(f, sd_id128_to_string(machine, ids));
127         if (!p)
128                 return 0;
129
130         d = opendir(p);
131         if (!d)
132                 return 0;
133
134         if (fstatvfs(dirfd(d), &ss) < 0)
135                 return 0;
136
137         for (;;) {
138                 struct stat st;
139                 struct dirent *de;
140
141                 errno = 0;
142                 de = readdir(d);
143                 if (!de && errno != 0)
144                         return 0;
145
146                 if (!de)
147                         break;
148
149                 if (!endswith(de->d_name, ".journal") &&
150                     !endswith(de->d_name, ".journal~"))
151                         continue;
152
153                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
154                         continue;
155
156                 if (!S_ISREG(st.st_mode))
157                         continue;
158
159                 sum += (uint64_t) st.st_blocks * 512UL;
160         }
161
162         ss_avail = ss.f_bsize * ss.f_bavail;
163
164         /* If we reached a high mark, we will always allow this much
165          * again, unless usage goes above max_use. This watermark
166          * value is cached so that we don't give up space on pressure,
167          * but hover below the maximum usage. */
168
169         if (m->use < sum)
170                 m->use = sum;
171
172         avail = LESS_BY(ss_avail, m->keep_free);
173
174         s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
175         s->cached_available_space_timestamp = ts;
176
177         if (verbose) {
178                 char    fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
179                         fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
180
181                 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
182                                       "%s journal is using %s (max allowed %s, "
183                                       "trying to leave %s free of %s available → current limit %s).",
184                                       s->system_journal ? "Permanent" : "Runtime",
185                                       format_bytes(fb1, sizeof(fb1), sum),
186                                       format_bytes(fb2, sizeof(fb2), m->max_use),
187                                       format_bytes(fb3, sizeof(fb3), m->keep_free),
188                                       format_bytes(fb4, sizeof(fb4), ss_avail),
189                                       format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
190         }
191
192         return s->cached_available_space;
193 }
194
195 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
196         int r;
197 #ifdef HAVE_ACL
198         acl_t acl;
199         acl_entry_t entry;
200         acl_permset_t permset;
201 #endif
202
203         assert(f);
204
205         r = fchmod(f->fd, 0640);
206         if (r < 0)
207                 log_warning_errno(r, "Failed to fix access mode on %s, ignoring: %m", f->path);
208
209 #ifdef HAVE_ACL
210         if (uid <= SYSTEM_UID_MAX)
211                 return;
212
213         acl = acl_get_fd(f->fd);
214         if (!acl) {
215                 log_warning_errno(errno, "Failed to read ACL on %s, ignoring: %m", f->path);
216                 return;
217         }
218
219         r = acl_find_uid(acl, uid, &entry);
220         if (r <= 0) {
221
222                 if (acl_create_entry(&acl, &entry) < 0 ||
223                     acl_set_tag_type(entry, ACL_USER) < 0 ||
224                     acl_set_qualifier(entry, &uid) < 0) {
225                         log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
226                         goto finish;
227                 }
228         }
229
230         /* We do not recalculate the mask unconditionally here,
231          * so that the fchmod() mask above stays intact. */
232         if (acl_get_permset(entry, &permset) < 0 ||
233             acl_add_perm(permset, ACL_READ) < 0 ||
234             calc_acl_mask_if_needed(&acl) < 0) {
235                 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
236                 goto finish;
237         }
238
239         if (acl_set_fd(f->fd, acl) < 0)
240                 log_warning_errno(errno, "Failed to set ACL on %s, ignoring: %m", f->path);
241
242 finish:
243         acl_free(acl);
244 #endif
245 }
246
247 static JournalFile* find_journal(Server *s, uid_t uid) {
248         _cleanup_free_ char *p = NULL;
249         int r;
250         JournalFile *f;
251         sd_id128_t machine;
252
253         assert(s);
254
255         /* We split up user logs only on /var, not on /run. If the
256          * runtime file is open, we write to it exclusively, in order
257          * to guarantee proper order as soon as we flush /run to
258          * /var and close the runtime file. */
259
260         if (s->runtime_journal)
261                 return s->runtime_journal;
262
263         if (uid <= SYSTEM_UID_MAX)
264                 return s->system_journal;
265
266         r = sd_id128_get_machine(&machine);
267         if (r < 0)
268                 return s->system_journal;
269
270         f = ordered_hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
271         if (f)
272                 return f;
273
274         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
275                      SD_ID128_FORMAT_VAL(machine), uid) < 0)
276                 return s->system_journal;
277
278         while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
279                 /* Too many open? Then let's close one */
280                 f = ordered_hashmap_steal_first(s->user_journals);
281                 assert(f);
282                 journal_file_close(f);
283         }
284
285         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
286         if (r < 0)
287                 return s->system_journal;
288
289         server_fix_perms(s, f, uid);
290
291         r = ordered_hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
292         if (r < 0) {
293                 journal_file_close(f);
294                 return s->system_journal;
295         }
296
297         return f;
298 }
299
300 static int do_rotate(
301                 Server *s,
302                 JournalFile **f,
303                 const char* name,
304                 bool seal,
305                 uint32_t uid) {
306
307         int r;
308         assert(s);
309
310         if (!*f)
311                 return -EINVAL;
312
313         r = journal_file_rotate(f, s->compress, seal);
314         if (r < 0)
315                 if (*f)
316                         log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
317                 else
318                         log_error_errno(r, "Failed to create new %s journal: %m", name);
319         else
320                 server_fix_perms(s, *f, uid);
321         return r;
322 }
323
324 void server_rotate(Server *s) {
325         JournalFile *f;
326         void *k;
327         Iterator i;
328         int r;
329
330         log_debug("Rotating...");
331
332         do_rotate(s, &s->runtime_journal, "runtime", false, 0);
333         do_rotate(s, &s->system_journal, "system", s->seal, 0);
334
335         ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
336                 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
337                 if (r >= 0)
338                         ordered_hashmap_replace(s->user_journals, k, f);
339                 else if (!f)
340                         /* Old file has been closed and deallocated */
341                         ordered_hashmap_remove(s->user_journals, k);
342         }
343 }
344
345 void server_sync(Server *s) {
346         JournalFile *f;
347         void *k;
348         Iterator i;
349         int r;
350
351         if (s->system_journal) {
352                 r = journal_file_set_offline(s->system_journal);
353                 if (r < 0)
354                         log_error_errno(r, "Failed to sync system journal: %m");
355         }
356
357         ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
358                 r = journal_file_set_offline(f);
359                 if (r < 0)
360                         log_error_errno(r, "Failed to sync user journal: %m");
361         }
362
363         if (s->sync_event_source) {
364                 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
365                 if (r < 0)
366                         log_error_errno(r, "Failed to disable sync timer source: %m");
367         }
368
369         s->sync_scheduled = false;
370 }
371
372 static void do_vacuum(
373                 Server *s,
374                 const char *id,
375                 JournalFile *f,
376                 const char* path,
377                 JournalMetrics *metrics) {
378
379         const char *p;
380         int r;
381
382         if (!f)
383                 return;
384
385         p = strappenda(path, id);
386         r = journal_directory_vacuum(p, metrics->max_use, s->max_retention_usec, &s->oldest_file_usec, false);
387         if (r < 0 && r != -ENOENT)
388                 log_error_errno(r, "Failed to vacuum %s: %m", p);
389 }
390
391 void server_vacuum(Server *s) {
392         char ids[33];
393         sd_id128_t machine;
394         int r;
395
396         log_debug("Vacuuming...");
397
398         s->oldest_file_usec = 0;
399
400         r = sd_id128_get_machine(&machine);
401         if (r < 0) {
402                 log_error_errno(r, "Failed to get machine ID: %m");
403                 return;
404         }
405         sd_id128_to_string(machine, ids);
406
407         do_vacuum(s, ids, s->system_journal, "/var/log/journal/", &s->system_metrics);
408         do_vacuum(s, ids, s->runtime_journal, "/run/log/journal/", &s->runtime_metrics);
409
410         s->cached_available_space_timestamp = 0;
411 }
412
413 static void server_cache_machine_id(Server *s) {
414         sd_id128_t id;
415         int r;
416
417         assert(s);
418
419         r = sd_id128_get_machine(&id);
420         if (r < 0)
421                 return;
422
423         sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
424 }
425
426 static void server_cache_boot_id(Server *s) {
427         sd_id128_t id;
428         int r;
429
430         assert(s);
431
432         r = sd_id128_get_boot(&id);
433         if (r < 0)
434                 return;
435
436         sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
437 }
438
439 static void server_cache_hostname(Server *s) {
440         _cleanup_free_ char *t = NULL;
441         char *x;
442
443         assert(s);
444
445         t = gethostname_malloc();
446         if (!t)
447                 return;
448
449         x = strappend("_HOSTNAME=", t);
450         if (!x)
451                 return;
452
453         free(s->hostname_field);
454         s->hostname_field = x;
455 }
456
457 static bool shall_try_append_again(JournalFile *f, int r) {
458
459         /* -E2BIG            Hit configured limit
460            -EFBIG            Hit fs limit
461            -EDQUOT           Quota limit hit
462            -ENOSPC           Disk full
463            -EIO              I/O error of some kind (mmap)
464            -EHOSTDOWN        Other machine
465            -EBUSY            Unclean shutdown
466            -EPROTONOSUPPORT  Unsupported feature
467            -EBADMSG          Corrupted
468            -ENODATA          Truncated
469            -ESHUTDOWN        Already archived */
470
471         if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
472                 log_debug("%s: Allocation limit reached, rotating.", f->path);
473         else if (r == -EHOSTDOWN)
474                 log_info("%s: Journal file from other machine, rotating.", f->path);
475         else if (r == -EBUSY)
476                 log_info("%s: Unclean shutdown, rotating.", f->path);
477         else if (r == -EPROTONOSUPPORT)
478                 log_info("%s: Unsupported feature, rotating.", f->path);
479         else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
480                 log_warning("%s: Journal file corrupted, rotating.", f->path);
481         else if (r == -EIO)
482                 log_warning("%s: IO error, rotating.", f->path);
483         else
484                 return false;
485
486         return true;
487 }
488
489 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
490         JournalFile *f;
491         bool vacuumed = false;
492         int r;
493
494         assert(s);
495         assert(iovec);
496         assert(n > 0);
497
498         f = find_journal(s, uid);
499         if (!f)
500                 return;
501
502         if (journal_file_rotate_suggested(f, s->max_file_usec)) {
503                 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
504                 server_rotate(s);
505                 server_vacuum(s);
506                 vacuumed = true;
507
508                 f = find_journal(s, uid);
509                 if (!f)
510                         return;
511         }
512
513         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
514         if (r >= 0) {
515                 server_schedule_sync(s, priority);
516                 return;
517         }
518
519         if (vacuumed || !shall_try_append_again(f, r)) {
520                 size_t size = 0;
521                 unsigned i;
522                 for (i = 0; i < n; i++)
523                         size += iovec[i].iov_len;
524
525                 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, size);
526                 return;
527         }
528
529         server_rotate(s);
530         server_vacuum(s);
531
532         f = find_journal(s, uid);
533         if (!f)
534                 return;
535
536         log_debug("Retrying write.");
537         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
538         if (r < 0) {
539                 size_t size = 0;
540                 unsigned i;
541                 for (i = 0; i < n; i++)
542                         size += iovec[i].iov_len;
543
544                 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, size);
545         } else
546                 server_schedule_sync(s, priority);
547 }
548
549 static void dispatch_message_real(
550                 Server *s,
551                 struct iovec *iovec, unsigned n, unsigned m,
552                 const struct ucred *ucred,
553                 const struct timeval *tv,
554                 const char *label, size_t label_len,
555                 const char *unit_id,
556                 int priority,
557                 pid_t object_pid) {
558
559         char    pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
560                 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
561                 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
562                 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
563                 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
564                 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
565                 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
566                 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
567         uid_t object_uid;
568         gid_t object_gid;
569         char *x;
570         int r;
571         char *t, *c;
572         uid_t realuid = 0, owner = 0, journal_uid;
573         bool owner_valid = false;
574 #ifdef HAVE_AUDIT
575         char    audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
576                 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
577                 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
578                 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
579
580         uint32_t audit;
581         uid_t loginuid;
582 #endif
583
584         assert(s);
585         assert(iovec);
586         assert(n > 0);
587         assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
588
589         if (ucred) {
590                 realuid = ucred->uid;
591
592                 sprintf(pid, "_PID="PID_FMT, ucred->pid);
593                 IOVEC_SET_STRING(iovec[n++], pid);
594
595                 sprintf(uid, "_UID="UID_FMT, ucred->uid);
596                 IOVEC_SET_STRING(iovec[n++], uid);
597
598                 sprintf(gid, "_GID="GID_FMT, ucred->gid);
599                 IOVEC_SET_STRING(iovec[n++], gid);
600
601                 r = get_process_comm(ucred->pid, &t);
602                 if (r >= 0) {
603                         x = strappenda("_COMM=", t);
604                         free(t);
605                         IOVEC_SET_STRING(iovec[n++], x);
606                 }
607
608                 r = get_process_exe(ucred->pid, &t);
609                 if (r >= 0) {
610                         x = strappenda("_EXE=", t);
611                         free(t);
612                         IOVEC_SET_STRING(iovec[n++], x);
613                 }
614
615                 r = get_process_cmdline(ucred->pid, 0, false, &t);
616                 if (r >= 0) {
617                         x = strappenda("_CMDLINE=", t);
618                         free(t);
619                         IOVEC_SET_STRING(iovec[n++], x);
620                 }
621
622                 r = get_process_capeff(ucred->pid, &t);
623                 if (r >= 0) {
624                         x = strappenda("_CAP_EFFECTIVE=", t);
625                         free(t);
626                         IOVEC_SET_STRING(iovec[n++], x);
627                 }
628
629 #ifdef HAVE_AUDIT
630                 r = audit_session_from_pid(ucred->pid, &audit);
631                 if (r >= 0) {
632                         sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
633                         IOVEC_SET_STRING(iovec[n++], audit_session);
634                 }
635
636                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
637                 if (r >= 0) {
638                         sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
639                         IOVEC_SET_STRING(iovec[n++], audit_loginuid);
640                 }
641 #endif
642
643                 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
644                 if (r >= 0) {
645                         char *session = NULL;
646
647                         x = strappenda("_SYSTEMD_CGROUP=", c);
648                         IOVEC_SET_STRING(iovec[n++], x);
649
650                         r = cg_path_get_session(c, &t);
651                         if (r >= 0) {
652                                 session = strappenda("_SYSTEMD_SESSION=", t);
653                                 free(t);
654                                 IOVEC_SET_STRING(iovec[n++], session);
655                         }
656
657                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
658                                 owner_valid = true;
659
660                                 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
661                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
662                         }
663
664                         if (cg_path_get_unit(c, &t) >= 0) {
665                                 x = strappenda("_SYSTEMD_UNIT=", t);
666                                 free(t);
667                                 IOVEC_SET_STRING(iovec[n++], x);
668                         } else if (unit_id && !session) {
669                                 x = strappenda("_SYSTEMD_UNIT=", unit_id);
670                                 IOVEC_SET_STRING(iovec[n++], x);
671                         }
672
673                         if (cg_path_get_user_unit(c, &t) >= 0) {
674                                 x = strappenda("_SYSTEMD_USER_UNIT=", t);
675                                 free(t);
676                                 IOVEC_SET_STRING(iovec[n++], x);
677                         } else if (unit_id && session) {
678                                 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
679                                 IOVEC_SET_STRING(iovec[n++], x);
680                         }
681
682                         if (cg_path_get_slice(c, &t) >= 0) {
683                                 x = strappenda("_SYSTEMD_SLICE=", t);
684                                 free(t);
685                                 IOVEC_SET_STRING(iovec[n++], x);
686                         }
687
688                         free(c);
689                 } else if (unit_id) {
690                         x = strappenda("_SYSTEMD_UNIT=", unit_id);
691                         IOVEC_SET_STRING(iovec[n++], x);
692                 }
693
694 #ifdef HAVE_SELINUX
695                 if (mac_selinux_use()) {
696                         if (label) {
697                                 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
698
699                                 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
700                                 IOVEC_SET_STRING(iovec[n++], x);
701                         } else {
702                                 security_context_t con;
703
704                                 if (getpidcon(ucred->pid, &con) >= 0) {
705                                         x = strappenda("_SELINUX_CONTEXT=", con);
706
707                                         freecon(con);
708                                         IOVEC_SET_STRING(iovec[n++], x);
709                                 }
710                         }
711                 }
712 #endif
713         }
714         assert(n <= m);
715
716         if (object_pid) {
717                 r = get_process_uid(object_pid, &object_uid);
718                 if (r >= 0) {
719                         sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
720                         IOVEC_SET_STRING(iovec[n++], o_uid);
721                 }
722
723                 r = get_process_gid(object_pid, &object_gid);
724                 if (r >= 0) {
725                         sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
726                         IOVEC_SET_STRING(iovec[n++], o_gid);
727                 }
728
729                 r = get_process_comm(object_pid, &t);
730                 if (r >= 0) {
731                         x = strappenda("OBJECT_COMM=", t);
732                         free(t);
733                         IOVEC_SET_STRING(iovec[n++], x);
734                 }
735
736                 r = get_process_exe(object_pid, &t);
737                 if (r >= 0) {
738                         x = strappenda("OBJECT_EXE=", t);
739                         free(t);
740                         IOVEC_SET_STRING(iovec[n++], x);
741                 }
742
743                 r = get_process_cmdline(object_pid, 0, false, &t);
744                 if (r >= 0) {
745                         x = strappenda("OBJECT_CMDLINE=", t);
746                         free(t);
747                         IOVEC_SET_STRING(iovec[n++], x);
748                 }
749
750 #ifdef HAVE_AUDIT
751                 r = audit_session_from_pid(object_pid, &audit);
752                 if (r >= 0) {
753                         sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
754                         IOVEC_SET_STRING(iovec[n++], o_audit_session);
755                 }
756
757                 r = audit_loginuid_from_pid(object_pid, &loginuid);
758                 if (r >= 0) {
759                         sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
760                         IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
761                 }
762 #endif
763
764                 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
765                 if (r >= 0) {
766                         x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
767                         IOVEC_SET_STRING(iovec[n++], x);
768
769                         r = cg_path_get_session(c, &t);
770                         if (r >= 0) {
771                                 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
772                                 free(t);
773                                 IOVEC_SET_STRING(iovec[n++], x);
774                         }
775
776                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
777                                 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
778                                 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
779                         }
780
781                         if (cg_path_get_unit(c, &t) >= 0) {
782                                 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
783                                 free(t);
784                                 IOVEC_SET_STRING(iovec[n++], x);
785                         }
786
787                         if (cg_path_get_user_unit(c, &t) >= 0) {
788                                 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
789                                 free(t);
790                                 IOVEC_SET_STRING(iovec[n++], x);
791                         }
792
793                         free(c);
794                 }
795         }
796         assert(n <= m);
797
798         if (tv) {
799                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
800                 IOVEC_SET_STRING(iovec[n++], source_time);
801         }
802
803         /* Note that strictly speaking storing the boot id here is
804          * redundant since the entry includes this in-line
805          * anyway. However, we need this indexed, too. */
806         if (!isempty(s->boot_id_field))
807                 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
808
809         if (!isempty(s->machine_id_field))
810                 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
811
812         if (!isempty(s->hostname_field))
813                 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
814
815         assert(n <= m);
816
817         if (s->split_mode == SPLIT_UID && realuid > 0)
818                 /* Split up strictly by any UID */
819                 journal_uid = realuid;
820         else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
821                 /* Split up by login UIDs.  We do this only if the
822                  * realuid is not root, in order not to accidentally
823                  * leak privileged information to the user that is
824                  * logged by a privileged process that is part of an
825                  * unprivileged session. */
826                 journal_uid = owner;
827         else
828                 journal_uid = 0;
829
830         write_to_journal(s, journal_uid, iovec, n, priority);
831 }
832
833 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
834         char mid[11 + 32 + 1];
835         char buffer[16 + LINE_MAX + 1];
836         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
837         int n = 0;
838         va_list ap;
839         struct ucred ucred = {};
840
841         assert(s);
842         assert(format);
843
844         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
845         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
846
847         memcpy(buffer, "MESSAGE=", 8);
848         va_start(ap, format);
849         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
850         va_end(ap);
851         char_array_0(buffer);
852         IOVEC_SET_STRING(iovec[n++], buffer);
853
854         if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
855                 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
856                 char_array_0(mid);
857                 IOVEC_SET_STRING(iovec[n++], mid);
858         }
859
860         ucred.pid = getpid();
861         ucred.uid = getuid();
862         ucred.gid = getgid();
863
864         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
865 }
866
867 void server_dispatch_message(
868                 Server *s,
869                 struct iovec *iovec, unsigned n, unsigned m,
870                 const struct ucred *ucred,
871                 const struct timeval *tv,
872                 const char *label, size_t label_len,
873                 const char *unit_id,
874                 int priority,
875                 pid_t object_pid) {
876
877         int rl, r;
878         _cleanup_free_ char *path = NULL;
879         char *c;
880
881         assert(s);
882         assert(iovec || n == 0);
883
884         if (n == 0)
885                 return;
886
887         if (LOG_PRI(priority) > s->max_level_store)
888                 return;
889
890         /* Stop early in case the information will not be stored
891          * in a journal. */
892         if (s->storage == STORAGE_NONE)
893                 return;
894
895         if (!ucred)
896                 goto finish;
897
898         r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
899         if (r < 0)
900                 goto finish;
901
902         /* example: /user/lennart/3/foobar
903          *          /system/dbus.service/foobar
904          *
905          * So let's cut of everything past the third /, since that is
906          * where user directories start */
907
908         c = strchr(path, '/');
909         if (c) {
910                 c = strchr(c+1, '/');
911                 if (c) {
912                         c = strchr(c+1, '/');
913                         if (c)
914                                 *c = 0;
915                 }
916         }
917
918         rl = journal_rate_limit_test(s->rate_limit, path,
919                                      priority & LOG_PRIMASK, available_space(s, false));
920
921         if (rl == 0)
922                 return;
923
924         /* Write a suppression message if we suppressed something */
925         if (rl > 1)
926                 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
927                                       "Suppressed %u messages from %s", rl - 1, path);
928
929 finish:
930         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
931 }
932
933
934 static int system_journal_open(Server *s, bool flush_requested) {
935         int r;
936         char *fn;
937         sd_id128_t machine;
938         char ids[33];
939
940         r = sd_id128_get_machine(&machine);
941         if (r < 0)
942                 return log_error_errno(r, "Failed to get machine id: %m");
943
944         sd_id128_to_string(machine, ids);
945
946         if (!s->system_journal &&
947             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
948             (flush_requested
949              || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
950
951                 /* If in auto mode: first try to create the machine
952                  * path, but not the prefix.
953                  *
954                  * If in persistent mode: create /var/log/journal and
955                  * the machine path */
956
957                 if (s->storage == STORAGE_PERSISTENT)
958                         (void) mkdir("/var/log/journal/", 0755);
959
960                 fn = strappenda("/var/log/journal/", ids);
961                 (void) mkdir(fn, 0755);
962
963                 fn = strappenda(fn, "/system.journal");
964                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
965
966                 if (r >= 0)
967                         server_fix_perms(s, s->system_journal, 0);
968                 else if (r < 0) {
969                         if (r != -ENOENT && r != -EROFS)
970                                 log_warning_errno(r, "Failed to open system journal: %m");
971
972                         r = 0;
973                 }
974         }
975
976         if (!s->runtime_journal &&
977             (s->storage != STORAGE_NONE)) {
978
979                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
980                 if (!fn)
981                         return -ENOMEM;
982
983                 if (s->system_journal) {
984
985                         /* Try to open the runtime journal, but only
986                          * if it already exists, so that we can flush
987                          * it into the system journal */
988
989                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
990                         free(fn);
991
992                         if (r < 0) {
993                                 if (r != -ENOENT)
994                                         log_warning_errno(r, "Failed to open runtime journal: %m");
995
996                                 r = 0;
997                         }
998
999                 } else {
1000
1001                         /* OK, we really need the runtime journal, so create
1002                          * it if necessary. */
1003
1004                         (void) mkdir("/run/log", 0755);
1005                         (void) mkdir("/run/log/journal", 0755);
1006                         (void) mkdir_parents(fn, 0750);
1007
1008                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1009                         free(fn);
1010
1011                         if (r < 0)
1012                                 return log_error_errno(r, "Failed to open runtime journal: %m");
1013                 }
1014
1015                 if (s->runtime_journal)
1016                         server_fix_perms(s, s->runtime_journal, 0);
1017         }
1018
1019         available_space(s, true);
1020
1021         return r;
1022 }
1023
1024 int server_flush_to_var(Server *s) {
1025         sd_id128_t machine;
1026         sd_journal *j = NULL;
1027         char ts[FORMAT_TIMESPAN_MAX];
1028         usec_t start;
1029         unsigned n = 0;
1030         int r;
1031
1032         assert(s);
1033
1034         if (s->storage != STORAGE_AUTO &&
1035             s->storage != STORAGE_PERSISTENT)
1036                 return 0;
1037
1038         if (!s->runtime_journal)
1039                 return 0;
1040
1041         system_journal_open(s, true);
1042
1043         if (!s->system_journal)
1044                 return 0;
1045
1046         log_debug("Flushing to /var...");
1047
1048         start = now(CLOCK_MONOTONIC);
1049
1050         r = sd_id128_get_machine(&machine);
1051         if (r < 0)
1052                 return r;
1053
1054         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1055         if (r < 0)
1056                 return log_error_errno(r, "Failed to read runtime journal: %m");
1057
1058         sd_journal_set_data_threshold(j, 0);
1059
1060         SD_JOURNAL_FOREACH(j) {
1061                 Object *o = NULL;
1062                 JournalFile *f;
1063
1064                 f = j->current_file;
1065                 assert(f && f->current_offset > 0);
1066
1067                 n++;
1068
1069                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1070                 if (r < 0) {
1071                         log_error_errno(r, "Can't read entry: %m");
1072                         goto finish;
1073                 }
1074
1075                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1076                 if (r >= 0)
1077                         continue;
1078
1079                 if (!shall_try_append_again(s->system_journal, r)) {
1080                         log_error_errno(r, "Can't write entry: %m");
1081                         goto finish;
1082                 }
1083
1084                 server_rotate(s);
1085                 server_vacuum(s);
1086
1087                 if (!s->system_journal) {
1088                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1089                         r = -EIO;
1090                         goto finish;
1091                 }
1092
1093                 log_debug("Retrying write.");
1094                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1095                 if (r < 0) {
1096                         log_error_errno(r, "Can't write entry: %m");
1097                         goto finish;
1098                 }
1099         }
1100
1101 finish:
1102         journal_file_post_change(s->system_journal);
1103
1104         journal_file_close(s->runtime_journal);
1105         s->runtime_journal = NULL;
1106
1107         if (r >= 0)
1108                 rm_rf("/run/log/journal", false, true, false);
1109
1110         sd_journal_close(j);
1111
1112         server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1113
1114         return r;
1115 }
1116
1117 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1118         Server *s = userdata;
1119
1120         assert(s);
1121         assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1122
1123         if (revents != EPOLLIN) {
1124                 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1125                 return -EIO;
1126         }
1127
1128         for (;;) {
1129                 struct ucred *ucred = NULL;
1130                 struct timeval *tv = NULL;
1131                 struct cmsghdr *cmsg;
1132                 char *label = NULL;
1133                 size_t label_len = 0;
1134                 struct iovec iovec;
1135
1136                 union {
1137                         struct cmsghdr cmsghdr;
1138
1139                         /* We use NAME_MAX space for the SELinux label
1140                          * here. The kernel currently enforces no
1141                          * limit, but according to suggestions from
1142                          * the SELinux people this will change and it
1143                          * will probably be identical to NAME_MAX. For
1144                          * now we use that, but this should be updated
1145                          * one day when the final limit is known. */
1146                         uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1147                                     CMSG_SPACE(sizeof(struct timeval)) +
1148                                     CMSG_SPACE(sizeof(int)) + /* fd */
1149                                     CMSG_SPACE(NAME_MAX)]; /* selinux label */
1150                 } control = {};
1151                 union sockaddr_union sa = {};
1152                 struct msghdr msghdr = {
1153                         .msg_iov = &iovec,
1154                         .msg_iovlen = 1,
1155                         .msg_control = &control,
1156                         .msg_controllen = sizeof(control),
1157                         .msg_name = &sa,
1158                         .msg_namelen = sizeof(sa),
1159                 };
1160
1161                 ssize_t n;
1162                 int *fds = NULL;
1163                 unsigned n_fds = 0;
1164                 int v = 0;
1165                 size_t m;
1166
1167                 /* Try to get the right size, if we can. (Not all
1168                  * sockets support SIOCINQ, hence we just try, but
1169                  * don't rely on it. */
1170                 (void) ioctl(fd, SIOCINQ, &v);
1171
1172                 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1173                 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1174                                     (size_t) LINE_MAX,
1175                                     ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1176
1177                 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1178                         return log_oom();
1179
1180                 iovec.iov_base = s->buffer;
1181                 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1182
1183                 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1184                 if (n < 0) {
1185                         if (errno == EINTR || errno == EAGAIN)
1186                                 return 0;
1187
1188                         log_error_errno(errno, "recvmsg() failed: %m");
1189                         return -errno;
1190                 }
1191
1192                 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1193
1194                         if (cmsg->cmsg_level == SOL_SOCKET &&
1195                             cmsg->cmsg_type == SCM_CREDENTIALS &&
1196                             cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1197                                 ucred = (struct ucred*) CMSG_DATA(cmsg);
1198                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1199                                  cmsg->cmsg_type == SCM_SECURITY) {
1200                                 label = (char*) CMSG_DATA(cmsg);
1201                                 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1202                         } else if (cmsg->cmsg_level == SOL_SOCKET &&
1203                                    cmsg->cmsg_type == SO_TIMESTAMP &&
1204                                    cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1205                                 tv = (struct timeval*) CMSG_DATA(cmsg);
1206                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1207                                  cmsg->cmsg_type == SCM_RIGHTS) {
1208                                 fds = (int*) CMSG_DATA(cmsg);
1209                                 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1210                         }
1211                 }
1212
1213                 /* And a trailing NUL, just in case */
1214                 s->buffer[n] = 0;
1215
1216                 if (fd == s->syslog_fd) {
1217                         if (n > 0 && n_fds == 0)
1218                                 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1219                         else if (n_fds > 0)
1220                                 log_warning("Got file descriptors via syslog socket. Ignoring.");
1221
1222                 } else if (fd == s->native_fd) {
1223                         if (n > 0 && n_fds == 0)
1224                                 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1225                         else if (n == 0 && n_fds == 1)
1226                                 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1227                         else if (n_fds > 0)
1228                                 log_warning("Got too many file descriptors via native socket. Ignoring.");
1229
1230                 } else {
1231                         assert(fd == s->audit_fd);
1232
1233                         if (n > 0 && n_fds == 0)
1234                                 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1235                         else if (n_fds > 0)
1236                                 log_warning("Got file descriptors via audit socket. Ignoring.");
1237                 }
1238
1239                 close_many(fds, n_fds);
1240         }
1241 }
1242
1243 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1244         Server *s = userdata;
1245
1246         assert(s);
1247
1248         log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
1249
1250         server_flush_to_var(s);
1251         server_sync(s);
1252         server_vacuum(s);
1253
1254         touch("/run/systemd/journal/flushed");
1255
1256         return 0;
1257 }
1258
1259 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1260         Server *s = userdata;
1261
1262         assert(s);
1263
1264         log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
1265         server_rotate(s);
1266         server_vacuum(s);
1267
1268         return 0;
1269 }
1270
1271 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1272         Server *s = userdata;
1273
1274         assert(s);
1275
1276         log_received_signal(LOG_INFO, si);
1277
1278         sd_event_exit(s->event, 0);
1279         return 0;
1280 }
1281
1282 static int setup_signals(Server *s) {
1283         sigset_t mask;
1284         int r;
1285
1286         assert(s);
1287
1288         assert_se(sigemptyset(&mask) == 0);
1289         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1290         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1291
1292         r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1293         if (r < 0)
1294                 return r;
1295
1296         r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1297         if (r < 0)
1298                 return r;
1299
1300         r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1301         if (r < 0)
1302                 return r;
1303
1304         r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1305         if (r < 0)
1306                 return r;
1307
1308         return 0;
1309 }
1310
1311 static int server_parse_proc_cmdline(Server *s) {
1312         _cleanup_free_ char *line = NULL;
1313         const char *w, *state;
1314         size_t l;
1315         int r;
1316
1317         r = proc_cmdline(&line);
1318         if (r < 0) {
1319                 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
1320                 return 0;
1321         }
1322
1323         FOREACH_WORD_QUOTED(w, l, line, state) {
1324                 _cleanup_free_ char *word;
1325
1326                 word = strndup(w, l);
1327                 if (!word)
1328                         return -ENOMEM;
1329
1330                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1331                         r = parse_boolean(word + 35);
1332                         if (r < 0)
1333                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1334                         else
1335                                 s->forward_to_syslog = r;
1336                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1337                         r = parse_boolean(word + 33);
1338                         if (r < 0)
1339                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1340                         else
1341                                 s->forward_to_kmsg = r;
1342                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1343                         r = parse_boolean(word + 36);
1344                         if (r < 0)
1345                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1346                         else
1347                                 s->forward_to_console = r;
1348                 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1349                         r = parse_boolean(word + 33);
1350                         if (r < 0)
1351                                 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1352                         else
1353                                 s->forward_to_wall = r;
1354                 } else if (startswith(word, "systemd.journald"))
1355                         log_warning("Invalid systemd.journald parameter. Ignoring.");
1356         }
1357         /* do not warn about state here, since probably systemd already did */
1358
1359         return 0;
1360 }
1361
1362 static int server_parse_config_file(Server *s) {
1363         assert(s);
1364
1365         return config_parse_many("/etc/systemd/journald.conf",
1366                                  CONF_DIRS_NULSTR("systemd/journald.conf"),
1367                                  "Journal\0",
1368                                  config_item_perf_lookup, journald_gperf_lookup,
1369                                  false, s);
1370 }
1371
1372 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1373         Server *s = userdata;
1374
1375         assert(s);
1376
1377         server_sync(s);
1378         return 0;
1379 }
1380
1381 int server_schedule_sync(Server *s, int priority) {
1382         int r;
1383
1384         assert(s);
1385
1386         if (priority <= LOG_CRIT) {
1387                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1388                 server_sync(s);
1389                 return 0;
1390         }
1391
1392         if (s->sync_scheduled)
1393                 return 0;
1394
1395         if (s->sync_interval_usec > 0) {
1396                 usec_t when;
1397
1398                 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1399                 if (r < 0)
1400                         return r;
1401
1402                 when += s->sync_interval_usec;
1403
1404                 if (!s->sync_event_source) {
1405                         r = sd_event_add_time(
1406                                         s->event,
1407                                         &s->sync_event_source,
1408                                         CLOCK_MONOTONIC,
1409                                         when, 0,
1410                                         server_dispatch_sync, s);
1411                         if (r < 0)
1412                                 return r;
1413
1414                         r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1415                 } else {
1416                         r = sd_event_source_set_time(s->sync_event_source, when);
1417                         if (r < 0)
1418                                 return r;
1419
1420                         r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1421                 }
1422                 if (r < 0)
1423                         return r;
1424
1425                 s->sync_scheduled = true;
1426         }
1427
1428         return 0;
1429 }
1430
1431 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1432         Server *s = userdata;
1433
1434         assert(s);
1435
1436         server_cache_hostname(s);
1437         return 0;
1438 }
1439
1440 static int server_open_hostname(Server *s) {
1441         int r;
1442
1443         assert(s);
1444
1445         s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1446         if (s->hostname_fd < 0)
1447                 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1448
1449         r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1450         if (r < 0) {
1451                 /* kernels prior to 3.2 don't support polling this file. Ignore
1452                  * the failure. */
1453                 if (r == -EPERM) {
1454                         log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1455                                         strerror(-r));
1456                         s->hostname_fd = safe_close(s->hostname_fd);
1457                         return 0;
1458                 }
1459
1460                 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1461         }
1462
1463         r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1464         if (r < 0)
1465                 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1466
1467         return 0;
1468 }
1469
1470 int server_init(Server *s) {
1471         int n, r, fd;
1472
1473         assert(s);
1474
1475         zero(*s);
1476         s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = -1;
1477         s->compress = true;
1478         s->seal = true;
1479
1480         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1481         s->sync_scheduled = false;
1482
1483         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1484         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1485
1486         s->forward_to_wall = true;
1487
1488         s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1489
1490         s->max_level_store = LOG_DEBUG;
1491         s->max_level_syslog = LOG_DEBUG;
1492         s->max_level_kmsg = LOG_NOTICE;
1493         s->max_level_console = LOG_INFO;
1494         s->max_level_wall = LOG_EMERG;
1495
1496         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1497         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1498
1499         server_parse_config_file(s);
1500         server_parse_proc_cmdline(s);
1501         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1502                 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1503                           s->rate_limit_interval, s->rate_limit_burst);
1504                 s->rate_limit_interval = s->rate_limit_burst = 0;
1505         }
1506
1507         mkdir_p("/run/systemd/journal", 0755);
1508
1509         s->user_journals = ordered_hashmap_new(NULL);
1510         if (!s->user_journals)
1511                 return log_oom();
1512
1513         s->mmap = mmap_cache_new();
1514         if (!s->mmap)
1515                 return log_oom();
1516
1517         r = sd_event_default(&s->event);
1518         if (r < 0)
1519                 return log_error_errno(r, "Failed to create event loop: %m");
1520
1521         sd_event_set_watchdog(s->event, true);
1522
1523         n = sd_listen_fds(true);
1524         if (n < 0)
1525                 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1526
1527         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1528
1529                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1530
1531                         if (s->native_fd >= 0) {
1532                                 log_error("Too many native sockets passed.");
1533                                 return -EINVAL;
1534                         }
1535
1536                         s->native_fd = fd;
1537
1538                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1539
1540                         if (s->stdout_fd >= 0) {
1541                                 log_error("Too many stdout sockets passed.");
1542                                 return -EINVAL;
1543                         }
1544
1545                         s->stdout_fd = fd;
1546
1547                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1548                            sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1549
1550                         if (s->syslog_fd >= 0) {
1551                                 log_error("Too many /dev/log sockets passed.");
1552                                 return -EINVAL;
1553                         }
1554
1555                         s->syslog_fd = fd;
1556
1557                 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1558
1559                         if (s->audit_fd >= 0) {
1560                                 log_error("Too many audit sockets passed.");
1561                                 return -EINVAL;
1562                         }
1563
1564                         s->audit_fd = fd;
1565
1566                 } else {
1567                         log_warning("Unknown socket passed as file descriptor %d, ignoring.", fd);
1568
1569                         /* Let's close the fd, better be safe than
1570                            sorry. The fd might reference some resource
1571                            that we really want to release if we don't
1572                            make use of it. */
1573
1574                         safe_close(fd);
1575                 }
1576         }
1577
1578         r = server_open_syslog_socket(s);
1579         if (r < 0)
1580                 return r;
1581
1582         r = server_open_native_socket(s);
1583         if (r < 0)
1584                 return r;
1585
1586         r = server_open_stdout_socket(s);
1587         if (r < 0)
1588                 return r;
1589
1590         r = server_open_dev_kmsg(s);
1591         if (r < 0)
1592                 return r;
1593
1594         r = server_open_audit(s);
1595         if (r < 0)
1596                 return r;
1597
1598         r = server_open_kernel_seqnum(s);
1599         if (r < 0)
1600                 return r;
1601
1602         r = server_open_hostname(s);
1603         if (r < 0)
1604                 return r;
1605
1606         r = setup_signals(s);
1607         if (r < 0)
1608                 return r;
1609
1610         s->udev = udev_new();
1611         if (!s->udev)
1612                 return -ENOMEM;
1613
1614         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1615         if (!s->rate_limit)
1616                 return -ENOMEM;
1617
1618         r = cg_get_root_path(&s->cgroup_root);
1619         if (r < 0)
1620                 return r;
1621
1622         server_cache_hostname(s);
1623         server_cache_boot_id(s);
1624         server_cache_machine_id(s);
1625
1626         r = system_journal_open(s, false);
1627         if (r < 0)
1628                 return r;
1629
1630         return 0;
1631 }
1632
1633 void server_maybe_append_tags(Server *s) {
1634 #ifdef HAVE_GCRYPT
1635         JournalFile *f;
1636         Iterator i;
1637         usec_t n;
1638
1639         n = now(CLOCK_REALTIME);
1640
1641         if (s->system_journal)
1642                 journal_file_maybe_append_tag(s->system_journal, n);
1643
1644         ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1645                 journal_file_maybe_append_tag(f, n);
1646 #endif
1647 }
1648
1649 void server_done(Server *s) {
1650         JournalFile *f;
1651         assert(s);
1652
1653         while (s->stdout_streams)
1654                 stdout_stream_free(s->stdout_streams);
1655
1656         if (s->system_journal)
1657                 journal_file_close(s->system_journal);
1658
1659         if (s->runtime_journal)
1660                 journal_file_close(s->runtime_journal);
1661
1662         while ((f = ordered_hashmap_steal_first(s->user_journals)))
1663                 journal_file_close(f);
1664
1665         ordered_hashmap_free(s->user_journals);
1666
1667         sd_event_source_unref(s->syslog_event_source);
1668         sd_event_source_unref(s->native_event_source);
1669         sd_event_source_unref(s->stdout_event_source);
1670         sd_event_source_unref(s->dev_kmsg_event_source);
1671         sd_event_source_unref(s->audit_event_source);
1672         sd_event_source_unref(s->sync_event_source);
1673         sd_event_source_unref(s->sigusr1_event_source);
1674         sd_event_source_unref(s->sigusr2_event_source);
1675         sd_event_source_unref(s->sigterm_event_source);
1676         sd_event_source_unref(s->sigint_event_source);
1677         sd_event_source_unref(s->hostname_event_source);
1678         sd_event_unref(s->event);
1679
1680         safe_close(s->syslog_fd);
1681         safe_close(s->native_fd);
1682         safe_close(s->stdout_fd);
1683         safe_close(s->dev_kmsg_fd);
1684         safe_close(s->audit_fd);
1685         safe_close(s->hostname_fd);
1686
1687         if (s->rate_limit)
1688                 journal_rate_limit_free(s->rate_limit);
1689
1690         if (s->kernel_seqnum)
1691                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1692
1693         free(s->buffer);
1694         free(s->tty_path);
1695         free(s->cgroup_root);
1696         free(s->hostname_field);
1697
1698         if (s->mmap)
1699                 mmap_cache_unref(s->mmap);
1700
1701         if (s->udev)
1702                 udev_unref(s->udev);
1703 }