chiark / gitweb /
journald: reuse IOVEC_TOTAL_SIZE() macros where possible
[elogind.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
26 #include <sys/mman.h>
27 #include <sys/timerfd.h>
28
29 #include <libudev.h>
30
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33 #include "sd-daemon.h"
34 #include "fileio.h"
35 #include "mkdir.h"
36 #include "hashmap.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
40 #include "list.h"
41 #include "missing.h"
42 #include "conf-parser.h"
43 #include "selinux-util.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-rate-limit.h"
48 #include "journald-kmsg.h"
49 #include "journald-syslog.h"
50 #include "journald-stream.h"
51 #include "journald-console.h"
52 #include "journald-native.h"
53 #include "journald-audit.h"
54 #include "journald-server.h"
55
56 #ifdef HAVE_ACL
57 #include <sys/acl.h>
58 #include <acl/libacl.h>
59 #include "acl-util.h"
60 #endif
61
62 #ifdef HAVE_SELINUX
63 #include <selinux/selinux.h>
64 #endif
65
/* Upper bound on the number of per-user journal files kept open at the
 * same time; find_journal() closes the oldest entry once it is reached. */
#define USER_JOURNALS_MAX 1024

/* How long a computed available-space figure may be served from the cache
 * before available_space() recomputes it. */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)

/* Built-in defaults; presumably applied during server set-up, which is not
 * part of this section of the file. */
#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
74
75 static const char* const storage_table[_STORAGE_MAX] = {
76         [STORAGE_AUTO] = "auto",
77         [STORAGE_VOLATILE] = "volatile",
78         [STORAGE_PERSISTENT] = "persistent",
79         [STORAGE_NONE] = "none"
80 };
81
82 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
83 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
84
85 static const char* const split_mode_table[_SPLIT_MAX] = {
86         [SPLIT_LOGIN] = "login",
87         [SPLIT_UID] = "uid",
88         [SPLIT_NONE] = "none",
89 };
90
91 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
92 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
93
94 static uint64_t available_space(Server *s, bool verbose) {
95         char ids[33];
96         _cleanup_free_ char *p = NULL;
97         sd_id128_t machine;
98         struct statvfs ss;
99         uint64_t sum = 0, ss_avail = 0, avail = 0;
100         int r;
101         _cleanup_closedir_ DIR *d = NULL;
102         usec_t ts;
103         const char *f;
104         JournalMetrics *m;
105
106         ts = now(CLOCK_MONOTONIC);
107
108         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
109             && !verbose)
110                 return s->cached_available_space;
111
112         r = sd_id128_get_machine(&machine);
113         if (r < 0)
114                 return 0;
115
116         if (s->system_journal) {
117                 f = "/var/log/journal/";
118                 m = &s->system_metrics;
119         } else {
120                 f = "/run/log/journal/";
121                 m = &s->runtime_metrics;
122         }
123
124         assert(m);
125
126         p = strappend(f, sd_id128_to_string(machine, ids));
127         if (!p)
128                 return 0;
129
130         d = opendir(p);
131         if (!d)
132                 return 0;
133
134         if (fstatvfs(dirfd(d), &ss) < 0)
135                 return 0;
136
137         for (;;) {
138                 struct stat st;
139                 struct dirent *de;
140
141                 errno = 0;
142                 de = readdir(d);
143                 if (!de && errno != 0)
144                         return 0;
145
146                 if (!de)
147                         break;
148
149                 if (!endswith(de->d_name, ".journal") &&
150                     !endswith(de->d_name, ".journal~"))
151                         continue;
152
153                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
154                         continue;
155
156                 if (!S_ISREG(st.st_mode))
157                         continue;
158
159                 sum += (uint64_t) st.st_blocks * 512UL;
160         }
161
162         ss_avail = ss.f_bsize * ss.f_bavail;
163
164         /* If we reached a high mark, we will always allow this much
165          * again, unless usage goes above max_use. This watermark
166          * value is cached so that we don't give up space on pressure,
167          * but hover below the maximum usage. */
168
169         if (m->use < sum)
170                 m->use = sum;
171
172         avail = LESS_BY(ss_avail, m->keep_free);
173
174         s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
175         s->cached_available_space_timestamp = ts;
176
177         if (verbose) {
178                 char    fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
179                         fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
180
181                 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
182                                       "%s journal is using %s (max allowed %s, "
183                                       "trying to leave %s free of %s available → current limit %s).",
184                                       s->system_journal ? "Permanent" : "Runtime",
185                                       format_bytes(fb1, sizeof(fb1), sum),
186                                       format_bytes(fb2, sizeof(fb2), m->max_use),
187                                       format_bytes(fb3, sizeof(fb3), m->keep_free),
188                                       format_bytes(fb4, sizeof(fb4), ss_avail),
189                                       format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
190         }
191
192         return s->cached_available_space;
193 }
194
195 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
196         int r;
197 #ifdef HAVE_ACL
198         acl_t acl;
199         acl_entry_t entry;
200         acl_permset_t permset;
201 #endif
202
203         assert(f);
204
205         r = fchmod(f->fd, 0640);
206         if (r < 0)
207                 log_warning_errno(r, "Failed to fix access mode on %s, ignoring: %m", f->path);
208
209 #ifdef HAVE_ACL
210         if (uid <= SYSTEM_UID_MAX)
211                 return;
212
213         acl = acl_get_fd(f->fd);
214         if (!acl) {
215                 log_warning_errno(errno, "Failed to read ACL on %s, ignoring: %m", f->path);
216                 return;
217         }
218
219         r = acl_find_uid(acl, uid, &entry);
220         if (r <= 0) {
221
222                 if (acl_create_entry(&acl, &entry) < 0 ||
223                     acl_set_tag_type(entry, ACL_USER) < 0 ||
224                     acl_set_qualifier(entry, &uid) < 0) {
225                         log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
226                         goto finish;
227                 }
228         }
229
230         /* We do not recalculate the mask unconditionally here,
231          * so that the fchmod() mask above stays intact. */
232         if (acl_get_permset(entry, &permset) < 0 ||
233             acl_add_perm(permset, ACL_READ) < 0 ||
234             calc_acl_mask_if_needed(&acl) < 0) {
235                 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
236                 goto finish;
237         }
238
239         if (acl_set_fd(f->fd, acl) < 0)
240                 log_warning_errno(errno, "Failed to set ACL on %s, ignoring: %m", f->path);
241
242 finish:
243         acl_free(acl);
244 #endif
245 }
246
247 static JournalFile* find_journal(Server *s, uid_t uid) {
248         _cleanup_free_ char *p = NULL;
249         int r;
250         JournalFile *f;
251         sd_id128_t machine;
252
253         assert(s);
254
255         /* We split up user logs only on /var, not on /run. If the
256          * runtime file is open, we write to it exclusively, in order
257          * to guarantee proper order as soon as we flush /run to
258          * /var and close the runtime file. */
259
260         if (s->runtime_journal)
261                 return s->runtime_journal;
262
263         if (uid <= SYSTEM_UID_MAX)
264                 return s->system_journal;
265
266         r = sd_id128_get_machine(&machine);
267         if (r < 0)
268                 return s->system_journal;
269
270         f = ordered_hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
271         if (f)
272                 return f;
273
274         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
275                      SD_ID128_FORMAT_VAL(machine), uid) < 0)
276                 return s->system_journal;
277
278         while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
279                 /* Too many open? Then let's close one */
280                 f = ordered_hashmap_steal_first(s->user_journals);
281                 assert(f);
282                 journal_file_close(f);
283         }
284
285         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
286         if (r < 0)
287                 return s->system_journal;
288
289         server_fix_perms(s, f, uid);
290
291         r = ordered_hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
292         if (r < 0) {
293                 journal_file_close(f);
294                 return s->system_journal;
295         }
296
297         return f;
298 }
299
300 static int do_rotate(
301                 Server *s,
302                 JournalFile **f,
303                 const char* name,
304                 bool seal,
305                 uint32_t uid) {
306
307         int r;
308         assert(s);
309
310         if (!*f)
311                 return -EINVAL;
312
313         r = journal_file_rotate(f, s->compress, seal);
314         if (r < 0)
315                 if (*f)
316                         log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
317                 else
318                         log_error_errno(r, "Failed to create new %s journal: %m", name);
319         else
320                 server_fix_perms(s, *f, uid);
321
322         return r;
323 }
324
325 void server_rotate(Server *s) {
326         JournalFile *f;
327         void *k;
328         Iterator i;
329         int r;
330
331         log_debug("Rotating...");
332
333         do_rotate(s, &s->runtime_journal, "runtime", false, 0);
334         do_rotate(s, &s->system_journal, "system", s->seal, 0);
335
336         ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
337                 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
338                 if (r >= 0)
339                         ordered_hashmap_replace(s->user_journals, k, f);
340                 else if (!f)
341                         /* Old file has been closed and deallocated */
342                         ordered_hashmap_remove(s->user_journals, k);
343         }
344 }
345
346 void server_sync(Server *s) {
347         JournalFile *f;
348         void *k;
349         Iterator i;
350         int r;
351
352         if (s->system_journal) {
353                 r = journal_file_set_offline(s->system_journal);
354                 if (r < 0)
355                         log_error_errno(r, "Failed to sync system journal: %m");
356         }
357
358         ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
359                 r = journal_file_set_offline(f);
360                 if (r < 0)
361                         log_error_errno(r, "Failed to sync user journal: %m");
362         }
363
364         if (s->sync_event_source) {
365                 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
366                 if (r < 0)
367                         log_error_errno(r, "Failed to disable sync timer source: %m");
368         }
369
370         s->sync_scheduled = false;
371 }
372
373 static void do_vacuum(
374                 Server *s,
375                 const char *id,
376                 JournalFile *f,
377                 const char* path,
378                 JournalMetrics *metrics) {
379
380         const char *p;
381         int r;
382
383         if (!f)
384                 return;
385
386         p = strappenda(path, id);
387         r = journal_directory_vacuum(p, metrics->max_use, s->max_retention_usec, &s->oldest_file_usec, false);
388         if (r < 0 && r != -ENOENT)
389                 log_error_errno(r, "Failed to vacuum %s: %m", p);
390 }
391
392 void server_vacuum(Server *s) {
393         char ids[33];
394         sd_id128_t machine;
395         int r;
396
397         log_debug("Vacuuming...");
398
399         s->oldest_file_usec = 0;
400
401         r = sd_id128_get_machine(&machine);
402         if (r < 0) {
403                 log_error_errno(r, "Failed to get machine ID: %m");
404                 return;
405         }
406         sd_id128_to_string(machine, ids);
407
408         do_vacuum(s, ids, s->system_journal, "/var/log/journal/", &s->system_metrics);
409         do_vacuum(s, ids, s->runtime_journal, "/run/log/journal/", &s->runtime_metrics);
410
411         s->cached_available_space_timestamp = 0;
412 }
413
414 static void server_cache_machine_id(Server *s) {
415         sd_id128_t id;
416         int r;
417
418         assert(s);
419
420         r = sd_id128_get_machine(&id);
421         if (r < 0)
422                 return;
423
424         sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
425 }
426
427 static void server_cache_boot_id(Server *s) {
428         sd_id128_t id;
429         int r;
430
431         assert(s);
432
433         r = sd_id128_get_boot(&id);
434         if (r < 0)
435                 return;
436
437         sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
438 }
439
440 static void server_cache_hostname(Server *s) {
441         _cleanup_free_ char *t = NULL;
442         char *x;
443
444         assert(s);
445
446         t = gethostname_malloc();
447         if (!t)
448                 return;
449
450         x = strappend("_HOSTNAME=", t);
451         if (!x)
452                 return;
453
454         free(s->hostname_field);
455         s->hostname_field = x;
456 }
457
458 static bool shall_try_append_again(JournalFile *f, int r) {
459
460         /* -E2BIG            Hit configured limit
461            -EFBIG            Hit fs limit
462            -EDQUOT           Quota limit hit
463            -ENOSPC           Disk full
464            -EIO              I/O error of some kind (mmap)
465            -EHOSTDOWN        Other machine
466            -EBUSY            Unclean shutdown
467            -EPROTONOSUPPORT  Unsupported feature
468            -EBADMSG          Corrupted
469            -ENODATA          Truncated
470            -ESHUTDOWN        Already archived
471            -EIDRM            Journal file has been deleted */
472
473         if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
474                 log_debug("%s: Allocation limit reached, rotating.", f->path);
475         else if (r == -EHOSTDOWN)
476                 log_info("%s: Journal file from other machine, rotating.", f->path);
477         else if (r == -EBUSY)
478                 log_info("%s: Unclean shutdown, rotating.", f->path);
479         else if (r == -EPROTONOSUPPORT)
480                 log_info("%s: Unsupported feature, rotating.", f->path);
481         else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
482                 log_warning("%s: Journal file corrupted, rotating.", f->path);
483         else if (r == -EIO)
484                 log_warning("%s: IO error, rotating.", f->path);
485         else if (r == -EIDRM)
486                 log_warning("%s: Journal file has been deleted, rotating.", f->path);
487         else
488                 return false;
489
490         return true;
491 }
492
493 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
494         JournalFile *f;
495         bool vacuumed = false;
496         int r;
497
498         assert(s);
499         assert(iovec);
500         assert(n > 0);
501
502         f = find_journal(s, uid);
503         if (!f)
504                 return;
505
506         if (journal_file_rotate_suggested(f, s->max_file_usec)) {
507                 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
508                 server_rotate(s);
509                 server_vacuum(s);
510                 vacuumed = true;
511
512                 f = find_journal(s, uid);
513                 if (!f)
514                         return;
515         }
516
517         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
518         if (r >= 0) {
519                 server_schedule_sync(s, priority);
520                 return;
521         }
522
523         if (vacuumed || !shall_try_append_again(f, r)) {
524                 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
525                 return;
526         }
527
528         server_rotate(s);
529         server_vacuum(s);
530
531         f = find_journal(s, uid);
532         if (!f)
533                 return;
534
535         log_debug("Retrying write.");
536         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
537         if (r < 0)
538                 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
539         else
540                 server_schedule_sync(s, priority);
541 }
542
543 static void dispatch_message_real(
544                 Server *s,
545                 struct iovec *iovec, unsigned n, unsigned m,
546                 const struct ucred *ucred,
547                 const struct timeval *tv,
548                 const char *label, size_t label_len,
549                 const char *unit_id,
550                 int priority,
551                 pid_t object_pid) {
552
553         char    pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
554                 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
555                 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
556                 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
557                 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
558                 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
559                 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
560                 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
561         uid_t object_uid;
562         gid_t object_gid;
563         char *x;
564         int r;
565         char *t, *c;
566         uid_t realuid = 0, owner = 0, journal_uid;
567         bool owner_valid = false;
568 #ifdef HAVE_AUDIT
569         char    audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
570                 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
571                 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
572                 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
573
574         uint32_t audit;
575         uid_t loginuid;
576 #endif
577
578         assert(s);
579         assert(iovec);
580         assert(n > 0);
581         assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
582
583         if (ucred) {
584                 realuid = ucred->uid;
585
586                 sprintf(pid, "_PID="PID_FMT, ucred->pid);
587                 IOVEC_SET_STRING(iovec[n++], pid);
588
589                 sprintf(uid, "_UID="UID_FMT, ucred->uid);
590                 IOVEC_SET_STRING(iovec[n++], uid);
591
592                 sprintf(gid, "_GID="GID_FMT, ucred->gid);
593                 IOVEC_SET_STRING(iovec[n++], gid);
594
595                 r = get_process_comm(ucred->pid, &t);
596                 if (r >= 0) {
597                         x = strappenda("_COMM=", t);
598                         free(t);
599                         IOVEC_SET_STRING(iovec[n++], x);
600                 }
601
602                 r = get_process_exe(ucred->pid, &t);
603                 if (r >= 0) {
604                         x = strappenda("_EXE=", t);
605                         free(t);
606                         IOVEC_SET_STRING(iovec[n++], x);
607                 }
608
609                 r = get_process_cmdline(ucred->pid, 0, false, &t);
610                 if (r >= 0) {
611                         x = strappenda("_CMDLINE=", t);
612                         free(t);
613                         IOVEC_SET_STRING(iovec[n++], x);
614                 }
615
616                 r = get_process_capeff(ucred->pid, &t);
617                 if (r >= 0) {
618                         x = strappenda("_CAP_EFFECTIVE=", t);
619                         free(t);
620                         IOVEC_SET_STRING(iovec[n++], x);
621                 }
622
623 #ifdef HAVE_AUDIT
624                 r = audit_session_from_pid(ucred->pid, &audit);
625                 if (r >= 0) {
626                         sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
627                         IOVEC_SET_STRING(iovec[n++], audit_session);
628                 }
629
630                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
631                 if (r >= 0) {
632                         sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
633                         IOVEC_SET_STRING(iovec[n++], audit_loginuid);
634                 }
635 #endif
636
637                 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
638                 if (r >= 0) {
639                         char *session = NULL;
640
641                         x = strappenda("_SYSTEMD_CGROUP=", c);
642                         IOVEC_SET_STRING(iovec[n++], x);
643
644                         r = cg_path_get_session(c, &t);
645                         if (r >= 0) {
646                                 session = strappenda("_SYSTEMD_SESSION=", t);
647                                 free(t);
648                                 IOVEC_SET_STRING(iovec[n++], session);
649                         }
650
651                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
652                                 owner_valid = true;
653
654                                 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
655                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
656                         }
657
658                         if (cg_path_get_unit(c, &t) >= 0) {
659                                 x = strappenda("_SYSTEMD_UNIT=", t);
660                                 free(t);
661                                 IOVEC_SET_STRING(iovec[n++], x);
662                         } else if (unit_id && !session) {
663                                 x = strappenda("_SYSTEMD_UNIT=", unit_id);
664                                 IOVEC_SET_STRING(iovec[n++], x);
665                         }
666
667                         if (cg_path_get_user_unit(c, &t) >= 0) {
668                                 x = strappenda("_SYSTEMD_USER_UNIT=", t);
669                                 free(t);
670                                 IOVEC_SET_STRING(iovec[n++], x);
671                         } else if (unit_id && session) {
672                                 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
673                                 IOVEC_SET_STRING(iovec[n++], x);
674                         }
675
676                         if (cg_path_get_slice(c, &t) >= 0) {
677                                 x = strappenda("_SYSTEMD_SLICE=", t);
678                                 free(t);
679                                 IOVEC_SET_STRING(iovec[n++], x);
680                         }
681
682                         free(c);
683                 } else if (unit_id) {
684                         x = strappenda("_SYSTEMD_UNIT=", unit_id);
685                         IOVEC_SET_STRING(iovec[n++], x);
686                 }
687
688 #ifdef HAVE_SELINUX
689                 if (mac_selinux_use()) {
690                         if (label) {
691                                 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
692
693                                 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
694                                 IOVEC_SET_STRING(iovec[n++], x);
695                         } else {
696                                 security_context_t con;
697
698                                 if (getpidcon(ucred->pid, &con) >= 0) {
699                                         x = strappenda("_SELINUX_CONTEXT=", con);
700
701                                         freecon(con);
702                                         IOVEC_SET_STRING(iovec[n++], x);
703                                 }
704                         }
705                 }
706 #endif
707         }
708         assert(n <= m);
709
710         if (object_pid) {
711                 r = get_process_uid(object_pid, &object_uid);
712                 if (r >= 0) {
713                         sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
714                         IOVEC_SET_STRING(iovec[n++], o_uid);
715                 }
716
717                 r = get_process_gid(object_pid, &object_gid);
718                 if (r >= 0) {
719                         sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
720                         IOVEC_SET_STRING(iovec[n++], o_gid);
721                 }
722
723                 r = get_process_comm(object_pid, &t);
724                 if (r >= 0) {
725                         x = strappenda("OBJECT_COMM=", t);
726                         free(t);
727                         IOVEC_SET_STRING(iovec[n++], x);
728                 }
729
730                 r = get_process_exe(object_pid, &t);
731                 if (r >= 0) {
732                         x = strappenda("OBJECT_EXE=", t);
733                         free(t);
734                         IOVEC_SET_STRING(iovec[n++], x);
735                 }
736
737                 r = get_process_cmdline(object_pid, 0, false, &t);
738                 if (r >= 0) {
739                         x = strappenda("OBJECT_CMDLINE=", t);
740                         free(t);
741                         IOVEC_SET_STRING(iovec[n++], x);
742                 }
743
744 #ifdef HAVE_AUDIT
745                 r = audit_session_from_pid(object_pid, &audit);
746                 if (r >= 0) {
747                         sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
748                         IOVEC_SET_STRING(iovec[n++], o_audit_session);
749                 }
750
751                 r = audit_loginuid_from_pid(object_pid, &loginuid);
752                 if (r >= 0) {
753                         sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
754                         IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
755                 }
756 #endif
757
758                 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
759                 if (r >= 0) {
760                         x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
761                         IOVEC_SET_STRING(iovec[n++], x);
762
763                         r = cg_path_get_session(c, &t);
764                         if (r >= 0) {
765                                 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
766                                 free(t);
767                                 IOVEC_SET_STRING(iovec[n++], x);
768                         }
769
770                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
771                                 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
772                                 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
773                         }
774
775                         if (cg_path_get_unit(c, &t) >= 0) {
776                                 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
777                                 free(t);
778                                 IOVEC_SET_STRING(iovec[n++], x);
779                         }
780
781                         if (cg_path_get_user_unit(c, &t) >= 0) {
782                                 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
783                                 free(t);
784                                 IOVEC_SET_STRING(iovec[n++], x);
785                         }
786
787                         free(c);
788                 }
789         }
790         assert(n <= m);
791
792         if (tv) {
793                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
794                 IOVEC_SET_STRING(iovec[n++], source_time);
795         }
796
797         /* Note that strictly speaking storing the boot id here is
798          * redundant since the entry includes this in-line
799          * anyway. However, we need this indexed, too. */
800         if (!isempty(s->boot_id_field))
801                 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
802
803         if (!isempty(s->machine_id_field))
804                 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
805
806         if (!isempty(s->hostname_field))
807                 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
808
809         assert(n <= m);
810
811         if (s->split_mode == SPLIT_UID && realuid > 0)
812                 /* Split up strictly by any UID */
813                 journal_uid = realuid;
814         else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
815                 /* Split up by login UIDs.  We do this only if the
816                  * realuid is not root, in order not to accidentally
817                  * leak privileged information to the user that is
818                  * logged by a privileged process that is part of an
819                  * unprivileged session. */
820                 journal_uid = owner;
821         else
822                 journal_uid = 0;
823
824         write_to_journal(s, journal_uid, iovec, n, priority);
825 }
826
827 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
828         char mid[11 + 32 + 1];
829         char buffer[16 + LINE_MAX + 1];
830         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
831         int n = 0;
832         va_list ap;
833         struct ucred ucred = {};
834
835         assert(s);
836         assert(format);
837
838         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
839         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
840
841         memcpy(buffer, "MESSAGE=", 8);
842         va_start(ap, format);
843         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
844         va_end(ap);
845         char_array_0(buffer);
846         IOVEC_SET_STRING(iovec[n++], buffer);
847
848         if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
849                 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
850                 char_array_0(mid);
851                 IOVEC_SET_STRING(iovec[n++], mid);
852         }
853
854         ucred.pid = getpid();
855         ucred.uid = getuid();
856         ucred.gid = getgid();
857
858         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
859 }
860
861 void server_dispatch_message(
862                 Server *s,
863                 struct iovec *iovec, unsigned n, unsigned m,
864                 const struct ucred *ucred,
865                 const struct timeval *tv,
866                 const char *label, size_t label_len,
867                 const char *unit_id,
868                 int priority,
869                 pid_t object_pid) {
870
871         int rl, r;
872         _cleanup_free_ char *path = NULL;
873         char *c;
874
875         assert(s);
876         assert(iovec || n == 0);
877
878         if (n == 0)
879                 return;
880
881         if (LOG_PRI(priority) > s->max_level_store)
882                 return;
883
884         /* Stop early in case the information will not be stored
885          * in a journal. */
886         if (s->storage == STORAGE_NONE)
887                 return;
888
889         if (!ucred)
890                 goto finish;
891
892         r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
893         if (r < 0)
894                 goto finish;
895
896         /* example: /user/lennart/3/foobar
897          *          /system/dbus.service/foobar
898          *
899          * So let's cut of everything past the third /, since that is
900          * where user directories start */
901
902         c = strchr(path, '/');
903         if (c) {
904                 c = strchr(c+1, '/');
905                 if (c) {
906                         c = strchr(c+1, '/');
907                         if (c)
908                                 *c = 0;
909                 }
910         }
911
912         rl = journal_rate_limit_test(s->rate_limit, path,
913                                      priority & LOG_PRIMASK, available_space(s, false));
914
915         if (rl == 0)
916                 return;
917
918         /* Write a suppression message if we suppressed something */
919         if (rl > 1)
920                 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
921                                       "Suppressed %u messages from %s", rl - 1, path);
922
923 finish:
924         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
925 }
926
927
928 static int system_journal_open(Server *s, bool flush_requested) {
929         int r;
930         char *fn;
931         sd_id128_t machine;
932         char ids[33];
933
934         r = sd_id128_get_machine(&machine);
935         if (r < 0)
936                 return log_error_errno(r, "Failed to get machine id: %m");
937
938         sd_id128_to_string(machine, ids);
939
940         if (!s->system_journal &&
941             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
942             (flush_requested
943              || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
944
945                 /* If in auto mode: first try to create the machine
946                  * path, but not the prefix.
947                  *
948                  * If in persistent mode: create /var/log/journal and
949                  * the machine path */
950
951                 if (s->storage == STORAGE_PERSISTENT)
952                         (void) mkdir("/var/log/journal/", 0755);
953
954                 fn = strappenda("/var/log/journal/", ids);
955                 (void) mkdir(fn, 0755);
956
957                 fn = strappenda(fn, "/system.journal");
958                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
959
960                 if (r >= 0)
961                         server_fix_perms(s, s->system_journal, 0);
962                 else if (r < 0) {
963                         if (r != -ENOENT && r != -EROFS)
964                                 log_warning_errno(r, "Failed to open system journal: %m");
965
966                         r = 0;
967                 }
968         }
969
970         if (!s->runtime_journal &&
971             (s->storage != STORAGE_NONE)) {
972
973                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
974                 if (!fn)
975                         return -ENOMEM;
976
977                 if (s->system_journal) {
978
979                         /* Try to open the runtime journal, but only
980                          * if it already exists, so that we can flush
981                          * it into the system journal */
982
983                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
984                         free(fn);
985
986                         if (r < 0) {
987                                 if (r != -ENOENT)
988                                         log_warning_errno(r, "Failed to open runtime journal: %m");
989
990                                 r = 0;
991                         }
992
993                 } else {
994
995                         /* OK, we really need the runtime journal, so create
996                          * it if necessary. */
997
998                         (void) mkdir("/run/log", 0755);
999                         (void) mkdir("/run/log/journal", 0755);
1000                         (void) mkdir_parents(fn, 0750);
1001
1002                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1003                         free(fn);
1004
1005                         if (r < 0)
1006                                 return log_error_errno(r, "Failed to open runtime journal: %m");
1007                 }
1008
1009                 if (s->runtime_journal)
1010                         server_fix_perms(s, s->runtime_journal, 0);
1011         }
1012
1013         available_space(s, true);
1014
1015         return r;
1016 }
1017
1018 int server_flush_to_var(Server *s) {
1019         sd_id128_t machine;
1020         sd_journal *j = NULL;
1021         char ts[FORMAT_TIMESPAN_MAX];
1022         usec_t start;
1023         unsigned n = 0;
1024         int r;
1025
1026         assert(s);
1027
1028         if (s->storage != STORAGE_AUTO &&
1029             s->storage != STORAGE_PERSISTENT)
1030                 return 0;
1031
1032         if (!s->runtime_journal)
1033                 return 0;
1034
1035         system_journal_open(s, true);
1036
1037         if (!s->system_journal)
1038                 return 0;
1039
1040         log_debug("Flushing to /var...");
1041
1042         start = now(CLOCK_MONOTONIC);
1043
1044         r = sd_id128_get_machine(&machine);
1045         if (r < 0)
1046                 return r;
1047
1048         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1049         if (r < 0)
1050                 return log_error_errno(r, "Failed to read runtime journal: %m");
1051
1052         sd_journal_set_data_threshold(j, 0);
1053
1054         SD_JOURNAL_FOREACH(j) {
1055                 Object *o = NULL;
1056                 JournalFile *f;
1057
1058                 f = j->current_file;
1059                 assert(f && f->current_offset > 0);
1060
1061                 n++;
1062
1063                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1064                 if (r < 0) {
1065                         log_error_errno(r, "Can't read entry: %m");
1066                         goto finish;
1067                 }
1068
1069                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1070                 if (r >= 0)
1071                         continue;
1072
1073                 if (!shall_try_append_again(s->system_journal, r)) {
1074                         log_error_errno(r, "Can't write entry: %m");
1075                         goto finish;
1076                 }
1077
1078                 server_rotate(s);
1079                 server_vacuum(s);
1080
1081                 if (!s->system_journal) {
1082                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1083                         r = -EIO;
1084                         goto finish;
1085                 }
1086
1087                 log_debug("Retrying write.");
1088                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1089                 if (r < 0) {
1090                         log_error_errno(r, "Can't write entry: %m");
1091                         goto finish;
1092                 }
1093         }
1094
1095 finish:
1096         journal_file_post_change(s->system_journal);
1097
1098         journal_file_close(s->runtime_journal);
1099         s->runtime_journal = NULL;
1100
1101         if (r >= 0)
1102                 rm_rf("/run/log/journal", false, true, false);
1103
1104         sd_journal_close(j);
1105
1106         server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1107
1108         return r;
1109 }
1110
1111 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1112         Server *s = userdata;
1113
1114         assert(s);
1115         assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1116
1117         if (revents != EPOLLIN) {
1118                 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1119                 return -EIO;
1120         }
1121
1122         for (;;) {
1123                 struct ucred *ucred = NULL;
1124                 struct timeval *tv = NULL;
1125                 struct cmsghdr *cmsg;
1126                 char *label = NULL;
1127                 size_t label_len = 0;
1128                 struct iovec iovec;
1129
1130                 union {
1131                         struct cmsghdr cmsghdr;
1132
1133                         /* We use NAME_MAX space for the SELinux label
1134                          * here. The kernel currently enforces no
1135                          * limit, but according to suggestions from
1136                          * the SELinux people this will change and it
1137                          * will probably be identical to NAME_MAX. For
1138                          * now we use that, but this should be updated
1139                          * one day when the final limit is known. */
1140                         uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1141                                     CMSG_SPACE(sizeof(struct timeval)) +
1142                                     CMSG_SPACE(sizeof(int)) + /* fd */
1143                                     CMSG_SPACE(NAME_MAX)]; /* selinux label */
1144                 } control = {};
1145                 union sockaddr_union sa = {};
1146                 struct msghdr msghdr = {
1147                         .msg_iov = &iovec,
1148                         .msg_iovlen = 1,
1149                         .msg_control = &control,
1150                         .msg_controllen = sizeof(control),
1151                         .msg_name = &sa,
1152                         .msg_namelen = sizeof(sa),
1153                 };
1154
1155                 ssize_t n;
1156                 int *fds = NULL;
1157                 unsigned n_fds = 0;
1158                 int v = 0;
1159                 size_t m;
1160
1161                 /* Try to get the right size, if we can. (Not all
1162                  * sockets support SIOCINQ, hence we just try, but
1163                  * don't rely on it. */
1164                 (void) ioctl(fd, SIOCINQ, &v);
1165
1166                 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1167                 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1168                                     (size_t) LINE_MAX,
1169                                     ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1170
1171                 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1172                         return log_oom();
1173
1174                 iovec.iov_base = s->buffer;
1175                 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1176
1177                 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1178                 if (n < 0) {
1179                         if (errno == EINTR || errno == EAGAIN)
1180                                 return 0;
1181
1182                         log_error_errno(errno, "recvmsg() failed: %m");
1183                         return -errno;
1184                 }
1185
1186                 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1187
1188                         if (cmsg->cmsg_level == SOL_SOCKET &&
1189                             cmsg->cmsg_type == SCM_CREDENTIALS &&
1190                             cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1191                                 ucred = (struct ucred*) CMSG_DATA(cmsg);
1192                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1193                                  cmsg->cmsg_type == SCM_SECURITY) {
1194                                 label = (char*) CMSG_DATA(cmsg);
1195                                 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1196                         } else if (cmsg->cmsg_level == SOL_SOCKET &&
1197                                    cmsg->cmsg_type == SO_TIMESTAMP &&
1198                                    cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1199                                 tv = (struct timeval*) CMSG_DATA(cmsg);
1200                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1201                                  cmsg->cmsg_type == SCM_RIGHTS) {
1202                                 fds = (int*) CMSG_DATA(cmsg);
1203                                 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1204                         }
1205                 }
1206
1207                 /* And a trailing NUL, just in case */
1208                 s->buffer[n] = 0;
1209
1210                 if (fd == s->syslog_fd) {
1211                         if (n > 0 && n_fds == 0)
1212                                 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1213                         else if (n_fds > 0)
1214                                 log_warning("Got file descriptors via syslog socket. Ignoring.");
1215
1216                 } else if (fd == s->native_fd) {
1217                         if (n > 0 && n_fds == 0)
1218                                 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1219                         else if (n == 0 && n_fds == 1)
1220                                 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1221                         else if (n_fds > 0)
1222                                 log_warning("Got too many file descriptors via native socket. Ignoring.");
1223
1224                 } else {
1225                         assert(fd == s->audit_fd);
1226
1227                         if (n > 0 && n_fds == 0)
1228                                 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1229                         else if (n_fds > 0)
1230                                 log_warning("Got file descriptors via audit socket. Ignoring.");
1231                 }
1232
1233                 close_many(fds, n_fds);
1234         }
1235 }
1236
1237 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1238         Server *s = userdata;
1239
1240         assert(s);
1241
1242         log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
1243
1244         server_flush_to_var(s);
1245         server_sync(s);
1246         server_vacuum(s);
1247
1248         touch("/run/systemd/journal/flushed");
1249
1250         return 0;
1251 }
1252
1253 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1254         Server *s = userdata;
1255
1256         assert(s);
1257
1258         log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
1259         server_rotate(s);
1260         server_vacuum(s);
1261
1262         return 0;
1263 }
1264
1265 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1266         Server *s = userdata;
1267
1268         assert(s);
1269
1270         log_received_signal(LOG_INFO, si);
1271
1272         sd_event_exit(s->event, 0);
1273         return 0;
1274 }
1275
1276 static int setup_signals(Server *s) {
1277         sigset_t mask;
1278         int r;
1279
1280         assert(s);
1281
1282         assert_se(sigemptyset(&mask) == 0);
1283         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1284         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1285
1286         r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1287         if (r < 0)
1288                 return r;
1289
1290         r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1291         if (r < 0)
1292                 return r;
1293
1294         r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1295         if (r < 0)
1296                 return r;
1297
1298         r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1299         if (r < 0)
1300                 return r;
1301
1302         return 0;
1303 }
1304
1305 static int server_parse_proc_cmdline(Server *s) {
1306         _cleanup_free_ char *line = NULL;
1307         const char *w, *state;
1308         size_t l;
1309         int r;
1310
1311         r = proc_cmdline(&line);
1312         if (r < 0) {
1313                 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
1314                 return 0;
1315         }
1316
1317         FOREACH_WORD_QUOTED(w, l, line, state) {
1318                 _cleanup_free_ char *word;
1319
1320                 word = strndup(w, l);
1321                 if (!word)
1322                         return -ENOMEM;
1323
1324                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1325                         r = parse_boolean(word + 35);
1326                         if (r < 0)
1327                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1328                         else
1329                                 s->forward_to_syslog = r;
1330                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1331                         r = parse_boolean(word + 33);
1332                         if (r < 0)
1333                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1334                         else
1335                                 s->forward_to_kmsg = r;
1336                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1337                         r = parse_boolean(word + 36);
1338                         if (r < 0)
1339                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1340                         else
1341                                 s->forward_to_console = r;
1342                 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1343                         r = parse_boolean(word + 33);
1344                         if (r < 0)
1345                                 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1346                         else
1347                                 s->forward_to_wall = r;
1348                 } else if (startswith(word, "systemd.journald"))
1349                         log_warning("Invalid systemd.journald parameter. Ignoring.");
1350         }
1351         /* do not warn about state here, since probably systemd already did */
1352
1353         return 0;
1354 }
1355
1356 static int server_parse_config_file(Server *s) {
1357         assert(s);
1358
1359         return config_parse_many("/etc/systemd/journald.conf",
1360                                  CONF_DIRS_NULSTR("systemd/journald.conf"),
1361                                  "Journal\0",
1362                                  config_item_perf_lookup, journald_gperf_lookup,
1363                                  false, s);
1364 }
1365
1366 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1367         Server *s = userdata;
1368
1369         assert(s);
1370
1371         server_sync(s);
1372         return 0;
1373 }
1374
1375 int server_schedule_sync(Server *s, int priority) {
1376         int r;
1377
1378         assert(s);
1379
1380         if (priority <= LOG_CRIT) {
1381                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1382                 server_sync(s);
1383                 return 0;
1384         }
1385
1386         if (s->sync_scheduled)
1387                 return 0;
1388
1389         if (s->sync_interval_usec > 0) {
1390                 usec_t when;
1391
1392                 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1393                 if (r < 0)
1394                         return r;
1395
1396                 when += s->sync_interval_usec;
1397
1398                 if (!s->sync_event_source) {
1399                         r = sd_event_add_time(
1400                                         s->event,
1401                                         &s->sync_event_source,
1402                                         CLOCK_MONOTONIC,
1403                                         when, 0,
1404                                         server_dispatch_sync, s);
1405                         if (r < 0)
1406                                 return r;
1407
1408                         r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1409                 } else {
1410                         r = sd_event_source_set_time(s->sync_event_source, when);
1411                         if (r < 0)
1412                                 return r;
1413
1414                         r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1415                 }
1416                 if (r < 0)
1417                         return r;
1418
1419                 s->sync_scheduled = true;
1420         }
1421
1422         return 0;
1423 }
1424
1425 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1426         Server *s = userdata;
1427
1428         assert(s);
1429
1430         server_cache_hostname(s);
1431         return 0;
1432 }
1433
1434 static int server_open_hostname(Server *s) {
1435         int r;
1436
1437         assert(s);
1438
1439         s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1440         if (s->hostname_fd < 0)
1441                 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1442
1443         r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1444         if (r < 0) {
1445                 /* kernels prior to 3.2 don't support polling this file. Ignore
1446                  * the failure. */
1447                 if (r == -EPERM) {
1448                         log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1449                                         strerror(-r));
1450                         s->hostname_fd = safe_close(s->hostname_fd);
1451                         return 0;
1452                 }
1453
1454                 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1455         }
1456
1457         r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1458         if (r < 0)
1459                 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1460
1461         return 0;
1462 }
1463
1464 int server_init(Server *s) {
1465         int n, r, fd;
1466
1467         assert(s);
1468
1469         zero(*s);
1470         s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = -1;
1471         s->compress = true;
1472         s->seal = true;
1473
1474         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1475         s->sync_scheduled = false;
1476
1477         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1478         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1479
1480         s->forward_to_wall = true;
1481
1482         s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1483
1484         s->max_level_store = LOG_DEBUG;
1485         s->max_level_syslog = LOG_DEBUG;
1486         s->max_level_kmsg = LOG_NOTICE;
1487         s->max_level_console = LOG_INFO;
1488         s->max_level_wall = LOG_EMERG;
1489
1490         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1491         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1492
1493         server_parse_config_file(s);
1494         server_parse_proc_cmdline(s);
1495         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1496                 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1497                           s->rate_limit_interval, s->rate_limit_burst);
1498                 s->rate_limit_interval = s->rate_limit_burst = 0;
1499         }
1500
1501         mkdir_p("/run/systemd/journal", 0755);
1502
1503         s->user_journals = ordered_hashmap_new(NULL);
1504         if (!s->user_journals)
1505                 return log_oom();
1506
1507         s->mmap = mmap_cache_new();
1508         if (!s->mmap)
1509                 return log_oom();
1510
1511         r = sd_event_default(&s->event);
1512         if (r < 0)
1513                 return log_error_errno(r, "Failed to create event loop: %m");
1514
1515         sd_event_set_watchdog(s->event, true);
1516
1517         n = sd_listen_fds(true);
1518         if (n < 0)
1519                 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1520
1521         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1522
1523                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1524
1525                         if (s->native_fd >= 0) {
1526                                 log_error("Too many native sockets passed.");
1527                                 return -EINVAL;
1528                         }
1529
1530                         s->native_fd = fd;
1531
1532                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1533
1534                         if (s->stdout_fd >= 0) {
1535                                 log_error("Too many stdout sockets passed.");
1536                                 return -EINVAL;
1537                         }
1538
1539                         s->stdout_fd = fd;
1540
1541                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1542                            sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1543
1544                         if (s->syslog_fd >= 0) {
1545                                 log_error("Too many /dev/log sockets passed.");
1546                                 return -EINVAL;
1547                         }
1548
1549                         s->syslog_fd = fd;
1550
1551                 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1552
1553                         if (s->audit_fd >= 0) {
1554                                 log_error("Too many audit sockets passed.");
1555                                 return -EINVAL;
1556                         }
1557
1558                         s->audit_fd = fd;
1559
1560                 } else {
1561                         log_warning("Unknown socket passed as file descriptor %d, ignoring.", fd);
1562
1563                         /* Let's close the fd, better be safe than
1564                            sorry. The fd might reference some resource
1565                            that we really want to release if we don't
1566                            make use of it. */
1567
1568                         safe_close(fd);
1569                 }
1570         }
1571
1572         r = server_open_syslog_socket(s);
1573         if (r < 0)
1574                 return r;
1575
1576         r = server_open_native_socket(s);
1577         if (r < 0)
1578                 return r;
1579
1580         r = server_open_stdout_socket(s);
1581         if (r < 0)
1582                 return r;
1583
1584         r = server_open_dev_kmsg(s);
1585         if (r < 0)
1586                 return r;
1587
1588         r = server_open_audit(s);
1589         if (r < 0)
1590                 return r;
1591
1592         r = server_open_kernel_seqnum(s);
1593         if (r < 0)
1594                 return r;
1595
1596         r = server_open_hostname(s);
1597         if (r < 0)
1598                 return r;
1599
1600         r = setup_signals(s);
1601         if (r < 0)
1602                 return r;
1603
1604         s->udev = udev_new();
1605         if (!s->udev)
1606                 return -ENOMEM;
1607
1608         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1609         if (!s->rate_limit)
1610                 return -ENOMEM;
1611
1612         r = cg_get_root_path(&s->cgroup_root);
1613         if (r < 0)
1614                 return r;
1615
1616         server_cache_hostname(s);
1617         server_cache_boot_id(s);
1618         server_cache_machine_id(s);
1619
1620         r = system_journal_open(s, false);
1621         if (r < 0)
1622                 return r;
1623
1624         return 0;
1625 }
1626
1627 void server_maybe_append_tags(Server *s) {
1628 #ifdef HAVE_GCRYPT
1629         JournalFile *f;
1630         Iterator i;
1631         usec_t n;
1632
1633         n = now(CLOCK_REALTIME);
1634
1635         if (s->system_journal)
1636                 journal_file_maybe_append_tag(s->system_journal, n);
1637
1638         ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1639                 journal_file_maybe_append_tag(f, n);
1640 #endif
1641 }
1642
1643 void server_done(Server *s) {
1644         JournalFile *f;
1645         assert(s);
1646
1647         while (s->stdout_streams)
1648                 stdout_stream_free(s->stdout_streams);
1649
1650         if (s->system_journal)
1651                 journal_file_close(s->system_journal);
1652
1653         if (s->runtime_journal)
1654                 journal_file_close(s->runtime_journal);
1655
1656         while ((f = ordered_hashmap_steal_first(s->user_journals)))
1657                 journal_file_close(f);
1658
1659         ordered_hashmap_free(s->user_journals);
1660
1661         sd_event_source_unref(s->syslog_event_source);
1662         sd_event_source_unref(s->native_event_source);
1663         sd_event_source_unref(s->stdout_event_source);
1664         sd_event_source_unref(s->dev_kmsg_event_source);
1665         sd_event_source_unref(s->audit_event_source);
1666         sd_event_source_unref(s->sync_event_source);
1667         sd_event_source_unref(s->sigusr1_event_source);
1668         sd_event_source_unref(s->sigusr2_event_source);
1669         sd_event_source_unref(s->sigterm_event_source);
1670         sd_event_source_unref(s->sigint_event_source);
1671         sd_event_source_unref(s->hostname_event_source);
1672         sd_event_unref(s->event);
1673
1674         safe_close(s->syslog_fd);
1675         safe_close(s->native_fd);
1676         safe_close(s->stdout_fd);
1677         safe_close(s->dev_kmsg_fd);
1678         safe_close(s->audit_fd);
1679         safe_close(s->hostname_fd);
1680
1681         if (s->rate_limit)
1682                 journal_rate_limit_free(s->rate_limit);
1683
1684         if (s->kernel_seqnum)
1685                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1686
1687         free(s->buffer);
1688         free(s->tty_path);
1689         free(s->cgroup_root);
1690         free(s->hostname_field);
1691
1692         if (s->mmap)
1693                 mmap_cache_unref(s->mmap);
1694
1695         if (s->udev)
1696                 udev_unref(s->udev);
1697 }