chiark / gitweb /
journald: process SIGBUS for the memory maps we set up
[elogind.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
26 #include <sys/mman.h>
27 #include <sys/timerfd.h>
28
29 #include <libudev.h>
30
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33 #include "sd-daemon.h"
34 #include "fileio.h"
35 #include "mkdir.h"
36 #include "hashmap.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
40 #include "list.h"
41 #include "missing.h"
42 #include "conf-parser.h"
43 #include "selinux-util.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-rate-limit.h"
48 #include "journald-kmsg.h"
49 #include "journald-syslog.h"
50 #include "journald-stream.h"
51 #include "journald-console.h"
52 #include "journald-native.h"
53 #include "journald-audit.h"
54 #include "journald-server.h"
55
56 #ifdef HAVE_ACL
57 #include <sys/acl.h>
58 #include <acl/libacl.h>
59 #include "acl-util.h"
60 #endif
61
62 #ifdef HAVE_SELINUX
63 #include <selinux/selinux.h>
64 #endif
65
/* Upper bound on the number of per-user journal files kept open at once;
 * find_journal() closes entries from the front of the ordered map once
 * this many are open. */
#define USER_JOURNALS_MAX 1024

/* Built-in defaults (presumably applied when the configuration does not
 * override them -- the code using them is outside this part of the file). */
#define DEFAULT_SYNC_INTERVAL_USEC (5 * USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30 * USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* How long a result computed by available_space() stays valid before the
 * journal directory is scanned again. */
#define RECHECK_AVAILABLE_SPACE_USEC (30 * USEC_PER_SEC)
74
75 static const char* const storage_table[_STORAGE_MAX] = {
76         [STORAGE_AUTO] = "auto",
77         [STORAGE_VOLATILE] = "volatile",
78         [STORAGE_PERSISTENT] = "persistent",
79         [STORAGE_NONE] = "none"
80 };
81
82 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
83 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
84
85 static const char* const split_mode_table[_SPLIT_MAX] = {
86         [SPLIT_LOGIN] = "login",
87         [SPLIT_UID] = "uid",
88         [SPLIT_NONE] = "none",
89 };
90
91 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
92 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
93
94 static uint64_t available_space(Server *s, bool verbose) {
95         char ids[33];
96         _cleanup_free_ char *p = NULL;
97         sd_id128_t machine;
98         struct statvfs ss;
99         uint64_t sum = 0, ss_avail = 0, avail = 0;
100         int r;
101         _cleanup_closedir_ DIR *d = NULL;
102         usec_t ts;
103         const char *f;
104         JournalMetrics *m;
105
106         ts = now(CLOCK_MONOTONIC);
107
108         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
109             && !verbose)
110                 return s->cached_available_space;
111
112         r = sd_id128_get_machine(&machine);
113         if (r < 0)
114                 return 0;
115
116         if (s->system_journal) {
117                 f = "/var/log/journal/";
118                 m = &s->system_metrics;
119         } else {
120                 f = "/run/log/journal/";
121                 m = &s->runtime_metrics;
122         }
123
124         assert(m);
125
126         p = strappend(f, sd_id128_to_string(machine, ids));
127         if (!p)
128                 return 0;
129
130         d = opendir(p);
131         if (!d)
132                 return 0;
133
134         if (fstatvfs(dirfd(d), &ss) < 0)
135                 return 0;
136
137         for (;;) {
138                 struct stat st;
139                 struct dirent *de;
140
141                 errno = 0;
142                 de = readdir(d);
143                 if (!de && errno != 0)
144                         return 0;
145
146                 if (!de)
147                         break;
148
149                 if (!endswith(de->d_name, ".journal") &&
150                     !endswith(de->d_name, ".journal~"))
151                         continue;
152
153                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
154                         continue;
155
156                 if (!S_ISREG(st.st_mode))
157                         continue;
158
159                 sum += (uint64_t) st.st_blocks * 512UL;
160         }
161
162         ss_avail = ss.f_bsize * ss.f_bavail;
163
164         /* If we reached a high mark, we will always allow this much
165          * again, unless usage goes above max_use. This watermark
166          * value is cached so that we don't give up space on pressure,
167          * but hover below the maximum usage. */
168
169         if (m->use < sum)
170                 m->use = sum;
171
172         avail = LESS_BY(ss_avail, m->keep_free);
173
174         s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
175         s->cached_available_space_timestamp = ts;
176
177         if (verbose) {
178                 char    fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
179                         fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
180
181                 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
182                                       "%s journal is using %s (max allowed %s, "
183                                       "trying to leave %s free of %s available → current limit %s).",
184                                       s->system_journal ? "Permanent" : "Runtime",
185                                       format_bytes(fb1, sizeof(fb1), sum),
186                                       format_bytes(fb2, sizeof(fb2), m->max_use),
187                                       format_bytes(fb3, sizeof(fb3), m->keep_free),
188                                       format_bytes(fb4, sizeof(fb4), ss_avail),
189                                       format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
190         }
191
192         return s->cached_available_space;
193 }
194
195 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
196         int r;
197 #ifdef HAVE_ACL
198         acl_t acl;
199         acl_entry_t entry;
200         acl_permset_t permset;
201 #endif
202
203         assert(f);
204
205         r = fchmod(f->fd, 0640);
206         if (r < 0)
207                 log_warning_errno(r, "Failed to fix access mode on %s, ignoring: %m", f->path);
208
209 #ifdef HAVE_ACL
210         if (uid <= SYSTEM_UID_MAX)
211                 return;
212
213         acl = acl_get_fd(f->fd);
214         if (!acl) {
215                 log_warning_errno(errno, "Failed to read ACL on %s, ignoring: %m", f->path);
216                 return;
217         }
218
219         r = acl_find_uid(acl, uid, &entry);
220         if (r <= 0) {
221
222                 if (acl_create_entry(&acl, &entry) < 0 ||
223                     acl_set_tag_type(entry, ACL_USER) < 0 ||
224                     acl_set_qualifier(entry, &uid) < 0) {
225                         log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
226                         goto finish;
227                 }
228         }
229
230         /* We do not recalculate the mask unconditionally here,
231          * so that the fchmod() mask above stays intact. */
232         if (acl_get_permset(entry, &permset) < 0 ||
233             acl_add_perm(permset, ACL_READ) < 0 ||
234             calc_acl_mask_if_needed(&acl) < 0) {
235                 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
236                 goto finish;
237         }
238
239         if (acl_set_fd(f->fd, acl) < 0)
240                 log_warning_errno(errno, "Failed to set ACL on %s, ignoring: %m", f->path);
241
242 finish:
243         acl_free(acl);
244 #endif
245 }
246
247 static JournalFile* find_journal(Server *s, uid_t uid) {
248         _cleanup_free_ char *p = NULL;
249         int r;
250         JournalFile *f;
251         sd_id128_t machine;
252
253         assert(s);
254
255         /* We split up user logs only on /var, not on /run. If the
256          * runtime file is open, we write to it exclusively, in order
257          * to guarantee proper order as soon as we flush /run to
258          * /var and close the runtime file. */
259
260         if (s->runtime_journal)
261                 return s->runtime_journal;
262
263         if (uid <= SYSTEM_UID_MAX)
264                 return s->system_journal;
265
266         r = sd_id128_get_machine(&machine);
267         if (r < 0)
268                 return s->system_journal;
269
270         f = ordered_hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
271         if (f)
272                 return f;
273
274         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
275                      SD_ID128_FORMAT_VAL(machine), uid) < 0)
276                 return s->system_journal;
277
278         while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
279                 /* Too many open? Then let's close one */
280                 f = ordered_hashmap_steal_first(s->user_journals);
281                 assert(f);
282                 journal_file_close(f);
283         }
284
285         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
286         if (r < 0)
287                 return s->system_journal;
288
289         server_fix_perms(s, f, uid);
290
291         r = ordered_hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
292         if (r < 0) {
293                 journal_file_close(f);
294                 return s->system_journal;
295         }
296
297         return f;
298 }
299
300 static int do_rotate(Server *s, JournalFile **f, const char* name,
301                      bool seal, uint32_t uid) {
302         int r;
303         assert(s);
304
305         if (!*f)
306                 return -EINVAL;
307
308         r = journal_file_rotate(f, s->compress, seal);
309         if (r < 0)
310                 if (*f)
311                         log_error_errno(r, "Failed to rotate %s: %m",
312                                         (*f)->path);
313                 else
314                         log_error_errno(r, "Failed to create new %s journal: %m",
315                                         name);
316         else
317                 server_fix_perms(s, *f, uid);
318         return r;
319 }
320
321 void server_rotate(Server *s) {
322         JournalFile *f;
323         void *k;
324         Iterator i;
325         int r;
326
327         log_debug("Rotating...");
328
329         do_rotate(s, &s->runtime_journal, "runtime", false, 0);
330         do_rotate(s, &s->system_journal, "system", s->seal, 0);
331
332         ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
333                 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
334                 if (r >= 0)
335                         ordered_hashmap_replace(s->user_journals, k, f);
336                 else if (!f)
337                         /* Old file has been closed and deallocated */
338                         ordered_hashmap_remove(s->user_journals, k);
339         }
340 }
341
342 void server_sync(Server *s) {
343         JournalFile *f;
344         void *k;
345         Iterator i;
346         int r;
347
348         if (s->system_journal) {
349                 r = journal_file_set_offline(s->system_journal);
350                 if (r < 0)
351                         log_error_errno(r, "Failed to sync system journal: %m");
352         }
353
354         ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
355                 r = journal_file_set_offline(f);
356                 if (r < 0)
357                         log_error_errno(r, "Failed to sync user journal: %m");
358         }
359
360         if (s->sync_event_source) {
361                 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
362                 if (r < 0)
363                         log_error_errno(r, "Failed to disable sync timer source: %m");
364         }
365
366         s->sync_scheduled = false;
367 }
368
369 static void do_vacuum(Server *s, char *ids, JournalFile *f, const char* path,
370                       JournalMetrics *metrics) {
371         char *p;
372         int r;
373
374         if (!f)
375                 return;
376
377         p = strappenda(path, ids);
378         r = journal_directory_vacuum(p, metrics->max_use, s->max_retention_usec, &s->oldest_file_usec, false);
379         if (r < 0 && r != -ENOENT)
380                 log_error_errno(r, "Failed to vacuum %s: %m", p);
381 }
382
383 void server_vacuum(Server *s) {
384         char ids[33];
385         sd_id128_t machine;
386         int r;
387
388         log_debug("Vacuuming...");
389
390         s->oldest_file_usec = 0;
391
392         r = sd_id128_get_machine(&machine);
393         if (r < 0) {
394                 log_error_errno(r, "Failed to get machine ID: %m");
395                 return;
396         }
397         sd_id128_to_string(machine, ids);
398
399         do_vacuum(s, ids, s->system_journal, "/var/log/journal/", &s->system_metrics);
400         do_vacuum(s, ids, s->runtime_journal, "/run/log/journal/", &s->runtime_metrics);
401
402         s->cached_available_space_timestamp = 0;
403 }
404
405 static void server_cache_machine_id(Server *s) {
406         sd_id128_t id;
407         int r;
408
409         assert(s);
410
411         r = sd_id128_get_machine(&id);
412         if (r < 0)
413                 return;
414
415         sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
416 }
417
418 static void server_cache_boot_id(Server *s) {
419         sd_id128_t id;
420         int r;
421
422         assert(s);
423
424         r = sd_id128_get_boot(&id);
425         if (r < 0)
426                 return;
427
428         sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
429 }
430
431 static void server_cache_hostname(Server *s) {
432         _cleanup_free_ char *t = NULL;
433         char *x;
434
435         assert(s);
436
437         t = gethostname_malloc();
438         if (!t)
439                 return;
440
441         x = strappend("_HOSTNAME=", t);
442         if (!x)
443                 return;
444
445         free(s->hostname_field);
446         s->hostname_field = x;
447 }
448
449 bool shall_try_append_again(JournalFile *f, int r) {
450
451         /* -E2BIG            Hit configured limit
452            -EFBIG            Hit fs limit
453            -EDQUOT           Quota limit hit
454            -ENOSPC           Disk full
455            -EIO              I/O error of some kind (mmap)
456            -EHOSTDOWN        Other machine
457            -EBUSY            Unclean shutdown
458            -EPROTONOSUPPORT  Unsupported feature
459            -EBADMSG          Corrupted
460            -ENODATA          Truncated
461            -ESHUTDOWN        Already archived */
462
463         if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
464                 log_debug("%s: Allocation limit reached, rotating.", f->path);
465         else if (r == -EHOSTDOWN)
466                 log_info("%s: Journal file from other machine, rotating.", f->path);
467         else if (r == -EBUSY)
468                 log_info("%s: Unclean shutdown, rotating.", f->path);
469         else if (r == -EPROTONOSUPPORT)
470                 log_info("%s: Unsupported feature, rotating.", f->path);
471         else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
472                 log_warning("%s: Journal file corrupted, rotating.", f->path);
473         else if (r == -EIO)
474                 log_warning("%s: IO error, rotating.", f->path);
475         else
476                 return false;
477
478         return true;
479 }
480
481 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
482         JournalFile *f;
483         bool vacuumed = false;
484         int r;
485
486         assert(s);
487         assert(iovec);
488         assert(n > 0);
489
490         f = find_journal(s, uid);
491         if (!f)
492                 return;
493
494         if (journal_file_rotate_suggested(f, s->max_file_usec)) {
495                 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
496                 server_rotate(s);
497                 server_vacuum(s);
498                 vacuumed = true;
499
500                 f = find_journal(s, uid);
501                 if (!f)
502                         return;
503         }
504
505         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
506         if (r >= 0) {
507                 server_schedule_sync(s, priority);
508                 return;
509         }
510
511         if (vacuumed || !shall_try_append_again(f, r)) {
512                 size_t size = 0;
513                 unsigned i;
514                 for (i = 0; i < n; i++)
515                         size += iovec[i].iov_len;
516
517                 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, size);
518                 return;
519         }
520
521         server_rotate(s);
522         server_vacuum(s);
523
524         f = find_journal(s, uid);
525         if (!f)
526                 return;
527
528         log_debug("Retrying write.");
529         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
530         if (r < 0) {
531                 size_t size = 0;
532                 unsigned i;
533                 for (i = 0; i < n; i++)
534                         size += iovec[i].iov_len;
535
536                 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, size);
537         } else
538                 server_schedule_sync(s, priority);
539 }
540
541 static void dispatch_message_real(
542                 Server *s,
543                 struct iovec *iovec, unsigned n, unsigned m,
544                 const struct ucred *ucred,
545                 const struct timeval *tv,
546                 const char *label, size_t label_len,
547                 const char *unit_id,
548                 int priority,
549                 pid_t object_pid) {
550
551         char    pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
552                 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
553                 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
554                 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
555                 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
556                 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
557                 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
558                 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
559         uid_t object_uid;
560         gid_t object_gid;
561         char *x;
562         int r;
563         char *t, *c;
564         uid_t realuid = 0, owner = 0, journal_uid;
565         bool owner_valid = false;
566 #ifdef HAVE_AUDIT
567         char    audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
568                 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
569                 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
570                 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
571
572         uint32_t audit;
573         uid_t loginuid;
574 #endif
575
576         assert(s);
577         assert(iovec);
578         assert(n > 0);
579         assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
580
581         if (ucred) {
582                 realuid = ucred->uid;
583
584                 sprintf(pid, "_PID="PID_FMT, ucred->pid);
585                 IOVEC_SET_STRING(iovec[n++], pid);
586
587                 sprintf(uid, "_UID="UID_FMT, ucred->uid);
588                 IOVEC_SET_STRING(iovec[n++], uid);
589
590                 sprintf(gid, "_GID="GID_FMT, ucred->gid);
591                 IOVEC_SET_STRING(iovec[n++], gid);
592
593                 r = get_process_comm(ucred->pid, &t);
594                 if (r >= 0) {
595                         x = strappenda("_COMM=", t);
596                         free(t);
597                         IOVEC_SET_STRING(iovec[n++], x);
598                 }
599
600                 r = get_process_exe(ucred->pid, &t);
601                 if (r >= 0) {
602                         x = strappenda("_EXE=", t);
603                         free(t);
604                         IOVEC_SET_STRING(iovec[n++], x);
605                 }
606
607                 r = get_process_cmdline(ucred->pid, 0, false, &t);
608                 if (r >= 0) {
609                         x = strappenda("_CMDLINE=", t);
610                         free(t);
611                         IOVEC_SET_STRING(iovec[n++], x);
612                 }
613
614                 r = get_process_capeff(ucred->pid, &t);
615                 if (r >= 0) {
616                         x = strappenda("_CAP_EFFECTIVE=", t);
617                         free(t);
618                         IOVEC_SET_STRING(iovec[n++], x);
619                 }
620
621 #ifdef HAVE_AUDIT
622                 r = audit_session_from_pid(ucred->pid, &audit);
623                 if (r >= 0) {
624                         sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
625                         IOVEC_SET_STRING(iovec[n++], audit_session);
626                 }
627
628                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
629                 if (r >= 0) {
630                         sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
631                         IOVEC_SET_STRING(iovec[n++], audit_loginuid);
632                 }
633 #endif
634
635                 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
636                 if (r >= 0) {
637                         char *session = NULL;
638
639                         x = strappenda("_SYSTEMD_CGROUP=", c);
640                         IOVEC_SET_STRING(iovec[n++], x);
641
642                         r = cg_path_get_session(c, &t);
643                         if (r >= 0) {
644                                 session = strappenda("_SYSTEMD_SESSION=", t);
645                                 free(t);
646                                 IOVEC_SET_STRING(iovec[n++], session);
647                         }
648
649                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
650                                 owner_valid = true;
651
652                                 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
653                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
654                         }
655
656                         if (cg_path_get_unit(c, &t) >= 0) {
657                                 x = strappenda("_SYSTEMD_UNIT=", t);
658                                 free(t);
659                                 IOVEC_SET_STRING(iovec[n++], x);
660                         } else if (unit_id && !session) {
661                                 x = strappenda("_SYSTEMD_UNIT=", unit_id);
662                                 IOVEC_SET_STRING(iovec[n++], x);
663                         }
664
665                         if (cg_path_get_user_unit(c, &t) >= 0) {
666                                 x = strappenda("_SYSTEMD_USER_UNIT=", t);
667                                 free(t);
668                                 IOVEC_SET_STRING(iovec[n++], x);
669                         } else if (unit_id && session) {
670                                 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
671                                 IOVEC_SET_STRING(iovec[n++], x);
672                         }
673
674                         if (cg_path_get_slice(c, &t) >= 0) {
675                                 x = strappenda("_SYSTEMD_SLICE=", t);
676                                 free(t);
677                                 IOVEC_SET_STRING(iovec[n++], x);
678                         }
679
680                         free(c);
681                 } else if (unit_id) {
682                         x = strappenda("_SYSTEMD_UNIT=", unit_id);
683                         IOVEC_SET_STRING(iovec[n++], x);
684                 }
685
686 #ifdef HAVE_SELINUX
687                 if (mac_selinux_use()) {
688                         if (label) {
689                                 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
690
691                                 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
692                                 IOVEC_SET_STRING(iovec[n++], x);
693                         } else {
694                                 security_context_t con;
695
696                                 if (getpidcon(ucred->pid, &con) >= 0) {
697                                         x = strappenda("_SELINUX_CONTEXT=", con);
698
699                                         freecon(con);
700                                         IOVEC_SET_STRING(iovec[n++], x);
701                                 }
702                         }
703                 }
704 #endif
705         }
706         assert(n <= m);
707
708         if (object_pid) {
709                 r = get_process_uid(object_pid, &object_uid);
710                 if (r >= 0) {
711                         sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
712                         IOVEC_SET_STRING(iovec[n++], o_uid);
713                 }
714
715                 r = get_process_gid(object_pid, &object_gid);
716                 if (r >= 0) {
717                         sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
718                         IOVEC_SET_STRING(iovec[n++], o_gid);
719                 }
720
721                 r = get_process_comm(object_pid, &t);
722                 if (r >= 0) {
723                         x = strappenda("OBJECT_COMM=", t);
724                         free(t);
725                         IOVEC_SET_STRING(iovec[n++], x);
726                 }
727
728                 r = get_process_exe(object_pid, &t);
729                 if (r >= 0) {
730                         x = strappenda("OBJECT_EXE=", t);
731                         free(t);
732                         IOVEC_SET_STRING(iovec[n++], x);
733                 }
734
735                 r = get_process_cmdline(object_pid, 0, false, &t);
736                 if (r >= 0) {
737                         x = strappenda("OBJECT_CMDLINE=", t);
738                         free(t);
739                         IOVEC_SET_STRING(iovec[n++], x);
740                 }
741
742 #ifdef HAVE_AUDIT
743                 r = audit_session_from_pid(object_pid, &audit);
744                 if (r >= 0) {
745                         sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
746                         IOVEC_SET_STRING(iovec[n++], o_audit_session);
747                 }
748
749                 r = audit_loginuid_from_pid(object_pid, &loginuid);
750                 if (r >= 0) {
751                         sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
752                         IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
753                 }
754 #endif
755
756                 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
757                 if (r >= 0) {
758                         x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
759                         IOVEC_SET_STRING(iovec[n++], x);
760
761                         r = cg_path_get_session(c, &t);
762                         if (r >= 0) {
763                                 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
764                                 free(t);
765                                 IOVEC_SET_STRING(iovec[n++], x);
766                         }
767
768                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
769                                 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
770                                 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
771                         }
772
773                         if (cg_path_get_unit(c, &t) >= 0) {
774                                 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
775                                 free(t);
776                                 IOVEC_SET_STRING(iovec[n++], x);
777                         }
778
779                         if (cg_path_get_user_unit(c, &t) >= 0) {
780                                 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
781                                 free(t);
782                                 IOVEC_SET_STRING(iovec[n++], x);
783                         }
784
785                         free(c);
786                 }
787         }
788         assert(n <= m);
789
790         if (tv) {
791                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
792                 IOVEC_SET_STRING(iovec[n++], source_time);
793         }
794
795         /* Note that strictly speaking storing the boot id here is
796          * redundant since the entry includes this in-line
797          * anyway. However, we need this indexed, too. */
798         if (!isempty(s->boot_id_field))
799                 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
800
801         if (!isempty(s->machine_id_field))
802                 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
803
804         if (!isempty(s->hostname_field))
805                 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
806
807         assert(n <= m);
808
809         if (s->split_mode == SPLIT_UID && realuid > 0)
810                 /* Split up strictly by any UID */
811                 journal_uid = realuid;
812         else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
813                 /* Split up by login UIDs.  We do this only if the
814                  * realuid is not root, in order not to accidentally
815                  * leak privileged information to the user that is
816                  * logged by a privileged process that is part of an
817                  * unprivileged session. */
818                 journal_uid = owner;
819         else
820                 journal_uid = 0;
821
822         write_to_journal(s, journal_uid, iovec, n, priority);
823 }
824
825 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
826         char mid[11 + 32 + 1];
827         char buffer[16 + LINE_MAX + 1];
828         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
829         int n = 0;
830         va_list ap;
831         struct ucred ucred = {};
832
833         assert(s);
834         assert(format);
835
836         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
837         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
838
839         memcpy(buffer, "MESSAGE=", 8);
840         va_start(ap, format);
841         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
842         va_end(ap);
843         char_array_0(buffer);
844         IOVEC_SET_STRING(iovec[n++], buffer);
845
846         if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
847                 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
848                 char_array_0(mid);
849                 IOVEC_SET_STRING(iovec[n++], mid);
850         }
851
852         ucred.pid = getpid();
853         ucred.uid = getuid();
854         ucred.gid = getgid();
855
856         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
857 }
858
859 void server_dispatch_message(
860                 Server *s,
861                 struct iovec *iovec, unsigned n, unsigned m,
862                 const struct ucred *ucred,
863                 const struct timeval *tv,
864                 const char *label, size_t label_len,
865                 const char *unit_id,
866                 int priority,
867                 pid_t object_pid) {
868
869         int rl, r;
870         _cleanup_free_ char *path = NULL;
871         char *c;
872
873         assert(s);
874         assert(iovec || n == 0);
875
876         if (n == 0)
877                 return;
878
879         if (LOG_PRI(priority) > s->max_level_store)
880                 return;
881
882         /* Stop early in case the information will not be stored
883          * in a journal. */
884         if (s->storage == STORAGE_NONE)
885                 return;
886
887         if (!ucred)
888                 goto finish;
889
890         r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
891         if (r < 0)
892                 goto finish;
893
894         /* example: /user/lennart/3/foobar
895          *          /system/dbus.service/foobar
896          *
897          * So let's cut of everything past the third /, since that is
898          * where user directories start */
899
900         c = strchr(path, '/');
901         if (c) {
902                 c = strchr(c+1, '/');
903                 if (c) {
904                         c = strchr(c+1, '/');
905                         if (c)
906                                 *c = 0;
907                 }
908         }
909
910         rl = journal_rate_limit_test(s->rate_limit, path,
911                                      priority & LOG_PRIMASK, available_space(s, false));
912
913         if (rl == 0)
914                 return;
915
916         /* Write a suppression message if we suppressed something */
917         if (rl > 1)
918                 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
919                                       "Suppressed %u messages from %s", rl - 1, path);
920
921 finish:
922         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
923 }
924
925
926 static int system_journal_open(Server *s, bool flush_requested) {
927         int r;
928         char *fn;
929         sd_id128_t machine;
930         char ids[33];
931
932         r = sd_id128_get_machine(&machine);
933         if (r < 0)
934                 return log_error_errno(r, "Failed to get machine id: %m");
935
936         sd_id128_to_string(machine, ids);
937
938         if (!s->system_journal &&
939             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
940             (flush_requested
941              || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
942
943                 /* If in auto mode: first try to create the machine
944                  * path, but not the prefix.
945                  *
946                  * If in persistent mode: create /var/log/journal and
947                  * the machine path */
948
949                 if (s->storage == STORAGE_PERSISTENT)
950                         (void) mkdir("/var/log/journal/", 0755);
951
952                 fn = strappenda("/var/log/journal/", ids);
953                 (void) mkdir(fn, 0755);
954
955                 fn = strappenda(fn, "/system.journal");
956                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
957
958                 if (r >= 0)
959                         server_fix_perms(s, s->system_journal, 0);
960                 else if (r < 0) {
961                         if (r != -ENOENT && r != -EROFS)
962                                 log_warning_errno(r, "Failed to open system journal: %m");
963
964                         r = 0;
965                 }
966         }
967
968         if (!s->runtime_journal &&
969             (s->storage != STORAGE_NONE)) {
970
971                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
972                 if (!fn)
973                         return -ENOMEM;
974
975                 if (s->system_journal) {
976
977                         /* Try to open the runtime journal, but only
978                          * if it already exists, so that we can flush
979                          * it into the system journal */
980
981                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
982                         free(fn);
983
984                         if (r < 0) {
985                                 if (r != -ENOENT)
986                                         log_warning_errno(r, "Failed to open runtime journal: %m");
987
988                                 r = 0;
989                         }
990
991                 } else {
992
993                         /* OK, we really need the runtime journal, so create
994                          * it if necessary. */
995
996                         (void) mkdir("/run/log", 0755);
997                         (void) mkdir("/run/log/journal", 0755);
998                         (void) mkdir_parents(fn, 0750);
999
1000                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1001                         free(fn);
1002
1003                         if (r < 0)
1004                                 return log_error_errno(r, "Failed to open runtime journal: %m");
1005                 }
1006
1007                 if (s->runtime_journal)
1008                         server_fix_perms(s, s->runtime_journal, 0);
1009         }
1010
1011         available_space(s, true);
1012
1013         return r;
1014 }
1015
1016 int server_flush_to_var(Server *s) {
1017         sd_id128_t machine;
1018         sd_journal *j = NULL;
1019         char ts[FORMAT_TIMESPAN_MAX];
1020         usec_t start;
1021         unsigned n = 0;
1022         int r;
1023
1024         assert(s);
1025
1026         if (s->storage != STORAGE_AUTO &&
1027             s->storage != STORAGE_PERSISTENT)
1028                 return 0;
1029
1030         if (!s->runtime_journal)
1031                 return 0;
1032
1033         system_journal_open(s, true);
1034
1035         if (!s->system_journal)
1036                 return 0;
1037
1038         log_debug("Flushing to /var...");
1039
1040         start = now(CLOCK_MONOTONIC);
1041
1042         r = sd_id128_get_machine(&machine);
1043         if (r < 0)
1044                 return r;
1045
1046         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1047         if (r < 0)
1048                 return log_error_errno(r, "Failed to read runtime journal: %m");
1049
1050         sd_journal_set_data_threshold(j, 0);
1051
1052         SD_JOURNAL_FOREACH(j) {
1053                 Object *o = NULL;
1054                 JournalFile *f;
1055
1056                 f = j->current_file;
1057                 assert(f && f->current_offset > 0);
1058
1059                 n++;
1060
1061                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1062                 if (r < 0) {
1063                         log_error_errno(r, "Can't read entry: %m");
1064                         goto finish;
1065                 }
1066
1067                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1068                 if (r >= 0)
1069                         continue;
1070
1071                 if (!shall_try_append_again(s->system_journal, r)) {
1072                         log_error_errno(r, "Can't write entry: %m");
1073                         goto finish;
1074                 }
1075
1076                 server_rotate(s);
1077                 server_vacuum(s);
1078
1079                 if (!s->system_journal) {
1080                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1081                         r = -EIO;
1082                         goto finish;
1083                 }
1084
1085                 log_debug("Retrying write.");
1086                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1087                 if (r < 0) {
1088                         log_error_errno(r, "Can't write entry: %m");
1089                         goto finish;
1090                 }
1091         }
1092
1093 finish:
1094         journal_file_post_change(s->system_journal);
1095
1096         journal_file_close(s->runtime_journal);
1097         s->runtime_journal = NULL;
1098
1099         if (r >= 0)
1100                 rm_rf("/run/log/journal", false, true, false);
1101
1102         sd_journal_close(j);
1103
1104         server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1105
1106         return r;
1107 }
1108
1109 int process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1110         Server *s = userdata;
1111
1112         assert(s);
1113         assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1114
1115         if (revents != EPOLLIN) {
1116                 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1117                 return -EIO;
1118         }
1119
1120         for (;;) {
1121                 struct ucred *ucred = NULL;
1122                 struct timeval *tv = NULL;
1123                 struct cmsghdr *cmsg;
1124                 char *label = NULL;
1125                 size_t label_len = 0;
1126                 struct iovec iovec;
1127
1128                 union {
1129                         struct cmsghdr cmsghdr;
1130
1131                         /* We use NAME_MAX space for the SELinux label
1132                          * here. The kernel currently enforces no
1133                          * limit, but according to suggestions from
1134                          * the SELinux people this will change and it
1135                          * will probably be identical to NAME_MAX. For
1136                          * now we use that, but this should be updated
1137                          * one day when the final limit is known. */
1138                         uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1139                                     CMSG_SPACE(sizeof(struct timeval)) +
1140                                     CMSG_SPACE(sizeof(int)) + /* fd */
1141                                     CMSG_SPACE(NAME_MAX)]; /* selinux label */
1142                 } control = {};
1143                 union sockaddr_union sa = {};
1144                 struct msghdr msghdr = {
1145                         .msg_iov = &iovec,
1146                         .msg_iovlen = 1,
1147                         .msg_control = &control,
1148                         .msg_controllen = sizeof(control),
1149                         .msg_name = &sa,
1150                         .msg_namelen = sizeof(sa),
1151                 };
1152
1153                 ssize_t n;
1154                 int *fds = NULL;
1155                 unsigned n_fds = 0;
1156                 int v = 0;
1157                 size_t m;
1158
1159                 /* Try to get the right size, if we can. (Not all
1160                  * sockets support SIOCINQ, hence we just try, but
1161                  * don't rely on it. */
1162                 (void) ioctl(fd, SIOCINQ, &v);
1163
1164                 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1165                 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1166                                     (size_t) LINE_MAX,
1167                                     ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1168
1169                 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1170                         return log_oom();
1171
1172                 iovec.iov_base = s->buffer;
1173                 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1174
1175                 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1176                 if (n < 0) {
1177                         if (errno == EINTR || errno == EAGAIN)
1178                                 return 0;
1179
1180                         log_error_errno(errno, "recvmsg() failed: %m");
1181                         return -errno;
1182                 }
1183
1184                 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1185
1186                         if (cmsg->cmsg_level == SOL_SOCKET &&
1187                             cmsg->cmsg_type == SCM_CREDENTIALS &&
1188                             cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1189                                 ucred = (struct ucred*) CMSG_DATA(cmsg);
1190                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1191                                  cmsg->cmsg_type == SCM_SECURITY) {
1192                                 label = (char*) CMSG_DATA(cmsg);
1193                                 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1194                         } else if (cmsg->cmsg_level == SOL_SOCKET &&
1195                                    cmsg->cmsg_type == SO_TIMESTAMP &&
1196                                    cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1197                                 tv = (struct timeval*) CMSG_DATA(cmsg);
1198                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1199                                  cmsg->cmsg_type == SCM_RIGHTS) {
1200                                 fds = (int*) CMSG_DATA(cmsg);
1201                                 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1202                         }
1203                 }
1204
1205                 /* And a trailing NUL, just in case */
1206                 s->buffer[n] = 0;
1207
1208                 if (fd == s->syslog_fd) {
1209                         if (n > 0 && n_fds == 0)
1210                                 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1211                         else if (n_fds > 0)
1212                                 log_warning("Got file descriptors via syslog socket. Ignoring.");
1213
1214                 } else if (fd == s->native_fd) {
1215                         if (n > 0 && n_fds == 0)
1216                                 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1217                         else if (n == 0 && n_fds == 1)
1218                                 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1219                         else if (n_fds > 0)
1220                                 log_warning("Got too many file descriptors via native socket. Ignoring.");
1221
1222                 } else {
1223                         assert(fd == s->audit_fd);
1224
1225                         if (n > 0 && n_fds == 0)
1226                                 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1227                         else if (n_fds > 0)
1228                                 log_warning("Got file descriptors via audit socket. Ignoring.");
1229                 }
1230
1231                 close_many(fds, n_fds);
1232         }
1233 }
1234
1235 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1236         Server *s = userdata;
1237
1238         assert(s);
1239
1240         log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
1241
1242         server_flush_to_var(s);
1243         server_sync(s);
1244         server_vacuum(s);
1245
1246         touch("/run/systemd/journal/flushed");
1247
1248         return 0;
1249 }
1250
1251 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1252         Server *s = userdata;
1253
1254         assert(s);
1255
1256         log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
1257         server_rotate(s);
1258         server_vacuum(s);
1259
1260         return 0;
1261 }
1262
1263 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1264         Server *s = userdata;
1265
1266         assert(s);
1267
1268         log_received_signal(LOG_INFO, si);
1269
1270         sd_event_exit(s->event, 0);
1271         return 0;
1272 }
1273
1274 static int setup_signals(Server *s) {
1275         sigset_t mask;
1276         int r;
1277
1278         assert(s);
1279
1280         assert_se(sigemptyset(&mask) == 0);
1281         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1282         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1283
1284         r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1285         if (r < 0)
1286                 return r;
1287
1288         r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1289         if (r < 0)
1290                 return r;
1291
1292         r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1293         if (r < 0)
1294                 return r;
1295
1296         r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1297         if (r < 0)
1298                 return r;
1299
1300         return 0;
1301 }
1302
1303 static int server_parse_proc_cmdline(Server *s) {
1304         _cleanup_free_ char *line = NULL;
1305         const char *w, *state;
1306         size_t l;
1307         int r;
1308
1309         r = proc_cmdline(&line);
1310         if (r < 0) {
1311                 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
1312                 return 0;
1313         }
1314
1315         FOREACH_WORD_QUOTED(w, l, line, state) {
1316                 _cleanup_free_ char *word;
1317
1318                 word = strndup(w, l);
1319                 if (!word)
1320                         return -ENOMEM;
1321
1322                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1323                         r = parse_boolean(word + 35);
1324                         if (r < 0)
1325                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1326                         else
1327                                 s->forward_to_syslog = r;
1328                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1329                         r = parse_boolean(word + 33);
1330                         if (r < 0)
1331                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1332                         else
1333                                 s->forward_to_kmsg = r;
1334                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1335                         r = parse_boolean(word + 36);
1336                         if (r < 0)
1337                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1338                         else
1339                                 s->forward_to_console = r;
1340                 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1341                         r = parse_boolean(word + 33);
1342                         if (r < 0)
1343                                 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1344                         else
1345                                 s->forward_to_wall = r;
1346                 } else if (startswith(word, "systemd.journald"))
1347                         log_warning("Invalid systemd.journald parameter. Ignoring.");
1348         }
1349         /* do not warn about state here, since probably systemd already did */
1350
1351         return 0;
1352 }
1353
1354 static int server_parse_config_file(Server *s) {
1355         assert(s);
1356
1357         return config_parse_many("/etc/systemd/journald.conf",
1358                                  CONF_DIRS_NULSTR("systemd/journald.conf"),
1359                                  "Journal\0",
1360                                  config_item_perf_lookup, journald_gperf_lookup,
1361                                  false, s);
1362 }
1363
1364 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1365         Server *s = userdata;
1366
1367         assert(s);
1368
1369         server_sync(s);
1370         return 0;
1371 }
1372
1373 int server_schedule_sync(Server *s, int priority) {
1374         int r;
1375
1376         assert(s);
1377
1378         if (priority <= LOG_CRIT) {
1379                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1380                 server_sync(s);
1381                 return 0;
1382         }
1383
1384         if (s->sync_scheduled)
1385                 return 0;
1386
1387         if (s->sync_interval_usec > 0) {
1388                 usec_t when;
1389
1390                 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1391                 if (r < 0)
1392                         return r;
1393
1394                 when += s->sync_interval_usec;
1395
1396                 if (!s->sync_event_source) {
1397                         r = sd_event_add_time(
1398                                         s->event,
1399                                         &s->sync_event_source,
1400                                         CLOCK_MONOTONIC,
1401                                         when, 0,
1402                                         server_dispatch_sync, s);
1403                         if (r < 0)
1404                                 return r;
1405
1406                         r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1407                 } else {
1408                         r = sd_event_source_set_time(s->sync_event_source, when);
1409                         if (r < 0)
1410                                 return r;
1411
1412                         r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1413                 }
1414                 if (r < 0)
1415                         return r;
1416
1417                 s->sync_scheduled = true;
1418         }
1419
1420         return 0;
1421 }
1422
1423 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1424         Server *s = userdata;
1425
1426         assert(s);
1427
1428         server_cache_hostname(s);
1429         return 0;
1430 }
1431
1432 static int server_open_hostname(Server *s) {
1433         int r;
1434
1435         assert(s);
1436
1437         s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1438         if (s->hostname_fd < 0)
1439                 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1440
1441         r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1442         if (r < 0) {
1443                 /* kernels prior to 3.2 don't support polling this file. Ignore
1444                  * the failure. */
1445                 if (r == -EPERM) {
1446                         log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1447                                         strerror(-r));
1448                         s->hostname_fd = safe_close(s->hostname_fd);
1449                         return 0;
1450                 }
1451
1452                 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1453         }
1454
1455         r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1456         if (r < 0)
1457                 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1458
1459         return 0;
1460 }
1461
1462 int server_init(Server *s) {
1463         int n, r, fd;
1464
1465         assert(s);
1466
1467         zero(*s);
1468         s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = -1;
1469         s->compress = true;
1470         s->seal = true;
1471
1472         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1473         s->sync_scheduled = false;
1474
1475         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1476         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1477
1478         s->forward_to_wall = true;
1479
1480         s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1481
1482         s->max_level_store = LOG_DEBUG;
1483         s->max_level_syslog = LOG_DEBUG;
1484         s->max_level_kmsg = LOG_NOTICE;
1485         s->max_level_console = LOG_INFO;
1486         s->max_level_wall = LOG_EMERG;
1487
1488         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1489         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1490
1491         server_parse_config_file(s);
1492         server_parse_proc_cmdline(s);
1493         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1494                 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1495                           s->rate_limit_interval, s->rate_limit_burst);
1496                 s->rate_limit_interval = s->rate_limit_burst = 0;
1497         }
1498
1499         mkdir_p("/run/systemd/journal", 0755);
1500
1501         s->user_journals = ordered_hashmap_new(NULL);
1502         if (!s->user_journals)
1503                 return log_oom();
1504
1505         s->mmap = mmap_cache_new();
1506         if (!s->mmap)
1507                 return log_oom();
1508
1509         r = sd_event_default(&s->event);
1510         if (r < 0)
1511                 return log_error_errno(r, "Failed to create event loop: %m");
1512
1513         sd_event_set_watchdog(s->event, true);
1514
1515         n = sd_listen_fds(true);
1516         if (n < 0)
1517                 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1518
1519         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1520
1521                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1522
1523                         if (s->native_fd >= 0) {
1524                                 log_error("Too many native sockets passed.");
1525                                 return -EINVAL;
1526                         }
1527
1528                         s->native_fd = fd;
1529
1530                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1531
1532                         if (s->stdout_fd >= 0) {
1533                                 log_error("Too many stdout sockets passed.");
1534                                 return -EINVAL;
1535                         }
1536
1537                         s->stdout_fd = fd;
1538
1539                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1540                            sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1541
1542                         if (s->syslog_fd >= 0) {
1543                                 log_error("Too many /dev/log sockets passed.");
1544                                 return -EINVAL;
1545                         }
1546
1547                         s->syslog_fd = fd;
1548
1549                 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1550
1551                         if (s->audit_fd >= 0) {
1552                                 log_error("Too many audit sockets passed.");
1553                                 return -EINVAL;
1554                         }
1555
1556                         s->audit_fd = fd;
1557
1558                 } else {
1559                         log_warning("Unknown socket passed as file descriptor %d, ignoring.", fd);
1560
1561                         /* Let's close the fd, better be safe than
1562                            sorry. The fd might reference some resource
1563                            that we really want to release if we don't
1564                            make use of it. */
1565
1566                         safe_close(fd);
1567                 }
1568         }
1569
1570         r = server_open_syslog_socket(s);
1571         if (r < 0)
1572                 return r;
1573
1574         r = server_open_native_socket(s);
1575         if (r < 0)
1576                 return r;
1577
1578         r = server_open_stdout_socket(s);
1579         if (r < 0)
1580                 return r;
1581
1582         r = server_open_dev_kmsg(s);
1583         if (r < 0)
1584                 return r;
1585
1586         r = server_open_audit(s);
1587         if (r < 0)
1588                 return r;
1589
1590         r = server_open_kernel_seqnum(s);
1591         if (r < 0)
1592                 return r;
1593
1594         r = server_open_hostname(s);
1595         if (r < 0)
1596                 return r;
1597
1598         r = setup_signals(s);
1599         if (r < 0)
1600                 return r;
1601
1602         s->udev = udev_new();
1603         if (!s->udev)
1604                 return -ENOMEM;
1605
1606         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1607         if (!s->rate_limit)
1608                 return -ENOMEM;
1609
1610         r = cg_get_root_path(&s->cgroup_root);
1611         if (r < 0)
1612                 return r;
1613
1614         server_cache_hostname(s);
1615         server_cache_boot_id(s);
1616         server_cache_machine_id(s);
1617
1618         r = system_journal_open(s, false);
1619         if (r < 0)
1620                 return r;
1621
1622         return 0;
1623 }
1624
1625 void server_maybe_append_tags(Server *s) {
1626 #ifdef HAVE_GCRYPT
1627         JournalFile *f;
1628         Iterator i;
1629         usec_t n;
1630
1631         n = now(CLOCK_REALTIME);
1632
1633         if (s->system_journal)
1634                 journal_file_maybe_append_tag(s->system_journal, n);
1635
1636         ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1637                 journal_file_maybe_append_tag(f, n);
1638 #endif
1639 }
1640
1641 void server_done(Server *s) {
1642         JournalFile *f;
1643         assert(s);
1644
1645         while (s->stdout_streams)
1646                 stdout_stream_free(s->stdout_streams);
1647
1648         if (s->system_journal)
1649                 journal_file_close(s->system_journal);
1650
1651         if (s->runtime_journal)
1652                 journal_file_close(s->runtime_journal);
1653
1654         while ((f = ordered_hashmap_steal_first(s->user_journals)))
1655                 journal_file_close(f);
1656
1657         ordered_hashmap_free(s->user_journals);
1658
1659         sd_event_source_unref(s->syslog_event_source);
1660         sd_event_source_unref(s->native_event_source);
1661         sd_event_source_unref(s->stdout_event_source);
1662         sd_event_source_unref(s->dev_kmsg_event_source);
1663         sd_event_source_unref(s->audit_event_source);
1664         sd_event_source_unref(s->sync_event_source);
1665         sd_event_source_unref(s->sigusr1_event_source);
1666         sd_event_source_unref(s->sigusr2_event_source);
1667         sd_event_source_unref(s->sigterm_event_source);
1668         sd_event_source_unref(s->sigint_event_source);
1669         sd_event_source_unref(s->hostname_event_source);
1670         sd_event_unref(s->event);
1671
1672         safe_close(s->syslog_fd);
1673         safe_close(s->native_fd);
1674         safe_close(s->stdout_fd);
1675         safe_close(s->dev_kmsg_fd);
1676         safe_close(s->audit_fd);
1677         safe_close(s->hostname_fd);
1678
1679         if (s->rate_limit)
1680                 journal_rate_limit_free(s->rate_limit);
1681
1682         if (s->kernel_seqnum)
1683                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1684
1685         free(s->buffer);
1686         free(s->tty_path);
1687         free(s->cgroup_root);
1688         free(s->hostname_field);
1689
1690         if (s->mmap)
1691                 mmap_cache_unref(s->mmap);
1692
1693         if (s->udev)
1694                 udev_unref(s->udev);
1695 }