chiark / gitweb /
journald: proceed even if some sockets are unknown
[elogind.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
26 #include <sys/mman.h>
27 #include <sys/timerfd.h>
28
29 #include <libudev.h>
30
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33 #include "sd-daemon.h"
34 #include "fileio.h"
35 #include "mkdir.h"
36 #include "hashmap.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
40 #include "list.h"
41 #include "missing.h"
42 #include "conf-parser.h"
43 #include "selinux-util.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-rate-limit.h"
48 #include "journald-kmsg.h"
49 #include "journald-syslog.h"
50 #include "journald-stream.h"
51 #include "journald-console.h"
52 #include "journald-native.h"
53 #include "journald-audit.h"
54 #include "journald-server.h"
55
56 #ifdef HAVE_ACL
57 #include <sys/acl.h>
58 #include <acl/libacl.h>
59 #include "acl-util.h"
60 #endif
61
62 #ifdef HAVE_SELINUX
63 #include <selinux/selinux.h>
64 #endif
65
/* Upper bound on the number of per-user journal files kept open at once;
 * find_journal() closes the oldest entry of the user_journals map once
 * this many are open. */
66 #define USER_JOURNALS_MAX 1024
67
/* Default values for tunables. NOTE(review): these are not referenced in the
 * visible part of this file; presumably they are applied when the server is
 * initialized — confirm against server_init(). */
68 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
69 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
70 #define DEFAULT_RATE_LIMIT_BURST 1000
71 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
72
/* How long a value computed by available_space() is served from the cache
 * before the journal directory is scanned again. */
73 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
74
/* String names for the Storage enum values, indexed by enum value. */
75 static const char* const storage_table[_STORAGE_MAX] = {
76         [STORAGE_AUTO] = "auto",
77         [STORAGE_VOLATILE] = "volatile",
78         [STORAGE_PERSISTENT] = "persistent",
79         [STORAGE_NONE] = "none"
80 };
81
/* NOTE(review): these macros are defined elsewhere; presumably they generate
 * the storage <-> string conversion helpers and the config-file parser
 * callback config_parse_storage() from the table above — confirm. */
82 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
83 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
84
/* String names for the SplitMode enum values, indexed by enum value. */
85 static const char* const split_mode_table[_SPLIT_MAX] = {
86         [SPLIT_LOGIN] = "login",
87         [SPLIT_UID] = "uid",
88         [SPLIT_NONE] = "none",
89 };
90
/* Same pattern as for Storage above, applied to SplitMode. */
91 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
92 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
93
94 static uint64_t available_space(Server *s, bool verbose) {
95         char ids[33];
96         _cleanup_free_ char *p = NULL;
97         sd_id128_t machine;
98         struct statvfs ss;
99         uint64_t sum = 0, ss_avail = 0, avail = 0;
100         int r;
101         _cleanup_closedir_ DIR *d = NULL;
102         usec_t ts;
103         const char *f;
104         JournalMetrics *m;
105
106         ts = now(CLOCK_MONOTONIC);
107
108         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
109             && !verbose)
110                 return s->cached_available_space;
111
112         r = sd_id128_get_machine(&machine);
113         if (r < 0)
114                 return 0;
115
116         if (s->system_journal) {
117                 f = "/var/log/journal/";
118                 m = &s->system_metrics;
119         } else {
120                 f = "/run/log/journal/";
121                 m = &s->runtime_metrics;
122         }
123
124         assert(m);
125
126         p = strappend(f, sd_id128_to_string(machine, ids));
127         if (!p)
128                 return 0;
129
130         d = opendir(p);
131         if (!d)
132                 return 0;
133
134         if (fstatvfs(dirfd(d), &ss) < 0)
135                 return 0;
136
137         for (;;) {
138                 struct stat st;
139                 struct dirent *de;
140
141                 errno = 0;
142                 de = readdir(d);
143                 if (!de && errno != 0)
144                         return 0;
145
146                 if (!de)
147                         break;
148
149                 if (!endswith(de->d_name, ".journal") &&
150                     !endswith(de->d_name, ".journal~"))
151                         continue;
152
153                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
154                         continue;
155
156                 if (!S_ISREG(st.st_mode))
157                         continue;
158
159                 sum += (uint64_t) st.st_blocks * 512UL;
160         }
161
162         ss_avail = ss.f_bsize * ss.f_bavail;
163
164         /* If we reached a high mark, we will always allow this much
165          * again, unless usage goes above max_use. This watermark
166          * value is cached so that we don't give up space on pressure,
167          * but hover below the maximum usage. */
168
169         if (m->use < sum)
170                 m->use = sum;
171
172         avail = LESS_BY(ss_avail, m->keep_free);
173
174         s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
175         s->cached_available_space_timestamp = ts;
176
177         if (verbose) {
178                 char    fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
179                         fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
180
181                 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
182                                       "%s journal is using %s (max allowed %s, "
183                                       "trying to leave %s free of %s available → current limit %s).",
184                                       s->system_journal ? "Permanent" : "Runtime",
185                                       format_bytes(fb1, sizeof(fb1), sum),
186                                       format_bytes(fb2, sizeof(fb2), m->max_use),
187                                       format_bytes(fb3, sizeof(fb3), m->keep_free),
188                                       format_bytes(fb4, sizeof(fb4), ss_avail),
189                                       format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
190         }
191
192         return s->cached_available_space;
193 }
194
195 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
196         int r;
197 #ifdef HAVE_ACL
198         acl_t acl;
199         acl_entry_t entry;
200         acl_permset_t permset;
201 #endif
202
203         assert(f);
204
205         r = fchmod(f->fd, 0640);
206         if (r < 0)
207                 log_warning("Failed to fix access mode on %s, ignoring: %s", f->path, strerror(-r));
208
209 #ifdef HAVE_ACL
210         if (uid <= SYSTEM_UID_MAX)
211                 return;
212
213         acl = acl_get_fd(f->fd);
214         if (!acl) {
215                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
216                 return;
217         }
218
219         r = acl_find_uid(acl, uid, &entry);
220         if (r <= 0) {
221
222                 if (acl_create_entry(&acl, &entry) < 0 ||
223                     acl_set_tag_type(entry, ACL_USER) < 0 ||
224                     acl_set_qualifier(entry, &uid) < 0) {
225                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
226                         goto finish;
227                 }
228         }
229
230         /* We do not recalculate the mask unconditionally here,
231          * so that the fchmod() mask above stays intact. */
232         if (acl_get_permset(entry, &permset) < 0 ||
233             acl_add_perm(permset, ACL_READ) < 0 ||
234             calc_acl_mask_if_needed(&acl) < 0) {
235                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
236                 goto finish;
237         }
238
239         if (acl_set_fd(f->fd, acl) < 0)
240                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
241
242 finish:
243         acl_free(acl);
244 #endif
245 }
246
247 static JournalFile* find_journal(Server *s, uid_t uid) {
248         _cleanup_free_ char *p = NULL;
249         int r;
250         JournalFile *f;
251         sd_id128_t machine;
252
253         assert(s);
254
255         /* We split up user logs only on /var, not on /run. If the
256          * runtime file is open, we write to it exclusively, in order
257          * to guarantee proper order as soon as we flush /run to
258          * /var and close the runtime file. */
259
260         if (s->runtime_journal)
261                 return s->runtime_journal;
262
263         if (uid <= SYSTEM_UID_MAX)
264                 return s->system_journal;
265
266         r = sd_id128_get_machine(&machine);
267         if (r < 0)
268                 return s->system_journal;
269
270         f = ordered_hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
271         if (f)
272                 return f;
273
274         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
275                      SD_ID128_FORMAT_VAL(machine), uid) < 0)
276                 return s->system_journal;
277
278         while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
279                 /* Too many open? Then let's close one */
280                 f = ordered_hashmap_steal_first(s->user_journals);
281                 assert(f);
282                 journal_file_close(f);
283         }
284
285         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
286         if (r < 0)
287                 return s->system_journal;
288
289         server_fix_perms(s, f, uid);
290
291         r = ordered_hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
292         if (r < 0) {
293                 journal_file_close(f);
294                 return s->system_journal;
295         }
296
297         return f;
298 }
299
300 static int do_rotate(Server *s, JournalFile **f, const char* name,
301                      bool seal, uint32_t uid) {
302         int r;
303         assert(s);
304
305         if (!*f)
306                 return -EINVAL;
307
308         r = journal_file_rotate(f, s->compress, seal);
309         if (r < 0)
310                 if (*f)
311                         log_error("Failed to rotate %s: %s",
312                                   (*f)->path, strerror(-r));
313                 else
314                         log_error("Failed to create new %s journal: %s",
315                                   name, strerror(-r));
316         else
317                 server_fix_perms(s, *f, uid);
318         return r;
319 }
320
321 void server_rotate(Server *s) {
322         JournalFile *f;
323         void *k;
324         Iterator i;
325         int r;
326
327         log_debug("Rotating...");
328
329         do_rotate(s, &s->runtime_journal, "runtime", false, 0);
330         do_rotate(s, &s->system_journal, "system", s->seal, 0);
331
332         ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
333                 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
334                 if (r >= 0)
335                         ordered_hashmap_replace(s->user_journals, k, f);
336                 else if (!f)
337                         /* Old file has been closed and deallocated */
338                         ordered_hashmap_remove(s->user_journals, k);
339         }
340 }
341
342 void server_sync(Server *s) {
343         JournalFile *f;
344         void *k;
345         Iterator i;
346         int r;
347
348         if (s->system_journal) {
349                 r = journal_file_set_offline(s->system_journal);
350                 if (r < 0)
351                         log_error("Failed to sync system journal: %s", strerror(-r));
352         }
353
354         ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
355                 r = journal_file_set_offline(f);
356                 if (r < 0)
357                         log_error("Failed to sync user journal: %s", strerror(-r));
358         }
359
360         if (s->sync_event_source) {
361                 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
362                 if (r < 0)
363                         log_error("Failed to disable sync timer source: %s", strerror(-r));
364         }
365
366         s->sync_scheduled = false;
367 }
368
369 static void do_vacuum(Server *s, char *ids, JournalFile *f, const char* path,
370                       JournalMetrics *metrics) {
371         char *p;
372         int r;
373
374         if (!f)
375                 return;
376
377         p = strappenda(path, ids);
378         r = journal_directory_vacuum(p, metrics->max_use, s->max_retention_usec, &s->oldest_file_usec, false);
379         if (r < 0 && r != -ENOENT)
380                 log_error("Failed to vacuum %s: %s", p, strerror(-r));
381 }
382
383 void server_vacuum(Server *s) {
384         char ids[33];
385         sd_id128_t machine;
386         int r;
387
388         log_debug("Vacuuming...");
389
390         s->oldest_file_usec = 0;
391
392         r = sd_id128_get_machine(&machine);
393         if (r < 0) {
394                 log_error("Failed to get machine ID: %s", strerror(-r));
395                 return;
396         }
397         sd_id128_to_string(machine, ids);
398
399         do_vacuum(s, ids, s->system_journal, "/var/log/journal/", &s->system_metrics);
400         do_vacuum(s, ids, s->runtime_journal, "/run/log/journal/", &s->runtime_metrics);
401
402         s->cached_available_space_timestamp = 0;
403 }
404
405 static void server_cache_machine_id(Server *s) {
406         sd_id128_t id;
407         int r;
408
409         assert(s);
410
411         r = sd_id128_get_machine(&id);
412         if (r < 0)
413                 return;
414
415         sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
416 }
417
418 static void server_cache_boot_id(Server *s) {
419         sd_id128_t id;
420         int r;
421
422         assert(s);
423
424         r = sd_id128_get_boot(&id);
425         if (r < 0)
426                 return;
427
428         sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
429 }
430
431 static void server_cache_hostname(Server *s) {
432         _cleanup_free_ char *t = NULL;
433         char *x;
434
435         assert(s);
436
437         t = gethostname_malloc();
438         if (!t)
439                 return;
440
441         x = strappend("_HOSTNAME=", t);
442         if (!x)
443                 return;
444
445         free(s->hostname_field);
446         s->hostname_field = x;
447 }
448
449 bool shall_try_append_again(JournalFile *f, int r) {
450
451         /* -E2BIG            Hit configured limit
452            -EFBIG            Hit fs limit
453            -EDQUOT           Quota limit hit
454            -ENOSPC           Disk full
455            -EHOSTDOWN        Other machine
456            -EBUSY            Unclean shutdown
457            -EPROTONOSUPPORT  Unsupported feature
458            -EBADMSG          Corrupted
459            -ENODATA          Truncated
460            -ESHUTDOWN        Already archived */
461
462         if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
463                 log_debug("%s: Allocation limit reached, rotating.", f->path);
464         else if (r == -EHOSTDOWN)
465                 log_info("%s: Journal file from other machine, rotating.", f->path);
466         else if (r == -EBUSY)
467                 log_info("%s: Unclean shutdown, rotating.", f->path);
468         else if (r == -EPROTONOSUPPORT)
469                 log_info("%s: Unsupported feature, rotating.", f->path);
470         else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
471                 log_warning("%s: Journal file corrupted, rotating.", f->path);
472         else
473                 return false;
474
475         return true;
476 }
477
478 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
479         JournalFile *f;
480         bool vacuumed = false;
481         int r;
482
483         assert(s);
484         assert(iovec);
485         assert(n > 0);
486
487         f = find_journal(s, uid);
488         if (!f)
489                 return;
490
491         if (journal_file_rotate_suggested(f, s->max_file_usec)) {
492                 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
493                 server_rotate(s);
494                 server_vacuum(s);
495                 vacuumed = true;
496
497                 f = find_journal(s, uid);
498                 if (!f)
499                         return;
500         }
501
502         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
503         if (r >= 0) {
504                 server_schedule_sync(s, priority);
505                 return;
506         }
507
508         if (vacuumed || !shall_try_append_again(f, r)) {
509                 size_t size = 0;
510                 unsigned i;
511                 for (i = 0; i < n; i++)
512                         size += iovec[i].iov_len;
513
514                 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
515                 return;
516         }
517
518         server_rotate(s);
519         server_vacuum(s);
520
521         f = find_journal(s, uid);
522         if (!f)
523                 return;
524
525         log_debug("Retrying write.");
526         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
527         if (r < 0) {
528                 size_t size = 0;
529                 unsigned i;
530                 for (i = 0; i < n; i++)
531                         size += iovec[i].iov_len;
532
533                 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
534         } else
535                 server_schedule_sync(s, priority);
536 }
537
538 static void dispatch_message_real(
539                 Server *s,
540                 struct iovec *iovec, unsigned n, unsigned m,
541                 const struct ucred *ucred,
542                 const struct timeval *tv,
543                 const char *label, size_t label_len,
544                 const char *unit_id,
545                 int priority,
546                 pid_t object_pid) {
547
548         char    pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
549                 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
550                 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
551                 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
552                 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
553                 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
554                 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
555                 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
556         uid_t object_uid;
557         gid_t object_gid;
558         char *x;
559         int r;
560         char *t, *c;
561         uid_t realuid = 0, owner = 0, journal_uid;
562         bool owner_valid = false;
563 #ifdef HAVE_AUDIT
564         char    audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
565                 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
566                 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
567                 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
568
569         uint32_t audit;
570         uid_t loginuid;
571 #endif
572
573         assert(s);
574         assert(iovec);
575         assert(n > 0);
576         assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
577
578         if (ucred) {
579                 realuid = ucred->uid;
580
581                 sprintf(pid, "_PID="PID_FMT, ucred->pid);
582                 IOVEC_SET_STRING(iovec[n++], pid);
583
584                 sprintf(uid, "_UID="UID_FMT, ucred->uid);
585                 IOVEC_SET_STRING(iovec[n++], uid);
586
587                 sprintf(gid, "_GID="GID_FMT, ucred->gid);
588                 IOVEC_SET_STRING(iovec[n++], gid);
589
590                 r = get_process_comm(ucred->pid, &t);
591                 if (r >= 0) {
592                         x = strappenda("_COMM=", t);
593                         free(t);
594                         IOVEC_SET_STRING(iovec[n++], x);
595                 }
596
597                 r = get_process_exe(ucred->pid, &t);
598                 if (r >= 0) {
599                         x = strappenda("_EXE=", t);
600                         free(t);
601                         IOVEC_SET_STRING(iovec[n++], x);
602                 }
603
604                 r = get_process_cmdline(ucred->pid, 0, false, &t);
605                 if (r >= 0) {
606                         x = strappenda("_CMDLINE=", t);
607                         free(t);
608                         IOVEC_SET_STRING(iovec[n++], x);
609                 }
610
611                 r = get_process_capeff(ucred->pid, &t);
612                 if (r >= 0) {
613                         x = strappenda("_CAP_EFFECTIVE=", t);
614                         free(t);
615                         IOVEC_SET_STRING(iovec[n++], x);
616                 }
617
618 #ifdef HAVE_AUDIT
619                 r = audit_session_from_pid(ucred->pid, &audit);
620                 if (r >= 0) {
621                         sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
622                         IOVEC_SET_STRING(iovec[n++], audit_session);
623                 }
624
625                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
626                 if (r >= 0) {
627                         sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
628                         IOVEC_SET_STRING(iovec[n++], audit_loginuid);
629                 }
630 #endif
631
632                 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
633                 if (r >= 0) {
634                         char *session = NULL;
635
636                         x = strappenda("_SYSTEMD_CGROUP=", c);
637                         IOVEC_SET_STRING(iovec[n++], x);
638
639                         r = cg_path_get_session(c, &t);
640                         if (r >= 0) {
641                                 session = strappenda("_SYSTEMD_SESSION=", t);
642                                 free(t);
643                                 IOVEC_SET_STRING(iovec[n++], session);
644                         }
645
646                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
647                                 owner_valid = true;
648
649                                 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
650                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
651                         }
652
653                         if (cg_path_get_unit(c, &t) >= 0) {
654                                 x = strappenda("_SYSTEMD_UNIT=", t);
655                                 free(t);
656                                 IOVEC_SET_STRING(iovec[n++], x);
657                         } else if (unit_id && !session) {
658                                 x = strappenda("_SYSTEMD_UNIT=", unit_id);
659                                 IOVEC_SET_STRING(iovec[n++], x);
660                         }
661
662                         if (cg_path_get_user_unit(c, &t) >= 0) {
663                                 x = strappenda("_SYSTEMD_USER_UNIT=", t);
664                                 free(t);
665                                 IOVEC_SET_STRING(iovec[n++], x);
666                         } else if (unit_id && session) {
667                                 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
668                                 IOVEC_SET_STRING(iovec[n++], x);
669                         }
670
671                         if (cg_path_get_slice(c, &t) >= 0) {
672                                 x = strappenda("_SYSTEMD_SLICE=", t);
673                                 free(t);
674                                 IOVEC_SET_STRING(iovec[n++], x);
675                         }
676
677                         free(c);
678                 } else if (unit_id) {
679                         x = strappenda("_SYSTEMD_UNIT=", unit_id);
680                         IOVEC_SET_STRING(iovec[n++], x);
681                 }
682
683 #ifdef HAVE_SELINUX
684                 if (mac_selinux_use()) {
685                         if (label) {
686                                 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
687
688                                 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
689                                 IOVEC_SET_STRING(iovec[n++], x);
690                         } else {
691                                 security_context_t con;
692
693                                 if (getpidcon(ucred->pid, &con) >= 0) {
694                                         x = strappenda("_SELINUX_CONTEXT=", con);
695
696                                         freecon(con);
697                                         IOVEC_SET_STRING(iovec[n++], x);
698                                 }
699                         }
700                 }
701 #endif
702         }
703         assert(n <= m);
704
705         if (object_pid) {
706                 r = get_process_uid(object_pid, &object_uid);
707                 if (r >= 0) {
708                         sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
709                         IOVEC_SET_STRING(iovec[n++], o_uid);
710                 }
711
712                 r = get_process_gid(object_pid, &object_gid);
713                 if (r >= 0) {
714                         sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
715                         IOVEC_SET_STRING(iovec[n++], o_gid);
716                 }
717
718                 r = get_process_comm(object_pid, &t);
719                 if (r >= 0) {
720                         x = strappenda("OBJECT_COMM=", t);
721                         free(t);
722                         IOVEC_SET_STRING(iovec[n++], x);
723                 }
724
725                 r = get_process_exe(object_pid, &t);
726                 if (r >= 0) {
727                         x = strappenda("OBJECT_EXE=", t);
728                         free(t);
729                         IOVEC_SET_STRING(iovec[n++], x);
730                 }
731
732                 r = get_process_cmdline(object_pid, 0, false, &t);
733                 if (r >= 0) {
734                         x = strappenda("OBJECT_CMDLINE=", t);
735                         free(t);
736                         IOVEC_SET_STRING(iovec[n++], x);
737                 }
738
739 #ifdef HAVE_AUDIT
740                 r = audit_session_from_pid(object_pid, &audit);
741                 if (r >= 0) {
742                         sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
743                         IOVEC_SET_STRING(iovec[n++], o_audit_session);
744                 }
745
746                 r = audit_loginuid_from_pid(object_pid, &loginuid);
747                 if (r >= 0) {
748                         sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
749                         IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
750                 }
751 #endif
752
753                 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
754                 if (r >= 0) {
755                         x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
756                         IOVEC_SET_STRING(iovec[n++], x);
757
758                         r = cg_path_get_session(c, &t);
759                         if (r >= 0) {
760                                 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
761                                 free(t);
762                                 IOVEC_SET_STRING(iovec[n++], x);
763                         }
764
765                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
766                                 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
767                                 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
768                         }
769
770                         if (cg_path_get_unit(c, &t) >= 0) {
771                                 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
772                                 free(t);
773                                 IOVEC_SET_STRING(iovec[n++], x);
774                         }
775
776                         if (cg_path_get_user_unit(c, &t) >= 0) {
777                                 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
778                                 free(t);
779                                 IOVEC_SET_STRING(iovec[n++], x);
780                         }
781
782                         free(c);
783                 }
784         }
785         assert(n <= m);
786
787         if (tv) {
788                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
789                 IOVEC_SET_STRING(iovec[n++], source_time);
790         }
791
792         /* Note that strictly speaking storing the boot id here is
793          * redundant since the entry includes this in-line
794          * anyway. However, we need this indexed, too. */
795         if (!isempty(s->boot_id_field))
796                 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
797
798         if (!isempty(s->machine_id_field))
799                 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
800
801         if (!isempty(s->hostname_field))
802                 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
803
804         assert(n <= m);
805
806         if (s->split_mode == SPLIT_UID && realuid > 0)
807                 /* Split up strictly by any UID */
808                 journal_uid = realuid;
809         else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
810                 /* Split up by login UIDs.  We do this only if the
811                  * realuid is not root, in order not to accidentally
812                  * leak privileged information to the user that is
813                  * logged by a privileged process that is part of an
814                  * unprivileged session.*/
815                 journal_uid = owner;
816         else
817                 journal_uid = 0;
818
819         write_to_journal(s, journal_uid, iovec, n, priority);
820 }
821
822 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
823         char mid[11 + 32 + 1];
824         char buffer[16 + LINE_MAX + 1];
825         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
826         int n = 0;
827         va_list ap;
828         struct ucred ucred = {};
829
830         assert(s);
831         assert(format);
832
833         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
834         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
835
836         memcpy(buffer, "MESSAGE=", 8);
837         va_start(ap, format);
838         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
839         va_end(ap);
840         char_array_0(buffer);
841         IOVEC_SET_STRING(iovec[n++], buffer);
842
843         if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
844                 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
845                 char_array_0(mid);
846                 IOVEC_SET_STRING(iovec[n++], mid);
847         }
848
849         ucred.pid = getpid();
850         ucred.uid = getuid();
851         ucred.gid = getgid();
852
853         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
854 }
855
856 void server_dispatch_message(
857                 Server *s,
858                 struct iovec *iovec, unsigned n, unsigned m,
859                 const struct ucred *ucred,
860                 const struct timeval *tv,
861                 const char *label, size_t label_len,
862                 const char *unit_id,
863                 int priority,
864                 pid_t object_pid) {
865
866         int rl, r;
867         _cleanup_free_ char *path = NULL;
868         char *c;
869
870         assert(s);
871         assert(iovec || n == 0);
872
873         if (n == 0)
874                 return;
875
876         if (LOG_PRI(priority) > s->max_level_store)
877                 return;
878
879         /* Stop early in case the information will not be stored
880          * in a journal. */
881         if (s->storage == STORAGE_NONE)
882                 return;
883
884         if (!ucred)
885                 goto finish;
886
887         r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
888         if (r < 0)
889                 goto finish;
890
891         /* example: /user/lennart/3/foobar
892          *          /system/dbus.service/foobar
893          *
894          * So let's cut of everything past the third /, since that is
895          * where user directories start */
896
897         c = strchr(path, '/');
898         if (c) {
899                 c = strchr(c+1, '/');
900                 if (c) {
901                         c = strchr(c+1, '/');
902                         if (c)
903                                 *c = 0;
904                 }
905         }
906
907         rl = journal_rate_limit_test(s->rate_limit, path,
908                                      priority & LOG_PRIMASK, available_space(s, false));
909
910         if (rl == 0)
911                 return;
912
913         /* Write a suppression message if we suppressed something */
914         if (rl > 1)
915                 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
916                                       "Suppressed %u messages from %s", rl - 1, path);
917
918 finish:
919         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
920 }
921
922
923 static int system_journal_open(Server *s, bool flush_requested) {
924         int r;
925         char *fn;
926         sd_id128_t machine;
927         char ids[33];
928
929         r = sd_id128_get_machine(&machine);
930         if (r < 0) {
931                 log_error("Failed to get machine id: %s", strerror(-r));
932                 return r;
933         }
934
935         sd_id128_to_string(machine, ids);
936
937         if (!s->system_journal &&
938             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
939             (flush_requested
940              || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
941
942                 /* If in auto mode: first try to create the machine
943                  * path, but not the prefix.
944                  *
945                  * If in persistent mode: create /var/log/journal and
946                  * the machine path */
947
948                 if (s->storage == STORAGE_PERSISTENT)
949                         (void) mkdir("/var/log/journal/", 0755);
950
951                 fn = strappenda("/var/log/journal/", ids);
952                 (void) mkdir(fn, 0755);
953
954                 fn = strappenda(fn, "/system.journal");
955                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
956
957                 if (r >= 0)
958                         server_fix_perms(s, s->system_journal, 0);
959                 else if (r < 0) {
960                         if (r != -ENOENT && r != -EROFS)
961                                 log_warning("Failed to open system journal: %s", strerror(-r));
962
963                         r = 0;
964                 }
965         }
966
967         if (!s->runtime_journal &&
968             (s->storage != STORAGE_NONE)) {
969
970                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
971                 if (!fn)
972                         return -ENOMEM;
973
974                 if (s->system_journal) {
975
976                         /* Try to open the runtime journal, but only
977                          * if it already exists, so that we can flush
978                          * it into the system journal */
979
980                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
981                         free(fn);
982
983                         if (r < 0) {
984                                 if (r != -ENOENT)
985                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
986
987                                 r = 0;
988                         }
989
990                 } else {
991
992                         /* OK, we really need the runtime journal, so create
993                          * it if necessary. */
994
995                         (void) mkdir("/run/log", 0755);
996                         (void) mkdir("/run/log/journal", 0755);
997                         (void) mkdir_parents(fn, 0750);
998
999                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1000                         free(fn);
1001
1002                         if (r < 0) {
1003                                 log_error("Failed to open runtime journal: %s", strerror(-r));
1004                                 return r;
1005                         }
1006                 }
1007
1008                 if (s->runtime_journal)
1009                         server_fix_perms(s, s->runtime_journal, 0);
1010         }
1011
1012         available_space(s, true);
1013
1014         return r;
1015 }
1016
1017 int server_flush_to_var(Server *s) {
1018         sd_id128_t machine;
1019         sd_journal *j = NULL;
1020         char ts[FORMAT_TIMESPAN_MAX];
1021         usec_t start;
1022         unsigned n = 0;
1023         int r;
1024
1025         assert(s);
1026
1027         if (s->storage != STORAGE_AUTO &&
1028             s->storage != STORAGE_PERSISTENT)
1029                 return 0;
1030
1031         if (!s->runtime_journal)
1032                 return 0;
1033
1034         system_journal_open(s, true);
1035
1036         if (!s->system_journal)
1037                 return 0;
1038
1039         log_debug("Flushing to /var...");
1040
1041         start = now(CLOCK_MONOTONIC);
1042
1043         r = sd_id128_get_machine(&machine);
1044         if (r < 0)
1045                 return r;
1046
1047         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1048         if (r < 0) {
1049                 log_error("Failed to read runtime journal: %s", strerror(-r));
1050                 return r;
1051         }
1052
1053         sd_journal_set_data_threshold(j, 0);
1054
1055         SD_JOURNAL_FOREACH(j) {
1056                 Object *o = NULL;
1057                 JournalFile *f;
1058
1059                 f = j->current_file;
1060                 assert(f && f->current_offset > 0);
1061
1062                 n++;
1063
1064                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1065                 if (r < 0) {
1066                         log_error("Can't read entry: %s", strerror(-r));
1067                         goto finish;
1068                 }
1069
1070                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1071                 if (r >= 0)
1072                         continue;
1073
1074                 if (!shall_try_append_again(s->system_journal, r)) {
1075                         log_error("Can't write entry: %s", strerror(-r));
1076                         goto finish;
1077                 }
1078
1079                 server_rotate(s);
1080                 server_vacuum(s);
1081
1082                 if (!s->system_journal) {
1083                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1084                         r = -EIO;
1085                         goto finish;
1086                 }
1087
1088                 log_debug("Retrying write.");
1089                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1090                 if (r < 0) {
1091                         log_error("Can't write entry: %s", strerror(-r));
1092                         goto finish;
1093                 }
1094         }
1095
1096 finish:
1097         journal_file_post_change(s->system_journal);
1098
1099         journal_file_close(s->runtime_journal);
1100         s->runtime_journal = NULL;
1101
1102         if (r >= 0)
1103                 rm_rf("/run/log/journal", false, true, false);
1104
1105         sd_journal_close(j);
1106
1107         server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1108
1109         return r;
1110 }
1111
1112 int process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1113         Server *s = userdata;
1114
1115         assert(s);
1116         assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1117
1118         if (revents != EPOLLIN) {
1119                 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1120                 return -EIO;
1121         }
1122
1123         for (;;) {
1124                 struct ucred *ucred = NULL;
1125                 struct timeval *tv = NULL;
1126                 struct cmsghdr *cmsg;
1127                 char *label = NULL;
1128                 size_t label_len = 0;
1129                 struct iovec iovec;
1130
1131                 union {
1132                         struct cmsghdr cmsghdr;
1133
1134                         /* We use NAME_MAX space for the SELinux label
1135                          * here. The kernel currently enforces no
1136                          * limit, but according to suggestions from
1137                          * the SELinux people this will change and it
1138                          * will probably be identical to NAME_MAX. For
1139                          * now we use that, but this should be updated
1140                          * one day when the final limit is known.*/
1141                         uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1142                                     CMSG_SPACE(sizeof(struct timeval)) +
1143                                     CMSG_SPACE(sizeof(int)) + /* fd */
1144                                     CMSG_SPACE(NAME_MAX)]; /* selinux label */
1145                 } control = {};
1146                 union sockaddr_union sa = {};
1147                 struct msghdr msghdr = {
1148                         .msg_iov = &iovec,
1149                         .msg_iovlen = 1,
1150                         .msg_control = &control,
1151                         .msg_controllen = sizeof(control),
1152                         .msg_name = &sa,
1153                         .msg_namelen = sizeof(sa),
1154                 };
1155
1156                 ssize_t n;
1157                 int *fds = NULL;
1158                 unsigned n_fds = 0;
1159                 int v = 0;
1160                 size_t m;
1161
1162                 /* Try to get the right size, if we can. (Not all
1163                  * sockets support SIOCINQ, hence we just try, but
1164                  * don't rely on it. */
1165                 (void) ioctl(fd, SIOCINQ, &v);
1166
1167                 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful!*/
1168                 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1169                                     (size_t) LINE_MAX,
1170                                     ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1171
1172                 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1173                         return log_oom();
1174
1175                 iovec.iov_base = s->buffer;
1176                 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1177
1178                 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1179                 if (n < 0) {
1180                         if (errno == EINTR || errno == EAGAIN)
1181                                 return 0;
1182
1183                         log_error("recvmsg() failed: %m");
1184                         return -errno;
1185                 }
1186
1187                 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1188
1189                         if (cmsg->cmsg_level == SOL_SOCKET &&
1190                             cmsg->cmsg_type == SCM_CREDENTIALS &&
1191                             cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1192                                 ucred = (struct ucred*) CMSG_DATA(cmsg);
1193                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1194                                  cmsg->cmsg_type == SCM_SECURITY) {
1195                                 label = (char*) CMSG_DATA(cmsg);
1196                                 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1197                         } else if (cmsg->cmsg_level == SOL_SOCKET &&
1198                                    cmsg->cmsg_type == SO_TIMESTAMP &&
1199                                    cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1200                                 tv = (struct timeval*) CMSG_DATA(cmsg);
1201                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1202                                  cmsg->cmsg_type == SCM_RIGHTS) {
1203                                 fds = (int*) CMSG_DATA(cmsg);
1204                                 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1205                         }
1206                 }
1207
1208                 /* And a trailing NUL, just in case */
1209                 s->buffer[n] = 0;
1210
1211                 if (fd == s->syslog_fd) {
1212                         if (n > 0 && n_fds == 0)
1213                                 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1214                         else if (n_fds > 0)
1215                                 log_warning("Got file descriptors via syslog socket. Ignoring.");
1216
1217                 } else if (fd == s->native_fd) {
1218                         if (n > 0 && n_fds == 0)
1219                                 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1220                         else if (n == 0 && n_fds == 1)
1221                                 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1222                         else if (n_fds > 0)
1223                                 log_warning("Got too many file descriptors via native socket. Ignoring.");
1224
1225                 } else {
1226                         assert(fd == s->audit_fd);
1227
1228                         if (n > 0 && n_fds == 0)
1229                                 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1230                         else if (n_fds > 0)
1231                                 log_warning("Got file descriptors via audit socket. Ignoring.");
1232                 }
1233
1234                 close_many(fds, n_fds);
1235         }
1236 }
1237
1238 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1239         Server *s = userdata;
1240
1241         assert(s);
1242
1243         log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
1244
1245         server_flush_to_var(s);
1246         server_sync(s);
1247         server_vacuum(s);
1248
1249         touch("/run/systemd/journal/flushed");
1250
1251         return 0;
1252 }
1253
1254 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1255         Server *s = userdata;
1256
1257         assert(s);
1258
1259         log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
1260         server_rotate(s);
1261         server_vacuum(s);
1262
1263         return 0;
1264 }
1265
1266 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1267         Server *s = userdata;
1268
1269         assert(s);
1270
1271         log_received_signal(LOG_INFO, si);
1272
1273         sd_event_exit(s->event, 0);
1274         return 0;
1275 }
1276
1277 static int setup_signals(Server *s) {
1278         sigset_t mask;
1279         int r;
1280
1281         assert(s);
1282
1283         assert_se(sigemptyset(&mask) == 0);
1284         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1285         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1286
1287         r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1288         if (r < 0)
1289                 return r;
1290
1291         r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1292         if (r < 0)
1293                 return r;
1294
1295         r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1296         if (r < 0)
1297                 return r;
1298
1299         r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1300         if (r < 0)
1301                 return r;
1302
1303         return 0;
1304 }
1305
1306 static int server_parse_proc_cmdline(Server *s) {
1307         _cleanup_free_ char *line = NULL;
1308         const char *w, *state;
1309         size_t l;
1310         int r;
1311
1312         r = proc_cmdline(&line);
1313         if (r < 0) {
1314                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1315                 return 0;
1316         }
1317
1318         FOREACH_WORD_QUOTED(w, l, line, state) {
1319                 _cleanup_free_ char *word;
1320
1321                 word = strndup(w, l);
1322                 if (!word)
1323                         return -ENOMEM;
1324
1325                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1326                         r = parse_boolean(word + 35);
1327                         if (r < 0)
1328                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1329                         else
1330                                 s->forward_to_syslog = r;
1331                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1332                         r = parse_boolean(word + 33);
1333                         if (r < 0)
1334                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1335                         else
1336                                 s->forward_to_kmsg = r;
1337                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1338                         r = parse_boolean(word + 36);
1339                         if (r < 0)
1340                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1341                         else
1342                                 s->forward_to_console = r;
1343                 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1344                         r = parse_boolean(word + 33);
1345                         if (r < 0)
1346                                 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1347                         else
1348                                 s->forward_to_wall = r;
1349                 } else if (startswith(word, "systemd.journald"))
1350                         log_warning("Invalid systemd.journald parameter. Ignoring.");
1351         }
1352         /* do not warn about state here, since probably systemd already did */
1353
1354         return 0;
1355 }
1356
1357 static int server_parse_config_file(Server *s) {
1358         assert(s);
1359
1360         return config_parse(NULL, "/etc/systemd/journald.conf", NULL,
1361                             "Journal\0",
1362                             config_item_perf_lookup, journald_gperf_lookup,
1363                             false, false, true, s);
1364 }
1365
1366 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1367         Server *s = userdata;
1368
1369         assert(s);
1370
1371         server_sync(s);
1372         return 0;
1373 }
1374
1375 int server_schedule_sync(Server *s, int priority) {
1376         int r;
1377
1378         assert(s);
1379
1380         if (priority <= LOG_CRIT) {
1381                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1382                 server_sync(s);
1383                 return 0;
1384         }
1385
1386         if (s->sync_scheduled)
1387                 return 0;
1388
1389         if (s->sync_interval_usec > 0) {
1390                 usec_t when;
1391
1392                 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1393                 if (r < 0)
1394                         return r;
1395
1396                 when += s->sync_interval_usec;
1397
1398                 if (!s->sync_event_source) {
1399                         r = sd_event_add_time(
1400                                         s->event,
1401                                         &s->sync_event_source,
1402                                         CLOCK_MONOTONIC,
1403                                         when, 0,
1404                                         server_dispatch_sync, s);
1405                         if (r < 0)
1406                                 return r;
1407
1408                         r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1409                 } else {
1410                         r = sd_event_source_set_time(s->sync_event_source, when);
1411                         if (r < 0)
1412                                 return r;
1413
1414                         r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1415                 }
1416                 if (r < 0)
1417                         return r;
1418
1419                 s->sync_scheduled = true;
1420         }
1421
1422         return 0;
1423 }
1424
1425 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1426         Server *s = userdata;
1427
1428         assert(s);
1429
1430         server_cache_hostname(s);
1431         return 0;
1432 }
1433
1434 static int server_open_hostname(Server *s) {
1435         int r;
1436
1437         assert(s);
1438
1439         s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1440         if (s->hostname_fd < 0) {
1441                 log_error("Failed to open /proc/sys/kernel/hostname: %m");
1442                 return -errno;
1443         }
1444
1445         r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1446         if (r < 0) {
1447                 /* kernels prior to 3.2 don't support polling this file. Ignore
1448                  * the failure. */
1449                 if (r == -EPERM) {
1450                         log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1451                                         strerror(-r));
1452                         s->hostname_fd = safe_close(s->hostname_fd);
1453                         return 0;
1454                 }
1455
1456                 log_error("Failed to register hostname fd in event loop: %s", strerror(-r));
1457                 return r;
1458         }
1459
1460         r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1461         if (r < 0) {
1462                 log_error("Failed to adjust priority of host name event source: %s", strerror(-r));
1463                 return r;
1464         }
1465
1466         return 0;
1467 }
1468
1469 int server_init(Server *s) {
1470         int n, r, fd;
1471
1472         assert(s);
1473
1474         zero(*s);
1475         s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = -1;
1476         s->compress = true;
1477         s->seal = true;
1478
1479         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1480         s->sync_scheduled = false;
1481
1482         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1483         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1484
1485         s->forward_to_wall = true;
1486
1487         s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1488
1489         s->max_level_store = LOG_DEBUG;
1490         s->max_level_syslog = LOG_DEBUG;
1491         s->max_level_kmsg = LOG_NOTICE;
1492         s->max_level_console = LOG_INFO;
1493         s->max_level_wall = LOG_EMERG;
1494
1495         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1496         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1497
1498         server_parse_config_file(s);
1499         server_parse_proc_cmdline(s);
1500         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1501                 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1502                           s->rate_limit_interval, s->rate_limit_burst);
1503                 s->rate_limit_interval = s->rate_limit_burst = 0;
1504         }
1505
1506         mkdir_p("/run/systemd/journal", 0755);
1507
1508         s->user_journals = ordered_hashmap_new(NULL);
1509         if (!s->user_journals)
1510                 return log_oom();
1511
1512         s->mmap = mmap_cache_new();
1513         if (!s->mmap)
1514                 return log_oom();
1515
1516         r = sd_event_default(&s->event);
1517         if (r < 0) {
1518                 log_error("Failed to create event loop: %s", strerror(-r));
1519                 return r;
1520         }
1521
1522         sd_event_set_watchdog(s->event, true);
1523
1524         n = sd_listen_fds(true);
1525         if (n < 0) {
1526                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1527                 return n;
1528         }
1529
1530         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1531
1532                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1533
1534                         if (s->native_fd >= 0) {
1535                                 log_error("Too many native sockets passed.");
1536                                 return -EINVAL;
1537                         }
1538
1539                         s->native_fd = fd;
1540
1541                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1542
1543                         if (s->stdout_fd >= 0) {
1544                                 log_error("Too many stdout sockets passed.");
1545                                 return -EINVAL;
1546                         }
1547
1548                         s->stdout_fd = fd;
1549
1550                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1551                            sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1552
1553                         if (s->syslog_fd >= 0) {
1554                                 log_error("Too many /dev/log sockets passed.");
1555                                 return -EINVAL;
1556                         }
1557
1558                         s->syslog_fd = fd;
1559
1560                 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1561
1562                         if (s->audit_fd >= 0) {
1563                                 log_error("Too many audit sockets passed.");
1564                                 return -EINVAL;
1565                         }
1566
1567                         s->audit_fd = fd;
1568
1569                 } else
1570                         log_error("Unknown socket passed as file descriptor %d, ignoring.", fd);
1571         }
1572
1573         r = server_open_syslog_socket(s);
1574         if (r < 0)
1575                 return r;
1576
1577         r = server_open_native_socket(s);
1578         if (r < 0)
1579                 return r;
1580
1581         r = server_open_stdout_socket(s);
1582         if (r < 0)
1583                 return r;
1584
1585         r = server_open_dev_kmsg(s);
1586         if (r < 0)
1587                 return r;
1588
1589         r = server_open_audit(s);
1590         if (r < 0)
1591                 return r;
1592
1593         r = server_open_kernel_seqnum(s);
1594         if (r < 0)
1595                 return r;
1596
1597         r = server_open_hostname(s);
1598         if (r < 0)
1599                 return r;
1600
1601         r = setup_signals(s);
1602         if (r < 0)
1603                 return r;
1604
1605         s->udev = udev_new();
1606         if (!s->udev)
1607                 return -ENOMEM;
1608
1609         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1610         if (!s->rate_limit)
1611                 return -ENOMEM;
1612
1613         r = cg_get_root_path(&s->cgroup_root);
1614         if (r < 0)
1615                 return r;
1616
1617         server_cache_hostname(s);
1618         server_cache_boot_id(s);
1619         server_cache_machine_id(s);
1620
1621         r = system_journal_open(s, false);
1622         if (r < 0)
1623                 return r;
1624
1625         return 0;
1626 }
1627
1628 void server_maybe_append_tags(Server *s) {
1629 #ifdef HAVE_GCRYPT
1630         JournalFile *f;
1631         Iterator i;
1632         usec_t n;
1633
1634         n = now(CLOCK_REALTIME);
1635
1636         if (s->system_journal)
1637                 journal_file_maybe_append_tag(s->system_journal, n);
1638
1639         ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1640                 journal_file_maybe_append_tag(f, n);
1641 #endif
1642 }
1643
1644 void server_done(Server *s) {
1645         JournalFile *f;
1646         assert(s);
1647
1648         while (s->stdout_streams)
1649                 stdout_stream_free(s->stdout_streams);
1650
1651         if (s->system_journal)
1652                 journal_file_close(s->system_journal);
1653
1654         if (s->runtime_journal)
1655                 journal_file_close(s->runtime_journal);
1656
1657         while ((f = ordered_hashmap_steal_first(s->user_journals)))
1658                 journal_file_close(f);
1659
1660         ordered_hashmap_free(s->user_journals);
1661
1662         sd_event_source_unref(s->syslog_event_source);
1663         sd_event_source_unref(s->native_event_source);
1664         sd_event_source_unref(s->stdout_event_source);
1665         sd_event_source_unref(s->dev_kmsg_event_source);
1666         sd_event_source_unref(s->audit_event_source);
1667         sd_event_source_unref(s->sync_event_source);
1668         sd_event_source_unref(s->sigusr1_event_source);
1669         sd_event_source_unref(s->sigusr2_event_source);
1670         sd_event_source_unref(s->sigterm_event_source);
1671         sd_event_source_unref(s->sigint_event_source);
1672         sd_event_source_unref(s->hostname_event_source);
1673         sd_event_unref(s->event);
1674
1675         safe_close(s->syslog_fd);
1676         safe_close(s->native_fd);
1677         safe_close(s->stdout_fd);
1678         safe_close(s->dev_kmsg_fd);
1679         safe_close(s->audit_fd);
1680         safe_close(s->hostname_fd);
1681
1682         if (s->rate_limit)
1683                 journal_rate_limit_free(s->rate_limit);
1684
1685         if (s->kernel_seqnum)
1686                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1687
1688         free(s->buffer);
1689         free(s->tty_path);
1690         free(s->cgroup_root);
1691         free(s->hostname_field);
1692
1693         if (s->mmap)
1694                 mmap_cache_unref(s->mmap);
1695
1696         if (s->udev)
1697                 udev_unref(s->udev);
1698 }