chiark / gitweb /
0ab8c7095b30ce87e0db237eaabbbc00bdd55917
[elogind.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
26 #include <sys/mman.h>
27 #include <sys/timerfd.h>
28
29 #include <libudev.h>
30
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33 #include "sd-daemon.h"
34 #include "fileio.h"
35 #include "mkdir.h"
36 #include "hashmap.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
40 #include "list.h"
41 #include "missing.h"
42 #include "conf-parser.h"
43 #include "selinux-util.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-rate-limit.h"
48 #include "journald-kmsg.h"
49 #include "journald-syslog.h"
50 #include "journald-stream.h"
51 #include "journald-console.h"
52 #include "journald-native.h"
53 #include "journald-server.h"
54
55 #ifdef HAVE_ACL
56 #include <sys/acl.h>
57 #include <acl/libacl.h>
58 #include "acl-util.h"
59 #endif
60
61 #ifdef HAVE_SELINUX
62 #include <selinux/selinux.h>
63 #endif
64
/* Upper bound on the number of per-user journal files held open at the
 * same time; find_journal() closes files until it is below this limit. */
#define USER_JOURNALS_MAX 1024

/* Default tunables; their consumers are outside this part of the file. */
#define DEFAULT_SYNC_INTERVAL_USEC (5 * USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30 * USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000

/* available_space() reuses its cached result while it is younger than this. */
#define RECHECK_AVAILABLE_SPACE_USEC (30 * USEC_PER_SEC)
72
73 static const char* const storage_table[_STORAGE_MAX] = {
74         [STORAGE_AUTO] = "auto",
75         [STORAGE_VOLATILE] = "volatile",
76         [STORAGE_PERSISTENT] = "persistent",
77         [STORAGE_NONE] = "none"
78 };
79
80 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
81 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
82
83 static const char* const split_mode_table[_SPLIT_MAX] = {
84         [SPLIT_LOGIN] = "login",
85         [SPLIT_UID] = "uid",
86         [SPLIT_NONE] = "none",
87 };
88
89 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
90 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
91
92 static uint64_t available_space(Server *s, bool verbose) {
93         char ids[33];
94         _cleanup_free_ char *p = NULL;
95         sd_id128_t machine;
96         struct statvfs ss;
97         uint64_t sum = 0, ss_avail = 0, avail = 0;
98         int r;
99         _cleanup_closedir_ DIR *d = NULL;
100         usec_t ts;
101         const char *f;
102         JournalMetrics *m;
103
104         ts = now(CLOCK_MONOTONIC);
105
106         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
107             && !verbose)
108                 return s->cached_available_space;
109
110         r = sd_id128_get_machine(&machine);
111         if (r < 0)
112                 return 0;
113
114         if (s->system_journal) {
115                 f = "/var/log/journal/";
116                 m = &s->system_metrics;
117         } else {
118                 f = "/run/log/journal/";
119                 m = &s->runtime_metrics;
120         }
121
122         assert(m);
123
124         p = strappend(f, sd_id128_to_string(machine, ids));
125         if (!p)
126                 return 0;
127
128         d = opendir(p);
129         if (!d)
130                 return 0;
131
132         if (fstatvfs(dirfd(d), &ss) < 0)
133                 return 0;
134
135         for (;;) {
136                 struct stat st;
137                 struct dirent *de;
138
139                 errno = 0;
140                 de = readdir(d);
141                 if (!de && errno != 0)
142                         return 0;
143
144                 if (!de)
145                         break;
146
147                 if (!endswith(de->d_name, ".journal") &&
148                     !endswith(de->d_name, ".journal~"))
149                         continue;
150
151                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
152                         continue;
153
154                 if (!S_ISREG(st.st_mode))
155                         continue;
156
157                 sum += (uint64_t) st.st_blocks * 512UL;
158         }
159
160         ss_avail = ss.f_bsize * ss.f_bavail;
161
162         /* If we reached a high mark, we will always allow this much
163          * again, unless usage goes above max_use. This watermark
164          * value is cached so that we don't give up space on pressure,
165          * but hover below the maximum usage. */
166
167         if (m->use < sum)
168                 m->use = sum;
169
170         avail = LESS_BY(ss_avail, m->keep_free);
171
172         s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
173         s->cached_available_space_timestamp = ts;
174
175         if (verbose) {
176                 char    fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
177                         fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
178
179                 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
180                                       "%s journal is using %s (max allowed %s, "
181                                       "trying to leave %s free of %s available → current limit %s).",
182                                       s->system_journal ? "Permanent" : "Runtime",
183                                       format_bytes(fb1, sizeof(fb1), sum),
184                                       format_bytes(fb2, sizeof(fb2), m->max_use),
185                                       format_bytes(fb3, sizeof(fb3), m->keep_free),
186                                       format_bytes(fb4, sizeof(fb4), ss_avail),
187                                       format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
188         }
189
190         return s->cached_available_space;
191 }
192
193 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
194         int r;
195 #ifdef HAVE_ACL
196         acl_t acl;
197         acl_entry_t entry;
198         acl_permset_t permset;
199 #endif
200
201         assert(f);
202
203         r = fchmod(f->fd, 0640);
204         if (r < 0)
205                 log_warning("Failed to fix access mode on %s, ignoring: %s", f->path, strerror(-r));
206
207 #ifdef HAVE_ACL
208         if (uid <= 0)
209                 return;
210
211         acl = acl_get_fd(f->fd);
212         if (!acl) {
213                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
214                 return;
215         }
216
217         r = acl_find_uid(acl, uid, &entry);
218         if (r <= 0) {
219
220                 if (acl_create_entry(&acl, &entry) < 0 ||
221                     acl_set_tag_type(entry, ACL_USER) < 0 ||
222                     acl_set_qualifier(entry, &uid) < 0) {
223                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
224                         goto finish;
225                 }
226         }
227
228         /* We do not recalculate the mask unconditionally here,
229          * so that the fchmod() mask above stays intact. */
230         if (acl_get_permset(entry, &permset) < 0 ||
231             acl_add_perm(permset, ACL_READ) < 0 ||
232             calc_acl_mask_if_needed(&acl) < 0) {
233                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
234                 goto finish;
235         }
236
237         if (acl_set_fd(f->fd, acl) < 0)
238                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
239
240 finish:
241         acl_free(acl);
242 #endif
243 }
244
245 static JournalFile* find_journal(Server *s, uid_t uid) {
246         _cleanup_free_ char *p = NULL;
247         int r;
248         JournalFile *f;
249         sd_id128_t machine;
250
251         assert(s);
252
253         /* We split up user logs only on /var, not on /run. If the
254          * runtime file is open, we write to it exclusively, in order
255          * to guarantee proper order as soon as we flush /run to
256          * /var and close the runtime file. */
257
258         if (s->runtime_journal)
259                 return s->runtime_journal;
260
261         if (uid <= 0)
262                 return s->system_journal;
263
264         r = sd_id128_get_machine(&machine);
265         if (r < 0)
266                 return s->system_journal;
267
268         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
269         if (f)
270                 return f;
271
272         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
273                      SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
274                 return s->system_journal;
275
276         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
277                 /* Too many open? Then let's close one */
278                 f = hashmap_steal_first(s->user_journals);
279                 assert(f);
280                 journal_file_close(f);
281         }
282
283         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
284         if (r < 0)
285                 return s->system_journal;
286
287         server_fix_perms(s, f, uid);
288
289         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
290         if (r < 0) {
291                 journal_file_close(f);
292                 return s->system_journal;
293         }
294
295         return f;
296 }
297
298 static int do_rotate(Server *s, JournalFile **f, const char* name,
299                      bool seal, uint32_t uid) {
300         int r;
301         assert(s);
302
303         if (!*f)
304                 return -EINVAL;
305
306         r = journal_file_rotate(f, s->compress, seal);
307         if (r < 0)
308                 if (*f)
309                         log_error("Failed to rotate %s: %s",
310                                   (*f)->path, strerror(-r));
311                 else
312                         log_error("Failed to create new %s journal: %s",
313                                   name, strerror(-r));
314         else
315                 server_fix_perms(s, *f, uid);
316         return r;
317 }
318
319 void server_rotate(Server *s) {
320         JournalFile *f;
321         void *k;
322         Iterator i;
323         int r;
324
325         log_debug("Rotating...");
326
327         do_rotate(s, &s->runtime_journal, "runtime", false, 0);
328         do_rotate(s, &s->system_journal, "system", s->seal, 0);
329
330         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
331                 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
332                 if (r >= 0)
333                         hashmap_replace(s->user_journals, k, f);
334                 else if (!f)
335                         /* Old file has been closed and deallocated */
336                         hashmap_remove(s->user_journals, k);
337         }
338 }
339
340 void server_sync(Server *s) {
341         JournalFile *f;
342         void *k;
343         Iterator i;
344         int r;
345
346         if (s->system_journal) {
347                 r = journal_file_set_offline(s->system_journal);
348                 if (r < 0)
349                         log_error("Failed to sync system journal: %s", strerror(-r));
350         }
351
352         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
353                 r = journal_file_set_offline(f);
354                 if (r < 0)
355                         log_error("Failed to sync user journal: %s", strerror(-r));
356         }
357
358         if (s->sync_event_source) {
359                 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
360                 if (r < 0)
361                         log_error("Failed to disable sync timer source: %s", strerror(-r));
362         }
363
364         s->sync_scheduled = false;
365 }
366
367 void server_vacuum(Server *s) {
368         char ids[33];
369         sd_id128_t machine;
370         int r;
371
372         log_debug("Vacuuming...");
373
374         s->oldest_file_usec = 0;
375
376         r = sd_id128_get_machine(&machine);
377         if (r < 0) {
378                 log_error("Failed to get machine ID: %s", strerror(-r));
379                 return;
380         }
381
382         sd_id128_to_string(machine, ids);
383
384         if (s->system_journal) {
385                 char *p = strappenda("/var/log/journal/", ids);
386
387                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->max_retention_usec, &s->oldest_file_usec);
388                 if (r < 0 && r != -ENOENT)
389                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
390         }
391
392         if (s->runtime_journal) {
393                 char *p = strappenda("/run/log/journal/", ids);
394
395                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->max_retention_usec, &s->oldest_file_usec);
396                 if (r < 0 && r != -ENOENT)
397                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
398         }
399
400         s->cached_available_space_timestamp = 0;
401 }
402
403 static void server_cache_machine_id(Server *s) {
404         sd_id128_t id;
405         int r;
406
407         assert(s);
408
409         r = sd_id128_get_machine(&id);
410         if (r < 0)
411                 return;
412
413         sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
414 }
415
416 static void server_cache_boot_id(Server *s) {
417         sd_id128_t id;
418         int r;
419
420         assert(s);
421
422         r = sd_id128_get_boot(&id);
423         if (r < 0)
424                 return;
425
426         sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
427 }
428
429 static void server_cache_hostname(Server *s) {
430         _cleanup_free_ char *t = NULL;
431         char *x;
432
433         assert(s);
434
435         t = gethostname_malloc();
436         if (!t)
437                 return;
438
439         x = strappend("_HOSTNAME=", t);
440         if (!x)
441                 return;
442
443         free(s->hostname_field);
444         s->hostname_field = x;
445 }
446
447 bool shall_try_append_again(JournalFile *f, int r) {
448
449         /* -E2BIG            Hit configured limit
450            -EFBIG            Hit fs limit
451            -EDQUOT           Quota limit hit
452            -ENOSPC           Disk full
453            -EHOSTDOWN        Other machine
454            -EBUSY            Unclean shutdown
455            -EPROTONOSUPPORT  Unsupported feature
456            -EBADMSG          Corrupted
457            -ENODATA          Truncated
458            -ESHUTDOWN        Already archived */
459
460         if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
461                 log_debug("%s: Allocation limit reached, rotating.", f->path);
462         else if (r == -EHOSTDOWN)
463                 log_info("%s: Journal file from other machine, rotating.", f->path);
464         else if (r == -EBUSY)
465                 log_info("%s: Unclean shutdown, rotating.", f->path);
466         else if (r == -EPROTONOSUPPORT)
467                 log_info("%s: Unsupported feature, rotating.", f->path);
468         else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
469                 log_warning("%s: Journal file corrupted, rotating.", f->path);
470         else
471                 return false;
472
473         return true;
474 }
475
476 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
477         JournalFile *f;
478         bool vacuumed = false;
479         int r;
480
481         assert(s);
482         assert(iovec);
483         assert(n > 0);
484
485         f = find_journal(s, uid);
486         if (!f)
487                 return;
488
489         if (journal_file_rotate_suggested(f, s->max_file_usec)) {
490                 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
491                 server_rotate(s);
492                 server_vacuum(s);
493                 vacuumed = true;
494
495                 f = find_journal(s, uid);
496                 if (!f)
497                         return;
498         }
499
500         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
501         if (r >= 0) {
502                 server_schedule_sync(s, priority);
503                 return;
504         }
505
506         if (vacuumed || !shall_try_append_again(f, r)) {
507                 size_t size = 0;
508                 unsigned i;
509                 for (i = 0; i < n; i++)
510                         size += iovec[i].iov_len;
511
512                 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
513                 return;
514         }
515
516         server_rotate(s);
517         server_vacuum(s);
518
519         f = find_journal(s, uid);
520         if (!f)
521                 return;
522
523         log_debug("Retrying write.");
524         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
525         if (r < 0) {
526                 size_t size = 0;
527                 unsigned i;
528                 for (i = 0; i < n; i++)
529                         size += iovec[i].iov_len;
530
531                 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
532         } else
533                 server_schedule_sync(s, priority);
534 }
535
536 static void dispatch_message_real(
537                 Server *s,
538                 struct iovec *iovec, unsigned n, unsigned m,
539                 struct ucred *ucred,
540                 struct timeval *tv,
541                 const char *label, size_t label_len,
542                 const char *unit_id,
543                 int priority,
544                 pid_t object_pid) {
545
546         char    pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
547                 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
548                 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
549                 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
550                 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
551                 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
552                 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
553                 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
554         uid_t object_uid;
555         gid_t object_gid;
556         char *x;
557         int r;
558         char *t, *c;
559         uid_t realuid = 0, owner = 0, journal_uid;
560         bool owner_valid = false;
561 #ifdef HAVE_AUDIT
562         char    audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
563                 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
564                 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
565                 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
566
567         uint32_t audit;
568         uid_t loginuid;
569 #endif
570
571         assert(s);
572         assert(iovec);
573         assert(n > 0);
574         assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
575
576         if (ucred) {
577                 realuid = ucred->uid;
578
579                 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
580                 IOVEC_SET_STRING(iovec[n++], pid);
581
582                 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
583                 IOVEC_SET_STRING(iovec[n++], uid);
584
585                 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
586                 IOVEC_SET_STRING(iovec[n++], gid);
587
588                 r = get_process_comm(ucred->pid, &t);
589                 if (r >= 0) {
590                         x = strappenda("_COMM=", t);
591                         free(t);
592                         IOVEC_SET_STRING(iovec[n++], x);
593                 }
594
595                 r = get_process_exe(ucred->pid, &t);
596                 if (r >= 0) {
597                         x = strappenda("_EXE=", t);
598                         free(t);
599                         IOVEC_SET_STRING(iovec[n++], x);
600                 }
601
602                 r = get_process_cmdline(ucred->pid, 0, false, &t);
603                 if (r >= 0) {
604                         x = strappenda("_CMDLINE=", t);
605                         free(t);
606                         IOVEC_SET_STRING(iovec[n++], x);
607                 }
608
609                 r = get_process_capeff(ucred->pid, &t);
610                 if (r >= 0) {
611                         x = strappenda("_CAP_EFFECTIVE=", t);
612                         free(t);
613                         IOVEC_SET_STRING(iovec[n++], x);
614                 }
615
616 #ifdef HAVE_AUDIT
617                 r = audit_session_from_pid(ucred->pid, &audit);
618                 if (r >= 0) {
619                         sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
620                         IOVEC_SET_STRING(iovec[n++], audit_session);
621                 }
622
623                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
624                 if (r >= 0) {
625                         sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
626                         IOVEC_SET_STRING(iovec[n++], audit_loginuid);
627                 }
628 #endif
629
630                 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
631                 if (r >= 0) {
632                         char *session = NULL;
633
634                         x = strappenda("_SYSTEMD_CGROUP=", c);
635                         IOVEC_SET_STRING(iovec[n++], x);
636
637                         r = cg_path_get_session(c, &t);
638                         if (r >= 0) {
639                                 session = strappenda("_SYSTEMD_SESSION=", t);
640                                 free(t);
641                                 IOVEC_SET_STRING(iovec[n++], session);
642                         }
643
644                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
645                                 owner_valid = true;
646
647                                 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
648                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
649                         }
650
651                         if (cg_path_get_unit(c, &t) >= 0) {
652                                 x = strappenda("_SYSTEMD_UNIT=", t);
653                                 free(t);
654                                 IOVEC_SET_STRING(iovec[n++], x);
655                         } else if (unit_id && !session) {
656                                 x = strappenda("_SYSTEMD_UNIT=", unit_id);
657                                 IOVEC_SET_STRING(iovec[n++], x);
658                         }
659
660                         if (cg_path_get_user_unit(c, &t) >= 0) {
661                                 x = strappenda("_SYSTEMD_USER_UNIT=", t);
662                                 free(t);
663                                 IOVEC_SET_STRING(iovec[n++], x);
664                         } else if (unit_id && session) {
665                                 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
666                                 IOVEC_SET_STRING(iovec[n++], x);
667                         }
668
669                         if (cg_path_get_slice(c, &t) >= 0) {
670                                 x = strappenda("_SYSTEMD_SLICE=", t);
671                                 free(t);
672                                 IOVEC_SET_STRING(iovec[n++], x);
673                         }
674
675                         free(c);
676                 } else if (unit_id) {
677                         x = strappenda("_SYSTEMD_UNIT=", unit_id);
678                         IOVEC_SET_STRING(iovec[n++], x);
679                 }
680
681 #ifdef HAVE_SELINUX
682                 if (use_selinux()) {
683                         if (label) {
684                                 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
685
686                                 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
687                                 IOVEC_SET_STRING(iovec[n++], x);
688                         } else {
689                                 security_context_t con;
690
691                                 if (getpidcon(ucred->pid, &con) >= 0) {
692                                         x = strappenda("_SELINUX_CONTEXT=", con);
693
694                                         freecon(con);
695                                         IOVEC_SET_STRING(iovec[n++], x);
696                                 }
697                         }
698                 }
699 #endif
700         }
701         assert(n <= m);
702
703         if (object_pid) {
704                 r = get_process_uid(object_pid, &object_uid);
705                 if (r >= 0) {
706                         sprintf(o_uid, "OBJECT_UID=%lu", (unsigned long) object_uid);
707                         IOVEC_SET_STRING(iovec[n++], o_uid);
708                 }
709
710                 r = get_process_gid(object_pid, &object_gid);
711                 if (r >= 0) {
712                         sprintf(o_gid, "OBJECT_GID=%lu", (unsigned long) object_gid);
713                         IOVEC_SET_STRING(iovec[n++], o_gid);
714                 }
715
716                 r = get_process_comm(object_pid, &t);
717                 if (r >= 0) {
718                         x = strappenda("OBJECT_COMM=", t);
719                         free(t);
720                         IOVEC_SET_STRING(iovec[n++], x);
721                 }
722
723                 r = get_process_exe(object_pid, &t);
724                 if (r >= 0) {
725                         x = strappenda("OBJECT_EXE=", t);
726                         free(t);
727                         IOVEC_SET_STRING(iovec[n++], x);
728                 }
729
730                 r = get_process_cmdline(object_pid, 0, false, &t);
731                 if (r >= 0) {
732                         x = strappenda("OBJECT_CMDLINE=", t);
733                         free(t);
734                         IOVEC_SET_STRING(iovec[n++], x);
735                 }
736
737 #ifdef HAVE_AUDIT
738                 r = audit_session_from_pid(object_pid, &audit);
739                 if (r >= 0) {
740                         sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%lu", (unsigned long) audit);
741                         IOVEC_SET_STRING(iovec[n++], o_audit_session);
742                 }
743
744                 r = audit_loginuid_from_pid(object_pid, &loginuid);
745                 if (r >= 0) {
746                         sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
747                         IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
748                 }
749 #endif
750
751                 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
752                 if (r >= 0) {
753                         x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
754                         IOVEC_SET_STRING(iovec[n++], x);
755
756                         r = cg_path_get_session(c, &t);
757                         if (r >= 0) {
758                                 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
759                                 free(t);
760                                 IOVEC_SET_STRING(iovec[n++], x);
761                         }
762
763                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
764                                 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
765                                 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
766                         }
767
768                         if (cg_path_get_unit(c, &t) >= 0) {
769                                 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
770                                 free(t);
771                                 IOVEC_SET_STRING(iovec[n++], x);
772                         }
773
774                         if (cg_path_get_user_unit(c, &t) >= 0) {
775                                 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
776                                 free(t);
777                                 IOVEC_SET_STRING(iovec[n++], x);
778                         }
779
780                         free(c);
781                 }
782         }
783         assert(n <= m);
784
785         if (tv) {
786                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
787                 IOVEC_SET_STRING(iovec[n++], source_time);
788         }
789
790         /* Note that strictly speaking storing the boot id here is
791          * redundant since the entry includes this in-line
792          * anyway. However, we need this indexed, too. */
793         if (!isempty(s->boot_id_field))
794                 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
795
796         if (!isempty(s->machine_id_field))
797                 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
798
799         if (!isempty(s->hostname_field))
800                 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
801
802         assert(n <= m);
803
804         if (s->split_mode == SPLIT_UID && realuid > 0)
805                 /* Split up strictly by any UID */
806                 journal_uid = realuid;
807         else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
808                 /* Split up by login UIDs, this avoids creation of
809                  * individual journals for system UIDs.  We do this
810                  * only if the realuid is not root, in order not to
811                  * accidentally leak privileged information to the
812                  * user that is logged by a privileged process that is
813                  * part of an unprivileged session.*/
814                 journal_uid = owner;
815         else
816                 journal_uid = 0;
817
818         write_to_journal(s, journal_uid, iovec, n, priority);
819 }
820
821 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
822         char mid[11 + 32 + 1];
823         char buffer[16 + LINE_MAX + 1];
824         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
825         int n = 0;
826         va_list ap;
827         struct ucred ucred = {};
828
829         assert(s);
830         assert(format);
831
832         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
833         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
834
835         memcpy(buffer, "MESSAGE=", 8);
836         va_start(ap, format);
837         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
838         va_end(ap);
839         char_array_0(buffer);
840         IOVEC_SET_STRING(iovec[n++], buffer);
841
842         if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
843                 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
844                 char_array_0(mid);
845                 IOVEC_SET_STRING(iovec[n++], mid);
846         }
847
848         ucred.pid = getpid();
849         ucred.uid = getuid();
850         ucred.gid = getgid();
851
852         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
853 }
854
855 void server_dispatch_message(
856                 Server *s,
857                 struct iovec *iovec, unsigned n, unsigned m,
858                 struct ucred *ucred,
859                 struct timeval *tv,
860                 const char *label, size_t label_len,
861                 const char *unit_id,
862                 int priority,
863                 pid_t object_pid) {
864
865         int rl, r;
866         _cleanup_free_ char *path = NULL;
867         char *c;
868
869         assert(s);
870         assert(iovec || n == 0);
871
872         if (n == 0)
873                 return;
874
875         if (LOG_PRI(priority) > s->max_level_store)
876                 return;
877
878         /* Stop early in case the information will not be stored
879          * in a journal. */
880         if (s->storage == STORAGE_NONE)
881                 return;
882
883         if (!ucred)
884                 goto finish;
885
886         r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
887         if (r < 0)
888                 goto finish;
889
890         /* example: /user/lennart/3/foobar
891          *          /system/dbus.service/foobar
892          *
893          * So let's cut of everything past the third /, since that is
894          * where user directories start */
895
896         c = strchr(path, '/');
897         if (c) {
898                 c = strchr(c+1, '/');
899                 if (c) {
900                         c = strchr(c+1, '/');
901                         if (c)
902                                 *c = 0;
903                 }
904         }
905
906         rl = journal_rate_limit_test(s->rate_limit, path,
907                                      priority & LOG_PRIMASK, available_space(s, false));
908
909         if (rl == 0)
910                 return;
911
912         /* Write a suppression message if we suppressed something */
913         if (rl > 1)
914                 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
915                                       "Suppressed %u messages from %s", rl - 1, path);
916
917 finish:
918         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
919 }
920
921
922 static int system_journal_open(Server *s) {
923         int r;
924         char *fn;
925         sd_id128_t machine;
926         char ids[33];
927
928         r = sd_id128_get_machine(&machine);
929         if (r < 0) {
930                 log_error("Failed to get machine id: %s", strerror(-r));
931                 return r;
932         }
933
934         sd_id128_to_string(machine, ids);
935
936         if (!s->system_journal &&
937             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
938             access("/run/systemd/journal/flushed", F_OK) >= 0) {
939
940                 /* If in auto mode: first try to create the machine
941                  * path, but not the prefix.
942                  *
943                  * If in persistent mode: create /var/log/journal and
944                  * the machine path */
945
946                 if (s->storage == STORAGE_PERSISTENT)
947                         (void) mkdir("/var/log/journal/", 0755);
948
949                 fn = strappenda("/var/log/journal/", ids);
950                 (void) mkdir(fn, 0755);
951
952                 fn = strappenda(fn, "/system.journal");
953                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
954
955                 if (r >= 0)
956                         server_fix_perms(s, s->system_journal, 0);
957                 else if (r < 0) {
958                         if (r != -ENOENT && r != -EROFS)
959                                 log_warning("Failed to open system journal: %s", strerror(-r));
960
961                         r = 0;
962                 }
963         }
964
965         if (!s->runtime_journal &&
966             (s->storage != STORAGE_NONE)) {
967
968                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
969                 if (!fn)
970                         return -ENOMEM;
971
972                 if (s->system_journal) {
973
974                         /* Try to open the runtime journal, but only
975                          * if it already exists, so that we can flush
976                          * it into the system journal */
977
978                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
979                         free(fn);
980
981                         if (r < 0) {
982                                 if (r != -ENOENT)
983                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
984
985                                 r = 0;
986                         }
987
988                 } else {
989
990                         /* OK, we really need the runtime journal, so create
991                          * it if necessary. */
992
993                         (void) mkdir_parents(fn, 0755);
994                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
995                         free(fn);
996
997                         if (r < 0) {
998                                 log_error("Failed to open runtime journal: %s", strerror(-r));
999                                 return r;
1000                         }
1001                 }
1002
1003                 if (s->runtime_journal)
1004                         server_fix_perms(s, s->runtime_journal, 0);
1005         }
1006
1007         available_space(s, true);
1008
1009         return r;
1010 }
1011
1012 int server_flush_to_var(Server *s) {
1013         sd_id128_t machine;
1014         sd_journal *j = NULL;
1015         char ts[FORMAT_TIMESPAN_MAX];
1016         usec_t start;
1017         unsigned n = 0;
1018         int r;
1019
1020         assert(s);
1021
1022         if (s->storage != STORAGE_AUTO &&
1023             s->storage != STORAGE_PERSISTENT)
1024                 return 0;
1025
1026         if (!s->runtime_journal)
1027                 return 0;
1028
1029         system_journal_open(s);
1030
1031         if (!s->system_journal)
1032                 return 0;
1033
1034         log_debug("Flushing to /var...");
1035
1036         start = now(CLOCK_MONOTONIC);
1037
1038         r = sd_id128_get_machine(&machine);
1039         if (r < 0)
1040                 return r;
1041
1042         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1043         if (r < 0) {
1044                 log_error("Failed to read runtime journal: %s", strerror(-r));
1045                 return r;
1046         }
1047
1048         sd_journal_set_data_threshold(j, 0);
1049
1050         SD_JOURNAL_FOREACH(j) {
1051                 Object *o = NULL;
1052                 JournalFile *f;
1053
1054                 f = j->current_file;
1055                 assert(f && f->current_offset > 0);
1056
1057                 n++;
1058
1059                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1060                 if (r < 0) {
1061                         log_error("Can't read entry: %s", strerror(-r));
1062                         goto finish;
1063                 }
1064
1065                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1066                 if (r >= 0)
1067                         continue;
1068
1069                 if (!shall_try_append_again(s->system_journal, r)) {
1070                         log_error("Can't write entry: %s", strerror(-r));
1071                         goto finish;
1072                 }
1073
1074                 server_rotate(s);
1075                 server_vacuum(s);
1076
1077                 if (!s->system_journal) {
1078                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1079                         r = -EIO;
1080                         goto finish;
1081                 }
1082
1083                 log_debug("Retrying write.");
1084                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1085                 if (r < 0) {
1086                         log_error("Can't write entry: %s", strerror(-r));
1087                         goto finish;
1088                 }
1089         }
1090
1091 finish:
1092         journal_file_post_change(s->system_journal);
1093
1094         journal_file_close(s->runtime_journal);
1095         s->runtime_journal = NULL;
1096
1097         if (r >= 0)
1098                 rm_rf("/run/log/journal", false, true, false);
1099
1100         sd_journal_close(j);
1101
1102         server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1103
1104         return r;
1105 }
1106
1107 int process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1108         Server *s = userdata;
1109
1110         assert(s);
1111         assert(fd == s->native_fd || fd == s->syslog_fd);
1112
1113         if (revents != EPOLLIN) {
1114                 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1115                 return -EIO;
1116         }
1117
1118         for (;;) {
1119                 struct ucred *ucred = NULL;
1120                 struct timeval *tv = NULL;
1121                 struct cmsghdr *cmsg;
1122                 char *label = NULL;
1123                 size_t label_len = 0;
1124                 struct iovec iovec;
1125
1126                 union {
1127                         struct cmsghdr cmsghdr;
1128
1129                         /* We use NAME_MAX space for the SELinux label
1130                          * here. The kernel currently enforces no
1131                          * limit, but according to suggestions from
1132                          * the SELinux people this will change and it
1133                          * will probably be identical to NAME_MAX. For
1134                          * now we use that, but this should be updated
1135                          * one day when the final limit is known.*/
1136                         uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1137                                     CMSG_SPACE(sizeof(struct timeval)) +
1138                                     CMSG_SPACE(sizeof(int)) + /* fd */
1139                                     CMSG_SPACE(NAME_MAX)]; /* selinux label */
1140                 } control = {};
1141                 struct msghdr msghdr = {
1142                         .msg_iov = &iovec,
1143                         .msg_iovlen = 1,
1144                         .msg_control = &control,
1145                         .msg_controllen = sizeof(control),
1146                 };
1147
1148                 ssize_t n;
1149                 int v;
1150                 int *fds = NULL;
1151                 unsigned n_fds = 0;
1152
1153                 if (ioctl(fd, SIOCINQ, &v) < 0) {
1154                         log_error("SIOCINQ failed: %m");
1155                         return -errno;
1156                 }
1157
1158                 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, LINE_MAX + (size_t) v))
1159                         return log_oom();
1160
1161                 iovec.iov_base = s->buffer;
1162                 iovec.iov_len = s->buffer_size;
1163
1164                 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1165                 if (n < 0) {
1166                         if (errno == EINTR || errno == EAGAIN)
1167                                 return 0;
1168
1169                         log_error("recvmsg() failed: %m");
1170                         return -errno;
1171                 }
1172
1173                 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1174
1175                         if (cmsg->cmsg_level == SOL_SOCKET &&
1176                             cmsg->cmsg_type == SCM_CREDENTIALS &&
1177                             cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1178                                 ucred = (struct ucred*) CMSG_DATA(cmsg);
1179                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1180                                  cmsg->cmsg_type == SCM_SECURITY) {
1181                                 label = (char*) CMSG_DATA(cmsg);
1182                                 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1183                         } else if (cmsg->cmsg_level == SOL_SOCKET &&
1184                                    cmsg->cmsg_type == SO_TIMESTAMP &&
1185                                    cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1186                                 tv = (struct timeval*) CMSG_DATA(cmsg);
1187                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1188                                  cmsg->cmsg_type == SCM_RIGHTS) {
1189                                 fds = (int*) CMSG_DATA(cmsg);
1190                                 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1191                         }
1192                 }
1193
1194                 if (fd == s->syslog_fd) {
1195                         if (n > 0 && n_fds == 0) {
1196                                 s->buffer[n] = 0;
1197                                 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1198                         } else if (n_fds > 0)
1199                                 log_warning("Got file descriptors via syslog socket. Ignoring.");
1200
1201                 } else {
1202                         if (n > 0 && n_fds == 0)
1203                                 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1204                         else if (n == 0 && n_fds == 1)
1205                                 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1206                         else if (n_fds > 0)
1207                                 log_warning("Got too many file descriptors via native socket. Ignoring.");
1208                 }
1209
1210                 close_many(fds, n_fds);
1211         }
1212 }
1213
1214 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1215         Server *s = userdata;
1216
1217         assert(s);
1218
1219         log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
1220
1221         touch("/run/systemd/journal/flushed");
1222         server_flush_to_var(s);
1223         server_sync(s);
1224
1225         return 0;
1226 }
1227
1228 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1229         Server *s = userdata;
1230
1231         assert(s);
1232
1233         log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
1234         server_rotate(s);
1235         server_vacuum(s);
1236
1237         return 0;
1238 }
1239
1240 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1241         Server *s = userdata;
1242
1243         assert(s);
1244
1245         log_received_signal(LOG_INFO, si);
1246
1247         sd_event_exit(s->event, 0);
1248         return 0;
1249 }
1250
1251 static int setup_signals(Server *s) {
1252         sigset_t mask;
1253         int r;
1254
1255         assert(s);
1256
1257         assert_se(sigemptyset(&mask) == 0);
1258         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1259         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1260
1261         r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1262         if (r < 0)
1263                 return r;
1264
1265         r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1266         if (r < 0)
1267                 return r;
1268
1269         r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1270         if (r < 0)
1271                 return r;
1272
1273         r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1274         if (r < 0)
1275                 return r;
1276
1277         return 0;
1278 }
1279
1280 static int server_parse_proc_cmdline(Server *s) {
1281         _cleanup_free_ char *line = NULL;
1282         char *w, *state;
1283         size_t l;
1284         int r;
1285
1286         r = proc_cmdline(&line);
1287         if (r < 0)
1288                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1289         if (r <= 0)
1290                 return 0;
1291
1292         FOREACH_WORD_QUOTED(w, l, line, state) {
1293                 _cleanup_free_ char *word;
1294
1295                 word = strndup(w, l);
1296                 if (!word)
1297                         return -ENOMEM;
1298
1299                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1300                         r = parse_boolean(word + 35);
1301                         if (r < 0)
1302                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1303                         else
1304                                 s->forward_to_syslog = r;
1305                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1306                         r = parse_boolean(word + 33);
1307                         if (r < 0)
1308                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1309                         else
1310                                 s->forward_to_kmsg = r;
1311                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1312                         r = parse_boolean(word + 36);
1313                         if (r < 0)
1314                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1315                         else
1316                                 s->forward_to_console = r;
1317                 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1318                         r = parse_boolean(word + 33);
1319                         if (r < 0)
1320                                 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1321                         else
1322                                 s->forward_to_wall = r;
1323                 } else if (startswith(word, "systemd.journald"))
1324                         log_warning("Invalid systemd.journald parameter. Ignoring.");
1325         }
1326
1327         return 0;
1328 }
1329
1330 static int server_parse_config_file(Server *s) {
1331         static const char fn[] = "/etc/systemd/journald.conf";
1332         _cleanup_fclose_ FILE *f = NULL;
1333         int r;
1334
1335         assert(s);
1336
1337         f = fopen(fn, "re");
1338         if (!f) {
1339                 if (errno == ENOENT)
1340                         return 0;
1341
1342                 log_warning("Failed to open configuration file %s: %m", fn);
1343                 return -errno;
1344         }
1345
1346         r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1347                          (void*) journald_gperf_lookup, false, false, s);
1348         if (r < 0)
1349                 log_warning("Failed to parse configuration file: %s", strerror(-r));
1350
1351         return r;
1352 }
1353
1354 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1355         Server *s = userdata;
1356
1357         assert(s);
1358
1359         server_sync(s);
1360         return 0;
1361 }
1362
1363 int server_schedule_sync(Server *s, int priority) {
1364         int r;
1365
1366         assert(s);
1367
1368         if (priority <= LOG_CRIT) {
1369                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1370                 server_sync(s);
1371                 return 0;
1372         }
1373
1374         if (s->sync_scheduled)
1375                 return 0;
1376
1377         if (s->sync_interval_usec > 0) {
1378                 usec_t when;
1379
1380                 r = sd_event_get_now_monotonic(s->event, &when);
1381                 if (r < 0)
1382                         return r;
1383
1384                 when += s->sync_interval_usec;
1385
1386                 if (!s->sync_event_source) {
1387                         r = sd_event_add_monotonic(s->event, &s->sync_event_source, when, 0, server_dispatch_sync, s);
1388                         if (r < 0)
1389                                 return r;
1390
1391                         r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1392                 } else {
1393                         r = sd_event_source_set_time(s->sync_event_source, when);
1394                         if (r < 0)
1395                                 return r;
1396
1397                         r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1398                 }
1399                 if (r < 0)
1400                         return r;
1401
1402                 s->sync_scheduled = true;
1403         }
1404
1405         return 0;
1406 }
1407
1408 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1409         Server *s = userdata;
1410
1411         assert(s);
1412
1413         server_cache_hostname(s);
1414         return 0;
1415 }
1416
1417 static int server_open_hostname(Server *s) {
1418         int r;
1419
1420         assert(s);
1421
1422         s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1423         if (s->hostname_fd < 0) {
1424                 log_error("Failed to open /proc/sys/kernel/hostname: %m");
1425                 return -errno;
1426         }
1427
1428         r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1429         if (r < 0) {
1430                 /* kernels prior to 3.2 don't support polling this file. Ignore
1431                  * the failure. */
1432                 if (r == -EPERM) {
1433                         log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1434                                         strerror(-r));
1435                         close_nointr_nofail(s->hostname_fd);
1436                         s->hostname_fd = -1;
1437                         return 0;
1438                 }
1439
1440                 log_error("Failed to register hostname fd in event loop: %s", strerror(-r));
1441                 return r;
1442         }
1443
1444         r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1445         if (r < 0) {
1446                 log_error("Failed to adjust priority of host name event source: %s", strerror(-r));
1447                 return r;
1448         }
1449
1450         return 0;
1451 }
1452
1453 int server_init(Server *s) {
1454         int n, r, fd;
1455
1456         assert(s);
1457
1458         zero(*s);
1459         s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->hostname_fd = -1;
1460         s->compress = true;
1461         s->seal = true;
1462
1463         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1464         s->sync_scheduled = false;
1465
1466         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1467         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1468
1469         s->forward_to_syslog = true;
1470         s->forward_to_wall = true;
1471
1472         s->max_level_store = LOG_DEBUG;
1473         s->max_level_syslog = LOG_DEBUG;
1474         s->max_level_kmsg = LOG_NOTICE;
1475         s->max_level_console = LOG_INFO;
1476         s->max_level_wall = LOG_EMERG;
1477
1478         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1479         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1480
1481         server_parse_config_file(s);
1482         server_parse_proc_cmdline(s);
1483         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1484                 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1485                           (long long unsigned) s->rate_limit_interval,
1486                           s->rate_limit_burst);
1487                 s->rate_limit_interval = s->rate_limit_burst = 0;
1488         }
1489
1490         mkdir_p("/run/systemd/journal", 0755);
1491
1492         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1493         if (!s->user_journals)
1494                 return log_oom();
1495
1496         s->mmap = mmap_cache_new();
1497         if (!s->mmap)
1498                 return log_oom();
1499
1500         r = sd_event_default(&s->event);
1501         if (r < 0) {
1502                 log_error("Failed to create event loop: %s", strerror(-r));
1503                 return r;
1504         }
1505
1506         sd_event_set_watchdog(s->event, true);
1507
1508         n = sd_listen_fds(true);
1509         if (n < 0) {
1510                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1511                 return n;
1512         }
1513
1514         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1515
1516                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1517
1518                         if (s->native_fd >= 0) {
1519                                 log_error("Too many native sockets passed.");
1520                                 return -EINVAL;
1521                         }
1522
1523                         s->native_fd = fd;
1524
1525                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1526
1527                         if (s->stdout_fd >= 0) {
1528                                 log_error("Too many stdout sockets passed.");
1529                                 return -EINVAL;
1530                         }
1531
1532                         s->stdout_fd = fd;
1533
1534                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1535
1536                         if (s->syslog_fd >= 0) {
1537                                 log_error("Too many /dev/log sockets passed.");
1538                                 return -EINVAL;
1539                         }
1540
1541                         s->syslog_fd = fd;
1542
1543                 } else {
1544                         log_error("Unknown socket passed.");
1545                         return -EINVAL;
1546                 }
1547         }
1548
1549         r = server_open_syslog_socket(s);
1550         if (r < 0)
1551                 return r;
1552
1553         r = server_open_native_socket(s);
1554         if (r < 0)
1555                 return r;
1556
1557         r = server_open_stdout_socket(s);
1558         if (r < 0)
1559                 return r;
1560
1561         r = server_open_dev_kmsg(s);
1562         if (r < 0)
1563                 return r;
1564
1565         r = server_open_kernel_seqnum(s);
1566         if (r < 0)
1567                 return r;
1568
1569         r = server_open_hostname(s);
1570         if (r < 0)
1571                 return r;
1572
1573         r = setup_signals(s);
1574         if (r < 0)
1575                 return r;
1576
1577         s->udev = udev_new();
1578         if (!s->udev)
1579                 return -ENOMEM;
1580
1581         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1582         if (!s->rate_limit)
1583                 return -ENOMEM;
1584
1585         r = cg_get_root_path(&s->cgroup_root);
1586         if (r < 0)
1587                 return r;
1588
1589         server_cache_hostname(s);
1590         server_cache_boot_id(s);
1591         server_cache_machine_id(s);
1592
1593         r = system_journal_open(s);
1594         if (r < 0)
1595                 return r;
1596
1597         return 0;
1598 }
1599
1600 void server_maybe_append_tags(Server *s) {
1601 #ifdef HAVE_GCRYPT
1602         JournalFile *f;
1603         Iterator i;
1604         usec_t n;
1605
1606         n = now(CLOCK_REALTIME);
1607
1608         if (s->system_journal)
1609                 journal_file_maybe_append_tag(s->system_journal, n);
1610
1611         HASHMAP_FOREACH(f, s->user_journals, i)
1612                 journal_file_maybe_append_tag(f, n);
1613 #endif
1614 }
1615
1616 void server_done(Server *s) {
1617         JournalFile *f;
1618         assert(s);
1619
1620         while (s->stdout_streams)
1621                 stdout_stream_free(s->stdout_streams);
1622
1623         if (s->system_journal)
1624                 journal_file_close(s->system_journal);
1625
1626         if (s->runtime_journal)
1627                 journal_file_close(s->runtime_journal);
1628
1629         while ((f = hashmap_steal_first(s->user_journals)))
1630                 journal_file_close(f);
1631
1632         hashmap_free(s->user_journals);
1633
1634         sd_event_source_unref(s->syslog_event_source);
1635         sd_event_source_unref(s->native_event_source);
1636         sd_event_source_unref(s->stdout_event_source);
1637         sd_event_source_unref(s->dev_kmsg_event_source);
1638         sd_event_source_unref(s->sync_event_source);
1639         sd_event_source_unref(s->sigusr1_event_source);
1640         sd_event_source_unref(s->sigusr2_event_source);
1641         sd_event_source_unref(s->sigterm_event_source);
1642         sd_event_source_unref(s->sigint_event_source);
1643         sd_event_source_unref(s->hostname_event_source);
1644         sd_event_unref(s->event);
1645
1646         if (s->syslog_fd >= 0)
1647                 close_nointr_nofail(s->syslog_fd);
1648
1649         if (s->native_fd >= 0)
1650                 close_nointr_nofail(s->native_fd);
1651
1652         if (s->stdout_fd >= 0)
1653                 close_nointr_nofail(s->stdout_fd);
1654
1655         if (s->dev_kmsg_fd >= 0)
1656                 close_nointr_nofail(s->dev_kmsg_fd);
1657
1658         if (s->hostname_fd >= 0)
1659                 close_nointr_nofail(s->hostname_fd);
1660
1661         if (s->rate_limit)
1662                 journal_rate_limit_free(s->rate_limit);
1663
1664         if (s->kernel_seqnum)
1665                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1666
1667         free(s->buffer);
1668         free(s->tty_path);
1669         free(s->cgroup_root);
1670
1671         if (s->mmap)
1672                 mmap_cache_unref(s->mmap);
1673
1674         if (s->udev)
1675                 udev_unref(s->udev);
1676 }