chiark / gitweb /
12735c4b81d3e34e3311c0ab4d5e377ba43d9ccd
[elogind.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
26 #include <sys/mman.h>
27 #include <sys/timerfd.h>
28
29 #include <libudev.h>
30
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33 #include "sd-daemon.h"
34 #include "fileio.h"
35 #include "mkdir.h"
36 #include "hashmap.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
40 #include "list.h"
41 #include "missing.h"
42 #include "conf-parser.h"
43 #include "selinux-util.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-rate-limit.h"
48 #include "journald-kmsg.h"
49 #include "journald-syslog.h"
50 #include "journald-stream.h"
51 #include "journald-console.h"
52 #include "journald-native.h"
53 #include "journald-server.h"
54
55 #ifdef HAVE_ACL
56 #include <sys/acl.h>
57 #include <acl/libacl.h>
58 #include "acl-util.h"
59 #endif
60
61 #ifdef HAVE_SELINUX
62 #include <selinux/selinux.h>
63 #endif
64
/* Upper bound on simultaneously open per-user journal files; find_journal()
 * closes the oldest entries once this many are open. */
#define USER_JOURNALS_MAX 1024

/* Built-in defaults. They are not referenced in this part of the file;
 * presumably they seed the Server configuration -- confirm at the use site. */
#define DEFAULT_SYNC_INTERVAL_USEC (5 * USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30 * USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* How long available_space() trusts its cached result before recomputing. */
#define RECHECK_AVAILABLE_SPACE_USEC (30 * USEC_PER_SEC)
73
74 static const char* const storage_table[_STORAGE_MAX] = {
75         [STORAGE_AUTO] = "auto",
76         [STORAGE_VOLATILE] = "volatile",
77         [STORAGE_PERSISTENT] = "persistent",
78         [STORAGE_NONE] = "none"
79 };
80
81 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
82 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
83
84 static const char* const split_mode_table[_SPLIT_MAX] = {
85         [SPLIT_LOGIN] = "login",
86         [SPLIT_UID] = "uid",
87         [SPLIT_NONE] = "none",
88 };
89
90 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
91 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
92
93 static uint64_t available_space(Server *s, bool verbose) {
94         char ids[33];
95         _cleanup_free_ char *p = NULL;
96         sd_id128_t machine;
97         struct statvfs ss;
98         uint64_t sum = 0, ss_avail = 0, avail = 0;
99         int r;
100         _cleanup_closedir_ DIR *d = NULL;
101         usec_t ts;
102         const char *f;
103         JournalMetrics *m;
104
105         ts = now(CLOCK_MONOTONIC);
106
107         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
108             && !verbose)
109                 return s->cached_available_space;
110
111         r = sd_id128_get_machine(&machine);
112         if (r < 0)
113                 return 0;
114
115         if (s->system_journal) {
116                 f = "/var/log/journal/";
117                 m = &s->system_metrics;
118         } else {
119                 f = "/run/log/journal/";
120                 m = &s->runtime_metrics;
121         }
122
123         assert(m);
124
125         p = strappend(f, sd_id128_to_string(machine, ids));
126         if (!p)
127                 return 0;
128
129         d = opendir(p);
130         if (!d)
131                 return 0;
132
133         if (fstatvfs(dirfd(d), &ss) < 0)
134                 return 0;
135
136         for (;;) {
137                 struct stat st;
138                 struct dirent *de;
139
140                 errno = 0;
141                 de = readdir(d);
142                 if (!de && errno != 0)
143                         return 0;
144
145                 if (!de)
146                         break;
147
148                 if (!endswith(de->d_name, ".journal") &&
149                     !endswith(de->d_name, ".journal~"))
150                         continue;
151
152                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
153                         continue;
154
155                 if (!S_ISREG(st.st_mode))
156                         continue;
157
158                 sum += (uint64_t) st.st_blocks * 512UL;
159         }
160
161         ss_avail = ss.f_bsize * ss.f_bavail;
162
163         /* If we reached a high mark, we will always allow this much
164          * again, unless usage goes above max_use. This watermark
165          * value is cached so that we don't give up space on pressure,
166          * but hover below the maximum usage. */
167
168         if (m->use < sum)
169                 m->use = sum;
170
171         avail = LESS_BY(ss_avail, m->keep_free);
172
173         s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
174         s->cached_available_space_timestamp = ts;
175
176         if (verbose) {
177                 char    fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
178                         fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
179
180                 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
181                                       "%s journal is using %s (max allowed %s, "
182                                       "trying to leave %s free of %s available → current limit %s).",
183                                       s->system_journal ? "Permanent" : "Runtime",
184                                       format_bytes(fb1, sizeof(fb1), sum),
185                                       format_bytes(fb2, sizeof(fb2), m->max_use),
186                                       format_bytes(fb3, sizeof(fb3), m->keep_free),
187                                       format_bytes(fb4, sizeof(fb4), ss_avail),
188                                       format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
189         }
190
191         return s->cached_available_space;
192 }
193
194 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
195         int r;
196 #ifdef HAVE_ACL
197         acl_t acl;
198         acl_entry_t entry;
199         acl_permset_t permset;
200 #endif
201
202         assert(f);
203
204         r = fchmod(f->fd, 0640);
205         if (r < 0)
206                 log_warning("Failed to fix access mode on %s, ignoring: %s", f->path, strerror(-r));
207
208 #ifdef HAVE_ACL
209         if (uid <= SYSTEM_UID_MAX)
210                 return;
211
212         acl = acl_get_fd(f->fd);
213         if (!acl) {
214                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
215                 return;
216         }
217
218         r = acl_find_uid(acl, uid, &entry);
219         if (r <= 0) {
220
221                 if (acl_create_entry(&acl, &entry) < 0 ||
222                     acl_set_tag_type(entry, ACL_USER) < 0 ||
223                     acl_set_qualifier(entry, &uid) < 0) {
224                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
225                         goto finish;
226                 }
227         }
228
229         /* We do not recalculate the mask unconditionally here,
230          * so that the fchmod() mask above stays intact. */
231         if (acl_get_permset(entry, &permset) < 0 ||
232             acl_add_perm(permset, ACL_READ) < 0 ||
233             calc_acl_mask_if_needed(&acl) < 0) {
234                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
235                 goto finish;
236         }
237
238         if (acl_set_fd(f->fd, acl) < 0)
239                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
240
241 finish:
242         acl_free(acl);
243 #endif
244 }
245
246 static JournalFile* find_journal(Server *s, uid_t uid) {
247         _cleanup_free_ char *p = NULL;
248         int r;
249         JournalFile *f;
250         sd_id128_t machine;
251
252         assert(s);
253
254         /* We split up user logs only on /var, not on /run. If the
255          * runtime file is open, we write to it exclusively, in order
256          * to guarantee proper order as soon as we flush /run to
257          * /var and close the runtime file. */
258
259         if (s->runtime_journal)
260                 return s->runtime_journal;
261
262         if (uid <= SYSTEM_UID_MAX)
263                 return s->system_journal;
264
265         r = sd_id128_get_machine(&machine);
266         if (r < 0)
267                 return s->system_journal;
268
269         f = ordered_hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
270         if (f)
271                 return f;
272
273         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
274                      SD_ID128_FORMAT_VAL(machine), uid) < 0)
275                 return s->system_journal;
276
277         while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
278                 /* Too many open? Then let's close one */
279                 f = ordered_hashmap_steal_first(s->user_journals);
280                 assert(f);
281                 journal_file_close(f);
282         }
283
284         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
285         if (r < 0)
286                 return s->system_journal;
287
288         server_fix_perms(s, f, uid);
289
290         r = ordered_hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
291         if (r < 0) {
292                 journal_file_close(f);
293                 return s->system_journal;
294         }
295
296         return f;
297 }
298
299 static int do_rotate(Server *s, JournalFile **f, const char* name,
300                      bool seal, uint32_t uid) {
301         int r;
302         assert(s);
303
304         if (!*f)
305                 return -EINVAL;
306
307         r = journal_file_rotate(f, s->compress, seal);
308         if (r < 0)
309                 if (*f)
310                         log_error("Failed to rotate %s: %s",
311                                   (*f)->path, strerror(-r));
312                 else
313                         log_error("Failed to create new %s journal: %s",
314                                   name, strerror(-r));
315         else
316                 server_fix_perms(s, *f, uid);
317         return r;
318 }
319
320 void server_rotate(Server *s) {
321         JournalFile *f;
322         void *k;
323         Iterator i;
324         int r;
325
326         log_debug("Rotating...");
327
328         do_rotate(s, &s->runtime_journal, "runtime", false, 0);
329         do_rotate(s, &s->system_journal, "system", s->seal, 0);
330
331         ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
332                 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
333                 if (r >= 0)
334                         ordered_hashmap_replace(s->user_journals, k, f);
335                 else if (!f)
336                         /* Old file has been closed and deallocated */
337                         ordered_hashmap_remove(s->user_journals, k);
338         }
339 }
340
341 void server_sync(Server *s) {
342         JournalFile *f;
343         void *k;
344         Iterator i;
345         int r;
346
347         if (s->system_journal) {
348                 r = journal_file_set_offline(s->system_journal);
349                 if (r < 0)
350                         log_error("Failed to sync system journal: %s", strerror(-r));
351         }
352
353         ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
354                 r = journal_file_set_offline(f);
355                 if (r < 0)
356                         log_error("Failed to sync user journal: %s", strerror(-r));
357         }
358
359         if (s->sync_event_source) {
360                 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
361                 if (r < 0)
362                         log_error("Failed to disable sync timer source: %s", strerror(-r));
363         }
364
365         s->sync_scheduled = false;
366 }
367
368 static void do_vacuum(Server *s, char *ids, JournalFile *f, const char* path,
369                       JournalMetrics *metrics) {
370         char *p;
371         int r;
372
373         if (!f)
374                 return;
375
376         p = strappenda(path, ids);
377         r = journal_directory_vacuum(p, metrics->max_use, s->max_retention_usec, &s->oldest_file_usec);
378         if (r < 0 && r != -ENOENT)
379                 log_error("Failed to vacuum %s: %s", p, strerror(-r));
380 }
381
382 void server_vacuum(Server *s) {
383         char ids[33];
384         sd_id128_t machine;
385         int r;
386
387         log_debug("Vacuuming...");
388
389         s->oldest_file_usec = 0;
390
391         r = sd_id128_get_machine(&machine);
392         if (r < 0) {
393                 log_error("Failed to get machine ID: %s", strerror(-r));
394                 return;
395         }
396         sd_id128_to_string(machine, ids);
397
398         do_vacuum(s, ids, s->system_journal, "/var/log/journal/", &s->system_metrics);
399         do_vacuum(s, ids, s->runtime_journal, "/run/log/journal/", &s->runtime_metrics);
400
401         s->cached_available_space_timestamp = 0;
402 }
403
404 static void server_cache_machine_id(Server *s) {
405         sd_id128_t id;
406         int r;
407
408         assert(s);
409
410         r = sd_id128_get_machine(&id);
411         if (r < 0)
412                 return;
413
414         sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
415 }
416
417 static void server_cache_boot_id(Server *s) {
418         sd_id128_t id;
419         int r;
420
421         assert(s);
422
423         r = sd_id128_get_boot(&id);
424         if (r < 0)
425                 return;
426
427         sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
428 }
429
430 static void server_cache_hostname(Server *s) {
431         _cleanup_free_ char *t = NULL;
432         char *x;
433
434         assert(s);
435
436         t = gethostname_malloc();
437         if (!t)
438                 return;
439
440         x = strappend("_HOSTNAME=", t);
441         if (!x)
442                 return;
443
444         free(s->hostname_field);
445         s->hostname_field = x;
446 }
447
448 bool shall_try_append_again(JournalFile *f, int r) {
449
450         /* -E2BIG            Hit configured limit
451            -EFBIG            Hit fs limit
452            -EDQUOT           Quota limit hit
453            -ENOSPC           Disk full
454            -EHOSTDOWN        Other machine
455            -EBUSY            Unclean shutdown
456            -EPROTONOSUPPORT  Unsupported feature
457            -EBADMSG          Corrupted
458            -ENODATA          Truncated
459            -ESHUTDOWN        Already archived */
460
461         if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
462                 log_debug("%s: Allocation limit reached, rotating.", f->path);
463         else if (r == -EHOSTDOWN)
464                 log_info("%s: Journal file from other machine, rotating.", f->path);
465         else if (r == -EBUSY)
466                 log_info("%s: Unclean shutdown, rotating.", f->path);
467         else if (r == -EPROTONOSUPPORT)
468                 log_info("%s: Unsupported feature, rotating.", f->path);
469         else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
470                 log_warning("%s: Journal file corrupted, rotating.", f->path);
471         else
472                 return false;
473
474         return true;
475 }
476
477 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
478         JournalFile *f;
479         bool vacuumed = false;
480         int r;
481
482         assert(s);
483         assert(iovec);
484         assert(n > 0);
485
486         f = find_journal(s, uid);
487         if (!f)
488                 return;
489
490         if (journal_file_rotate_suggested(f, s->max_file_usec)) {
491                 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
492                 server_rotate(s);
493                 server_vacuum(s);
494                 vacuumed = true;
495
496                 f = find_journal(s, uid);
497                 if (!f)
498                         return;
499         }
500
501         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
502         if (r >= 0) {
503                 server_schedule_sync(s, priority);
504                 return;
505         }
506
507         if (vacuumed || !shall_try_append_again(f, r)) {
508                 size_t size = 0;
509                 unsigned i;
510                 for (i = 0; i < n; i++)
511                         size += iovec[i].iov_len;
512
513                 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
514                 return;
515         }
516
517         server_rotate(s);
518         server_vacuum(s);
519
520         f = find_journal(s, uid);
521         if (!f)
522                 return;
523
524         log_debug("Retrying write.");
525         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
526         if (r < 0) {
527                 size_t size = 0;
528                 unsigned i;
529                 for (i = 0; i < n; i++)
530                         size += iovec[i].iov_len;
531
532                 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
533         } else
534                 server_schedule_sync(s, priority);
535 }
536
537 static void dispatch_message_real(
538                 Server *s,
539                 struct iovec *iovec, unsigned n, unsigned m,
540                 struct ucred *ucred,
541                 struct timeval *tv,
542                 const char *label, size_t label_len,
543                 const char *unit_id,
544                 int priority,
545                 pid_t object_pid) {
546
547         char    pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
548                 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
549                 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
550                 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
551                 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
552                 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
553                 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
554                 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
555         uid_t object_uid;
556         gid_t object_gid;
557         char *x;
558         int r;
559         char *t, *c;
560         uid_t realuid = 0, owner = 0, journal_uid;
561         bool owner_valid = false;
562 #ifdef HAVE_AUDIT
563         char    audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
564                 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
565                 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
566                 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
567
568         uint32_t audit;
569         uid_t loginuid;
570 #endif
571
572         assert(s);
573         assert(iovec);
574         assert(n > 0);
575         assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
576
577         if (ucred) {
578                 realuid = ucred->uid;
579
580                 sprintf(pid, "_PID="PID_FMT, ucred->pid);
581                 IOVEC_SET_STRING(iovec[n++], pid);
582
583                 sprintf(uid, "_UID="UID_FMT, ucred->uid);
584                 IOVEC_SET_STRING(iovec[n++], uid);
585
586                 sprintf(gid, "_GID="GID_FMT, ucred->gid);
587                 IOVEC_SET_STRING(iovec[n++], gid);
588
589                 r = get_process_comm(ucred->pid, &t);
590                 if (r >= 0) {
591                         x = strappenda("_COMM=", t);
592                         free(t);
593                         IOVEC_SET_STRING(iovec[n++], x);
594                 }
595
596                 r = get_process_exe(ucred->pid, &t);
597                 if (r >= 0) {
598                         x = strappenda("_EXE=", t);
599                         free(t);
600                         IOVEC_SET_STRING(iovec[n++], x);
601                 }
602
603                 r = get_process_cmdline(ucred->pid, 0, false, &t);
604                 if (r >= 0) {
605                         x = strappenda("_CMDLINE=", t);
606                         free(t);
607                         IOVEC_SET_STRING(iovec[n++], x);
608                 }
609
610                 r = get_process_capeff(ucred->pid, &t);
611                 if (r >= 0) {
612                         x = strappenda("_CAP_EFFECTIVE=", t);
613                         free(t);
614                         IOVEC_SET_STRING(iovec[n++], x);
615                 }
616
617 #ifdef HAVE_AUDIT
618                 r = audit_session_from_pid(ucred->pid, &audit);
619                 if (r >= 0) {
620                         sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
621                         IOVEC_SET_STRING(iovec[n++], audit_session);
622                 }
623
624                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
625                 if (r >= 0) {
626                         sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
627                         IOVEC_SET_STRING(iovec[n++], audit_loginuid);
628                 }
629 #endif
630
631                 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
632                 if (r >= 0) {
633                         char *session = NULL;
634
635                         x = strappenda("_SYSTEMD_CGROUP=", c);
636                         IOVEC_SET_STRING(iovec[n++], x);
637
638                         r = cg_path_get_session(c, &t);
639                         if (r >= 0) {
640                                 session = strappenda("_SYSTEMD_SESSION=", t);
641                                 free(t);
642                                 IOVEC_SET_STRING(iovec[n++], session);
643                         }
644
645                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
646                                 owner_valid = true;
647
648                                 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
649                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
650                         }
651
652                         if (cg_path_get_unit(c, &t) >= 0) {
653                                 x = strappenda("_SYSTEMD_UNIT=", t);
654                                 free(t);
655                                 IOVEC_SET_STRING(iovec[n++], x);
656                         } else if (unit_id && !session) {
657                                 x = strappenda("_SYSTEMD_UNIT=", unit_id);
658                                 IOVEC_SET_STRING(iovec[n++], x);
659                         }
660
661                         if (cg_path_get_user_unit(c, &t) >= 0) {
662                                 x = strappenda("_SYSTEMD_USER_UNIT=", t);
663                                 free(t);
664                                 IOVEC_SET_STRING(iovec[n++], x);
665                         } else if (unit_id && session) {
666                                 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
667                                 IOVEC_SET_STRING(iovec[n++], x);
668                         }
669
670                         if (cg_path_get_slice(c, &t) >= 0) {
671                                 x = strappenda("_SYSTEMD_SLICE=", t);
672                                 free(t);
673                                 IOVEC_SET_STRING(iovec[n++], x);
674                         }
675
676                         free(c);
677                 } else if (unit_id) {
678                         x = strappenda("_SYSTEMD_UNIT=", unit_id);
679                         IOVEC_SET_STRING(iovec[n++], x);
680                 }
681
682 #ifdef HAVE_SELINUX
683                 if (mac_selinux_use()) {
684                         if (label) {
685                                 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
686
687                                 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
688                                 IOVEC_SET_STRING(iovec[n++], x);
689                         } else {
690                                 security_context_t con;
691
692                                 if (getpidcon(ucred->pid, &con) >= 0) {
693                                         x = strappenda("_SELINUX_CONTEXT=", con);
694
695                                         freecon(con);
696                                         IOVEC_SET_STRING(iovec[n++], x);
697                                 }
698                         }
699                 }
700 #endif
701         }
702         assert(n <= m);
703
704         if (object_pid) {
705                 r = get_process_uid(object_pid, &object_uid);
706                 if (r >= 0) {
707                         sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
708                         IOVEC_SET_STRING(iovec[n++], o_uid);
709                 }
710
711                 r = get_process_gid(object_pid, &object_gid);
712                 if (r >= 0) {
713                         sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
714                         IOVEC_SET_STRING(iovec[n++], o_gid);
715                 }
716
717                 r = get_process_comm(object_pid, &t);
718                 if (r >= 0) {
719                         x = strappenda("OBJECT_COMM=", t);
720                         free(t);
721                         IOVEC_SET_STRING(iovec[n++], x);
722                 }
723
724                 r = get_process_exe(object_pid, &t);
725                 if (r >= 0) {
726                         x = strappenda("OBJECT_EXE=", t);
727                         free(t);
728                         IOVEC_SET_STRING(iovec[n++], x);
729                 }
730
731                 r = get_process_cmdline(object_pid, 0, false, &t);
732                 if (r >= 0) {
733                         x = strappenda("OBJECT_CMDLINE=", t);
734                         free(t);
735                         IOVEC_SET_STRING(iovec[n++], x);
736                 }
737
738 #ifdef HAVE_AUDIT
739                 r = audit_session_from_pid(object_pid, &audit);
740                 if (r >= 0) {
741                         sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
742                         IOVEC_SET_STRING(iovec[n++], o_audit_session);
743                 }
744
745                 r = audit_loginuid_from_pid(object_pid, &loginuid);
746                 if (r >= 0) {
747                         sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
748                         IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
749                 }
750 #endif
751
752                 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
753                 if (r >= 0) {
754                         x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
755                         IOVEC_SET_STRING(iovec[n++], x);
756
757                         r = cg_path_get_session(c, &t);
758                         if (r >= 0) {
759                                 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
760                                 free(t);
761                                 IOVEC_SET_STRING(iovec[n++], x);
762                         }
763
764                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
765                                 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
766                                 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
767                         }
768
769                         if (cg_path_get_unit(c, &t) >= 0) {
770                                 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
771                                 free(t);
772                                 IOVEC_SET_STRING(iovec[n++], x);
773                         }
774
775                         if (cg_path_get_user_unit(c, &t) >= 0) {
776                                 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
777                                 free(t);
778                                 IOVEC_SET_STRING(iovec[n++], x);
779                         }
780
781                         free(c);
782                 }
783         }
784         assert(n <= m);
785
786         if (tv) {
787                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
788                 IOVEC_SET_STRING(iovec[n++], source_time);
789         }
790
791         /* Note that strictly speaking storing the boot id here is
792          * redundant since the entry includes this in-line
793          * anyway. However, we need this indexed, too. */
794         if (!isempty(s->boot_id_field))
795                 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
796
797         if (!isempty(s->machine_id_field))
798                 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
799
800         if (!isempty(s->hostname_field))
801                 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
802
803         assert(n <= m);
804
805         if (s->split_mode == SPLIT_UID && realuid > 0)
806                 /* Split up strictly by any UID */
807                 journal_uid = realuid;
808         else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
809                 /* Split up by login UIDs.  We do this only if the
810                  * realuid is not root, in order not to accidentally
811                  * leak privileged information to the user that is
812                  * logged by a privileged process that is part of an
813                  * unprivileged session.*/
814                 journal_uid = owner;
815         else
816                 journal_uid = 0;
817
818         write_to_journal(s, journal_uid, iovec, n, priority);
819 }
820
821 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
822         char mid[11 + 32 + 1];
823         char buffer[16 + LINE_MAX + 1];
824         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
825         int n = 0;
826         va_list ap;
827         struct ucred ucred = {};
828
829         assert(s);
830         assert(format);
831
832         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
833         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
834
835         memcpy(buffer, "MESSAGE=", 8);
836         va_start(ap, format);
837         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
838         va_end(ap);
839         char_array_0(buffer);
840         IOVEC_SET_STRING(iovec[n++], buffer);
841
842         if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
843                 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
844                 char_array_0(mid);
845                 IOVEC_SET_STRING(iovec[n++], mid);
846         }
847
848         ucred.pid = getpid();
849         ucred.uid = getuid();
850         ucred.gid = getgid();
851
852         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
853 }
854
855 void server_dispatch_message(
856                 Server *s,
857                 struct iovec *iovec, unsigned n, unsigned m,
858                 struct ucred *ucred,
859                 struct timeval *tv,
860                 const char *label, size_t label_len,
861                 const char *unit_id,
862                 int priority,
863                 pid_t object_pid) {
864
865         int rl, r;
866         _cleanup_free_ char *path = NULL;
867         char *c;
868
869         assert(s);
870         assert(iovec || n == 0);
871
872         if (n == 0)
873                 return;
874
875         if (LOG_PRI(priority) > s->max_level_store)
876                 return;
877
878         /* Stop early in case the information will not be stored
879          * in a journal. */
880         if (s->storage == STORAGE_NONE)
881                 return;
882
883         if (!ucred)
884                 goto finish;
885
886         r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
887         if (r < 0)
888                 goto finish;
889
890         /* example: /user/lennart/3/foobar
891          *          /system/dbus.service/foobar
892          *
893          * So let's cut of everything past the third /, since that is
894          * where user directories start */
895
896         c = strchr(path, '/');
897         if (c) {
898                 c = strchr(c+1, '/');
899                 if (c) {
900                         c = strchr(c+1, '/');
901                         if (c)
902                                 *c = 0;
903                 }
904         }
905
906         rl = journal_rate_limit_test(s->rate_limit, path,
907                                      priority & LOG_PRIMASK, available_space(s, false));
908
909         if (rl == 0)
910                 return;
911
912         /* Write a suppression message if we suppressed something */
913         if (rl > 1)
914                 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
915                                       "Suppressed %u messages from %s", rl - 1, path);
916
917 finish:
918         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
919 }
920
921
922 static int system_journal_open(Server *s, bool flush_requested) {
923         int r;
924         char *fn;
925         sd_id128_t machine;
926         char ids[33];
927
928         r = sd_id128_get_machine(&machine);
929         if (r < 0) {
930                 log_error("Failed to get machine id: %s", strerror(-r));
931                 return r;
932         }
933
934         sd_id128_to_string(machine, ids);
935
936         if (!s->system_journal &&
937             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
938             (flush_requested
939              || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
940
941                 /* If in auto mode: first try to create the machine
942                  * path, but not the prefix.
943                  *
944                  * If in persistent mode: create /var/log/journal and
945                  * the machine path */
946
947                 if (s->storage == STORAGE_PERSISTENT)
948                         (void) mkdir("/var/log/journal/", 0755);
949
950                 fn = strappenda("/var/log/journal/", ids);
951                 (void) mkdir(fn, 0755);
952
953                 fn = strappenda(fn, "/system.journal");
954                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
955
956                 if (r >= 0)
957                         server_fix_perms(s, s->system_journal, 0);
958                 else if (r < 0) {
959                         if (r != -ENOENT && r != -EROFS)
960                                 log_warning("Failed to open system journal: %s", strerror(-r));
961
962                         r = 0;
963                 }
964         }
965
966         if (!s->runtime_journal &&
967             (s->storage != STORAGE_NONE)) {
968
969                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
970                 if (!fn)
971                         return -ENOMEM;
972
973                 if (s->system_journal) {
974
975                         /* Try to open the runtime journal, but only
976                          * if it already exists, so that we can flush
977                          * it into the system journal */
978
979                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
980                         free(fn);
981
982                         if (r < 0) {
983                                 if (r != -ENOENT)
984                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
985
986                                 r = 0;
987                         }
988
989                 } else {
990
991                         /* OK, we really need the runtime journal, so create
992                          * it if necessary. */
993
994                         (void) mkdir("/run/log", 0755);
995                         (void) mkdir("/run/log/journal", 0755);
996                         (void) mkdir_parents(fn, 0750);
997
998                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
999                         free(fn);
1000
1001                         if (r < 0) {
1002                                 log_error("Failed to open runtime journal: %s", strerror(-r));
1003                                 return r;
1004                         }
1005                 }
1006
1007                 if (s->runtime_journal)
1008                         server_fix_perms(s, s->runtime_journal, 0);
1009         }
1010
1011         available_space(s, true);
1012
1013         return r;
1014 }
1015
1016 int server_flush_to_var(Server *s) {
1017         sd_id128_t machine;
1018         sd_journal *j = NULL;
1019         char ts[FORMAT_TIMESPAN_MAX];
1020         usec_t start;
1021         unsigned n = 0;
1022         int r;
1023
1024         assert(s);
1025
1026         if (s->storage != STORAGE_AUTO &&
1027             s->storage != STORAGE_PERSISTENT)
1028                 return 0;
1029
1030         if (!s->runtime_journal)
1031                 return 0;
1032
1033         system_journal_open(s, true);
1034
1035         if (!s->system_journal)
1036                 return 0;
1037
1038         log_debug("Flushing to /var...");
1039
1040         start = now(CLOCK_MONOTONIC);
1041
1042         r = sd_id128_get_machine(&machine);
1043         if (r < 0)
1044                 return r;
1045
1046         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1047         if (r < 0) {
1048                 log_error("Failed to read runtime journal: %s", strerror(-r));
1049                 return r;
1050         }
1051
1052         sd_journal_set_data_threshold(j, 0);
1053
1054         SD_JOURNAL_FOREACH(j) {
1055                 Object *o = NULL;
1056                 JournalFile *f;
1057
1058                 f = j->current_file;
1059                 assert(f && f->current_offset > 0);
1060
1061                 n++;
1062
1063                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1064                 if (r < 0) {
1065                         log_error("Can't read entry: %s", strerror(-r));
1066                         goto finish;
1067                 }
1068
1069                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1070                 if (r >= 0)
1071                         continue;
1072
1073                 if (!shall_try_append_again(s->system_journal, r)) {
1074                         log_error("Can't write entry: %s", strerror(-r));
1075                         goto finish;
1076                 }
1077
1078                 server_rotate(s);
1079                 server_vacuum(s);
1080
1081                 if (!s->system_journal) {
1082                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1083                         r = -EIO;
1084                         goto finish;
1085                 }
1086
1087                 log_debug("Retrying write.");
1088                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1089                 if (r < 0) {
1090                         log_error("Can't write entry: %s", strerror(-r));
1091                         goto finish;
1092                 }
1093         }
1094
1095 finish:
1096         journal_file_post_change(s->system_journal);
1097
1098         journal_file_close(s->runtime_journal);
1099         s->runtime_journal = NULL;
1100
1101         if (r >= 0)
1102                 rm_rf("/run/log/journal", false, true, false);
1103
1104         sd_journal_close(j);
1105
1106         server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1107
1108         return r;
1109 }
1110
1111 int process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1112         Server *s = userdata;
1113
1114         assert(s);
1115         assert(fd == s->native_fd || fd == s->syslog_fd);
1116
1117         if (revents != EPOLLIN) {
1118                 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1119                 return -EIO;
1120         }
1121
1122         for (;;) {
1123                 struct ucred *ucred = NULL;
1124                 struct timeval *tv = NULL;
1125                 struct cmsghdr *cmsg;
1126                 char *label = NULL;
1127                 size_t label_len = 0;
1128                 struct iovec iovec;
1129
1130                 union {
1131                         struct cmsghdr cmsghdr;
1132
1133                         /* We use NAME_MAX space for the SELinux label
1134                          * here. The kernel currently enforces no
1135                          * limit, but according to suggestions from
1136                          * the SELinux people this will change and it
1137                          * will probably be identical to NAME_MAX. For
1138                          * now we use that, but this should be updated
1139                          * one day when the final limit is known.*/
1140                         uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1141                                     CMSG_SPACE(sizeof(struct timeval)) +
1142                                     CMSG_SPACE(sizeof(int)) + /* fd */
1143                                     CMSG_SPACE(NAME_MAX)]; /* selinux label */
1144                 } control = {};
1145                 struct msghdr msghdr = {
1146                         .msg_iov = &iovec,
1147                         .msg_iovlen = 1,
1148                         .msg_control = &control,
1149                         .msg_controllen = sizeof(control),
1150                 };
1151
1152                 ssize_t n;
1153                 int v;
1154                 int *fds = NULL;
1155                 unsigned n_fds = 0;
1156
1157                 if (ioctl(fd, SIOCINQ, &v) < 0) {
1158                         log_error("SIOCINQ failed: %m");
1159                         return -errno;
1160                 }
1161
1162                 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, LINE_MAX + (size_t) v))
1163                         return log_oom();
1164
1165                 iovec.iov_base = s->buffer;
1166                 iovec.iov_len = s->buffer_size;
1167
1168                 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1169                 if (n < 0) {
1170                         if (errno == EINTR || errno == EAGAIN)
1171                                 return 0;
1172
1173                         log_error("recvmsg() failed: %m");
1174                         return -errno;
1175                 }
1176
1177                 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1178
1179                         if (cmsg->cmsg_level == SOL_SOCKET &&
1180                             cmsg->cmsg_type == SCM_CREDENTIALS &&
1181                             cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1182                                 ucred = (struct ucred*) CMSG_DATA(cmsg);
1183                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1184                                  cmsg->cmsg_type == SCM_SECURITY) {
1185                                 label = (char*) CMSG_DATA(cmsg);
1186                                 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1187                         } else if (cmsg->cmsg_level == SOL_SOCKET &&
1188                                    cmsg->cmsg_type == SO_TIMESTAMP &&
1189                                    cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1190                                 tv = (struct timeval*) CMSG_DATA(cmsg);
1191                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1192                                  cmsg->cmsg_type == SCM_RIGHTS) {
1193                                 fds = (int*) CMSG_DATA(cmsg);
1194                                 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1195                         }
1196                 }
1197
1198                 if (fd == s->syslog_fd) {
1199                         if (n > 0 && n_fds == 0) {
1200                                 s->buffer[n] = 0;
1201                                 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1202                         } else if (n_fds > 0)
1203                                 log_warning("Got file descriptors via syslog socket. Ignoring.");
1204
1205                 } else {
1206                         if (n > 0 && n_fds == 0)
1207                                 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1208                         else if (n == 0 && n_fds == 1)
1209                                 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1210                         else if (n_fds > 0)
1211                                 log_warning("Got too many file descriptors via native socket. Ignoring.");
1212                 }
1213
1214                 close_many(fds, n_fds);
1215         }
1216 }
1217
1218 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1219         Server *s = userdata;
1220
1221         assert(s);
1222
1223         log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
1224
1225         server_flush_to_var(s);
1226         server_sync(s);
1227         server_vacuum(s);
1228
1229         touch("/run/systemd/journal/flushed");
1230
1231         return 0;
1232 }
1233
1234 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1235         Server *s = userdata;
1236
1237         assert(s);
1238
1239         log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
1240         server_rotate(s);
1241         server_vacuum(s);
1242
1243         return 0;
1244 }
1245
1246 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1247         Server *s = userdata;
1248
1249         assert(s);
1250
1251         log_received_signal(LOG_INFO, si);
1252
1253         sd_event_exit(s->event, 0);
1254         return 0;
1255 }
1256
1257 static int setup_signals(Server *s) {
1258         sigset_t mask;
1259         int r;
1260
1261         assert(s);
1262
1263         assert_se(sigemptyset(&mask) == 0);
1264         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1265         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1266
1267         r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1268         if (r < 0)
1269                 return r;
1270
1271         r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1272         if (r < 0)
1273                 return r;
1274
1275         r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1276         if (r < 0)
1277                 return r;
1278
1279         r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1280         if (r < 0)
1281                 return r;
1282
1283         return 0;
1284 }
1285
1286 static int server_parse_proc_cmdline(Server *s) {
1287         _cleanup_free_ char *line = NULL;
1288         const char *w, *state;
1289         size_t l;
1290         int r;
1291
1292         r = proc_cmdline(&line);
1293         if (r < 0)
1294                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1295         if (r <= 0)
1296                 return 0;
1297
1298         FOREACH_WORD_QUOTED(w, l, line, state) {
1299                 _cleanup_free_ char *word;
1300
1301                 word = strndup(w, l);
1302                 if (!word)
1303                         return -ENOMEM;
1304
1305                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1306                         r = parse_boolean(word + 35);
1307                         if (r < 0)
1308                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1309                         else
1310                                 s->forward_to_syslog = r;
1311                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1312                         r = parse_boolean(word + 33);
1313                         if (r < 0)
1314                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1315                         else
1316                                 s->forward_to_kmsg = r;
1317                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1318                         r = parse_boolean(word + 36);
1319                         if (r < 0)
1320                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1321                         else
1322                                 s->forward_to_console = r;
1323                 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1324                         r = parse_boolean(word + 33);
1325                         if (r < 0)
1326                                 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1327                         else
1328                                 s->forward_to_wall = r;
1329                 } else if (startswith(word, "systemd.journald"))
1330                         log_warning("Invalid systemd.journald parameter. Ignoring.");
1331         }
1332         /* do not warn about state here, since probably systemd already did */
1333
1334         return 0;
1335 }
1336
1337 static int server_parse_config_file(Server *s) {
1338         assert(s);
1339
1340         return config_parse(NULL, "/etc/systemd/journald.conf", NULL,
1341                             "Journal\0",
1342                             config_item_perf_lookup, journald_gperf_lookup,
1343                             false, false, true, s);
1344 }
1345
1346 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1347         Server *s = userdata;
1348
1349         assert(s);
1350
1351         server_sync(s);
1352         return 0;
1353 }
1354
1355 int server_schedule_sync(Server *s, int priority) {
1356         int r;
1357
1358         assert(s);
1359
1360         if (priority <= LOG_CRIT) {
1361                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1362                 server_sync(s);
1363                 return 0;
1364         }
1365
1366         if (s->sync_scheduled)
1367                 return 0;
1368
1369         if (s->sync_interval_usec > 0) {
1370                 usec_t when;
1371
1372                 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1373                 if (r < 0)
1374                         return r;
1375
1376                 when += s->sync_interval_usec;
1377
1378                 if (!s->sync_event_source) {
1379                         r = sd_event_add_time(
1380                                         s->event,
1381                                         &s->sync_event_source,
1382                                         CLOCK_MONOTONIC,
1383                                         when, 0,
1384                                         server_dispatch_sync, s);
1385                         if (r < 0)
1386                                 return r;
1387
1388                         r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1389                 } else {
1390                         r = sd_event_source_set_time(s->sync_event_source, when);
1391                         if (r < 0)
1392                                 return r;
1393
1394                         r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1395                 }
1396                 if (r < 0)
1397                         return r;
1398
1399                 s->sync_scheduled = true;
1400         }
1401
1402         return 0;
1403 }
1404
1405 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1406         Server *s = userdata;
1407
1408         assert(s);
1409
1410         server_cache_hostname(s);
1411         return 0;
1412 }
1413
1414 static int server_open_hostname(Server *s) {
1415         int r;
1416
1417         assert(s);
1418
1419         s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1420         if (s->hostname_fd < 0) {
1421                 log_error("Failed to open /proc/sys/kernel/hostname: %m");
1422                 return -errno;
1423         }
1424
1425         r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1426         if (r < 0) {
1427                 /* kernels prior to 3.2 don't support polling this file. Ignore
1428                  * the failure. */
1429                 if (r == -EPERM) {
1430                         log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1431                                         strerror(-r));
1432                         s->hostname_fd = safe_close(s->hostname_fd);
1433                         return 0;
1434                 }
1435
1436                 log_error("Failed to register hostname fd in event loop: %s", strerror(-r));
1437                 return r;
1438         }
1439
1440         r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1441         if (r < 0) {
1442                 log_error("Failed to adjust priority of host name event source: %s", strerror(-r));
1443                 return r;
1444         }
1445
1446         return 0;
1447 }
1448
1449 int server_init(Server *s) {
1450         int n, r, fd;
1451
1452         assert(s);
1453
1454         zero(*s);
1455         s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->hostname_fd = -1;
1456         s->compress = true;
1457         s->seal = true;
1458
1459         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1460         s->sync_scheduled = false;
1461
1462         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1463         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1464
1465         s->forward_to_wall = true;
1466
1467         s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1468
1469         s->max_level_store = LOG_DEBUG;
1470         s->max_level_syslog = LOG_DEBUG;
1471         s->max_level_kmsg = LOG_NOTICE;
1472         s->max_level_console = LOG_INFO;
1473         s->max_level_wall = LOG_EMERG;
1474
1475         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1476         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1477
1478         server_parse_config_file(s);
1479         server_parse_proc_cmdline(s);
1480         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1481                 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1482                           s->rate_limit_interval, s->rate_limit_burst);
1483                 s->rate_limit_interval = s->rate_limit_burst = 0;
1484         }
1485
1486         mkdir_p("/run/systemd/journal", 0755);
1487
1488         s->user_journals = ordered_hashmap_new(NULL);
1489         if (!s->user_journals)
1490                 return log_oom();
1491
1492         s->mmap = mmap_cache_new();
1493         if (!s->mmap)
1494                 return log_oom();
1495
1496         r = sd_event_default(&s->event);
1497         if (r < 0) {
1498                 log_error("Failed to create event loop: %s", strerror(-r));
1499                 return r;
1500         }
1501
1502         sd_event_set_watchdog(s->event, true);
1503
1504         n = sd_listen_fds(true);
1505         if (n < 0) {
1506                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1507                 return n;
1508         }
1509
1510         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1511
1512                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1513
1514                         if (s->native_fd >= 0) {
1515                                 log_error("Too many native sockets passed.");
1516                                 return -EINVAL;
1517                         }
1518
1519                         s->native_fd = fd;
1520
1521                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1522
1523                         if (s->stdout_fd >= 0) {
1524                                 log_error("Too many stdout sockets passed.");
1525                                 return -EINVAL;
1526                         }
1527
1528                         s->stdout_fd = fd;
1529
1530                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1531                            sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1532
1533                         if (s->syslog_fd >= 0) {
1534                                 log_error("Too many /dev/log sockets passed.");
1535                                 return -EINVAL;
1536                         }
1537
1538                         s->syslog_fd = fd;
1539
1540                 } else {
1541                         log_error("Unknown socket passed.");
1542                         return -EINVAL;
1543                 }
1544         }
1545
1546         r = server_open_syslog_socket(s);
1547         if (r < 0)
1548                 return r;
1549
1550         r = server_open_native_socket(s);
1551         if (r < 0)
1552                 return r;
1553
1554         r = server_open_stdout_socket(s);
1555         if (r < 0)
1556                 return r;
1557
1558         r = server_open_dev_kmsg(s);
1559         if (r < 0)
1560                 return r;
1561
1562         r = server_open_kernel_seqnum(s);
1563         if (r < 0)
1564                 return r;
1565
1566         r = server_open_hostname(s);
1567         if (r < 0)
1568                 return r;
1569
1570         r = setup_signals(s);
1571         if (r < 0)
1572                 return r;
1573
1574         s->udev = udev_new();
1575         if (!s->udev)
1576                 return -ENOMEM;
1577
1578         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1579         if (!s->rate_limit)
1580                 return -ENOMEM;
1581
1582         r = cg_get_root_path(&s->cgroup_root);
1583         if (r < 0)
1584                 return r;
1585
1586         server_cache_hostname(s);
1587         server_cache_boot_id(s);
1588         server_cache_machine_id(s);
1589
1590         r = system_journal_open(s, false);
1591         if (r < 0)
1592                 return r;
1593
1594         return 0;
1595 }
1596
1597 void server_maybe_append_tags(Server *s) {
1598 #ifdef HAVE_GCRYPT
1599         JournalFile *f;
1600         Iterator i;
1601         usec_t n;
1602
1603         n = now(CLOCK_REALTIME);
1604
1605         if (s->system_journal)
1606                 journal_file_maybe_append_tag(s->system_journal, n);
1607
1608         ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1609                 journal_file_maybe_append_tag(f, n);
1610 #endif
1611 }
1612
1613 void server_done(Server *s) {
1614         JournalFile *f;
1615         assert(s);
1616
1617         while (s->stdout_streams)
1618                 stdout_stream_free(s->stdout_streams);
1619
1620         if (s->system_journal)
1621                 journal_file_close(s->system_journal);
1622
1623         if (s->runtime_journal)
1624                 journal_file_close(s->runtime_journal);
1625
1626         while ((f = ordered_hashmap_steal_first(s->user_journals)))
1627                 journal_file_close(f);
1628
1629         ordered_hashmap_free(s->user_journals);
1630
1631         sd_event_source_unref(s->syslog_event_source);
1632         sd_event_source_unref(s->native_event_source);
1633         sd_event_source_unref(s->stdout_event_source);
1634         sd_event_source_unref(s->dev_kmsg_event_source);
1635         sd_event_source_unref(s->sync_event_source);
1636         sd_event_source_unref(s->sigusr1_event_source);
1637         sd_event_source_unref(s->sigusr2_event_source);
1638         sd_event_source_unref(s->sigterm_event_source);
1639         sd_event_source_unref(s->sigint_event_source);
1640         sd_event_source_unref(s->hostname_event_source);
1641         sd_event_unref(s->event);
1642
1643         safe_close(s->syslog_fd);
1644         safe_close(s->native_fd);
1645         safe_close(s->stdout_fd);
1646         safe_close(s->dev_kmsg_fd);
1647         safe_close(s->hostname_fd);
1648
1649         if (s->rate_limit)
1650                 journal_rate_limit_free(s->rate_limit);
1651
1652         if (s->kernel_seqnum)
1653                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1654
1655         free(s->buffer);
1656         free(s->tty_path);
1657         free(s->cgroup_root);
1658
1659         if (s->mmap)
1660                 mmap_cache_unref(s->mmap);
1661
1662         if (s->udev)
1663                 udev_unref(s->udev);
1664 }