chiark / gitweb /
journald: move /dev/log socket to /run
[elogind.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
26 #include <sys/mman.h>
27 #include <sys/timerfd.h>
28
29 #include <libudev.h>
30
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33 #include "sd-daemon.h"
34 #include "fileio.h"
35 #include "mkdir.h"
36 #include "hashmap.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
40 #include "list.h"
41 #include "missing.h"
42 #include "conf-parser.h"
43 #include "selinux-util.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-rate-limit.h"
48 #include "journald-kmsg.h"
49 #include "journald-syslog.h"
50 #include "journald-stream.h"
51 #include "journald-console.h"
52 #include "journald-native.h"
53 #include "journald-server.h"
54
55 #ifdef HAVE_ACL
56 #include <sys/acl.h>
57 #include <acl/libacl.h>
58 #include "acl-util.h"
59 #endif
60
61 #ifdef HAVE_SELINUX
62 #include <selinux/selinux.h>
63 #endif
64
/* Upper bound on the number of per-user journal files kept open at the
 * same time; find_journal() closes one before opening another once this
 * many are open. */
#define USER_JOURNALS_MAX               1024

/* Default sync interval and rate-limit settings (their consumers are
 * outside this part of the file). */
#define DEFAULT_SYNC_INTERVAL_USEC      (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL     (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST        1000

/* How long a value computed by available_space() is served from the
 * cache before the journal directory is scanned again. */
#define RECHECK_AVAILABLE_SPACE_USEC    (30*USEC_PER_SEC)
72
73 static const char* const storage_table[_STORAGE_MAX] = {
74         [STORAGE_AUTO] = "auto",
75         [STORAGE_VOLATILE] = "volatile",
76         [STORAGE_PERSISTENT] = "persistent",
77         [STORAGE_NONE] = "none"
78 };
79
80 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
81 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
82
83 static const char* const split_mode_table[_SPLIT_MAX] = {
84         [SPLIT_LOGIN] = "login",
85         [SPLIT_UID] = "uid",
86         [SPLIT_NONE] = "none",
87 };
88
89 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
90 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
91
92 static uint64_t available_space(Server *s, bool verbose) {
93         char ids[33];
94         _cleanup_free_ char *p = NULL;
95         sd_id128_t machine;
96         struct statvfs ss;
97         uint64_t sum = 0, ss_avail = 0, avail = 0;
98         int r;
99         _cleanup_closedir_ DIR *d = NULL;
100         usec_t ts;
101         const char *f;
102         JournalMetrics *m;
103
104         ts = now(CLOCK_MONOTONIC);
105
106         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
107             && !verbose)
108                 return s->cached_available_space;
109
110         r = sd_id128_get_machine(&machine);
111         if (r < 0)
112                 return 0;
113
114         if (s->system_journal) {
115                 f = "/var/log/journal/";
116                 m = &s->system_metrics;
117         } else {
118                 f = "/run/log/journal/";
119                 m = &s->runtime_metrics;
120         }
121
122         assert(m);
123
124         p = strappend(f, sd_id128_to_string(machine, ids));
125         if (!p)
126                 return 0;
127
128         d = opendir(p);
129         if (!d)
130                 return 0;
131
132         if (fstatvfs(dirfd(d), &ss) < 0)
133                 return 0;
134
135         for (;;) {
136                 struct stat st;
137                 struct dirent *de;
138
139                 errno = 0;
140                 de = readdir(d);
141                 if (!de && errno != 0)
142                         return 0;
143
144                 if (!de)
145                         break;
146
147                 if (!endswith(de->d_name, ".journal") &&
148                     !endswith(de->d_name, ".journal~"))
149                         continue;
150
151                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
152                         continue;
153
154                 if (!S_ISREG(st.st_mode))
155                         continue;
156
157                 sum += (uint64_t) st.st_blocks * 512UL;
158         }
159
160         ss_avail = ss.f_bsize * ss.f_bavail;
161
162         /* If we reached a high mark, we will always allow this much
163          * again, unless usage goes above max_use. This watermark
164          * value is cached so that we don't give up space on pressure,
165          * but hover below the maximum usage. */
166
167         if (m->use < sum)
168                 m->use = sum;
169
170         avail = LESS_BY(ss_avail, m->keep_free);
171
172         s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
173         s->cached_available_space_timestamp = ts;
174
175         if (verbose) {
176                 char    fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
177                         fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
178
179                 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
180                                       "%s journal is using %s (max allowed %s, "
181                                       "trying to leave %s free of %s available → current limit %s).",
182                                       s->system_journal ? "Permanent" : "Runtime",
183                                       format_bytes(fb1, sizeof(fb1), sum),
184                                       format_bytes(fb2, sizeof(fb2), m->max_use),
185                                       format_bytes(fb3, sizeof(fb3), m->keep_free),
186                                       format_bytes(fb4, sizeof(fb4), ss_avail),
187                                       format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
188         }
189
190         return s->cached_available_space;
191 }
192
193 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
194         int r;
195 #ifdef HAVE_ACL
196         acl_t acl;
197         acl_entry_t entry;
198         acl_permset_t permset;
199 #endif
200
201         assert(f);
202
203         r = fchmod(f->fd, 0640);
204         if (r < 0)
205                 log_warning("Failed to fix access mode on %s, ignoring: %s", f->path, strerror(-r));
206
207 #ifdef HAVE_ACL
208         if (uid <= 0)
209                 return;
210
211         acl = acl_get_fd(f->fd);
212         if (!acl) {
213                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
214                 return;
215         }
216
217         r = acl_find_uid(acl, uid, &entry);
218         if (r <= 0) {
219
220                 if (acl_create_entry(&acl, &entry) < 0 ||
221                     acl_set_tag_type(entry, ACL_USER) < 0 ||
222                     acl_set_qualifier(entry, &uid) < 0) {
223                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
224                         goto finish;
225                 }
226         }
227
228         /* We do not recalculate the mask unconditionally here,
229          * so that the fchmod() mask above stays intact. */
230         if (acl_get_permset(entry, &permset) < 0 ||
231             acl_add_perm(permset, ACL_READ) < 0 ||
232             calc_acl_mask_if_needed(&acl) < 0) {
233                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
234                 goto finish;
235         }
236
237         if (acl_set_fd(f->fd, acl) < 0)
238                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
239
240 finish:
241         acl_free(acl);
242 #endif
243 }
244
245 static JournalFile* find_journal(Server *s, uid_t uid) {
246         _cleanup_free_ char *p = NULL;
247         int r;
248         JournalFile *f;
249         sd_id128_t machine;
250
251         assert(s);
252
253         /* We split up user logs only on /var, not on /run. If the
254          * runtime file is open, we write to it exclusively, in order
255          * to guarantee proper order as soon as we flush /run to
256          * /var and close the runtime file. */
257
258         if (s->runtime_journal)
259                 return s->runtime_journal;
260
261         if (uid <= SYSTEM_UID_MAX)
262                 return s->system_journal;
263
264         r = sd_id128_get_machine(&machine);
265         if (r < 0)
266                 return s->system_journal;
267
268         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
269         if (f)
270                 return f;
271
272         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
273                      SD_ID128_FORMAT_VAL(machine), uid) < 0)
274                 return s->system_journal;
275
276         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
277                 /* Too many open? Then let's close one */
278                 f = hashmap_steal_first(s->user_journals);
279                 assert(f);
280                 journal_file_close(f);
281         }
282
283         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
284         if (r < 0)
285                 return s->system_journal;
286
287         server_fix_perms(s, f, uid);
288
289         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
290         if (r < 0) {
291                 journal_file_close(f);
292                 return s->system_journal;
293         }
294
295         return f;
296 }
297
298 static int do_rotate(Server *s, JournalFile **f, const char* name,
299                      bool seal, uint32_t uid) {
300         int r;
301         assert(s);
302
303         if (!*f)
304                 return -EINVAL;
305
306         r = journal_file_rotate(f, s->compress, seal);
307         if (r < 0)
308                 if (*f)
309                         log_error("Failed to rotate %s: %s",
310                                   (*f)->path, strerror(-r));
311                 else
312                         log_error("Failed to create new %s journal: %s",
313                                   name, strerror(-r));
314         else
315                 server_fix_perms(s, *f, uid);
316         return r;
317 }
318
319 void server_rotate(Server *s) {
320         JournalFile *f;
321         void *k;
322         Iterator i;
323         int r;
324
325         log_debug("Rotating...");
326
327         do_rotate(s, &s->runtime_journal, "runtime", false, 0);
328         do_rotate(s, &s->system_journal, "system", s->seal, 0);
329
330         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
331                 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
332                 if (r >= 0)
333                         hashmap_replace(s->user_journals, k, f);
334                 else if (!f)
335                         /* Old file has been closed and deallocated */
336                         hashmap_remove(s->user_journals, k);
337         }
338 }
339
340 void server_sync(Server *s) {
341         JournalFile *f;
342         void *k;
343         Iterator i;
344         int r;
345
346         if (s->system_journal) {
347                 r = journal_file_set_offline(s->system_journal);
348                 if (r < 0)
349                         log_error("Failed to sync system journal: %s", strerror(-r));
350         }
351
352         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
353                 r = journal_file_set_offline(f);
354                 if (r < 0)
355                         log_error("Failed to sync user journal: %s", strerror(-r));
356         }
357
358         if (s->sync_event_source) {
359                 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
360                 if (r < 0)
361                         log_error("Failed to disable sync timer source: %s", strerror(-r));
362         }
363
364         s->sync_scheduled = false;
365 }
366
367 static void do_vacuum(Server *s, char *ids, JournalFile *f, const char* path,
368                       JournalMetrics *metrics) {
369         char *p;
370         int r;
371
372         if (!f)
373                 return;
374
375         p = strappenda(path, ids);
376         r = journal_directory_vacuum(p, metrics->max_use, s->max_retention_usec, &s->oldest_file_usec);
377         if (r < 0 && r != -ENOENT)
378                 log_error("Failed to vacuum %s: %s", p, strerror(-r));
379 }
380
381 void server_vacuum(Server *s) {
382         char ids[33];
383         sd_id128_t machine;
384         int r;
385
386         log_debug("Vacuuming...");
387
388         s->oldest_file_usec = 0;
389
390         r = sd_id128_get_machine(&machine);
391         if (r < 0) {
392                 log_error("Failed to get machine ID: %s", strerror(-r));
393                 return;
394         }
395         sd_id128_to_string(machine, ids);
396
397         do_vacuum(s, ids, s->system_journal, "/var/log/journal/", &s->system_metrics);
398         do_vacuum(s, ids, s->runtime_journal, "/run/log/journal/", &s->runtime_metrics);
399
400         s->cached_available_space_timestamp = 0;
401 }
402
403 static void server_cache_machine_id(Server *s) {
404         sd_id128_t id;
405         int r;
406
407         assert(s);
408
409         r = sd_id128_get_machine(&id);
410         if (r < 0)
411                 return;
412
413         sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
414 }
415
416 static void server_cache_boot_id(Server *s) {
417         sd_id128_t id;
418         int r;
419
420         assert(s);
421
422         r = sd_id128_get_boot(&id);
423         if (r < 0)
424                 return;
425
426         sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
427 }
428
429 static void server_cache_hostname(Server *s) {
430         _cleanup_free_ char *t = NULL;
431         char *x;
432
433         assert(s);
434
435         t = gethostname_malloc();
436         if (!t)
437                 return;
438
439         x = strappend("_HOSTNAME=", t);
440         if (!x)
441                 return;
442
443         free(s->hostname_field);
444         s->hostname_field = x;
445 }
446
447 bool shall_try_append_again(JournalFile *f, int r) {
448
449         /* -E2BIG            Hit configured limit
450            -EFBIG            Hit fs limit
451            -EDQUOT           Quota limit hit
452            -ENOSPC           Disk full
453            -EHOSTDOWN        Other machine
454            -EBUSY            Unclean shutdown
455            -EPROTONOSUPPORT  Unsupported feature
456            -EBADMSG          Corrupted
457            -ENODATA          Truncated
458            -ESHUTDOWN        Already archived */
459
460         if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
461                 log_debug("%s: Allocation limit reached, rotating.", f->path);
462         else if (r == -EHOSTDOWN)
463                 log_info("%s: Journal file from other machine, rotating.", f->path);
464         else if (r == -EBUSY)
465                 log_info("%s: Unclean shutdown, rotating.", f->path);
466         else if (r == -EPROTONOSUPPORT)
467                 log_info("%s: Unsupported feature, rotating.", f->path);
468         else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
469                 log_warning("%s: Journal file corrupted, rotating.", f->path);
470         else
471                 return false;
472
473         return true;
474 }
475
476 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
477         JournalFile *f;
478         bool vacuumed = false;
479         int r;
480
481         assert(s);
482         assert(iovec);
483         assert(n > 0);
484
485         f = find_journal(s, uid);
486         if (!f)
487                 return;
488
489         if (journal_file_rotate_suggested(f, s->max_file_usec)) {
490                 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
491                 server_rotate(s);
492                 server_vacuum(s);
493                 vacuumed = true;
494
495                 f = find_journal(s, uid);
496                 if (!f)
497                         return;
498         }
499
500         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
501         if (r >= 0) {
502                 server_schedule_sync(s, priority);
503                 return;
504         }
505
506         if (vacuumed || !shall_try_append_again(f, r)) {
507                 size_t size = 0;
508                 unsigned i;
509                 for (i = 0; i < n; i++)
510                         size += iovec[i].iov_len;
511
512                 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
513                 return;
514         }
515
516         server_rotate(s);
517         server_vacuum(s);
518
519         f = find_journal(s, uid);
520         if (!f)
521                 return;
522
523         log_debug("Retrying write.");
524         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
525         if (r < 0) {
526                 size_t size = 0;
527                 unsigned i;
528                 for (i = 0; i < n; i++)
529                         size += iovec[i].iov_len;
530
531                 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
532         } else
533                 server_schedule_sync(s, priority);
534 }
535
536 static void dispatch_message_real(
537                 Server *s,
538                 struct iovec *iovec, unsigned n, unsigned m,
539                 struct ucred *ucred,
540                 struct timeval *tv,
541                 const char *label, size_t label_len,
542                 const char *unit_id,
543                 int priority,
544                 pid_t object_pid) {
545
546         char    pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
547                 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
548                 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
549                 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
550                 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
551                 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
552                 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
553                 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
554         uid_t object_uid;
555         gid_t object_gid;
556         char *x;
557         int r;
558         char *t, *c;
559         uid_t realuid = 0, owner = 0, journal_uid;
560         bool owner_valid = false;
561 #ifdef HAVE_AUDIT
562         char    audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
563                 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
564                 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
565                 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
566
567         uint32_t audit;
568         uid_t loginuid;
569 #endif
570
571         assert(s);
572         assert(iovec);
573         assert(n > 0);
574         assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
575
576         if (ucred) {
577                 realuid = ucred->uid;
578
579                 sprintf(pid, "_PID="PID_FMT, ucred->pid);
580                 IOVEC_SET_STRING(iovec[n++], pid);
581
582                 sprintf(uid, "_UID="UID_FMT, ucred->uid);
583                 IOVEC_SET_STRING(iovec[n++], uid);
584
585                 sprintf(gid, "_GID="GID_FMT, ucred->gid);
586                 IOVEC_SET_STRING(iovec[n++], gid);
587
588                 r = get_process_comm(ucred->pid, &t);
589                 if (r >= 0) {
590                         x = strappenda("_COMM=", t);
591                         free(t);
592                         IOVEC_SET_STRING(iovec[n++], x);
593                 }
594
595                 r = get_process_exe(ucred->pid, &t);
596                 if (r >= 0) {
597                         x = strappenda("_EXE=", t);
598                         free(t);
599                         IOVEC_SET_STRING(iovec[n++], x);
600                 }
601
602                 r = get_process_cmdline(ucred->pid, 0, false, &t);
603                 if (r >= 0) {
604                         x = strappenda("_CMDLINE=", t);
605                         free(t);
606                         IOVEC_SET_STRING(iovec[n++], x);
607                 }
608
609                 r = get_process_capeff(ucred->pid, &t);
610                 if (r >= 0) {
611                         x = strappenda("_CAP_EFFECTIVE=", t);
612                         free(t);
613                         IOVEC_SET_STRING(iovec[n++], x);
614                 }
615
616 #ifdef HAVE_AUDIT
617                 r = audit_session_from_pid(ucred->pid, &audit);
618                 if (r >= 0) {
619                         sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
620                         IOVEC_SET_STRING(iovec[n++], audit_session);
621                 }
622
623                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
624                 if (r >= 0) {
625                         sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
626                         IOVEC_SET_STRING(iovec[n++], audit_loginuid);
627                 }
628 #endif
629
630                 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
631                 if (r >= 0) {
632                         char *session = NULL;
633
634                         x = strappenda("_SYSTEMD_CGROUP=", c);
635                         IOVEC_SET_STRING(iovec[n++], x);
636
637                         r = cg_path_get_session(c, &t);
638                         if (r >= 0) {
639                                 session = strappenda("_SYSTEMD_SESSION=", t);
640                                 free(t);
641                                 IOVEC_SET_STRING(iovec[n++], session);
642                         }
643
644                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
645                                 owner_valid = true;
646
647                                 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
648                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
649                         }
650
651                         if (cg_path_get_unit(c, &t) >= 0) {
652                                 x = strappenda("_SYSTEMD_UNIT=", t);
653                                 free(t);
654                                 IOVEC_SET_STRING(iovec[n++], x);
655                         } else if (unit_id && !session) {
656                                 x = strappenda("_SYSTEMD_UNIT=", unit_id);
657                                 IOVEC_SET_STRING(iovec[n++], x);
658                         }
659
660                         if (cg_path_get_user_unit(c, &t) >= 0) {
661                                 x = strappenda("_SYSTEMD_USER_UNIT=", t);
662                                 free(t);
663                                 IOVEC_SET_STRING(iovec[n++], x);
664                         } else if (unit_id && session) {
665                                 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
666                                 IOVEC_SET_STRING(iovec[n++], x);
667                         }
668
669                         if (cg_path_get_slice(c, &t) >= 0) {
670                                 x = strappenda("_SYSTEMD_SLICE=", t);
671                                 free(t);
672                                 IOVEC_SET_STRING(iovec[n++], x);
673                         }
674
675                         free(c);
676                 } else if (unit_id) {
677                         x = strappenda("_SYSTEMD_UNIT=", unit_id);
678                         IOVEC_SET_STRING(iovec[n++], x);
679                 }
680
681 #ifdef HAVE_SELINUX
682                 if (use_selinux()) {
683                         if (label) {
684                                 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
685
686                                 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
687                                 IOVEC_SET_STRING(iovec[n++], x);
688                         } else {
689                                 security_context_t con;
690
691                                 if (getpidcon(ucred->pid, &con) >= 0) {
692                                         x = strappenda("_SELINUX_CONTEXT=", con);
693
694                                         freecon(con);
695                                         IOVEC_SET_STRING(iovec[n++], x);
696                                 }
697                         }
698                 }
699 #endif
700         }
701         assert(n <= m);
702
703         if (object_pid) {
704                 r = get_process_uid(object_pid, &object_uid);
705                 if (r >= 0) {
706                         sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
707                         IOVEC_SET_STRING(iovec[n++], o_uid);
708                 }
709
710                 r = get_process_gid(object_pid, &object_gid);
711                 if (r >= 0) {
712                         sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
713                         IOVEC_SET_STRING(iovec[n++], o_gid);
714                 }
715
716                 r = get_process_comm(object_pid, &t);
717                 if (r >= 0) {
718                         x = strappenda("OBJECT_COMM=", t);
719                         free(t);
720                         IOVEC_SET_STRING(iovec[n++], x);
721                 }
722
723                 r = get_process_exe(object_pid, &t);
724                 if (r >= 0) {
725                         x = strappenda("OBJECT_EXE=", t);
726                         free(t);
727                         IOVEC_SET_STRING(iovec[n++], x);
728                 }
729
730                 r = get_process_cmdline(object_pid, 0, false, &t);
731                 if (r >= 0) {
732                         x = strappenda("OBJECT_CMDLINE=", t);
733                         free(t);
734                         IOVEC_SET_STRING(iovec[n++], x);
735                 }
736
737 #ifdef HAVE_AUDIT
738                 r = audit_session_from_pid(object_pid, &audit);
739                 if (r >= 0) {
740                         sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
741                         IOVEC_SET_STRING(iovec[n++], o_audit_session);
742                 }
743
744                 r = audit_loginuid_from_pid(object_pid, &loginuid);
745                 if (r >= 0) {
746                         sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
747                         IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
748                 }
749 #endif
750
751                 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
752                 if (r >= 0) {
753                         x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
754                         IOVEC_SET_STRING(iovec[n++], x);
755
756                         r = cg_path_get_session(c, &t);
757                         if (r >= 0) {
758                                 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
759                                 free(t);
760                                 IOVEC_SET_STRING(iovec[n++], x);
761                         }
762
763                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
764                                 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
765                                 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
766                         }
767
768                         if (cg_path_get_unit(c, &t) >= 0) {
769                                 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
770                                 free(t);
771                                 IOVEC_SET_STRING(iovec[n++], x);
772                         }
773
774                         if (cg_path_get_user_unit(c, &t) >= 0) {
775                                 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
776                                 free(t);
777                                 IOVEC_SET_STRING(iovec[n++], x);
778                         }
779
780                         free(c);
781                 }
782         }
783         assert(n <= m);
784
785         if (tv) {
786                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
787                 IOVEC_SET_STRING(iovec[n++], source_time);
788         }
789
790         /* Note that strictly speaking storing the boot id here is
791          * redundant since the entry includes this in-line
792          * anyway. However, we need this indexed, too. */
793         if (!isempty(s->boot_id_field))
794                 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
795
796         if (!isempty(s->machine_id_field))
797                 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
798
799         if (!isempty(s->hostname_field))
800                 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
801
802         assert(n <= m);
803
804         if (s->split_mode == SPLIT_UID && realuid > 0)
805                 /* Split up strictly by any UID */
806                 journal_uid = realuid;
807         else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
808                 /* Split up by login UIDs, this avoids creation of
809                  * individual journals for system UIDs.  We do this
810                  * only if the realuid is not root, in order not to
811                  * accidentally leak privileged information to the
812                  * user that is logged by a privileged process that is
813                  * part of an unprivileged session.*/
814                 journal_uid = owner;
815         else
816                 journal_uid = 0;
817
818         write_to_journal(s, journal_uid, iovec, n, priority);
819 }
820
821 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
822         char mid[11 + 32 + 1];
823         char buffer[16 + LINE_MAX + 1];
824         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
825         int n = 0;
826         va_list ap;
827         struct ucred ucred = {};
828
829         assert(s);
830         assert(format);
831
832         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
833         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
834
835         memcpy(buffer, "MESSAGE=", 8);
836         va_start(ap, format);
837         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
838         va_end(ap);
839         char_array_0(buffer);
840         IOVEC_SET_STRING(iovec[n++], buffer);
841
842         if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
843                 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
844                 char_array_0(mid);
845                 IOVEC_SET_STRING(iovec[n++], mid);
846         }
847
848         ucred.pid = getpid();
849         ucred.uid = getuid();
850         ucred.gid = getgid();
851
852         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
853 }
854
855 void server_dispatch_message(
856                 Server *s,
857                 struct iovec *iovec, unsigned n, unsigned m,
858                 struct ucred *ucred,
859                 struct timeval *tv,
860                 const char *label, size_t label_len,
861                 const char *unit_id,
862                 int priority,
863                 pid_t object_pid) {
864
865         int rl, r;
866         _cleanup_free_ char *path = NULL;
867         char *c;
868
869         assert(s);
870         assert(iovec || n == 0);
871
872         if (n == 0)
873                 return;
874
875         if (LOG_PRI(priority) > s->max_level_store)
876                 return;
877
878         /* Stop early in case the information will not be stored
879          * in a journal. */
880         if (s->storage == STORAGE_NONE)
881                 return;
882
883         if (!ucred)
884                 goto finish;
885
886         r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
887         if (r < 0)
888                 goto finish;
889
890         /* example: /user/lennart/3/foobar
891          *          /system/dbus.service/foobar
892          *
893          * So let's cut of everything past the third /, since that is
894          * where user directories start */
895
896         c = strchr(path, '/');
897         if (c) {
898                 c = strchr(c+1, '/');
899                 if (c) {
900                         c = strchr(c+1, '/');
901                         if (c)
902                                 *c = 0;
903                 }
904         }
905
906         rl = journal_rate_limit_test(s->rate_limit, path,
907                                      priority & LOG_PRIMASK, available_space(s, false));
908
909         if (rl == 0)
910                 return;
911
912         /* Write a suppression message if we suppressed something */
913         if (rl > 1)
914                 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
915                                       "Suppressed %u messages from %s", rl - 1, path);
916
917 finish:
918         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
919 }
920
921
922 static int system_journal_open(Server *s) {
923         int r;
924         char *fn;
925         sd_id128_t machine;
926         char ids[33];
927
928         r = sd_id128_get_machine(&machine);
929         if (r < 0) {
930                 log_error("Failed to get machine id: %s", strerror(-r));
931                 return r;
932         }
933
934         sd_id128_to_string(machine, ids);
935
936         if (!s->system_journal &&
937             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
938             access("/run/systemd/journal/flushed", F_OK) >= 0) {
939
940                 /* If in auto mode: first try to create the machine
941                  * path, but not the prefix.
942                  *
943                  * If in persistent mode: create /var/log/journal and
944                  * the machine path */
945
946                 if (s->storage == STORAGE_PERSISTENT)
947                         (void) mkdir("/var/log/journal/", 0755);
948
949                 fn = strappenda("/var/log/journal/", ids);
950                 (void) mkdir(fn, 0755);
951
952                 fn = strappenda(fn, "/system.journal");
953                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
954
955                 if (r >= 0)
956                         server_fix_perms(s, s->system_journal, 0);
957                 else if (r < 0) {
958                         if (r != -ENOENT && r != -EROFS)
959                                 log_warning("Failed to open system journal: %s", strerror(-r));
960
961                         r = 0;
962                 }
963         }
964
965         if (!s->runtime_journal &&
966             (s->storage != STORAGE_NONE)) {
967
968                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
969                 if (!fn)
970                         return -ENOMEM;
971
972                 if (s->system_journal) {
973
974                         /* Try to open the runtime journal, but only
975                          * if it already exists, so that we can flush
976                          * it into the system journal */
977
978                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
979                         free(fn);
980
981                         if (r < 0) {
982                                 if (r != -ENOENT)
983                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
984
985                                 r = 0;
986                         }
987
988                 } else {
989
990                         /* OK, we really need the runtime journal, so create
991                          * it if necessary. */
992
993                         (void) mkdir_parents(fn, 0755);
994                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
995                         free(fn);
996
997                         if (r < 0) {
998                                 log_error("Failed to open runtime journal: %s", strerror(-r));
999                                 return r;
1000                         }
1001                 }
1002
1003                 if (s->runtime_journal)
1004                         server_fix_perms(s, s->runtime_journal, 0);
1005         }
1006
1007         available_space(s, true);
1008
1009         return r;
1010 }
1011
1012 int server_flush_to_var(Server *s) {
1013         sd_id128_t machine;
1014         sd_journal *j = NULL;
1015         char ts[FORMAT_TIMESPAN_MAX];
1016         usec_t start;
1017         unsigned n = 0;
1018         int r;
1019
1020         assert(s);
1021
1022         if (s->storage != STORAGE_AUTO &&
1023             s->storage != STORAGE_PERSISTENT)
1024                 return 0;
1025
1026         if (!s->runtime_journal)
1027                 return 0;
1028
1029         system_journal_open(s);
1030
1031         if (!s->system_journal)
1032                 return 0;
1033
1034         log_debug("Flushing to /var...");
1035
1036         start = now(CLOCK_MONOTONIC);
1037
1038         r = sd_id128_get_machine(&machine);
1039         if (r < 0)
1040                 return r;
1041
1042         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1043         if (r < 0) {
1044                 log_error("Failed to read runtime journal: %s", strerror(-r));
1045                 return r;
1046         }
1047
1048         sd_journal_set_data_threshold(j, 0);
1049
1050         SD_JOURNAL_FOREACH(j) {
1051                 Object *o = NULL;
1052                 JournalFile *f;
1053
1054                 f = j->current_file;
1055                 assert(f && f->current_offset > 0);
1056
1057                 n++;
1058
1059                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1060                 if (r < 0) {
1061                         log_error("Can't read entry: %s", strerror(-r));
1062                         goto finish;
1063                 }
1064
1065                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1066                 if (r >= 0)
1067                         continue;
1068
1069                 if (!shall_try_append_again(s->system_journal, r)) {
1070                         log_error("Can't write entry: %s", strerror(-r));
1071                         goto finish;
1072                 }
1073
1074                 server_rotate(s);
1075                 server_vacuum(s);
1076
1077                 if (!s->system_journal) {
1078                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1079                         r = -EIO;
1080                         goto finish;
1081                 }
1082
1083                 log_debug("Retrying write.");
1084                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1085                 if (r < 0) {
1086                         log_error("Can't write entry: %s", strerror(-r));
1087                         goto finish;
1088                 }
1089         }
1090
1091 finish:
1092         journal_file_post_change(s->system_journal);
1093
1094         journal_file_close(s->runtime_journal);
1095         s->runtime_journal = NULL;
1096
1097         if (r >= 0)
1098                 rm_rf("/run/log/journal", false, true, false);
1099
1100         sd_journal_close(j);
1101
1102         server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1103
1104         return r;
1105 }
1106
1107 int process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1108         Server *s = userdata;
1109
1110         assert(s);
1111         assert(fd == s->native_fd || fd == s->syslog_fd);
1112
1113         if (revents != EPOLLIN) {
1114                 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1115                 return -EIO;
1116         }
1117
1118         for (;;) {
1119                 struct ucred *ucred = NULL;
1120                 struct timeval *tv = NULL;
1121                 struct cmsghdr *cmsg;
1122                 char *label = NULL;
1123                 size_t label_len = 0;
1124                 struct iovec iovec;
1125
1126                 union {
1127                         struct cmsghdr cmsghdr;
1128
1129                         /* We use NAME_MAX space for the SELinux label
1130                          * here. The kernel currently enforces no
1131                          * limit, but according to suggestions from
1132                          * the SELinux people this will change and it
1133                          * will probably be identical to NAME_MAX. For
1134                          * now we use that, but this should be updated
1135                          * one day when the final limit is known.*/
1136                         uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1137                                     CMSG_SPACE(sizeof(struct timeval)) +
1138                                     CMSG_SPACE(sizeof(int)) + /* fd */
1139                                     CMSG_SPACE(NAME_MAX)]; /* selinux label */
1140                 } control = {};
1141                 struct msghdr msghdr = {
1142                         .msg_iov = &iovec,
1143                         .msg_iovlen = 1,
1144                         .msg_control = &control,
1145                         .msg_controllen = sizeof(control),
1146                 };
1147
1148                 ssize_t n;
1149                 int v;
1150                 int *fds = NULL;
1151                 unsigned n_fds = 0;
1152
1153                 if (ioctl(fd, SIOCINQ, &v) < 0) {
1154                         log_error("SIOCINQ failed: %m");
1155                         return -errno;
1156                 }
1157
1158                 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, LINE_MAX + (size_t) v))
1159                         return log_oom();
1160
1161                 iovec.iov_base = s->buffer;
1162                 iovec.iov_len = s->buffer_size;
1163
1164                 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1165                 if (n < 0) {
1166                         if (errno == EINTR || errno == EAGAIN)
1167                                 return 0;
1168
1169                         log_error("recvmsg() failed: %m");
1170                         return -errno;
1171                 }
1172
1173                 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1174
1175                         if (cmsg->cmsg_level == SOL_SOCKET &&
1176                             cmsg->cmsg_type == SCM_CREDENTIALS &&
1177                             cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1178                                 ucred = (struct ucred*) CMSG_DATA(cmsg);
1179                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1180                                  cmsg->cmsg_type == SCM_SECURITY) {
1181                                 label = (char*) CMSG_DATA(cmsg);
1182                                 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1183                         } else if (cmsg->cmsg_level == SOL_SOCKET &&
1184                                    cmsg->cmsg_type == SO_TIMESTAMP &&
1185                                    cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1186                                 tv = (struct timeval*) CMSG_DATA(cmsg);
1187                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1188                                  cmsg->cmsg_type == SCM_RIGHTS) {
1189                                 fds = (int*) CMSG_DATA(cmsg);
1190                                 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1191                         }
1192                 }
1193
1194                 if (fd == s->syslog_fd) {
1195                         if (n > 0 && n_fds == 0) {
1196                                 s->buffer[n] = 0;
1197                                 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1198                         } else if (n_fds > 0)
1199                                 log_warning("Got file descriptors via syslog socket. Ignoring.");
1200
1201                 } else {
1202                         if (n > 0 && n_fds == 0)
1203                                 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1204                         else if (n == 0 && n_fds == 1)
1205                                 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1206                         else if (n_fds > 0)
1207                                 log_warning("Got too many file descriptors via native socket. Ignoring.");
1208                 }
1209
1210                 close_many(fds, n_fds);
1211         }
1212 }
1213
1214 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1215         Server *s = userdata;
1216
1217         assert(s);
1218
1219         log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
1220
1221         touch("/run/systemd/journal/flushed");
1222         server_flush_to_var(s);
1223         server_sync(s);
1224
1225         return 0;
1226 }
1227
1228 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1229         Server *s = userdata;
1230
1231         assert(s);
1232
1233         log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
1234         server_rotate(s);
1235         server_vacuum(s);
1236
1237         return 0;
1238 }
1239
1240 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1241         Server *s = userdata;
1242
1243         assert(s);
1244
1245         log_received_signal(LOG_INFO, si);
1246
1247         sd_event_exit(s->event, 0);
1248         return 0;
1249 }
1250
1251 static int setup_signals(Server *s) {
1252         sigset_t mask;
1253         int r;
1254
1255         assert(s);
1256
1257         assert_se(sigemptyset(&mask) == 0);
1258         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1259         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1260
1261         r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1262         if (r < 0)
1263                 return r;
1264
1265         r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1266         if (r < 0)
1267                 return r;
1268
1269         r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1270         if (r < 0)
1271                 return r;
1272
1273         r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1274         if (r < 0)
1275                 return r;
1276
1277         return 0;
1278 }
1279
1280 static int server_parse_proc_cmdline(Server *s) {
1281         _cleanup_free_ char *line = NULL;
1282         char *w, *state;
1283         size_t l;
1284         int r;
1285
1286         r = proc_cmdline(&line);
1287         if (r < 0)
1288                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1289         if (r <= 0)
1290                 return 0;
1291
1292         FOREACH_WORD_QUOTED(w, l, line, state) {
1293                 _cleanup_free_ char *word;
1294
1295                 word = strndup(w, l);
1296                 if (!word)
1297                         return -ENOMEM;
1298
1299                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1300                         r = parse_boolean(word + 35);
1301                         if (r < 0)
1302                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1303                         else
1304                                 s->forward_to_syslog = r;
1305                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1306                         r = parse_boolean(word + 33);
1307                         if (r < 0)
1308                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1309                         else
1310                                 s->forward_to_kmsg = r;
1311                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1312                         r = parse_boolean(word + 36);
1313                         if (r < 0)
1314                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1315                         else
1316                                 s->forward_to_console = r;
1317                 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1318                         r = parse_boolean(word + 33);
1319                         if (r < 0)
1320                                 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1321                         else
1322                                 s->forward_to_wall = r;
1323                 } else if (startswith(word, "systemd.journald"))
1324                         log_warning("Invalid systemd.journald parameter. Ignoring.");
1325         }
1326
1327         return 0;
1328 }
1329
1330 static int server_parse_config_file(Server *s) {
1331         static const char fn[] = "/etc/systemd/journald.conf";
1332         _cleanup_fclose_ FILE *f = NULL;
1333         int r;
1334
1335         assert(s);
1336
1337         f = fopen(fn, "re");
1338         if (!f) {
1339                 if (errno == ENOENT)
1340                         return 0;
1341
1342                 log_warning("Failed to open configuration file %s: %m", fn);
1343                 return -errno;
1344         }
1345
1346         r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1347                          (void*) journald_gperf_lookup, false, false, s);
1348         if (r < 0)
1349                 log_warning("Failed to parse configuration file: %s", strerror(-r));
1350
1351         return r;
1352 }
1353
1354 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1355         Server *s = userdata;
1356
1357         assert(s);
1358
1359         server_sync(s);
1360         return 0;
1361 }
1362
1363 int server_schedule_sync(Server *s, int priority) {
1364         int r;
1365
1366         assert(s);
1367
1368         if (priority <= LOG_CRIT) {
1369                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1370                 server_sync(s);
1371                 return 0;
1372         }
1373
1374         if (s->sync_scheduled)
1375                 return 0;
1376
1377         if (s->sync_interval_usec > 0) {
1378                 usec_t when;
1379
1380                 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1381                 if (r < 0)
1382                         return r;
1383
1384                 when += s->sync_interval_usec;
1385
1386                 if (!s->sync_event_source) {
1387                         r = sd_event_add_time(
1388                                         s->event,
1389                                         &s->sync_event_source,
1390                                         CLOCK_MONOTONIC,
1391                                         when, 0,
1392                                         server_dispatch_sync, s);
1393                         if (r < 0)
1394                                 return r;
1395
1396                         r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1397                 } else {
1398                         r = sd_event_source_set_time(s->sync_event_source, when);
1399                         if (r < 0)
1400                                 return r;
1401
1402                         r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1403                 }
1404                 if (r < 0)
1405                         return r;
1406
1407                 s->sync_scheduled = true;
1408         }
1409
1410         return 0;
1411 }
1412
1413 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1414         Server *s = userdata;
1415
1416         assert(s);
1417
1418         server_cache_hostname(s);
1419         return 0;
1420 }
1421
1422 static int server_open_hostname(Server *s) {
1423         int r;
1424
1425         assert(s);
1426
1427         s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1428         if (s->hostname_fd < 0) {
1429                 log_error("Failed to open /proc/sys/kernel/hostname: %m");
1430                 return -errno;
1431         }
1432
1433         r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1434         if (r < 0) {
1435                 /* kernels prior to 3.2 don't support polling this file. Ignore
1436                  * the failure. */
1437                 if (r == -EPERM) {
1438                         log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1439                                         strerror(-r));
1440                         s->hostname_fd = safe_close(s->hostname_fd);
1441                         return 0;
1442                 }
1443
1444                 log_error("Failed to register hostname fd in event loop: %s", strerror(-r));
1445                 return r;
1446         }
1447
1448         r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1449         if (r < 0) {
1450                 log_error("Failed to adjust priority of host name event source: %s", strerror(-r));
1451                 return r;
1452         }
1453
1454         return 0;
1455 }
1456
1457 int server_init(Server *s) {
1458         int n, r, fd;
1459
1460         assert(s);
1461
1462         zero(*s);
1463         s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->hostname_fd = -1;
1464         s->compress = true;
1465         s->seal = true;
1466
1467         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1468         s->sync_scheduled = false;
1469
1470         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1471         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1472
1473         s->forward_to_syslog = true;
1474         s->forward_to_wall = true;
1475
1476         s->max_level_store = LOG_DEBUG;
1477         s->max_level_syslog = LOG_DEBUG;
1478         s->max_level_kmsg = LOG_NOTICE;
1479         s->max_level_console = LOG_INFO;
1480         s->max_level_wall = LOG_EMERG;
1481
1482         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1483         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1484
1485         server_parse_config_file(s);
1486         server_parse_proc_cmdline(s);
1487         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1488                 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1489                           (long long unsigned) s->rate_limit_interval,
1490                           s->rate_limit_burst);
1491                 s->rate_limit_interval = s->rate_limit_burst = 0;
1492         }
1493
1494         mkdir_p("/run/systemd/journal", 0755);
1495
1496         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1497         if (!s->user_journals)
1498                 return log_oom();
1499
1500         s->mmap = mmap_cache_new();
1501         if (!s->mmap)
1502                 return log_oom();
1503
1504         r = sd_event_default(&s->event);
1505         if (r < 0) {
1506                 log_error("Failed to create event loop: %s", strerror(-r));
1507                 return r;
1508         }
1509
1510         sd_event_set_watchdog(s->event, true);
1511
1512         n = sd_listen_fds(true);
1513         if (n < 0) {
1514                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1515                 return n;
1516         }
1517
1518         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1519
1520                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1521
1522                         if (s->native_fd >= 0) {
1523                                 log_error("Too many native sockets passed.");
1524                                 return -EINVAL;
1525                         }
1526
1527                         s->native_fd = fd;
1528
1529                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1530
1531                         if (s->stdout_fd >= 0) {
1532                                 log_error("Too many stdout sockets passed.");
1533                                 return -EINVAL;
1534                         }
1535
1536                         s->stdout_fd = fd;
1537
1538                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1539                            sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1540
1541                         if (s->syslog_fd >= 0) {
1542                                 log_error("Too many /dev/log sockets passed.");
1543                                 return -EINVAL;
1544                         }
1545
1546                         s->syslog_fd = fd;
1547
1548                 } else {
1549                         log_error("Unknown socket passed.");
1550                         return -EINVAL;
1551                 }
1552         }
1553
1554         r = server_open_syslog_socket(s);
1555         if (r < 0)
1556                 return r;
1557
1558         r = server_open_native_socket(s);
1559         if (r < 0)
1560                 return r;
1561
1562         r = server_open_stdout_socket(s);
1563         if (r < 0)
1564                 return r;
1565
1566         r = server_open_dev_kmsg(s);
1567         if (r < 0)
1568                 return r;
1569
1570         r = server_open_kernel_seqnum(s);
1571         if (r < 0)
1572                 return r;
1573
1574         r = server_open_hostname(s);
1575         if (r < 0)
1576                 return r;
1577
1578         r = setup_signals(s);
1579         if (r < 0)
1580                 return r;
1581
1582         s->udev = udev_new();
1583         if (!s->udev)
1584                 return -ENOMEM;
1585
1586         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1587         if (!s->rate_limit)
1588                 return -ENOMEM;
1589
1590         r = cg_get_root_path(&s->cgroup_root);
1591         if (r < 0)
1592                 return r;
1593
1594         server_cache_hostname(s);
1595         server_cache_boot_id(s);
1596         server_cache_machine_id(s);
1597
1598         r = system_journal_open(s);
1599         if (r < 0)
1600                 return r;
1601
1602         return 0;
1603 }
1604
1605 void server_maybe_append_tags(Server *s) {
1606 #ifdef HAVE_GCRYPT
1607         JournalFile *f;
1608         Iterator i;
1609         usec_t n;
1610
1611         n = now(CLOCK_REALTIME);
1612
1613         if (s->system_journal)
1614                 journal_file_maybe_append_tag(s->system_journal, n);
1615
1616         HASHMAP_FOREACH(f, s->user_journals, i)
1617                 journal_file_maybe_append_tag(f, n);
1618 #endif
1619 }
1620
1621 void server_done(Server *s) {
1622         JournalFile *f;
1623         assert(s);
1624
1625         while (s->stdout_streams)
1626                 stdout_stream_free(s->stdout_streams);
1627
1628         if (s->system_journal)
1629                 journal_file_close(s->system_journal);
1630
1631         if (s->runtime_journal)
1632                 journal_file_close(s->runtime_journal);
1633
1634         while ((f = hashmap_steal_first(s->user_journals)))
1635                 journal_file_close(f);
1636
1637         hashmap_free(s->user_journals);
1638
1639         sd_event_source_unref(s->syslog_event_source);
1640         sd_event_source_unref(s->native_event_source);
1641         sd_event_source_unref(s->stdout_event_source);
1642         sd_event_source_unref(s->dev_kmsg_event_source);
1643         sd_event_source_unref(s->sync_event_source);
1644         sd_event_source_unref(s->sigusr1_event_source);
1645         sd_event_source_unref(s->sigusr2_event_source);
1646         sd_event_source_unref(s->sigterm_event_source);
1647         sd_event_source_unref(s->sigint_event_source);
1648         sd_event_source_unref(s->hostname_event_source);
1649         sd_event_unref(s->event);
1650
1651         safe_close(s->syslog_fd);
1652         safe_close(s->native_fd);
1653         safe_close(s->stdout_fd);
1654         safe_close(s->dev_kmsg_fd);
1655         safe_close(s->hostname_fd);
1656
1657         if (s->rate_limit)
1658                 journal_rate_limit_free(s->rate_limit);
1659
1660         if (s->kernel_seqnum)
1661                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1662
1663         free(s->buffer);
1664         free(s->tty_path);
1665         free(s->cgroup_root);
1666
1667         if (s->mmap)
1668                 mmap_cache_unref(s->mmap);
1669
1670         if (s->udev)
1671                 udev_unref(s->udev);
1672 }