chiark / gitweb /
journald: Support journald.conf.d directories in the usual search paths
[elogind.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
26 #include <sys/mman.h>
27 #include <sys/timerfd.h>
28
29 #include <libudev.h>
30
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33 #include "sd-daemon.h"
34 #include "fileio.h"
35 #include "mkdir.h"
36 #include "hashmap.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
40 #include "list.h"
41 #include "missing.h"
42 #include "conf-parser.h"
43 #include "selinux-util.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-rate-limit.h"
48 #include "journald-kmsg.h"
49 #include "journald-syslog.h"
50 #include "journald-stream.h"
51 #include "journald-console.h"
52 #include "journald-native.h"
53 #include "journald-audit.h"
54 #include "journald-server.h"
55
56 #ifdef HAVE_ACL
57 #include <sys/acl.h>
58 #include <acl/libacl.h>
59 #include "acl-util.h"
60 #endif
61
62 #ifdef HAVE_SELINUX
63 #include <selinux/selinux.h>
64 #endif
65
/* Upper bound on simultaneously open per-user journal files; find_journal()
 * closes the oldest ones once this many are open. */
#define USER_JOURNALS_MAX 1024

/* Built-in defaults. NOTE(review): presumably applied when the server is
 * initialized; the code using them is not part of this section. */
#define DEFAULT_SYNC_INTERVAL_USEC (5 * USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30 * USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* How long a result computed by available_space() stays valid before the
 * journal directory is scanned again. */
#define RECHECK_AVAILABLE_SPACE_USEC (30 * USEC_PER_SEC)
74
75 static const char* const storage_table[_STORAGE_MAX] = {
76         [STORAGE_AUTO] = "auto",
77         [STORAGE_VOLATILE] = "volatile",
78         [STORAGE_PERSISTENT] = "persistent",
79         [STORAGE_NONE] = "none"
80 };
81
82 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
83 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
84
85 static const char* const split_mode_table[_SPLIT_MAX] = {
86         [SPLIT_LOGIN] = "login",
87         [SPLIT_UID] = "uid",
88         [SPLIT_NONE] = "none",
89 };
90
91 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
92 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
93
94 static uint64_t available_space(Server *s, bool verbose) {
95         char ids[33];
96         _cleanup_free_ char *p = NULL;
97         sd_id128_t machine;
98         struct statvfs ss;
99         uint64_t sum = 0, ss_avail = 0, avail = 0;
100         int r;
101         _cleanup_closedir_ DIR *d = NULL;
102         usec_t ts;
103         const char *f;
104         JournalMetrics *m;
105
106         ts = now(CLOCK_MONOTONIC);
107
108         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
109             && !verbose)
110                 return s->cached_available_space;
111
112         r = sd_id128_get_machine(&machine);
113         if (r < 0)
114                 return 0;
115
116         if (s->system_journal) {
117                 f = "/var/log/journal/";
118                 m = &s->system_metrics;
119         } else {
120                 f = "/run/log/journal/";
121                 m = &s->runtime_metrics;
122         }
123
124         assert(m);
125
126         p = strappend(f, sd_id128_to_string(machine, ids));
127         if (!p)
128                 return 0;
129
130         d = opendir(p);
131         if (!d)
132                 return 0;
133
134         if (fstatvfs(dirfd(d), &ss) < 0)
135                 return 0;
136
137         for (;;) {
138                 struct stat st;
139                 struct dirent *de;
140
141                 errno = 0;
142                 de = readdir(d);
143                 if (!de && errno != 0)
144                         return 0;
145
146                 if (!de)
147                         break;
148
149                 if (!endswith(de->d_name, ".journal") &&
150                     !endswith(de->d_name, ".journal~"))
151                         continue;
152
153                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
154                         continue;
155
156                 if (!S_ISREG(st.st_mode))
157                         continue;
158
159                 sum += (uint64_t) st.st_blocks * 512UL;
160         }
161
162         ss_avail = ss.f_bsize * ss.f_bavail;
163
164         /* If we reached a high mark, we will always allow this much
165          * again, unless usage goes above max_use. This watermark
166          * value is cached so that we don't give up space on pressure,
167          * but hover below the maximum usage. */
168
169         if (m->use < sum)
170                 m->use = sum;
171
172         avail = LESS_BY(ss_avail, m->keep_free);
173
174         s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
175         s->cached_available_space_timestamp = ts;
176
177         if (verbose) {
178                 char    fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
179                         fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
180
181                 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
182                                       "%s journal is using %s (max allowed %s, "
183                                       "trying to leave %s free of %s available → current limit %s).",
184                                       s->system_journal ? "Permanent" : "Runtime",
185                                       format_bytes(fb1, sizeof(fb1), sum),
186                                       format_bytes(fb2, sizeof(fb2), m->max_use),
187                                       format_bytes(fb3, sizeof(fb3), m->keep_free),
188                                       format_bytes(fb4, sizeof(fb4), ss_avail),
189                                       format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
190         }
191
192         return s->cached_available_space;
193 }
194
195 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
196         int r;
197 #ifdef HAVE_ACL
198         acl_t acl;
199         acl_entry_t entry;
200         acl_permset_t permset;
201 #endif
202
203         assert(f);
204
205         r = fchmod(f->fd, 0640);
206         if (r < 0)
207                 log_warning_errno(r, "Failed to fix access mode on %s, ignoring: %m", f->path);
208
209 #ifdef HAVE_ACL
210         if (uid <= SYSTEM_UID_MAX)
211                 return;
212
213         acl = acl_get_fd(f->fd);
214         if (!acl) {
215                 log_warning_errno(errno, "Failed to read ACL on %s, ignoring: %m", f->path);
216                 return;
217         }
218
219         r = acl_find_uid(acl, uid, &entry);
220         if (r <= 0) {
221
222                 if (acl_create_entry(&acl, &entry) < 0 ||
223                     acl_set_tag_type(entry, ACL_USER) < 0 ||
224                     acl_set_qualifier(entry, &uid) < 0) {
225                         log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
226                         goto finish;
227                 }
228         }
229
230         /* We do not recalculate the mask unconditionally here,
231          * so that the fchmod() mask above stays intact. */
232         if (acl_get_permset(entry, &permset) < 0 ||
233             acl_add_perm(permset, ACL_READ) < 0 ||
234             calc_acl_mask_if_needed(&acl) < 0) {
235                 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
236                 goto finish;
237         }
238
239         if (acl_set_fd(f->fd, acl) < 0)
240                 log_warning_errno(errno, "Failed to set ACL on %s, ignoring: %m", f->path);
241
242 finish:
243         acl_free(acl);
244 #endif
245 }
246
247 static JournalFile* find_journal(Server *s, uid_t uid) {
248         _cleanup_free_ char *p = NULL;
249         int r;
250         JournalFile *f;
251         sd_id128_t machine;
252
253         assert(s);
254
255         /* We split up user logs only on /var, not on /run. If the
256          * runtime file is open, we write to it exclusively, in order
257          * to guarantee proper order as soon as we flush /run to
258          * /var and close the runtime file. */
259
260         if (s->runtime_journal)
261                 return s->runtime_journal;
262
263         if (uid <= SYSTEM_UID_MAX)
264                 return s->system_journal;
265
266         r = sd_id128_get_machine(&machine);
267         if (r < 0)
268                 return s->system_journal;
269
270         f = ordered_hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
271         if (f)
272                 return f;
273
274         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
275                      SD_ID128_FORMAT_VAL(machine), uid) < 0)
276                 return s->system_journal;
277
278         while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
279                 /* Too many open? Then let's close one */
280                 f = ordered_hashmap_steal_first(s->user_journals);
281                 assert(f);
282                 journal_file_close(f);
283         }
284
285         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
286         if (r < 0)
287                 return s->system_journal;
288
289         server_fix_perms(s, f, uid);
290
291         r = ordered_hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
292         if (r < 0) {
293                 journal_file_close(f);
294                 return s->system_journal;
295         }
296
297         return f;
298 }
299
300 static int do_rotate(Server *s, JournalFile **f, const char* name,
301                      bool seal, uint32_t uid) {
302         int r;
303         assert(s);
304
305         if (!*f)
306                 return -EINVAL;
307
308         r = journal_file_rotate(f, s->compress, seal);
309         if (r < 0)
310                 if (*f)
311                         log_error_errno(r, "Failed to rotate %s: %m",
312                                         (*f)->path);
313                 else
314                         log_error_errno(r, "Failed to create new %s journal: %m",
315                                         name);
316         else
317                 server_fix_perms(s, *f, uid);
318         return r;
319 }
320
321 void server_rotate(Server *s) {
322         JournalFile *f;
323         void *k;
324         Iterator i;
325         int r;
326
327         log_debug("Rotating...");
328
329         do_rotate(s, &s->runtime_journal, "runtime", false, 0);
330         do_rotate(s, &s->system_journal, "system", s->seal, 0);
331
332         ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
333                 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
334                 if (r >= 0)
335                         ordered_hashmap_replace(s->user_journals, k, f);
336                 else if (!f)
337                         /* Old file has been closed and deallocated */
338                         ordered_hashmap_remove(s->user_journals, k);
339         }
340 }
341
342 void server_sync(Server *s) {
343         JournalFile *f;
344         void *k;
345         Iterator i;
346         int r;
347
348         if (s->system_journal) {
349                 r = journal_file_set_offline(s->system_journal);
350                 if (r < 0)
351                         log_error_errno(r, "Failed to sync system journal: %m");
352         }
353
354         ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
355                 r = journal_file_set_offline(f);
356                 if (r < 0)
357                         log_error_errno(r, "Failed to sync user journal: %m");
358         }
359
360         if (s->sync_event_source) {
361                 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
362                 if (r < 0)
363                         log_error_errno(r, "Failed to disable sync timer source: %m");
364         }
365
366         s->sync_scheduled = false;
367 }
368
369 static void do_vacuum(Server *s, char *ids, JournalFile *f, const char* path,
370                       JournalMetrics *metrics) {
371         char *p;
372         int r;
373
374         if (!f)
375                 return;
376
377         p = strappenda(path, ids);
378         r = journal_directory_vacuum(p, metrics->max_use, s->max_retention_usec, &s->oldest_file_usec, false);
379         if (r < 0 && r != -ENOENT)
380                 log_error_errno(r, "Failed to vacuum %s: %m", p);
381 }
382
383 void server_vacuum(Server *s) {
384         char ids[33];
385         sd_id128_t machine;
386         int r;
387
388         log_debug("Vacuuming...");
389
390         s->oldest_file_usec = 0;
391
392         r = sd_id128_get_machine(&machine);
393         if (r < 0) {
394                 log_error_errno(r, "Failed to get machine ID: %m");
395                 return;
396         }
397         sd_id128_to_string(machine, ids);
398
399         do_vacuum(s, ids, s->system_journal, "/var/log/journal/", &s->system_metrics);
400         do_vacuum(s, ids, s->runtime_journal, "/run/log/journal/", &s->runtime_metrics);
401
402         s->cached_available_space_timestamp = 0;
403 }
404
405 static void server_cache_machine_id(Server *s) {
406         sd_id128_t id;
407         int r;
408
409         assert(s);
410
411         r = sd_id128_get_machine(&id);
412         if (r < 0)
413                 return;
414
415         sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
416 }
417
418 static void server_cache_boot_id(Server *s) {
419         sd_id128_t id;
420         int r;
421
422         assert(s);
423
424         r = sd_id128_get_boot(&id);
425         if (r < 0)
426                 return;
427
428         sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
429 }
430
431 static void server_cache_hostname(Server *s) {
432         _cleanup_free_ char *t = NULL;
433         char *x;
434
435         assert(s);
436
437         t = gethostname_malloc();
438         if (!t)
439                 return;
440
441         x = strappend("_HOSTNAME=", t);
442         if (!x)
443                 return;
444
445         free(s->hostname_field);
446         s->hostname_field = x;
447 }
448
449 bool shall_try_append_again(JournalFile *f, int r) {
450
451         /* -E2BIG            Hit configured limit
452            -EFBIG            Hit fs limit
453            -EDQUOT           Quota limit hit
454            -ENOSPC           Disk full
455            -EHOSTDOWN        Other machine
456            -EBUSY            Unclean shutdown
457            -EPROTONOSUPPORT  Unsupported feature
458            -EBADMSG          Corrupted
459            -ENODATA          Truncated
460            -ESHUTDOWN        Already archived */
461
462         if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
463                 log_debug("%s: Allocation limit reached, rotating.", f->path);
464         else if (r == -EHOSTDOWN)
465                 log_info("%s: Journal file from other machine, rotating.", f->path);
466         else if (r == -EBUSY)
467                 log_info("%s: Unclean shutdown, rotating.", f->path);
468         else if (r == -EPROTONOSUPPORT)
469                 log_info("%s: Unsupported feature, rotating.", f->path);
470         else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
471                 log_warning("%s: Journal file corrupted, rotating.", f->path);
472         else
473                 return false;
474
475         return true;
476 }
477
478 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
479         JournalFile *f;
480         bool vacuumed = false;
481         int r;
482
483         assert(s);
484         assert(iovec);
485         assert(n > 0);
486
487         f = find_journal(s, uid);
488         if (!f)
489                 return;
490
491         if (journal_file_rotate_suggested(f, s->max_file_usec)) {
492                 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
493                 server_rotate(s);
494                 server_vacuum(s);
495                 vacuumed = true;
496
497                 f = find_journal(s, uid);
498                 if (!f)
499                         return;
500         }
501
502         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
503         if (r >= 0) {
504                 server_schedule_sync(s, priority);
505                 return;
506         }
507
508         if (vacuumed || !shall_try_append_again(f, r)) {
509                 size_t size = 0;
510                 unsigned i;
511                 for (i = 0; i < n; i++)
512                         size += iovec[i].iov_len;
513
514                 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, size);
515                 return;
516         }
517
518         server_rotate(s);
519         server_vacuum(s);
520
521         f = find_journal(s, uid);
522         if (!f)
523                 return;
524
525         log_debug("Retrying write.");
526         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
527         if (r < 0) {
528                 size_t size = 0;
529                 unsigned i;
530                 for (i = 0; i < n; i++)
531                         size += iovec[i].iov_len;
532
533                 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, size);
534         } else
535                 server_schedule_sync(s, priority);
536 }
537
538 static void dispatch_message_real(
539                 Server *s,
540                 struct iovec *iovec, unsigned n, unsigned m,
541                 const struct ucred *ucred,
542                 const struct timeval *tv,
543                 const char *label, size_t label_len,
544                 const char *unit_id,
545                 int priority,
546                 pid_t object_pid) {
547
548         char    pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
549                 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
550                 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
551                 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
552                 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
553                 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
554                 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
555                 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
556         uid_t object_uid;
557         gid_t object_gid;
558         char *x;
559         int r;
560         char *t, *c;
561         uid_t realuid = 0, owner = 0, journal_uid;
562         bool owner_valid = false;
563 #ifdef HAVE_AUDIT
564         char    audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
565                 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
566                 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
567                 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
568
569         uint32_t audit;
570         uid_t loginuid;
571 #endif
572
573         assert(s);
574         assert(iovec);
575         assert(n > 0);
576         assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
577
578         if (ucred) {
579                 realuid = ucred->uid;
580
581                 sprintf(pid, "_PID="PID_FMT, ucred->pid);
582                 IOVEC_SET_STRING(iovec[n++], pid);
583
584                 sprintf(uid, "_UID="UID_FMT, ucred->uid);
585                 IOVEC_SET_STRING(iovec[n++], uid);
586
587                 sprintf(gid, "_GID="GID_FMT, ucred->gid);
588                 IOVEC_SET_STRING(iovec[n++], gid);
589
590                 r = get_process_comm(ucred->pid, &t);
591                 if (r >= 0) {
592                         x = strappenda("_COMM=", t);
593                         free(t);
594                         IOVEC_SET_STRING(iovec[n++], x);
595                 }
596
597                 r = get_process_exe(ucred->pid, &t);
598                 if (r >= 0) {
599                         x = strappenda("_EXE=", t);
600                         free(t);
601                         IOVEC_SET_STRING(iovec[n++], x);
602                 }
603
604                 r = get_process_cmdline(ucred->pid, 0, false, &t);
605                 if (r >= 0) {
606                         x = strappenda("_CMDLINE=", t);
607                         free(t);
608                         IOVEC_SET_STRING(iovec[n++], x);
609                 }
610
611                 r = get_process_capeff(ucred->pid, &t);
612                 if (r >= 0) {
613                         x = strappenda("_CAP_EFFECTIVE=", t);
614                         free(t);
615                         IOVEC_SET_STRING(iovec[n++], x);
616                 }
617
618 #ifdef HAVE_AUDIT
619                 r = audit_session_from_pid(ucred->pid, &audit);
620                 if (r >= 0) {
621                         sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
622                         IOVEC_SET_STRING(iovec[n++], audit_session);
623                 }
624
625                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
626                 if (r >= 0) {
627                         sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
628                         IOVEC_SET_STRING(iovec[n++], audit_loginuid);
629                 }
630 #endif
631
632                 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
633                 if (r >= 0) {
634                         char *session = NULL;
635
636                         x = strappenda("_SYSTEMD_CGROUP=", c);
637                         IOVEC_SET_STRING(iovec[n++], x);
638
639                         r = cg_path_get_session(c, &t);
640                         if (r >= 0) {
641                                 session = strappenda("_SYSTEMD_SESSION=", t);
642                                 free(t);
643                                 IOVEC_SET_STRING(iovec[n++], session);
644                         }
645
646                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
647                                 owner_valid = true;
648
649                                 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
650                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
651                         }
652
653                         if (cg_path_get_unit(c, &t) >= 0) {
654                                 x = strappenda("_SYSTEMD_UNIT=", t);
655                                 free(t);
656                                 IOVEC_SET_STRING(iovec[n++], x);
657                         } else if (unit_id && !session) {
658                                 x = strappenda("_SYSTEMD_UNIT=", unit_id);
659                                 IOVEC_SET_STRING(iovec[n++], x);
660                         }
661
662                         if (cg_path_get_user_unit(c, &t) >= 0) {
663                                 x = strappenda("_SYSTEMD_USER_UNIT=", t);
664                                 free(t);
665                                 IOVEC_SET_STRING(iovec[n++], x);
666                         } else if (unit_id && session) {
667                                 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
668                                 IOVEC_SET_STRING(iovec[n++], x);
669                         }
670
671                         if (cg_path_get_slice(c, &t) >= 0) {
672                                 x = strappenda("_SYSTEMD_SLICE=", t);
673                                 free(t);
674                                 IOVEC_SET_STRING(iovec[n++], x);
675                         }
676
677                         free(c);
678                 } else if (unit_id) {
679                         x = strappenda("_SYSTEMD_UNIT=", unit_id);
680                         IOVEC_SET_STRING(iovec[n++], x);
681                 }
682
683 #ifdef HAVE_SELINUX
684                 if (mac_selinux_use()) {
685                         if (label) {
686                                 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
687
688                                 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
689                                 IOVEC_SET_STRING(iovec[n++], x);
690                         } else {
691                                 security_context_t con;
692
693                                 if (getpidcon(ucred->pid, &con) >= 0) {
694                                         x = strappenda("_SELINUX_CONTEXT=", con);
695
696                                         freecon(con);
697                                         IOVEC_SET_STRING(iovec[n++], x);
698                                 }
699                         }
700                 }
701 #endif
702         }
703         assert(n <= m);
704
705         if (object_pid) {
706                 r = get_process_uid(object_pid, &object_uid);
707                 if (r >= 0) {
708                         sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
709                         IOVEC_SET_STRING(iovec[n++], o_uid);
710                 }
711
712                 r = get_process_gid(object_pid, &object_gid);
713                 if (r >= 0) {
714                         sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
715                         IOVEC_SET_STRING(iovec[n++], o_gid);
716                 }
717
718                 r = get_process_comm(object_pid, &t);
719                 if (r >= 0) {
720                         x = strappenda("OBJECT_COMM=", t);
721                         free(t);
722                         IOVEC_SET_STRING(iovec[n++], x);
723                 }
724
725                 r = get_process_exe(object_pid, &t);
726                 if (r >= 0) {
727                         x = strappenda("OBJECT_EXE=", t);
728                         free(t);
729                         IOVEC_SET_STRING(iovec[n++], x);
730                 }
731
732                 r = get_process_cmdline(object_pid, 0, false, &t);
733                 if (r >= 0) {
734                         x = strappenda("OBJECT_CMDLINE=", t);
735                         free(t);
736                         IOVEC_SET_STRING(iovec[n++], x);
737                 }
738
739 #ifdef HAVE_AUDIT
740                 r = audit_session_from_pid(object_pid, &audit);
741                 if (r >= 0) {
742                         sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
743                         IOVEC_SET_STRING(iovec[n++], o_audit_session);
744                 }
745
746                 r = audit_loginuid_from_pid(object_pid, &loginuid);
747                 if (r >= 0) {
748                         sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
749                         IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
750                 }
751 #endif
752
753                 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
754                 if (r >= 0) {
755                         x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
756                         IOVEC_SET_STRING(iovec[n++], x);
757
758                         r = cg_path_get_session(c, &t);
759                         if (r >= 0) {
760                                 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
761                                 free(t);
762                                 IOVEC_SET_STRING(iovec[n++], x);
763                         }
764
765                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
766                                 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
767                                 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
768                         }
769
770                         if (cg_path_get_unit(c, &t) >= 0) {
771                                 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
772                                 free(t);
773                                 IOVEC_SET_STRING(iovec[n++], x);
774                         }
775
776                         if (cg_path_get_user_unit(c, &t) >= 0) {
777                                 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
778                                 free(t);
779                                 IOVEC_SET_STRING(iovec[n++], x);
780                         }
781
782                         free(c);
783                 }
784         }
785         assert(n <= m);
786
787         if (tv) {
788                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
789                 IOVEC_SET_STRING(iovec[n++], source_time);
790         }
791
792         /* Note that strictly speaking storing the boot id here is
793          * redundant since the entry includes this in-line
794          * anyway. However, we need this indexed, too. */
795         if (!isempty(s->boot_id_field))
796                 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
797
798         if (!isempty(s->machine_id_field))
799                 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
800
801         if (!isempty(s->hostname_field))
802                 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
803
804         assert(n <= m);
805
806         if (s->split_mode == SPLIT_UID && realuid > 0)
807                 /* Split up strictly by any UID */
808                 journal_uid = realuid;
809         else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
810                 /* Split up by login UIDs.  We do this only if the
811                  * realuid is not root, in order not to accidentally
812                  * leak privileged information to the user that is
813                  * logged by a privileged process that is part of an
814                  * unprivileged session.*/
815                 journal_uid = owner;
816         else
817                 journal_uid = 0;
818
819         write_to_journal(s, journal_uid, iovec, n, priority);
820 }
821
822 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
823         char mid[11 + 32 + 1];
824         char buffer[16 + LINE_MAX + 1];
825         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
826         int n = 0;
827         va_list ap;
828         struct ucred ucred = {};
829
830         assert(s);
831         assert(format);
832
833         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
834         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
835
836         memcpy(buffer, "MESSAGE=", 8);
837         va_start(ap, format);
838         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
839         va_end(ap);
840         char_array_0(buffer);
841         IOVEC_SET_STRING(iovec[n++], buffer);
842
843         if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
844                 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
845                 char_array_0(mid);
846                 IOVEC_SET_STRING(iovec[n++], mid);
847         }
848
849         ucred.pid = getpid();
850         ucred.uid = getuid();
851         ucred.gid = getgid();
852
853         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
854 }
855
856 void server_dispatch_message(
857                 Server *s,
858                 struct iovec *iovec, unsigned n, unsigned m,
859                 const struct ucred *ucred,
860                 const struct timeval *tv,
861                 const char *label, size_t label_len,
862                 const char *unit_id,
863                 int priority,
864                 pid_t object_pid) {
865
866         int rl, r;
867         _cleanup_free_ char *path = NULL;
868         char *c;
869
870         assert(s);
871         assert(iovec || n == 0);
872
873         if (n == 0)
874                 return;
875
876         if (LOG_PRI(priority) > s->max_level_store)
877                 return;
878
879         /* Stop early in case the information will not be stored
880          * in a journal. */
881         if (s->storage == STORAGE_NONE)
882                 return;
883
884         if (!ucred)
885                 goto finish;
886
887         r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
888         if (r < 0)
889                 goto finish;
890
891         /* example: /user/lennart/3/foobar
892          *          /system/dbus.service/foobar
893          *
894          * So let's cut of everything past the third /, since that is
895          * where user directories start */
896
897         c = strchr(path, '/');
898         if (c) {
899                 c = strchr(c+1, '/');
900                 if (c) {
901                         c = strchr(c+1, '/');
902                         if (c)
903                                 *c = 0;
904                 }
905         }
906
907         rl = journal_rate_limit_test(s->rate_limit, path,
908                                      priority & LOG_PRIMASK, available_space(s, false));
909
910         if (rl == 0)
911                 return;
912
913         /* Write a suppression message if we suppressed something */
914         if (rl > 1)
915                 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
916                                       "Suppressed %u messages from %s", rl - 1, path);
917
918 finish:
919         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
920 }
921
922
923 static int system_journal_open(Server *s, bool flush_requested) {
924         int r;
925         char *fn;
926         sd_id128_t machine;
927         char ids[33];
928
929         r = sd_id128_get_machine(&machine);
930         if (r < 0)
931                 return log_error_errno(r, "Failed to get machine id: %m");
932
933         sd_id128_to_string(machine, ids);
934
935         if (!s->system_journal &&
936             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
937             (flush_requested
938              || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
939
940                 /* If in auto mode: first try to create the machine
941                  * path, but not the prefix.
942                  *
943                  * If in persistent mode: create /var/log/journal and
944                  * the machine path */
945
946                 if (s->storage == STORAGE_PERSISTENT)
947                         (void) mkdir("/var/log/journal/", 0755);
948
949                 fn = strappenda("/var/log/journal/", ids);
950                 (void) mkdir(fn, 0755);
951
952                 fn = strappenda(fn, "/system.journal");
953                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
954
955                 if (r >= 0)
956                         server_fix_perms(s, s->system_journal, 0);
957                 else if (r < 0) {
958                         if (r != -ENOENT && r != -EROFS)
959                                 log_warning_errno(r, "Failed to open system journal: %m");
960
961                         r = 0;
962                 }
963         }
964
965         if (!s->runtime_journal &&
966             (s->storage != STORAGE_NONE)) {
967
968                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
969                 if (!fn)
970                         return -ENOMEM;
971
972                 if (s->system_journal) {
973
974                         /* Try to open the runtime journal, but only
975                          * if it already exists, so that we can flush
976                          * it into the system journal */
977
978                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
979                         free(fn);
980
981                         if (r < 0) {
982                                 if (r != -ENOENT)
983                                         log_warning_errno(r, "Failed to open runtime journal: %m");
984
985                                 r = 0;
986                         }
987
988                 } else {
989
990                         /* OK, we really need the runtime journal, so create
991                          * it if necessary. */
992
993                         (void) mkdir("/run/log", 0755);
994                         (void) mkdir("/run/log/journal", 0755);
995                         (void) mkdir_parents(fn, 0750);
996
997                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
998                         free(fn);
999
1000                         if (r < 0)
1001                                 return log_error_errno(r, "Failed to open runtime journal: %m");
1002                 }
1003
1004                 if (s->runtime_journal)
1005                         server_fix_perms(s, s->runtime_journal, 0);
1006         }
1007
1008         available_space(s, true);
1009
1010         return r;
1011 }
1012
1013 int server_flush_to_var(Server *s) {
1014         sd_id128_t machine;
1015         sd_journal *j = NULL;
1016         char ts[FORMAT_TIMESPAN_MAX];
1017         usec_t start;
1018         unsigned n = 0;
1019         int r;
1020
1021         assert(s);
1022
1023         if (s->storage != STORAGE_AUTO &&
1024             s->storage != STORAGE_PERSISTENT)
1025                 return 0;
1026
1027         if (!s->runtime_journal)
1028                 return 0;
1029
1030         system_journal_open(s, true);
1031
1032         if (!s->system_journal)
1033                 return 0;
1034
1035         log_debug("Flushing to /var...");
1036
1037         start = now(CLOCK_MONOTONIC);
1038
1039         r = sd_id128_get_machine(&machine);
1040         if (r < 0)
1041                 return r;
1042
1043         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1044         if (r < 0)
1045                 return log_error_errno(r, "Failed to read runtime journal: %m");
1046
1047         sd_journal_set_data_threshold(j, 0);
1048
1049         SD_JOURNAL_FOREACH(j) {
1050                 Object *o = NULL;
1051                 JournalFile *f;
1052
1053                 f = j->current_file;
1054                 assert(f && f->current_offset > 0);
1055
1056                 n++;
1057
1058                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1059                 if (r < 0) {
1060                         log_error_errno(r, "Can't read entry: %m");
1061                         goto finish;
1062                 }
1063
1064                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1065                 if (r >= 0)
1066                         continue;
1067
1068                 if (!shall_try_append_again(s->system_journal, r)) {
1069                         log_error_errno(r, "Can't write entry: %m");
1070                         goto finish;
1071                 }
1072
1073                 server_rotate(s);
1074                 server_vacuum(s);
1075
1076                 if (!s->system_journal) {
1077                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1078                         r = -EIO;
1079                         goto finish;
1080                 }
1081
1082                 log_debug("Retrying write.");
1083                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1084                 if (r < 0) {
1085                         log_error_errno(r, "Can't write entry: %m");
1086                         goto finish;
1087                 }
1088         }
1089
1090 finish:
1091         journal_file_post_change(s->system_journal);
1092
1093         journal_file_close(s->runtime_journal);
1094         s->runtime_journal = NULL;
1095
1096         if (r >= 0)
1097                 rm_rf("/run/log/journal", false, true, false);
1098
1099         sd_journal_close(j);
1100
1101         server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1102
1103         return r;
1104 }
1105
1106 int process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1107         Server *s = userdata;
1108
1109         assert(s);
1110         assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1111
1112         if (revents != EPOLLIN) {
1113                 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1114                 return -EIO;
1115         }
1116
1117         for (;;) {
1118                 struct ucred *ucred = NULL;
1119                 struct timeval *tv = NULL;
1120                 struct cmsghdr *cmsg;
1121                 char *label = NULL;
1122                 size_t label_len = 0;
1123                 struct iovec iovec;
1124
1125                 union {
1126                         struct cmsghdr cmsghdr;
1127
1128                         /* We use NAME_MAX space for the SELinux label
1129                          * here. The kernel currently enforces no
1130                          * limit, but according to suggestions from
1131                          * the SELinux people this will change and it
1132                          * will probably be identical to NAME_MAX. For
1133                          * now we use that, but this should be updated
1134                          * one day when the final limit is known.*/
1135                         uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1136                                     CMSG_SPACE(sizeof(struct timeval)) +
1137                                     CMSG_SPACE(sizeof(int)) + /* fd */
1138                                     CMSG_SPACE(NAME_MAX)]; /* selinux label */
1139                 } control = {};
1140                 union sockaddr_union sa = {};
1141                 struct msghdr msghdr = {
1142                         .msg_iov = &iovec,
1143                         .msg_iovlen = 1,
1144                         .msg_control = &control,
1145                         .msg_controllen = sizeof(control),
1146                         .msg_name = &sa,
1147                         .msg_namelen = sizeof(sa),
1148                 };
1149
1150                 ssize_t n;
1151                 int *fds = NULL;
1152                 unsigned n_fds = 0;
1153                 int v = 0;
1154                 size_t m;
1155
1156                 /* Try to get the right size, if we can. (Not all
1157                  * sockets support SIOCINQ, hence we just try, but
1158                  * don't rely on it. */
1159                 (void) ioctl(fd, SIOCINQ, &v);
1160
1161                 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful!*/
1162                 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1163                                     (size_t) LINE_MAX,
1164                                     ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1165
1166                 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1167                         return log_oom();
1168
1169                 iovec.iov_base = s->buffer;
1170                 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1171
1172                 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1173                 if (n < 0) {
1174                         if (errno == EINTR || errno == EAGAIN)
1175                                 return 0;
1176
1177                         log_error_errno(errno, "recvmsg() failed: %m");
1178                         return -errno;
1179                 }
1180
1181                 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1182
1183                         if (cmsg->cmsg_level == SOL_SOCKET &&
1184                             cmsg->cmsg_type == SCM_CREDENTIALS &&
1185                             cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1186                                 ucred = (struct ucred*) CMSG_DATA(cmsg);
1187                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1188                                  cmsg->cmsg_type == SCM_SECURITY) {
1189                                 label = (char*) CMSG_DATA(cmsg);
1190                                 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1191                         } else if (cmsg->cmsg_level == SOL_SOCKET &&
1192                                    cmsg->cmsg_type == SO_TIMESTAMP &&
1193                                    cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1194                                 tv = (struct timeval*) CMSG_DATA(cmsg);
1195                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1196                                  cmsg->cmsg_type == SCM_RIGHTS) {
1197                                 fds = (int*) CMSG_DATA(cmsg);
1198                                 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1199                         }
1200                 }
1201
1202                 /* And a trailing NUL, just in case */
1203                 s->buffer[n] = 0;
1204
1205                 if (fd == s->syslog_fd) {
1206                         if (n > 0 && n_fds == 0)
1207                                 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1208                         else if (n_fds > 0)
1209                                 log_warning("Got file descriptors via syslog socket. Ignoring.");
1210
1211                 } else if (fd == s->native_fd) {
1212                         if (n > 0 && n_fds == 0)
1213                                 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1214                         else if (n == 0 && n_fds == 1)
1215                                 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1216                         else if (n_fds > 0)
1217                                 log_warning("Got too many file descriptors via native socket. Ignoring.");
1218
1219                 } else {
1220                         assert(fd == s->audit_fd);
1221
1222                         if (n > 0 && n_fds == 0)
1223                                 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1224                         else if (n_fds > 0)
1225                                 log_warning("Got file descriptors via audit socket. Ignoring.");
1226                 }
1227
1228                 close_many(fds, n_fds);
1229         }
1230 }
1231
1232 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1233         Server *s = userdata;
1234
1235         assert(s);
1236
1237         log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
1238
1239         server_flush_to_var(s);
1240         server_sync(s);
1241         server_vacuum(s);
1242
1243         touch("/run/systemd/journal/flushed");
1244
1245         return 0;
1246 }
1247
1248 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1249         Server *s = userdata;
1250
1251         assert(s);
1252
1253         log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
1254         server_rotate(s);
1255         server_vacuum(s);
1256
1257         return 0;
1258 }
1259
1260 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1261         Server *s = userdata;
1262
1263         assert(s);
1264
1265         log_received_signal(LOG_INFO, si);
1266
1267         sd_event_exit(s->event, 0);
1268         return 0;
1269 }
1270
1271 static int setup_signals(Server *s) {
1272         sigset_t mask;
1273         int r;
1274
1275         assert(s);
1276
1277         assert_se(sigemptyset(&mask) == 0);
1278         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1279         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1280
1281         r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1282         if (r < 0)
1283                 return r;
1284
1285         r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1286         if (r < 0)
1287                 return r;
1288
1289         r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1290         if (r < 0)
1291                 return r;
1292
1293         r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1294         if (r < 0)
1295                 return r;
1296
1297         return 0;
1298 }
1299
1300 static int server_parse_proc_cmdline(Server *s) {
1301         _cleanup_free_ char *line = NULL;
1302         const char *w, *state;
1303         size_t l;
1304         int r;
1305
1306         r = proc_cmdline(&line);
1307         if (r < 0) {
1308                 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
1309                 return 0;
1310         }
1311
1312         FOREACH_WORD_QUOTED(w, l, line, state) {
1313                 _cleanup_free_ char *word;
1314
1315                 word = strndup(w, l);
1316                 if (!word)
1317                         return -ENOMEM;
1318
1319                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1320                         r = parse_boolean(word + 35);
1321                         if (r < 0)
1322                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1323                         else
1324                                 s->forward_to_syslog = r;
1325                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1326                         r = parse_boolean(word + 33);
1327                         if (r < 0)
1328                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1329                         else
1330                                 s->forward_to_kmsg = r;
1331                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1332                         r = parse_boolean(word + 36);
1333                         if (r < 0)
1334                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1335                         else
1336                                 s->forward_to_console = r;
1337                 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1338                         r = parse_boolean(word + 33);
1339                         if (r < 0)
1340                                 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1341                         else
1342                                 s->forward_to_wall = r;
1343                 } else if (startswith(word, "systemd.journald"))
1344                         log_warning("Invalid systemd.journald parameter. Ignoring.");
1345         }
1346         /* do not warn about state here, since probably systemd already did */
1347
1348         return 0;
1349 }
1350
1351 static int server_parse_config_file(Server *s) {
1352         assert(s);
1353
1354         return config_parse_many("/etc/systemd/journald.conf",
1355                                  CONF_DIRS_NULSTR("systemd/journald.conf"),
1356                                  "Journal\0",
1357                                  config_item_perf_lookup, journald_gperf_lookup,
1358                                  false, s);
1359 }
1360
1361 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1362         Server *s = userdata;
1363
1364         assert(s);
1365
1366         server_sync(s);
1367         return 0;
1368 }
1369
1370 int server_schedule_sync(Server *s, int priority) {
1371         int r;
1372
1373         assert(s);
1374
1375         if (priority <= LOG_CRIT) {
1376                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1377                 server_sync(s);
1378                 return 0;
1379         }
1380
1381         if (s->sync_scheduled)
1382                 return 0;
1383
1384         if (s->sync_interval_usec > 0) {
1385                 usec_t when;
1386
1387                 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1388                 if (r < 0)
1389                         return r;
1390
1391                 when += s->sync_interval_usec;
1392
1393                 if (!s->sync_event_source) {
1394                         r = sd_event_add_time(
1395                                         s->event,
1396                                         &s->sync_event_source,
1397                                         CLOCK_MONOTONIC,
1398                                         when, 0,
1399                                         server_dispatch_sync, s);
1400                         if (r < 0)
1401                                 return r;
1402
1403                         r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1404                 } else {
1405                         r = sd_event_source_set_time(s->sync_event_source, when);
1406                         if (r < 0)
1407                                 return r;
1408
1409                         r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1410                 }
1411                 if (r < 0)
1412                         return r;
1413
1414                 s->sync_scheduled = true;
1415         }
1416
1417         return 0;
1418 }
1419
1420 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1421         Server *s = userdata;
1422
1423         assert(s);
1424
1425         server_cache_hostname(s);
1426         return 0;
1427 }
1428
1429 static int server_open_hostname(Server *s) {
1430         int r;
1431
1432         assert(s);
1433
1434         s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1435         if (s->hostname_fd < 0)
1436                 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1437
1438         r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1439         if (r < 0) {
1440                 /* kernels prior to 3.2 don't support polling this file. Ignore
1441                  * the failure. */
1442                 if (r == -EPERM) {
1443                         log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1444                                         strerror(-r));
1445                         s->hostname_fd = safe_close(s->hostname_fd);
1446                         return 0;
1447                 }
1448
1449                 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1450         }
1451
1452         r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1453         if (r < 0)
1454                 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1455
1456         return 0;
1457 }
1458
1459 int server_init(Server *s) {
1460         int n, r, fd;
1461
1462         assert(s);
1463
1464         zero(*s);
1465         s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = -1;
1466         s->compress = true;
1467         s->seal = true;
1468
1469         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1470         s->sync_scheduled = false;
1471
1472         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1473         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1474
1475         s->forward_to_wall = true;
1476
1477         s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1478
1479         s->max_level_store = LOG_DEBUG;
1480         s->max_level_syslog = LOG_DEBUG;
1481         s->max_level_kmsg = LOG_NOTICE;
1482         s->max_level_console = LOG_INFO;
1483         s->max_level_wall = LOG_EMERG;
1484
1485         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1486         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1487
1488         server_parse_config_file(s);
1489         server_parse_proc_cmdline(s);
1490         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1491                 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1492                           s->rate_limit_interval, s->rate_limit_burst);
1493                 s->rate_limit_interval = s->rate_limit_burst = 0;
1494         }
1495
1496         mkdir_p("/run/systemd/journal", 0755);
1497
1498         s->user_journals = ordered_hashmap_new(NULL);
1499         if (!s->user_journals)
1500                 return log_oom();
1501
1502         s->mmap = mmap_cache_new();
1503         if (!s->mmap)
1504                 return log_oom();
1505
1506         r = sd_event_default(&s->event);
1507         if (r < 0)
1508                 return log_error_errno(r, "Failed to create event loop: %m");
1509
1510         sd_event_set_watchdog(s->event, true);
1511
1512         n = sd_listen_fds(true);
1513         if (n < 0)
1514                 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1515
1516         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1517
1518                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1519
1520                         if (s->native_fd >= 0) {
1521                                 log_error("Too many native sockets passed.");
1522                                 return -EINVAL;
1523                         }
1524
1525                         s->native_fd = fd;
1526
1527                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1528
1529                         if (s->stdout_fd >= 0) {
1530                                 log_error("Too many stdout sockets passed.");
1531                                 return -EINVAL;
1532                         }
1533
1534                         s->stdout_fd = fd;
1535
1536                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1537                            sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1538
1539                         if (s->syslog_fd >= 0) {
1540                                 log_error("Too many /dev/log sockets passed.");
1541                                 return -EINVAL;
1542                         }
1543
1544                         s->syslog_fd = fd;
1545
1546                 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1547
1548                         if (s->audit_fd >= 0) {
1549                                 log_error("Too many audit sockets passed.");
1550                                 return -EINVAL;
1551                         }
1552
1553                         s->audit_fd = fd;
1554
1555                 } else
1556                         log_error("Unknown socket passed as file descriptor %d, ignoring.", fd);
1557         }
1558
1559         r = server_open_syslog_socket(s);
1560         if (r < 0)
1561                 return r;
1562
1563         r = server_open_native_socket(s);
1564         if (r < 0)
1565                 return r;
1566
1567         r = server_open_stdout_socket(s);
1568         if (r < 0)
1569                 return r;
1570
1571         r = server_open_dev_kmsg(s);
1572         if (r < 0)
1573                 return r;
1574
1575         r = server_open_audit(s);
1576         if (r < 0)
1577                 return r;
1578
1579         r = server_open_kernel_seqnum(s);
1580         if (r < 0)
1581                 return r;
1582
1583         r = server_open_hostname(s);
1584         if (r < 0)
1585                 return r;
1586
1587         r = setup_signals(s);
1588         if (r < 0)
1589                 return r;
1590
1591         s->udev = udev_new();
1592         if (!s->udev)
1593                 return -ENOMEM;
1594
1595         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1596         if (!s->rate_limit)
1597                 return -ENOMEM;
1598
1599         r = cg_get_root_path(&s->cgroup_root);
1600         if (r < 0)
1601                 return r;
1602
1603         server_cache_hostname(s);
1604         server_cache_boot_id(s);
1605         server_cache_machine_id(s);
1606
1607         r = system_journal_open(s, false);
1608         if (r < 0)
1609                 return r;
1610
1611         return 0;
1612 }
1613
1614 void server_maybe_append_tags(Server *s) {
1615 #ifdef HAVE_GCRYPT
1616         JournalFile *f;
1617         Iterator i;
1618         usec_t n;
1619
1620         n = now(CLOCK_REALTIME);
1621
1622         if (s->system_journal)
1623                 journal_file_maybe_append_tag(s->system_journal, n);
1624
1625         ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1626                 journal_file_maybe_append_tag(f, n);
1627 #endif
1628 }
1629
1630 void server_done(Server *s) {
1631         JournalFile *f;
1632         assert(s);
1633
1634         while (s->stdout_streams)
1635                 stdout_stream_free(s->stdout_streams);
1636
1637         if (s->system_journal)
1638                 journal_file_close(s->system_journal);
1639
1640         if (s->runtime_journal)
1641                 journal_file_close(s->runtime_journal);
1642
1643         while ((f = ordered_hashmap_steal_first(s->user_journals)))
1644                 journal_file_close(f);
1645
1646         ordered_hashmap_free(s->user_journals);
1647
1648         sd_event_source_unref(s->syslog_event_source);
1649         sd_event_source_unref(s->native_event_source);
1650         sd_event_source_unref(s->stdout_event_source);
1651         sd_event_source_unref(s->dev_kmsg_event_source);
1652         sd_event_source_unref(s->audit_event_source);
1653         sd_event_source_unref(s->sync_event_source);
1654         sd_event_source_unref(s->sigusr1_event_source);
1655         sd_event_source_unref(s->sigusr2_event_source);
1656         sd_event_source_unref(s->sigterm_event_source);
1657         sd_event_source_unref(s->sigint_event_source);
1658         sd_event_source_unref(s->hostname_event_source);
1659         sd_event_unref(s->event);
1660
1661         safe_close(s->syslog_fd);
1662         safe_close(s->native_fd);
1663         safe_close(s->stdout_fd);
1664         safe_close(s->dev_kmsg_fd);
1665         safe_close(s->audit_fd);
1666         safe_close(s->hostname_fd);
1667
1668         if (s->rate_limit)
1669                 journal_rate_limit_free(s->rate_limit);
1670
1671         if (s->kernel_seqnum)
1672                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1673
1674         free(s->buffer);
1675         free(s->tty_path);
1676         free(s->cgroup_root);
1677         free(s->hostname_field);
1678
1679         if (s->mmap)
1680                 mmap_cache_unref(s->mmap);
1681
1682         if (s->udev)
1683                 udev_unref(s->udev);
1684 }