chiark / gitweb /
remove unused includes
[elogind.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
26 #include <sys/mman.h>
27
28 #include <libudev.h>
29
30 #include "sd-journal.h"
31 #include "sd-messages.h"
32 #include "sd-daemon.h"
33 #include "mkdir.h"
34 #include "hashmap.h"
35 #include "journal-file.h"
36 #include "socket-util.h"
37 #include "cgroup-util.h"
38 #include "missing.h"
39 #include "conf-parser.h"
40 #include "selinux-util.h"
41 #include "journal-internal.h"
42 #include "journal-vacuum.h"
43 #include "journal-authenticate.h"
44 #include "journald-rate-limit.h"
45 #include "journald-kmsg.h"
46 #include "journald-syslog.h"
47 #include "journald-stream.h"
48 #include "journald-native.h"
49 #include "journald-audit.h"
50 #include "journald-server.h"
51 #include "acl-util.h"
52
53 #ifdef HAVE_SELINUX
54 #include <selinux/selinux.h>
55 #endif
56
/* Upper bound on the number of per-user journal files kept open at the
 * same time; find_journal() closes older ones once this is reached. */
#define USER_JOURNALS_MAX               1024

/* Built-in default values (their consumers are outside this excerpt). */
#define DEFAULT_SYNC_INTERVAL_USEC      (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL     (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST        1000
#define DEFAULT_MAX_FILE_USEC           USEC_PER_MONTH

/* For how long available_space() may hand out its cached result before
 * it measures the file system again. */
#define RECHECK_AVAILABLE_SPACE_USEC    (30*USEC_PER_SEC)
65
66 static const char* const storage_table[_STORAGE_MAX] = {
67         [STORAGE_AUTO] = "auto",
68         [STORAGE_VOLATILE] = "volatile",
69         [STORAGE_PERSISTENT] = "persistent",
70         [STORAGE_NONE] = "none"
71 };
72
73 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
74 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
75
76 static const char* const split_mode_table[_SPLIT_MAX] = {
77         [SPLIT_LOGIN] = "login",
78         [SPLIT_UID] = "uid",
79         [SPLIT_NONE] = "none",
80 };
81
82 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
83 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
84
85 static uint64_t available_space(Server *s, bool verbose) {
86         char ids[33];
87         _cleanup_free_ char *p = NULL;
88         sd_id128_t machine;
89         struct statvfs ss;
90         uint64_t sum = 0, ss_avail = 0, avail = 0;
91         int r;
92         _cleanup_closedir_ DIR *d = NULL;
93         usec_t ts;
94         const char *f;
95         JournalMetrics *m;
96
97         ts = now(CLOCK_MONOTONIC);
98
99         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
100             && !verbose)
101                 return s->cached_available_space;
102
103         r = sd_id128_get_machine(&machine);
104         if (r < 0)
105                 return 0;
106
107         if (s->system_journal) {
108                 f = "/var/log/journal/";
109                 m = &s->system_metrics;
110         } else {
111                 f = "/run/log/journal/";
112                 m = &s->runtime_metrics;
113         }
114
115         assert(m);
116
117         p = strappend(f, sd_id128_to_string(machine, ids));
118         if (!p)
119                 return 0;
120
121         d = opendir(p);
122         if (!d)
123                 return 0;
124
125         if (fstatvfs(dirfd(d), &ss) < 0)
126                 return 0;
127
128         for (;;) {
129                 struct stat st;
130                 struct dirent *de;
131
132                 errno = 0;
133                 de = readdir(d);
134                 if (!de && errno != 0)
135                         return 0;
136
137                 if (!de)
138                         break;
139
140                 if (!endswith(de->d_name, ".journal") &&
141                     !endswith(de->d_name, ".journal~"))
142                         continue;
143
144                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
145                         continue;
146
147                 if (!S_ISREG(st.st_mode))
148                         continue;
149
150                 sum += (uint64_t) st.st_blocks * 512UL;
151         }
152
153         ss_avail = ss.f_bsize * ss.f_bavail;
154
155         /* If we reached a high mark, we will always allow this much
156          * again, unless usage goes above max_use. This watermark
157          * value is cached so that we don't give up space on pressure,
158          * but hover below the maximum usage. */
159
160         if (m->use < sum)
161                 m->use = sum;
162
163         avail = LESS_BY(ss_avail, m->keep_free);
164
165         s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
166         s->cached_available_space_timestamp = ts;
167
168         if (verbose) {
169                 char    fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
170                         fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
171
172                 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
173                                       "%s journal is using %s (max allowed %s, "
174                                       "trying to leave %s free of %s available → current limit %s).",
175                                       s->system_journal ? "Permanent" : "Runtime",
176                                       format_bytes(fb1, sizeof(fb1), sum),
177                                       format_bytes(fb2, sizeof(fb2), m->max_use),
178                                       format_bytes(fb3, sizeof(fb3), m->keep_free),
179                                       format_bytes(fb4, sizeof(fb4), ss_avail),
180                                       format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
181         }
182
183         return s->cached_available_space;
184 }
185
186 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
187         int r;
188 #ifdef HAVE_ACL
189         acl_t acl;
190         acl_entry_t entry;
191         acl_permset_t permset;
192 #endif
193
194         assert(f);
195
196         r = fchmod(f->fd, 0640);
197         if (r < 0)
198                 log_warning_errno(r, "Failed to fix access mode on %s, ignoring: %m", f->path);
199
200 #ifdef HAVE_ACL
201         if (uid <= SYSTEM_UID_MAX)
202                 return;
203
204         acl = acl_get_fd(f->fd);
205         if (!acl) {
206                 log_warning_errno(errno, "Failed to read ACL on %s, ignoring: %m", f->path);
207                 return;
208         }
209
210         r = acl_find_uid(acl, uid, &entry);
211         if (r <= 0) {
212
213                 if (acl_create_entry(&acl, &entry) < 0 ||
214                     acl_set_tag_type(entry, ACL_USER) < 0 ||
215                     acl_set_qualifier(entry, &uid) < 0) {
216                         log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
217                         goto finish;
218                 }
219         }
220
221         /* We do not recalculate the mask unconditionally here,
222          * so that the fchmod() mask above stays intact. */
223         if (acl_get_permset(entry, &permset) < 0 ||
224             acl_add_perm(permset, ACL_READ) < 0 ||
225             calc_acl_mask_if_needed(&acl) < 0) {
226                 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
227                 goto finish;
228         }
229
230         if (acl_set_fd(f->fd, acl) < 0)
231                 log_warning_errno(errno, "Failed to set ACL on %s, ignoring: %m", f->path);
232
233 finish:
234         acl_free(acl);
235 #endif
236 }
237
238 static JournalFile* find_journal(Server *s, uid_t uid) {
239         _cleanup_free_ char *p = NULL;
240         int r;
241         JournalFile *f;
242         sd_id128_t machine;
243
244         assert(s);
245
246         /* We split up user logs only on /var, not on /run. If the
247          * runtime file is open, we write to it exclusively, in order
248          * to guarantee proper order as soon as we flush /run to
249          * /var and close the runtime file. */
250
251         if (s->runtime_journal)
252                 return s->runtime_journal;
253
254         if (uid <= SYSTEM_UID_MAX)
255                 return s->system_journal;
256
257         r = sd_id128_get_machine(&machine);
258         if (r < 0)
259                 return s->system_journal;
260
261         f = ordered_hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
262         if (f)
263                 return f;
264
265         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
266                      SD_ID128_FORMAT_VAL(machine), uid) < 0)
267                 return s->system_journal;
268
269         while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
270                 /* Too many open? Then let's close one */
271                 f = ordered_hashmap_steal_first(s->user_journals);
272                 assert(f);
273                 journal_file_close(f);
274         }
275
276         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
277         if (r < 0)
278                 return s->system_journal;
279
280         server_fix_perms(s, f, uid);
281
282         r = ordered_hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
283         if (r < 0) {
284                 journal_file_close(f);
285                 return s->system_journal;
286         }
287
288         return f;
289 }
290
291 static int do_rotate(
292                 Server *s,
293                 JournalFile **f,
294                 const char* name,
295                 bool seal,
296                 uint32_t uid) {
297
298         int r;
299         assert(s);
300
301         if (!*f)
302                 return -EINVAL;
303
304         r = journal_file_rotate(f, s->compress, seal);
305         if (r < 0)
306                 if (*f)
307                         log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
308                 else
309                         log_error_errno(r, "Failed to create new %s journal: %m", name);
310         else
311                 server_fix_perms(s, *f, uid);
312
313         return r;
314 }
315
316 void server_rotate(Server *s) {
317         JournalFile *f;
318         void *k;
319         Iterator i;
320         int r;
321
322         log_debug("Rotating...");
323
324         do_rotate(s, &s->runtime_journal, "runtime", false, 0);
325         do_rotate(s, &s->system_journal, "system", s->seal, 0);
326
327         ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
328                 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
329                 if (r >= 0)
330                         ordered_hashmap_replace(s->user_journals, k, f);
331                 else if (!f)
332                         /* Old file has been closed and deallocated */
333                         ordered_hashmap_remove(s->user_journals, k);
334         }
335 }
336
337 void server_sync(Server *s) {
338         JournalFile *f;
339         void *k;
340         Iterator i;
341         int r;
342
343         if (s->system_journal) {
344                 r = journal_file_set_offline(s->system_journal);
345                 if (r < 0)
346                         log_error_errno(r, "Failed to sync system journal: %m");
347         }
348
349         ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
350                 r = journal_file_set_offline(f);
351                 if (r < 0)
352                         log_error_errno(r, "Failed to sync user journal: %m");
353         }
354
355         if (s->sync_event_source) {
356                 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
357                 if (r < 0)
358                         log_error_errno(r, "Failed to disable sync timer source: %m");
359         }
360
361         s->sync_scheduled = false;
362 }
363
364 static void do_vacuum(
365                 Server *s,
366                 const char *id,
367                 JournalFile *f,
368                 const char* path,
369                 JournalMetrics *metrics) {
370
371         const char *p;
372         int r;
373
374         if (!f)
375                 return;
376
377         p = strjoina(path, id);
378         r = journal_directory_vacuum(p, metrics->max_use, s->max_retention_usec, &s->oldest_file_usec, false);
379         if (r < 0 && r != -ENOENT)
380                 log_error_errno(r, "Failed to vacuum %s: %m", p);
381 }
382
383 void server_vacuum(Server *s) {
384         char ids[33];
385         sd_id128_t machine;
386         int r;
387
388         log_debug("Vacuuming...");
389
390         s->oldest_file_usec = 0;
391
392         r = sd_id128_get_machine(&machine);
393         if (r < 0) {
394                 log_error_errno(r, "Failed to get machine ID: %m");
395                 return;
396         }
397         sd_id128_to_string(machine, ids);
398
399         do_vacuum(s, ids, s->system_journal, "/var/log/journal/", &s->system_metrics);
400         do_vacuum(s, ids, s->runtime_journal, "/run/log/journal/", &s->runtime_metrics);
401
402         s->cached_available_space_timestamp = 0;
403 }
404
405 static void server_cache_machine_id(Server *s) {
406         sd_id128_t id;
407         int r;
408
409         assert(s);
410
411         r = sd_id128_get_machine(&id);
412         if (r < 0)
413                 return;
414
415         sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
416 }
417
418 static void server_cache_boot_id(Server *s) {
419         sd_id128_t id;
420         int r;
421
422         assert(s);
423
424         r = sd_id128_get_boot(&id);
425         if (r < 0)
426                 return;
427
428         sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
429 }
430
431 static void server_cache_hostname(Server *s) {
432         _cleanup_free_ char *t = NULL;
433         char *x;
434
435         assert(s);
436
437         t = gethostname_malloc();
438         if (!t)
439                 return;
440
441         x = strappend("_HOSTNAME=", t);
442         if (!x)
443                 return;
444
445         free(s->hostname_field);
446         s->hostname_field = x;
447 }
448
449 static bool shall_try_append_again(JournalFile *f, int r) {
450
451         /* -E2BIG            Hit configured limit
452            -EFBIG            Hit fs limit
453            -EDQUOT           Quota limit hit
454            -ENOSPC           Disk full
455            -EIO              I/O error of some kind (mmap)
456            -EHOSTDOWN        Other machine
457            -EBUSY            Unclean shutdown
458            -EPROTONOSUPPORT  Unsupported feature
459            -EBADMSG          Corrupted
460            -ENODATA          Truncated
461            -ESHUTDOWN        Already archived
462            -EIDRM            Journal file has been deleted */
463
464         if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
465                 log_debug("%s: Allocation limit reached, rotating.", f->path);
466         else if (r == -EHOSTDOWN)
467                 log_info("%s: Journal file from other machine, rotating.", f->path);
468         else if (r == -EBUSY)
469                 log_info("%s: Unclean shutdown, rotating.", f->path);
470         else if (r == -EPROTONOSUPPORT)
471                 log_info("%s: Unsupported feature, rotating.", f->path);
472         else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
473                 log_warning("%s: Journal file corrupted, rotating.", f->path);
474         else if (r == -EIO)
475                 log_warning("%s: IO error, rotating.", f->path);
476         else if (r == -EIDRM)
477                 log_warning("%s: Journal file has been deleted, rotating.", f->path);
478         else
479                 return false;
480
481         return true;
482 }
483
484 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
485         JournalFile *f;
486         bool vacuumed = false;
487         int r;
488
489         assert(s);
490         assert(iovec);
491         assert(n > 0);
492
493         f = find_journal(s, uid);
494         if (!f)
495                 return;
496
497         if (journal_file_rotate_suggested(f, s->max_file_usec)) {
498                 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
499                 server_rotate(s);
500                 server_vacuum(s);
501                 vacuumed = true;
502
503                 f = find_journal(s, uid);
504                 if (!f)
505                         return;
506         }
507
508         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
509         if (r >= 0) {
510                 server_schedule_sync(s, priority);
511                 return;
512         }
513
514         if (vacuumed || !shall_try_append_again(f, r)) {
515                 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
516                 return;
517         }
518
519         server_rotate(s);
520         server_vacuum(s);
521
522         f = find_journal(s, uid);
523         if (!f)
524                 return;
525
526         log_debug("Retrying write.");
527         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
528         if (r < 0)
529                 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
530         else
531                 server_schedule_sync(s, priority);
532 }
533
534 static void dispatch_message_real(
535                 Server *s,
536                 struct iovec *iovec, unsigned n, unsigned m,
537                 const struct ucred *ucred,
538                 const struct timeval *tv,
539                 const char *label, size_t label_len,
540                 const char *unit_id,
541                 int priority,
542                 pid_t object_pid) {
543
544         char    pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
545                 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
546                 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
547                 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
548                 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
549                 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
550                 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
551                 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
552         uid_t object_uid;
553         gid_t object_gid;
554         char *x;
555         int r;
556         char *t, *c;
557         uid_t realuid = 0, owner = 0, journal_uid;
558         bool owner_valid = false;
559 #ifdef HAVE_AUDIT
560         char    audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
561                 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
562                 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
563                 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
564
565         uint32_t audit;
566         uid_t loginuid;
567 #endif
568
569         assert(s);
570         assert(iovec);
571         assert(n > 0);
572         assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
573
574         if (ucred) {
575                 realuid = ucred->uid;
576
577                 sprintf(pid, "_PID="PID_FMT, ucred->pid);
578                 IOVEC_SET_STRING(iovec[n++], pid);
579
580                 sprintf(uid, "_UID="UID_FMT, ucred->uid);
581                 IOVEC_SET_STRING(iovec[n++], uid);
582
583                 sprintf(gid, "_GID="GID_FMT, ucred->gid);
584                 IOVEC_SET_STRING(iovec[n++], gid);
585
586                 r = get_process_comm(ucred->pid, &t);
587                 if (r >= 0) {
588                         x = strjoina("_COMM=", t);
589                         free(t);
590                         IOVEC_SET_STRING(iovec[n++], x);
591                 }
592
593                 r = get_process_exe(ucred->pid, &t);
594                 if (r >= 0) {
595                         x = strjoina("_EXE=", t);
596                         free(t);
597                         IOVEC_SET_STRING(iovec[n++], x);
598                 }
599
600                 r = get_process_cmdline(ucred->pid, 0, false, &t);
601                 if (r >= 0) {
602                         x = strjoina("_CMDLINE=", t);
603                         free(t);
604                         IOVEC_SET_STRING(iovec[n++], x);
605                 }
606
607                 r = get_process_capeff(ucred->pid, &t);
608                 if (r >= 0) {
609                         x = strjoina("_CAP_EFFECTIVE=", t);
610                         free(t);
611                         IOVEC_SET_STRING(iovec[n++], x);
612                 }
613
614 #ifdef HAVE_AUDIT
615                 r = audit_session_from_pid(ucred->pid, &audit);
616                 if (r >= 0) {
617                         sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
618                         IOVEC_SET_STRING(iovec[n++], audit_session);
619                 }
620
621                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
622                 if (r >= 0) {
623                         sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
624                         IOVEC_SET_STRING(iovec[n++], audit_loginuid);
625                 }
626 #endif
627
628                 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
629                 if (r >= 0) {
630                         char *session = NULL;
631
632                         x = strjoina("_SYSTEMD_CGROUP=", c);
633                         IOVEC_SET_STRING(iovec[n++], x);
634
635                         r = cg_path_get_session(c, &t);
636                         if (r >= 0) {
637                                 session = strjoina("_SYSTEMD_SESSION=", t);
638                                 free(t);
639                                 IOVEC_SET_STRING(iovec[n++], session);
640                         }
641
642                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
643                                 owner_valid = true;
644
645                                 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
646                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
647                         }
648
649                         if (cg_path_get_unit(c, &t) >= 0) {
650                                 x = strjoina("_SYSTEMD_UNIT=", t);
651                                 free(t);
652                                 IOVEC_SET_STRING(iovec[n++], x);
653                         } else if (unit_id && !session) {
654                                 x = strjoina("_SYSTEMD_UNIT=", unit_id);
655                                 IOVEC_SET_STRING(iovec[n++], x);
656                         }
657
658                         if (cg_path_get_user_unit(c, &t) >= 0) {
659                                 x = strjoina("_SYSTEMD_USER_UNIT=", t);
660                                 free(t);
661                                 IOVEC_SET_STRING(iovec[n++], x);
662                         } else if (unit_id && session) {
663                                 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
664                                 IOVEC_SET_STRING(iovec[n++], x);
665                         }
666
667                         if (cg_path_get_slice(c, &t) >= 0) {
668                                 x = strjoina("_SYSTEMD_SLICE=", t);
669                                 free(t);
670                                 IOVEC_SET_STRING(iovec[n++], x);
671                         }
672
673                         free(c);
674                 } else if (unit_id) {
675                         x = strjoina("_SYSTEMD_UNIT=", unit_id);
676                         IOVEC_SET_STRING(iovec[n++], x);
677                 }
678
679 #ifdef HAVE_SELINUX
680                 if (mac_selinux_use()) {
681                         if (label) {
682                                 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
683
684                                 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
685                                 IOVEC_SET_STRING(iovec[n++], x);
686                         } else {
687                                 security_context_t con;
688
689                                 if (getpidcon(ucred->pid, &con) >= 0) {
690                                         x = strjoina("_SELINUX_CONTEXT=", con);
691
692                                         freecon(con);
693                                         IOVEC_SET_STRING(iovec[n++], x);
694                                 }
695                         }
696                 }
697 #endif
698         }
699         assert(n <= m);
700
701         if (object_pid) {
702                 r = get_process_uid(object_pid, &object_uid);
703                 if (r >= 0) {
704                         sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
705                         IOVEC_SET_STRING(iovec[n++], o_uid);
706                 }
707
708                 r = get_process_gid(object_pid, &object_gid);
709                 if (r >= 0) {
710                         sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
711                         IOVEC_SET_STRING(iovec[n++], o_gid);
712                 }
713
714                 r = get_process_comm(object_pid, &t);
715                 if (r >= 0) {
716                         x = strjoina("OBJECT_COMM=", t);
717                         free(t);
718                         IOVEC_SET_STRING(iovec[n++], x);
719                 }
720
721                 r = get_process_exe(object_pid, &t);
722                 if (r >= 0) {
723                         x = strjoina("OBJECT_EXE=", t);
724                         free(t);
725                         IOVEC_SET_STRING(iovec[n++], x);
726                 }
727
728                 r = get_process_cmdline(object_pid, 0, false, &t);
729                 if (r >= 0) {
730                         x = strjoina("OBJECT_CMDLINE=", t);
731                         free(t);
732                         IOVEC_SET_STRING(iovec[n++], x);
733                 }
734
735 #ifdef HAVE_AUDIT
736                 r = audit_session_from_pid(object_pid, &audit);
737                 if (r >= 0) {
738                         sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
739                         IOVEC_SET_STRING(iovec[n++], o_audit_session);
740                 }
741
742                 r = audit_loginuid_from_pid(object_pid, &loginuid);
743                 if (r >= 0) {
744                         sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
745                         IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
746                 }
747 #endif
748
749                 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
750                 if (r >= 0) {
751                         x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
752                         IOVEC_SET_STRING(iovec[n++], x);
753
754                         r = cg_path_get_session(c, &t);
755                         if (r >= 0) {
756                                 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
757                                 free(t);
758                                 IOVEC_SET_STRING(iovec[n++], x);
759                         }
760
761                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
762                                 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
763                                 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
764                         }
765
766                         if (cg_path_get_unit(c, &t) >= 0) {
767                                 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
768                                 free(t);
769                                 IOVEC_SET_STRING(iovec[n++], x);
770                         }
771
772                         if (cg_path_get_user_unit(c, &t) >= 0) {
773                                 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
774                                 free(t);
775                                 IOVEC_SET_STRING(iovec[n++], x);
776                         }
777
778                         free(c);
779                 }
780         }
781         assert(n <= m);
782
783         if (tv) {
784                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
785                 IOVEC_SET_STRING(iovec[n++], source_time);
786         }
787
788         /* Note that strictly speaking storing the boot id here is
789          * redundant since the entry includes this in-line
790          * anyway. However, we need this indexed, too. */
791         if (!isempty(s->boot_id_field))
792                 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
793
794         if (!isempty(s->machine_id_field))
795                 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
796
797         if (!isempty(s->hostname_field))
798                 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
799
800         assert(n <= m);
801
802         if (s->split_mode == SPLIT_UID && realuid > 0)
803                 /* Split up strictly by any UID */
804                 journal_uid = realuid;
805         else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
806                 /* Split up by login UIDs.  We do this only if the
807                  * realuid is not root, in order not to accidentally
808                  * leak privileged information to the user that is
809                  * logged by a privileged process that is part of an
810                  * unprivileged session. */
811                 journal_uid = owner;
812         else
813                 journal_uid = 0;
814
815         write_to_journal(s, journal_uid, iovec, n, priority);
816 }
817
818 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
819         char mid[11 + 32 + 1];
820         char buffer[16 + LINE_MAX + 1];
821         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
822         int n = 0;
823         va_list ap;
824         struct ucred ucred = {};
825
826         assert(s);
827         assert(format);
828
829         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
830         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
831
832         memcpy(buffer, "MESSAGE=", 8);
833         va_start(ap, format);
834         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
835         va_end(ap);
836         IOVEC_SET_STRING(iovec[n++], buffer);
837
838         if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
839                 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
840                 IOVEC_SET_STRING(iovec[n++], mid);
841         }
842
843         ucred.pid = getpid();
844         ucred.uid = getuid();
845         ucred.gid = getgid();
846
847         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
848 }
849
850 void server_dispatch_message(
851                 Server *s,
852                 struct iovec *iovec, unsigned n, unsigned m,
853                 const struct ucred *ucred,
854                 const struct timeval *tv,
855                 const char *label, size_t label_len,
856                 const char *unit_id,
857                 int priority,
858                 pid_t object_pid) {
859
860         int rl, r;
861         _cleanup_free_ char *path = NULL;
862         char *c;
863
864         assert(s);
865         assert(iovec || n == 0);
866
867         if (n == 0)
868                 return;
869
870         if (LOG_PRI(priority) > s->max_level_store)
871                 return;
872
873         /* Stop early in case the information will not be stored
874          * in a journal. */
875         if (s->storage == STORAGE_NONE)
876                 return;
877
878         if (!ucred)
879                 goto finish;
880
881         r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
882         if (r < 0)
883                 goto finish;
884
885         /* example: /user/lennart/3/foobar
886          *          /system/dbus.service/foobar
887          *
888          * So let's cut of everything past the third /, since that is
889          * where user directories start */
890
891         c = strchr(path, '/');
892         if (c) {
893                 c = strchr(c+1, '/');
894                 if (c) {
895                         c = strchr(c+1, '/');
896                         if (c)
897                                 *c = 0;
898                 }
899         }
900
901         rl = journal_rate_limit_test(s->rate_limit, path,
902                                      priority & LOG_PRIMASK, available_space(s, false));
903
904         if (rl == 0)
905                 return;
906
907         /* Write a suppression message if we suppressed something */
908         if (rl > 1)
909                 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
910                                       "Suppressed %u messages from %s", rl - 1, path);
911
912 finish:
913         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
914 }
915
916
917 static int system_journal_open(Server *s, bool flush_requested) {
918         int r;
919         char *fn;
920         sd_id128_t machine;
921         char ids[33];
922
923         r = sd_id128_get_machine(&machine);
924         if (r < 0)
925                 return log_error_errno(r, "Failed to get machine id: %m");
926
927         sd_id128_to_string(machine, ids);
928
929         if (!s->system_journal &&
930             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
931             (flush_requested
932              || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
933
934                 /* If in auto mode: first try to create the machine
935                  * path, but not the prefix.
936                  *
937                  * If in persistent mode: create /var/log/journal and
938                  * the machine path */
939
940                 if (s->storage == STORAGE_PERSISTENT)
941                         (void) mkdir("/var/log/journal/", 0755);
942
943                 fn = strjoina("/var/log/journal/", ids);
944                 (void) mkdir(fn, 0755);
945
946                 fn = strjoina(fn, "/system.journal");
947                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
948
949                 if (r >= 0)
950                         server_fix_perms(s, s->system_journal, 0);
951                 else if (r < 0) {
952                         if (r != -ENOENT && r != -EROFS)
953                                 log_warning_errno(r, "Failed to open system journal: %m");
954
955                         r = 0;
956                 }
957         }
958
959         if (!s->runtime_journal &&
960             (s->storage != STORAGE_NONE)) {
961
962                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
963                 if (!fn)
964                         return -ENOMEM;
965
966                 if (s->system_journal) {
967
968                         /* Try to open the runtime journal, but only
969                          * if it already exists, so that we can flush
970                          * it into the system journal */
971
972                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
973                         free(fn);
974
975                         if (r < 0) {
976                                 if (r != -ENOENT)
977                                         log_warning_errno(r, "Failed to open runtime journal: %m");
978
979                                 r = 0;
980                         }
981
982                 } else {
983
984                         /* OK, we really need the runtime journal, so create
985                          * it if necessary. */
986
987                         (void) mkdir("/run/log", 0755);
988                         (void) mkdir("/run/log/journal", 0755);
989                         (void) mkdir_parents(fn, 0750);
990
991                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
992                         free(fn);
993
994                         if (r < 0)
995                                 return log_error_errno(r, "Failed to open runtime journal: %m");
996                 }
997
998                 if (s->runtime_journal)
999                         server_fix_perms(s, s->runtime_journal, 0);
1000         }
1001
1002         available_space(s, true);
1003
1004         return r;
1005 }
1006
1007 int server_flush_to_var(Server *s) {
1008         sd_id128_t machine;
1009         sd_journal *j = NULL;
1010         char ts[FORMAT_TIMESPAN_MAX];
1011         usec_t start;
1012         unsigned n = 0;
1013         int r;
1014
1015         assert(s);
1016
1017         if (s->storage != STORAGE_AUTO &&
1018             s->storage != STORAGE_PERSISTENT)
1019                 return 0;
1020
1021         if (!s->runtime_journal)
1022                 return 0;
1023
1024         system_journal_open(s, true);
1025
1026         if (!s->system_journal)
1027                 return 0;
1028
1029         log_debug("Flushing to /var...");
1030
1031         start = now(CLOCK_MONOTONIC);
1032
1033         r = sd_id128_get_machine(&machine);
1034         if (r < 0)
1035                 return r;
1036
1037         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1038         if (r < 0)
1039                 return log_error_errno(r, "Failed to read runtime journal: %m");
1040
1041         sd_journal_set_data_threshold(j, 0);
1042
1043         SD_JOURNAL_FOREACH(j) {
1044                 Object *o = NULL;
1045                 JournalFile *f;
1046
1047                 f = j->current_file;
1048                 assert(f && f->current_offset > 0);
1049
1050                 n++;
1051
1052                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1053                 if (r < 0) {
1054                         log_error_errno(r, "Can't read entry: %m");
1055                         goto finish;
1056                 }
1057
1058                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1059                 if (r >= 0)
1060                         continue;
1061
1062                 if (!shall_try_append_again(s->system_journal, r)) {
1063                         log_error_errno(r, "Can't write entry: %m");
1064                         goto finish;
1065                 }
1066
1067                 server_rotate(s);
1068                 server_vacuum(s);
1069
1070                 if (!s->system_journal) {
1071                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1072                         r = -EIO;
1073                         goto finish;
1074                 }
1075
1076                 log_debug("Retrying write.");
1077                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1078                 if (r < 0) {
1079                         log_error_errno(r, "Can't write entry: %m");
1080                         goto finish;
1081                 }
1082         }
1083
1084 finish:
1085         journal_file_post_change(s->system_journal);
1086
1087         journal_file_close(s->runtime_journal);
1088         s->runtime_journal = NULL;
1089
1090         if (r >= 0)
1091                 rm_rf("/run/log/journal", false, true, false);
1092
1093         sd_journal_close(j);
1094
1095         server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1096
1097         return r;
1098 }
1099
1100 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1101         Server *s = userdata;
1102
1103         assert(s);
1104         assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1105
1106         if (revents != EPOLLIN) {
1107                 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1108                 return -EIO;
1109         }
1110
1111         for (;;) {
1112                 struct ucred *ucred = NULL;
1113                 struct timeval *tv = NULL;
1114                 struct cmsghdr *cmsg;
1115                 char *label = NULL;
1116                 size_t label_len = 0;
1117                 struct iovec iovec;
1118
1119                 union {
1120                         struct cmsghdr cmsghdr;
1121
1122                         /* We use NAME_MAX space for the SELinux label
1123                          * here. The kernel currently enforces no
1124                          * limit, but according to suggestions from
1125                          * the SELinux people this will change and it
1126                          * will probably be identical to NAME_MAX. For
1127                          * now we use that, but this should be updated
1128                          * one day when the final limit is known. */
1129                         uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1130                                     CMSG_SPACE(sizeof(struct timeval)) +
1131                                     CMSG_SPACE(sizeof(int)) + /* fd */
1132                                     CMSG_SPACE(NAME_MAX)]; /* selinux label */
1133                 } control = {};
1134                 union sockaddr_union sa = {};
1135                 struct msghdr msghdr = {
1136                         .msg_iov = &iovec,
1137                         .msg_iovlen = 1,
1138                         .msg_control = &control,
1139                         .msg_controllen = sizeof(control),
1140                         .msg_name = &sa,
1141                         .msg_namelen = sizeof(sa),
1142                 };
1143
1144                 ssize_t n;
1145                 int *fds = NULL;
1146                 unsigned n_fds = 0;
1147                 int v = 0;
1148                 size_t m;
1149
1150                 /* Try to get the right size, if we can. (Not all
1151                  * sockets support SIOCINQ, hence we just try, but
1152                  * don't rely on it. */
1153                 (void) ioctl(fd, SIOCINQ, &v);
1154
1155                 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1156                 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1157                                     (size_t) LINE_MAX,
1158                                     ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1159
1160                 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1161                         return log_oom();
1162
1163                 iovec.iov_base = s->buffer;
1164                 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1165
1166                 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1167                 if (n < 0) {
1168                         if (errno == EINTR || errno == EAGAIN)
1169                                 return 0;
1170
1171                         log_error_errno(errno, "recvmsg() failed: %m");
1172                         return -errno;
1173                 }
1174
1175                 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1176
1177                         if (cmsg->cmsg_level == SOL_SOCKET &&
1178                             cmsg->cmsg_type == SCM_CREDENTIALS &&
1179                             cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1180                                 ucred = (struct ucred*) CMSG_DATA(cmsg);
1181                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1182                                  cmsg->cmsg_type == SCM_SECURITY) {
1183                                 label = (char*) CMSG_DATA(cmsg);
1184                                 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1185                         } else if (cmsg->cmsg_level == SOL_SOCKET &&
1186                                    cmsg->cmsg_type == SO_TIMESTAMP &&
1187                                    cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1188                                 tv = (struct timeval*) CMSG_DATA(cmsg);
1189                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1190                                  cmsg->cmsg_type == SCM_RIGHTS) {
1191                                 fds = (int*) CMSG_DATA(cmsg);
1192                                 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1193                         }
1194                 }
1195
1196                 /* And a trailing NUL, just in case */
1197                 s->buffer[n] = 0;
1198
1199                 if (fd == s->syslog_fd) {
1200                         if (n > 0 && n_fds == 0)
1201                                 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1202                         else if (n_fds > 0)
1203                                 log_warning("Got file descriptors via syslog socket. Ignoring.");
1204
1205                 } else if (fd == s->native_fd) {
1206                         if (n > 0 && n_fds == 0)
1207                                 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1208                         else if (n == 0 && n_fds == 1)
1209                                 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1210                         else if (n_fds > 0)
1211                                 log_warning("Got too many file descriptors via native socket. Ignoring.");
1212
1213                 } else {
1214                         assert(fd == s->audit_fd);
1215
1216                         if (n > 0 && n_fds == 0)
1217                                 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1218                         else if (n_fds > 0)
1219                                 log_warning("Got file descriptors via audit socket. Ignoring.");
1220                 }
1221
1222                 close_many(fds, n_fds);
1223         }
1224 }
1225
1226 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1227         Server *s = userdata;
1228
1229         assert(s);
1230
1231         log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
1232
1233         server_flush_to_var(s);
1234         server_sync(s);
1235         server_vacuum(s);
1236
1237         touch("/run/systemd/journal/flushed");
1238
1239         return 0;
1240 }
1241
1242 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1243         Server *s = userdata;
1244
1245         assert(s);
1246
1247         log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
1248         server_rotate(s);
1249         server_vacuum(s);
1250
1251         return 0;
1252 }
1253
1254 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1255         Server *s = userdata;
1256
1257         assert(s);
1258
1259         log_received_signal(LOG_INFO, si);
1260
1261         sd_event_exit(s->event, 0);
1262         return 0;
1263 }
1264
1265 static int setup_signals(Server *s) {
1266         sigset_t mask;
1267         int r;
1268
1269         assert(s);
1270
1271         assert_se(sigemptyset(&mask) == 0);
1272         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1273         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1274
1275         r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1276         if (r < 0)
1277                 return r;
1278
1279         r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1280         if (r < 0)
1281                 return r;
1282
1283         r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1284         if (r < 0)
1285                 return r;
1286
1287         r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1288         if (r < 0)
1289                 return r;
1290
1291         return 0;
1292 }
1293
1294 static int server_parse_proc_cmdline(Server *s) {
1295         _cleanup_free_ char *line = NULL;
1296         const char *w, *state;
1297         size_t l;
1298         int r;
1299
1300         r = proc_cmdline(&line);
1301         if (r < 0) {
1302                 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
1303                 return 0;
1304         }
1305
1306         FOREACH_WORD_QUOTED(w, l, line, state) {
1307                 _cleanup_free_ char *word;
1308
1309                 word = strndup(w, l);
1310                 if (!word)
1311                         return -ENOMEM;
1312
1313                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1314                         r = parse_boolean(word + 35);
1315                         if (r < 0)
1316                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1317                         else
1318                                 s->forward_to_syslog = r;
1319                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1320                         r = parse_boolean(word + 33);
1321                         if (r < 0)
1322                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1323                         else
1324                                 s->forward_to_kmsg = r;
1325                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1326                         r = parse_boolean(word + 36);
1327                         if (r < 0)
1328                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1329                         else
1330                                 s->forward_to_console = r;
1331                 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1332                         r = parse_boolean(word + 33);
1333                         if (r < 0)
1334                                 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1335                         else
1336                                 s->forward_to_wall = r;
1337                 } else if (startswith(word, "systemd.journald"))
1338                         log_warning("Invalid systemd.journald parameter. Ignoring.");
1339         }
1340         /* do not warn about state here, since probably systemd already did */
1341
1342         return 0;
1343 }
1344
1345 static int server_parse_config_file(Server *s) {
1346         assert(s);
1347
1348         return config_parse_many("/etc/systemd/journald.conf",
1349                                  CONF_DIRS_NULSTR("systemd/journald.conf"),
1350                                  "Journal\0",
1351                                  config_item_perf_lookup, journald_gperf_lookup,
1352                                  false, s);
1353 }
1354
1355 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1356         Server *s = userdata;
1357
1358         assert(s);
1359
1360         server_sync(s);
1361         return 0;
1362 }
1363
1364 int server_schedule_sync(Server *s, int priority) {
1365         int r;
1366
1367         assert(s);
1368
1369         if (priority <= LOG_CRIT) {
1370                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1371                 server_sync(s);
1372                 return 0;
1373         }
1374
1375         if (s->sync_scheduled)
1376                 return 0;
1377
1378         if (s->sync_interval_usec > 0) {
1379                 usec_t when;
1380
1381                 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1382                 if (r < 0)
1383                         return r;
1384
1385                 when += s->sync_interval_usec;
1386
1387                 if (!s->sync_event_source) {
1388                         r = sd_event_add_time(
1389                                         s->event,
1390                                         &s->sync_event_source,
1391                                         CLOCK_MONOTONIC,
1392                                         when, 0,
1393                                         server_dispatch_sync, s);
1394                         if (r < 0)
1395                                 return r;
1396
1397                         r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1398                 } else {
1399                         r = sd_event_source_set_time(s->sync_event_source, when);
1400                         if (r < 0)
1401                                 return r;
1402
1403                         r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1404                 }
1405                 if (r < 0)
1406                         return r;
1407
1408                 s->sync_scheduled = true;
1409         }
1410
1411         return 0;
1412 }
1413
1414 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1415         Server *s = userdata;
1416
1417         assert(s);
1418
1419         server_cache_hostname(s);
1420         return 0;
1421 }
1422
1423 static int server_open_hostname(Server *s) {
1424         int r;
1425
1426         assert(s);
1427
1428         s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1429         if (s->hostname_fd < 0)
1430                 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1431
1432         r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1433         if (r < 0) {
1434                 /* kernels prior to 3.2 don't support polling this file. Ignore
1435                  * the failure. */
1436                 if (r == -EPERM) {
1437                         log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1438                                         strerror(-r));
1439                         s->hostname_fd = safe_close(s->hostname_fd);
1440                         return 0;
1441                 }
1442
1443                 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1444         }
1445
1446         r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1447         if (r < 0)
1448                 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1449
1450         return 0;
1451 }
1452
1453 int server_init(Server *s) {
1454         _cleanup_fdset_free_ FDSet *fds = NULL;
1455         int n, r, fd;
1456
1457         assert(s);
1458
1459         zero(*s);
1460         s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = -1;
1461         s->compress = true;
1462         s->seal = true;
1463
1464         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1465         s->sync_scheduled = false;
1466
1467         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1468         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1469
1470         s->forward_to_wall = true;
1471
1472         s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1473
1474         s->max_level_store = LOG_DEBUG;
1475         s->max_level_syslog = LOG_DEBUG;
1476         s->max_level_kmsg = LOG_NOTICE;
1477         s->max_level_console = LOG_INFO;
1478         s->max_level_wall = LOG_EMERG;
1479
1480         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1481         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1482
1483         server_parse_config_file(s);
1484         server_parse_proc_cmdline(s);
1485         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1486                 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1487                           s->rate_limit_interval, s->rate_limit_burst);
1488                 s->rate_limit_interval = s->rate_limit_burst = 0;
1489         }
1490
1491         mkdir_p("/run/systemd/journal", 0755);
1492
1493         s->user_journals = ordered_hashmap_new(NULL);
1494         if (!s->user_journals)
1495                 return log_oom();
1496
1497         s->mmap = mmap_cache_new();
1498         if (!s->mmap)
1499                 return log_oom();
1500
1501         r = sd_event_default(&s->event);
1502         if (r < 0)
1503                 return log_error_errno(r, "Failed to create event loop: %m");
1504
1505         sd_event_set_watchdog(s->event, true);
1506
1507         n = sd_listen_fds(true);
1508         if (n < 0)
1509                 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1510
1511         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1512
1513                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1514
1515                         if (s->native_fd >= 0) {
1516                                 log_error("Too many native sockets passed.");
1517                                 return -EINVAL;
1518                         }
1519
1520                         s->native_fd = fd;
1521
1522                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1523
1524                         if (s->stdout_fd >= 0) {
1525                                 log_error("Too many stdout sockets passed.");
1526                                 return -EINVAL;
1527                         }
1528
1529                         s->stdout_fd = fd;
1530
1531                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1532                            sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1533
1534                         if (s->syslog_fd >= 0) {
1535                                 log_error("Too many /dev/log sockets passed.");
1536                                 return -EINVAL;
1537                         }
1538
1539                         s->syslog_fd = fd;
1540
1541                 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1542
1543                         if (s->audit_fd >= 0) {
1544                                 log_error("Too many audit sockets passed.");
1545                                 return -EINVAL;
1546                         }
1547
1548                         s->audit_fd = fd;
1549
1550                 } else {
1551
1552                         if (!fds) {
1553                                 fds = fdset_new();
1554                                 if (!fds)
1555                                         return log_oom();
1556                         }
1557
1558                         r = fdset_put(fds, fd);
1559                         if (r < 0)
1560                                 return log_oom();
1561                 }
1562         }
1563
1564         r = server_open_stdout_socket(s, fds);
1565         if (r < 0)
1566                 return r;
1567
1568         if (fdset_size(fds) > 0) {
1569                 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1570                 fds = fdset_free(fds);
1571         }
1572
1573         r = server_open_syslog_socket(s);
1574         if (r < 0)
1575                 return r;
1576
1577         r = server_open_native_socket(s);
1578         if (r < 0)
1579                 return r;
1580
1581         r = server_open_dev_kmsg(s);
1582         if (r < 0)
1583                 return r;
1584
1585         r = server_open_audit(s);
1586         if (r < 0)
1587                 return r;
1588
1589         r = server_open_kernel_seqnum(s);
1590         if (r < 0)
1591                 return r;
1592
1593         r = server_open_hostname(s);
1594         if (r < 0)
1595                 return r;
1596
1597         r = setup_signals(s);
1598         if (r < 0)
1599                 return r;
1600
1601         s->udev = udev_new();
1602         if (!s->udev)
1603                 return -ENOMEM;
1604
1605         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1606         if (!s->rate_limit)
1607                 return -ENOMEM;
1608
1609         r = cg_get_root_path(&s->cgroup_root);
1610         if (r < 0)
1611                 return r;
1612
1613         server_cache_hostname(s);
1614         server_cache_boot_id(s);
1615         server_cache_machine_id(s);
1616
1617         r = system_journal_open(s, false);
1618         if (r < 0)
1619                 return r;
1620
1621         return 0;
1622 }
1623
1624 void server_maybe_append_tags(Server *s) {
1625 #ifdef HAVE_GCRYPT
1626         JournalFile *f;
1627         Iterator i;
1628         usec_t n;
1629
1630         n = now(CLOCK_REALTIME);
1631
1632         if (s->system_journal)
1633                 journal_file_maybe_append_tag(s->system_journal, n);
1634
1635         ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1636                 journal_file_maybe_append_tag(f, n);
1637 #endif
1638 }
1639
1640 void server_done(Server *s) {
1641         JournalFile *f;
1642         assert(s);
1643
1644         while (s->stdout_streams)
1645                 stdout_stream_free(s->stdout_streams);
1646
1647         if (s->system_journal)
1648                 journal_file_close(s->system_journal);
1649
1650         if (s->runtime_journal)
1651                 journal_file_close(s->runtime_journal);
1652
1653         while ((f = ordered_hashmap_steal_first(s->user_journals)))
1654                 journal_file_close(f);
1655
1656         ordered_hashmap_free(s->user_journals);
1657
1658         sd_event_source_unref(s->syslog_event_source);
1659         sd_event_source_unref(s->native_event_source);
1660         sd_event_source_unref(s->stdout_event_source);
1661         sd_event_source_unref(s->dev_kmsg_event_source);
1662         sd_event_source_unref(s->audit_event_source);
1663         sd_event_source_unref(s->sync_event_source);
1664         sd_event_source_unref(s->sigusr1_event_source);
1665         sd_event_source_unref(s->sigusr2_event_source);
1666         sd_event_source_unref(s->sigterm_event_source);
1667         sd_event_source_unref(s->sigint_event_source);
1668         sd_event_source_unref(s->hostname_event_source);
1669         sd_event_unref(s->event);
1670
1671         safe_close(s->syslog_fd);
1672         safe_close(s->native_fd);
1673         safe_close(s->stdout_fd);
1674         safe_close(s->dev_kmsg_fd);
1675         safe_close(s->audit_fd);
1676         safe_close(s->hostname_fd);
1677
1678         if (s->rate_limit)
1679                 journal_rate_limit_free(s->rate_limit);
1680
1681         if (s->kernel_seqnum)
1682                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1683
1684         free(s->buffer);
1685         free(s->tty_path);
1686         free(s->cgroup_root);
1687         free(s->hostname_field);
1688
1689         if (s->mmap)
1690                 mmap_cache_unref(s->mmap);
1691
1692         if (s->udev)
1693                 udev_unref(s->udev);
1694 }