chiark / gitweb /
7ee8174ea235d3a65da7afdcf7fde7b94a8541cb
[elogind.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
26 #include <sys/mman.h>
27 #include <sys/timerfd.h>
28
29 #include <libudev.h>
30
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33 #include "sd-daemon.h"
34 #include "fileio.h"
35 #include "mkdir.h"
36 #include "hashmap.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
40 #include "list.h"
41 #include "missing.h"
42 #include "conf-parser.h"
43 #include "selinux-util.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-rate-limit.h"
48 #include "journald-kmsg.h"
49 #include "journald-syslog.h"
50 #include "journald-stream.h"
51 #include "journald-console.h"
52 #include "journald-native.h"
53 #include "journald-audit.h"
54 #include "journald-server.h"
55 #include "acl-util.h"
56
57 #ifdef HAVE_SELINUX
58 #include <selinux/selinux.h>
59 #endif
60
/* Upper bound on the number of per-user journal files kept open at the
 * same time; find_journal() closes entries once this many are open. */
#define USER_JOURNALS_MAX 1024

/* Built-in defaults. NOTE(review): presumably applied where the
 * configuration does not override them -- the users of these values are
 * outside this part of the file. */
#define DEFAULT_SYNC_INTERVAL_USEC (5 * USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30 * USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* For how long a result computed by available_space() is reused before
 * the journal directory is scanned again. */
#define RECHECK_AVAILABLE_SPACE_USEC (30 * USEC_PER_SEC)
69
70 static const char* const storage_table[_STORAGE_MAX] = {
71         [STORAGE_AUTO] = "auto",
72         [STORAGE_VOLATILE] = "volatile",
73         [STORAGE_PERSISTENT] = "persistent",
74         [STORAGE_NONE] = "none"
75 };
76
77 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
78 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
79
80 static const char* const split_mode_table[_SPLIT_MAX] = {
81         [SPLIT_LOGIN] = "login",
82         [SPLIT_UID] = "uid",
83         [SPLIT_NONE] = "none",
84 };
85
86 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
87 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
88
89 static uint64_t available_space(Server *s, bool verbose) {
90         char ids[33];
91         _cleanup_free_ char *p = NULL;
92         sd_id128_t machine;
93         struct statvfs ss;
94         uint64_t sum = 0, ss_avail = 0, avail = 0;
95         int r;
96         _cleanup_closedir_ DIR *d = NULL;
97         usec_t ts;
98         const char *f;
99         JournalMetrics *m;
100
101         ts = now(CLOCK_MONOTONIC);
102
103         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
104             && !verbose)
105                 return s->cached_available_space;
106
107         r = sd_id128_get_machine(&machine);
108         if (r < 0)
109                 return 0;
110
111         if (s->system_journal) {
112                 f = "/var/log/journal/";
113                 m = &s->system_metrics;
114         } else {
115                 f = "/run/log/journal/";
116                 m = &s->runtime_metrics;
117         }
118
119         assert(m);
120
121         p = strappend(f, sd_id128_to_string(machine, ids));
122         if (!p)
123                 return 0;
124
125         d = opendir(p);
126         if (!d)
127                 return 0;
128
129         if (fstatvfs(dirfd(d), &ss) < 0)
130                 return 0;
131
132         for (;;) {
133                 struct stat st;
134                 struct dirent *de;
135
136                 errno = 0;
137                 de = readdir(d);
138                 if (!de && errno != 0)
139                         return 0;
140
141                 if (!de)
142                         break;
143
144                 if (!endswith(de->d_name, ".journal") &&
145                     !endswith(de->d_name, ".journal~"))
146                         continue;
147
148                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
149                         continue;
150
151                 if (!S_ISREG(st.st_mode))
152                         continue;
153
154                 sum += (uint64_t) st.st_blocks * 512UL;
155         }
156
157         ss_avail = ss.f_bsize * ss.f_bavail;
158
159         /* If we reached a high mark, we will always allow this much
160          * again, unless usage goes above max_use. This watermark
161          * value is cached so that we don't give up space on pressure,
162          * but hover below the maximum usage. */
163
164         if (m->use < sum)
165                 m->use = sum;
166
167         avail = LESS_BY(ss_avail, m->keep_free);
168
169         s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
170         s->cached_available_space_timestamp = ts;
171
172         if (verbose) {
173                 char    fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
174                         fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
175
176                 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
177                                       "%s journal is using %s (max allowed %s, "
178                                       "trying to leave %s free of %s available → current limit %s).",
179                                       s->system_journal ? "Permanent" : "Runtime",
180                                       format_bytes(fb1, sizeof(fb1), sum),
181                                       format_bytes(fb2, sizeof(fb2), m->max_use),
182                                       format_bytes(fb3, sizeof(fb3), m->keep_free),
183                                       format_bytes(fb4, sizeof(fb4), ss_avail),
184                                       format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
185         }
186
187         return s->cached_available_space;
188 }
189
190 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
191         int r;
192 #ifdef HAVE_ACL
193         acl_t acl;
194         acl_entry_t entry;
195         acl_permset_t permset;
196 #endif
197
198         assert(f);
199
200         r = fchmod(f->fd, 0640);
201         if (r < 0)
202                 log_warning_errno(r, "Failed to fix access mode on %s, ignoring: %m", f->path);
203
204 #ifdef HAVE_ACL
205         if (uid <= SYSTEM_UID_MAX)
206                 return;
207
208         acl = acl_get_fd(f->fd);
209         if (!acl) {
210                 log_warning_errno(errno, "Failed to read ACL on %s, ignoring: %m", f->path);
211                 return;
212         }
213
214         r = acl_find_uid(acl, uid, &entry);
215         if (r <= 0) {
216
217                 if (acl_create_entry(&acl, &entry) < 0 ||
218                     acl_set_tag_type(entry, ACL_USER) < 0 ||
219                     acl_set_qualifier(entry, &uid) < 0) {
220                         log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
221                         goto finish;
222                 }
223         }
224
225         /* We do not recalculate the mask unconditionally here,
226          * so that the fchmod() mask above stays intact. */
227         if (acl_get_permset(entry, &permset) < 0 ||
228             acl_add_perm(permset, ACL_READ) < 0 ||
229             calc_acl_mask_if_needed(&acl) < 0) {
230                 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
231                 goto finish;
232         }
233
234         if (acl_set_fd(f->fd, acl) < 0)
235                 log_warning_errno(errno, "Failed to set ACL on %s, ignoring: %m", f->path);
236
237 finish:
238         acl_free(acl);
239 #endif
240 }
241
242 static JournalFile* find_journal(Server *s, uid_t uid) {
243         _cleanup_free_ char *p = NULL;
244         int r;
245         JournalFile *f;
246         sd_id128_t machine;
247
248         assert(s);
249
250         /* We split up user logs only on /var, not on /run. If the
251          * runtime file is open, we write to it exclusively, in order
252          * to guarantee proper order as soon as we flush /run to
253          * /var and close the runtime file. */
254
255         if (s->runtime_journal)
256                 return s->runtime_journal;
257
258         if (uid <= SYSTEM_UID_MAX)
259                 return s->system_journal;
260
261         r = sd_id128_get_machine(&machine);
262         if (r < 0)
263                 return s->system_journal;
264
265         f = ordered_hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
266         if (f)
267                 return f;
268
269         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
270                      SD_ID128_FORMAT_VAL(machine), uid) < 0)
271                 return s->system_journal;
272
273         while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
274                 /* Too many open? Then let's close one */
275                 f = ordered_hashmap_steal_first(s->user_journals);
276                 assert(f);
277                 journal_file_close(f);
278         }
279
280         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
281         if (r < 0)
282                 return s->system_journal;
283
284         server_fix_perms(s, f, uid);
285
286         r = ordered_hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
287         if (r < 0) {
288                 journal_file_close(f);
289                 return s->system_journal;
290         }
291
292         return f;
293 }
294
295 static int do_rotate(
296                 Server *s,
297                 JournalFile **f,
298                 const char* name,
299                 bool seal,
300                 uint32_t uid) {
301
302         int r;
303         assert(s);
304
305         if (!*f)
306                 return -EINVAL;
307
308         r = journal_file_rotate(f, s->compress, seal);
309         if (r < 0)
310                 if (*f)
311                         log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
312                 else
313                         log_error_errno(r, "Failed to create new %s journal: %m", name);
314         else
315                 server_fix_perms(s, *f, uid);
316
317         return r;
318 }
319
320 void server_rotate(Server *s) {
321         JournalFile *f;
322         void *k;
323         Iterator i;
324         int r;
325
326         log_debug("Rotating...");
327
328         do_rotate(s, &s->runtime_journal, "runtime", false, 0);
329         do_rotate(s, &s->system_journal, "system", s->seal, 0);
330
331         ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
332                 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
333                 if (r >= 0)
334                         ordered_hashmap_replace(s->user_journals, k, f);
335                 else if (!f)
336                         /* Old file has been closed and deallocated */
337                         ordered_hashmap_remove(s->user_journals, k);
338         }
339 }
340
341 void server_sync(Server *s) {
342         JournalFile *f;
343         void *k;
344         Iterator i;
345         int r;
346
347         if (s->system_journal) {
348                 r = journal_file_set_offline(s->system_journal);
349                 if (r < 0)
350                         log_error_errno(r, "Failed to sync system journal: %m");
351         }
352
353         ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
354                 r = journal_file_set_offline(f);
355                 if (r < 0)
356                         log_error_errno(r, "Failed to sync user journal: %m");
357         }
358
359         if (s->sync_event_source) {
360                 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
361                 if (r < 0)
362                         log_error_errno(r, "Failed to disable sync timer source: %m");
363         }
364
365         s->sync_scheduled = false;
366 }
367
368 static void do_vacuum(
369                 Server *s,
370                 const char *id,
371                 JournalFile *f,
372                 const char* path,
373                 JournalMetrics *metrics) {
374
375         const char *p;
376         int r;
377
378         if (!f)
379                 return;
380
381         p = strjoina(path, id);
382         r = journal_directory_vacuum(p, metrics->max_use, s->max_retention_usec, &s->oldest_file_usec, false);
383         if (r < 0 && r != -ENOENT)
384                 log_error_errno(r, "Failed to vacuum %s: %m", p);
385 }
386
387 void server_vacuum(Server *s) {
388         char ids[33];
389         sd_id128_t machine;
390         int r;
391
392         log_debug("Vacuuming...");
393
394         s->oldest_file_usec = 0;
395
396         r = sd_id128_get_machine(&machine);
397         if (r < 0) {
398                 log_error_errno(r, "Failed to get machine ID: %m");
399                 return;
400         }
401         sd_id128_to_string(machine, ids);
402
403         do_vacuum(s, ids, s->system_journal, "/var/log/journal/", &s->system_metrics);
404         do_vacuum(s, ids, s->runtime_journal, "/run/log/journal/", &s->runtime_metrics);
405
406         s->cached_available_space_timestamp = 0;
407 }
408
409 static void server_cache_machine_id(Server *s) {
410         sd_id128_t id;
411         int r;
412
413         assert(s);
414
415         r = sd_id128_get_machine(&id);
416         if (r < 0)
417                 return;
418
419         sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
420 }
421
422 static void server_cache_boot_id(Server *s) {
423         sd_id128_t id;
424         int r;
425
426         assert(s);
427
428         r = sd_id128_get_boot(&id);
429         if (r < 0)
430                 return;
431
432         sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
433 }
434
435 static void server_cache_hostname(Server *s) {
436         _cleanup_free_ char *t = NULL;
437         char *x;
438
439         assert(s);
440
441         t = gethostname_malloc();
442         if (!t)
443                 return;
444
445         x = strappend("_HOSTNAME=", t);
446         if (!x)
447                 return;
448
449         free(s->hostname_field);
450         s->hostname_field = x;
451 }
452
453 static bool shall_try_append_again(JournalFile *f, int r) {
454
455         /* -E2BIG            Hit configured limit
456            -EFBIG            Hit fs limit
457            -EDQUOT           Quota limit hit
458            -ENOSPC           Disk full
459            -EIO              I/O error of some kind (mmap)
460            -EHOSTDOWN        Other machine
461            -EBUSY            Unclean shutdown
462            -EPROTONOSUPPORT  Unsupported feature
463            -EBADMSG          Corrupted
464            -ENODATA          Truncated
465            -ESHUTDOWN        Already archived
466            -EIDRM            Journal file has been deleted */
467
468         if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
469                 log_debug("%s: Allocation limit reached, rotating.", f->path);
470         else if (r == -EHOSTDOWN)
471                 log_info("%s: Journal file from other machine, rotating.", f->path);
472         else if (r == -EBUSY)
473                 log_info("%s: Unclean shutdown, rotating.", f->path);
474         else if (r == -EPROTONOSUPPORT)
475                 log_info("%s: Unsupported feature, rotating.", f->path);
476         else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
477                 log_warning("%s: Journal file corrupted, rotating.", f->path);
478         else if (r == -EIO)
479                 log_warning("%s: IO error, rotating.", f->path);
480         else if (r == -EIDRM)
481                 log_warning("%s: Journal file has been deleted, rotating.", f->path);
482         else
483                 return false;
484
485         return true;
486 }
487
488 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
489         JournalFile *f;
490         bool vacuumed = false;
491         int r;
492
493         assert(s);
494         assert(iovec);
495         assert(n > 0);
496
497         f = find_journal(s, uid);
498         if (!f)
499                 return;
500
501         if (journal_file_rotate_suggested(f, s->max_file_usec)) {
502                 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
503                 server_rotate(s);
504                 server_vacuum(s);
505                 vacuumed = true;
506
507                 f = find_journal(s, uid);
508                 if (!f)
509                         return;
510         }
511
512         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
513         if (r >= 0) {
514                 server_schedule_sync(s, priority);
515                 return;
516         }
517
518         if (vacuumed || !shall_try_append_again(f, r)) {
519                 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
520                 return;
521         }
522
523         server_rotate(s);
524         server_vacuum(s);
525
526         f = find_journal(s, uid);
527         if (!f)
528                 return;
529
530         log_debug("Retrying write.");
531         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
532         if (r < 0)
533                 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
534         else
535                 server_schedule_sync(s, priority);
536 }
537
538 static void dispatch_message_real(
539                 Server *s,
540                 struct iovec *iovec, unsigned n, unsigned m,
541                 const struct ucred *ucred,
542                 const struct timeval *tv,
543                 const char *label, size_t label_len,
544                 const char *unit_id,
545                 int priority,
546                 pid_t object_pid) {
547
548         char    pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
549                 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
550                 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
551                 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
552                 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
553                 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
554                 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
555                 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
556         uid_t object_uid;
557         gid_t object_gid;
558         char *x;
559         int r;
560         char *t, *c;
561         uid_t realuid = 0, owner = 0, journal_uid;
562         bool owner_valid = false;
563 #ifdef HAVE_AUDIT
564         char    audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
565                 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
566                 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
567                 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
568
569         uint32_t audit;
570         uid_t loginuid;
571 #endif
572
573         assert(s);
574         assert(iovec);
575         assert(n > 0);
576         assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
577
578         if (ucred) {
579                 realuid = ucred->uid;
580
581                 sprintf(pid, "_PID="PID_FMT, ucred->pid);
582                 IOVEC_SET_STRING(iovec[n++], pid);
583
584                 sprintf(uid, "_UID="UID_FMT, ucred->uid);
585                 IOVEC_SET_STRING(iovec[n++], uid);
586
587                 sprintf(gid, "_GID="GID_FMT, ucred->gid);
588                 IOVEC_SET_STRING(iovec[n++], gid);
589
590                 r = get_process_comm(ucred->pid, &t);
591                 if (r >= 0) {
592                         x = strjoina("_COMM=", t);
593                         free(t);
594                         IOVEC_SET_STRING(iovec[n++], x);
595                 }
596
597                 r = get_process_exe(ucred->pid, &t);
598                 if (r >= 0) {
599                         x = strjoina("_EXE=", t);
600                         free(t);
601                         IOVEC_SET_STRING(iovec[n++], x);
602                 }
603
604                 r = get_process_cmdline(ucred->pid, 0, false, &t);
605                 if (r >= 0) {
606                         x = strjoina("_CMDLINE=", t);
607                         free(t);
608                         IOVEC_SET_STRING(iovec[n++], x);
609                 }
610
611                 r = get_process_capeff(ucred->pid, &t);
612                 if (r >= 0) {
613                         x = strjoina("_CAP_EFFECTIVE=", t);
614                         free(t);
615                         IOVEC_SET_STRING(iovec[n++], x);
616                 }
617
618 #ifdef HAVE_AUDIT
619                 r = audit_session_from_pid(ucred->pid, &audit);
620                 if (r >= 0) {
621                         sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
622                         IOVEC_SET_STRING(iovec[n++], audit_session);
623                 }
624
625                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
626                 if (r >= 0) {
627                         sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
628                         IOVEC_SET_STRING(iovec[n++], audit_loginuid);
629                 }
630 #endif
631
632                 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
633                 if (r >= 0) {
634                         char *session = NULL;
635
636                         x = strjoina("_SYSTEMD_CGROUP=", c);
637                         IOVEC_SET_STRING(iovec[n++], x);
638
639                         r = cg_path_get_session(c, &t);
640                         if (r >= 0) {
641                                 session = strjoina("_SYSTEMD_SESSION=", t);
642                                 free(t);
643                                 IOVEC_SET_STRING(iovec[n++], session);
644                         }
645
646                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
647                                 owner_valid = true;
648
649                                 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
650                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
651                         }
652
653                         if (cg_path_get_unit(c, &t) >= 0) {
654                                 x = strjoina("_SYSTEMD_UNIT=", t);
655                                 free(t);
656                                 IOVEC_SET_STRING(iovec[n++], x);
657                         } else if (unit_id && !session) {
658                                 x = strjoina("_SYSTEMD_UNIT=", unit_id);
659                                 IOVEC_SET_STRING(iovec[n++], x);
660                         }
661
662                         if (cg_path_get_user_unit(c, &t) >= 0) {
663                                 x = strjoina("_SYSTEMD_USER_UNIT=", t);
664                                 free(t);
665                                 IOVEC_SET_STRING(iovec[n++], x);
666                         } else if (unit_id && session) {
667                                 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
668                                 IOVEC_SET_STRING(iovec[n++], x);
669                         }
670
671                         if (cg_path_get_slice(c, &t) >= 0) {
672                                 x = strjoina("_SYSTEMD_SLICE=", t);
673                                 free(t);
674                                 IOVEC_SET_STRING(iovec[n++], x);
675                         }
676
677                         free(c);
678                 } else if (unit_id) {
679                         x = strjoina("_SYSTEMD_UNIT=", unit_id);
680                         IOVEC_SET_STRING(iovec[n++], x);
681                 }
682
683 #ifdef HAVE_SELINUX
684                 if (mac_selinux_use()) {
685                         if (label) {
686                                 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
687
688                                 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
689                                 IOVEC_SET_STRING(iovec[n++], x);
690                         } else {
691                                 security_context_t con;
692
693                                 if (getpidcon(ucred->pid, &con) >= 0) {
694                                         x = strjoina("_SELINUX_CONTEXT=", con);
695
696                                         freecon(con);
697                                         IOVEC_SET_STRING(iovec[n++], x);
698                                 }
699                         }
700                 }
701 #endif
702         }
703         assert(n <= m);
704
705         if (object_pid) {
706                 r = get_process_uid(object_pid, &object_uid);
707                 if (r >= 0) {
708                         sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
709                         IOVEC_SET_STRING(iovec[n++], o_uid);
710                 }
711
712                 r = get_process_gid(object_pid, &object_gid);
713                 if (r >= 0) {
714                         sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
715                         IOVEC_SET_STRING(iovec[n++], o_gid);
716                 }
717
718                 r = get_process_comm(object_pid, &t);
719                 if (r >= 0) {
720                         x = strjoina("OBJECT_COMM=", t);
721                         free(t);
722                         IOVEC_SET_STRING(iovec[n++], x);
723                 }
724
725                 r = get_process_exe(object_pid, &t);
726                 if (r >= 0) {
727                         x = strjoina("OBJECT_EXE=", t);
728                         free(t);
729                         IOVEC_SET_STRING(iovec[n++], x);
730                 }
731
732                 r = get_process_cmdline(object_pid, 0, false, &t);
733                 if (r >= 0) {
734                         x = strjoina("OBJECT_CMDLINE=", t);
735                         free(t);
736                         IOVEC_SET_STRING(iovec[n++], x);
737                 }
738
739 #ifdef HAVE_AUDIT
740                 r = audit_session_from_pid(object_pid, &audit);
741                 if (r >= 0) {
742                         sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
743                         IOVEC_SET_STRING(iovec[n++], o_audit_session);
744                 }
745
746                 r = audit_loginuid_from_pid(object_pid, &loginuid);
747                 if (r >= 0) {
748                         sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
749                         IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
750                 }
751 #endif
752
753                 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
754                 if (r >= 0) {
755                         x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
756                         IOVEC_SET_STRING(iovec[n++], x);
757
758                         r = cg_path_get_session(c, &t);
759                         if (r >= 0) {
760                                 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
761                                 free(t);
762                                 IOVEC_SET_STRING(iovec[n++], x);
763                         }
764
765                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
766                                 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
767                                 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
768                         }
769
770                         if (cg_path_get_unit(c, &t) >= 0) {
771                                 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
772                                 free(t);
773                                 IOVEC_SET_STRING(iovec[n++], x);
774                         }
775
776                         if (cg_path_get_user_unit(c, &t) >= 0) {
777                                 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
778                                 free(t);
779                                 IOVEC_SET_STRING(iovec[n++], x);
780                         }
781
782                         free(c);
783                 }
784         }
785         assert(n <= m);
786
787         if (tv) {
788                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
789                 IOVEC_SET_STRING(iovec[n++], source_time);
790         }
791
792         /* Note that strictly speaking storing the boot id here is
793          * redundant since the entry includes this in-line
794          * anyway. However, we need this indexed, too. */
795         if (!isempty(s->boot_id_field))
796                 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
797
798         if (!isempty(s->machine_id_field))
799                 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
800
801         if (!isempty(s->hostname_field))
802                 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
803
804         assert(n <= m);
805
806         if (s->split_mode == SPLIT_UID && realuid > 0)
807                 /* Split up strictly by any UID */
808                 journal_uid = realuid;
809         else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
810                 /* Split up by login UIDs.  We do this only if the
811                  * realuid is not root, in order not to accidentally
812                  * leak privileged information to the user that is
813                  * logged by a privileged process that is part of an
814                  * unprivileged session. */
815                 journal_uid = owner;
816         else
817                 journal_uid = 0;
818
819         write_to_journal(s, journal_uid, iovec, n, priority);
820 }
821
822 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
823         char mid[11 + 32 + 1];
824         char buffer[16 + LINE_MAX + 1];
825         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
826         int n = 0;
827         va_list ap;
828         struct ucred ucred = {};
829
830         assert(s);
831         assert(format);
832
833         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
834         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
835
836         memcpy(buffer, "MESSAGE=", 8);
837         va_start(ap, format);
838         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
839         va_end(ap);
840         IOVEC_SET_STRING(iovec[n++], buffer);
841
842         if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
843                 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
844                 IOVEC_SET_STRING(iovec[n++], mid);
845         }
846
847         ucred.pid = getpid();
848         ucred.uid = getuid();
849         ucred.gid = getgid();
850
851         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
852 }
853
854 void server_dispatch_message(
855                 Server *s,
856                 struct iovec *iovec, unsigned n, unsigned m,
857                 const struct ucred *ucred,
858                 const struct timeval *tv,
859                 const char *label, size_t label_len,
860                 const char *unit_id,
861                 int priority,
862                 pid_t object_pid) {
863
864         int rl, r;
865         _cleanup_free_ char *path = NULL;
866         char *c;
867
868         assert(s);
869         assert(iovec || n == 0);
870
871         if (n == 0)
872                 return;
873
874         if (LOG_PRI(priority) > s->max_level_store)
875                 return;
876
877         /* Stop early in case the information will not be stored
878          * in a journal. */
879         if (s->storage == STORAGE_NONE)
880                 return;
881
882         if (!ucred)
883                 goto finish;
884
885         r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
886         if (r < 0)
887                 goto finish;
888
889         /* example: /user/lennart/3/foobar
890          *          /system/dbus.service/foobar
891          *
892          * So let's cut of everything past the third /, since that is
893          * where user directories start */
894
895         c = strchr(path, '/');
896         if (c) {
897                 c = strchr(c+1, '/');
898                 if (c) {
899                         c = strchr(c+1, '/');
900                         if (c)
901                                 *c = 0;
902                 }
903         }
904
905         rl = journal_rate_limit_test(s->rate_limit, path,
906                                      priority & LOG_PRIMASK, available_space(s, false));
907
908         if (rl == 0)
909                 return;
910
911         /* Write a suppression message if we suppressed something */
912         if (rl > 1)
913                 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
914                                       "Suppressed %u messages from %s", rl - 1, path);
915
916 finish:
917         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
918 }
919
920
921 static int system_journal_open(Server *s, bool flush_requested) {
922         int r;
923         char *fn;
924         sd_id128_t machine;
925         char ids[33];
926
927         r = sd_id128_get_machine(&machine);
928         if (r < 0)
929                 return log_error_errno(r, "Failed to get machine id: %m");
930
931         sd_id128_to_string(machine, ids);
932
933         if (!s->system_journal &&
934             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
935             (flush_requested
936              || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
937
938                 /* If in auto mode: first try to create the machine
939                  * path, but not the prefix.
940                  *
941                  * If in persistent mode: create /var/log/journal and
942                  * the machine path */
943
944                 if (s->storage == STORAGE_PERSISTENT)
945                         (void) mkdir("/var/log/journal/", 0755);
946
947                 fn = strjoina("/var/log/journal/", ids);
948                 (void) mkdir(fn, 0755);
949
950                 fn = strjoina(fn, "/system.journal");
951                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
952
953                 if (r >= 0)
954                         server_fix_perms(s, s->system_journal, 0);
955                 else if (r < 0) {
956                         if (r != -ENOENT && r != -EROFS)
957                                 log_warning_errno(r, "Failed to open system journal: %m");
958
959                         r = 0;
960                 }
961         }
962
963         if (!s->runtime_journal &&
964             (s->storage != STORAGE_NONE)) {
965
966                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
967                 if (!fn)
968                         return -ENOMEM;
969
970                 if (s->system_journal) {
971
972                         /* Try to open the runtime journal, but only
973                          * if it already exists, so that we can flush
974                          * it into the system journal */
975
976                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
977                         free(fn);
978
979                         if (r < 0) {
980                                 if (r != -ENOENT)
981                                         log_warning_errno(r, "Failed to open runtime journal: %m");
982
983                                 r = 0;
984                         }
985
986                 } else {
987
988                         /* OK, we really need the runtime journal, so create
989                          * it if necessary. */
990
991                         (void) mkdir("/run/log", 0755);
992                         (void) mkdir("/run/log/journal", 0755);
993                         (void) mkdir_parents(fn, 0750);
994
995                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
996                         free(fn);
997
998                         if (r < 0)
999                                 return log_error_errno(r, "Failed to open runtime journal: %m");
1000                 }
1001
1002                 if (s->runtime_journal)
1003                         server_fix_perms(s, s->runtime_journal, 0);
1004         }
1005
1006         available_space(s, true);
1007
1008         return r;
1009 }
1010
1011 int server_flush_to_var(Server *s) {
1012         sd_id128_t machine;
1013         sd_journal *j = NULL;
1014         char ts[FORMAT_TIMESPAN_MAX];
1015         usec_t start;
1016         unsigned n = 0;
1017         int r;
1018
1019         assert(s);
1020
1021         if (s->storage != STORAGE_AUTO &&
1022             s->storage != STORAGE_PERSISTENT)
1023                 return 0;
1024
1025         if (!s->runtime_journal)
1026                 return 0;
1027
1028         system_journal_open(s, true);
1029
1030         if (!s->system_journal)
1031                 return 0;
1032
1033         log_debug("Flushing to /var...");
1034
1035         start = now(CLOCK_MONOTONIC);
1036
1037         r = sd_id128_get_machine(&machine);
1038         if (r < 0)
1039                 return r;
1040
1041         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1042         if (r < 0)
1043                 return log_error_errno(r, "Failed to read runtime journal: %m");
1044
1045         sd_journal_set_data_threshold(j, 0);
1046
1047         SD_JOURNAL_FOREACH(j) {
1048                 Object *o = NULL;
1049                 JournalFile *f;
1050
1051                 f = j->current_file;
1052                 assert(f && f->current_offset > 0);
1053
1054                 n++;
1055
1056                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1057                 if (r < 0) {
1058                         log_error_errno(r, "Can't read entry: %m");
1059                         goto finish;
1060                 }
1061
1062                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1063                 if (r >= 0)
1064                         continue;
1065
1066                 if (!shall_try_append_again(s->system_journal, r)) {
1067                         log_error_errno(r, "Can't write entry: %m");
1068                         goto finish;
1069                 }
1070
1071                 server_rotate(s);
1072                 server_vacuum(s);
1073
1074                 if (!s->system_journal) {
1075                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1076                         r = -EIO;
1077                         goto finish;
1078                 }
1079
1080                 log_debug("Retrying write.");
1081                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1082                 if (r < 0) {
1083                         log_error_errno(r, "Can't write entry: %m");
1084                         goto finish;
1085                 }
1086         }
1087
1088 finish:
1089         journal_file_post_change(s->system_journal);
1090
1091         journal_file_close(s->runtime_journal);
1092         s->runtime_journal = NULL;
1093
1094         if (r >= 0)
1095                 rm_rf("/run/log/journal", false, true, false);
1096
1097         sd_journal_close(j);
1098
1099         server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1100
1101         return r;
1102 }
1103
1104 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1105         Server *s = userdata;
1106
1107         assert(s);
1108         assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1109
1110         if (revents != EPOLLIN) {
1111                 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1112                 return -EIO;
1113         }
1114
1115         for (;;) {
1116                 struct ucred *ucred = NULL;
1117                 struct timeval *tv = NULL;
1118                 struct cmsghdr *cmsg;
1119                 char *label = NULL;
1120                 size_t label_len = 0;
1121                 struct iovec iovec;
1122
1123                 union {
1124                         struct cmsghdr cmsghdr;
1125
1126                         /* We use NAME_MAX space for the SELinux label
1127                          * here. The kernel currently enforces no
1128                          * limit, but according to suggestions from
1129                          * the SELinux people this will change and it
1130                          * will probably be identical to NAME_MAX. For
1131                          * now we use that, but this should be updated
1132                          * one day when the final limit is known. */
1133                         uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1134                                     CMSG_SPACE(sizeof(struct timeval)) +
1135                                     CMSG_SPACE(sizeof(int)) + /* fd */
1136                                     CMSG_SPACE(NAME_MAX)]; /* selinux label */
1137                 } control = {};
1138                 union sockaddr_union sa = {};
1139                 struct msghdr msghdr = {
1140                         .msg_iov = &iovec,
1141                         .msg_iovlen = 1,
1142                         .msg_control = &control,
1143                         .msg_controllen = sizeof(control),
1144                         .msg_name = &sa,
1145                         .msg_namelen = sizeof(sa),
1146                 };
1147
1148                 ssize_t n;
1149                 int *fds = NULL;
1150                 unsigned n_fds = 0;
1151                 int v = 0;
1152                 size_t m;
1153
1154                 /* Try to get the right size, if we can. (Not all
1155                  * sockets support SIOCINQ, hence we just try, but
1156                  * don't rely on it. */
1157                 (void) ioctl(fd, SIOCINQ, &v);
1158
1159                 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1160                 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1161                                     (size_t) LINE_MAX,
1162                                     ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1163
1164                 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1165                         return log_oom();
1166
1167                 iovec.iov_base = s->buffer;
1168                 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1169
1170                 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1171                 if (n < 0) {
1172                         if (errno == EINTR || errno == EAGAIN)
1173                                 return 0;
1174
1175                         log_error_errno(errno, "recvmsg() failed: %m");
1176                         return -errno;
1177                 }
1178
1179                 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1180
1181                         if (cmsg->cmsg_level == SOL_SOCKET &&
1182                             cmsg->cmsg_type == SCM_CREDENTIALS &&
1183                             cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1184                                 ucred = (struct ucred*) CMSG_DATA(cmsg);
1185                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1186                                  cmsg->cmsg_type == SCM_SECURITY) {
1187                                 label = (char*) CMSG_DATA(cmsg);
1188                                 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1189                         } else if (cmsg->cmsg_level == SOL_SOCKET &&
1190                                    cmsg->cmsg_type == SO_TIMESTAMP &&
1191                                    cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1192                                 tv = (struct timeval*) CMSG_DATA(cmsg);
1193                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1194                                  cmsg->cmsg_type == SCM_RIGHTS) {
1195                                 fds = (int*) CMSG_DATA(cmsg);
1196                                 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1197                         }
1198                 }
1199
1200                 /* And a trailing NUL, just in case */
1201                 s->buffer[n] = 0;
1202
1203                 if (fd == s->syslog_fd) {
1204                         if (n > 0 && n_fds == 0)
1205                                 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1206                         else if (n_fds > 0)
1207                                 log_warning("Got file descriptors via syslog socket. Ignoring.");
1208
1209                 } else if (fd == s->native_fd) {
1210                         if (n > 0 && n_fds == 0)
1211                                 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1212                         else if (n == 0 && n_fds == 1)
1213                                 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1214                         else if (n_fds > 0)
1215                                 log_warning("Got too many file descriptors via native socket. Ignoring.");
1216
1217                 } else {
1218                         assert(fd == s->audit_fd);
1219
1220                         if (n > 0 && n_fds == 0)
1221                                 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1222                         else if (n_fds > 0)
1223                                 log_warning("Got file descriptors via audit socket. Ignoring.");
1224                 }
1225
1226                 close_many(fds, n_fds);
1227         }
1228 }
1229
1230 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1231         Server *s = userdata;
1232
1233         assert(s);
1234
1235         log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
1236
1237         server_flush_to_var(s);
1238         server_sync(s);
1239         server_vacuum(s);
1240
1241         touch("/run/systemd/journal/flushed");
1242
1243         return 0;
1244 }
1245
1246 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1247         Server *s = userdata;
1248
1249         assert(s);
1250
1251         log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
1252         server_rotate(s);
1253         server_vacuum(s);
1254
1255         return 0;
1256 }
1257
1258 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1259         Server *s = userdata;
1260
1261         assert(s);
1262
1263         log_received_signal(LOG_INFO, si);
1264
1265         sd_event_exit(s->event, 0);
1266         return 0;
1267 }
1268
1269 static int setup_signals(Server *s) {
1270         sigset_t mask;
1271         int r;
1272
1273         assert(s);
1274
1275         assert_se(sigemptyset(&mask) == 0);
1276         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1277         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1278
1279         r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1280         if (r < 0)
1281                 return r;
1282
1283         r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1284         if (r < 0)
1285                 return r;
1286
1287         r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1288         if (r < 0)
1289                 return r;
1290
1291         r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1292         if (r < 0)
1293                 return r;
1294
1295         return 0;
1296 }
1297
1298 static int server_parse_proc_cmdline(Server *s) {
1299         _cleanup_free_ char *line = NULL;
1300         const char *w, *state;
1301         size_t l;
1302         int r;
1303
1304         r = proc_cmdline(&line);
1305         if (r < 0) {
1306                 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
1307                 return 0;
1308         }
1309
1310         FOREACH_WORD_QUOTED(w, l, line, state) {
1311                 _cleanup_free_ char *word;
1312
1313                 word = strndup(w, l);
1314                 if (!word)
1315                         return -ENOMEM;
1316
1317                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1318                         r = parse_boolean(word + 35);
1319                         if (r < 0)
1320                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1321                         else
1322                                 s->forward_to_syslog = r;
1323                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1324                         r = parse_boolean(word + 33);
1325                         if (r < 0)
1326                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1327                         else
1328                                 s->forward_to_kmsg = r;
1329                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1330                         r = parse_boolean(word + 36);
1331                         if (r < 0)
1332                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1333                         else
1334                                 s->forward_to_console = r;
1335                 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1336                         r = parse_boolean(word + 33);
1337                         if (r < 0)
1338                                 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1339                         else
1340                                 s->forward_to_wall = r;
1341                 } else if (startswith(word, "systemd.journald"))
1342                         log_warning("Invalid systemd.journald parameter. Ignoring.");
1343         }
1344         /* do not warn about state here, since probably systemd already did */
1345
1346         return 0;
1347 }
1348
1349 static int server_parse_config_file(Server *s) {
1350         assert(s);
1351
1352         return config_parse_many("/etc/systemd/journald.conf",
1353                                  CONF_DIRS_NULSTR("systemd/journald.conf"),
1354                                  "Journal\0",
1355                                  config_item_perf_lookup, journald_gperf_lookup,
1356                                  false, s);
1357 }
1358
1359 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1360         Server *s = userdata;
1361
1362         assert(s);
1363
1364         server_sync(s);
1365         return 0;
1366 }
1367
1368 int server_schedule_sync(Server *s, int priority) {
1369         int r;
1370
1371         assert(s);
1372
1373         if (priority <= LOG_CRIT) {
1374                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1375                 server_sync(s);
1376                 return 0;
1377         }
1378
1379         if (s->sync_scheduled)
1380                 return 0;
1381
1382         if (s->sync_interval_usec > 0) {
1383                 usec_t when;
1384
1385                 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1386                 if (r < 0)
1387                         return r;
1388
1389                 when += s->sync_interval_usec;
1390
1391                 if (!s->sync_event_source) {
1392                         r = sd_event_add_time(
1393                                         s->event,
1394                                         &s->sync_event_source,
1395                                         CLOCK_MONOTONIC,
1396                                         when, 0,
1397                                         server_dispatch_sync, s);
1398                         if (r < 0)
1399                                 return r;
1400
1401                         r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1402                 } else {
1403                         r = sd_event_source_set_time(s->sync_event_source, when);
1404                         if (r < 0)
1405                                 return r;
1406
1407                         r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1408                 }
1409                 if (r < 0)
1410                         return r;
1411
1412                 s->sync_scheduled = true;
1413         }
1414
1415         return 0;
1416 }
1417
1418 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1419         Server *s = userdata;
1420
1421         assert(s);
1422
1423         server_cache_hostname(s);
1424         return 0;
1425 }
1426
1427 static int server_open_hostname(Server *s) {
1428         int r;
1429
1430         assert(s);
1431
1432         s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1433         if (s->hostname_fd < 0)
1434                 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1435
1436         r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1437         if (r < 0) {
1438                 /* kernels prior to 3.2 don't support polling this file. Ignore
1439                  * the failure. */
1440                 if (r == -EPERM) {
1441                         log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1442                                         strerror(-r));
1443                         s->hostname_fd = safe_close(s->hostname_fd);
1444                         return 0;
1445                 }
1446
1447                 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1448         }
1449
1450         r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1451         if (r < 0)
1452                 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1453
1454         return 0;
1455 }
1456
1457 int server_init(Server *s) {
1458         _cleanup_fdset_free_ FDSet *fds = NULL;
1459         int n, r, fd;
1460
1461         assert(s);
1462
1463         zero(*s);
1464         s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = -1;
1465         s->compress = true;
1466         s->seal = true;
1467
1468         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1469         s->sync_scheduled = false;
1470
1471         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1472         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1473
1474         s->forward_to_wall = true;
1475
1476         s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1477
1478         s->max_level_store = LOG_DEBUG;
1479         s->max_level_syslog = LOG_DEBUG;
1480         s->max_level_kmsg = LOG_NOTICE;
1481         s->max_level_console = LOG_INFO;
1482         s->max_level_wall = LOG_EMERG;
1483
1484         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1485         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1486
1487         server_parse_config_file(s);
1488         server_parse_proc_cmdline(s);
1489         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1490                 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1491                           s->rate_limit_interval, s->rate_limit_burst);
1492                 s->rate_limit_interval = s->rate_limit_burst = 0;
1493         }
1494
1495         mkdir_p("/run/systemd/journal", 0755);
1496
1497         s->user_journals = ordered_hashmap_new(NULL);
1498         if (!s->user_journals)
1499                 return log_oom();
1500
1501         s->mmap = mmap_cache_new();
1502         if (!s->mmap)
1503                 return log_oom();
1504
1505         r = sd_event_default(&s->event);
1506         if (r < 0)
1507                 return log_error_errno(r, "Failed to create event loop: %m");
1508
1509         sd_event_set_watchdog(s->event, true);
1510
1511         n = sd_listen_fds(true);
1512         if (n < 0)
1513                 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1514
1515         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1516
1517                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1518
1519                         if (s->native_fd >= 0) {
1520                                 log_error("Too many native sockets passed.");
1521                                 return -EINVAL;
1522                         }
1523
1524                         s->native_fd = fd;
1525
1526                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1527
1528                         if (s->stdout_fd >= 0) {
1529                                 log_error("Too many stdout sockets passed.");
1530                                 return -EINVAL;
1531                         }
1532
1533                         s->stdout_fd = fd;
1534
1535                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1536                            sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1537
1538                         if (s->syslog_fd >= 0) {
1539                                 log_error("Too many /dev/log sockets passed.");
1540                                 return -EINVAL;
1541                         }
1542
1543                         s->syslog_fd = fd;
1544
1545                 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1546
1547                         if (s->audit_fd >= 0) {
1548                                 log_error("Too many audit sockets passed.");
1549                                 return -EINVAL;
1550                         }
1551
1552                         s->audit_fd = fd;
1553
1554                 } else {
1555
1556                         if (!fds) {
1557                                 fds = fdset_new();
1558                                 if (!fds)
1559                                         return log_oom();
1560                         }
1561
1562                         r = fdset_put(fds, fd);
1563                         if (r < 0)
1564                                 return log_oom();
1565                 }
1566         }
1567
1568         r = server_open_stdout_socket(s, fds);
1569         if (r < 0)
1570                 return r;
1571
1572         if (fdset_size(fds) > 0) {
1573                 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1574                 fds = fdset_free(fds);
1575         }
1576
1577         r = server_open_syslog_socket(s);
1578         if (r < 0)
1579                 return r;
1580
1581         r = server_open_native_socket(s);
1582         if (r < 0)
1583                 return r;
1584
1585         r = server_open_dev_kmsg(s);
1586         if (r < 0)
1587                 return r;
1588
1589         r = server_open_audit(s);
1590         if (r < 0)
1591                 return r;
1592
1593         r = server_open_kernel_seqnum(s);
1594         if (r < 0)
1595                 return r;
1596
1597         r = server_open_hostname(s);
1598         if (r < 0)
1599                 return r;
1600
1601         r = setup_signals(s);
1602         if (r < 0)
1603                 return r;
1604
1605         s->udev = udev_new();
1606         if (!s->udev)
1607                 return -ENOMEM;
1608
1609         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1610         if (!s->rate_limit)
1611                 return -ENOMEM;
1612
1613         r = cg_get_root_path(&s->cgroup_root);
1614         if (r < 0)
1615                 return r;
1616
1617         server_cache_hostname(s);
1618         server_cache_boot_id(s);
1619         server_cache_machine_id(s);
1620
1621         r = system_journal_open(s, false);
1622         if (r < 0)
1623                 return r;
1624
1625         return 0;
1626 }
1627
1628 void server_maybe_append_tags(Server *s) {
1629 #ifdef HAVE_GCRYPT
1630         JournalFile *f;
1631         Iterator i;
1632         usec_t n;
1633
1634         n = now(CLOCK_REALTIME);
1635
1636         if (s->system_journal)
1637                 journal_file_maybe_append_tag(s->system_journal, n);
1638
1639         ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1640                 journal_file_maybe_append_tag(f, n);
1641 #endif
1642 }
1643
1644 void server_done(Server *s) {
1645         JournalFile *f;
1646         assert(s);
1647
1648         while (s->stdout_streams)
1649                 stdout_stream_free(s->stdout_streams);
1650
1651         if (s->system_journal)
1652                 journal_file_close(s->system_journal);
1653
1654         if (s->runtime_journal)
1655                 journal_file_close(s->runtime_journal);
1656
1657         while ((f = ordered_hashmap_steal_first(s->user_journals)))
1658                 journal_file_close(f);
1659
1660         ordered_hashmap_free(s->user_journals);
1661
1662         sd_event_source_unref(s->syslog_event_source);
1663         sd_event_source_unref(s->native_event_source);
1664         sd_event_source_unref(s->stdout_event_source);
1665         sd_event_source_unref(s->dev_kmsg_event_source);
1666         sd_event_source_unref(s->audit_event_source);
1667         sd_event_source_unref(s->sync_event_source);
1668         sd_event_source_unref(s->sigusr1_event_source);
1669         sd_event_source_unref(s->sigusr2_event_source);
1670         sd_event_source_unref(s->sigterm_event_source);
1671         sd_event_source_unref(s->sigint_event_source);
1672         sd_event_source_unref(s->hostname_event_source);
1673         sd_event_unref(s->event);
1674
1675         safe_close(s->syslog_fd);
1676         safe_close(s->native_fd);
1677         safe_close(s->stdout_fd);
1678         safe_close(s->dev_kmsg_fd);
1679         safe_close(s->audit_fd);
1680         safe_close(s->hostname_fd);
1681
1682         if (s->rate_limit)
1683                 journal_rate_limit_free(s->rate_limit);
1684
1685         if (s->kernel_seqnum)
1686                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1687
1688         free(s->buffer);
1689         free(s->tty_path);
1690         free(s->cgroup_root);
1691         free(s->hostname_field);
1692
1693         if (s->mmap)
1694                 mmap_cache_unref(s->mmap);
1695
1696         if (s->udev)
1697                 udev_unref(s->udev);
1698 }