chiark / gitweb /
journald: allow restarting journald without losing stream connections
[elogind.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
26 #include <sys/mman.h>
27 #include <sys/timerfd.h>
28
29 #include <libudev.h>
30
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33 #include "sd-daemon.h"
34 #include "fileio.h"
35 #include "mkdir.h"
36 #include "hashmap.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
40 #include "list.h"
41 #include "missing.h"
42 #include "conf-parser.h"
43 #include "selinux-util.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-rate-limit.h"
48 #include "journald-kmsg.h"
49 #include "journald-syslog.h"
50 #include "journald-stream.h"
51 #include "journald-console.h"
52 #include "journald-native.h"
53 #include "journald-audit.h"
54 #include "journald-server.h"
55
56 #ifdef HAVE_ACL
57 #include <sys/acl.h>
58 #include <acl/libacl.h>
59 #include "acl-util.h"
60 #endif
61
62 #ifdef HAVE_SELINUX
63 #include <selinux/selinux.h>
64 #endif
65
/* Upper bound on the number of per-user journal files kept open at once;
 * find_journal() closes the oldest entry before opening another one. */
#define USER_JOURNALS_MAX 1024

/* Built-in default values (5 minutes, 30 seconds, 1000, one month).
 * NOTE(review): their consumers are not visible in this part of the file;
 * presumably they seed the Server configuration -- confirm. */
#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* How long a value computed by available_space() stays valid before the
 * journal directory is scanned again. */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
74
/* String names for the Storage enum values, indexed by enum value. */
static const char* const storage_table[_STORAGE_MAX] = {
        [STORAGE_AUTO] = "auto",
        [STORAGE_VOLATILE] = "volatile",
        [STORAGE_PERSISTENT] = "persistent",
        [STORAGE_NONE] = "none"
};

/* NOTE(review): presumably these macros generate the string<->enum lookup
 * helpers and the config_parse_storage() parser from the table above;
 * the macro definitions are not visible here -- confirm. */
DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
84
/* String names for the SplitMode enum values, indexed by enum value. */
static const char* const split_mode_table[_SPLIT_MAX] = {
        [SPLIT_LOGIN] = "login",
        [SPLIT_UID] = "uid",
        [SPLIT_NONE] = "none",
};

/* NOTE(review): presumably these macros generate the string<->enum lookup
 * helpers and the config_parse_split_mode() parser from the table above;
 * the macro definitions are not visible here -- confirm. */
DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
93
94 static uint64_t available_space(Server *s, bool verbose) {
95         char ids[33];
96         _cleanup_free_ char *p = NULL;
97         sd_id128_t machine;
98         struct statvfs ss;
99         uint64_t sum = 0, ss_avail = 0, avail = 0;
100         int r;
101         _cleanup_closedir_ DIR *d = NULL;
102         usec_t ts;
103         const char *f;
104         JournalMetrics *m;
105
106         ts = now(CLOCK_MONOTONIC);
107
108         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
109             && !verbose)
110                 return s->cached_available_space;
111
112         r = sd_id128_get_machine(&machine);
113         if (r < 0)
114                 return 0;
115
116         if (s->system_journal) {
117                 f = "/var/log/journal/";
118                 m = &s->system_metrics;
119         } else {
120                 f = "/run/log/journal/";
121                 m = &s->runtime_metrics;
122         }
123
124         assert(m);
125
126         p = strappend(f, sd_id128_to_string(machine, ids));
127         if (!p)
128                 return 0;
129
130         d = opendir(p);
131         if (!d)
132                 return 0;
133
134         if (fstatvfs(dirfd(d), &ss) < 0)
135                 return 0;
136
137         for (;;) {
138                 struct stat st;
139                 struct dirent *de;
140
141                 errno = 0;
142                 de = readdir(d);
143                 if (!de && errno != 0)
144                         return 0;
145
146                 if (!de)
147                         break;
148
149                 if (!endswith(de->d_name, ".journal") &&
150                     !endswith(de->d_name, ".journal~"))
151                         continue;
152
153                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
154                         continue;
155
156                 if (!S_ISREG(st.st_mode))
157                         continue;
158
159                 sum += (uint64_t) st.st_blocks * 512UL;
160         }
161
162         ss_avail = ss.f_bsize * ss.f_bavail;
163
164         /* If we reached a high mark, we will always allow this much
165          * again, unless usage goes above max_use. This watermark
166          * value is cached so that we don't give up space on pressure,
167          * but hover below the maximum usage. */
168
169         if (m->use < sum)
170                 m->use = sum;
171
172         avail = LESS_BY(ss_avail, m->keep_free);
173
174         s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
175         s->cached_available_space_timestamp = ts;
176
177         if (verbose) {
178                 char    fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
179                         fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
180
181                 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
182                                       "%s journal is using %s (max allowed %s, "
183                                       "trying to leave %s free of %s available → current limit %s).",
184                                       s->system_journal ? "Permanent" : "Runtime",
185                                       format_bytes(fb1, sizeof(fb1), sum),
186                                       format_bytes(fb2, sizeof(fb2), m->max_use),
187                                       format_bytes(fb3, sizeof(fb3), m->keep_free),
188                                       format_bytes(fb4, sizeof(fb4), ss_avail),
189                                       format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
190         }
191
192         return s->cached_available_space;
193 }
194
195 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
196         int r;
197 #ifdef HAVE_ACL
198         acl_t acl;
199         acl_entry_t entry;
200         acl_permset_t permset;
201 #endif
202
203         assert(f);
204
205         r = fchmod(f->fd, 0640);
206         if (r < 0)
207                 log_warning_errno(r, "Failed to fix access mode on %s, ignoring: %m", f->path);
208
209 #ifdef HAVE_ACL
210         if (uid <= SYSTEM_UID_MAX)
211                 return;
212
213         acl = acl_get_fd(f->fd);
214         if (!acl) {
215                 log_warning_errno(errno, "Failed to read ACL on %s, ignoring: %m", f->path);
216                 return;
217         }
218
219         r = acl_find_uid(acl, uid, &entry);
220         if (r <= 0) {
221
222                 if (acl_create_entry(&acl, &entry) < 0 ||
223                     acl_set_tag_type(entry, ACL_USER) < 0 ||
224                     acl_set_qualifier(entry, &uid) < 0) {
225                         log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
226                         goto finish;
227                 }
228         }
229
230         /* We do not recalculate the mask unconditionally here,
231          * so that the fchmod() mask above stays intact. */
232         if (acl_get_permset(entry, &permset) < 0 ||
233             acl_add_perm(permset, ACL_READ) < 0 ||
234             calc_acl_mask_if_needed(&acl) < 0) {
235                 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
236                 goto finish;
237         }
238
239         if (acl_set_fd(f->fd, acl) < 0)
240                 log_warning_errno(errno, "Failed to set ACL on %s, ignoring: %m", f->path);
241
242 finish:
243         acl_free(acl);
244 #endif
245 }
246
247 static JournalFile* find_journal(Server *s, uid_t uid) {
248         _cleanup_free_ char *p = NULL;
249         int r;
250         JournalFile *f;
251         sd_id128_t machine;
252
253         assert(s);
254
255         /* We split up user logs only on /var, not on /run. If the
256          * runtime file is open, we write to it exclusively, in order
257          * to guarantee proper order as soon as we flush /run to
258          * /var and close the runtime file. */
259
260         if (s->runtime_journal)
261                 return s->runtime_journal;
262
263         if (uid <= SYSTEM_UID_MAX)
264                 return s->system_journal;
265
266         r = sd_id128_get_machine(&machine);
267         if (r < 0)
268                 return s->system_journal;
269
270         f = ordered_hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
271         if (f)
272                 return f;
273
274         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
275                      SD_ID128_FORMAT_VAL(machine), uid) < 0)
276                 return s->system_journal;
277
278         while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
279                 /* Too many open? Then let's close one */
280                 f = ordered_hashmap_steal_first(s->user_journals);
281                 assert(f);
282                 journal_file_close(f);
283         }
284
285         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
286         if (r < 0)
287                 return s->system_journal;
288
289         server_fix_perms(s, f, uid);
290
291         r = ordered_hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
292         if (r < 0) {
293                 journal_file_close(f);
294                 return s->system_journal;
295         }
296
297         return f;
298 }
299
300 static int do_rotate(
301                 Server *s,
302                 JournalFile **f,
303                 const char* name,
304                 bool seal,
305                 uint32_t uid) {
306
307         int r;
308         assert(s);
309
310         if (!*f)
311                 return -EINVAL;
312
313         r = journal_file_rotate(f, s->compress, seal);
314         if (r < 0)
315                 if (*f)
316                         log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
317                 else
318                         log_error_errno(r, "Failed to create new %s journal: %m", name);
319         else
320                 server_fix_perms(s, *f, uid);
321
322         return r;
323 }
324
325 void server_rotate(Server *s) {
326         JournalFile *f;
327         void *k;
328         Iterator i;
329         int r;
330
331         log_debug("Rotating...");
332
333         do_rotate(s, &s->runtime_journal, "runtime", false, 0);
334         do_rotate(s, &s->system_journal, "system", s->seal, 0);
335
336         ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
337                 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
338                 if (r >= 0)
339                         ordered_hashmap_replace(s->user_journals, k, f);
340                 else if (!f)
341                         /* Old file has been closed and deallocated */
342                         ordered_hashmap_remove(s->user_journals, k);
343         }
344 }
345
346 void server_sync(Server *s) {
347         JournalFile *f;
348         void *k;
349         Iterator i;
350         int r;
351
352         if (s->system_journal) {
353                 r = journal_file_set_offline(s->system_journal);
354                 if (r < 0)
355                         log_error_errno(r, "Failed to sync system journal: %m");
356         }
357
358         ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
359                 r = journal_file_set_offline(f);
360                 if (r < 0)
361                         log_error_errno(r, "Failed to sync user journal: %m");
362         }
363
364         if (s->sync_event_source) {
365                 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
366                 if (r < 0)
367                         log_error_errno(r, "Failed to disable sync timer source: %m");
368         }
369
370         s->sync_scheduled = false;
371 }
372
373 static void do_vacuum(
374                 Server *s,
375                 const char *id,
376                 JournalFile *f,
377                 const char* path,
378                 JournalMetrics *metrics) {
379
380         const char *p;
381         int r;
382
383         if (!f)
384                 return;
385
386         p = strappenda(path, id);
387         r = journal_directory_vacuum(p, metrics->max_use, s->max_retention_usec, &s->oldest_file_usec, false);
388         if (r < 0 && r != -ENOENT)
389                 log_error_errno(r, "Failed to vacuum %s: %m", p);
390 }
391
392 void server_vacuum(Server *s) {
393         char ids[33];
394         sd_id128_t machine;
395         int r;
396
397         log_debug("Vacuuming...");
398
399         s->oldest_file_usec = 0;
400
401         r = sd_id128_get_machine(&machine);
402         if (r < 0) {
403                 log_error_errno(r, "Failed to get machine ID: %m");
404                 return;
405         }
406         sd_id128_to_string(machine, ids);
407
408         do_vacuum(s, ids, s->system_journal, "/var/log/journal/", &s->system_metrics);
409         do_vacuum(s, ids, s->runtime_journal, "/run/log/journal/", &s->runtime_metrics);
410
411         s->cached_available_space_timestamp = 0;
412 }
413
414 static void server_cache_machine_id(Server *s) {
415         sd_id128_t id;
416         int r;
417
418         assert(s);
419
420         r = sd_id128_get_machine(&id);
421         if (r < 0)
422                 return;
423
424         sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
425 }
426
427 static void server_cache_boot_id(Server *s) {
428         sd_id128_t id;
429         int r;
430
431         assert(s);
432
433         r = sd_id128_get_boot(&id);
434         if (r < 0)
435                 return;
436
437         sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
438 }
439
440 static void server_cache_hostname(Server *s) {
441         _cleanup_free_ char *t = NULL;
442         char *x;
443
444         assert(s);
445
446         t = gethostname_malloc();
447         if (!t)
448                 return;
449
450         x = strappend("_HOSTNAME=", t);
451         if (!x)
452                 return;
453
454         free(s->hostname_field);
455         s->hostname_field = x;
456 }
457
458 static bool shall_try_append_again(JournalFile *f, int r) {
459
460         /* -E2BIG            Hit configured limit
461            -EFBIG            Hit fs limit
462            -EDQUOT           Quota limit hit
463            -ENOSPC           Disk full
464            -EIO              I/O error of some kind (mmap)
465            -EHOSTDOWN        Other machine
466            -EBUSY            Unclean shutdown
467            -EPROTONOSUPPORT  Unsupported feature
468            -EBADMSG          Corrupted
469            -ENODATA          Truncated
470            -ESHUTDOWN        Already archived
471            -EIDRM            Journal file has been deleted */
472
473         if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
474                 log_debug("%s: Allocation limit reached, rotating.", f->path);
475         else if (r == -EHOSTDOWN)
476                 log_info("%s: Journal file from other machine, rotating.", f->path);
477         else if (r == -EBUSY)
478                 log_info("%s: Unclean shutdown, rotating.", f->path);
479         else if (r == -EPROTONOSUPPORT)
480                 log_info("%s: Unsupported feature, rotating.", f->path);
481         else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
482                 log_warning("%s: Journal file corrupted, rotating.", f->path);
483         else if (r == -EIO)
484                 log_warning("%s: IO error, rotating.", f->path);
485         else if (r == -EIDRM)
486                 log_warning("%s: Journal file has been deleted, rotating.", f->path);
487         else
488                 return false;
489
490         return true;
491 }
492
493 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
494         JournalFile *f;
495         bool vacuumed = false;
496         int r;
497
498         assert(s);
499         assert(iovec);
500         assert(n > 0);
501
502         f = find_journal(s, uid);
503         if (!f)
504                 return;
505
506         if (journal_file_rotate_suggested(f, s->max_file_usec)) {
507                 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
508                 server_rotate(s);
509                 server_vacuum(s);
510                 vacuumed = true;
511
512                 f = find_journal(s, uid);
513                 if (!f)
514                         return;
515         }
516
517         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
518         if (r >= 0) {
519                 server_schedule_sync(s, priority);
520                 return;
521         }
522
523         if (vacuumed || !shall_try_append_again(f, r)) {
524                 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
525                 return;
526         }
527
528         server_rotate(s);
529         server_vacuum(s);
530
531         f = find_journal(s, uid);
532         if (!f)
533                 return;
534
535         log_debug("Retrying write.");
536         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
537         if (r < 0)
538                 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
539         else
540                 server_schedule_sync(s, priority);
541 }
542
543 static void dispatch_message_real(
544                 Server *s,
545                 struct iovec *iovec, unsigned n, unsigned m,
546                 const struct ucred *ucred,
547                 const struct timeval *tv,
548                 const char *label, size_t label_len,
549                 const char *unit_id,
550                 int priority,
551                 pid_t object_pid) {
552
553         char    pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
554                 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
555                 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
556                 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
557                 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
558                 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
559                 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
560                 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
561         uid_t object_uid;
562         gid_t object_gid;
563         char *x;
564         int r;
565         char *t, *c;
566         uid_t realuid = 0, owner = 0, journal_uid;
567         bool owner_valid = false;
568 #ifdef HAVE_AUDIT
569         char    audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
570                 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
571                 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
572                 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
573
574         uint32_t audit;
575         uid_t loginuid;
576 #endif
577
578         assert(s);
579         assert(iovec);
580         assert(n > 0);
581         assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
582
583         if (ucred) {
584                 realuid = ucred->uid;
585
586                 sprintf(pid, "_PID="PID_FMT, ucred->pid);
587                 IOVEC_SET_STRING(iovec[n++], pid);
588
589                 sprintf(uid, "_UID="UID_FMT, ucred->uid);
590                 IOVEC_SET_STRING(iovec[n++], uid);
591
592                 sprintf(gid, "_GID="GID_FMT, ucred->gid);
593                 IOVEC_SET_STRING(iovec[n++], gid);
594
595                 r = get_process_comm(ucred->pid, &t);
596                 if (r >= 0) {
597                         x = strappenda("_COMM=", t);
598                         free(t);
599                         IOVEC_SET_STRING(iovec[n++], x);
600                 }
601
602                 r = get_process_exe(ucred->pid, &t);
603                 if (r >= 0) {
604                         x = strappenda("_EXE=", t);
605                         free(t);
606                         IOVEC_SET_STRING(iovec[n++], x);
607                 }
608
609                 r = get_process_cmdline(ucred->pid, 0, false, &t);
610                 if (r >= 0) {
611                         x = strappenda("_CMDLINE=", t);
612                         free(t);
613                         IOVEC_SET_STRING(iovec[n++], x);
614                 }
615
616                 r = get_process_capeff(ucred->pid, &t);
617                 if (r >= 0) {
618                         x = strappenda("_CAP_EFFECTIVE=", t);
619                         free(t);
620                         IOVEC_SET_STRING(iovec[n++], x);
621                 }
622
623 #ifdef HAVE_AUDIT
624                 r = audit_session_from_pid(ucred->pid, &audit);
625                 if (r >= 0) {
626                         sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
627                         IOVEC_SET_STRING(iovec[n++], audit_session);
628                 }
629
630                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
631                 if (r >= 0) {
632                         sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
633                         IOVEC_SET_STRING(iovec[n++], audit_loginuid);
634                 }
635 #endif
636
637                 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
638                 if (r >= 0) {
639                         char *session = NULL;
640
641                         x = strappenda("_SYSTEMD_CGROUP=", c);
642                         IOVEC_SET_STRING(iovec[n++], x);
643
644                         r = cg_path_get_session(c, &t);
645                         if (r >= 0) {
646                                 session = strappenda("_SYSTEMD_SESSION=", t);
647                                 free(t);
648                                 IOVEC_SET_STRING(iovec[n++], session);
649                         }
650
651                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
652                                 owner_valid = true;
653
654                                 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
655                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
656                         }
657
658                         if (cg_path_get_unit(c, &t) >= 0) {
659                                 x = strappenda("_SYSTEMD_UNIT=", t);
660                                 free(t);
661                                 IOVEC_SET_STRING(iovec[n++], x);
662                         } else if (unit_id && !session) {
663                                 x = strappenda("_SYSTEMD_UNIT=", unit_id);
664                                 IOVEC_SET_STRING(iovec[n++], x);
665                         }
666
667                         if (cg_path_get_user_unit(c, &t) >= 0) {
668                                 x = strappenda("_SYSTEMD_USER_UNIT=", t);
669                                 free(t);
670                                 IOVEC_SET_STRING(iovec[n++], x);
671                         } else if (unit_id && session) {
672                                 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
673                                 IOVEC_SET_STRING(iovec[n++], x);
674                         }
675
676                         if (cg_path_get_slice(c, &t) >= 0) {
677                                 x = strappenda("_SYSTEMD_SLICE=", t);
678                                 free(t);
679                                 IOVEC_SET_STRING(iovec[n++], x);
680                         }
681
682                         free(c);
683                 } else if (unit_id) {
684                         x = strappenda("_SYSTEMD_UNIT=", unit_id);
685                         IOVEC_SET_STRING(iovec[n++], x);
686                 }
687
688 #ifdef HAVE_SELINUX
689                 if (mac_selinux_use()) {
690                         if (label) {
691                                 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
692
693                                 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
694                                 IOVEC_SET_STRING(iovec[n++], x);
695                         } else {
696                                 security_context_t con;
697
698                                 if (getpidcon(ucred->pid, &con) >= 0) {
699                                         x = strappenda("_SELINUX_CONTEXT=", con);
700
701                                         freecon(con);
702                                         IOVEC_SET_STRING(iovec[n++], x);
703                                 }
704                         }
705                 }
706 #endif
707         }
708         assert(n <= m);
709
710         if (object_pid) {
711                 r = get_process_uid(object_pid, &object_uid);
712                 if (r >= 0) {
713                         sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
714                         IOVEC_SET_STRING(iovec[n++], o_uid);
715                 }
716
717                 r = get_process_gid(object_pid, &object_gid);
718                 if (r >= 0) {
719                         sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
720                         IOVEC_SET_STRING(iovec[n++], o_gid);
721                 }
722
723                 r = get_process_comm(object_pid, &t);
724                 if (r >= 0) {
725                         x = strappenda("OBJECT_COMM=", t);
726                         free(t);
727                         IOVEC_SET_STRING(iovec[n++], x);
728                 }
729
730                 r = get_process_exe(object_pid, &t);
731                 if (r >= 0) {
732                         x = strappenda("OBJECT_EXE=", t);
733                         free(t);
734                         IOVEC_SET_STRING(iovec[n++], x);
735                 }
736
737                 r = get_process_cmdline(object_pid, 0, false, &t);
738                 if (r >= 0) {
739                         x = strappenda("OBJECT_CMDLINE=", t);
740                         free(t);
741                         IOVEC_SET_STRING(iovec[n++], x);
742                 }
743
744 #ifdef HAVE_AUDIT
745                 r = audit_session_from_pid(object_pid, &audit);
746                 if (r >= 0) {
747                         sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
748                         IOVEC_SET_STRING(iovec[n++], o_audit_session);
749                 }
750
751                 r = audit_loginuid_from_pid(object_pid, &loginuid);
752                 if (r >= 0) {
753                         sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
754                         IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
755                 }
756 #endif
757
758                 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
759                 if (r >= 0) {
760                         x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
761                         IOVEC_SET_STRING(iovec[n++], x);
762
763                         r = cg_path_get_session(c, &t);
764                         if (r >= 0) {
765                                 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
766                                 free(t);
767                                 IOVEC_SET_STRING(iovec[n++], x);
768                         }
769
770                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
771                                 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
772                                 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
773                         }
774
775                         if (cg_path_get_unit(c, &t) >= 0) {
776                                 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
777                                 free(t);
778                                 IOVEC_SET_STRING(iovec[n++], x);
779                         }
780
781                         if (cg_path_get_user_unit(c, &t) >= 0) {
782                                 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
783                                 free(t);
784                                 IOVEC_SET_STRING(iovec[n++], x);
785                         }
786
787                         free(c);
788                 }
789         }
790         assert(n <= m);
791
792         if (tv) {
793                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
794                 IOVEC_SET_STRING(iovec[n++], source_time);
795         }
796
797         /* Note that strictly speaking storing the boot id here is
798          * redundant since the entry includes this in-line
799          * anyway. However, we need this indexed, too. */
800         if (!isempty(s->boot_id_field))
801                 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
802
803         if (!isempty(s->machine_id_field))
804                 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
805
806         if (!isempty(s->hostname_field))
807                 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
808
809         assert(n <= m);
810
811         if (s->split_mode == SPLIT_UID && realuid > 0)
812                 /* Split up strictly by any UID */
813                 journal_uid = realuid;
814         else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
815                 /* Split up by login UIDs.  We do this only if the
816                  * realuid is not root, in order not to accidentally
817                  * leak privileged information to the user that is
818                  * logged by a privileged process that is part of an
819                  * unprivileged session. */
820                 journal_uid = owner;
821         else
822                 journal_uid = 0;
823
824         write_to_journal(s, journal_uid, iovec, n, priority);
825 }
826
827 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
828         char mid[11 + 32 + 1];
829         char buffer[16 + LINE_MAX + 1];
830         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
831         int n = 0;
832         va_list ap;
833         struct ucred ucred = {};
834
835         assert(s);
836         assert(format);
837
838         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
839         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
840
841         memcpy(buffer, "MESSAGE=", 8);
842         va_start(ap, format);
843         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
844         va_end(ap);
845         char_array_0(buffer);
846         IOVEC_SET_STRING(iovec[n++], buffer);
847
848         if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
849                 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
850                 char_array_0(mid);
851                 IOVEC_SET_STRING(iovec[n++], mid);
852         }
853
854         ucred.pid = getpid();
855         ucred.uid = getuid();
856         ucred.gid = getgid();
857
858         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
859 }
860
861 void server_dispatch_message(
862                 Server *s,
863                 struct iovec *iovec, unsigned n, unsigned m,
864                 const struct ucred *ucred,
865                 const struct timeval *tv,
866                 const char *label, size_t label_len,
867                 const char *unit_id,
868                 int priority,
869                 pid_t object_pid) {
870
871         int rl, r;
872         _cleanup_free_ char *path = NULL;
873         char *c;
874
875         assert(s);
876         assert(iovec || n == 0);
877
878         if (n == 0)
879                 return;
880
881         if (LOG_PRI(priority) > s->max_level_store)
882                 return;
883
884         /* Stop early in case the information will not be stored
885          * in a journal. */
886         if (s->storage == STORAGE_NONE)
887                 return;
888
889         if (!ucred)
890                 goto finish;
891
892         r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
893         if (r < 0)
894                 goto finish;
895
896         /* example: /user/lennart/3/foobar
897          *          /system/dbus.service/foobar
898          *
899          * So let's cut of everything past the third /, since that is
900          * where user directories start */
901
902         c = strchr(path, '/');
903         if (c) {
904                 c = strchr(c+1, '/');
905                 if (c) {
906                         c = strchr(c+1, '/');
907                         if (c)
908                                 *c = 0;
909                 }
910         }
911
912         rl = journal_rate_limit_test(s->rate_limit, path,
913                                      priority & LOG_PRIMASK, available_space(s, false));
914
915         if (rl == 0)
916                 return;
917
918         /* Write a suppression message if we suppressed something */
919         if (rl > 1)
920                 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
921                                       "Suppressed %u messages from %s", rl - 1, path);
922
923 finish:
924         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
925 }
926
927
928 static int system_journal_open(Server *s, bool flush_requested) {
929         int r;
930         char *fn;
931         sd_id128_t machine;
932         char ids[33];
933
934         r = sd_id128_get_machine(&machine);
935         if (r < 0)
936                 return log_error_errno(r, "Failed to get machine id: %m");
937
938         sd_id128_to_string(machine, ids);
939
940         if (!s->system_journal &&
941             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
942             (flush_requested
943              || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
944
945                 /* If in auto mode: first try to create the machine
946                  * path, but not the prefix.
947                  *
948                  * If in persistent mode: create /var/log/journal and
949                  * the machine path */
950
951                 if (s->storage == STORAGE_PERSISTENT)
952                         (void) mkdir("/var/log/journal/", 0755);
953
954                 fn = strappenda("/var/log/journal/", ids);
955                 (void) mkdir(fn, 0755);
956
957                 fn = strappenda(fn, "/system.journal");
958                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
959
960                 if (r >= 0)
961                         server_fix_perms(s, s->system_journal, 0);
962                 else if (r < 0) {
963                         if (r != -ENOENT && r != -EROFS)
964                                 log_warning_errno(r, "Failed to open system journal: %m");
965
966                         r = 0;
967                 }
968         }
969
970         if (!s->runtime_journal &&
971             (s->storage != STORAGE_NONE)) {
972
973                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
974                 if (!fn)
975                         return -ENOMEM;
976
977                 if (s->system_journal) {
978
979                         /* Try to open the runtime journal, but only
980                          * if it already exists, so that we can flush
981                          * it into the system journal */
982
983                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
984                         free(fn);
985
986                         if (r < 0) {
987                                 if (r != -ENOENT)
988                                         log_warning_errno(r, "Failed to open runtime journal: %m");
989
990                                 r = 0;
991                         }
992
993                 } else {
994
995                         /* OK, we really need the runtime journal, so create
996                          * it if necessary. */
997
998                         (void) mkdir("/run/log", 0755);
999                         (void) mkdir("/run/log/journal", 0755);
1000                         (void) mkdir_parents(fn, 0750);
1001
1002                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1003                         free(fn);
1004
1005                         if (r < 0)
1006                                 return log_error_errno(r, "Failed to open runtime journal: %m");
1007                 }
1008
1009                 if (s->runtime_journal)
1010                         server_fix_perms(s, s->runtime_journal, 0);
1011         }
1012
1013         available_space(s, true);
1014
1015         return r;
1016 }
1017
1018 int server_flush_to_var(Server *s) {
1019         sd_id128_t machine;
1020         sd_journal *j = NULL;
1021         char ts[FORMAT_TIMESPAN_MAX];
1022         usec_t start;
1023         unsigned n = 0;
1024         int r;
1025
1026         assert(s);
1027
1028         if (s->storage != STORAGE_AUTO &&
1029             s->storage != STORAGE_PERSISTENT)
1030                 return 0;
1031
1032         if (!s->runtime_journal)
1033                 return 0;
1034
1035         system_journal_open(s, true);
1036
1037         if (!s->system_journal)
1038                 return 0;
1039
1040         log_debug("Flushing to /var...");
1041
1042         start = now(CLOCK_MONOTONIC);
1043
1044         r = sd_id128_get_machine(&machine);
1045         if (r < 0)
1046                 return r;
1047
1048         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1049         if (r < 0)
1050                 return log_error_errno(r, "Failed to read runtime journal: %m");
1051
1052         sd_journal_set_data_threshold(j, 0);
1053
1054         SD_JOURNAL_FOREACH(j) {
1055                 Object *o = NULL;
1056                 JournalFile *f;
1057
1058                 f = j->current_file;
1059                 assert(f && f->current_offset > 0);
1060
1061                 n++;
1062
1063                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1064                 if (r < 0) {
1065                         log_error_errno(r, "Can't read entry: %m");
1066                         goto finish;
1067                 }
1068
1069                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1070                 if (r >= 0)
1071                         continue;
1072
1073                 if (!shall_try_append_again(s->system_journal, r)) {
1074                         log_error_errno(r, "Can't write entry: %m");
1075                         goto finish;
1076                 }
1077
1078                 server_rotate(s);
1079                 server_vacuum(s);
1080
1081                 if (!s->system_journal) {
1082                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1083                         r = -EIO;
1084                         goto finish;
1085                 }
1086
1087                 log_debug("Retrying write.");
1088                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1089                 if (r < 0) {
1090                         log_error_errno(r, "Can't write entry: %m");
1091                         goto finish;
1092                 }
1093         }
1094
1095 finish:
1096         journal_file_post_change(s->system_journal);
1097
1098         journal_file_close(s->runtime_journal);
1099         s->runtime_journal = NULL;
1100
1101         if (r >= 0)
1102                 rm_rf("/run/log/journal", false, true, false);
1103
1104         sd_journal_close(j);
1105
1106         server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1107
1108         return r;
1109 }
1110
1111 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1112         Server *s = userdata;
1113
1114         assert(s);
1115         assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1116
1117         if (revents != EPOLLIN) {
1118                 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1119                 return -EIO;
1120         }
1121
1122         for (;;) {
1123                 struct ucred *ucred = NULL;
1124                 struct timeval *tv = NULL;
1125                 struct cmsghdr *cmsg;
1126                 char *label = NULL;
1127                 size_t label_len = 0;
1128                 struct iovec iovec;
1129
1130                 union {
1131                         struct cmsghdr cmsghdr;
1132
1133                         /* We use NAME_MAX space for the SELinux label
1134                          * here. The kernel currently enforces no
1135                          * limit, but according to suggestions from
1136                          * the SELinux people this will change and it
1137                          * will probably be identical to NAME_MAX. For
1138                          * now we use that, but this should be updated
1139                          * one day when the final limit is known. */
1140                         uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1141                                     CMSG_SPACE(sizeof(struct timeval)) +
1142                                     CMSG_SPACE(sizeof(int)) + /* fd */
1143                                     CMSG_SPACE(NAME_MAX)]; /* selinux label */
1144                 } control = {};
1145                 union sockaddr_union sa = {};
1146                 struct msghdr msghdr = {
1147                         .msg_iov = &iovec,
1148                         .msg_iovlen = 1,
1149                         .msg_control = &control,
1150                         .msg_controllen = sizeof(control),
1151                         .msg_name = &sa,
1152                         .msg_namelen = sizeof(sa),
1153                 };
1154
1155                 ssize_t n;
1156                 int *fds = NULL;
1157                 unsigned n_fds = 0;
1158                 int v = 0;
1159                 size_t m;
1160
1161                 /* Try to get the right size, if we can. (Not all
1162                  * sockets support SIOCINQ, hence we just try, but
1163                  * don't rely on it. */
1164                 (void) ioctl(fd, SIOCINQ, &v);
1165
1166                 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1167                 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1168                                     (size_t) LINE_MAX,
1169                                     ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1170
1171                 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1172                         return log_oom();
1173
1174                 iovec.iov_base = s->buffer;
1175                 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1176
1177                 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1178                 if (n < 0) {
1179                         if (errno == EINTR || errno == EAGAIN)
1180                                 return 0;
1181
1182                         log_error_errno(errno, "recvmsg() failed: %m");
1183                         return -errno;
1184                 }
1185                 if (n == 0) {
1186                         log_error("Got EOF on socket.");
1187                         return -ECONNRESET;
1188                 }
1189
1190                 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1191
1192                         if (cmsg->cmsg_level == SOL_SOCKET &&
1193                             cmsg->cmsg_type == SCM_CREDENTIALS &&
1194                             cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1195                                 ucred = (struct ucred*) CMSG_DATA(cmsg);
1196                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1197                                  cmsg->cmsg_type == SCM_SECURITY) {
1198                                 label = (char*) CMSG_DATA(cmsg);
1199                                 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1200                         } else if (cmsg->cmsg_level == SOL_SOCKET &&
1201                                    cmsg->cmsg_type == SO_TIMESTAMP &&
1202                                    cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1203                                 tv = (struct timeval*) CMSG_DATA(cmsg);
1204                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1205                                  cmsg->cmsg_type == SCM_RIGHTS) {
1206                                 fds = (int*) CMSG_DATA(cmsg);
1207                                 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1208                         }
1209                 }
1210
1211                 /* And a trailing NUL, just in case */
1212                 s->buffer[n] = 0;
1213
1214                 if (fd == s->syslog_fd) {
1215                         if (n > 0 && n_fds == 0)
1216                                 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1217                         else if (n_fds > 0)
1218                                 log_warning("Got file descriptors via syslog socket. Ignoring.");
1219
1220                 } else if (fd == s->native_fd) {
1221                         if (n > 0 && n_fds == 0)
1222                                 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1223                         else if (n == 0 && n_fds == 1)
1224                                 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1225                         else if (n_fds > 0)
1226                                 log_warning("Got too many file descriptors via native socket. Ignoring.");
1227
1228                 } else {
1229                         assert(fd == s->audit_fd);
1230
1231                         if (n > 0 && n_fds == 0)
1232                                 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1233                         else if (n_fds > 0)
1234                                 log_warning("Got file descriptors via audit socket. Ignoring.");
1235                 }
1236
1237                 close_many(fds, n_fds);
1238         }
1239 }
1240
1241 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1242         Server *s = userdata;
1243
1244         assert(s);
1245
1246         log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
1247
1248         server_flush_to_var(s);
1249         server_sync(s);
1250         server_vacuum(s);
1251
1252         touch("/run/systemd/journal/flushed");
1253
1254         return 0;
1255 }
1256
1257 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1258         Server *s = userdata;
1259
1260         assert(s);
1261
1262         log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
1263         server_rotate(s);
1264         server_vacuum(s);
1265
1266         return 0;
1267 }
1268
1269 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1270         Server *s = userdata;
1271
1272         assert(s);
1273
1274         log_received_signal(LOG_INFO, si);
1275
1276         sd_event_exit(s->event, 0);
1277         return 0;
1278 }
1279
1280 static int setup_signals(Server *s) {
1281         sigset_t mask;
1282         int r;
1283
1284         assert(s);
1285
1286         assert_se(sigemptyset(&mask) == 0);
1287         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1288         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1289
1290         r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1291         if (r < 0)
1292                 return r;
1293
1294         r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1295         if (r < 0)
1296                 return r;
1297
1298         r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1299         if (r < 0)
1300                 return r;
1301
1302         r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1303         if (r < 0)
1304                 return r;
1305
1306         return 0;
1307 }
1308
1309 static int server_parse_proc_cmdline(Server *s) {
1310         _cleanup_free_ char *line = NULL;
1311         const char *w, *state;
1312         size_t l;
1313         int r;
1314
1315         r = proc_cmdline(&line);
1316         if (r < 0) {
1317                 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
1318                 return 0;
1319         }
1320
1321         FOREACH_WORD_QUOTED(w, l, line, state) {
1322                 _cleanup_free_ char *word;
1323
1324                 word = strndup(w, l);
1325                 if (!word)
1326                         return -ENOMEM;
1327
1328                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1329                         r = parse_boolean(word + 35);
1330                         if (r < 0)
1331                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1332                         else
1333                                 s->forward_to_syslog = r;
1334                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1335                         r = parse_boolean(word + 33);
1336                         if (r < 0)
1337                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1338                         else
1339                                 s->forward_to_kmsg = r;
1340                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1341                         r = parse_boolean(word + 36);
1342                         if (r < 0)
1343                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1344                         else
1345                                 s->forward_to_console = r;
1346                 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1347                         r = parse_boolean(word + 33);
1348                         if (r < 0)
1349                                 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1350                         else
1351                                 s->forward_to_wall = r;
1352                 } else if (startswith(word, "systemd.journald"))
1353                         log_warning("Invalid systemd.journald parameter. Ignoring.");
1354         }
1355         /* do not warn about state here, since probably systemd already did */
1356
1357         return 0;
1358 }
1359
1360 static int server_parse_config_file(Server *s) {
1361         assert(s);
1362
1363         return config_parse_many("/etc/systemd/journald.conf",
1364                                  CONF_DIRS_NULSTR("systemd/journald.conf"),
1365                                  "Journal\0",
1366                                  config_item_perf_lookup, journald_gperf_lookup,
1367                                  false, s);
1368 }
1369
1370 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1371         Server *s = userdata;
1372
1373         assert(s);
1374
1375         server_sync(s);
1376         return 0;
1377 }
1378
1379 int server_schedule_sync(Server *s, int priority) {
1380         int r;
1381
1382         assert(s);
1383
1384         if (priority <= LOG_CRIT) {
1385                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1386                 server_sync(s);
1387                 return 0;
1388         }
1389
1390         if (s->sync_scheduled)
1391                 return 0;
1392
1393         if (s->sync_interval_usec > 0) {
1394                 usec_t when;
1395
1396                 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1397                 if (r < 0)
1398                         return r;
1399
1400                 when += s->sync_interval_usec;
1401
1402                 if (!s->sync_event_source) {
1403                         r = sd_event_add_time(
1404                                         s->event,
1405                                         &s->sync_event_source,
1406                                         CLOCK_MONOTONIC,
1407                                         when, 0,
1408                                         server_dispatch_sync, s);
1409                         if (r < 0)
1410                                 return r;
1411
1412                         r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1413                 } else {
1414                         r = sd_event_source_set_time(s->sync_event_source, when);
1415                         if (r < 0)
1416                                 return r;
1417
1418                         r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1419                 }
1420                 if (r < 0)
1421                         return r;
1422
1423                 s->sync_scheduled = true;
1424         }
1425
1426         return 0;
1427 }
1428
1429 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1430         Server *s = userdata;
1431
1432         assert(s);
1433
1434         server_cache_hostname(s);
1435         return 0;
1436 }
1437
1438 static int server_open_hostname(Server *s) {
1439         int r;
1440
1441         assert(s);
1442
1443         s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1444         if (s->hostname_fd < 0)
1445                 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1446
1447         r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1448         if (r < 0) {
1449                 /* kernels prior to 3.2 don't support polling this file. Ignore
1450                  * the failure. */
1451                 if (r == -EPERM) {
1452                         log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1453                                         strerror(-r));
1454                         s->hostname_fd = safe_close(s->hostname_fd);
1455                         return 0;
1456                 }
1457
1458                 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1459         }
1460
1461         r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1462         if (r < 0)
1463                 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1464
1465         return 0;
1466 }
1467
1468 int server_init(Server *s) {
1469         _cleanup_fdset_free_ FDSet *fds = NULL;
1470         int n, r, fd;
1471
1472         assert(s);
1473
1474         zero(*s);
1475         s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = -1;
1476         s->compress = true;
1477         s->seal = true;
1478
1479         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1480         s->sync_scheduled = false;
1481
1482         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1483         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1484
1485         s->forward_to_wall = true;
1486
1487         s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1488
1489         s->max_level_store = LOG_DEBUG;
1490         s->max_level_syslog = LOG_DEBUG;
1491         s->max_level_kmsg = LOG_NOTICE;
1492         s->max_level_console = LOG_INFO;
1493         s->max_level_wall = LOG_EMERG;
1494
1495         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1496         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1497
1498         server_parse_config_file(s);
1499         server_parse_proc_cmdline(s);
1500         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1501                 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1502                           s->rate_limit_interval, s->rate_limit_burst);
1503                 s->rate_limit_interval = s->rate_limit_burst = 0;
1504         }
1505
1506         mkdir_p("/run/systemd/journal", 0755);
1507
1508         s->user_journals = ordered_hashmap_new(NULL);
1509         if (!s->user_journals)
1510                 return log_oom();
1511
1512         s->mmap = mmap_cache_new();
1513         if (!s->mmap)
1514                 return log_oom();
1515
1516         r = sd_event_default(&s->event);
1517         if (r < 0)
1518                 return log_error_errno(r, "Failed to create event loop: %m");
1519
1520         sd_event_set_watchdog(s->event, true);
1521
1522         n = sd_listen_fds(true);
1523         if (n < 0)
1524                 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1525
1526         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1527
1528                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1529
1530                         if (s->native_fd >= 0) {
1531                                 log_error("Too many native sockets passed.");
1532                                 return -EINVAL;
1533                         }
1534
1535                         s->native_fd = fd;
1536
1537                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1538
1539                         if (s->stdout_fd >= 0) {
1540                                 log_error("Too many stdout sockets passed.");
1541                                 return -EINVAL;
1542                         }
1543
1544                         s->stdout_fd = fd;
1545
1546                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1547                            sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1548
1549                         if (s->syslog_fd >= 0) {
1550                                 log_error("Too many /dev/log sockets passed.");
1551                                 return -EINVAL;
1552                         }
1553
1554                         s->syslog_fd = fd;
1555
1556                 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1557
1558                         if (s->audit_fd >= 0) {
1559                                 log_error("Too many audit sockets passed.");
1560                                 return -EINVAL;
1561                         }
1562
1563                         s->audit_fd = fd;
1564
1565                 } else {
1566
1567                         if (!fds) {
1568                                 fds = fdset_new();
1569                                 if (!fds)
1570                                         return log_oom();
1571                         }
1572
1573                         r = fdset_put(fds, fd);
1574                         if (r < 0)
1575                                 return log_oom();
1576                 }
1577         }
1578
1579         r = server_open_stdout_socket(s, fds);
1580         if (r < 0)
1581                 return r;
1582
1583         if (fdset_size(fds) > 0) {
1584                 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1585                 fds = fdset_free(fds);
1586         }
1587
1588         r = server_open_syslog_socket(s);
1589         if (r < 0)
1590                 return r;
1591
1592         r = server_open_native_socket(s);
1593         if (r < 0)
1594                 return r;
1595
1596         r = server_open_dev_kmsg(s);
1597         if (r < 0)
1598                 return r;
1599
1600         r = server_open_audit(s);
1601         if (r < 0)
1602                 return r;
1603
1604         r = server_open_kernel_seqnum(s);
1605         if (r < 0)
1606                 return r;
1607
1608         r = server_open_hostname(s);
1609         if (r < 0)
1610                 return r;
1611
1612         r = setup_signals(s);
1613         if (r < 0)
1614                 return r;
1615
1616         s->udev = udev_new();
1617         if (!s->udev)
1618                 return -ENOMEM;
1619
1620         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1621         if (!s->rate_limit)
1622                 return -ENOMEM;
1623
1624         r = cg_get_root_path(&s->cgroup_root);
1625         if (r < 0)
1626                 return r;
1627
1628         server_cache_hostname(s);
1629         server_cache_boot_id(s);
1630         server_cache_machine_id(s);
1631
1632         r = system_journal_open(s, false);
1633         if (r < 0)
1634                 return r;
1635
1636         return 0;
1637 }
1638
1639 void server_maybe_append_tags(Server *s) {
1640 #ifdef HAVE_GCRYPT
1641         JournalFile *f;
1642         Iterator i;
1643         usec_t n;
1644
1645         n = now(CLOCK_REALTIME);
1646
1647         if (s->system_journal)
1648                 journal_file_maybe_append_tag(s->system_journal, n);
1649
1650         ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1651                 journal_file_maybe_append_tag(f, n);
1652 #endif
1653 }
1654
1655 void server_done(Server *s) {
1656         JournalFile *f;
1657         assert(s);
1658
1659         while (s->stdout_streams)
1660                 stdout_stream_free(s->stdout_streams);
1661
1662         if (s->system_journal)
1663                 journal_file_close(s->system_journal);
1664
1665         if (s->runtime_journal)
1666                 journal_file_close(s->runtime_journal);
1667
1668         while ((f = ordered_hashmap_steal_first(s->user_journals)))
1669                 journal_file_close(f);
1670
1671         ordered_hashmap_free(s->user_journals);
1672
1673         sd_event_source_unref(s->syslog_event_source);
1674         sd_event_source_unref(s->native_event_source);
1675         sd_event_source_unref(s->stdout_event_source);
1676         sd_event_source_unref(s->dev_kmsg_event_source);
1677         sd_event_source_unref(s->audit_event_source);
1678         sd_event_source_unref(s->sync_event_source);
1679         sd_event_source_unref(s->sigusr1_event_source);
1680         sd_event_source_unref(s->sigusr2_event_source);
1681         sd_event_source_unref(s->sigterm_event_source);
1682         sd_event_source_unref(s->sigint_event_source);
1683         sd_event_source_unref(s->hostname_event_source);
1684         sd_event_unref(s->event);
1685
1686         safe_close(s->syslog_fd);
1687         safe_close(s->native_fd);
1688         safe_close(s->stdout_fd);
1689         safe_close(s->dev_kmsg_fd);
1690         safe_close(s->audit_fd);
1691         safe_close(s->hostname_fd);
1692
1693         if (s->rate_limit)
1694                 journal_rate_limit_free(s->rate_limit);
1695
1696         if (s->kernel_seqnum)
1697                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1698
1699         free(s->buffer);
1700         free(s->tty_path);
1701         free(s->cgroup_root);
1702         free(s->hostname_field);
1703
1704         if (s->mmap)
1705                 mmap_cache_unref(s->mmap);
1706
1707         if (s->udev)
1708                 udev_unref(s->udev);
1709 }