chiark / gitweb /
tmpfiles: add 'a' type to set ACLs
[elogind.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
26 #include <sys/mman.h>
27 #include <sys/timerfd.h>
28
29 #include <libudev.h>
30
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33 #include "sd-daemon.h"
34 #include "fileio.h"
35 #include "mkdir.h"
36 #include "hashmap.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
40 #include "list.h"
41 #include "missing.h"
42 #include "conf-parser.h"
43 #include "selinux-util.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-rate-limit.h"
48 #include "journald-kmsg.h"
49 #include "journald-syslog.h"
50 #include "journald-stream.h"
51 #include "journald-console.h"
52 #include "journald-native.h"
53 #include "journald-audit.h"
54 #include "journald-server.h"
55 #include "acl-util.h"
56
57 #ifdef HAVE_SELINUX
58 #include <selinux/selinux.h>
59 #endif
60
/* Upper bound on the number of per-user journal files kept open at the
 * same time; find_journal() closes older ones once this is reached. */
#define USER_JOURNALS_MAX 1024

/* Default tunables. NOTE(review): their consumers are not visible in this
 * part of the file — presumably applied when the server is initialized. */
#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* How long available_space() may keep returning its cached result before
 * the journal directory is scanned again. */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
69
/* Textual names of the Storage enum values, indexed by enum value. */
static const char* const storage_table[_STORAGE_MAX] = {
        [STORAGE_AUTO] = "auto",
        [STORAGE_VOLATILE] = "volatile",
        [STORAGE_PERSISTENT] = "persistent",
        [STORAGE_NONE] = "none"
};

/* NOTE(review): both macros are defined elsewhere; presumably they generate
 * the string <-> enum lookup helpers for the table above and the
 * config_parse_storage() configuration callback — confirm at their definition. */
DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
79
/* Textual names of the SplitMode enum values, indexed by enum value. */
static const char* const split_mode_table[_SPLIT_MAX] = {
        [SPLIT_LOGIN] = "login",
        [SPLIT_UID] = "uid",
        [SPLIT_NONE] = "none",
};

/* NOTE(review): both macros are defined elsewhere; presumably they generate
 * the string <-> enum lookup helpers for the table above and the
 * config_parse_split_mode() configuration callback — confirm at their definition. */
DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
88
89 static uint64_t available_space(Server *s, bool verbose) {
90         char ids[33];
91         _cleanup_free_ char *p = NULL;
92         sd_id128_t machine;
93         struct statvfs ss;
94         uint64_t sum = 0, ss_avail = 0, avail = 0;
95         int r;
96         _cleanup_closedir_ DIR *d = NULL;
97         usec_t ts;
98         const char *f;
99         JournalMetrics *m;
100
101         ts = now(CLOCK_MONOTONIC);
102
103         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
104             && !verbose)
105                 return s->cached_available_space;
106
107         r = sd_id128_get_machine(&machine);
108         if (r < 0)
109                 return 0;
110
111         if (s->system_journal) {
112                 f = "/var/log/journal/";
113                 m = &s->system_metrics;
114         } else {
115                 f = "/run/log/journal/";
116                 m = &s->runtime_metrics;
117         }
118
119         assert(m);
120
121         p = strappend(f, sd_id128_to_string(machine, ids));
122         if (!p)
123                 return 0;
124
125         d = opendir(p);
126         if (!d)
127                 return 0;
128
129         if (fstatvfs(dirfd(d), &ss) < 0)
130                 return 0;
131
132         for (;;) {
133                 struct stat st;
134                 struct dirent *de;
135
136                 errno = 0;
137                 de = readdir(d);
138                 if (!de && errno != 0)
139                         return 0;
140
141                 if (!de)
142                         break;
143
144                 if (!endswith(de->d_name, ".journal") &&
145                     !endswith(de->d_name, ".journal~"))
146                         continue;
147
148                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
149                         continue;
150
151                 if (!S_ISREG(st.st_mode))
152                         continue;
153
154                 sum += (uint64_t) st.st_blocks * 512UL;
155         }
156
157         ss_avail = ss.f_bsize * ss.f_bavail;
158
159         /* If we reached a high mark, we will always allow this much
160          * again, unless usage goes above max_use. This watermark
161          * value is cached so that we don't give up space on pressure,
162          * but hover below the maximum usage. */
163
164         if (m->use < sum)
165                 m->use = sum;
166
167         avail = LESS_BY(ss_avail, m->keep_free);
168
169         s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
170         s->cached_available_space_timestamp = ts;
171
172         if (verbose) {
173                 char    fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
174                         fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
175
176                 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
177                                       "%s journal is using %s (max allowed %s, "
178                                       "trying to leave %s free of %s available → current limit %s).",
179                                       s->system_journal ? "Permanent" : "Runtime",
180                                       format_bytes(fb1, sizeof(fb1), sum),
181                                       format_bytes(fb2, sizeof(fb2), m->max_use),
182                                       format_bytes(fb3, sizeof(fb3), m->keep_free),
183                                       format_bytes(fb4, sizeof(fb4), ss_avail),
184                                       format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
185         }
186
187         return s->cached_available_space;
188 }
189
190 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
191         int r;
192 #ifdef HAVE_ACL
193         acl_t acl;
194         acl_entry_t entry;
195         acl_permset_t permset;
196 #endif
197
198         assert(f);
199
200         r = fchmod(f->fd, 0640);
201         if (r < 0)
202                 log_warning_errno(r, "Failed to fix access mode on %s, ignoring: %m", f->path);
203
204 #ifdef HAVE_ACL
205         if (uid <= SYSTEM_UID_MAX)
206                 return;
207
208         acl = acl_get_fd(f->fd);
209         if (!acl) {
210                 log_warning_errno(errno, "Failed to read ACL on %s, ignoring: %m", f->path);
211                 return;
212         }
213
214         r = acl_find_uid(acl, uid, &entry);
215         if (r <= 0) {
216
217                 if (acl_create_entry(&acl, &entry) < 0 ||
218                     acl_set_tag_type(entry, ACL_USER) < 0 ||
219                     acl_set_qualifier(entry, &uid) < 0) {
220                         log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
221                         goto finish;
222                 }
223         }
224
225         /* We do not recalculate the mask unconditionally here,
226          * so that the fchmod() mask above stays intact. */
227         if (acl_get_permset(entry, &permset) < 0 ||
228             acl_add_perm(permset, ACL_READ) < 0 ||
229             calc_acl_mask_if_needed(&acl) < 0) {
230                 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
231                 goto finish;
232         }
233
234         if (acl_set_fd(f->fd, acl) < 0)
235                 log_warning_errno(errno, "Failed to set ACL on %s, ignoring: %m", f->path);
236
237 finish:
238         acl_free(acl);
239 #endif
240 }
241
242 static JournalFile* find_journal(Server *s, uid_t uid) {
243         _cleanup_free_ char *p = NULL;
244         int r;
245         JournalFile *f;
246         sd_id128_t machine;
247
248         assert(s);
249
250         /* We split up user logs only on /var, not on /run. If the
251          * runtime file is open, we write to it exclusively, in order
252          * to guarantee proper order as soon as we flush /run to
253          * /var and close the runtime file. */
254
255         if (s->runtime_journal)
256                 return s->runtime_journal;
257
258         if (uid <= SYSTEM_UID_MAX)
259                 return s->system_journal;
260
261         r = sd_id128_get_machine(&machine);
262         if (r < 0)
263                 return s->system_journal;
264
265         f = ordered_hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
266         if (f)
267                 return f;
268
269         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
270                      SD_ID128_FORMAT_VAL(machine), uid) < 0)
271                 return s->system_journal;
272
273         while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
274                 /* Too many open? Then let's close one */
275                 f = ordered_hashmap_steal_first(s->user_journals);
276                 assert(f);
277                 journal_file_close(f);
278         }
279
280         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
281         if (r < 0)
282                 return s->system_journal;
283
284         server_fix_perms(s, f, uid);
285
286         r = ordered_hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
287         if (r < 0) {
288                 journal_file_close(f);
289                 return s->system_journal;
290         }
291
292         return f;
293 }
294
295 static int do_rotate(
296                 Server *s,
297                 JournalFile **f,
298                 const char* name,
299                 bool seal,
300                 uint32_t uid) {
301
302         int r;
303         assert(s);
304
305         if (!*f)
306                 return -EINVAL;
307
308         r = journal_file_rotate(f, s->compress, seal);
309         if (r < 0)
310                 if (*f)
311                         log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
312                 else
313                         log_error_errno(r, "Failed to create new %s journal: %m", name);
314         else
315                 server_fix_perms(s, *f, uid);
316
317         return r;
318 }
319
320 void server_rotate(Server *s) {
321         JournalFile *f;
322         void *k;
323         Iterator i;
324         int r;
325
326         log_debug("Rotating...");
327
328         do_rotate(s, &s->runtime_journal, "runtime", false, 0);
329         do_rotate(s, &s->system_journal, "system", s->seal, 0);
330
331         ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
332                 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
333                 if (r >= 0)
334                         ordered_hashmap_replace(s->user_journals, k, f);
335                 else if (!f)
336                         /* Old file has been closed and deallocated */
337                         ordered_hashmap_remove(s->user_journals, k);
338         }
339 }
340
341 void server_sync(Server *s) {
342         JournalFile *f;
343         void *k;
344         Iterator i;
345         int r;
346
347         if (s->system_journal) {
348                 r = journal_file_set_offline(s->system_journal);
349                 if (r < 0)
350                         log_error_errno(r, "Failed to sync system journal: %m");
351         }
352
353         ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
354                 r = journal_file_set_offline(f);
355                 if (r < 0)
356                         log_error_errno(r, "Failed to sync user journal: %m");
357         }
358
359         if (s->sync_event_source) {
360                 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
361                 if (r < 0)
362                         log_error_errno(r, "Failed to disable sync timer source: %m");
363         }
364
365         s->sync_scheduled = false;
366 }
367
368 static void do_vacuum(
369                 Server *s,
370                 const char *id,
371                 JournalFile *f,
372                 const char* path,
373                 JournalMetrics *metrics) {
374
375         const char *p;
376         int r;
377
378         if (!f)
379                 return;
380
381         p = strappenda(path, id);
382         r = journal_directory_vacuum(p, metrics->max_use, s->max_retention_usec, &s->oldest_file_usec, false);
383         if (r < 0 && r != -ENOENT)
384                 log_error_errno(r, "Failed to vacuum %s: %m", p);
385 }
386
387 void server_vacuum(Server *s) {
388         char ids[33];
389         sd_id128_t machine;
390         int r;
391
392         log_debug("Vacuuming...");
393
394         s->oldest_file_usec = 0;
395
396         r = sd_id128_get_machine(&machine);
397         if (r < 0) {
398                 log_error_errno(r, "Failed to get machine ID: %m");
399                 return;
400         }
401         sd_id128_to_string(machine, ids);
402
403         do_vacuum(s, ids, s->system_journal, "/var/log/journal/", &s->system_metrics);
404         do_vacuum(s, ids, s->runtime_journal, "/run/log/journal/", &s->runtime_metrics);
405
406         s->cached_available_space_timestamp = 0;
407 }
408
409 static void server_cache_machine_id(Server *s) {
410         sd_id128_t id;
411         int r;
412
413         assert(s);
414
415         r = sd_id128_get_machine(&id);
416         if (r < 0)
417                 return;
418
419         sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
420 }
421
422 static void server_cache_boot_id(Server *s) {
423         sd_id128_t id;
424         int r;
425
426         assert(s);
427
428         r = sd_id128_get_boot(&id);
429         if (r < 0)
430                 return;
431
432         sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
433 }
434
435 static void server_cache_hostname(Server *s) {
436         _cleanup_free_ char *t = NULL;
437         char *x;
438
439         assert(s);
440
441         t = gethostname_malloc();
442         if (!t)
443                 return;
444
445         x = strappend("_HOSTNAME=", t);
446         if (!x)
447                 return;
448
449         free(s->hostname_field);
450         s->hostname_field = x;
451 }
452
453 static bool shall_try_append_again(JournalFile *f, int r) {
454
455         /* -E2BIG            Hit configured limit
456            -EFBIG            Hit fs limit
457            -EDQUOT           Quota limit hit
458            -ENOSPC           Disk full
459            -EIO              I/O error of some kind (mmap)
460            -EHOSTDOWN        Other machine
461            -EBUSY            Unclean shutdown
462            -EPROTONOSUPPORT  Unsupported feature
463            -EBADMSG          Corrupted
464            -ENODATA          Truncated
465            -ESHUTDOWN        Already archived
466            -EIDRM            Journal file has been deleted */
467
468         if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
469                 log_debug("%s: Allocation limit reached, rotating.", f->path);
470         else if (r == -EHOSTDOWN)
471                 log_info("%s: Journal file from other machine, rotating.", f->path);
472         else if (r == -EBUSY)
473                 log_info("%s: Unclean shutdown, rotating.", f->path);
474         else if (r == -EPROTONOSUPPORT)
475                 log_info("%s: Unsupported feature, rotating.", f->path);
476         else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
477                 log_warning("%s: Journal file corrupted, rotating.", f->path);
478         else if (r == -EIO)
479                 log_warning("%s: IO error, rotating.", f->path);
480         else if (r == -EIDRM)
481                 log_warning("%s: Journal file has been deleted, rotating.", f->path);
482         else
483                 return false;
484
485         return true;
486 }
487
488 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
489         JournalFile *f;
490         bool vacuumed = false;
491         int r;
492
493         assert(s);
494         assert(iovec);
495         assert(n > 0);
496
497         f = find_journal(s, uid);
498         if (!f)
499                 return;
500
501         if (journal_file_rotate_suggested(f, s->max_file_usec)) {
502                 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
503                 server_rotate(s);
504                 server_vacuum(s);
505                 vacuumed = true;
506
507                 f = find_journal(s, uid);
508                 if (!f)
509                         return;
510         }
511
512         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
513         if (r >= 0) {
514                 server_schedule_sync(s, priority);
515                 return;
516         }
517
518         if (vacuumed || !shall_try_append_again(f, r)) {
519                 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
520                 return;
521         }
522
523         server_rotate(s);
524         server_vacuum(s);
525
526         f = find_journal(s, uid);
527         if (!f)
528                 return;
529
530         log_debug("Retrying write.");
531         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
532         if (r < 0)
533                 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
534         else
535                 server_schedule_sync(s, priority);
536 }
537
538 static void dispatch_message_real(
539                 Server *s,
540                 struct iovec *iovec, unsigned n, unsigned m,
541                 const struct ucred *ucred,
542                 const struct timeval *tv,
543                 const char *label, size_t label_len,
544                 const char *unit_id,
545                 int priority,
546                 pid_t object_pid) {
547
548         char    pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
549                 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
550                 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
551                 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
552                 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
553                 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
554                 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
555                 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
556         uid_t object_uid;
557         gid_t object_gid;
558         char *x;
559         int r;
560         char *t, *c;
561         uid_t realuid = 0, owner = 0, journal_uid;
562         bool owner_valid = false;
563 #ifdef HAVE_AUDIT
564         char    audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
565                 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
566                 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
567                 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
568
569         uint32_t audit;
570         uid_t loginuid;
571 #endif
572
573         assert(s);
574         assert(iovec);
575         assert(n > 0);
576         assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
577
578         if (ucred) {
579                 realuid = ucred->uid;
580
581                 sprintf(pid, "_PID="PID_FMT, ucred->pid);
582                 IOVEC_SET_STRING(iovec[n++], pid);
583
584                 sprintf(uid, "_UID="UID_FMT, ucred->uid);
585                 IOVEC_SET_STRING(iovec[n++], uid);
586
587                 sprintf(gid, "_GID="GID_FMT, ucred->gid);
588                 IOVEC_SET_STRING(iovec[n++], gid);
589
590                 r = get_process_comm(ucred->pid, &t);
591                 if (r >= 0) {
592                         x = strappenda("_COMM=", t);
593                         free(t);
594                         IOVEC_SET_STRING(iovec[n++], x);
595                 }
596
597                 r = get_process_exe(ucred->pid, &t);
598                 if (r >= 0) {
599                         x = strappenda("_EXE=", t);
600                         free(t);
601                         IOVEC_SET_STRING(iovec[n++], x);
602                 }
603
604                 r = get_process_cmdline(ucred->pid, 0, false, &t);
605                 if (r >= 0) {
606                         x = strappenda("_CMDLINE=", t);
607                         free(t);
608                         IOVEC_SET_STRING(iovec[n++], x);
609                 }
610
611                 r = get_process_capeff(ucred->pid, &t);
612                 if (r >= 0) {
613                         x = strappenda("_CAP_EFFECTIVE=", t);
614                         free(t);
615                         IOVEC_SET_STRING(iovec[n++], x);
616                 }
617
618 #ifdef HAVE_AUDIT
619                 r = audit_session_from_pid(ucred->pid, &audit);
620                 if (r >= 0) {
621                         sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
622                         IOVEC_SET_STRING(iovec[n++], audit_session);
623                 }
624
625                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
626                 if (r >= 0) {
627                         sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
628                         IOVEC_SET_STRING(iovec[n++], audit_loginuid);
629                 }
630 #endif
631
632                 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
633                 if (r >= 0) {
634                         char *session = NULL;
635
636                         x = strappenda("_SYSTEMD_CGROUP=", c);
637                         IOVEC_SET_STRING(iovec[n++], x);
638
639                         r = cg_path_get_session(c, &t);
640                         if (r >= 0) {
641                                 session = strappenda("_SYSTEMD_SESSION=", t);
642                                 free(t);
643                                 IOVEC_SET_STRING(iovec[n++], session);
644                         }
645
646                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
647                                 owner_valid = true;
648
649                                 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
650                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
651                         }
652
653                         if (cg_path_get_unit(c, &t) >= 0) {
654                                 x = strappenda("_SYSTEMD_UNIT=", t);
655                                 free(t);
656                                 IOVEC_SET_STRING(iovec[n++], x);
657                         } else if (unit_id && !session) {
658                                 x = strappenda("_SYSTEMD_UNIT=", unit_id);
659                                 IOVEC_SET_STRING(iovec[n++], x);
660                         }
661
662                         if (cg_path_get_user_unit(c, &t) >= 0) {
663                                 x = strappenda("_SYSTEMD_USER_UNIT=", t);
664                                 free(t);
665                                 IOVEC_SET_STRING(iovec[n++], x);
666                         } else if (unit_id && session) {
667                                 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
668                                 IOVEC_SET_STRING(iovec[n++], x);
669                         }
670
671                         if (cg_path_get_slice(c, &t) >= 0) {
672                                 x = strappenda("_SYSTEMD_SLICE=", t);
673                                 free(t);
674                                 IOVEC_SET_STRING(iovec[n++], x);
675                         }
676
677                         free(c);
678                 } else if (unit_id) {
679                         x = strappenda("_SYSTEMD_UNIT=", unit_id);
680                         IOVEC_SET_STRING(iovec[n++], x);
681                 }
682
683 #ifdef HAVE_SELINUX
684                 if (mac_selinux_use()) {
685                         if (label) {
686                                 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
687
688                                 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
689                                 IOVEC_SET_STRING(iovec[n++], x);
690                         } else {
691                                 security_context_t con;
692
693                                 if (getpidcon(ucred->pid, &con) >= 0) {
694                                         x = strappenda("_SELINUX_CONTEXT=", con);
695
696                                         freecon(con);
697                                         IOVEC_SET_STRING(iovec[n++], x);
698                                 }
699                         }
700                 }
701 #endif
702         }
703         assert(n <= m);
704
705         if (object_pid) {
706                 r = get_process_uid(object_pid, &object_uid);
707                 if (r >= 0) {
708                         sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
709                         IOVEC_SET_STRING(iovec[n++], o_uid);
710                 }
711
712                 r = get_process_gid(object_pid, &object_gid);
713                 if (r >= 0) {
714                         sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
715                         IOVEC_SET_STRING(iovec[n++], o_gid);
716                 }
717
718                 r = get_process_comm(object_pid, &t);
719                 if (r >= 0) {
720                         x = strappenda("OBJECT_COMM=", t);
721                         free(t);
722                         IOVEC_SET_STRING(iovec[n++], x);
723                 }
724
725                 r = get_process_exe(object_pid, &t);
726                 if (r >= 0) {
727                         x = strappenda("OBJECT_EXE=", t);
728                         free(t);
729                         IOVEC_SET_STRING(iovec[n++], x);
730                 }
731
732                 r = get_process_cmdline(object_pid, 0, false, &t);
733                 if (r >= 0) {
734                         x = strappenda("OBJECT_CMDLINE=", t);
735                         free(t);
736                         IOVEC_SET_STRING(iovec[n++], x);
737                 }
738
739 #ifdef HAVE_AUDIT
740                 r = audit_session_from_pid(object_pid, &audit);
741                 if (r >= 0) {
742                         sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
743                         IOVEC_SET_STRING(iovec[n++], o_audit_session);
744                 }
745
746                 r = audit_loginuid_from_pid(object_pid, &loginuid);
747                 if (r >= 0) {
748                         sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
749                         IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
750                 }
751 #endif
752
753                 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
754                 if (r >= 0) {
755                         x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
756                         IOVEC_SET_STRING(iovec[n++], x);
757
758                         r = cg_path_get_session(c, &t);
759                         if (r >= 0) {
760                                 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
761                                 free(t);
762                                 IOVEC_SET_STRING(iovec[n++], x);
763                         }
764
765                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
766                                 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
767                                 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
768                         }
769
770                         if (cg_path_get_unit(c, &t) >= 0) {
771                                 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
772                                 free(t);
773                                 IOVEC_SET_STRING(iovec[n++], x);
774                         }
775
776                         if (cg_path_get_user_unit(c, &t) >= 0) {
777                                 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
778                                 free(t);
779                                 IOVEC_SET_STRING(iovec[n++], x);
780                         }
781
782                         free(c);
783                 }
784         }
785         assert(n <= m);
786
787         if (tv) {
788                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
789                 IOVEC_SET_STRING(iovec[n++], source_time);
790         }
791
792         /* Note that strictly speaking storing the boot id here is
793          * redundant since the entry includes this in-line
794          * anyway. However, we need this indexed, too. */
795         if (!isempty(s->boot_id_field))
796                 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
797
798         if (!isempty(s->machine_id_field))
799                 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
800
801         if (!isempty(s->hostname_field))
802                 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
803
804         assert(n <= m);
805
806         if (s->split_mode == SPLIT_UID && realuid > 0)
807                 /* Split up strictly by any UID */
808                 journal_uid = realuid;
809         else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
810                 /* Split up by login UIDs.  We do this only if the
811                  * realuid is not root, in order not to accidentally
812                  * leak privileged information to the user that is
813                  * logged by a privileged process that is part of an
814                  * unprivileged session. */
815                 journal_uid = owner;
816         else
817                 journal_uid = 0;
818
819         write_to_journal(s, journal_uid, iovec, n, priority);
820 }
821
822 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
823         char mid[11 + 32 + 1];
824         char buffer[16 + LINE_MAX + 1];
825         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
826         int n = 0;
827         va_list ap;
828         struct ucred ucred = {};
829
830         assert(s);
831         assert(format);
832
833         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
834         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
835
836         memcpy(buffer, "MESSAGE=", 8);
837         va_start(ap, format);
838         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
839         va_end(ap);
840         char_array_0(buffer);
841         IOVEC_SET_STRING(iovec[n++], buffer);
842
843         if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
844                 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
845                 char_array_0(mid);
846                 IOVEC_SET_STRING(iovec[n++], mid);
847         }
848
849         ucred.pid = getpid();
850         ucred.uid = getuid();
851         ucred.gid = getgid();
852
853         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
854 }
855
856 void server_dispatch_message(
857                 Server *s,
858                 struct iovec *iovec, unsigned n, unsigned m,
859                 const struct ucred *ucred,
860                 const struct timeval *tv,
861                 const char *label, size_t label_len,
862                 const char *unit_id,
863                 int priority,
864                 pid_t object_pid) {
865
866         int rl, r;
867         _cleanup_free_ char *path = NULL;
868         char *c;
869
870         assert(s);
871         assert(iovec || n == 0);
872
873         if (n == 0)
874                 return;
875
876         if (LOG_PRI(priority) > s->max_level_store)
877                 return;
878
879         /* Stop early in case the information will not be stored
880          * in a journal. */
881         if (s->storage == STORAGE_NONE)
882                 return;
883
884         if (!ucred)
885                 goto finish;
886
887         r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
888         if (r < 0)
889                 goto finish;
890
891         /* example: /user/lennart/3/foobar
892          *          /system/dbus.service/foobar
893          *
894          * So let's cut of everything past the third /, since that is
895          * where user directories start */
896
897         c = strchr(path, '/');
898         if (c) {
899                 c = strchr(c+1, '/');
900                 if (c) {
901                         c = strchr(c+1, '/');
902                         if (c)
903                                 *c = 0;
904                 }
905         }
906
907         rl = journal_rate_limit_test(s->rate_limit, path,
908                                      priority & LOG_PRIMASK, available_space(s, false));
909
910         if (rl == 0)
911                 return;
912
913         /* Write a suppression message if we suppressed something */
914         if (rl > 1)
915                 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
916                                       "Suppressed %u messages from %s", rl - 1, path);
917
918 finish:
919         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
920 }
921
922
923 static int system_journal_open(Server *s, bool flush_requested) {
924         int r;
925         char *fn;
926         sd_id128_t machine;
927         char ids[33];
928
929         r = sd_id128_get_machine(&machine);
930         if (r < 0)
931                 return log_error_errno(r, "Failed to get machine id: %m");
932
933         sd_id128_to_string(machine, ids);
934
935         if (!s->system_journal &&
936             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
937             (flush_requested
938              || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
939
940                 /* If in auto mode: first try to create the machine
941                  * path, but not the prefix.
942                  *
943                  * If in persistent mode: create /var/log/journal and
944                  * the machine path */
945
946                 if (s->storage == STORAGE_PERSISTENT)
947                         (void) mkdir("/var/log/journal/", 0755);
948
949                 fn = strappenda("/var/log/journal/", ids);
950                 (void) mkdir(fn, 0755);
951
952                 fn = strappenda(fn, "/system.journal");
953                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
954
955                 if (r >= 0)
956                         server_fix_perms(s, s->system_journal, 0);
957                 else if (r < 0) {
958                         if (r != -ENOENT && r != -EROFS)
959                                 log_warning_errno(r, "Failed to open system journal: %m");
960
961                         r = 0;
962                 }
963         }
964
965         if (!s->runtime_journal &&
966             (s->storage != STORAGE_NONE)) {
967
968                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
969                 if (!fn)
970                         return -ENOMEM;
971
972                 if (s->system_journal) {
973
974                         /* Try to open the runtime journal, but only
975                          * if it already exists, so that we can flush
976                          * it into the system journal */
977
978                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
979                         free(fn);
980
981                         if (r < 0) {
982                                 if (r != -ENOENT)
983                                         log_warning_errno(r, "Failed to open runtime journal: %m");
984
985                                 r = 0;
986                         }
987
988                 } else {
989
990                         /* OK, we really need the runtime journal, so create
991                          * it if necessary. */
992
993                         (void) mkdir("/run/log", 0755);
994                         (void) mkdir("/run/log/journal", 0755);
995                         (void) mkdir_parents(fn, 0750);
996
997                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
998                         free(fn);
999
1000                         if (r < 0)
1001                                 return log_error_errno(r, "Failed to open runtime journal: %m");
1002                 }
1003
1004                 if (s->runtime_journal)
1005                         server_fix_perms(s, s->runtime_journal, 0);
1006         }
1007
1008         available_space(s, true);
1009
1010         return r;
1011 }
1012
1013 int server_flush_to_var(Server *s) {
1014         sd_id128_t machine;
1015         sd_journal *j = NULL;
1016         char ts[FORMAT_TIMESPAN_MAX];
1017         usec_t start;
1018         unsigned n = 0;
1019         int r;
1020
1021         assert(s);
1022
1023         if (s->storage != STORAGE_AUTO &&
1024             s->storage != STORAGE_PERSISTENT)
1025                 return 0;
1026
1027         if (!s->runtime_journal)
1028                 return 0;
1029
1030         system_journal_open(s, true);
1031
1032         if (!s->system_journal)
1033                 return 0;
1034
1035         log_debug("Flushing to /var...");
1036
1037         start = now(CLOCK_MONOTONIC);
1038
1039         r = sd_id128_get_machine(&machine);
1040         if (r < 0)
1041                 return r;
1042
1043         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1044         if (r < 0)
1045                 return log_error_errno(r, "Failed to read runtime journal: %m");
1046
1047         sd_journal_set_data_threshold(j, 0);
1048
1049         SD_JOURNAL_FOREACH(j) {
1050                 Object *o = NULL;
1051                 JournalFile *f;
1052
1053                 f = j->current_file;
1054                 assert(f && f->current_offset > 0);
1055
1056                 n++;
1057
1058                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1059                 if (r < 0) {
1060                         log_error_errno(r, "Can't read entry: %m");
1061                         goto finish;
1062                 }
1063
1064                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1065                 if (r >= 0)
1066                         continue;
1067
1068                 if (!shall_try_append_again(s->system_journal, r)) {
1069                         log_error_errno(r, "Can't write entry: %m");
1070                         goto finish;
1071                 }
1072
1073                 server_rotate(s);
1074                 server_vacuum(s);
1075
1076                 if (!s->system_journal) {
1077                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1078                         r = -EIO;
1079                         goto finish;
1080                 }
1081
1082                 log_debug("Retrying write.");
1083                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1084                 if (r < 0) {
1085                         log_error_errno(r, "Can't write entry: %m");
1086                         goto finish;
1087                 }
1088         }
1089
1090 finish:
1091         journal_file_post_change(s->system_journal);
1092
1093         journal_file_close(s->runtime_journal);
1094         s->runtime_journal = NULL;
1095
1096         if (r >= 0)
1097                 rm_rf("/run/log/journal", false, true, false);
1098
1099         sd_journal_close(j);
1100
1101         server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1102
1103         return r;
1104 }
1105
1106 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1107         Server *s = userdata;
1108
1109         assert(s);
1110         assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1111
1112         if (revents != EPOLLIN) {
1113                 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1114                 return -EIO;
1115         }
1116
1117         for (;;) {
1118                 struct ucred *ucred = NULL;
1119                 struct timeval *tv = NULL;
1120                 struct cmsghdr *cmsg;
1121                 char *label = NULL;
1122                 size_t label_len = 0;
1123                 struct iovec iovec;
1124
1125                 union {
1126                         struct cmsghdr cmsghdr;
1127
1128                         /* We use NAME_MAX space for the SELinux label
1129                          * here. The kernel currently enforces no
1130                          * limit, but according to suggestions from
1131                          * the SELinux people this will change and it
1132                          * will probably be identical to NAME_MAX. For
1133                          * now we use that, but this should be updated
1134                          * one day when the final limit is known. */
1135                         uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1136                                     CMSG_SPACE(sizeof(struct timeval)) +
1137                                     CMSG_SPACE(sizeof(int)) + /* fd */
1138                                     CMSG_SPACE(NAME_MAX)]; /* selinux label */
1139                 } control = {};
1140                 union sockaddr_union sa = {};
1141                 struct msghdr msghdr = {
1142                         .msg_iov = &iovec,
1143                         .msg_iovlen = 1,
1144                         .msg_control = &control,
1145                         .msg_controllen = sizeof(control),
1146                         .msg_name = &sa,
1147                         .msg_namelen = sizeof(sa),
1148                 };
1149
1150                 ssize_t n;
1151                 int *fds = NULL;
1152                 unsigned n_fds = 0;
1153                 int v = 0;
1154                 size_t m;
1155
1156                 /* Try to get the right size, if we can. (Not all
1157                  * sockets support SIOCINQ, hence we just try, but
1158                  * don't rely on it. */
1159                 (void) ioctl(fd, SIOCINQ, &v);
1160
1161                 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1162                 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1163                                     (size_t) LINE_MAX,
1164                                     ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1165
1166                 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1167                         return log_oom();
1168
1169                 iovec.iov_base = s->buffer;
1170                 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1171
1172                 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1173                 if (n < 0) {
1174                         if (errno == EINTR || errno == EAGAIN)
1175                                 return 0;
1176
1177                         log_error_errno(errno, "recvmsg() failed: %m");
1178                         return -errno;
1179                 }
1180
1181                 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1182
1183                         if (cmsg->cmsg_level == SOL_SOCKET &&
1184                             cmsg->cmsg_type == SCM_CREDENTIALS &&
1185                             cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1186                                 ucred = (struct ucred*) CMSG_DATA(cmsg);
1187                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1188                                  cmsg->cmsg_type == SCM_SECURITY) {
1189                                 label = (char*) CMSG_DATA(cmsg);
1190                                 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1191                         } else if (cmsg->cmsg_level == SOL_SOCKET &&
1192                                    cmsg->cmsg_type == SO_TIMESTAMP &&
1193                                    cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1194                                 tv = (struct timeval*) CMSG_DATA(cmsg);
1195                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1196                                  cmsg->cmsg_type == SCM_RIGHTS) {
1197                                 fds = (int*) CMSG_DATA(cmsg);
1198                                 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1199                         }
1200                 }
1201
1202                 /* And a trailing NUL, just in case */
1203                 s->buffer[n] = 0;
1204
1205                 if (fd == s->syslog_fd) {
1206                         if (n > 0 && n_fds == 0)
1207                                 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1208                         else if (n_fds > 0)
1209                                 log_warning("Got file descriptors via syslog socket. Ignoring.");
1210
1211                 } else if (fd == s->native_fd) {
1212                         if (n > 0 && n_fds == 0)
1213                                 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1214                         else if (n == 0 && n_fds == 1)
1215                                 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1216                         else if (n_fds > 0)
1217                                 log_warning("Got too many file descriptors via native socket. Ignoring.");
1218
1219                 } else {
1220                         assert(fd == s->audit_fd);
1221
1222                         if (n > 0 && n_fds == 0)
1223                                 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1224                         else if (n_fds > 0)
1225                                 log_warning("Got file descriptors via audit socket. Ignoring.");
1226                 }
1227
1228                 close_many(fds, n_fds);
1229         }
1230 }
1231
1232 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1233         Server *s = userdata;
1234
1235         assert(s);
1236
1237         log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
1238
1239         server_flush_to_var(s);
1240         server_sync(s);
1241         server_vacuum(s);
1242
1243         touch("/run/systemd/journal/flushed");
1244
1245         return 0;
1246 }
1247
1248 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1249         Server *s = userdata;
1250
1251         assert(s);
1252
1253         log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
1254         server_rotate(s);
1255         server_vacuum(s);
1256
1257         return 0;
1258 }
1259
1260 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1261         Server *s = userdata;
1262
1263         assert(s);
1264
1265         log_received_signal(LOG_INFO, si);
1266
1267         sd_event_exit(s->event, 0);
1268         return 0;
1269 }
1270
1271 static int setup_signals(Server *s) {
1272         sigset_t mask;
1273         int r;
1274
1275         assert(s);
1276
1277         assert_se(sigemptyset(&mask) == 0);
1278         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1279         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1280
1281         r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1282         if (r < 0)
1283                 return r;
1284
1285         r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1286         if (r < 0)
1287                 return r;
1288
1289         r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1290         if (r < 0)
1291                 return r;
1292
1293         r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1294         if (r < 0)
1295                 return r;
1296
1297         return 0;
1298 }
1299
1300 static int server_parse_proc_cmdline(Server *s) {
1301         _cleanup_free_ char *line = NULL;
1302         const char *w, *state;
1303         size_t l;
1304         int r;
1305
1306         r = proc_cmdline(&line);
1307         if (r < 0) {
1308                 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
1309                 return 0;
1310         }
1311
1312         FOREACH_WORD_QUOTED(w, l, line, state) {
1313                 _cleanup_free_ char *word;
1314
1315                 word = strndup(w, l);
1316                 if (!word)
1317                         return -ENOMEM;
1318
1319                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1320                         r = parse_boolean(word + 35);
1321                         if (r < 0)
1322                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1323                         else
1324                                 s->forward_to_syslog = r;
1325                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1326                         r = parse_boolean(word + 33);
1327                         if (r < 0)
1328                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1329                         else
1330                                 s->forward_to_kmsg = r;
1331                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1332                         r = parse_boolean(word + 36);
1333                         if (r < 0)
1334                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1335                         else
1336                                 s->forward_to_console = r;
1337                 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1338                         r = parse_boolean(word + 33);
1339                         if (r < 0)
1340                                 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1341                         else
1342                                 s->forward_to_wall = r;
1343                 } else if (startswith(word, "systemd.journald"))
1344                         log_warning("Invalid systemd.journald parameter. Ignoring.");
1345         }
1346         /* do not warn about state here, since probably systemd already did */
1347
1348         return 0;
1349 }
1350
1351 static int server_parse_config_file(Server *s) {
1352         assert(s);
1353
1354         return config_parse_many("/etc/systemd/journald.conf",
1355                                  CONF_DIRS_NULSTR("systemd/journald.conf"),
1356                                  "Journal\0",
1357                                  config_item_perf_lookup, journald_gperf_lookup,
1358                                  false, s);
1359 }
1360
1361 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1362         Server *s = userdata;
1363
1364         assert(s);
1365
1366         server_sync(s);
1367         return 0;
1368 }
1369
1370 int server_schedule_sync(Server *s, int priority) {
1371         int r;
1372
1373         assert(s);
1374
1375         if (priority <= LOG_CRIT) {
1376                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1377                 server_sync(s);
1378                 return 0;
1379         }
1380
1381         if (s->sync_scheduled)
1382                 return 0;
1383
1384         if (s->sync_interval_usec > 0) {
1385                 usec_t when;
1386
1387                 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1388                 if (r < 0)
1389                         return r;
1390
1391                 when += s->sync_interval_usec;
1392
1393                 if (!s->sync_event_source) {
1394                         r = sd_event_add_time(
1395                                         s->event,
1396                                         &s->sync_event_source,
1397                                         CLOCK_MONOTONIC,
1398                                         when, 0,
1399                                         server_dispatch_sync, s);
1400                         if (r < 0)
1401                                 return r;
1402
1403                         r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1404                 } else {
1405                         r = sd_event_source_set_time(s->sync_event_source, when);
1406                         if (r < 0)
1407                                 return r;
1408
1409                         r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1410                 }
1411                 if (r < 0)
1412                         return r;
1413
1414                 s->sync_scheduled = true;
1415         }
1416
1417         return 0;
1418 }
1419
1420 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1421         Server *s = userdata;
1422
1423         assert(s);
1424
1425         server_cache_hostname(s);
1426         return 0;
1427 }
1428
1429 static int server_open_hostname(Server *s) {
1430         int r;
1431
1432         assert(s);
1433
1434         s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1435         if (s->hostname_fd < 0)
1436                 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1437
1438         r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1439         if (r < 0) {
1440                 /* kernels prior to 3.2 don't support polling this file. Ignore
1441                  * the failure. */
1442                 if (r == -EPERM) {
1443                         log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1444                                         strerror(-r));
1445                         s->hostname_fd = safe_close(s->hostname_fd);
1446                         return 0;
1447                 }
1448
1449                 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1450         }
1451
1452         r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1453         if (r < 0)
1454                 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1455
1456         return 0;
1457 }
1458
1459 int server_init(Server *s) {
1460         _cleanup_fdset_free_ FDSet *fds = NULL;
1461         int n, r, fd;
1462
1463         assert(s);
1464
1465         zero(*s);
1466         s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = -1;
1467         s->compress = true;
1468         s->seal = true;
1469
1470         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1471         s->sync_scheduled = false;
1472
1473         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1474         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1475
1476         s->forward_to_wall = true;
1477
1478         s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1479
1480         s->max_level_store = LOG_DEBUG;
1481         s->max_level_syslog = LOG_DEBUG;
1482         s->max_level_kmsg = LOG_NOTICE;
1483         s->max_level_console = LOG_INFO;
1484         s->max_level_wall = LOG_EMERG;
1485
1486         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1487         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1488
1489         server_parse_config_file(s);
1490         server_parse_proc_cmdline(s);
1491         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1492                 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1493                           s->rate_limit_interval, s->rate_limit_burst);
1494                 s->rate_limit_interval = s->rate_limit_burst = 0;
1495         }
1496
1497         mkdir_p("/run/systemd/journal", 0755);
1498
1499         s->user_journals = ordered_hashmap_new(NULL);
1500         if (!s->user_journals)
1501                 return log_oom();
1502
1503         s->mmap = mmap_cache_new();
1504         if (!s->mmap)
1505                 return log_oom();
1506
1507         r = sd_event_default(&s->event);
1508         if (r < 0)
1509                 return log_error_errno(r, "Failed to create event loop: %m");
1510
1511         sd_event_set_watchdog(s->event, true);
1512
1513         n = sd_listen_fds(true);
1514         if (n < 0)
1515                 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1516
1517         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1518
1519                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1520
1521                         if (s->native_fd >= 0) {
1522                                 log_error("Too many native sockets passed.");
1523                                 return -EINVAL;
1524                         }
1525
1526                         s->native_fd = fd;
1527
1528                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1529
1530                         if (s->stdout_fd >= 0) {
1531                                 log_error("Too many stdout sockets passed.");
1532                                 return -EINVAL;
1533                         }
1534
1535                         s->stdout_fd = fd;
1536
1537                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1538                            sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1539
1540                         if (s->syslog_fd >= 0) {
1541                                 log_error("Too many /dev/log sockets passed.");
1542                                 return -EINVAL;
1543                         }
1544
1545                         s->syslog_fd = fd;
1546
1547                 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1548
1549                         if (s->audit_fd >= 0) {
1550                                 log_error("Too many audit sockets passed.");
1551                                 return -EINVAL;
1552                         }
1553
1554                         s->audit_fd = fd;
1555
1556                 } else {
1557
1558                         if (!fds) {
1559                                 fds = fdset_new();
1560                                 if (!fds)
1561                                         return log_oom();
1562                         }
1563
1564                         r = fdset_put(fds, fd);
1565                         if (r < 0)
1566                                 return log_oom();
1567                 }
1568         }
1569
1570         r = server_open_stdout_socket(s, fds);
1571         if (r < 0)
1572                 return r;
1573
1574         if (fdset_size(fds) > 0) {
1575                 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1576                 fds = fdset_free(fds);
1577         }
1578
1579         r = server_open_syslog_socket(s);
1580         if (r < 0)
1581                 return r;
1582
1583         r = server_open_native_socket(s);
1584         if (r < 0)
1585                 return r;
1586
1587         r = server_open_dev_kmsg(s);
1588         if (r < 0)
1589                 return r;
1590
1591         r = server_open_audit(s);
1592         if (r < 0)
1593                 return r;
1594
1595         r = server_open_kernel_seqnum(s);
1596         if (r < 0)
1597                 return r;
1598
1599         r = server_open_hostname(s);
1600         if (r < 0)
1601                 return r;
1602
1603         r = setup_signals(s);
1604         if (r < 0)
1605                 return r;
1606
1607         s->udev = udev_new();
1608         if (!s->udev)
1609                 return -ENOMEM;
1610
1611         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1612         if (!s->rate_limit)
1613                 return -ENOMEM;
1614
1615         r = cg_get_root_path(&s->cgroup_root);
1616         if (r < 0)
1617                 return r;
1618
1619         server_cache_hostname(s);
1620         server_cache_boot_id(s);
1621         server_cache_machine_id(s);
1622
1623         r = system_journal_open(s, false);
1624         if (r < 0)
1625                 return r;
1626
1627         return 0;
1628 }
1629
1630 void server_maybe_append_tags(Server *s) {
1631 #ifdef HAVE_GCRYPT
1632         JournalFile *f;
1633         Iterator i;
1634         usec_t n;
1635
1636         n = now(CLOCK_REALTIME);
1637
1638         if (s->system_journal)
1639                 journal_file_maybe_append_tag(s->system_journal, n);
1640
1641         ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1642                 journal_file_maybe_append_tag(f, n);
1643 #endif
1644 }
1645
1646 void server_done(Server *s) {
1647         JournalFile *f;
1648         assert(s);
1649
1650         while (s->stdout_streams)
1651                 stdout_stream_free(s->stdout_streams);
1652
1653         if (s->system_journal)
1654                 journal_file_close(s->system_journal);
1655
1656         if (s->runtime_journal)
1657                 journal_file_close(s->runtime_journal);
1658
1659         while ((f = ordered_hashmap_steal_first(s->user_journals)))
1660                 journal_file_close(f);
1661
1662         ordered_hashmap_free(s->user_journals);
1663
1664         sd_event_source_unref(s->syslog_event_source);
1665         sd_event_source_unref(s->native_event_source);
1666         sd_event_source_unref(s->stdout_event_source);
1667         sd_event_source_unref(s->dev_kmsg_event_source);
1668         sd_event_source_unref(s->audit_event_source);
1669         sd_event_source_unref(s->sync_event_source);
1670         sd_event_source_unref(s->sigusr1_event_source);
1671         sd_event_source_unref(s->sigusr2_event_source);
1672         sd_event_source_unref(s->sigterm_event_source);
1673         sd_event_source_unref(s->sigint_event_source);
1674         sd_event_source_unref(s->hostname_event_source);
1675         sd_event_unref(s->event);
1676
1677         safe_close(s->syslog_fd);
1678         safe_close(s->native_fd);
1679         safe_close(s->stdout_fd);
1680         safe_close(s->dev_kmsg_fd);
1681         safe_close(s->audit_fd);
1682         safe_close(s->hostname_fd);
1683
1684         if (s->rate_limit)
1685                 journal_rate_limit_free(s->rate_limit);
1686
1687         if (s->kernel_seqnum)
1688                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1689
1690         free(s->buffer);
1691         free(s->tty_path);
1692         free(s->cgroup_root);
1693         free(s->hostname_field);
1694
1695         if (s->mmap)
1696                 mmap_cache_unref(s->mmap);
1697
1698         if (s->udev)
1699                 udev_unref(s->udev);
1700 }