chiark / gitweb /
journald: when we detect the journal file we are about to write to has been deleted...
[elogind.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
26 #include <sys/mman.h>
27 #include <sys/timerfd.h>
28
29 #include <libudev.h>
30
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33 #include "sd-daemon.h"
34 #include "fileio.h"
35 #include "mkdir.h"
36 #include "hashmap.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
40 #include "list.h"
41 #include "missing.h"
42 #include "conf-parser.h"
43 #include "selinux-util.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-rate-limit.h"
48 #include "journald-kmsg.h"
49 #include "journald-syslog.h"
50 #include "journald-stream.h"
51 #include "journald-console.h"
52 #include "journald-native.h"
53 #include "journald-audit.h"
54 #include "journald-server.h"
55
56 #ifdef HAVE_ACL
57 #include <sys/acl.h>
58 #include <acl/libacl.h>
59 #include "acl-util.h"
60 #endif
61
62 #ifdef HAVE_SELINUX
63 #include <selinux/selinux.h>
64 #endif
65
/* Upper bound on the number of per-user journal files kept open at once
 * (enforced in find_journal()). */
#define USER_JOURNALS_MAX               1024

/* Built-in defaults; the code that applies them is outside this part of
 * the file. */
#define DEFAULT_SYNC_INTERVAL_USEC      (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL     (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST        1000
#define DEFAULT_MAX_FILE_USEC           USEC_PER_MONTH

/* How long a computed free-space figure may be served from the cache
 * before available_space() recomputes it. */
#define RECHECK_AVAILABLE_SPACE_USEC    (30*USEC_PER_SEC)
74
75 static const char* const storage_table[_STORAGE_MAX] = {
76         [STORAGE_AUTO] = "auto",
77         [STORAGE_VOLATILE] = "volatile",
78         [STORAGE_PERSISTENT] = "persistent",
79         [STORAGE_NONE] = "none"
80 };
81
82 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
83 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
84
85 static const char* const split_mode_table[_SPLIT_MAX] = {
86         [SPLIT_LOGIN] = "login",
87         [SPLIT_UID] = "uid",
88         [SPLIT_NONE] = "none",
89 };
90
91 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
92 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
93
94 static uint64_t available_space(Server *s, bool verbose) {
95         char ids[33];
96         _cleanup_free_ char *p = NULL;
97         sd_id128_t machine;
98         struct statvfs ss;
99         uint64_t sum = 0, ss_avail = 0, avail = 0;
100         int r;
101         _cleanup_closedir_ DIR *d = NULL;
102         usec_t ts;
103         const char *f;
104         JournalMetrics *m;
105
106         ts = now(CLOCK_MONOTONIC);
107
108         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
109             && !verbose)
110                 return s->cached_available_space;
111
112         r = sd_id128_get_machine(&machine);
113         if (r < 0)
114                 return 0;
115
116         if (s->system_journal) {
117                 f = "/var/log/journal/";
118                 m = &s->system_metrics;
119         } else {
120                 f = "/run/log/journal/";
121                 m = &s->runtime_metrics;
122         }
123
124         assert(m);
125
126         p = strappend(f, sd_id128_to_string(machine, ids));
127         if (!p)
128                 return 0;
129
130         d = opendir(p);
131         if (!d)
132                 return 0;
133
134         if (fstatvfs(dirfd(d), &ss) < 0)
135                 return 0;
136
137         for (;;) {
138                 struct stat st;
139                 struct dirent *de;
140
141                 errno = 0;
142                 de = readdir(d);
143                 if (!de && errno != 0)
144                         return 0;
145
146                 if (!de)
147                         break;
148
149                 if (!endswith(de->d_name, ".journal") &&
150                     !endswith(de->d_name, ".journal~"))
151                         continue;
152
153                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
154                         continue;
155
156                 if (!S_ISREG(st.st_mode))
157                         continue;
158
159                 sum += (uint64_t) st.st_blocks * 512UL;
160         }
161
162         ss_avail = ss.f_bsize * ss.f_bavail;
163
164         /* If we reached a high mark, we will always allow this much
165          * again, unless usage goes above max_use. This watermark
166          * value is cached so that we don't give up space on pressure,
167          * but hover below the maximum usage. */
168
169         if (m->use < sum)
170                 m->use = sum;
171
172         avail = LESS_BY(ss_avail, m->keep_free);
173
174         s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
175         s->cached_available_space_timestamp = ts;
176
177         if (verbose) {
178                 char    fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
179                         fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
180
181                 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
182                                       "%s journal is using %s (max allowed %s, "
183                                       "trying to leave %s free of %s available → current limit %s).",
184                                       s->system_journal ? "Permanent" : "Runtime",
185                                       format_bytes(fb1, sizeof(fb1), sum),
186                                       format_bytes(fb2, sizeof(fb2), m->max_use),
187                                       format_bytes(fb3, sizeof(fb3), m->keep_free),
188                                       format_bytes(fb4, sizeof(fb4), ss_avail),
189                                       format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
190         }
191
192         return s->cached_available_space;
193 }
194
195 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
196         int r;
197 #ifdef HAVE_ACL
198         acl_t acl;
199         acl_entry_t entry;
200         acl_permset_t permset;
201 #endif
202
203         assert(f);
204
205         r = fchmod(f->fd, 0640);
206         if (r < 0)
207                 log_warning_errno(r, "Failed to fix access mode on %s, ignoring: %m", f->path);
208
209 #ifdef HAVE_ACL
210         if (uid <= SYSTEM_UID_MAX)
211                 return;
212
213         acl = acl_get_fd(f->fd);
214         if (!acl) {
215                 log_warning_errno(errno, "Failed to read ACL on %s, ignoring: %m", f->path);
216                 return;
217         }
218
219         r = acl_find_uid(acl, uid, &entry);
220         if (r <= 0) {
221
222                 if (acl_create_entry(&acl, &entry) < 0 ||
223                     acl_set_tag_type(entry, ACL_USER) < 0 ||
224                     acl_set_qualifier(entry, &uid) < 0) {
225                         log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
226                         goto finish;
227                 }
228         }
229
230         /* We do not recalculate the mask unconditionally here,
231          * so that the fchmod() mask above stays intact. */
232         if (acl_get_permset(entry, &permset) < 0 ||
233             acl_add_perm(permset, ACL_READ) < 0 ||
234             calc_acl_mask_if_needed(&acl) < 0) {
235                 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
236                 goto finish;
237         }
238
239         if (acl_set_fd(f->fd, acl) < 0)
240                 log_warning_errno(errno, "Failed to set ACL on %s, ignoring: %m", f->path);
241
242 finish:
243         acl_free(acl);
244 #endif
245 }
246
247 static JournalFile* find_journal(Server *s, uid_t uid) {
248         _cleanup_free_ char *p = NULL;
249         int r;
250         JournalFile *f;
251         sd_id128_t machine;
252
253         assert(s);
254
255         /* We split up user logs only on /var, not on /run. If the
256          * runtime file is open, we write to it exclusively, in order
257          * to guarantee proper order as soon as we flush /run to
258          * /var and close the runtime file. */
259
260         if (s->runtime_journal)
261                 return s->runtime_journal;
262
263         if (uid <= SYSTEM_UID_MAX)
264                 return s->system_journal;
265
266         r = sd_id128_get_machine(&machine);
267         if (r < 0)
268                 return s->system_journal;
269
270         f = ordered_hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
271         if (f)
272                 return f;
273
274         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
275                      SD_ID128_FORMAT_VAL(machine), uid) < 0)
276                 return s->system_journal;
277
278         while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
279                 /* Too many open? Then let's close one */
280                 f = ordered_hashmap_steal_first(s->user_journals);
281                 assert(f);
282                 journal_file_close(f);
283         }
284
285         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
286         if (r < 0)
287                 return s->system_journal;
288
289         server_fix_perms(s, f, uid);
290
291         r = ordered_hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
292         if (r < 0) {
293                 journal_file_close(f);
294                 return s->system_journal;
295         }
296
297         return f;
298 }
299
300 static int do_rotate(
301                 Server *s,
302                 JournalFile **f,
303                 const char* name,
304                 bool seal,
305                 uint32_t uid) {
306
307         int r;
308         assert(s);
309
310         if (!*f)
311                 return -EINVAL;
312
313         r = journal_file_rotate(f, s->compress, seal);
314         if (r < 0)
315                 if (*f)
316                         log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
317                 else
318                         log_error_errno(r, "Failed to create new %s journal: %m", name);
319         else
320                 server_fix_perms(s, *f, uid);
321
322         return r;
323 }
324
325 void server_rotate(Server *s) {
326         JournalFile *f;
327         void *k;
328         Iterator i;
329         int r;
330
331         log_debug("Rotating...");
332
333         do_rotate(s, &s->runtime_journal, "runtime", false, 0);
334         do_rotate(s, &s->system_journal, "system", s->seal, 0);
335
336         ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
337                 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
338                 if (r >= 0)
339                         ordered_hashmap_replace(s->user_journals, k, f);
340                 else if (!f)
341                         /* Old file has been closed and deallocated */
342                         ordered_hashmap_remove(s->user_journals, k);
343         }
344 }
345
346 void server_sync(Server *s) {
347         JournalFile *f;
348         void *k;
349         Iterator i;
350         int r;
351
352         if (s->system_journal) {
353                 r = journal_file_set_offline(s->system_journal);
354                 if (r < 0)
355                         log_error_errno(r, "Failed to sync system journal: %m");
356         }
357
358         ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
359                 r = journal_file_set_offline(f);
360                 if (r < 0)
361                         log_error_errno(r, "Failed to sync user journal: %m");
362         }
363
364         if (s->sync_event_source) {
365                 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
366                 if (r < 0)
367                         log_error_errno(r, "Failed to disable sync timer source: %m");
368         }
369
370         s->sync_scheduled = false;
371 }
372
373 static void do_vacuum(
374                 Server *s,
375                 const char *id,
376                 JournalFile *f,
377                 const char* path,
378                 JournalMetrics *metrics) {
379
380         const char *p;
381         int r;
382
383         if (!f)
384                 return;
385
386         p = strappenda(path, id);
387         r = journal_directory_vacuum(p, metrics->max_use, s->max_retention_usec, &s->oldest_file_usec, false);
388         if (r < 0 && r != -ENOENT)
389                 log_error_errno(r, "Failed to vacuum %s: %m", p);
390 }
391
392 void server_vacuum(Server *s) {
393         char ids[33];
394         sd_id128_t machine;
395         int r;
396
397         log_debug("Vacuuming...");
398
399         s->oldest_file_usec = 0;
400
401         r = sd_id128_get_machine(&machine);
402         if (r < 0) {
403                 log_error_errno(r, "Failed to get machine ID: %m");
404                 return;
405         }
406         sd_id128_to_string(machine, ids);
407
408         do_vacuum(s, ids, s->system_journal, "/var/log/journal/", &s->system_metrics);
409         do_vacuum(s, ids, s->runtime_journal, "/run/log/journal/", &s->runtime_metrics);
410
411         s->cached_available_space_timestamp = 0;
412 }
413
414 static void server_cache_machine_id(Server *s) {
415         sd_id128_t id;
416         int r;
417
418         assert(s);
419
420         r = sd_id128_get_machine(&id);
421         if (r < 0)
422                 return;
423
424         sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
425 }
426
427 static void server_cache_boot_id(Server *s) {
428         sd_id128_t id;
429         int r;
430
431         assert(s);
432
433         r = sd_id128_get_boot(&id);
434         if (r < 0)
435                 return;
436
437         sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
438 }
439
440 static void server_cache_hostname(Server *s) {
441         _cleanup_free_ char *t = NULL;
442         char *x;
443
444         assert(s);
445
446         t = gethostname_malloc();
447         if (!t)
448                 return;
449
450         x = strappend("_HOSTNAME=", t);
451         if (!x)
452                 return;
453
454         free(s->hostname_field);
455         s->hostname_field = x;
456 }
457
458 static bool shall_try_append_again(JournalFile *f, int r) {
459
460         /* -E2BIG            Hit configured limit
461            -EFBIG            Hit fs limit
462            -EDQUOT           Quota limit hit
463            -ENOSPC           Disk full
464            -EIO              I/O error of some kind (mmap)
465            -EHOSTDOWN        Other machine
466            -EBUSY            Unclean shutdown
467            -EPROTONOSUPPORT  Unsupported feature
468            -EBADMSG          Corrupted
469            -ENODATA          Truncated
470            -ESHUTDOWN        Already archived
471            -EIDRM            Journal file has been deleted */
472
473         if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
474                 log_debug("%s: Allocation limit reached, rotating.", f->path);
475         else if (r == -EHOSTDOWN)
476                 log_info("%s: Journal file from other machine, rotating.", f->path);
477         else if (r == -EBUSY)
478                 log_info("%s: Unclean shutdown, rotating.", f->path);
479         else if (r == -EPROTONOSUPPORT)
480                 log_info("%s: Unsupported feature, rotating.", f->path);
481         else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
482                 log_warning("%s: Journal file corrupted, rotating.", f->path);
483         else if (r == -EIO)
484                 log_warning("%s: IO error, rotating.", f->path);
485         else if (r == -EIDRM)
486                 log_warning("%s: Journal file has been deleted, rotating.", f->path);
487         else
488                 return false;
489
490         return true;
491 }
492
493 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
494         JournalFile *f;
495         bool vacuumed = false;
496         int r;
497
498         assert(s);
499         assert(iovec);
500         assert(n > 0);
501
502         f = find_journal(s, uid);
503         if (!f)
504                 return;
505
506         if (journal_file_rotate_suggested(f, s->max_file_usec)) {
507                 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
508                 server_rotate(s);
509                 server_vacuum(s);
510                 vacuumed = true;
511
512                 f = find_journal(s, uid);
513                 if (!f)
514                         return;
515         }
516
517         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
518         if (r >= 0) {
519                 server_schedule_sync(s, priority);
520                 return;
521         }
522
523         if (vacuumed || !shall_try_append_again(f, r)) {
524                 size_t size = 0;
525                 unsigned i;
526                 for (i = 0; i < n; i++)
527                         size += iovec[i].iov_len;
528
529                 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, size);
530                 return;
531         }
532
533         server_rotate(s);
534         server_vacuum(s);
535
536         f = find_journal(s, uid);
537         if (!f)
538                 return;
539
540         log_debug("Retrying write.");
541         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
542         if (r < 0) {
543                 size_t size = 0;
544                 unsigned i;
545                 for (i = 0; i < n; i++)
546                         size += iovec[i].iov_len;
547
548                 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, size);
549         } else
550                 server_schedule_sync(s, priority);
551 }
552
553 static void dispatch_message_real(
554                 Server *s,
555                 struct iovec *iovec, unsigned n, unsigned m,
556                 const struct ucred *ucred,
557                 const struct timeval *tv,
558                 const char *label, size_t label_len,
559                 const char *unit_id,
560                 int priority,
561                 pid_t object_pid) {
562
563         char    pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
564                 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
565                 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
566                 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
567                 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
568                 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
569                 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
570                 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
571         uid_t object_uid;
572         gid_t object_gid;
573         char *x;
574         int r;
575         char *t, *c;
576         uid_t realuid = 0, owner = 0, journal_uid;
577         bool owner_valid = false;
578 #ifdef HAVE_AUDIT
579         char    audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
580                 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
581                 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
582                 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
583
584         uint32_t audit;
585         uid_t loginuid;
586 #endif
587
588         assert(s);
589         assert(iovec);
590         assert(n > 0);
591         assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
592
593         if (ucred) {
594                 realuid = ucred->uid;
595
596                 sprintf(pid, "_PID="PID_FMT, ucred->pid);
597                 IOVEC_SET_STRING(iovec[n++], pid);
598
599                 sprintf(uid, "_UID="UID_FMT, ucred->uid);
600                 IOVEC_SET_STRING(iovec[n++], uid);
601
602                 sprintf(gid, "_GID="GID_FMT, ucred->gid);
603                 IOVEC_SET_STRING(iovec[n++], gid);
604
605                 r = get_process_comm(ucred->pid, &t);
606                 if (r >= 0) {
607                         x = strappenda("_COMM=", t);
608                         free(t);
609                         IOVEC_SET_STRING(iovec[n++], x);
610                 }
611
612                 r = get_process_exe(ucred->pid, &t);
613                 if (r >= 0) {
614                         x = strappenda("_EXE=", t);
615                         free(t);
616                         IOVEC_SET_STRING(iovec[n++], x);
617                 }
618
619                 r = get_process_cmdline(ucred->pid, 0, false, &t);
620                 if (r >= 0) {
621                         x = strappenda("_CMDLINE=", t);
622                         free(t);
623                         IOVEC_SET_STRING(iovec[n++], x);
624                 }
625
626                 r = get_process_capeff(ucred->pid, &t);
627                 if (r >= 0) {
628                         x = strappenda("_CAP_EFFECTIVE=", t);
629                         free(t);
630                         IOVEC_SET_STRING(iovec[n++], x);
631                 }
632
633 #ifdef HAVE_AUDIT
634                 r = audit_session_from_pid(ucred->pid, &audit);
635                 if (r >= 0) {
636                         sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
637                         IOVEC_SET_STRING(iovec[n++], audit_session);
638                 }
639
640                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
641                 if (r >= 0) {
642                         sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
643                         IOVEC_SET_STRING(iovec[n++], audit_loginuid);
644                 }
645 #endif
646
647                 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
648                 if (r >= 0) {
649                         char *session = NULL;
650
651                         x = strappenda("_SYSTEMD_CGROUP=", c);
652                         IOVEC_SET_STRING(iovec[n++], x);
653
654                         r = cg_path_get_session(c, &t);
655                         if (r >= 0) {
656                                 session = strappenda("_SYSTEMD_SESSION=", t);
657                                 free(t);
658                                 IOVEC_SET_STRING(iovec[n++], session);
659                         }
660
661                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
662                                 owner_valid = true;
663
664                                 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
665                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
666                         }
667
668                         if (cg_path_get_unit(c, &t) >= 0) {
669                                 x = strappenda("_SYSTEMD_UNIT=", t);
670                                 free(t);
671                                 IOVEC_SET_STRING(iovec[n++], x);
672                         } else if (unit_id && !session) {
673                                 x = strappenda("_SYSTEMD_UNIT=", unit_id);
674                                 IOVEC_SET_STRING(iovec[n++], x);
675                         }
676
677                         if (cg_path_get_user_unit(c, &t) >= 0) {
678                                 x = strappenda("_SYSTEMD_USER_UNIT=", t);
679                                 free(t);
680                                 IOVEC_SET_STRING(iovec[n++], x);
681                         } else if (unit_id && session) {
682                                 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
683                                 IOVEC_SET_STRING(iovec[n++], x);
684                         }
685
686                         if (cg_path_get_slice(c, &t) >= 0) {
687                                 x = strappenda("_SYSTEMD_SLICE=", t);
688                                 free(t);
689                                 IOVEC_SET_STRING(iovec[n++], x);
690                         }
691
692                         free(c);
693                 } else if (unit_id) {
694                         x = strappenda("_SYSTEMD_UNIT=", unit_id);
695                         IOVEC_SET_STRING(iovec[n++], x);
696                 }
697
698 #ifdef HAVE_SELINUX
699                 if (mac_selinux_use()) {
700                         if (label) {
701                                 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
702
703                                 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
704                                 IOVEC_SET_STRING(iovec[n++], x);
705                         } else {
706                                 security_context_t con;
707
708                                 if (getpidcon(ucred->pid, &con) >= 0) {
709                                         x = strappenda("_SELINUX_CONTEXT=", con);
710
711                                         freecon(con);
712                                         IOVEC_SET_STRING(iovec[n++], x);
713                                 }
714                         }
715                 }
716 #endif
717         }
718         assert(n <= m);
719
720         if (object_pid) {
721                 r = get_process_uid(object_pid, &object_uid);
722                 if (r >= 0) {
723                         sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
724                         IOVEC_SET_STRING(iovec[n++], o_uid);
725                 }
726
727                 r = get_process_gid(object_pid, &object_gid);
728                 if (r >= 0) {
729                         sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
730                         IOVEC_SET_STRING(iovec[n++], o_gid);
731                 }
732
733                 r = get_process_comm(object_pid, &t);
734                 if (r >= 0) {
735                         x = strappenda("OBJECT_COMM=", t);
736                         free(t);
737                         IOVEC_SET_STRING(iovec[n++], x);
738                 }
739
740                 r = get_process_exe(object_pid, &t);
741                 if (r >= 0) {
742                         x = strappenda("OBJECT_EXE=", t);
743                         free(t);
744                         IOVEC_SET_STRING(iovec[n++], x);
745                 }
746
747                 r = get_process_cmdline(object_pid, 0, false, &t);
748                 if (r >= 0) {
749                         x = strappenda("OBJECT_CMDLINE=", t);
750                         free(t);
751                         IOVEC_SET_STRING(iovec[n++], x);
752                 }
753
754 #ifdef HAVE_AUDIT
755                 r = audit_session_from_pid(object_pid, &audit);
756                 if (r >= 0) {
757                         sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
758                         IOVEC_SET_STRING(iovec[n++], o_audit_session);
759                 }
760
761                 r = audit_loginuid_from_pid(object_pid, &loginuid);
762                 if (r >= 0) {
763                         sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
764                         IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
765                 }
766 #endif
767
768                 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
769                 if (r >= 0) {
770                         x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
771                         IOVEC_SET_STRING(iovec[n++], x);
772
773                         r = cg_path_get_session(c, &t);
774                         if (r >= 0) {
775                                 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
776                                 free(t);
777                                 IOVEC_SET_STRING(iovec[n++], x);
778                         }
779
780                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
781                                 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
782                                 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
783                         }
784
785                         if (cg_path_get_unit(c, &t) >= 0) {
786                                 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
787                                 free(t);
788                                 IOVEC_SET_STRING(iovec[n++], x);
789                         }
790
791                         if (cg_path_get_user_unit(c, &t) >= 0) {
792                                 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
793                                 free(t);
794                                 IOVEC_SET_STRING(iovec[n++], x);
795                         }
796
797                         free(c);
798                 }
799         }
800         assert(n <= m);
801
802         if (tv) {
803                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
804                 IOVEC_SET_STRING(iovec[n++], source_time);
805         }
806
807         /* Note that strictly speaking storing the boot id here is
808          * redundant since the entry includes this in-line
809          * anyway. However, we need this indexed, too. */
810         if (!isempty(s->boot_id_field))
811                 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
812
813         if (!isempty(s->machine_id_field))
814                 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
815
816         if (!isempty(s->hostname_field))
817                 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
818
819         assert(n <= m);
820
821         if (s->split_mode == SPLIT_UID && realuid > 0)
822                 /* Split up strictly by any UID */
823                 journal_uid = realuid;
824         else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
825                 /* Split up by login UIDs.  We do this only if the
826                  * realuid is not root, in order not to accidentally
827                  * leak privileged information to the user that is
828                  * logged by a privileged process that is part of an
829                  * unprivileged session. */
830                 journal_uid = owner;
831         else
832                 journal_uid = 0;
833
834         write_to_journal(s, journal_uid, iovec, n, priority);
835 }
836
837 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
838         char mid[11 + 32 + 1];
839         char buffer[16 + LINE_MAX + 1];
840         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
841         int n = 0;
842         va_list ap;
843         struct ucred ucred = {};
844
845         assert(s);
846         assert(format);
847
848         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
849         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
850
851         memcpy(buffer, "MESSAGE=", 8);
852         va_start(ap, format);
853         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
854         va_end(ap);
855         char_array_0(buffer);
856         IOVEC_SET_STRING(iovec[n++], buffer);
857
858         if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
859                 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
860                 char_array_0(mid);
861                 IOVEC_SET_STRING(iovec[n++], mid);
862         }
863
864         ucred.pid = getpid();
865         ucred.uid = getuid();
866         ucred.gid = getgid();
867
868         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
869 }
870
871 void server_dispatch_message(
872                 Server *s,
873                 struct iovec *iovec, unsigned n, unsigned m,
874                 const struct ucred *ucred,
875                 const struct timeval *tv,
876                 const char *label, size_t label_len,
877                 const char *unit_id,
878                 int priority,
879                 pid_t object_pid) {
880
881         int rl, r;
882         _cleanup_free_ char *path = NULL;
883         char *c;
884
885         assert(s);
886         assert(iovec || n == 0);
887
888         if (n == 0)
889                 return;
890
891         if (LOG_PRI(priority) > s->max_level_store)
892                 return;
893
894         /* Stop early in case the information will not be stored
895          * in a journal. */
896         if (s->storage == STORAGE_NONE)
897                 return;
898
899         if (!ucred)
900                 goto finish;
901
902         r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
903         if (r < 0)
904                 goto finish;
905
906         /* example: /user/lennart/3/foobar
907          *          /system/dbus.service/foobar
908          *
909          * So let's cut of everything past the third /, since that is
910          * where user directories start */
911
912         c = strchr(path, '/');
913         if (c) {
914                 c = strchr(c+1, '/');
915                 if (c) {
916                         c = strchr(c+1, '/');
917                         if (c)
918                                 *c = 0;
919                 }
920         }
921
922         rl = journal_rate_limit_test(s->rate_limit, path,
923                                      priority & LOG_PRIMASK, available_space(s, false));
924
925         if (rl == 0)
926                 return;
927
928         /* Write a suppression message if we suppressed something */
929         if (rl > 1)
930                 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
931                                       "Suppressed %u messages from %s", rl - 1, path);
932
933 finish:
934         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
935 }
936
937
938 static int system_journal_open(Server *s, bool flush_requested) {
939         int r;
940         char *fn;
941         sd_id128_t machine;
942         char ids[33];
943
944         r = sd_id128_get_machine(&machine);
945         if (r < 0)
946                 return log_error_errno(r, "Failed to get machine id: %m");
947
948         sd_id128_to_string(machine, ids);
949
950         if (!s->system_journal &&
951             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
952             (flush_requested
953              || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
954
955                 /* If in auto mode: first try to create the machine
956                  * path, but not the prefix.
957                  *
958                  * If in persistent mode: create /var/log/journal and
959                  * the machine path */
960
961                 if (s->storage == STORAGE_PERSISTENT)
962                         (void) mkdir("/var/log/journal/", 0755);
963
964                 fn = strappenda("/var/log/journal/", ids);
965                 (void) mkdir(fn, 0755);
966
967                 fn = strappenda(fn, "/system.journal");
968                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
969
970                 if (r >= 0)
971                         server_fix_perms(s, s->system_journal, 0);
972                 else if (r < 0) {
973                         if (r != -ENOENT && r != -EROFS)
974                                 log_warning_errno(r, "Failed to open system journal: %m");
975
976                         r = 0;
977                 }
978         }
979
980         if (!s->runtime_journal &&
981             (s->storage != STORAGE_NONE)) {
982
983                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
984                 if (!fn)
985                         return -ENOMEM;
986
987                 if (s->system_journal) {
988
989                         /* Try to open the runtime journal, but only
990                          * if it already exists, so that we can flush
991                          * it into the system journal */
992
993                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
994                         free(fn);
995
996                         if (r < 0) {
997                                 if (r != -ENOENT)
998                                         log_warning_errno(r, "Failed to open runtime journal: %m");
999
1000                                 r = 0;
1001                         }
1002
1003                 } else {
1004
1005                         /* OK, we really need the runtime journal, so create
1006                          * it if necessary. */
1007
1008                         (void) mkdir("/run/log", 0755);
1009                         (void) mkdir("/run/log/journal", 0755);
1010                         (void) mkdir_parents(fn, 0750);
1011
1012                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1013                         free(fn);
1014
1015                         if (r < 0)
1016                                 return log_error_errno(r, "Failed to open runtime journal: %m");
1017                 }
1018
1019                 if (s->runtime_journal)
1020                         server_fix_perms(s, s->runtime_journal, 0);
1021         }
1022
1023         available_space(s, true);
1024
1025         return r;
1026 }
1027
1028 int server_flush_to_var(Server *s) {
1029         sd_id128_t machine;
1030         sd_journal *j = NULL;
1031         char ts[FORMAT_TIMESPAN_MAX];
1032         usec_t start;
1033         unsigned n = 0;
1034         int r;
1035
1036         assert(s);
1037
1038         if (s->storage != STORAGE_AUTO &&
1039             s->storage != STORAGE_PERSISTENT)
1040                 return 0;
1041
1042         if (!s->runtime_journal)
1043                 return 0;
1044
1045         system_journal_open(s, true);
1046
1047         if (!s->system_journal)
1048                 return 0;
1049
1050         log_debug("Flushing to /var...");
1051
1052         start = now(CLOCK_MONOTONIC);
1053
1054         r = sd_id128_get_machine(&machine);
1055         if (r < 0)
1056                 return r;
1057
1058         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1059         if (r < 0)
1060                 return log_error_errno(r, "Failed to read runtime journal: %m");
1061
1062         sd_journal_set_data_threshold(j, 0);
1063
1064         SD_JOURNAL_FOREACH(j) {
1065                 Object *o = NULL;
1066                 JournalFile *f;
1067
1068                 f = j->current_file;
1069                 assert(f && f->current_offset > 0);
1070
1071                 n++;
1072
1073                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1074                 if (r < 0) {
1075                         log_error_errno(r, "Can't read entry: %m");
1076                         goto finish;
1077                 }
1078
1079                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1080                 if (r >= 0)
1081                         continue;
1082
1083                 if (!shall_try_append_again(s->system_journal, r)) {
1084                         log_error_errno(r, "Can't write entry: %m");
1085                         goto finish;
1086                 }
1087
1088                 server_rotate(s);
1089                 server_vacuum(s);
1090
1091                 if (!s->system_journal) {
1092                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1093                         r = -EIO;
1094                         goto finish;
1095                 }
1096
1097                 log_debug("Retrying write.");
1098                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1099                 if (r < 0) {
1100                         log_error_errno(r, "Can't write entry: %m");
1101                         goto finish;
1102                 }
1103         }
1104
1105 finish:
1106         journal_file_post_change(s->system_journal);
1107
1108         journal_file_close(s->runtime_journal);
1109         s->runtime_journal = NULL;
1110
1111         if (r >= 0)
1112                 rm_rf("/run/log/journal", false, true, false);
1113
1114         sd_journal_close(j);
1115
1116         server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1117
1118         return r;
1119 }
1120
1121 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1122         Server *s = userdata;
1123
1124         assert(s);
1125         assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1126
1127         if (revents != EPOLLIN) {
1128                 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1129                 return -EIO;
1130         }
1131
1132         for (;;) {
1133                 struct ucred *ucred = NULL;
1134                 struct timeval *tv = NULL;
1135                 struct cmsghdr *cmsg;
1136                 char *label = NULL;
1137                 size_t label_len = 0;
1138                 struct iovec iovec;
1139
1140                 union {
1141                         struct cmsghdr cmsghdr;
1142
1143                         /* We use NAME_MAX space for the SELinux label
1144                          * here. The kernel currently enforces no
1145                          * limit, but according to suggestions from
1146                          * the SELinux people this will change and it
1147                          * will probably be identical to NAME_MAX. For
1148                          * now we use that, but this should be updated
1149                          * one day when the final limit is known. */
1150                         uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1151                                     CMSG_SPACE(sizeof(struct timeval)) +
1152                                     CMSG_SPACE(sizeof(int)) + /* fd */
1153                                     CMSG_SPACE(NAME_MAX)]; /* selinux label */
1154                 } control = {};
1155                 union sockaddr_union sa = {};
1156                 struct msghdr msghdr = {
1157                         .msg_iov = &iovec,
1158                         .msg_iovlen = 1,
1159                         .msg_control = &control,
1160                         .msg_controllen = sizeof(control),
1161                         .msg_name = &sa,
1162                         .msg_namelen = sizeof(sa),
1163                 };
1164
1165                 ssize_t n;
1166                 int *fds = NULL;
1167                 unsigned n_fds = 0;
1168                 int v = 0;
1169                 size_t m;
1170
1171                 /* Try to get the right size, if we can. (Not all
1172                  * sockets support SIOCINQ, hence we just try, but
1173                  * don't rely on it. */
1174                 (void) ioctl(fd, SIOCINQ, &v);
1175
1176                 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1177                 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1178                                     (size_t) LINE_MAX,
1179                                     ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1180
1181                 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1182                         return log_oom();
1183
1184                 iovec.iov_base = s->buffer;
1185                 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1186
1187                 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1188                 if (n < 0) {
1189                         if (errno == EINTR || errno == EAGAIN)
1190                                 return 0;
1191
1192                         log_error_errno(errno, "recvmsg() failed: %m");
1193                         return -errno;
1194                 }
1195
1196                 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1197
1198                         if (cmsg->cmsg_level == SOL_SOCKET &&
1199                             cmsg->cmsg_type == SCM_CREDENTIALS &&
1200                             cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1201                                 ucred = (struct ucred*) CMSG_DATA(cmsg);
1202                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1203                                  cmsg->cmsg_type == SCM_SECURITY) {
1204                                 label = (char*) CMSG_DATA(cmsg);
1205                                 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1206                         } else if (cmsg->cmsg_level == SOL_SOCKET &&
1207                                    cmsg->cmsg_type == SO_TIMESTAMP &&
1208                                    cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1209                                 tv = (struct timeval*) CMSG_DATA(cmsg);
1210                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1211                                  cmsg->cmsg_type == SCM_RIGHTS) {
1212                                 fds = (int*) CMSG_DATA(cmsg);
1213                                 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1214                         }
1215                 }
1216
1217                 /* And a trailing NUL, just in case */
1218                 s->buffer[n] = 0;
1219
1220                 if (fd == s->syslog_fd) {
1221                         if (n > 0 && n_fds == 0)
1222                                 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1223                         else if (n_fds > 0)
1224                                 log_warning("Got file descriptors via syslog socket. Ignoring.");
1225
1226                 } else if (fd == s->native_fd) {
1227                         if (n > 0 && n_fds == 0)
1228                                 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1229                         else if (n == 0 && n_fds == 1)
1230                                 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1231                         else if (n_fds > 0)
1232                                 log_warning("Got too many file descriptors via native socket. Ignoring.");
1233
1234                 } else {
1235                         assert(fd == s->audit_fd);
1236
1237                         if (n > 0 && n_fds == 0)
1238                                 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1239                         else if (n_fds > 0)
1240                                 log_warning("Got file descriptors via audit socket. Ignoring.");
1241                 }
1242
1243                 close_many(fds, n_fds);
1244         }
1245 }
1246
1247 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1248         Server *s = userdata;
1249
1250         assert(s);
1251
1252         log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
1253
1254         server_flush_to_var(s);
1255         server_sync(s);
1256         server_vacuum(s);
1257
1258         touch("/run/systemd/journal/flushed");
1259
1260         return 0;
1261 }
1262
1263 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1264         Server *s = userdata;
1265
1266         assert(s);
1267
1268         log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
1269         server_rotate(s);
1270         server_vacuum(s);
1271
1272         return 0;
1273 }
1274
1275 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1276         Server *s = userdata;
1277
1278         assert(s);
1279
1280         log_received_signal(LOG_INFO, si);
1281
1282         sd_event_exit(s->event, 0);
1283         return 0;
1284 }
1285
1286 static int setup_signals(Server *s) {
1287         sigset_t mask;
1288         int r;
1289
1290         assert(s);
1291
1292         assert_se(sigemptyset(&mask) == 0);
1293         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1294         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1295
1296         r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1297         if (r < 0)
1298                 return r;
1299
1300         r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1301         if (r < 0)
1302                 return r;
1303
1304         r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1305         if (r < 0)
1306                 return r;
1307
1308         r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1309         if (r < 0)
1310                 return r;
1311
1312         return 0;
1313 }
1314
1315 static int server_parse_proc_cmdline(Server *s) {
1316         _cleanup_free_ char *line = NULL;
1317         const char *w, *state;
1318         size_t l;
1319         int r;
1320
1321         r = proc_cmdline(&line);
1322         if (r < 0) {
1323                 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
1324                 return 0;
1325         }
1326
1327         FOREACH_WORD_QUOTED(w, l, line, state) {
1328                 _cleanup_free_ char *word;
1329
1330                 word = strndup(w, l);
1331                 if (!word)
1332                         return -ENOMEM;
1333
1334                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1335                         r = parse_boolean(word + 35);
1336                         if (r < 0)
1337                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1338                         else
1339                                 s->forward_to_syslog = r;
1340                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1341                         r = parse_boolean(word + 33);
1342                         if (r < 0)
1343                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1344                         else
1345                                 s->forward_to_kmsg = r;
1346                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1347                         r = parse_boolean(word + 36);
1348                         if (r < 0)
1349                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1350                         else
1351                                 s->forward_to_console = r;
1352                 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1353                         r = parse_boolean(word + 33);
1354                         if (r < 0)
1355                                 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1356                         else
1357                                 s->forward_to_wall = r;
1358                 } else if (startswith(word, "systemd.journald"))
1359                         log_warning("Invalid systemd.journald parameter. Ignoring.");
1360         }
1361         /* do not warn about state here, since probably systemd already did */
1362
1363         return 0;
1364 }
1365
1366 static int server_parse_config_file(Server *s) {
1367         assert(s);
1368
1369         return config_parse_many("/etc/systemd/journald.conf",
1370                                  CONF_DIRS_NULSTR("systemd/journald.conf"),
1371                                  "Journal\0",
1372                                  config_item_perf_lookup, journald_gperf_lookup,
1373                                  false, s);
1374 }
1375
1376 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1377         Server *s = userdata;
1378
1379         assert(s);
1380
1381         server_sync(s);
1382         return 0;
1383 }
1384
1385 int server_schedule_sync(Server *s, int priority) {
1386         int r;
1387
1388         assert(s);
1389
1390         if (priority <= LOG_CRIT) {
1391                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1392                 server_sync(s);
1393                 return 0;
1394         }
1395
1396         if (s->sync_scheduled)
1397                 return 0;
1398
1399         if (s->sync_interval_usec > 0) {
1400                 usec_t when;
1401
1402                 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1403                 if (r < 0)
1404                         return r;
1405
1406                 when += s->sync_interval_usec;
1407
1408                 if (!s->sync_event_source) {
1409                         r = sd_event_add_time(
1410                                         s->event,
1411                                         &s->sync_event_source,
1412                                         CLOCK_MONOTONIC,
1413                                         when, 0,
1414                                         server_dispatch_sync, s);
1415                         if (r < 0)
1416                                 return r;
1417
1418                         r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1419                 } else {
1420                         r = sd_event_source_set_time(s->sync_event_source, when);
1421                         if (r < 0)
1422                                 return r;
1423
1424                         r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1425                 }
1426                 if (r < 0)
1427                         return r;
1428
1429                 s->sync_scheduled = true;
1430         }
1431
1432         return 0;
1433 }
1434
1435 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1436         Server *s = userdata;
1437
1438         assert(s);
1439
1440         server_cache_hostname(s);
1441         return 0;
1442 }
1443
1444 static int server_open_hostname(Server *s) {
1445         int r;
1446
1447         assert(s);
1448
1449         s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1450         if (s->hostname_fd < 0)
1451                 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1452
1453         r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1454         if (r < 0) {
1455                 /* kernels prior to 3.2 don't support polling this file. Ignore
1456                  * the failure. */
1457                 if (r == -EPERM) {
1458                         log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1459                                         strerror(-r));
1460                         s->hostname_fd = safe_close(s->hostname_fd);
1461                         return 0;
1462                 }
1463
1464                 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1465         }
1466
1467         r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1468         if (r < 0)
1469                 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1470
1471         return 0;
1472 }
1473
1474 int server_init(Server *s) {
1475         int n, r, fd;
1476
1477         assert(s);
1478
1479         zero(*s);
1480         s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = -1;
1481         s->compress = true;
1482         s->seal = true;
1483
1484         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1485         s->sync_scheduled = false;
1486
1487         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1488         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1489
1490         s->forward_to_wall = true;
1491
1492         s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1493
1494         s->max_level_store = LOG_DEBUG;
1495         s->max_level_syslog = LOG_DEBUG;
1496         s->max_level_kmsg = LOG_NOTICE;
1497         s->max_level_console = LOG_INFO;
1498         s->max_level_wall = LOG_EMERG;
1499
1500         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1501         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1502
1503         server_parse_config_file(s);
1504         server_parse_proc_cmdline(s);
1505         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1506                 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1507                           s->rate_limit_interval, s->rate_limit_burst);
1508                 s->rate_limit_interval = s->rate_limit_burst = 0;
1509         }
1510
1511         mkdir_p("/run/systemd/journal", 0755);
1512
1513         s->user_journals = ordered_hashmap_new(NULL);
1514         if (!s->user_journals)
1515                 return log_oom();
1516
1517         s->mmap = mmap_cache_new();
1518         if (!s->mmap)
1519                 return log_oom();
1520
1521         r = sd_event_default(&s->event);
1522         if (r < 0)
1523                 return log_error_errno(r, "Failed to create event loop: %m");
1524
1525         sd_event_set_watchdog(s->event, true);
1526
1527         n = sd_listen_fds(true);
1528         if (n < 0)
1529                 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1530
1531         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1532
1533                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1534
1535                         if (s->native_fd >= 0) {
1536                                 log_error("Too many native sockets passed.");
1537                                 return -EINVAL;
1538                         }
1539
1540                         s->native_fd = fd;
1541
1542                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1543
1544                         if (s->stdout_fd >= 0) {
1545                                 log_error("Too many stdout sockets passed.");
1546                                 return -EINVAL;
1547                         }
1548
1549                         s->stdout_fd = fd;
1550
1551                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1552                            sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1553
1554                         if (s->syslog_fd >= 0) {
1555                                 log_error("Too many /dev/log sockets passed.");
1556                                 return -EINVAL;
1557                         }
1558
1559                         s->syslog_fd = fd;
1560
1561                 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1562
1563                         if (s->audit_fd >= 0) {
1564                                 log_error("Too many audit sockets passed.");
1565                                 return -EINVAL;
1566                         }
1567
1568                         s->audit_fd = fd;
1569
1570                 } else {
1571                         log_warning("Unknown socket passed as file descriptor %d, ignoring.", fd);
1572
1573                         /* Let's close the fd, better be safe than
1574                            sorry. The fd might reference some resource
1575                            that we really want to release if we don't
1576                            make use of it. */
1577
1578                         safe_close(fd);
1579                 }
1580         }
1581
1582         r = server_open_syslog_socket(s);
1583         if (r < 0)
1584                 return r;
1585
1586         r = server_open_native_socket(s);
1587         if (r < 0)
1588                 return r;
1589
1590         r = server_open_stdout_socket(s);
1591         if (r < 0)
1592                 return r;
1593
1594         r = server_open_dev_kmsg(s);
1595         if (r < 0)
1596                 return r;
1597
1598         r = server_open_audit(s);
1599         if (r < 0)
1600                 return r;
1601
1602         r = server_open_kernel_seqnum(s);
1603         if (r < 0)
1604                 return r;
1605
1606         r = server_open_hostname(s);
1607         if (r < 0)
1608                 return r;
1609
1610         r = setup_signals(s);
1611         if (r < 0)
1612                 return r;
1613
1614         s->udev = udev_new();
1615         if (!s->udev)
1616                 return -ENOMEM;
1617
1618         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1619         if (!s->rate_limit)
1620                 return -ENOMEM;
1621
1622         r = cg_get_root_path(&s->cgroup_root);
1623         if (r < 0)
1624                 return r;
1625
1626         server_cache_hostname(s);
1627         server_cache_boot_id(s);
1628         server_cache_machine_id(s);
1629
1630         r = system_journal_open(s, false);
1631         if (r < 0)
1632                 return r;
1633
1634         return 0;
1635 }
1636
1637 void server_maybe_append_tags(Server *s) {
1638 #ifdef HAVE_GCRYPT
1639         JournalFile *f;
1640         Iterator i;
1641         usec_t n;
1642
1643         n = now(CLOCK_REALTIME);
1644
1645         if (s->system_journal)
1646                 journal_file_maybe_append_tag(s->system_journal, n);
1647
1648         ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1649                 journal_file_maybe_append_tag(f, n);
1650 #endif
1651 }
1652
1653 void server_done(Server *s) {
1654         JournalFile *f;
1655         assert(s);
1656
1657         while (s->stdout_streams)
1658                 stdout_stream_free(s->stdout_streams);
1659
1660         if (s->system_journal)
1661                 journal_file_close(s->system_journal);
1662
1663         if (s->runtime_journal)
1664                 journal_file_close(s->runtime_journal);
1665
1666         while ((f = ordered_hashmap_steal_first(s->user_journals)))
1667                 journal_file_close(f);
1668
1669         ordered_hashmap_free(s->user_journals);
1670
1671         sd_event_source_unref(s->syslog_event_source);
1672         sd_event_source_unref(s->native_event_source);
1673         sd_event_source_unref(s->stdout_event_source);
1674         sd_event_source_unref(s->dev_kmsg_event_source);
1675         sd_event_source_unref(s->audit_event_source);
1676         sd_event_source_unref(s->sync_event_source);
1677         sd_event_source_unref(s->sigusr1_event_source);
1678         sd_event_source_unref(s->sigusr2_event_source);
1679         sd_event_source_unref(s->sigterm_event_source);
1680         sd_event_source_unref(s->sigint_event_source);
1681         sd_event_source_unref(s->hostname_event_source);
1682         sd_event_unref(s->event);
1683
1684         safe_close(s->syslog_fd);
1685         safe_close(s->native_fd);
1686         safe_close(s->stdout_fd);
1687         safe_close(s->dev_kmsg_fd);
1688         safe_close(s->audit_fd);
1689         safe_close(s->hostname_fd);
1690
1691         if (s->rate_limit)
1692                 journal_rate_limit_free(s->rate_limit);
1693
1694         if (s->kernel_seqnum)
1695                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1696
1697         free(s->buffer);
1698         free(s->tty_path);
1699         free(s->cgroup_root);
1700         free(s->hostname_field);
1701
1702         if (s->mmap)
1703                 mmap_cache_unref(s->mmap);
1704
1705         if (s->udev)
1706                 udev_unref(s->udev);
1707 }