chiark / gitweb /
journalctl: add new --flush command and make use of it in systemd-journal-flush.service
[elogind.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
26 #include <sys/mman.h>
27 #include <sys/timerfd.h>
28
29 #include <libudev.h>
30
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33 #include "sd-daemon.h"
34 #include "fileio.h"
35 #include "mkdir.h"
36 #include "hashmap.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
40 #include "list.h"
41 #include "missing.h"
42 #include "conf-parser.h"
43 #include "selinux-util.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-rate-limit.h"
48 #include "journald-kmsg.h"
49 #include "journald-syslog.h"
50 #include "journald-stream.h"
51 #include "journald-console.h"
52 #include "journald-native.h"
53 #include "journald-server.h"
54
55 #ifdef HAVE_ACL
56 #include <sys/acl.h>
57 #include <acl/libacl.h>
58 #include "acl-util.h"
59 #endif
60
61 #ifdef HAVE_SELINUX
62 #include <selinux/selinux.h>
63 #endif
64
/* Upper bound on the number of per-user journal files kept open at once;
 * find_journal() closes already-open ones until it is below this limit
 * before opening another. */
#define USER_JOURNALS_MAX 1024

/* Default tunables. NOTE(review): presumably applied when the server is
 * initialized; their use is not visible in this part of the file. */
#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* How long available_space() may keep returning its cached result before
 * it scans the journal directory again. */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
73
74 static const char* const storage_table[_STORAGE_MAX] = {
75         [STORAGE_AUTO] = "auto",
76         [STORAGE_VOLATILE] = "volatile",
77         [STORAGE_PERSISTENT] = "persistent",
78         [STORAGE_NONE] = "none"
79 };
80
81 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
82 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
83
84 static const char* const split_mode_table[_SPLIT_MAX] = {
85         [SPLIT_LOGIN] = "login",
86         [SPLIT_UID] = "uid",
87         [SPLIT_NONE] = "none",
88 };
89
90 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
91 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
92
93 static uint64_t available_space(Server *s, bool verbose) {
94         char ids[33];
95         _cleanup_free_ char *p = NULL;
96         sd_id128_t machine;
97         struct statvfs ss;
98         uint64_t sum = 0, ss_avail = 0, avail = 0;
99         int r;
100         _cleanup_closedir_ DIR *d = NULL;
101         usec_t ts;
102         const char *f;
103         JournalMetrics *m;
104
105         ts = now(CLOCK_MONOTONIC);
106
107         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
108             && !verbose)
109                 return s->cached_available_space;
110
111         r = sd_id128_get_machine(&machine);
112         if (r < 0)
113                 return 0;
114
115         if (s->system_journal) {
116                 f = "/var/log/journal/";
117                 m = &s->system_metrics;
118         } else {
119                 f = "/run/log/journal/";
120                 m = &s->runtime_metrics;
121         }
122
123         assert(m);
124
125         p = strappend(f, sd_id128_to_string(machine, ids));
126         if (!p)
127                 return 0;
128
129         d = opendir(p);
130         if (!d)
131                 return 0;
132
133         if (fstatvfs(dirfd(d), &ss) < 0)
134                 return 0;
135
136         for (;;) {
137                 struct stat st;
138                 struct dirent *de;
139
140                 errno = 0;
141                 de = readdir(d);
142                 if (!de && errno != 0)
143                         return 0;
144
145                 if (!de)
146                         break;
147
148                 if (!endswith(de->d_name, ".journal") &&
149                     !endswith(de->d_name, ".journal~"))
150                         continue;
151
152                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
153                         continue;
154
155                 if (!S_ISREG(st.st_mode))
156                         continue;
157
158                 sum += (uint64_t) st.st_blocks * 512UL;
159         }
160
161         ss_avail = ss.f_bsize * ss.f_bavail;
162
163         /* If we reached a high mark, we will always allow this much
164          * again, unless usage goes above max_use. This watermark
165          * value is cached so that we don't give up space on pressure,
166          * but hover below the maximum usage. */
167
168         if (m->use < sum)
169                 m->use = sum;
170
171         avail = LESS_BY(ss_avail, m->keep_free);
172
173         s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
174         s->cached_available_space_timestamp = ts;
175
176         if (verbose) {
177                 char    fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
178                         fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
179
180                 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
181                                       "%s journal is using %s (max allowed %s, "
182                                       "trying to leave %s free of %s available → current limit %s).",
183                                       s->system_journal ? "Permanent" : "Runtime",
184                                       format_bytes(fb1, sizeof(fb1), sum),
185                                       format_bytes(fb2, sizeof(fb2), m->max_use),
186                                       format_bytes(fb3, sizeof(fb3), m->keep_free),
187                                       format_bytes(fb4, sizeof(fb4), ss_avail),
188                                       format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
189         }
190
191         return s->cached_available_space;
192 }
193
194 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
195         int r;
196 #ifdef HAVE_ACL
197         acl_t acl;
198         acl_entry_t entry;
199         acl_permset_t permset;
200 #endif
201
202         assert(f);
203
204         r = fchmod(f->fd, 0640);
205         if (r < 0)
206                 log_warning("Failed to fix access mode on %s, ignoring: %s", f->path, strerror(-r));
207
208 #ifdef HAVE_ACL
209         if (uid <= SYSTEM_UID_MAX)
210                 return;
211
212         acl = acl_get_fd(f->fd);
213         if (!acl) {
214                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
215                 return;
216         }
217
218         r = acl_find_uid(acl, uid, &entry);
219         if (r <= 0) {
220
221                 if (acl_create_entry(&acl, &entry) < 0 ||
222                     acl_set_tag_type(entry, ACL_USER) < 0 ||
223                     acl_set_qualifier(entry, &uid) < 0) {
224                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
225                         goto finish;
226                 }
227         }
228
229         /* We do not recalculate the mask unconditionally here,
230          * so that the fchmod() mask above stays intact. */
231         if (acl_get_permset(entry, &permset) < 0 ||
232             acl_add_perm(permset, ACL_READ) < 0 ||
233             calc_acl_mask_if_needed(&acl) < 0) {
234                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
235                 goto finish;
236         }
237
238         if (acl_set_fd(f->fd, acl) < 0)
239                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
240
241 finish:
242         acl_free(acl);
243 #endif
244 }
245
246 static JournalFile* find_journal(Server *s, uid_t uid) {
247         _cleanup_free_ char *p = NULL;
248         int r;
249         JournalFile *f;
250         sd_id128_t machine;
251
252         assert(s);
253
254         /* We split up user logs only on /var, not on /run. If the
255          * runtime file is open, we write to it exclusively, in order
256          * to guarantee proper order as soon as we flush /run to
257          * /var and close the runtime file. */
258
259         if (s->runtime_journal)
260                 return s->runtime_journal;
261
262         if (uid <= SYSTEM_UID_MAX)
263                 return s->system_journal;
264
265         r = sd_id128_get_machine(&machine);
266         if (r < 0)
267                 return s->system_journal;
268
269         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
270         if (f)
271                 return f;
272
273         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
274                      SD_ID128_FORMAT_VAL(machine), uid) < 0)
275                 return s->system_journal;
276
277         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
278                 /* Too many open? Then let's close one */
279                 f = hashmap_steal_first(s->user_journals);
280                 assert(f);
281                 journal_file_close(f);
282         }
283
284         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
285         if (r < 0)
286                 return s->system_journal;
287
288         server_fix_perms(s, f, uid);
289
290         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
291         if (r < 0) {
292                 journal_file_close(f);
293                 return s->system_journal;
294         }
295
296         return f;
297 }
298
299 static int do_rotate(Server *s, JournalFile **f, const char* name,
300                      bool seal, uint32_t uid) {
301         int r;
302         assert(s);
303
304         if (!*f)
305                 return -EINVAL;
306
307         r = journal_file_rotate(f, s->compress, seal);
308         if (r < 0)
309                 if (*f)
310                         log_error("Failed to rotate %s: %s",
311                                   (*f)->path, strerror(-r));
312                 else
313                         log_error("Failed to create new %s journal: %s",
314                                   name, strerror(-r));
315         else
316                 server_fix_perms(s, *f, uid);
317         return r;
318 }
319
320 void server_rotate(Server *s) {
321         JournalFile *f;
322         void *k;
323         Iterator i;
324         int r;
325
326         log_debug("Rotating...");
327
328         do_rotate(s, &s->runtime_journal, "runtime", false, 0);
329         do_rotate(s, &s->system_journal, "system", s->seal, 0);
330
331         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
332                 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
333                 if (r >= 0)
334                         hashmap_replace(s->user_journals, k, f);
335                 else if (!f)
336                         /* Old file has been closed and deallocated */
337                         hashmap_remove(s->user_journals, k);
338         }
339 }
340
341 void server_sync(Server *s) {
342         JournalFile *f;
343         void *k;
344         Iterator i;
345         int r;
346
347         if (s->system_journal) {
348                 r = journal_file_set_offline(s->system_journal);
349                 if (r < 0)
350                         log_error("Failed to sync system journal: %s", strerror(-r));
351         }
352
353         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
354                 r = journal_file_set_offline(f);
355                 if (r < 0)
356                         log_error("Failed to sync user journal: %s", strerror(-r));
357         }
358
359         if (s->sync_event_source) {
360                 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
361                 if (r < 0)
362                         log_error("Failed to disable sync timer source: %s", strerror(-r));
363         }
364
365         s->sync_scheduled = false;
366 }
367
368 static void do_vacuum(Server *s, char *ids, JournalFile *f, const char* path,
369                       JournalMetrics *metrics) {
370         char *p;
371         int r;
372
373         if (!f)
374                 return;
375
376         p = strappenda(path, ids);
377         r = journal_directory_vacuum(p, metrics->max_use, s->max_retention_usec, &s->oldest_file_usec);
378         if (r < 0 && r != -ENOENT)
379                 log_error("Failed to vacuum %s: %s", p, strerror(-r));
380 }
381
382 void server_vacuum(Server *s) {
383         char ids[33];
384         sd_id128_t machine;
385         int r;
386
387         log_debug("Vacuuming...");
388
389         s->oldest_file_usec = 0;
390
391         r = sd_id128_get_machine(&machine);
392         if (r < 0) {
393                 log_error("Failed to get machine ID: %s", strerror(-r));
394                 return;
395         }
396         sd_id128_to_string(machine, ids);
397
398         do_vacuum(s, ids, s->system_journal, "/var/log/journal/", &s->system_metrics);
399         do_vacuum(s, ids, s->runtime_journal, "/run/log/journal/", &s->runtime_metrics);
400
401         s->cached_available_space_timestamp = 0;
402 }
403
404 static void server_cache_machine_id(Server *s) {
405         sd_id128_t id;
406         int r;
407
408         assert(s);
409
410         r = sd_id128_get_machine(&id);
411         if (r < 0)
412                 return;
413
414         sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
415 }
416
417 static void server_cache_boot_id(Server *s) {
418         sd_id128_t id;
419         int r;
420
421         assert(s);
422
423         r = sd_id128_get_boot(&id);
424         if (r < 0)
425                 return;
426
427         sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
428 }
429
430 static void server_cache_hostname(Server *s) {
431         _cleanup_free_ char *t = NULL;
432         char *x;
433
434         assert(s);
435
436         t = gethostname_malloc();
437         if (!t)
438                 return;
439
440         x = strappend("_HOSTNAME=", t);
441         if (!x)
442                 return;
443
444         free(s->hostname_field);
445         s->hostname_field = x;
446 }
447
448 bool shall_try_append_again(JournalFile *f, int r) {
449
450         /* -E2BIG            Hit configured limit
451            -EFBIG            Hit fs limit
452            -EDQUOT           Quota limit hit
453            -ENOSPC           Disk full
454            -EHOSTDOWN        Other machine
455            -EBUSY            Unclean shutdown
456            -EPROTONOSUPPORT  Unsupported feature
457            -EBADMSG          Corrupted
458            -ENODATA          Truncated
459            -ESHUTDOWN        Already archived */
460
461         if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
462                 log_debug("%s: Allocation limit reached, rotating.", f->path);
463         else if (r == -EHOSTDOWN)
464                 log_info("%s: Journal file from other machine, rotating.", f->path);
465         else if (r == -EBUSY)
466                 log_info("%s: Unclean shutdown, rotating.", f->path);
467         else if (r == -EPROTONOSUPPORT)
468                 log_info("%s: Unsupported feature, rotating.", f->path);
469         else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
470                 log_warning("%s: Journal file corrupted, rotating.", f->path);
471         else
472                 return false;
473
474         return true;
475 }
476
477 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
478         JournalFile *f;
479         bool vacuumed = false;
480         int r;
481
482         assert(s);
483         assert(iovec);
484         assert(n > 0);
485
486         f = find_journal(s, uid);
487         if (!f)
488                 return;
489
490         if (journal_file_rotate_suggested(f, s->max_file_usec)) {
491                 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
492                 server_rotate(s);
493                 server_vacuum(s);
494                 vacuumed = true;
495
496                 f = find_journal(s, uid);
497                 if (!f)
498                         return;
499         }
500
501         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
502         if (r >= 0) {
503                 server_schedule_sync(s, priority);
504                 return;
505         }
506
507         if (vacuumed || !shall_try_append_again(f, r)) {
508                 size_t size = 0;
509                 unsigned i;
510                 for (i = 0; i < n; i++)
511                         size += iovec[i].iov_len;
512
513                 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
514                 return;
515         }
516
517         server_rotate(s);
518         server_vacuum(s);
519
520         f = find_journal(s, uid);
521         if (!f)
522                 return;
523
524         log_debug("Retrying write.");
525         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
526         if (r < 0) {
527                 size_t size = 0;
528                 unsigned i;
529                 for (i = 0; i < n; i++)
530                         size += iovec[i].iov_len;
531
532                 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
533         } else
534                 server_schedule_sync(s, priority);
535 }
536
537 static void dispatch_message_real(
538                 Server *s,
539                 struct iovec *iovec, unsigned n, unsigned m,
540                 struct ucred *ucred,
541                 struct timeval *tv,
542                 const char *label, size_t label_len,
543                 const char *unit_id,
544                 int priority,
545                 pid_t object_pid) {
546
547         char    pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
548                 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
549                 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
550                 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
551                 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
552                 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
553                 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
554                 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
555         uid_t object_uid;
556         gid_t object_gid;
557         char *x;
558         int r;
559         char *t, *c;
560         uid_t realuid = 0, owner = 0, journal_uid;
561         bool owner_valid = false;
562 #ifdef HAVE_AUDIT
563         char    audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
564                 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
565                 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
566                 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
567
568         uint32_t audit;
569         uid_t loginuid;
570 #endif
571
572         assert(s);
573         assert(iovec);
574         assert(n > 0);
575         assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
576
577         if (ucred) {
578                 realuid = ucred->uid;
579
580                 sprintf(pid, "_PID="PID_FMT, ucred->pid);
581                 IOVEC_SET_STRING(iovec[n++], pid);
582
583                 sprintf(uid, "_UID="UID_FMT, ucred->uid);
584                 IOVEC_SET_STRING(iovec[n++], uid);
585
586                 sprintf(gid, "_GID="GID_FMT, ucred->gid);
587                 IOVEC_SET_STRING(iovec[n++], gid);
588
589                 r = get_process_comm(ucred->pid, &t);
590                 if (r >= 0) {
591                         x = strappenda("_COMM=", t);
592                         free(t);
593                         IOVEC_SET_STRING(iovec[n++], x);
594                 }
595
596                 r = get_process_exe(ucred->pid, &t);
597                 if (r >= 0) {
598                         x = strappenda("_EXE=", t);
599                         free(t);
600                         IOVEC_SET_STRING(iovec[n++], x);
601                 }
602
603                 r = get_process_cmdline(ucred->pid, 0, false, &t);
604                 if (r >= 0) {
605                         x = strappenda("_CMDLINE=", t);
606                         free(t);
607                         IOVEC_SET_STRING(iovec[n++], x);
608                 }
609
610                 r = get_process_capeff(ucred->pid, &t);
611                 if (r >= 0) {
612                         x = strappenda("_CAP_EFFECTIVE=", t);
613                         free(t);
614                         IOVEC_SET_STRING(iovec[n++], x);
615                 }
616
617 #ifdef HAVE_AUDIT
618                 r = audit_session_from_pid(ucred->pid, &audit);
619                 if (r >= 0) {
620                         sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
621                         IOVEC_SET_STRING(iovec[n++], audit_session);
622                 }
623
624                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
625                 if (r >= 0) {
626                         sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
627                         IOVEC_SET_STRING(iovec[n++], audit_loginuid);
628                 }
629 #endif
630
631                 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
632                 if (r >= 0) {
633                         char *session = NULL;
634
635                         x = strappenda("_SYSTEMD_CGROUP=", c);
636                         IOVEC_SET_STRING(iovec[n++], x);
637
638                         r = cg_path_get_session(c, &t);
639                         if (r >= 0) {
640                                 session = strappenda("_SYSTEMD_SESSION=", t);
641                                 free(t);
642                                 IOVEC_SET_STRING(iovec[n++], session);
643                         }
644
645                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
646                                 owner_valid = true;
647
648                                 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
649                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
650                         }
651
652                         if (cg_path_get_unit(c, &t) >= 0) {
653                                 x = strappenda("_SYSTEMD_UNIT=", t);
654                                 free(t);
655                                 IOVEC_SET_STRING(iovec[n++], x);
656                         } else if (unit_id && !session) {
657                                 x = strappenda("_SYSTEMD_UNIT=", unit_id);
658                                 IOVEC_SET_STRING(iovec[n++], x);
659                         }
660
661                         if (cg_path_get_user_unit(c, &t) >= 0) {
662                                 x = strappenda("_SYSTEMD_USER_UNIT=", t);
663                                 free(t);
664                                 IOVEC_SET_STRING(iovec[n++], x);
665                         } else if (unit_id && session) {
666                                 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
667                                 IOVEC_SET_STRING(iovec[n++], x);
668                         }
669
670                         if (cg_path_get_slice(c, &t) >= 0) {
671                                 x = strappenda("_SYSTEMD_SLICE=", t);
672                                 free(t);
673                                 IOVEC_SET_STRING(iovec[n++], x);
674                         }
675
676                         free(c);
677                 } else if (unit_id) {
678                         x = strappenda("_SYSTEMD_UNIT=", unit_id);
679                         IOVEC_SET_STRING(iovec[n++], x);
680                 }
681
682 #ifdef HAVE_SELINUX
683                 if (use_selinux()) {
684                         if (label) {
685                                 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
686
687                                 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
688                                 IOVEC_SET_STRING(iovec[n++], x);
689                         } else {
690                                 security_context_t con;
691
692                                 if (getpidcon(ucred->pid, &con) >= 0) {
693                                         x = strappenda("_SELINUX_CONTEXT=", con);
694
695                                         freecon(con);
696                                         IOVEC_SET_STRING(iovec[n++], x);
697                                 }
698                         }
699                 }
700 #endif
701         }
702         assert(n <= m);
703
704         if (object_pid) {
705                 r = get_process_uid(object_pid, &object_uid);
706                 if (r >= 0) {
707                         sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
708                         IOVEC_SET_STRING(iovec[n++], o_uid);
709                 }
710
711                 r = get_process_gid(object_pid, &object_gid);
712                 if (r >= 0) {
713                         sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
714                         IOVEC_SET_STRING(iovec[n++], o_gid);
715                 }
716
717                 r = get_process_comm(object_pid, &t);
718                 if (r >= 0) {
719                         x = strappenda("OBJECT_COMM=", t);
720                         free(t);
721                         IOVEC_SET_STRING(iovec[n++], x);
722                 }
723
724                 r = get_process_exe(object_pid, &t);
725                 if (r >= 0) {
726                         x = strappenda("OBJECT_EXE=", t);
727                         free(t);
728                         IOVEC_SET_STRING(iovec[n++], x);
729                 }
730
731                 r = get_process_cmdline(object_pid, 0, false, &t);
732                 if (r >= 0) {
733                         x = strappenda("OBJECT_CMDLINE=", t);
734                         free(t);
735                         IOVEC_SET_STRING(iovec[n++], x);
736                 }
737
738 #ifdef HAVE_AUDIT
739                 r = audit_session_from_pid(object_pid, &audit);
740                 if (r >= 0) {
741                         sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
742                         IOVEC_SET_STRING(iovec[n++], o_audit_session);
743                 }
744
745                 r = audit_loginuid_from_pid(object_pid, &loginuid);
746                 if (r >= 0) {
747                         sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
748                         IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
749                 }
750 #endif
751
752                 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
753                 if (r >= 0) {
754                         x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
755                         IOVEC_SET_STRING(iovec[n++], x);
756
757                         r = cg_path_get_session(c, &t);
758                         if (r >= 0) {
759                                 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
760                                 free(t);
761                                 IOVEC_SET_STRING(iovec[n++], x);
762                         }
763
764                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
765                                 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
766                                 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
767                         }
768
769                         if (cg_path_get_unit(c, &t) >= 0) {
770                                 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
771                                 free(t);
772                                 IOVEC_SET_STRING(iovec[n++], x);
773                         }
774
775                         if (cg_path_get_user_unit(c, &t) >= 0) {
776                                 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
777                                 free(t);
778                                 IOVEC_SET_STRING(iovec[n++], x);
779                         }
780
781                         free(c);
782                 }
783         }
784         assert(n <= m);
785
786         if (tv) {
787                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
788                 IOVEC_SET_STRING(iovec[n++], source_time);
789         }
790
791         /* Note that strictly speaking storing the boot id here is
792          * redundant since the entry includes this in-line
793          * anyway. However, we need this indexed, too. */
794         if (!isempty(s->boot_id_field))
795                 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
796
797         if (!isempty(s->machine_id_field))
798                 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
799
800         if (!isempty(s->hostname_field))
801                 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
802
803         assert(n <= m);
804
805         if (s->split_mode == SPLIT_UID && realuid > 0)
806                 /* Split up strictly by any UID */
807                 journal_uid = realuid;
808         else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
809                 /* Split up by login UIDs.  We do this only if the
810                  * realuid is not root, in order not to accidentally
811                  * leak privileged information to the user that is
812                  * logged by a privileged process that is part of an
813                  * unprivileged session.*/
814                 journal_uid = owner;
815         else
816                 journal_uid = 0;
817
818         write_to_journal(s, journal_uid, iovec, n, priority);
819 }
820
821 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
822         char mid[11 + 32 + 1];
823         char buffer[16 + LINE_MAX + 1];
824         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
825         int n = 0;
826         va_list ap;
827         struct ucred ucred = {};
828
829         assert(s);
830         assert(format);
831
832         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
833         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
834
835         memcpy(buffer, "MESSAGE=", 8);
836         va_start(ap, format);
837         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
838         va_end(ap);
839         char_array_0(buffer);
840         IOVEC_SET_STRING(iovec[n++], buffer);
841
842         if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
843                 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
844                 char_array_0(mid);
845                 IOVEC_SET_STRING(iovec[n++], mid);
846         }
847
848         ucred.pid = getpid();
849         ucred.uid = getuid();
850         ucred.gid = getgid();
851
852         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
853 }
854
855 void server_dispatch_message(
856                 Server *s,
857                 struct iovec *iovec, unsigned n, unsigned m,
858                 struct ucred *ucred,
859                 struct timeval *tv,
860                 const char *label, size_t label_len,
861                 const char *unit_id,
862                 int priority,
863                 pid_t object_pid) {
864
865         int rl, r;
866         _cleanup_free_ char *path = NULL;
867         char *c;
868
869         assert(s);
870         assert(iovec || n == 0);
871
872         if (n == 0)
873                 return;
874
875         if (LOG_PRI(priority) > s->max_level_store)
876                 return;
877
878         /* Stop early in case the information will not be stored
879          * in a journal. */
880         if (s->storage == STORAGE_NONE)
881                 return;
882
883         if (!ucred)
884                 goto finish;
885
886         r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
887         if (r < 0)
888                 goto finish;
889
890         /* example: /user/lennart/3/foobar
891          *          /system/dbus.service/foobar
892          *
893          * So let's cut of everything past the third /, since that is
894          * where user directories start */
895
896         c = strchr(path, '/');
897         if (c) {
898                 c = strchr(c+1, '/');
899                 if (c) {
900                         c = strchr(c+1, '/');
901                         if (c)
902                                 *c = 0;
903                 }
904         }
905
906         rl = journal_rate_limit_test(s->rate_limit, path,
907                                      priority & LOG_PRIMASK, available_space(s, false));
908
909         if (rl == 0)
910                 return;
911
912         /* Write a suppression message if we suppressed something */
913         if (rl > 1)
914                 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
915                                       "Suppressed %u messages from %s", rl - 1, path);
916
917 finish:
918         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
919 }
920
921
922 static int system_journal_open(Server *s) {
923         int r;
924         char *fn;
925         sd_id128_t machine;
926         char ids[33];
927
928         r = sd_id128_get_machine(&machine);
929         if (r < 0) {
930                 log_error("Failed to get machine id: %s", strerror(-r));
931                 return r;
932         }
933
934         sd_id128_to_string(machine, ids);
935
936         if (!s->system_journal &&
937             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
938             access("/run/systemd/journal/flushed", F_OK) >= 0) {
939
940                 /* If in auto mode: first try to create the machine
941                  * path, but not the prefix.
942                  *
943                  * If in persistent mode: create /var/log/journal and
944                  * the machine path */
945
946                 if (s->storage == STORAGE_PERSISTENT)
947                         (void) mkdir("/var/log/journal/", 0755);
948
949                 fn = strappenda("/var/log/journal/", ids);
950                 (void) mkdir(fn, 0755);
951
952                 fn = strappenda(fn, "/system.journal");
953                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
954
955                 if (r >= 0)
956                         server_fix_perms(s, s->system_journal, 0);
957                 else if (r < 0) {
958                         if (r != -ENOENT && r != -EROFS)
959                                 log_warning("Failed to open system journal: %s", strerror(-r));
960
961                         r = 0;
962                 }
963         }
964
965         if (!s->runtime_journal &&
966             (s->storage != STORAGE_NONE)) {
967
968                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
969                 if (!fn)
970                         return -ENOMEM;
971
972                 if (s->system_journal) {
973
974                         /* Try to open the runtime journal, but only
975                          * if it already exists, so that we can flush
976                          * it into the system journal */
977
978                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
979                         free(fn);
980
981                         if (r < 0) {
982                                 if (r != -ENOENT)
983                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
984
985                                 r = 0;
986                         }
987
988                 } else {
989
990                         /* OK, we really need the runtime journal, so create
991                          * it if necessary. */
992
993                         (void) mkdir("/run/log", 0755);
994                         (void) mkdir("/run/log/journal", 0755);
995                         (void) mkdir_parents(fn, 0750);
996
997                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
998                         free(fn);
999
1000                         if (r < 0) {
1001                                 log_error("Failed to open runtime journal: %s", strerror(-r));
1002                                 return r;
1003                         }
1004                 }
1005
1006                 if (s->runtime_journal)
1007                         server_fix_perms(s, s->runtime_journal, 0);
1008         }
1009
1010         available_space(s, true);
1011
1012         return r;
1013 }
1014
1015 int server_flush_to_var(Server *s) {
1016         sd_id128_t machine;
1017         sd_journal *j = NULL;
1018         char ts[FORMAT_TIMESPAN_MAX];
1019         usec_t start;
1020         unsigned n = 0;
1021         int r;
1022
1023         assert(s);
1024
1025         if (s->storage != STORAGE_AUTO &&
1026             s->storage != STORAGE_PERSISTENT)
1027                 return 0;
1028
1029         if (!s->runtime_journal)
1030                 return 0;
1031
1032         system_journal_open(s);
1033
1034         if (!s->system_journal)
1035                 return 0;
1036
1037         log_debug("Flushing to /var...");
1038
1039         start = now(CLOCK_MONOTONIC);
1040
1041         r = sd_id128_get_machine(&machine);
1042         if (r < 0)
1043                 return r;
1044
1045         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1046         if (r < 0) {
1047                 log_error("Failed to read runtime journal: %s", strerror(-r));
1048                 return r;
1049         }
1050
1051         sd_journal_set_data_threshold(j, 0);
1052
1053         SD_JOURNAL_FOREACH(j) {
1054                 Object *o = NULL;
1055                 JournalFile *f;
1056
1057                 f = j->current_file;
1058                 assert(f && f->current_offset > 0);
1059
1060                 n++;
1061
1062                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1063                 if (r < 0) {
1064                         log_error("Can't read entry: %s", strerror(-r));
1065                         goto finish;
1066                 }
1067
1068                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1069                 if (r >= 0)
1070                         continue;
1071
1072                 if (!shall_try_append_again(s->system_journal, r)) {
1073                         log_error("Can't write entry: %s", strerror(-r));
1074                         goto finish;
1075                 }
1076
1077                 server_rotate(s);
1078                 server_vacuum(s);
1079
1080                 if (!s->system_journal) {
1081                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1082                         r = -EIO;
1083                         goto finish;
1084                 }
1085
1086                 log_debug("Retrying write.");
1087                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1088                 if (r < 0) {
1089                         log_error("Can't write entry: %s", strerror(-r));
1090                         goto finish;
1091                 }
1092         }
1093
1094 finish:
1095         journal_file_post_change(s->system_journal);
1096
1097         journal_file_close(s->runtime_journal);
1098         s->runtime_journal = NULL;
1099
1100         if (r >= 0)
1101                 rm_rf("/run/log/journal", false, true, false);
1102
1103         sd_journal_close(j);
1104
1105         server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1106
1107         return r;
1108 }
1109
1110 int process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1111         Server *s = userdata;
1112
1113         assert(s);
1114         assert(fd == s->native_fd || fd == s->syslog_fd);
1115
1116         if (revents != EPOLLIN) {
1117                 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1118                 return -EIO;
1119         }
1120
1121         for (;;) {
1122                 struct ucred *ucred = NULL;
1123                 struct timeval *tv = NULL;
1124                 struct cmsghdr *cmsg;
1125                 char *label = NULL;
1126                 size_t label_len = 0;
1127                 struct iovec iovec;
1128
1129                 union {
1130                         struct cmsghdr cmsghdr;
1131
1132                         /* We use NAME_MAX space for the SELinux label
1133                          * here. The kernel currently enforces no
1134                          * limit, but according to suggestions from
1135                          * the SELinux people this will change and it
1136                          * will probably be identical to NAME_MAX. For
1137                          * now we use that, but this should be updated
1138                          * one day when the final limit is known.*/
1139                         uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1140                                     CMSG_SPACE(sizeof(struct timeval)) +
1141                                     CMSG_SPACE(sizeof(int)) + /* fd */
1142                                     CMSG_SPACE(NAME_MAX)]; /* selinux label */
1143                 } control = {};
1144                 struct msghdr msghdr = {
1145                         .msg_iov = &iovec,
1146                         .msg_iovlen = 1,
1147                         .msg_control = &control,
1148                         .msg_controllen = sizeof(control),
1149                 };
1150
1151                 ssize_t n;
1152                 int v;
1153                 int *fds = NULL;
1154                 unsigned n_fds = 0;
1155
1156                 if (ioctl(fd, SIOCINQ, &v) < 0) {
1157                         log_error("SIOCINQ failed: %m");
1158                         return -errno;
1159                 }
1160
1161                 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, LINE_MAX + (size_t) v))
1162                         return log_oom();
1163
1164                 iovec.iov_base = s->buffer;
1165                 iovec.iov_len = s->buffer_size;
1166
1167                 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1168                 if (n < 0) {
1169                         if (errno == EINTR || errno == EAGAIN)
1170                                 return 0;
1171
1172                         log_error("recvmsg() failed: %m");
1173                         return -errno;
1174                 }
1175
1176                 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1177
1178                         if (cmsg->cmsg_level == SOL_SOCKET &&
1179                             cmsg->cmsg_type == SCM_CREDENTIALS &&
1180                             cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1181                                 ucred = (struct ucred*) CMSG_DATA(cmsg);
1182                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1183                                  cmsg->cmsg_type == SCM_SECURITY) {
1184                                 label = (char*) CMSG_DATA(cmsg);
1185                                 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1186                         } else if (cmsg->cmsg_level == SOL_SOCKET &&
1187                                    cmsg->cmsg_type == SO_TIMESTAMP &&
1188                                    cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1189                                 tv = (struct timeval*) CMSG_DATA(cmsg);
1190                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1191                                  cmsg->cmsg_type == SCM_RIGHTS) {
1192                                 fds = (int*) CMSG_DATA(cmsg);
1193                                 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1194                         }
1195                 }
1196
1197                 if (fd == s->syslog_fd) {
1198                         if (n > 0 && n_fds == 0) {
1199                                 s->buffer[n] = 0;
1200                                 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1201                         } else if (n_fds > 0)
1202                                 log_warning("Got file descriptors via syslog socket. Ignoring.");
1203
1204                 } else {
1205                         if (n > 0 && n_fds == 0)
1206                                 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1207                         else if (n == 0 && n_fds == 1)
1208                                 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1209                         else if (n_fds > 0)
1210                                 log_warning("Got too many file descriptors via native socket. Ignoring.");
1211                 }
1212
1213                 close_many(fds, n_fds);
1214         }
1215 }
1216
1217 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1218         Server *s = userdata;
1219
1220         assert(s);
1221
1222         log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
1223
1224         server_flush_to_var(s);
1225         server_sync(s);
1226         server_vacuum(s);
1227
1228         touch("/run/systemd/journal/flushed");
1229
1230         return 0;
1231 }
1232
1233 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1234         Server *s = userdata;
1235
1236         assert(s);
1237
1238         log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
1239         server_rotate(s);
1240         server_vacuum(s);
1241
1242         return 0;
1243 }
1244
1245 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1246         Server *s = userdata;
1247
1248         assert(s);
1249
1250         log_received_signal(LOG_INFO, si);
1251
1252         sd_event_exit(s->event, 0);
1253         return 0;
1254 }
1255
1256 static int setup_signals(Server *s) {
1257         sigset_t mask;
1258         int r;
1259
1260         assert(s);
1261
1262         assert_se(sigemptyset(&mask) == 0);
1263         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1264         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1265
1266         r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1267         if (r < 0)
1268                 return r;
1269
1270         r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1271         if (r < 0)
1272                 return r;
1273
1274         r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1275         if (r < 0)
1276                 return r;
1277
1278         r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1279         if (r < 0)
1280                 return r;
1281
1282         return 0;
1283 }
1284
1285 static int server_parse_proc_cmdline(Server *s) {
1286         _cleanup_free_ char *line = NULL;
1287         const char *w, *state;
1288         size_t l;
1289         int r;
1290
1291         r = proc_cmdline(&line);
1292         if (r < 0)
1293                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1294         if (r <= 0)
1295                 return 0;
1296
1297         FOREACH_WORD_QUOTED(w, l, line, state) {
1298                 _cleanup_free_ char *word;
1299
1300                 word = strndup(w, l);
1301                 if (!word)
1302                         return -ENOMEM;
1303
1304                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1305                         r = parse_boolean(word + 35);
1306                         if (r < 0)
1307                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1308                         else
1309                                 s->forward_to_syslog = r;
1310                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1311                         r = parse_boolean(word + 33);
1312                         if (r < 0)
1313                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1314                         else
1315                                 s->forward_to_kmsg = r;
1316                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1317                         r = parse_boolean(word + 36);
1318                         if (r < 0)
1319                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1320                         else
1321                                 s->forward_to_console = r;
1322                 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1323                         r = parse_boolean(word + 33);
1324                         if (r < 0)
1325                                 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1326                         else
1327                                 s->forward_to_wall = r;
1328                 } else if (startswith(word, "systemd.journald"))
1329                         log_warning("Invalid systemd.journald parameter. Ignoring.");
1330         }
1331         /* do not warn about state here, since probably systemd already did */
1332
1333         return 0;
1334 }
1335
1336 static int server_parse_config_file(Server *s) {
1337         assert(s);
1338
1339         return config_parse(NULL, "/etc/systemd/journald.conf", NULL,
1340                             "Journal\0",
1341                             config_item_perf_lookup, journald_gperf_lookup,
1342                             false, false, true, s);
1343 }
1344
1345 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1346         Server *s = userdata;
1347
1348         assert(s);
1349
1350         server_sync(s);
1351         return 0;
1352 }
1353
1354 int server_schedule_sync(Server *s, int priority) {
1355         int r;
1356
1357         assert(s);
1358
1359         if (priority <= LOG_CRIT) {
1360                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1361                 server_sync(s);
1362                 return 0;
1363         }
1364
1365         if (s->sync_scheduled)
1366                 return 0;
1367
1368         if (s->sync_interval_usec > 0) {
1369                 usec_t when;
1370
1371                 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1372                 if (r < 0)
1373                         return r;
1374
1375                 when += s->sync_interval_usec;
1376
1377                 if (!s->sync_event_source) {
1378                         r = sd_event_add_time(
1379                                         s->event,
1380                                         &s->sync_event_source,
1381                                         CLOCK_MONOTONIC,
1382                                         when, 0,
1383                                         server_dispatch_sync, s);
1384                         if (r < 0)
1385                                 return r;
1386
1387                         r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1388                 } else {
1389                         r = sd_event_source_set_time(s->sync_event_source, when);
1390                         if (r < 0)
1391                                 return r;
1392
1393                         r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1394                 }
1395                 if (r < 0)
1396                         return r;
1397
1398                 s->sync_scheduled = true;
1399         }
1400
1401         return 0;
1402 }
1403
1404 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1405         Server *s = userdata;
1406
1407         assert(s);
1408
1409         server_cache_hostname(s);
1410         return 0;
1411 }
1412
1413 static int server_open_hostname(Server *s) {
1414         int r;
1415
1416         assert(s);
1417
1418         s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1419         if (s->hostname_fd < 0) {
1420                 log_error("Failed to open /proc/sys/kernel/hostname: %m");
1421                 return -errno;
1422         }
1423
1424         r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1425         if (r < 0) {
1426                 /* kernels prior to 3.2 don't support polling this file. Ignore
1427                  * the failure. */
1428                 if (r == -EPERM) {
1429                         log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1430                                         strerror(-r));
1431                         s->hostname_fd = safe_close(s->hostname_fd);
1432                         return 0;
1433                 }
1434
1435                 log_error("Failed to register hostname fd in event loop: %s", strerror(-r));
1436                 return r;
1437         }
1438
1439         r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1440         if (r < 0) {
1441                 log_error("Failed to adjust priority of host name event source: %s", strerror(-r));
1442                 return r;
1443         }
1444
1445         return 0;
1446 }
1447
1448 int server_init(Server *s) {
1449         int n, r, fd;
1450
1451         assert(s);
1452
1453         zero(*s);
1454         s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->hostname_fd = -1;
1455         s->compress = true;
1456         s->seal = true;
1457
1458         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1459         s->sync_scheduled = false;
1460
1461         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1462         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1463
1464         s->forward_to_wall = true;
1465
1466         s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1467
1468         s->max_level_store = LOG_DEBUG;
1469         s->max_level_syslog = LOG_DEBUG;
1470         s->max_level_kmsg = LOG_NOTICE;
1471         s->max_level_console = LOG_INFO;
1472         s->max_level_wall = LOG_EMERG;
1473
1474         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1475         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1476
1477         server_parse_config_file(s);
1478         server_parse_proc_cmdline(s);
1479         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1480                 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1481                           s->rate_limit_interval, s->rate_limit_burst);
1482                 s->rate_limit_interval = s->rate_limit_burst = 0;
1483         }
1484
1485         mkdir_p("/run/systemd/journal", 0755);
1486
1487         s->user_journals = hashmap_new(NULL);
1488         if (!s->user_journals)
1489                 return log_oom();
1490
1491         s->mmap = mmap_cache_new();
1492         if (!s->mmap)
1493                 return log_oom();
1494
1495         r = sd_event_default(&s->event);
1496         if (r < 0) {
1497                 log_error("Failed to create event loop: %s", strerror(-r));
1498                 return r;
1499         }
1500
1501         sd_event_set_watchdog(s->event, true);
1502
1503         n = sd_listen_fds(true);
1504         if (n < 0) {
1505                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1506                 return n;
1507         }
1508
1509         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1510
1511                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1512
1513                         if (s->native_fd >= 0) {
1514                                 log_error("Too many native sockets passed.");
1515                                 return -EINVAL;
1516                         }
1517
1518                         s->native_fd = fd;
1519
1520                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1521
1522                         if (s->stdout_fd >= 0) {
1523                                 log_error("Too many stdout sockets passed.");
1524                                 return -EINVAL;
1525                         }
1526
1527                         s->stdout_fd = fd;
1528
1529                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1530                            sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1531
1532                         if (s->syslog_fd >= 0) {
1533                                 log_error("Too many /dev/log sockets passed.");
1534                                 return -EINVAL;
1535                         }
1536
1537                         s->syslog_fd = fd;
1538
1539                 } else {
1540                         log_error("Unknown socket passed.");
1541                         return -EINVAL;
1542                 }
1543         }
1544
1545         r = server_open_syslog_socket(s);
1546         if (r < 0)
1547                 return r;
1548
1549         r = server_open_native_socket(s);
1550         if (r < 0)
1551                 return r;
1552
1553         r = server_open_stdout_socket(s);
1554         if (r < 0)
1555                 return r;
1556
1557         r = server_open_dev_kmsg(s);
1558         if (r < 0)
1559                 return r;
1560
1561         r = server_open_kernel_seqnum(s);
1562         if (r < 0)
1563                 return r;
1564
1565         r = server_open_hostname(s);
1566         if (r < 0)
1567                 return r;
1568
1569         r = setup_signals(s);
1570         if (r < 0)
1571                 return r;
1572
1573         s->udev = udev_new();
1574         if (!s->udev)
1575                 return -ENOMEM;
1576
1577         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1578         if (!s->rate_limit)
1579                 return -ENOMEM;
1580
1581         r = cg_get_root_path(&s->cgroup_root);
1582         if (r < 0)
1583                 return r;
1584
1585         server_cache_hostname(s);
1586         server_cache_boot_id(s);
1587         server_cache_machine_id(s);
1588
1589         r = system_journal_open(s);
1590         if (r < 0)
1591                 return r;
1592
1593         return 0;
1594 }
1595
1596 void server_maybe_append_tags(Server *s) {
1597 #ifdef HAVE_GCRYPT
1598         JournalFile *f;
1599         Iterator i;
1600         usec_t n;
1601
1602         n = now(CLOCK_REALTIME);
1603
1604         if (s->system_journal)
1605                 journal_file_maybe_append_tag(s->system_journal, n);
1606
1607         HASHMAP_FOREACH(f, s->user_journals, i)
1608                 journal_file_maybe_append_tag(f, n);
1609 #endif
1610 }
1611
1612 void server_done(Server *s) {
1613         JournalFile *f;
1614         assert(s);
1615
1616         while (s->stdout_streams)
1617                 stdout_stream_free(s->stdout_streams);
1618
1619         if (s->system_journal)
1620                 journal_file_close(s->system_journal);
1621
1622         if (s->runtime_journal)
1623                 journal_file_close(s->runtime_journal);
1624
1625         while ((f = hashmap_steal_first(s->user_journals)))
1626                 journal_file_close(f);
1627
1628         hashmap_free(s->user_journals);
1629
1630         sd_event_source_unref(s->syslog_event_source);
1631         sd_event_source_unref(s->native_event_source);
1632         sd_event_source_unref(s->stdout_event_source);
1633         sd_event_source_unref(s->dev_kmsg_event_source);
1634         sd_event_source_unref(s->sync_event_source);
1635         sd_event_source_unref(s->sigusr1_event_source);
1636         sd_event_source_unref(s->sigusr2_event_source);
1637         sd_event_source_unref(s->sigterm_event_source);
1638         sd_event_source_unref(s->sigint_event_source);
1639         sd_event_source_unref(s->hostname_event_source);
1640         sd_event_unref(s->event);
1641
1642         safe_close(s->syslog_fd);
1643         safe_close(s->native_fd);
1644         safe_close(s->stdout_fd);
1645         safe_close(s->dev_kmsg_fd);
1646         safe_close(s->hostname_fd);
1647
1648         if (s->rate_limit)
1649                 journal_rate_limit_free(s->rate_limit);
1650
1651         if (s->kernel_seqnum)
1652                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1653
1654         free(s->buffer);
1655         free(s->tty_path);
1656         free(s->cgroup_root);
1657
1658         if (s->mmap)
1659                 mmap_cache_unref(s->mmap);
1660
1661         if (s->udev)
1662                 udev_unref(s->udev);
1663 }