chiark / gitweb /
hashmap: introduce hash_ops to make struct Hashmap smaller
[elogind.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
26 #include <sys/mman.h>
27 #include <sys/timerfd.h>
28
29 #include <libudev.h>
30
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33 #include "sd-daemon.h"
34 #include "fileio.h"
35 #include "mkdir.h"
36 #include "hashmap.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
40 #include "list.h"
41 #include "missing.h"
42 #include "conf-parser.h"
43 #include "selinux-util.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-rate-limit.h"
48 #include "journald-kmsg.h"
49 #include "journald-syslog.h"
50 #include "journald-stream.h"
51 #include "journald-console.h"
52 #include "journald-native.h"
53 #include "journald-server.h"
54
55 #ifdef HAVE_ACL
56 #include <sys/acl.h>
57 #include <acl/libacl.h>
58 #include "acl-util.h"
59 #endif
60
61 #ifdef HAVE_SELINUX
62 #include <selinux/selinux.h>
63 #endif
64
65 #define USER_JOURNALS_MAX 1024
66
67 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
68 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
69 #define DEFAULT_RATE_LIMIT_BURST 1000
70 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
71
72 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
73
74 static const char* const storage_table[_STORAGE_MAX] = {
75         [STORAGE_AUTO] = "auto",
76         [STORAGE_VOLATILE] = "volatile",
77         [STORAGE_PERSISTENT] = "persistent",
78         [STORAGE_NONE] = "none"
79 };
80
81 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
82 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
83
84 static const char* const split_mode_table[_SPLIT_MAX] = {
85         [SPLIT_LOGIN] = "login",
86         [SPLIT_UID] = "uid",
87         [SPLIT_NONE] = "none",
88 };
89
90 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
91 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
92
93 static uint64_t available_space(Server *s, bool verbose) {
94         char ids[33];
95         _cleanup_free_ char *p = NULL;
96         sd_id128_t machine;
97         struct statvfs ss;
98         uint64_t sum = 0, ss_avail = 0, avail = 0;
99         int r;
100         _cleanup_closedir_ DIR *d = NULL;
101         usec_t ts;
102         const char *f;
103         JournalMetrics *m;
104
105         ts = now(CLOCK_MONOTONIC);
106
107         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
108             && !verbose)
109                 return s->cached_available_space;
110
111         r = sd_id128_get_machine(&machine);
112         if (r < 0)
113                 return 0;
114
115         if (s->system_journal) {
116                 f = "/var/log/journal/";
117                 m = &s->system_metrics;
118         } else {
119                 f = "/run/log/journal/";
120                 m = &s->runtime_metrics;
121         }
122
123         assert(m);
124
125         p = strappend(f, sd_id128_to_string(machine, ids));
126         if (!p)
127                 return 0;
128
129         d = opendir(p);
130         if (!d)
131                 return 0;
132
133         if (fstatvfs(dirfd(d), &ss) < 0)
134                 return 0;
135
136         for (;;) {
137                 struct stat st;
138                 struct dirent *de;
139
140                 errno = 0;
141                 de = readdir(d);
142                 if (!de && errno != 0)
143                         return 0;
144
145                 if (!de)
146                         break;
147
148                 if (!endswith(de->d_name, ".journal") &&
149                     !endswith(de->d_name, ".journal~"))
150                         continue;
151
152                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
153                         continue;
154
155                 if (!S_ISREG(st.st_mode))
156                         continue;
157
158                 sum += (uint64_t) st.st_blocks * 512UL;
159         }
160
161         ss_avail = ss.f_bsize * ss.f_bavail;
162
163         /* If we reached a high mark, we will always allow this much
164          * again, unless usage goes above max_use. This watermark
165          * value is cached so that we don't give up space on pressure,
166          * but hover below the maximum usage. */
167
168         if (m->use < sum)
169                 m->use = sum;
170
171         avail = LESS_BY(ss_avail, m->keep_free);
172
173         s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
174         s->cached_available_space_timestamp = ts;
175
176         if (verbose) {
177                 char    fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
178                         fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
179
180                 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
181                                       "%s journal is using %s (max allowed %s, "
182                                       "trying to leave %s free of %s available → current limit %s).",
183                                       s->system_journal ? "Permanent" : "Runtime",
184                                       format_bytes(fb1, sizeof(fb1), sum),
185                                       format_bytes(fb2, sizeof(fb2), m->max_use),
186                                       format_bytes(fb3, sizeof(fb3), m->keep_free),
187                                       format_bytes(fb4, sizeof(fb4), ss_avail),
188                                       format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
189         }
190
191         return s->cached_available_space;
192 }
193
194 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
195         int r;
196 #ifdef HAVE_ACL
197         acl_t acl;
198         acl_entry_t entry;
199         acl_permset_t permset;
200 #endif
201
202         assert(f);
203
204         r = fchmod(f->fd, 0640);
205         if (r < 0)
206                 log_warning("Failed to fix access mode on %s, ignoring: %s", f->path, strerror(-r));
207
208 #ifdef HAVE_ACL
209         if (uid <= SYSTEM_UID_MAX)
210                 return;
211
212         acl = acl_get_fd(f->fd);
213         if (!acl) {
214                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
215                 return;
216         }
217
218         r = acl_find_uid(acl, uid, &entry);
219         if (r <= 0) {
220
221                 if (acl_create_entry(&acl, &entry) < 0 ||
222                     acl_set_tag_type(entry, ACL_USER) < 0 ||
223                     acl_set_qualifier(entry, &uid) < 0) {
224                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
225                         goto finish;
226                 }
227         }
228
229         /* We do not recalculate the mask unconditionally here,
230          * so that the fchmod() mask above stays intact. */
231         if (acl_get_permset(entry, &permset) < 0 ||
232             acl_add_perm(permset, ACL_READ) < 0 ||
233             calc_acl_mask_if_needed(&acl) < 0) {
234                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
235                 goto finish;
236         }
237
238         if (acl_set_fd(f->fd, acl) < 0)
239                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
240
241 finish:
242         acl_free(acl);
243 #endif
244 }
245
246 static JournalFile* find_journal(Server *s, uid_t uid) {
247         _cleanup_free_ char *p = NULL;
248         int r;
249         JournalFile *f;
250         sd_id128_t machine;
251
252         assert(s);
253
254         /* We split up user logs only on /var, not on /run. If the
255          * runtime file is open, we write to it exclusively, in order
256          * to guarantee proper order as soon as we flush /run to
257          * /var and close the runtime file. */
258
259         if (s->runtime_journal)
260                 return s->runtime_journal;
261
262         if (uid <= SYSTEM_UID_MAX)
263                 return s->system_journal;
264
265         r = sd_id128_get_machine(&machine);
266         if (r < 0)
267                 return s->system_journal;
268
269         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
270         if (f)
271                 return f;
272
273         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
274                      SD_ID128_FORMAT_VAL(machine), uid) < 0)
275                 return s->system_journal;
276
277         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
278                 /* Too many open? Then let's close one */
279                 f = hashmap_steal_first(s->user_journals);
280                 assert(f);
281                 journal_file_close(f);
282         }
283
284         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
285         if (r < 0)
286                 return s->system_journal;
287
288         server_fix_perms(s, f, uid);
289
290         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
291         if (r < 0) {
292                 journal_file_close(f);
293                 return s->system_journal;
294         }
295
296         return f;
297 }
298
299 static int do_rotate(Server *s, JournalFile **f, const char* name,
300                      bool seal, uint32_t uid) {
301         int r;
302         assert(s);
303
304         if (!*f)
305                 return -EINVAL;
306
307         r = journal_file_rotate(f, s->compress, seal);
308         if (r < 0)
309                 if (*f)
310                         log_error("Failed to rotate %s: %s",
311                                   (*f)->path, strerror(-r));
312                 else
313                         log_error("Failed to create new %s journal: %s",
314                                   name, strerror(-r));
315         else
316                 server_fix_perms(s, *f, uid);
317         return r;
318 }
319
320 void server_rotate(Server *s) {
321         JournalFile *f;
322         void *k;
323         Iterator i;
324         int r;
325
326         log_debug("Rotating...");
327
328         do_rotate(s, &s->runtime_journal, "runtime", false, 0);
329         do_rotate(s, &s->system_journal, "system", s->seal, 0);
330
331         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
332                 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
333                 if (r >= 0)
334                         hashmap_replace(s->user_journals, k, f);
335                 else if (!f)
336                         /* Old file has been closed and deallocated */
337                         hashmap_remove(s->user_journals, k);
338         }
339 }
340
341 void server_sync(Server *s) {
342         JournalFile *f;
343         void *k;
344         Iterator i;
345         int r;
346
347         if (s->system_journal) {
348                 r = journal_file_set_offline(s->system_journal);
349                 if (r < 0)
350                         log_error("Failed to sync system journal: %s", strerror(-r));
351         }
352
353         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
354                 r = journal_file_set_offline(f);
355                 if (r < 0)
356                         log_error("Failed to sync user journal: %s", strerror(-r));
357         }
358
359         if (s->sync_event_source) {
360                 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
361                 if (r < 0)
362                         log_error("Failed to disable sync timer source: %s", strerror(-r));
363         }
364
365         s->sync_scheduled = false;
366 }
367
368 static void do_vacuum(Server *s, char *ids, JournalFile *f, const char* path,
369                       JournalMetrics *metrics) {
370         char *p;
371         int r;
372
373         if (!f)
374                 return;
375
376         p = strappenda(path, ids);
377         r = journal_directory_vacuum(p, metrics->max_use, s->max_retention_usec, &s->oldest_file_usec);
378         if (r < 0 && r != -ENOENT)
379                 log_error("Failed to vacuum %s: %s", p, strerror(-r));
380 }
381
382 void server_vacuum(Server *s) {
383         char ids[33];
384         sd_id128_t machine;
385         int r;
386
387         log_debug("Vacuuming...");
388
389         s->oldest_file_usec = 0;
390
391         r = sd_id128_get_machine(&machine);
392         if (r < 0) {
393                 log_error("Failed to get machine ID: %s", strerror(-r));
394                 return;
395         }
396         sd_id128_to_string(machine, ids);
397
398         do_vacuum(s, ids, s->system_journal, "/var/log/journal/", &s->system_metrics);
399         do_vacuum(s, ids, s->runtime_journal, "/run/log/journal/", &s->runtime_metrics);
400
401         s->cached_available_space_timestamp = 0;
402 }
403
404 static void server_cache_machine_id(Server *s) {
405         sd_id128_t id;
406         int r;
407
408         assert(s);
409
410         r = sd_id128_get_machine(&id);
411         if (r < 0)
412                 return;
413
414         sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
415 }
416
417 static void server_cache_boot_id(Server *s) {
418         sd_id128_t id;
419         int r;
420
421         assert(s);
422
423         r = sd_id128_get_boot(&id);
424         if (r < 0)
425                 return;
426
427         sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
428 }
429
430 static void server_cache_hostname(Server *s) {
431         _cleanup_free_ char *t = NULL;
432         char *x;
433
434         assert(s);
435
436         t = gethostname_malloc();
437         if (!t)
438                 return;
439
440         x = strappend("_HOSTNAME=", t);
441         if (!x)
442                 return;
443
444         free(s->hostname_field);
445         s->hostname_field = x;
446 }
447
448 bool shall_try_append_again(JournalFile *f, int r) {
449
450         /* -E2BIG            Hit configured limit
451            -EFBIG            Hit fs limit
452            -EDQUOT           Quota limit hit
453            -ENOSPC           Disk full
454            -EHOSTDOWN        Other machine
455            -EBUSY            Unclean shutdown
456            -EPROTONOSUPPORT  Unsupported feature
457            -EBADMSG          Corrupted
458            -ENODATA          Truncated
459            -ESHUTDOWN        Already archived */
460
461         if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
462                 log_debug("%s: Allocation limit reached, rotating.", f->path);
463         else if (r == -EHOSTDOWN)
464                 log_info("%s: Journal file from other machine, rotating.", f->path);
465         else if (r == -EBUSY)
466                 log_info("%s: Unclean shutdown, rotating.", f->path);
467         else if (r == -EPROTONOSUPPORT)
468                 log_info("%s: Unsupported feature, rotating.", f->path);
469         else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
470                 log_warning("%s: Journal file corrupted, rotating.", f->path);
471         else
472                 return false;
473
474         return true;
475 }
476
477 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
478         JournalFile *f;
479         bool vacuumed = false;
480         int r;
481
482         assert(s);
483         assert(iovec);
484         assert(n > 0);
485
486         f = find_journal(s, uid);
487         if (!f)
488                 return;
489
490         if (journal_file_rotate_suggested(f, s->max_file_usec)) {
491                 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
492                 server_rotate(s);
493                 server_vacuum(s);
494                 vacuumed = true;
495
496                 f = find_journal(s, uid);
497                 if (!f)
498                         return;
499         }
500
501         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
502         if (r >= 0) {
503                 server_schedule_sync(s, priority);
504                 return;
505         }
506
507         if (vacuumed || !shall_try_append_again(f, r)) {
508                 size_t size = 0;
509                 unsigned i;
510                 for (i = 0; i < n; i++)
511                         size += iovec[i].iov_len;
512
513                 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
514                 return;
515         }
516
517         server_rotate(s);
518         server_vacuum(s);
519
520         f = find_journal(s, uid);
521         if (!f)
522                 return;
523
524         log_debug("Retrying write.");
525         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
526         if (r < 0) {
527                 size_t size = 0;
528                 unsigned i;
529                 for (i = 0; i < n; i++)
530                         size += iovec[i].iov_len;
531
532                 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
533         } else
534                 server_schedule_sync(s, priority);
535 }
536
537 static void dispatch_message_real(
538                 Server *s,
539                 struct iovec *iovec, unsigned n, unsigned m,
540                 struct ucred *ucred,
541                 struct timeval *tv,
542                 const char *label, size_t label_len,
543                 const char *unit_id,
544                 int priority,
545                 pid_t object_pid) {
546
547         char    pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
548                 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
549                 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
550                 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
551                 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
552                 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
553                 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
554                 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
555         uid_t object_uid;
556         gid_t object_gid;
557         char *x;
558         int r;
559         char *t, *c;
560         uid_t realuid = 0, owner = 0, journal_uid;
561         bool owner_valid = false;
562 #ifdef HAVE_AUDIT
563         char    audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
564                 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
565                 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
566                 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
567
568         uint32_t audit;
569         uid_t loginuid;
570 #endif
571
572         assert(s);
573         assert(iovec);
574         assert(n > 0);
575         assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
576
577         if (ucred) {
578                 realuid = ucred->uid;
579
580                 sprintf(pid, "_PID="PID_FMT, ucred->pid);
581                 IOVEC_SET_STRING(iovec[n++], pid);
582
583                 sprintf(uid, "_UID="UID_FMT, ucred->uid);
584                 IOVEC_SET_STRING(iovec[n++], uid);
585
586                 sprintf(gid, "_GID="GID_FMT, ucred->gid);
587                 IOVEC_SET_STRING(iovec[n++], gid);
588
589                 r = get_process_comm(ucred->pid, &t);
590                 if (r >= 0) {
591                         x = strappenda("_COMM=", t);
592                         free(t);
593                         IOVEC_SET_STRING(iovec[n++], x);
594                 }
595
596                 r = get_process_exe(ucred->pid, &t);
597                 if (r >= 0) {
598                         x = strappenda("_EXE=", t);
599                         free(t);
600                         IOVEC_SET_STRING(iovec[n++], x);
601                 }
602
603                 r = get_process_cmdline(ucred->pid, 0, false, &t);
604                 if (r >= 0) {
605                         x = strappenda("_CMDLINE=", t);
606                         free(t);
607                         IOVEC_SET_STRING(iovec[n++], x);
608                 }
609
610                 r = get_process_capeff(ucred->pid, &t);
611                 if (r >= 0) {
612                         x = strappenda("_CAP_EFFECTIVE=", t);
613                         free(t);
614                         IOVEC_SET_STRING(iovec[n++], x);
615                 }
616
617 #ifdef HAVE_AUDIT
618                 r = audit_session_from_pid(ucred->pid, &audit);
619                 if (r >= 0) {
620                         sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
621                         IOVEC_SET_STRING(iovec[n++], audit_session);
622                 }
623
624                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
625                 if (r >= 0) {
626                         sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
627                         IOVEC_SET_STRING(iovec[n++], audit_loginuid);
628                 }
629 #endif
630
631                 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
632                 if (r >= 0) {
633                         char *session = NULL;
634
635                         x = strappenda("_SYSTEMD_CGROUP=", c);
636                         IOVEC_SET_STRING(iovec[n++], x);
637
638                         r = cg_path_get_session(c, &t);
639                         if (r >= 0) {
640                                 session = strappenda("_SYSTEMD_SESSION=", t);
641                                 free(t);
642                                 IOVEC_SET_STRING(iovec[n++], session);
643                         }
644
645                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
646                                 owner_valid = true;
647
648                                 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
649                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
650                         }
651
652                         if (cg_path_get_unit(c, &t) >= 0) {
653                                 x = strappenda("_SYSTEMD_UNIT=", t);
654                                 free(t);
655                                 IOVEC_SET_STRING(iovec[n++], x);
656                         } else if (unit_id && !session) {
657                                 x = strappenda("_SYSTEMD_UNIT=", unit_id);
658                                 IOVEC_SET_STRING(iovec[n++], x);
659                         }
660
661                         if (cg_path_get_user_unit(c, &t) >= 0) {
662                                 x = strappenda("_SYSTEMD_USER_UNIT=", t);
663                                 free(t);
664                                 IOVEC_SET_STRING(iovec[n++], x);
665                         } else if (unit_id && session) {
666                                 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
667                                 IOVEC_SET_STRING(iovec[n++], x);
668                         }
669
670                         if (cg_path_get_slice(c, &t) >= 0) {
671                                 x = strappenda("_SYSTEMD_SLICE=", t);
672                                 free(t);
673                                 IOVEC_SET_STRING(iovec[n++], x);
674                         }
675
676                         free(c);
677                 } else if (unit_id) {
678                         x = strappenda("_SYSTEMD_UNIT=", unit_id);
679                         IOVEC_SET_STRING(iovec[n++], x);
680                 }
681
682 #ifdef HAVE_SELINUX
683                 if (use_selinux()) {
684                         if (label) {
685                                 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
686
687                                 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
688                                 IOVEC_SET_STRING(iovec[n++], x);
689                         } else {
690                                 security_context_t con;
691
692                                 if (getpidcon(ucred->pid, &con) >= 0) {
693                                         x = strappenda("_SELINUX_CONTEXT=", con);
694
695                                         freecon(con);
696                                         IOVEC_SET_STRING(iovec[n++], x);
697                                 }
698                         }
699                 }
700 #endif
701         }
702         assert(n <= m);
703
704         if (object_pid) {
705                 r = get_process_uid(object_pid, &object_uid);
706                 if (r >= 0) {
707                         sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
708                         IOVEC_SET_STRING(iovec[n++], o_uid);
709                 }
710
711                 r = get_process_gid(object_pid, &object_gid);
712                 if (r >= 0) {
713                         sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
714                         IOVEC_SET_STRING(iovec[n++], o_gid);
715                 }
716
717                 r = get_process_comm(object_pid, &t);
718                 if (r >= 0) {
719                         x = strappenda("OBJECT_COMM=", t);
720                         free(t);
721                         IOVEC_SET_STRING(iovec[n++], x);
722                 }
723
724                 r = get_process_exe(object_pid, &t);
725                 if (r >= 0) {
726                         x = strappenda("OBJECT_EXE=", t);
727                         free(t);
728                         IOVEC_SET_STRING(iovec[n++], x);
729                 }
730
731                 r = get_process_cmdline(object_pid, 0, false, &t);
732                 if (r >= 0) {
733                         x = strappenda("OBJECT_CMDLINE=", t);
734                         free(t);
735                         IOVEC_SET_STRING(iovec[n++], x);
736                 }
737
738 #ifdef HAVE_AUDIT
739                 r = audit_session_from_pid(object_pid, &audit);
740                 if (r >= 0) {
741                         sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
742                         IOVEC_SET_STRING(iovec[n++], o_audit_session);
743                 }
744
745                 r = audit_loginuid_from_pid(object_pid, &loginuid);
746                 if (r >= 0) {
747                         sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
748                         IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
749                 }
750 #endif
751
752                 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
753                 if (r >= 0) {
754                         x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
755                         IOVEC_SET_STRING(iovec[n++], x);
756
757                         r = cg_path_get_session(c, &t);
758                         if (r >= 0) {
759                                 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
760                                 free(t);
761                                 IOVEC_SET_STRING(iovec[n++], x);
762                         }
763
764                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
765                                 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
766                                 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
767                         }
768
769                         if (cg_path_get_unit(c, &t) >= 0) {
770                                 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
771                                 free(t);
772                                 IOVEC_SET_STRING(iovec[n++], x);
773                         }
774
775                         if (cg_path_get_user_unit(c, &t) >= 0) {
776                                 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
777                                 free(t);
778                                 IOVEC_SET_STRING(iovec[n++], x);
779                         }
780
781                         free(c);
782                 }
783         }
784         assert(n <= m);
785
786         if (tv) {
787                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
788                 IOVEC_SET_STRING(iovec[n++], source_time);
789         }
790
791         /* Note that strictly speaking storing the boot id here is
792          * redundant since the entry includes this in-line
793          * anyway. However, we need this indexed, too. */
794         if (!isempty(s->boot_id_field))
795                 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
796
797         if (!isempty(s->machine_id_field))
798                 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
799
800         if (!isempty(s->hostname_field))
801                 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
802
803         assert(n <= m);
804
805         if (s->split_mode == SPLIT_UID && realuid > 0)
806                 /* Split up strictly by any UID */
807                 journal_uid = realuid;
808         else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
809                 /* Split up by login UIDs.  We do this only if the
810                  * realuid is not root, in order not to accidentally
811                  * leak privileged information to the user that is
812                  * logged by a privileged process that is part of an
813                  * unprivileged session.*/
814                 journal_uid = owner;
815         else
816                 journal_uid = 0;
817
818         write_to_journal(s, journal_uid, iovec, n, priority);
819 }
820
821 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
822         char mid[11 + 32 + 1];
823         char buffer[16 + LINE_MAX + 1];
824         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
825         int n = 0;
826         va_list ap;
827         struct ucred ucred = {};
828
829         assert(s);
830         assert(format);
831
832         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
833         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
834
835         memcpy(buffer, "MESSAGE=", 8);
836         va_start(ap, format);
837         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
838         va_end(ap);
839         char_array_0(buffer);
840         IOVEC_SET_STRING(iovec[n++], buffer);
841
842         if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
843                 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
844                 char_array_0(mid);
845                 IOVEC_SET_STRING(iovec[n++], mid);
846         }
847
848         ucred.pid = getpid();
849         ucred.uid = getuid();
850         ucred.gid = getgid();
851
852         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
853 }
854
855 void server_dispatch_message(
856                 Server *s,
857                 struct iovec *iovec, unsigned n, unsigned m,
858                 struct ucred *ucred,
859                 struct timeval *tv,
860                 const char *label, size_t label_len,
861                 const char *unit_id,
862                 int priority,
863                 pid_t object_pid) {
864
865         int rl, r;
866         _cleanup_free_ char *path = NULL;
867         char *c;
868
869         assert(s);
870         assert(iovec || n == 0);
871
872         if (n == 0)
873                 return;
874
875         if (LOG_PRI(priority) > s->max_level_store)
876                 return;
877
878         /* Stop early in case the information will not be stored
879          * in a journal. */
880         if (s->storage == STORAGE_NONE)
881                 return;
882
883         if (!ucred)
884                 goto finish;
885
886         r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
887         if (r < 0)
888                 goto finish;
889
890         /* example: /user/lennart/3/foobar
891          *          /system/dbus.service/foobar
892          *
893          * So let's cut of everything past the third /, since that is
894          * where user directories start */
895
896         c = strchr(path, '/');
897         if (c) {
898                 c = strchr(c+1, '/');
899                 if (c) {
900                         c = strchr(c+1, '/');
901                         if (c)
902                                 *c = 0;
903                 }
904         }
905
906         rl = journal_rate_limit_test(s->rate_limit, path,
907                                      priority & LOG_PRIMASK, available_space(s, false));
908
909         if (rl == 0)
910                 return;
911
912         /* Write a suppression message if we suppressed something */
913         if (rl > 1)
914                 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
915                                       "Suppressed %u messages from %s", rl - 1, path);
916
917 finish:
918         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
919 }
920
921
922 static int system_journal_open(Server *s) {
923         int r;
924         char *fn;
925         sd_id128_t machine;
926         char ids[33];
927
928         r = sd_id128_get_machine(&machine);
929         if (r < 0) {
930                 log_error("Failed to get machine id: %s", strerror(-r));
931                 return r;
932         }
933
934         sd_id128_to_string(machine, ids);
935
936         if (!s->system_journal &&
937             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
938             access("/run/systemd/journal/flushed", F_OK) >= 0) {
939
940                 /* If in auto mode: first try to create the machine
941                  * path, but not the prefix.
942                  *
943                  * If in persistent mode: create /var/log/journal and
944                  * the machine path */
945
946                 if (s->storage == STORAGE_PERSISTENT)
947                         (void) mkdir("/var/log/journal/", 0755);
948
949                 fn = strappenda("/var/log/journal/", ids);
950                 (void) mkdir(fn, 0755);
951
952                 fn = strappenda(fn, "/system.journal");
953                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
954
955                 if (r >= 0)
956                         server_fix_perms(s, s->system_journal, 0);
957                 else if (r < 0) {
958                         if (r != -ENOENT && r != -EROFS)
959                                 log_warning("Failed to open system journal: %s", strerror(-r));
960
961                         r = 0;
962                 }
963         }
964
965         if (!s->runtime_journal &&
966             (s->storage != STORAGE_NONE)) {
967
968                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
969                 if (!fn)
970                         return -ENOMEM;
971
972                 if (s->system_journal) {
973
974                         /* Try to open the runtime journal, but only
975                          * if it already exists, so that we can flush
976                          * it into the system journal */
977
978                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
979                         free(fn);
980
981                         if (r < 0) {
982                                 if (r != -ENOENT)
983                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
984
985                                 r = 0;
986                         }
987
988                 } else {
989
990                         /* OK, we really need the runtime journal, so create
991                          * it if necessary. */
992
993                         (void) mkdir("/run/log", 0755);
994                         (void) mkdir("/run/log/journal", 0755);
995                         (void) mkdir_parents(fn, 0750);
996
997                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
998                         free(fn);
999
1000                         if (r < 0) {
1001                                 log_error("Failed to open runtime journal: %s", strerror(-r));
1002                                 return r;
1003                         }
1004                 }
1005
1006                 if (s->runtime_journal)
1007                         server_fix_perms(s, s->runtime_journal, 0);
1008         }
1009
1010         available_space(s, true);
1011
1012         return r;
1013 }
1014
1015 int server_flush_to_var(Server *s) {
1016         sd_id128_t machine;
1017         sd_journal *j = NULL;
1018         char ts[FORMAT_TIMESPAN_MAX];
1019         usec_t start;
1020         unsigned n = 0;
1021         int r;
1022
1023         assert(s);
1024
1025         if (s->storage != STORAGE_AUTO &&
1026             s->storage != STORAGE_PERSISTENT)
1027                 return 0;
1028
1029         if (!s->runtime_journal)
1030                 return 0;
1031
1032         system_journal_open(s);
1033
1034         if (!s->system_journal)
1035                 return 0;
1036
1037         log_debug("Flushing to /var...");
1038
1039         start = now(CLOCK_MONOTONIC);
1040
1041         r = sd_id128_get_machine(&machine);
1042         if (r < 0)
1043                 return r;
1044
1045         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1046         if (r < 0) {
1047                 log_error("Failed to read runtime journal: %s", strerror(-r));
1048                 return r;
1049         }
1050
1051         sd_journal_set_data_threshold(j, 0);
1052
1053         SD_JOURNAL_FOREACH(j) {
1054                 Object *o = NULL;
1055                 JournalFile *f;
1056
1057                 f = j->current_file;
1058                 assert(f && f->current_offset > 0);
1059
1060                 n++;
1061
1062                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1063                 if (r < 0) {
1064                         log_error("Can't read entry: %s", strerror(-r));
1065                         goto finish;
1066                 }
1067
1068                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1069                 if (r >= 0)
1070                         continue;
1071
1072                 if (!shall_try_append_again(s->system_journal, r)) {
1073                         log_error("Can't write entry: %s", strerror(-r));
1074                         goto finish;
1075                 }
1076
1077                 server_rotate(s);
1078                 server_vacuum(s);
1079
1080                 if (!s->system_journal) {
1081                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1082                         r = -EIO;
1083                         goto finish;
1084                 }
1085
1086                 log_debug("Retrying write.");
1087                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1088                 if (r < 0) {
1089                         log_error("Can't write entry: %s", strerror(-r));
1090                         goto finish;
1091                 }
1092         }
1093
1094 finish:
1095         journal_file_post_change(s->system_journal);
1096
1097         journal_file_close(s->runtime_journal);
1098         s->runtime_journal = NULL;
1099
1100         if (r >= 0)
1101                 rm_rf("/run/log/journal", false, true, false);
1102
1103         sd_journal_close(j);
1104
1105         server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1106
1107         return r;
1108 }
1109
1110 int process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1111         Server *s = userdata;
1112
1113         assert(s);
1114         assert(fd == s->native_fd || fd == s->syslog_fd);
1115
1116         if (revents != EPOLLIN) {
1117                 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1118                 return -EIO;
1119         }
1120
1121         for (;;) {
1122                 struct ucred *ucred = NULL;
1123                 struct timeval *tv = NULL;
1124                 struct cmsghdr *cmsg;
1125                 char *label = NULL;
1126                 size_t label_len = 0;
1127                 struct iovec iovec;
1128
1129                 union {
1130                         struct cmsghdr cmsghdr;
1131
1132                         /* We use NAME_MAX space for the SELinux label
1133                          * here. The kernel currently enforces no
1134                          * limit, but according to suggestions from
1135                          * the SELinux people this will change and it
1136                          * will probably be identical to NAME_MAX. For
1137                          * now we use that, but this should be updated
1138                          * one day when the final limit is known.*/
1139                         uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1140                                     CMSG_SPACE(sizeof(struct timeval)) +
1141                                     CMSG_SPACE(sizeof(int)) + /* fd */
1142                                     CMSG_SPACE(NAME_MAX)]; /* selinux label */
1143                 } control = {};
1144                 struct msghdr msghdr = {
1145                         .msg_iov = &iovec,
1146                         .msg_iovlen = 1,
1147                         .msg_control = &control,
1148                         .msg_controllen = sizeof(control),
1149                 };
1150
1151                 ssize_t n;
1152                 int v;
1153                 int *fds = NULL;
1154                 unsigned n_fds = 0;
1155
1156                 if (ioctl(fd, SIOCINQ, &v) < 0) {
1157                         log_error("SIOCINQ failed: %m");
1158                         return -errno;
1159                 }
1160
1161                 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, LINE_MAX + (size_t) v))
1162                         return log_oom();
1163
1164                 iovec.iov_base = s->buffer;
1165                 iovec.iov_len = s->buffer_size;
1166
1167                 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1168                 if (n < 0) {
1169                         if (errno == EINTR || errno == EAGAIN)
1170                                 return 0;
1171
1172                         log_error("recvmsg() failed: %m");
1173                         return -errno;
1174                 }
1175
1176                 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1177
1178                         if (cmsg->cmsg_level == SOL_SOCKET &&
1179                             cmsg->cmsg_type == SCM_CREDENTIALS &&
1180                             cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1181                                 ucred = (struct ucred*) CMSG_DATA(cmsg);
1182                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1183                                  cmsg->cmsg_type == SCM_SECURITY) {
1184                                 label = (char*) CMSG_DATA(cmsg);
1185                                 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1186                         } else if (cmsg->cmsg_level == SOL_SOCKET &&
1187                                    cmsg->cmsg_type == SO_TIMESTAMP &&
1188                                    cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1189                                 tv = (struct timeval*) CMSG_DATA(cmsg);
1190                         else if (cmsg->cmsg_level == SOL_SOCKET &&
1191                                  cmsg->cmsg_type == SCM_RIGHTS) {
1192                                 fds = (int*) CMSG_DATA(cmsg);
1193                                 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1194                         }
1195                 }
1196
1197                 if (fd == s->syslog_fd) {
1198                         if (n > 0 && n_fds == 0) {
1199                                 s->buffer[n] = 0;
1200                                 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1201                         } else if (n_fds > 0)
1202                                 log_warning("Got file descriptors via syslog socket. Ignoring.");
1203
1204                 } else {
1205                         if (n > 0 && n_fds == 0)
1206                                 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1207                         else if (n == 0 && n_fds == 1)
1208                                 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1209                         else if (n_fds > 0)
1210                                 log_warning("Got too many file descriptors via native socket. Ignoring.");
1211                 }
1212
1213                 close_many(fds, n_fds);
1214         }
1215 }
1216
1217 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1218         Server *s = userdata;
1219
1220         assert(s);
1221
1222         log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
1223
1224         touch("/run/systemd/journal/flushed");
1225         server_flush_to_var(s);
1226         server_sync(s);
1227
1228         return 0;
1229 }
1230
1231 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1232         Server *s = userdata;
1233
1234         assert(s);
1235
1236         log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
1237         server_rotate(s);
1238         server_vacuum(s);
1239
1240         return 0;
1241 }
1242
1243 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1244         Server *s = userdata;
1245
1246         assert(s);
1247
1248         log_received_signal(LOG_INFO, si);
1249
1250         sd_event_exit(s->event, 0);
1251         return 0;
1252 }
1253
1254 static int setup_signals(Server *s) {
1255         sigset_t mask;
1256         int r;
1257
1258         assert(s);
1259
1260         assert_se(sigemptyset(&mask) == 0);
1261         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1262         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1263
1264         r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1265         if (r < 0)
1266                 return r;
1267
1268         r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1269         if (r < 0)
1270                 return r;
1271
1272         r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1273         if (r < 0)
1274                 return r;
1275
1276         r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1277         if (r < 0)
1278                 return r;
1279
1280         return 0;
1281 }
1282
1283 static int server_parse_proc_cmdline(Server *s) {
1284         _cleanup_free_ char *line = NULL;
1285         const char *w, *state;
1286         size_t l;
1287         int r;
1288
1289         r = proc_cmdline(&line);
1290         if (r < 0)
1291                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1292         if (r <= 0)
1293                 return 0;
1294
1295         FOREACH_WORD_QUOTED(w, l, line, state) {
1296                 _cleanup_free_ char *word;
1297
1298                 word = strndup(w, l);
1299                 if (!word)
1300                         return -ENOMEM;
1301
1302                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1303                         r = parse_boolean(word + 35);
1304                         if (r < 0)
1305                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1306                         else
1307                                 s->forward_to_syslog = r;
1308                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1309                         r = parse_boolean(word + 33);
1310                         if (r < 0)
1311                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1312                         else
1313                                 s->forward_to_kmsg = r;
1314                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1315                         r = parse_boolean(word + 36);
1316                         if (r < 0)
1317                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1318                         else
1319                                 s->forward_to_console = r;
1320                 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1321                         r = parse_boolean(word + 33);
1322                         if (r < 0)
1323                                 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1324                         else
1325                                 s->forward_to_wall = r;
1326                 } else if (startswith(word, "systemd.journald"))
1327                         log_warning("Invalid systemd.journald parameter. Ignoring.");
1328         }
1329         /* do not warn about state here, since probably systemd already did */
1330
1331         return 0;
1332 }
1333
1334 static int server_parse_config_file(Server *s) {
1335         assert(s);
1336
1337         return config_parse(NULL, "/etc/systemd/journald.conf", NULL,
1338                             "Journal\0",
1339                             config_item_perf_lookup, journald_gperf_lookup,
1340                             false, false, true, s);
1341 }
1342
1343 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1344         Server *s = userdata;
1345
1346         assert(s);
1347
1348         server_sync(s);
1349         return 0;
1350 }
1351
1352 int server_schedule_sync(Server *s, int priority) {
1353         int r;
1354
1355         assert(s);
1356
1357         if (priority <= LOG_CRIT) {
1358                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1359                 server_sync(s);
1360                 return 0;
1361         }
1362
1363         if (s->sync_scheduled)
1364                 return 0;
1365
1366         if (s->sync_interval_usec > 0) {
1367                 usec_t when;
1368
1369                 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1370                 if (r < 0)
1371                         return r;
1372
1373                 when += s->sync_interval_usec;
1374
1375                 if (!s->sync_event_source) {
1376                         r = sd_event_add_time(
1377                                         s->event,
1378                                         &s->sync_event_source,
1379                                         CLOCK_MONOTONIC,
1380                                         when, 0,
1381                                         server_dispatch_sync, s);
1382                         if (r < 0)
1383                                 return r;
1384
1385                         r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1386                 } else {
1387                         r = sd_event_source_set_time(s->sync_event_source, when);
1388                         if (r < 0)
1389                                 return r;
1390
1391                         r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1392                 }
1393                 if (r < 0)
1394                         return r;
1395
1396                 s->sync_scheduled = true;
1397         }
1398
1399         return 0;
1400 }
1401
1402 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1403         Server *s = userdata;
1404
1405         assert(s);
1406
1407         server_cache_hostname(s);
1408         return 0;
1409 }
1410
1411 static int server_open_hostname(Server *s) {
1412         int r;
1413
1414         assert(s);
1415
1416         s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1417         if (s->hostname_fd < 0) {
1418                 log_error("Failed to open /proc/sys/kernel/hostname: %m");
1419                 return -errno;
1420         }
1421
1422         r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1423         if (r < 0) {
1424                 /* kernels prior to 3.2 don't support polling this file. Ignore
1425                  * the failure. */
1426                 if (r == -EPERM) {
1427                         log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1428                                         strerror(-r));
1429                         s->hostname_fd = safe_close(s->hostname_fd);
1430                         return 0;
1431                 }
1432
1433                 log_error("Failed to register hostname fd in event loop: %s", strerror(-r));
1434                 return r;
1435         }
1436
1437         r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1438         if (r < 0) {
1439                 log_error("Failed to adjust priority of host name event source: %s", strerror(-r));
1440                 return r;
1441         }
1442
1443         return 0;
1444 }
1445
1446 int server_init(Server *s) {
1447         int n, r, fd;
1448
1449         assert(s);
1450
1451         zero(*s);
1452         s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->hostname_fd = -1;
1453         s->compress = true;
1454         s->seal = true;
1455
1456         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1457         s->sync_scheduled = false;
1458
1459         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1460         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1461
1462         s->forward_to_wall = true;
1463
1464         s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1465
1466         s->max_level_store = LOG_DEBUG;
1467         s->max_level_syslog = LOG_DEBUG;
1468         s->max_level_kmsg = LOG_NOTICE;
1469         s->max_level_console = LOG_INFO;
1470         s->max_level_wall = LOG_EMERG;
1471
1472         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1473         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1474
1475         server_parse_config_file(s);
1476         server_parse_proc_cmdline(s);
1477         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1478                 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1479                           (long long unsigned) s->rate_limit_interval,
1480                           s->rate_limit_burst);
1481                 s->rate_limit_interval = s->rate_limit_burst = 0;
1482         }
1483
1484         mkdir_p("/run/systemd/journal", 0755);
1485
1486         s->user_journals = hashmap_new(NULL);
1487         if (!s->user_journals)
1488                 return log_oom();
1489
1490         s->mmap = mmap_cache_new();
1491         if (!s->mmap)
1492                 return log_oom();
1493
1494         r = sd_event_default(&s->event);
1495         if (r < 0) {
1496                 log_error("Failed to create event loop: %s", strerror(-r));
1497                 return r;
1498         }
1499
1500         sd_event_set_watchdog(s->event, true);
1501
1502         n = sd_listen_fds(true);
1503         if (n < 0) {
1504                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1505                 return n;
1506         }
1507
1508         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1509
1510                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1511
1512                         if (s->native_fd >= 0) {
1513                                 log_error("Too many native sockets passed.");
1514                                 return -EINVAL;
1515                         }
1516
1517                         s->native_fd = fd;
1518
1519                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1520
1521                         if (s->stdout_fd >= 0) {
1522                                 log_error("Too many stdout sockets passed.");
1523                                 return -EINVAL;
1524                         }
1525
1526                         s->stdout_fd = fd;
1527
1528                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1529                            sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1530
1531                         if (s->syslog_fd >= 0) {
1532                                 log_error("Too many /dev/log sockets passed.");
1533                                 return -EINVAL;
1534                         }
1535
1536                         s->syslog_fd = fd;
1537
1538                 } else {
1539                         log_error("Unknown socket passed.");
1540                         return -EINVAL;
1541                 }
1542         }
1543
1544         r = server_open_syslog_socket(s);
1545         if (r < 0)
1546                 return r;
1547
1548         r = server_open_native_socket(s);
1549         if (r < 0)
1550                 return r;
1551
1552         r = server_open_stdout_socket(s);
1553         if (r < 0)
1554                 return r;
1555
1556         r = server_open_dev_kmsg(s);
1557         if (r < 0)
1558                 return r;
1559
1560         r = server_open_kernel_seqnum(s);
1561         if (r < 0)
1562                 return r;
1563
1564         r = server_open_hostname(s);
1565         if (r < 0)
1566                 return r;
1567
1568         r = setup_signals(s);
1569         if (r < 0)
1570                 return r;
1571
1572         s->udev = udev_new();
1573         if (!s->udev)
1574                 return -ENOMEM;
1575
1576         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1577         if (!s->rate_limit)
1578                 return -ENOMEM;
1579
1580         r = cg_get_root_path(&s->cgroup_root);
1581         if (r < 0)
1582                 return r;
1583
1584         server_cache_hostname(s);
1585         server_cache_boot_id(s);
1586         server_cache_machine_id(s);
1587
1588         r = system_journal_open(s);
1589         if (r < 0)
1590                 return r;
1591
1592         return 0;
1593 }
1594
1595 void server_maybe_append_tags(Server *s) {
1596 #ifdef HAVE_GCRYPT
1597         JournalFile *f;
1598         Iterator i;
1599         usec_t n;
1600
1601         n = now(CLOCK_REALTIME);
1602
1603         if (s->system_journal)
1604                 journal_file_maybe_append_tag(s->system_journal, n);
1605
1606         HASHMAP_FOREACH(f, s->user_journals, i)
1607                 journal_file_maybe_append_tag(f, n);
1608 #endif
1609 }
1610
1611 void server_done(Server *s) {
1612         JournalFile *f;
1613         assert(s);
1614
1615         while (s->stdout_streams)
1616                 stdout_stream_free(s->stdout_streams);
1617
1618         if (s->system_journal)
1619                 journal_file_close(s->system_journal);
1620
1621         if (s->runtime_journal)
1622                 journal_file_close(s->runtime_journal);
1623
1624         while ((f = hashmap_steal_first(s->user_journals)))
1625                 journal_file_close(f);
1626
1627         hashmap_free(s->user_journals);
1628
1629         sd_event_source_unref(s->syslog_event_source);
1630         sd_event_source_unref(s->native_event_source);
1631         sd_event_source_unref(s->stdout_event_source);
1632         sd_event_source_unref(s->dev_kmsg_event_source);
1633         sd_event_source_unref(s->sync_event_source);
1634         sd_event_source_unref(s->sigusr1_event_source);
1635         sd_event_source_unref(s->sigusr2_event_source);
1636         sd_event_source_unref(s->sigterm_event_source);
1637         sd_event_source_unref(s->sigint_event_source);
1638         sd_event_source_unref(s->hostname_event_source);
1639         sd_event_unref(s->event);
1640
1641         safe_close(s->syslog_fd);
1642         safe_close(s->native_fd);
1643         safe_close(s->stdout_fd);
1644         safe_close(s->dev_kmsg_fd);
1645         safe_close(s->hostname_fd);
1646
1647         if (s->rate_limit)
1648                 journal_rate_limit_free(s->rate_limit);
1649
1650         if (s->kernel_seqnum)
1651                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1652
1653         free(s->buffer);
1654         free(s->tty_path);
1655         free(s->cgroup_root);
1656
1657         if (s->mmap)
1658                 mmap_cache_unref(s->mmap);
1659
1660         if (s->udev)
1661                 udev_unref(s->udev);
1662 }