chiark / gitweb /
fc8115c31c29922225e004fcffc63b9398f395c8
[elogind.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
26 #include <sys/mman.h>
27 #include <sys/timerfd.h>
28
29 #include <libudev.h>
30
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33 #include "sd-daemon.h"
34 #include "fileio.h"
35 #include "mkdir.h"
36 #include "hashmap.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
40 #include "list.h"
41 #include "missing.h"
42 #include "conf-parser.h"
43 #include "selinux-util.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-rate-limit.h"
48 #include "journald-kmsg.h"
49 #include "journald-syslog.h"
50 #include "journald-stream.h"
51 #include "journald-console.h"
52 #include "journald-native.h"
53 #include "journald-server.h"
54
55 #ifdef HAVE_ACL
56 #include <sys/acl.h>
57 #include <acl/libacl.h>
58 #include "acl-util.h"
59 #endif
60
61 #ifdef HAVE_SELINUX
62 #include <selinux/selinux.h>
63 #endif
64
/* Upper bound on the number of per-user journal files kept open at the
 * same time; find_journal() closes entries once this many are open. */
#define USER_JOURNALS_MAX 1024

/* Default tunables. NOTE(review): not referenced in the part of the file
 * visible here; presumably applied when the server is initialized. */
#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000

/* How long a result cached by available_space() is reused before the
 * journal directory is scanned again. */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
72
/* String names for the Storage enum values, indexed by enum value. */
static const char* const storage_table[] = {
        [STORAGE_AUTO] = "auto",
        [STORAGE_VOLATILE] = "volatile",
        [STORAGE_PERSISTENT] = "persistent",
        [STORAGE_NONE] = "none"
};

/* NOTE(review): both macros are defined elsewhere; presumably they generate
 * the string<->enum lookup helpers and the config-file parser callback
 * config_parse_storage() from the table above. */
DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
82
/* String names for the SplitMode enum values, indexed by enum value. */
static const char* const split_mode_table[] = {
        [SPLIT_NONE] = "none",
        [SPLIT_UID] = "uid",
        [SPLIT_LOGIN] = "login"
};

/* NOTE(review): both macros are defined elsewhere; presumably they generate
 * the string<->enum lookup helpers and the config-file parser callback
 * config_parse_split_mode() from the table above. */
DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
91
92 static uint64_t available_space(Server *s, bool verbose) {
93         char ids[33];
94         _cleanup_free_ char *p = NULL;
95         sd_id128_t machine;
96         struct statvfs ss;
97         uint64_t sum = 0, ss_avail = 0, avail = 0;
98         int r;
99         _cleanup_closedir_ DIR *d = NULL;
100         usec_t ts;
101         const char *f;
102         JournalMetrics *m;
103
104         ts = now(CLOCK_MONOTONIC);
105
106         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
107             && !verbose)
108                 return s->cached_available_space;
109
110         r = sd_id128_get_machine(&machine);
111         if (r < 0)
112                 return 0;
113
114         if (s->system_journal) {
115                 f = "/var/log/journal/";
116                 m = &s->system_metrics;
117         } else {
118                 f = "/run/log/journal/";
119                 m = &s->runtime_metrics;
120         }
121
122         assert(m);
123
124         p = strappend(f, sd_id128_to_string(machine, ids));
125         if (!p)
126                 return 0;
127
128         d = opendir(p);
129         if (!d)
130                 return 0;
131
132         if (fstatvfs(dirfd(d), &ss) < 0)
133                 return 0;
134
135         for (;;) {
136                 struct stat st;
137                 struct dirent *de;
138                 union dirent_storage buf;
139
140                 r = readdir_r(d, &buf.de, &de);
141                 if (r != 0)
142                         break;
143
144                 if (!de)
145                         break;
146
147                 if (!endswith(de->d_name, ".journal") &&
148                     !endswith(de->d_name, ".journal~"))
149                         continue;
150
151                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
152                         continue;
153
154                 if (!S_ISREG(st.st_mode))
155                         continue;
156
157                 sum += (uint64_t) st.st_blocks * 512UL;
158         }
159
160         ss_avail = ss.f_bsize * ss.f_bavail;
161         avail = ss_avail > m->keep_free ? ss_avail - m->keep_free : 0;
162
163         s->cached_available_space = MIN(m->max_use, avail) > sum ? MIN(m->max_use, avail) - sum : 0;
164         s->cached_available_space_timestamp = ts;
165
166         if (verbose) {
167                 char    fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
168                         fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
169
170                 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
171                                       "%s journal is using %s (max %s, leaving %s of free %s, current limit %s).",
172                                       s->system_journal ? "Permanent" : "Runtime",
173                                       format_bytes(fb1, sizeof(fb1), sum),
174                                       format_bytes(fb2, sizeof(fb2), m->max_use),
175                                       format_bytes(fb3, sizeof(fb3), m->keep_free),
176                                       format_bytes(fb4, sizeof(fb4), ss_avail),
177                                       format_bytes(fb5, sizeof(fb5), MIN(m->max_use, avail)));
178         }
179
180         return s->cached_available_space;
181 }
182
183 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
184         int r;
185 #ifdef HAVE_ACL
186         acl_t acl;
187         acl_entry_t entry;
188         acl_permset_t permset;
189 #endif
190
191         assert(f);
192
193         r = fchmod(f->fd, 0640);
194         if (r < 0)
195                 log_warning("Failed to fix access mode on %s, ignoring: %s", f->path, strerror(-r));
196
197 #ifdef HAVE_ACL
198         if (uid <= 0)
199                 return;
200
201         acl = acl_get_fd(f->fd);
202         if (!acl) {
203                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
204                 return;
205         }
206
207         r = acl_find_uid(acl, uid, &entry);
208         if (r <= 0) {
209
210                 if (acl_create_entry(&acl, &entry) < 0 ||
211                     acl_set_tag_type(entry, ACL_USER) < 0 ||
212                     acl_set_qualifier(entry, &uid) < 0) {
213                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
214                         goto finish;
215                 }
216         }
217
218         /* We do not recalculate the mask unconditionally here,
219          * so that the fchmod() mask above stays intact. */
220         if (acl_get_permset(entry, &permset) < 0 ||
221             acl_add_perm(permset, ACL_READ) < 0 ||
222             calc_acl_mask_if_needed(&acl) < 0) {
223                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
224                 goto finish;
225         }
226
227         if (acl_set_fd(f->fd, acl) < 0)
228                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
229
230 finish:
231         acl_free(acl);
232 #endif
233 }
234
235 static JournalFile* find_journal(Server *s, uid_t uid) {
236         _cleanup_free_ char *p = NULL;
237         int r;
238         JournalFile *f;
239         sd_id128_t machine;
240
241         assert(s);
242
243         /* We split up user logs only on /var, not on /run. If the
244          * runtime file is open, we write to it exclusively, in order
245          * to guarantee proper order as soon as we flush /run to
246          * /var and close the runtime file. */
247
248         if (s->runtime_journal)
249                 return s->runtime_journal;
250
251         if (uid <= 0)
252                 return s->system_journal;
253
254         r = sd_id128_get_machine(&machine);
255         if (r < 0)
256                 return s->system_journal;
257
258         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
259         if (f)
260                 return f;
261
262         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
263                      SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
264                 return s->system_journal;
265
266         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
267                 /* Too many open? Then let's close one */
268                 f = hashmap_steal_first(s->user_journals);
269                 assert(f);
270                 journal_file_close(f);
271         }
272
273         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
274         if (r < 0)
275                 return s->system_journal;
276
277         server_fix_perms(s, f, uid);
278
279         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
280         if (r < 0) {
281                 journal_file_close(f);
282                 return s->system_journal;
283         }
284
285         return f;
286 }
287
288 void server_rotate(Server *s) {
289         JournalFile *f;
290         void *k;
291         Iterator i;
292         int r;
293
294         log_debug("Rotating...");
295
296         if (s->runtime_journal) {
297                 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
298                 if (r < 0)
299                         if (s->runtime_journal)
300                                 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
301                         else
302                                 log_error("Failed to create new runtime journal: %s", strerror(-r));
303                 else
304                         server_fix_perms(s, s->runtime_journal, 0);
305         }
306
307         if (s->system_journal) {
308                 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
309                 if (r < 0)
310                         if (s->system_journal)
311                                 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
312                         else
313                                 log_error("Failed to create new system journal: %s", strerror(-r));
314
315                 else
316                         server_fix_perms(s, s->system_journal, 0);
317         }
318
319         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
320                 r = journal_file_rotate(&f, s->compress, s->seal);
321                 if (r < 0)
322                         if (f)
323                                 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
324                         else {
325                                 log_error("Failed to create user journal: %s", strerror(-r));
326                                 hashmap_remove(s->user_journals, k);
327                         }
328                 else {
329                         hashmap_replace(s->user_journals, k, f);
330                         server_fix_perms(s, f, PTR_TO_UINT32(k));
331                 }
332         }
333 }
334
335 void server_sync(Server *s) {
336         static const struct itimerspec sync_timer_disable = {};
337         JournalFile *f;
338         void *k;
339         Iterator i;
340         int r;
341
342         if (s->system_journal) {
343                 r = journal_file_set_offline(s->system_journal);
344                 if (r < 0)
345                         log_error("Failed to sync system journal: %s", strerror(-r));
346         }
347
348         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
349                 r = journal_file_set_offline(f);
350                 if (r < 0)
351                         log_error("Failed to sync user journal: %s", strerror(-r));
352         }
353
354         r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
355         if (r < 0)
356                 log_error("Failed to disable max timer: %m");
357
358         s->sync_scheduled = false;
359 }
360
361 void server_vacuum(Server *s) {
362         char ids[33];
363         sd_id128_t machine;
364         int r;
365
366         log_debug("Vacuuming...");
367
368         s->oldest_file_usec = 0;
369
370         r = sd_id128_get_machine(&machine);
371         if (r < 0) {
372                 log_error("Failed to get machine ID: %s", strerror(-r));
373                 return;
374         }
375
376         sd_id128_to_string(machine, ids);
377
378         if (s->system_journal) {
379                 char *p = strappenda("/var/log/journal/", ids);
380
381                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
382                 if (r < 0 && r != -ENOENT)
383                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
384         }
385
386         if (s->runtime_journal) {
387                 char *p = strappenda("/run/log/journal/", ids);
388
389                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
390                 if (r < 0 && r != -ENOENT)
391                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
392         }
393
394         s->cached_available_space_timestamp = 0;
395 }
396
397 bool shall_try_append_again(JournalFile *f, int r) {
398
399         /* -E2BIG            Hit configured limit
400            -EFBIG            Hit fs limit
401            -EDQUOT           Quota limit hit
402            -ENOSPC           Disk full
403            -EHOSTDOWN        Other machine
404            -EBUSY            Unclean shutdown
405            -EPROTONOSUPPORT  Unsupported feature
406            -EBADMSG          Corrupted
407            -ENODATA          Truncated
408            -ESHUTDOWN        Already archived */
409
410         if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
411                 log_debug("%s: Allocation limit reached, rotating.", f->path);
412         else if (r == -EHOSTDOWN)
413                 log_info("%s: Journal file from other machine, rotating.", f->path);
414         else if (r == -EBUSY)
415                 log_info("%s: Unclean shutdown, rotating.", f->path);
416         else if (r == -EPROTONOSUPPORT)
417                 log_info("%s: Unsupported feature, rotating.", f->path);
418         else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
419                 log_warning("%s: Journal file corrupted, rotating.", f->path);
420         else
421                 return false;
422
423         return true;
424 }
425
426 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
427         JournalFile *f;
428         bool vacuumed = false;
429         int r;
430
431         assert(s);
432         assert(iovec);
433         assert(n > 0);
434
435         f = find_journal(s, uid);
436         if (!f)
437                 return;
438
439         if (journal_file_rotate_suggested(f, s->max_file_usec)) {
440                 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
441                 server_rotate(s);
442                 server_vacuum(s);
443                 vacuumed = true;
444
445                 f = find_journal(s, uid);
446                 if (!f)
447                         return;
448         }
449
450         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
451         if (r >= 0) {
452                 server_schedule_sync(s, priority);
453                 return;
454         }
455
456         if (vacuumed || !shall_try_append_again(f, r)) {
457                 size_t size = 0;
458                 unsigned i;
459                 for (i = 0; i < n; i++)
460                         size += iovec[i].iov_len;
461
462                 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
463                 return;
464         }
465
466         server_rotate(s);
467         server_vacuum(s);
468
469         f = find_journal(s, uid);
470         if (!f)
471                 return;
472
473         log_debug("Retrying write.");
474         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
475         if (r < 0) {
476                 size_t size = 0;
477                 unsigned i;
478                 for (i = 0; i < n; i++)
479                         size += iovec[i].iov_len;
480
481                 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
482         } else
483                 server_schedule_sync(s, priority);
484 }
485
486 static void dispatch_message_real(
487                 Server *s,
488                 struct iovec *iovec, unsigned n, unsigned m,
489                 struct ucred *ucred,
490                 struct timeval *tv,
491                 const char *label, size_t label_len,
492                 const char *unit_id,
493                 int priority,
494                 pid_t object_pid) {
495
496         char    pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
497                 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
498                 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
499                 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
500                 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
501                 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
502                 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=",
503                 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
504                 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
505                 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
506         uid_t object_uid;
507         gid_t object_gid;
508         char *x;
509         sd_id128_t id;
510         int r;
511         char *t, *c;
512         uid_t realuid = 0, owner = 0, journal_uid;
513         bool owner_valid = false;
514 #ifdef HAVE_AUDIT
515         char    audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
516                 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
517                 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
518                 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
519
520         uint32_t audit;
521         uid_t loginuid;
522 #endif
523
524         assert(s);
525         assert(iovec);
526         assert(n > 0);
527         assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
528
529         if (ucred) {
530                 realuid = ucred->uid;
531
532                 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
533                 IOVEC_SET_STRING(iovec[n++], pid);
534
535                 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
536                 IOVEC_SET_STRING(iovec[n++], uid);
537
538                 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
539                 IOVEC_SET_STRING(iovec[n++], gid);
540
541                 r = get_process_comm(ucred->pid, &t);
542                 if (r >= 0) {
543                         x = strappenda("_COMM=", t);
544                         free(t);
545                         IOVEC_SET_STRING(iovec[n++], x);
546                 }
547
548                 r = get_process_exe(ucred->pid, &t);
549                 if (r >= 0) {
550                         x = strappenda("_EXE=", t);
551                         free(t);
552                         IOVEC_SET_STRING(iovec[n++], x);
553                 }
554
555                 r = get_process_cmdline(ucred->pid, 0, false, &t);
556                 if (r >= 0) {
557                         x = strappenda("_CMDLINE=", t);
558                         free(t);
559                         IOVEC_SET_STRING(iovec[n++], x);
560                 }
561
562                 r = get_process_capeff(ucred->pid, &t);
563                 if (r >= 0) {
564                         x = strappenda("_CAP_EFFECTIVE=", t);
565                         free(t);
566                         IOVEC_SET_STRING(iovec[n++], x);
567                 }
568
569 #ifdef HAVE_AUDIT
570                 r = audit_session_from_pid(ucred->pid, &audit);
571                 if (r >= 0) {
572                         sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
573                         IOVEC_SET_STRING(iovec[n++], audit_session);
574                 }
575
576                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
577                 if (r >= 0) {
578                         sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
579                         IOVEC_SET_STRING(iovec[n++], audit_loginuid);
580                 }
581 #endif
582
583                 r = cg_pid_get_path_shifted(ucred->pid, NULL, &c);
584                 if (r >= 0) {
585                         char *session = NULL;
586
587                         x = strappenda("_SYSTEMD_CGROUP=", c);
588                         IOVEC_SET_STRING(iovec[n++], x);
589
590                         r = cg_path_get_session(c, &t);
591                         if (r >= 0) {
592                                 session = strappenda("_SYSTEMD_SESSION=", t);
593                                 free(t);
594                                 IOVEC_SET_STRING(iovec[n++], session);
595                         }
596
597                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
598                                 owner_valid = true;
599
600                                 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
601                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
602                         }
603
604                         if (cg_path_get_unit(c, &t) >= 0) {
605                                 x = strappenda("_SYSTEMD_UNIT=", t);
606                                 free(t);
607                                 IOVEC_SET_STRING(iovec[n++], x);
608                         } else if (unit_id && !session) {
609                                 x = strappenda("_SYSTEMD_UNIT=", unit_id);
610                                 IOVEC_SET_STRING(iovec[n++], x);
611                         }
612
613                         if (cg_path_get_user_unit(c, &t) >= 0) {
614                                 x = strappenda("_SYSTEMD_USER_UNIT=", t);
615                                 free(t);
616                                 IOVEC_SET_STRING(iovec[n++], x);
617                         } else if (unit_id && session) {
618                                 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
619                                 IOVEC_SET_STRING(iovec[n++], x);
620                         }
621
622                         if (cg_path_get_slice(c, &t) >= 0) {
623                                 x = strappenda("_SYSTEMD_SLICE=", t);
624                                 free(t);
625                                 IOVEC_SET_STRING(iovec[n++], x);
626                         }
627
628                         free(c);
629                 } else if (unit_id) {
630                         x = strappenda("_SYSTEMD_UNIT=", unit_id);
631                         IOVEC_SET_STRING(iovec[n++], x);
632                 }
633
634 #ifdef HAVE_SELINUX
635                 if (use_selinux()) {
636                         if (label) {
637                                 x = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);
638
639                                 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
640                                 IOVEC_SET_STRING(iovec[n++], x);
641                         } else {
642                                 security_context_t con;
643
644                                 if (getpidcon(ucred->pid, &con) >= 0) {
645                                         x = strappenda("_SELINUX_CONTEXT=", con);
646
647                                         freecon(con);
648                                         IOVEC_SET_STRING(iovec[n++], x);
649                                 }
650                         }
651                 }
652 #endif
653         }
654         assert(n <= m);
655
656         if (object_pid) {
657                 r = get_process_uid(object_pid, &object_uid);
658                 if (r >= 0) {
659                         sprintf(o_uid, "OBJECT_UID=%lu", (unsigned long) object_uid);
660                         IOVEC_SET_STRING(iovec[n++], o_uid);
661                 }
662
663                 r = get_process_gid(object_pid, &object_gid);
664                 if (r >= 0) {
665                         sprintf(o_gid, "OBJECT_GID=%lu", (unsigned long) object_gid);
666                         IOVEC_SET_STRING(iovec[n++], o_gid);
667                 }
668
669                 r = get_process_comm(object_pid, &t);
670                 if (r >= 0) {
671                         x = strappenda("OBJECT_COMM=", t);
672                         free(t);
673                         IOVEC_SET_STRING(iovec[n++], x);
674                 }
675
676                 r = get_process_exe(object_pid, &t);
677                 if (r >= 0) {
678                         x = strappenda("OBJECT_EXE=", t);
679                         free(t);
680                         IOVEC_SET_STRING(iovec[n++], x);
681                 }
682
683                 r = get_process_cmdline(object_pid, 0, false, &t);
684                 if (r >= 0) {
685                         x = strappenda("OBJECT_CMDLINE=", t);
686                         free(t);
687                         IOVEC_SET_STRING(iovec[n++], x);
688                 }
689
690 #ifdef HAVE_AUDIT
691                 r = audit_session_from_pid(object_pid, &audit);
692                 if (r >= 0) {
693                         sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%lu", (unsigned long) audit);
694                         IOVEC_SET_STRING(iovec[n++], o_audit_session);
695                 }
696
697                 r = audit_loginuid_from_pid(object_pid, &loginuid);
698                 if (r >= 0) {
699                         sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
700                         IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
701                 }
702 #endif
703
704                 r = cg_pid_get_path_shifted(object_pid, NULL, &c);
705                 if (r >= 0) {
706                         x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
707                         IOVEC_SET_STRING(iovec[n++], x);
708
709                         r = cg_path_get_session(c, &t);
710                         if (r >= 0) {
711                                 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
712                                 free(t);
713                                 IOVEC_SET_STRING(iovec[n++], x);
714                         }
715
716                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
717                                 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
718                                 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
719                         }
720
721                         if (cg_path_get_unit(c, &t) >= 0) {
722                                 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
723                                 free(t);
724                                 IOVEC_SET_STRING(iovec[n++], x);
725                         }
726
727                         if (cg_path_get_user_unit(c, &t) >= 0) {
728                                 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
729                                 free(t);
730                                 IOVEC_SET_STRING(iovec[n++], x);
731                         }
732
733                         free(c);
734                 }
735         }
736         assert(n <= m);
737
738         if (tv) {
739                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
740                 IOVEC_SET_STRING(iovec[n++], source_time);
741         }
742
743         /* Note that strictly speaking storing the boot id here is
744          * redundant since the entry includes this in-line
745          * anyway. However, we need this indexed, too. */
746         r = sd_id128_get_boot(&id);
747         if (r >= 0) {
748                 sd_id128_to_string(id, boot_id + strlen("_BOOT_ID="));
749                 IOVEC_SET_STRING(iovec[n++], boot_id);
750         }
751
752         r = sd_id128_get_machine(&id);
753         if (r >= 0) {
754                 sd_id128_to_string(id, machine_id + strlen("_MACHINE_ID="));
755                 IOVEC_SET_STRING(iovec[n++], machine_id);
756         }
757
758         t = gethostname_malloc();
759         if (t) {
760                 x = strappenda("_HOSTNAME=", t);
761                 free(t);
762                 IOVEC_SET_STRING(iovec[n++], x);
763         }
764
765         assert(n <= m);
766
767         if (s->split_mode == SPLIT_UID && realuid > 0)
768                 /* Split up strictly by any UID */
769                 journal_uid = realuid;
770         else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
771                 /* Split up by login UIDs, this avoids creation of
772                  * individual journals for system UIDs.  We do this
773                  * only if the realuid is not root, in order not to
774                  * accidentally leak privileged information to the
775                  * user that is logged by a privileged process that is
776                  * part of an unprivileged session.*/
777                 journal_uid = owner;
778         else
779                 journal_uid = 0;
780
781         write_to_journal(s, journal_uid, iovec, n, priority);
782 }
783
784 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
785         char mid[11 + 32 + 1];
786         char buffer[16 + LINE_MAX + 1];
787         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
788         int n = 0;
789         va_list ap;
790         struct ucred ucred = {};
791
792         assert(s);
793         assert(format);
794
795         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
796         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
797
798         memcpy(buffer, "MESSAGE=", 8);
799         va_start(ap, format);
800         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
801         va_end(ap);
802         char_array_0(buffer);
803         IOVEC_SET_STRING(iovec[n++], buffer);
804
805         if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
806                 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
807                 char_array_0(mid);
808                 IOVEC_SET_STRING(iovec[n++], mid);
809         }
810
811         ucred.pid = getpid();
812         ucred.uid = getuid();
813         ucred.gid = getgid();
814
815         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
816 }
817
818 void server_dispatch_message(
819                 Server *s,
820                 struct iovec *iovec, unsigned n, unsigned m,
821                 struct ucred *ucred,
822                 struct timeval *tv,
823                 const char *label, size_t label_len,
824                 const char *unit_id,
825                 int priority,
826                 pid_t object_pid) {
827
828         int rl, r;
829         _cleanup_free_ char *path = NULL;
830         char *c;
831
832         assert(s);
833         assert(iovec || n == 0);
834
835         if (n == 0)
836                 return;
837
838         if (LOG_PRI(priority) > s->max_level_store)
839                 return;
840
841         /* Stop early in case the information will not be stored
842          * in a journal. */
843         if (s->storage == STORAGE_NONE)
844                 return;
845
846         if (!ucred)
847                 goto finish;
848
849         r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
850         if (r < 0)
851                 goto finish;
852
853         /* example: /user/lennart/3/foobar
854          *          /system/dbus.service/foobar
855          *
856          * So let's cut of everything past the third /, since that is
857          * where user directories start */
858
859         c = strchr(path, '/');
860         if (c) {
861                 c = strchr(c+1, '/');
862                 if (c) {
863                         c = strchr(c+1, '/');
864                         if (c)
865                                 *c = 0;
866                 }
867         }
868
869         rl = journal_rate_limit_test(s->rate_limit, path,
870                                      priority & LOG_PRIMASK, available_space(s, false));
871
872         if (rl == 0)
873                 return;
874
875         /* Write a suppression message if we suppressed something */
876         if (rl > 1)
877                 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
878                                       "Suppressed %u messages from %s", rl - 1, path);
879
880 finish:
881         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
882 }
883
884
885 static int system_journal_open(Server *s) {
886         int r;
887         char *fn;
888         sd_id128_t machine;
889         char ids[33];
890
891         r = sd_id128_get_machine(&machine);
892         if (r < 0) {
893                 log_error("Failed to get machine id: %s", strerror(-r));
894                 return r;
895         }
896
897         sd_id128_to_string(machine, ids);
898
899         if (!s->system_journal &&
900             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
901             access("/run/systemd/journal/flushed", F_OK) >= 0) {
902
903                 /* If in auto mode: first try to create the machine
904                  * path, but not the prefix.
905                  *
906                  * If in persistent mode: create /var/log/journal and
907                  * the machine path */
908
909                 if (s->storage == STORAGE_PERSISTENT)
910                         (void) mkdir("/var/log/journal/", 0755);
911
912                 fn = strappenda("/var/log/journal/", ids);
913                 (void) mkdir(fn, 0755);
914
915                 fn = strappenda(fn, "/system.journal");
916                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
917
918                 if (r >= 0)
919                         server_fix_perms(s, s->system_journal, 0);
920                 else if (r < 0) {
921                         if (r != -ENOENT && r != -EROFS)
922                                 log_warning("Failed to open system journal: %s", strerror(-r));
923
924                         r = 0;
925                 }
926         }
927
928         if (!s->runtime_journal &&
929             (s->storage != STORAGE_NONE)) {
930
931                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
932                 if (!fn)
933                         return -ENOMEM;
934
935                 if (s->system_journal) {
936
937                         /* Try to open the runtime journal, but only
938                          * if it already exists, so that we can flush
939                          * it into the system journal */
940
941                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
942                         free(fn);
943
944                         if (r < 0) {
945                                 if (r != -ENOENT)
946                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
947
948                                 r = 0;
949                         }
950
951                 } else {
952
953                         /* OK, we really need the runtime journal, so create
954                          * it if necessary. */
955
956                         (void) mkdir_parents(fn, 0755);
957                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
958                         free(fn);
959
960                         if (r < 0) {
961                                 log_error("Failed to open runtime journal: %s", strerror(-r));
962                                 return r;
963                         }
964                 }
965
966                 if (s->runtime_journal)
967                         server_fix_perms(s, s->runtime_journal, 0);
968         }
969
970         available_space(s, true);
971
972         return r;
973 }
974
975 int server_flush_to_var(Server *s) {
976         sd_id128_t machine;
977         sd_journal *j = NULL;
978         char ts[FORMAT_TIMESPAN_MAX];
979         usec_t start;
980         unsigned n = 0;
981         int r;
982
983         assert(s);
984
985         if (s->storage != STORAGE_AUTO &&
986             s->storage != STORAGE_PERSISTENT)
987                 return 0;
988
989         if (!s->runtime_journal)
990                 return 0;
991
992         system_journal_open(s);
993
994         if (!s->system_journal)
995                 return 0;
996
997         log_debug("Flushing to /var...");
998
999         start = now(CLOCK_MONOTONIC);
1000
1001         r = sd_id128_get_machine(&machine);
1002         if (r < 0)
1003                 return r;
1004
1005         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1006         if (r < 0) {
1007                 log_error("Failed to read runtime journal: %s", strerror(-r));
1008                 return r;
1009         }
1010
1011         sd_journal_set_data_threshold(j, 0);
1012
1013         SD_JOURNAL_FOREACH(j) {
1014                 Object *o = NULL;
1015                 JournalFile *f;
1016
1017                 f = j->current_file;
1018                 assert(f && f->current_offset > 0);
1019
1020                 n++;
1021
1022                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1023                 if (r < 0) {
1024                         log_error("Can't read entry: %s", strerror(-r));
1025                         goto finish;
1026                 }
1027
1028                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1029                 if (r >= 0)
1030                         continue;
1031
1032                 if (!shall_try_append_again(s->system_journal, r)) {
1033                         log_error("Can't write entry: %s", strerror(-r));
1034                         goto finish;
1035                 }
1036
1037                 server_rotate(s);
1038                 server_vacuum(s);
1039
1040                 if (!s->system_journal) {
1041                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1042                         r = -EIO;
1043                         goto finish;
1044                 }
1045
1046                 log_debug("Retrying write.");
1047                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1048                 if (r < 0) {
1049                         log_error("Can't write entry: %s", strerror(-r));
1050                         goto finish;
1051                 }
1052         }
1053
1054 finish:
1055         journal_file_post_change(s->system_journal);
1056
1057         journal_file_close(s->runtime_journal);
1058         s->runtime_journal = NULL;
1059
1060         if (r >= 0)
1061                 rm_rf("/run/log/journal", false, true, false);
1062
1063         sd_journal_close(j);
1064
1065         server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1066
1067         return r;
1068 }
1069
1070 int process_event(Server *s, struct epoll_event *ev) {
1071         assert(s);
1072         assert(ev);
1073
1074         if (ev->data.fd == s->signal_fd) {
1075                 struct signalfd_siginfo sfsi;
1076                 ssize_t n;
1077
1078                 if (ev->events != EPOLLIN) {
1079                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1080                                   "signal fd", ev->events);
1081                         return -EIO;
1082                 }
1083
1084                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1085                 if (n != sizeof(sfsi)) {
1086
1087                         if (n >= 0)
1088                                 return -EIO;
1089
1090                         if (errno == EINTR || errno == EAGAIN)
1091                                 return 1;
1092
1093                         return -errno;
1094                 }
1095
1096                 if (sfsi.ssi_signo == SIGUSR1) {
1097                         log_info("Received request to flush runtime journal from PID %"PRIu32,
1098                                  sfsi.ssi_pid);
1099                         touch("/run/systemd/journal/flushed");
1100                         server_flush_to_var(s);
1101                         server_sync(s);
1102                         return 1;
1103                 }
1104
1105                 if (sfsi.ssi_signo == SIGUSR2) {
1106                         log_info("Received request to rotate journal from PID %"PRIu32,
1107                                  sfsi.ssi_pid);
1108                         server_rotate(s);
1109                         server_vacuum(s);
1110                         return 1;
1111                 }
1112
1113                 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1114
1115                 return 0;
1116
1117         } else if (ev->data.fd == s->sync_timer_fd) {
1118                 int r;
1119                 uint64_t t;
1120
1121                 log_debug("Got sync request from epoll.");
1122
1123                 r = read(ev->data.fd, (void *)&t, sizeof(t));
1124                 if (r < 0)
1125                         return 0;
1126
1127                 server_sync(s);
1128                 return 1;
1129
1130         } else if (ev->data.fd == s->dev_kmsg_fd) {
1131                 int r;
1132
1133                 if (ev->events & EPOLLERR)
1134                         log_warning("/dev/kmsg buffer overrun, some messages lost.");
1135
1136                 if (!(ev->events & EPOLLIN)) {
1137                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1138                                   "/dev/kmsg", ev->events);
1139                         return -EIO;
1140                 }
1141
1142                 r = server_read_dev_kmsg(s);
1143                 if (r < 0)
1144                         return r;
1145
1146                 return 1;
1147
1148         } else if (ev->data.fd == s->native_fd ||
1149                    ev->data.fd == s->syslog_fd) {
1150
1151                 if (ev->events != EPOLLIN) {
1152                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1153                                   ev->data.fd == s->native_fd ? "native fd" : "syslog fd",
1154                                   ev->events);
1155                         return -EIO;
1156                 }
1157
1158                 for (;;) {
1159                         struct ucred *ucred = NULL;
1160                         struct timeval *tv = NULL;
1161                         struct cmsghdr *cmsg;
1162                         char *label = NULL;
1163                         size_t label_len = 0;
1164
1165                         struct iovec iovec;
1166                         union {
1167                                 struct cmsghdr cmsghdr;
1168
1169                                 /* We use NAME_MAX space for the
1170                                  * SELinux label here. The kernel
1171                                  * currently enforces no limit, but
1172                                  * according to suggestions from the
1173                                  * SELinux people this will change and
1174                                  * it will probably be identical to
1175                                  * NAME_MAX. For now we use that, but
1176                                  * this should be updated one day when
1177                                  * the final limit is known.*/
1178                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1179                                             CMSG_SPACE(sizeof(struct timeval)) +
1180                                             CMSG_SPACE(sizeof(int)) + /* fd */
1181                                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
1182                         } control = {};
1183                         struct msghdr msghdr = {
1184                                 .msg_iov = &iovec,
1185                                 .msg_iovlen = 1,
1186                                 .msg_control = &control,
1187                                 .msg_controllen = sizeof(control),
1188                         };
1189
1190                         ssize_t n;
1191                         int v;
1192                         int *fds = NULL;
1193                         unsigned n_fds = 0;
1194
1195                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1196                                 log_error("SIOCINQ failed: %m");
1197                                 return -errno;
1198                         }
1199
1200                         if (!GREEDY_REALLOC(s->buffer, s->buffer_size, LINE_MAX + (size_t) v))
1201                                 return log_oom();
1202
1203                         iovec.iov_base = s->buffer;
1204                         iovec.iov_len = s->buffer_size;
1205
1206                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1207                         if (n < 0) {
1208                                 if (errno == EINTR || errno == EAGAIN)
1209                                         return 1;
1210
1211                                 log_error("recvmsg() failed: %m");
1212                                 return -errno;
1213                         }
1214
1215                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1216
1217                                 if (cmsg->cmsg_level == SOL_SOCKET &&
1218                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
1219                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1220                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
1221                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1222                                          cmsg->cmsg_type == SCM_SECURITY) {
1223                                         label = (char*) CMSG_DATA(cmsg);
1224                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
1225                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1226                                            cmsg->cmsg_type == SO_TIMESTAMP &&
1227                                            cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1228                                         tv = (struct timeval*) CMSG_DATA(cmsg);
1229                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1230                                          cmsg->cmsg_type == SCM_RIGHTS) {
1231                                         fds = (int*) CMSG_DATA(cmsg);
1232                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1233                                 }
1234                         }
1235
1236                         if (ev->data.fd == s->syslog_fd) {
1237                                 if (n > 0 && n_fds == 0) {
1238                                         s->buffer[n] = 0;
1239                                         server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1240                                 } else if (n_fds > 0)
1241                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
1242
1243                         } else {
1244                                 if (n > 0 && n_fds == 0)
1245                                         server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1246                                 else if (n == 0 && n_fds == 1)
1247                                         server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1248                                 else if (n_fds > 0)
1249                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
1250                         }
1251
1252                         close_many(fds, n_fds);
1253                 }
1254
1255                 return 1;
1256
1257         } else if (ev->data.fd == s->stdout_fd) {
1258
1259                 if (ev->events != EPOLLIN) {
1260                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1261                                   "stdout fd", ev->events);
1262                         return -EIO;
1263                 }
1264
1265                 stdout_stream_new(s);
1266                 return 1;
1267
1268         } else {
1269                 StdoutStream *stream;
1270
1271                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1272                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1273                                   "stdout stream", ev->events);
1274                         return -EIO;
1275                 }
1276
1277                 /* If it is none of the well-known fds, it must be an
1278                  * stdout stream fd. Note that this is a bit ugly here
1279                  * (since we rely that none of the well-known fds
1280                  * could be interpreted as pointer), but nonetheless
1281                  * safe, since the well-known fds would never get an
1282                  * fd > 4096, i.e. beyond the first memory page */
1283
1284                 stream = ev->data.ptr;
1285
1286                 if (stdout_stream_process(stream) <= 0)
1287                         stdout_stream_free(stream);
1288
1289                 return 1;
1290         }
1291
1292         log_error("Unknown event.");
1293         return 0;
1294 }
1295
1296 static int open_signalfd(Server *s) {
1297         sigset_t mask;
1298         struct epoll_event ev;
1299
1300         assert(s);
1301
1302         assert_se(sigemptyset(&mask) == 0);
1303         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1304         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1305
1306         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1307         if (s->signal_fd < 0) {
1308                 log_error("signalfd(): %m");
1309                 return -errno;
1310         }
1311
1312         zero(ev);
1313         ev.events = EPOLLIN;
1314         ev.data.fd = s->signal_fd;
1315
1316         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1317                 log_error("epoll_ctl(): %m");
1318                 return -errno;
1319         }
1320
1321         return 0;
1322 }
1323
1324 static int server_parse_proc_cmdline(Server *s) {
1325         _cleanup_free_ char *line = NULL;
1326         char *w, *state;
1327         size_t l;
1328         int r;
1329
1330         r = proc_cmdline(&line);
1331         if (r < 0)
1332                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1333         if (r <= 0)
1334                 return 0;
1335
1336         FOREACH_WORD_QUOTED(w, l, line, state) {
1337                 _cleanup_free_ char *word;
1338
1339                 word = strndup(w, l);
1340                 if (!word)
1341                         return -ENOMEM;
1342
1343                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1344                         r = parse_boolean(word + 35);
1345                         if (r < 0)
1346                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1347                         else
1348                                 s->forward_to_syslog = r;
1349                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1350                         r = parse_boolean(word + 33);
1351                         if (r < 0)
1352                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1353                         else
1354                                 s->forward_to_kmsg = r;
1355                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1356                         r = parse_boolean(word + 36);
1357                         if (r < 0)
1358                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1359                         else
1360                                 s->forward_to_console = r;
1361                 } else if (startswith(word, "systemd.journald"))
1362                         log_warning("Invalid systemd.journald parameter. Ignoring.");
1363         }
1364
1365         return 0;
1366 }
1367
1368 static int server_parse_config_file(Server *s) {
1369         static const char fn[] = "/etc/systemd/journald.conf";
1370         _cleanup_fclose_ FILE *f = NULL;
1371         int r;
1372
1373         assert(s);
1374
1375         f = fopen(fn, "re");
1376         if (!f) {
1377                 if (errno == ENOENT)
1378                         return 0;
1379
1380                 log_warning("Failed to open configuration file %s: %m", fn);
1381                 return -errno;
1382         }
1383
1384         r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1385                          (void*) journald_gperf_lookup, false, false, s);
1386         if (r < 0)
1387                 log_warning("Failed to parse configuration file: %s", strerror(-r));
1388
1389         return r;
1390 }
1391
1392 static int server_open_sync_timer(Server *s) {
1393         int r;
1394         struct epoll_event ev;
1395
1396         assert(s);
1397
1398         s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1399         if (s->sync_timer_fd < 0)
1400                 return -errno;
1401
1402         zero(ev);
1403         ev.events = EPOLLIN;
1404         ev.data.fd = s->sync_timer_fd;
1405
1406         r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1407         if (r < 0) {
1408                 log_error("Failed to add idle timer fd to epoll object: %m");
1409                 return -errno;
1410         }
1411
1412         return 0;
1413 }
1414
1415 int server_schedule_sync(Server *s, int priority) {
1416         int r;
1417
1418         assert(s);
1419
1420         if (priority <= LOG_CRIT) {
1421                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1422                 server_sync(s);
1423                 return 0;
1424         }
1425
1426         if (s->sync_scheduled)
1427                 return 0;
1428
1429         if (s->sync_interval_usec) {
1430                 struct itimerspec sync_timer_enable = {};
1431
1432                 timespec_store(&sync_timer_enable.it_value, s->sync_interval_usec);
1433
1434                 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1435                 if (r < 0)
1436                         return -errno;
1437         }
1438
1439         s->sync_scheduled = true;
1440
1441         return 0;
1442 }
1443
1444 int server_init(Server *s) {
1445         int n, r, fd;
1446
1447         assert(s);
1448
1449         zero(*s);
1450         s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1451                 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1452         s->compress = true;
1453         s->seal = true;
1454
1455         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1456         s->sync_scheduled = false;
1457
1458         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1459         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1460
1461         s->forward_to_syslog = true;
1462
1463         s->max_level_store = LOG_DEBUG;
1464         s->max_level_syslog = LOG_DEBUG;
1465         s->max_level_kmsg = LOG_NOTICE;
1466         s->max_level_console = LOG_INFO;
1467
1468         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1469         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1470
1471         server_parse_config_file(s);
1472         server_parse_proc_cmdline(s);
1473         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1474                 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1475                           (long long unsigned) s->rate_limit_interval,
1476                           s->rate_limit_burst);
1477                 s->rate_limit_interval = s->rate_limit_burst = 0;
1478         }
1479
1480         mkdir_p("/run/systemd/journal", 0755);
1481
1482         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1483         if (!s->user_journals)
1484                 return log_oom();
1485
1486         s->mmap = mmap_cache_new();
1487         if (!s->mmap)
1488                 return log_oom();
1489
1490         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1491         if (s->epoll_fd < 0) {
1492                 log_error("Failed to create epoll object: %m");
1493                 return -errno;
1494         }
1495
1496         n = sd_listen_fds(true);
1497         if (n < 0) {
1498                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1499                 return n;
1500         }
1501
1502         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1503
1504                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1505
1506                         if (s->native_fd >= 0) {
1507                                 log_error("Too many native sockets passed.");
1508                                 return -EINVAL;
1509                         }
1510
1511                         s->native_fd = fd;
1512
1513                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1514
1515                         if (s->stdout_fd >= 0) {
1516                                 log_error("Too many stdout sockets passed.");
1517                                 return -EINVAL;
1518                         }
1519
1520                         s->stdout_fd = fd;
1521
1522                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1523
1524                         if (s->syslog_fd >= 0) {
1525                                 log_error("Too many /dev/log sockets passed.");
1526                                 return -EINVAL;
1527                         }
1528
1529                         s->syslog_fd = fd;
1530
1531                 } else {
1532                         log_error("Unknown socket passed.");
1533                         return -EINVAL;
1534                 }
1535         }
1536
1537         r = server_open_syslog_socket(s);
1538         if (r < 0)
1539                 return r;
1540
1541         r = server_open_native_socket(s);
1542         if (r < 0)
1543                 return r;
1544
1545         r = server_open_stdout_socket(s);
1546         if (r < 0)
1547                 return r;
1548
1549         r = server_open_dev_kmsg(s);
1550         if (r < 0)
1551                 return r;
1552
1553         r = server_open_kernel_seqnum(s);
1554         if (r < 0)
1555                 return r;
1556
1557         r = server_open_sync_timer(s);
1558         if (r < 0)
1559                 return r;
1560
1561         r = open_signalfd(s);
1562         if (r < 0)
1563                 return r;
1564
1565         s->udev = udev_new();
1566         if (!s->udev)
1567                 return -ENOMEM;
1568
1569         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1570                                                s->rate_limit_burst);
1571         if (!s->rate_limit)
1572                 return -ENOMEM;
1573
1574         r = system_journal_open(s);
1575         if (r < 0)
1576                 return r;
1577
1578         return 0;
1579 }
1580
1581 void server_maybe_append_tags(Server *s) {
1582 #ifdef HAVE_GCRYPT
1583         JournalFile *f;
1584         Iterator i;
1585         usec_t n;
1586
1587         n = now(CLOCK_REALTIME);
1588
1589         if (s->system_journal)
1590                 journal_file_maybe_append_tag(s->system_journal, n);
1591
1592         HASHMAP_FOREACH(f, s->user_journals, i)
1593                 journal_file_maybe_append_tag(f, n);
1594 #endif
1595 }
1596
1597 void server_done(Server *s) {
1598         JournalFile *f;
1599         assert(s);
1600
1601         while (s->stdout_streams)
1602                 stdout_stream_free(s->stdout_streams);
1603
1604         if (s->system_journal)
1605                 journal_file_close(s->system_journal);
1606
1607         if (s->runtime_journal)
1608                 journal_file_close(s->runtime_journal);
1609
1610         while ((f = hashmap_steal_first(s->user_journals)))
1611                 journal_file_close(f);
1612
1613         hashmap_free(s->user_journals);
1614
1615         if (s->epoll_fd >= 0)
1616                 close_nointr_nofail(s->epoll_fd);
1617
1618         if (s->signal_fd >= 0)
1619                 close_nointr_nofail(s->signal_fd);
1620
1621         if (s->syslog_fd >= 0)
1622                 close_nointr_nofail(s->syslog_fd);
1623
1624         if (s->native_fd >= 0)
1625                 close_nointr_nofail(s->native_fd);
1626
1627         if (s->stdout_fd >= 0)
1628                 close_nointr_nofail(s->stdout_fd);
1629
1630         if (s->dev_kmsg_fd >= 0)
1631                 close_nointr_nofail(s->dev_kmsg_fd);
1632
1633         if (s->sync_timer_fd >= 0)
1634                 close_nointr_nofail(s->sync_timer_fd);
1635
1636         if (s->rate_limit)
1637                 journal_rate_limit_free(s->rate_limit);
1638
1639         if (s->kernel_seqnum)
1640                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1641
1642         free(s->buffer);
1643         free(s->tty_path);
1644
1645         if (s->mmap)
1646                 mmap_cache_unref(s->mmap);
1647
1648         if (s->udev)
1649                 udev_unref(s->udev);
1650 }