chiark / gitweb /
29429f6eadfae1ce4703020ac849afcf6cdac95d
[elogind.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
26 #include <sys/mman.h>
27 #include <sys/timerfd.h>
28
29 #include <libudev.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
33
34 #include "fileio.h"
35 #include "mkdir.h"
36 #include "hashmap.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
40 #include "list.h"
41 #include "virt.h"
42 #include "missing.h"
43 #include "conf-parser.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-server.h"
48 #include "journald-rate-limit.h"
49 #include "journald-kmsg.h"
50 #include "journald-syslog.h"
51 #include "journald-stream.h"
52 #include "journald-console.h"
53 #include "journald-native.h"
54 #include "selinux-util.h"
55
56 #ifdef HAVE_ACL
57 #include <sys/acl.h>
58 #include <acl/libacl.h>
59 #include "acl-util.h"
60 #endif
61
62 #ifdef HAVE_SELINUX
63 #include <selinux/selinux.h>
64 #endif
65
/* Upper bound on the number of per-user journal files kept open at once. */
#define USER_JOURNALS_MAX 1024

/* Built-in defaults for the sync interval and for rate limiting. */
#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000

/* How long a cached available-space figure is considered fresh. */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
73
74 static const char* const storage_table[] = {
75         [STORAGE_AUTO] = "auto",
76         [STORAGE_VOLATILE] = "volatile",
77         [STORAGE_PERSISTENT] = "persistent",
78         [STORAGE_NONE] = "none"
79 };
80
81 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
82 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
83
84 static const char* const split_mode_table[] = {
85         [SPLIT_NONE] = "none",
86         [SPLIT_UID] = "uid",
87         [SPLIT_LOGIN] = "login"
88 };
89
90 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
91 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
92
93 static uint64_t available_space(Server *s, bool verbose) {
94         char ids[33];
95         _cleanup_free_ char *p = NULL;
96         sd_id128_t machine;
97         struct statvfs ss;
98         uint64_t sum = 0, ss_avail = 0, avail = 0;
99         int r;
100         _cleanup_closedir_ DIR *d = NULL;
101         usec_t ts;
102         const char *f;
103         JournalMetrics *m;
104
105         ts = now(CLOCK_MONOTONIC);
106
107         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
108             && !verbose)
109                 return s->cached_available_space;
110
111         r = sd_id128_get_machine(&machine);
112         if (r < 0)
113                 return 0;
114
115         if (s->system_journal) {
116                 f = "/var/log/journal/";
117                 m = &s->system_metrics;
118         } else {
119                 f = "/run/log/journal/";
120                 m = &s->runtime_metrics;
121         }
122
123         assert(m);
124
125         p = strappend(f, sd_id128_to_string(machine, ids));
126         if (!p)
127                 return 0;
128
129         d = opendir(p);
130         if (!d)
131                 return 0;
132
133         if (fstatvfs(dirfd(d), &ss) < 0)
134                 return 0;
135
136         for (;;) {
137                 struct stat st;
138                 struct dirent *de;
139                 union dirent_storage buf;
140
141                 r = readdir_r(d, &buf.de, &de);
142                 if (r != 0)
143                         break;
144
145                 if (!de)
146                         break;
147
148                 if (!endswith(de->d_name, ".journal") &&
149                     !endswith(de->d_name, ".journal~"))
150                         continue;
151
152                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
153                         continue;
154
155                 if (!S_ISREG(st.st_mode))
156                         continue;
157
158                 sum += (uint64_t) st.st_blocks * 512UL;
159         }
160
161         ss_avail = ss.f_bsize * ss.f_bavail;
162         avail = ss_avail > m->keep_free ? ss_avail - m->keep_free : 0;
163
164         s->cached_available_space = MIN(m->max_use, avail) > sum ? MIN(m->max_use, avail) - sum : 0;
165         s->cached_available_space_timestamp = ts;
166
167         if (verbose) {
168                 char    fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
169                         fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
170
171                 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
172                                       "%s journal is using %s (max %s, leaving %s of free %s, current limit %s).",
173                                       s->system_journal ? "Permanent" : "Runtime",
174                                       format_bytes(fb1, sizeof(fb1), sum),
175                                       format_bytes(fb2, sizeof(fb2), m->max_use),
176                                       format_bytes(fb3, sizeof(fb3), m->keep_free),
177                                       format_bytes(fb4, sizeof(fb4), ss_avail),
178                                       format_bytes(fb5, sizeof(fb5), MIN(m->max_use, avail)));
179         }
180
181         return s->cached_available_space;
182 }
183
184 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
185         int r;
186 #ifdef HAVE_ACL
187         acl_t acl;
188         acl_entry_t entry;
189         acl_permset_t permset;
190 #endif
191
192         assert(f);
193
194         r = fchmod(f->fd, 0640);
195         if (r < 0)
196                 log_warning("Failed to fix access mode on %s, ignoring: %s", f->path, strerror(-r));
197
198 #ifdef HAVE_ACL
199         if (uid <= 0)
200                 return;
201
202         acl = acl_get_fd(f->fd);
203         if (!acl) {
204                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
205                 return;
206         }
207
208         r = acl_find_uid(acl, uid, &entry);
209         if (r <= 0) {
210
211                 if (acl_create_entry(&acl, &entry) < 0 ||
212                     acl_set_tag_type(entry, ACL_USER) < 0 ||
213                     acl_set_qualifier(entry, &uid) < 0) {
214                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
215                         goto finish;
216                 }
217         }
218
219         /* We do not recalculate the mask unconditionally here,
220          * so that the fchmod() mask above stays intact. */
221         if (acl_get_permset(entry, &permset) < 0 ||
222             acl_add_perm(permset, ACL_READ) < 0 ||
223             calc_acl_mask_if_needed(&acl) < 0) {
224                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
225                 goto finish;
226         }
227
228         if (acl_set_fd(f->fd, acl) < 0)
229                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
230
231 finish:
232         acl_free(acl);
233 #endif
234 }
235
236 static JournalFile* find_journal(Server *s, uid_t uid) {
237         _cleanup_free_ char *p = NULL;
238         int r;
239         JournalFile *f;
240         sd_id128_t machine;
241
242         assert(s);
243
244         /* We split up user logs only on /var, not on /run. If the
245          * runtime file is open, we write to it exclusively, in order
246          * to guarantee proper order as soon as we flush /run to
247          * /var and close the runtime file. */
248
249         if (s->runtime_journal)
250                 return s->runtime_journal;
251
252         if (uid <= 0)
253                 return s->system_journal;
254
255         r = sd_id128_get_machine(&machine);
256         if (r < 0)
257                 return s->system_journal;
258
259         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
260         if (f)
261                 return f;
262
263         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
264                      SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
265                 return s->system_journal;
266
267         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
268                 /* Too many open? Then let's close one */
269                 f = hashmap_steal_first(s->user_journals);
270                 assert(f);
271                 journal_file_close(f);
272         }
273
274         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
275         if (r < 0)
276                 return s->system_journal;
277
278         server_fix_perms(s, f, uid);
279
280         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
281         if (r < 0) {
282                 journal_file_close(f);
283                 return s->system_journal;
284         }
285
286         return f;
287 }
288
289 void server_rotate(Server *s) {
290         JournalFile *f;
291         void *k;
292         Iterator i;
293         int r;
294
295         log_debug("Rotating...");
296
297         if (s->runtime_journal) {
298                 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
299                 if (r < 0)
300                         if (s->runtime_journal)
301                                 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
302                         else
303                                 log_error("Failed to create new runtime journal: %s", strerror(-r));
304                 else
305                         server_fix_perms(s, s->runtime_journal, 0);
306         }
307
308         if (s->system_journal) {
309                 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
310                 if (r < 0)
311                         if (s->system_journal)
312                                 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
313                         else
314                                 log_error("Failed to create new system journal: %s", strerror(-r));
315
316                 else
317                         server_fix_perms(s, s->system_journal, 0);
318         }
319
320         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
321                 r = journal_file_rotate(&f, s->compress, s->seal);
322                 if (r < 0)
323                         if (f)
324                                 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
325                         else {
326                                 log_error("Failed to create user journal: %s", strerror(-r));
327                                 hashmap_remove(s->user_journals, k);
328                         }
329                 else {
330                         hashmap_replace(s->user_journals, k, f);
331                         server_fix_perms(s, f, PTR_TO_UINT32(k));
332                 }
333         }
334 }
335
336 void server_sync(Server *s) {
337         static const struct itimerspec sync_timer_disable = {};
338         JournalFile *f;
339         void *k;
340         Iterator i;
341         int r;
342
343         if (s->system_journal) {
344                 r = journal_file_set_offline(s->system_journal);
345                 if (r < 0)
346                         log_error("Failed to sync system journal: %s", strerror(-r));
347         }
348
349         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
350                 r = journal_file_set_offline(f);
351                 if (r < 0)
352                         log_error("Failed to sync user journal: %s", strerror(-r));
353         }
354
355         r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
356         if (r < 0)
357                 log_error("Failed to disable max timer: %m");
358
359         s->sync_scheduled = false;
360 }
361
362 void server_vacuum(Server *s) {
363         char ids[33];
364         sd_id128_t machine;
365         int r;
366
367         log_debug("Vacuuming...");
368
369         s->oldest_file_usec = 0;
370
371         r = sd_id128_get_machine(&machine);
372         if (r < 0) {
373                 log_error("Failed to get machine ID: %s", strerror(-r));
374                 return;
375         }
376
377         sd_id128_to_string(machine, ids);
378
379         if (s->system_journal) {
380                 char *p = strappenda("/var/log/journal/", ids);
381
382                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
383                 if (r < 0 && r != -ENOENT)
384                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
385         }
386
387         if (s->runtime_journal) {
388                 char *p = strappenda("/run/log/journal/", ids);
389
390                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
391                 if (r < 0 && r != -ENOENT)
392                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
393         }
394
395         s->cached_available_space_timestamp = 0;
396 }
397
398 bool shall_try_append_again(JournalFile *f, int r) {
399
400         /* -E2BIG            Hit configured limit
401            -EFBIG            Hit fs limit
402            -EDQUOT           Quota limit hit
403            -ENOSPC           Disk full
404            -EHOSTDOWN        Other machine
405            -EBUSY            Unclean shutdown
406            -EPROTONOSUPPORT  Unsupported feature
407            -EBADMSG          Corrupted
408            -ENODATA          Truncated
409            -ESHUTDOWN        Already archived */
410
411         if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
412                 log_debug("%s: Allocation limit reached, rotating.", f->path);
413         else if (r == -EHOSTDOWN)
414                 log_info("%s: Journal file from other machine, rotating.", f->path);
415         else if (r == -EBUSY)
416                 log_info("%s: Unclean shutdown, rotating.", f->path);
417         else if (r == -EPROTONOSUPPORT)
418                 log_info("%s: Unsupported feature, rotating.", f->path);
419         else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
420                 log_warning("%s: Journal file corrupted, rotating.", f->path);
421         else
422                 return false;
423
424         return true;
425 }
426
427 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
428         JournalFile *f;
429         bool vacuumed = false;
430         int r;
431
432         assert(s);
433         assert(iovec);
434         assert(n > 0);
435
436         f = find_journal(s, uid);
437         if (!f)
438                 return;
439
440         if (journal_file_rotate_suggested(f, s->max_file_usec)) {
441                 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
442                 server_rotate(s);
443                 server_vacuum(s);
444                 vacuumed = true;
445
446                 f = find_journal(s, uid);
447                 if (!f)
448                         return;
449         }
450
451         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
452         if (r >= 0) {
453                 server_schedule_sync(s, priority);
454                 return;
455         }
456
457         if (vacuumed || !shall_try_append_again(f, r)) {
458                 size_t size = 0;
459                 unsigned i;
460                 for (i = 0; i < n; i++)
461                         size += iovec[i].iov_len;
462
463                 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
464                 return;
465         }
466
467         server_rotate(s);
468         server_vacuum(s);
469
470         f = find_journal(s, uid);
471         if (!f)
472                 return;
473
474         log_debug("Retrying write.");
475         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
476         if (r < 0) {
477                 size_t size = 0;
478                 unsigned i;
479                 for (i = 0; i < n; i++)
480                         size += iovec[i].iov_len;
481
482                 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
483         } else
484                 server_schedule_sync(s, priority);
485 }
486
487 static void dispatch_message_real(
488                 Server *s,
489                 struct iovec *iovec, unsigned n, unsigned m,
490                 struct ucred *ucred,
491                 struct timeval *tv,
492                 const char *label, size_t label_len,
493                 const char *unit_id,
494                 int priority,
495                 pid_t object_pid) {
496
497         char    pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
498                 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
499                 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
500                 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
501                 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
502                 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
503                 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=",
504                 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
505                 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
506                 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
507         uid_t object_uid;
508         gid_t object_gid;
509         char *x;
510         sd_id128_t id;
511         int r;
512         char *t, *c;
513         uid_t realuid = 0, owner = 0, journal_uid;
514         bool owner_valid = false;
515 #ifdef HAVE_AUDIT
516         char    audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
517                 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
518                 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
519                 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
520
521         uint32_t audit;
522         uid_t loginuid;
523 #endif
524
525         assert(s);
526         assert(iovec);
527         assert(n > 0);
528         assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
529
530         if (ucred) {
531                 realuid = ucred->uid;
532
533                 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
534                 IOVEC_SET_STRING(iovec[n++], pid);
535
536                 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
537                 IOVEC_SET_STRING(iovec[n++], uid);
538
539                 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
540                 IOVEC_SET_STRING(iovec[n++], gid);
541
542                 r = get_process_comm(ucred->pid, &t);
543                 if (r >= 0) {
544                         x = strappenda("_COMM=", t);
545                         free(t);
546                         IOVEC_SET_STRING(iovec[n++], x);
547                 }
548
549                 r = get_process_exe(ucred->pid, &t);
550                 if (r >= 0) {
551                         x = strappenda("_EXE=", t);
552                         free(t);
553                         IOVEC_SET_STRING(iovec[n++], x);
554                 }
555
556                 r = get_process_cmdline(ucred->pid, 0, false, &t);
557                 if (r >= 0) {
558                         x = strappenda("_CMDLINE=", t);
559                         free(t);
560                         IOVEC_SET_STRING(iovec[n++], x);
561                 }
562
563                 r = get_process_capeff(ucred->pid, &t);
564                 if (r >= 0) {
565                         x = strappenda("_CAP_EFFECTIVE=", t);
566                         free(t);
567                         IOVEC_SET_STRING(iovec[n++], x);
568                 }
569
570 #ifdef HAVE_AUDIT
571                 r = audit_session_from_pid(ucred->pid, &audit);
572                 if (r >= 0) {
573                         sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
574                         IOVEC_SET_STRING(iovec[n++], audit_session);
575                 }
576
577                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
578                 if (r >= 0) {
579                         sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
580                         IOVEC_SET_STRING(iovec[n++], audit_loginuid);
581                 }
582 #endif
583
584                 r = cg_pid_get_path_shifted(ucred->pid, NULL, &c);
585                 if (r >= 0) {
586                         char *session = NULL;
587
588                         x = strappenda("_SYSTEMD_CGROUP=", c);
589                         IOVEC_SET_STRING(iovec[n++], x);
590
591                         r = cg_path_get_session(c, &t);
592                         if (r >= 0) {
593                                 session = strappenda("_SYSTEMD_SESSION=", t);
594                                 free(t);
595                                 IOVEC_SET_STRING(iovec[n++], session);
596                         }
597
598                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
599                                 owner_valid = true;
600
601                                 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
602                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
603                         }
604
605                         if (cg_path_get_unit(c, &t) >= 0) {
606                                 x = strappenda("_SYSTEMD_UNIT=", t);
607                                 free(t);
608                                 IOVEC_SET_STRING(iovec[n++], x);
609                         } else if (unit_id && !session) {
610                                 x = strappenda("_SYSTEMD_UNIT=", unit_id);
611                                 IOVEC_SET_STRING(iovec[n++], x);
612                         }
613
614                         if (cg_path_get_user_unit(c, &t) >= 0) {
615                                 x = strappenda("_SYSTEMD_USER_UNIT=", t);
616                                 free(t);
617                                 IOVEC_SET_STRING(iovec[n++], x);
618                         } else if (unit_id && session) {
619                                 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
620                                 IOVEC_SET_STRING(iovec[n++], x);
621                         }
622
623                         if (cg_path_get_slice(c, &t) >= 0) {
624                                 x = strappenda("_SYSTEMD_SLICE=", t);
625                                 free(t);
626                                 IOVEC_SET_STRING(iovec[n++], x);
627                         }
628
629                         free(c);
630                 }
631
632 #ifdef HAVE_SELINUX
633                 if (use_selinux()) {
634                         if (label) {
635                                 x = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);
636
637                                 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
638                                 IOVEC_SET_STRING(iovec[n++], x);
639                         } else {
640                                 security_context_t con;
641
642                                 if (getpidcon(ucred->pid, &con) >= 0) {
643                                         x = strappenda("_SELINUX_CONTEXT=", con);
644
645                                         freecon(con);
646                                         IOVEC_SET_STRING(iovec[n++], x);
647                                 }
648                         }
649                 }
650 #endif
651         }
652         assert(n <= m);
653
654         if (object_pid) {
655                 r = get_process_uid(object_pid, &object_uid);
656                 if (r >= 0) {
657                         sprintf(o_uid, "OBJECT_UID=%lu", (unsigned long) object_uid);
658                         IOVEC_SET_STRING(iovec[n++], o_uid);
659                 }
660
661                 r = get_process_gid(object_pid, &object_gid);
662                 if (r >= 0) {
663                         sprintf(o_gid, "OBJECT_GID=%lu", (unsigned long) object_gid);
664                         IOVEC_SET_STRING(iovec[n++], o_gid);
665                 }
666
667                 r = get_process_comm(object_pid, &t);
668                 if (r >= 0) {
669                         x = strappenda("OBJECT_COMM=", t);
670                         free(t);
671                         IOVEC_SET_STRING(iovec[n++], x);
672                 }
673
674                 r = get_process_exe(object_pid, &t);
675                 if (r >= 0) {
676                         x = strappenda("OBJECT_EXE=", t);
677                         free(t);
678                         IOVEC_SET_STRING(iovec[n++], x);
679                 }
680
681                 r = get_process_cmdline(object_pid, 0, false, &t);
682                 if (r >= 0) {
683                         x = strappenda("OBJECT_CMDLINE=", t);
684                         free(t);
685                         IOVEC_SET_STRING(iovec[n++], x);
686                 }
687
688 #ifdef HAVE_AUDIT
689                 r = audit_session_from_pid(object_pid, &audit);
690                 if (r >= 0) {
691                         sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%lu", (unsigned long) audit);
692                         IOVEC_SET_STRING(iovec[n++], o_audit_session);
693                 }
694
695                 r = audit_loginuid_from_pid(object_pid, &loginuid);
696                 if (r >= 0) {
697                         sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
698                         IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
699                 }
700 #endif
701
702                 r = cg_pid_get_path_shifted(object_pid, NULL, &c);
703                 if (r >= 0) {
704                         x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
705                         IOVEC_SET_STRING(iovec[n++], x);
706
707                         r = cg_path_get_session(c, &t);
708                         if (r >= 0) {
709                                 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
710                                 free(t);
711                                 IOVEC_SET_STRING(iovec[n++], x);
712                         }
713
714                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
715                                 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
716                                 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
717                         }
718
719                         if (cg_path_get_unit(c, &t) >= 0) {
720                                 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
721                                 free(t);
722                                 IOVEC_SET_STRING(iovec[n++], x);
723                         }
724
725                         if (cg_path_get_user_unit(c, &t) >= 0) {
726                                 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
727                                 free(t);
728                                 IOVEC_SET_STRING(iovec[n++], x);
729                         }
730
731                         free(c);
732                 }
733         }
734         assert(n <= m);
735
736         if (tv) {
737                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
738                 IOVEC_SET_STRING(iovec[n++], source_time);
739         }
740
741         /* Note that strictly speaking storing the boot id here is
742          * redundant since the entry includes this in-line
743          * anyway. However, we need this indexed, too. */
744         r = sd_id128_get_boot(&id);
745         if (r >= 0) {
746                 sd_id128_to_string(id, boot_id + strlen("_BOOT_ID="));
747                 IOVEC_SET_STRING(iovec[n++], boot_id);
748         }
749
750         r = sd_id128_get_machine(&id);
751         if (r >= 0) {
752                 sd_id128_to_string(id, machine_id + strlen("_MACHINE_ID="));
753                 IOVEC_SET_STRING(iovec[n++], machine_id);
754         }
755
756         t = gethostname_malloc();
757         if (t) {
758                 x = strappenda("_HOSTNAME=", t);
759                 free(t);
760                 IOVEC_SET_STRING(iovec[n++], x);
761         }
762
763         assert(n <= m);
764
765         if (s->split_mode == SPLIT_UID && realuid > 0)
766                 /* Split up strictly by any UID */
767                 journal_uid = realuid;
768         else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
769                 /* Split up by login UIDs, this avoids creation of
770                  * individual journals for system UIDs.  We do this
771                  * only if the realuid is not root, in order not to
772                  * accidentally leak privileged information to the
773                  * user that is logged by a privileged process that is
774                  * part of an unprivileged session.*/
775                 journal_uid = owner;
776         else
777                 journal_uid = 0;
778
779         write_to_journal(s, journal_uid, iovec, n, priority);
780 }
781
782 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
783         char mid[11 + 32 + 1];
784         char buffer[16 + LINE_MAX + 1];
785         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
786         int n = 0;
787         va_list ap;
788         struct ucred ucred = {};
789
790         assert(s);
791         assert(format);
792
793         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
794         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
795
796         memcpy(buffer, "MESSAGE=", 8);
797         va_start(ap, format);
798         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
799         va_end(ap);
800         char_array_0(buffer);
801         IOVEC_SET_STRING(iovec[n++], buffer);
802
803         if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
804                 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
805                 char_array_0(mid);
806                 IOVEC_SET_STRING(iovec[n++], mid);
807         }
808
809         ucred.pid = getpid();
810         ucred.uid = getuid();
811         ucred.gid = getgid();
812
813         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
814 }
815
816 void server_dispatch_message(
817                 Server *s,
818                 struct iovec *iovec, unsigned n, unsigned m,
819                 struct ucred *ucred,
820                 struct timeval *tv,
821                 const char *label, size_t label_len,
822                 const char *unit_id,
823                 int priority,
824                 pid_t object_pid) {
825
826         int rl, r;
827         _cleanup_free_ char *path = NULL;
828         char *c;
829
830         assert(s);
831         assert(iovec || n == 0);
832
833         if (n == 0)
834                 return;
835
836         if (LOG_PRI(priority) > s->max_level_store)
837                 return;
838
839         /* Stop early in case the information will not be stored
840          * in a journal. */
841         if (s->storage == STORAGE_NONE)
842                 return;
843
844         if (!ucred)
845                 goto finish;
846
847         r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
848         if (r < 0)
849                 goto finish;
850
851         /* example: /user/lennart/3/foobar
852          *          /system/dbus.service/foobar
853          *
854          * So let's cut of everything past the third /, since that is
855          * where user directories start */
856
857         c = strchr(path, '/');
858         if (c) {
859                 c = strchr(c+1, '/');
860                 if (c) {
861                         c = strchr(c+1, '/');
862                         if (c)
863                                 *c = 0;
864                 }
865         }
866
867         rl = journal_rate_limit_test(s->rate_limit, path,
868                                      priority & LOG_PRIMASK, available_space(s, false));
869
870         if (rl == 0)
871                 return;
872
873         /* Write a suppression message if we suppressed something */
874         if (rl > 1)
875                 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
876                                       "Suppressed %u messages from %s", rl - 1, path);
877
878 finish:
879         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
880 }
881
882
883 static int system_journal_open(Server *s) {
884         int r;
885         char *fn;
886         sd_id128_t machine;
887         char ids[33];
888
889         r = sd_id128_get_machine(&machine);
890         if (r < 0) {
891                 log_error("Failed to get machine id: %s", strerror(-r));
892                 return r;
893         }
894
895         sd_id128_to_string(machine, ids);
896
897         if (!s->system_journal &&
898             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
899             access("/run/systemd/journal/flushed", F_OK) >= 0) {
900
901                 /* If in auto mode: first try to create the machine
902                  * path, but not the prefix.
903                  *
904                  * If in persistent mode: create /var/log/journal and
905                  * the machine path */
906
907                 if (s->storage == STORAGE_PERSISTENT)
908                         (void) mkdir("/var/log/journal/", 0755);
909
910                 fn = strappenda("/var/log/journal/", ids);
911                 (void) mkdir(fn, 0755);
912
913                 fn = strappenda(fn, "/system.journal");
914                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
915
916                 if (r >= 0)
917                         server_fix_perms(s, s->system_journal, 0);
918                 else if (r < 0) {
919                         if (r != -ENOENT && r != -EROFS)
920                                 log_warning("Failed to open system journal: %s", strerror(-r));
921
922                         r = 0;
923                 }
924         }
925
926         if (!s->runtime_journal &&
927             (s->storage != STORAGE_NONE)) {
928
929                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
930                 if (!fn)
931                         return -ENOMEM;
932
933                 if (s->system_journal) {
934
935                         /* Try to open the runtime journal, but only
936                          * if it already exists, so that we can flush
937                          * it into the system journal */
938
939                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
940                         free(fn);
941
942                         if (r < 0) {
943                                 if (r != -ENOENT)
944                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
945
946                                 r = 0;
947                         }
948
949                 } else {
950
951                         /* OK, we really need the runtime journal, so create
952                          * it if necessary. */
953
954                         (void) mkdir_parents(fn, 0755);
955                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
956                         free(fn);
957
958                         if (r < 0) {
959                                 log_error("Failed to open runtime journal: %s", strerror(-r));
960                                 return r;
961                         }
962                 }
963
964                 if (s->runtime_journal)
965                         server_fix_perms(s, s->runtime_journal, 0);
966         }
967
968         available_space(s, true);
969
970         return r;
971 }
972
973 int server_flush_to_var(Server *s) {
974         int r;
975         sd_id128_t machine;
976         sd_journal *j = NULL;
977
978         assert(s);
979
980         if (s->storage != STORAGE_AUTO &&
981             s->storage != STORAGE_PERSISTENT)
982                 return 0;
983
984         if (!s->runtime_journal)
985                 return 0;
986
987         system_journal_open(s);
988
989         if (!s->system_journal)
990                 return 0;
991
992         log_debug("Flushing to /var...");
993
994         r = sd_id128_get_machine(&machine);
995         if (r < 0)
996                 return r;
997
998         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
999         if (r < 0) {
1000                 log_error("Failed to read runtime journal: %s", strerror(-r));
1001                 return r;
1002         }
1003
1004         sd_journal_set_data_threshold(j, 0);
1005
1006         SD_JOURNAL_FOREACH(j) {
1007                 Object *o = NULL;
1008                 JournalFile *f;
1009
1010                 f = j->current_file;
1011                 assert(f && f->current_offset > 0);
1012
1013                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1014                 if (r < 0) {
1015                         log_error("Can't read entry: %s", strerror(-r));
1016                         goto finish;
1017                 }
1018
1019                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1020                 if (r >= 0)
1021                         continue;
1022
1023                 if (!shall_try_append_again(s->system_journal, r)) {
1024                         log_error("Can't write entry: %s", strerror(-r));
1025                         goto finish;
1026                 }
1027
1028                 server_rotate(s);
1029                 server_vacuum(s);
1030
1031                 if (!s->system_journal) {
1032                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1033                         r = -EIO;
1034                         goto finish;
1035                 }
1036
1037                 log_debug("Retrying write.");
1038                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1039                 if (r < 0) {
1040                         log_error("Can't write entry: %s", strerror(-r));
1041                         goto finish;
1042                 }
1043         }
1044
1045 finish:
1046         journal_file_post_change(s->system_journal);
1047
1048         journal_file_close(s->runtime_journal);
1049         s->runtime_journal = NULL;
1050
1051         if (r >= 0)
1052                 rm_rf("/run/log/journal", false, true, false);
1053
1054         sd_journal_close(j);
1055
1056         return r;
1057 }
1058
1059 int process_event(Server *s, struct epoll_event *ev) {
1060         assert(s);
1061         assert(ev);
1062
1063         if (ev->data.fd == s->signal_fd) {
1064                 struct signalfd_siginfo sfsi;
1065                 ssize_t n;
1066
1067                 if (ev->events != EPOLLIN) {
1068                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1069                                   "signal fd", ev->events);
1070                         return -EIO;
1071                 }
1072
1073                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1074                 if (n != sizeof(sfsi)) {
1075
1076                         if (n >= 0)
1077                                 return -EIO;
1078
1079                         if (errno == EINTR || errno == EAGAIN)
1080                                 return 1;
1081
1082                         return -errno;
1083                 }
1084
1085                 if (sfsi.ssi_signo == SIGUSR1) {
1086                         log_info("Received request to flush runtime journal from PID %"PRIu32,
1087                                  sfsi.ssi_pid);
1088                         touch("/run/systemd/journal/flushed");
1089                         server_flush_to_var(s);
1090                         server_sync(s);
1091                         return 1;
1092                 }
1093
1094                 if (sfsi.ssi_signo == SIGUSR2) {
1095                         log_info("Received request to rotate journal from PID %"PRIu32,
1096                                  sfsi.ssi_pid);
1097                         server_rotate(s);
1098                         server_vacuum(s);
1099                         return 1;
1100                 }
1101
1102                 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1103
1104                 return 0;
1105
1106         } else if (ev->data.fd == s->sync_timer_fd) {
1107                 int r;
1108                 uint64_t t;
1109
1110                 log_debug("Got sync request from epoll.");
1111
1112                 r = read(ev->data.fd, (void *)&t, sizeof(t));
1113                 if (r < 0)
1114                         return 0;
1115
1116                 server_sync(s);
1117                 return 1;
1118
1119         } else if (ev->data.fd == s->dev_kmsg_fd) {
1120                 int r;
1121
1122                 if (ev->events & EPOLLERR)
1123                         log_warning("/dev/kmsg buffer overrun, some messages lost.");
1124
1125                 if (!(ev->events & EPOLLIN)) {
1126                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1127                                   "/dev/kmsg", ev->events);
1128                         return -EIO;
1129                 }
1130
1131                 r = server_read_dev_kmsg(s);
1132                 if (r < 0)
1133                         return r;
1134
1135                 return 1;
1136
1137         } else if (ev->data.fd == s->native_fd ||
1138                    ev->data.fd == s->syslog_fd) {
1139
1140                 if (ev->events != EPOLLIN) {
1141                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1142                                   ev->data.fd == s->native_fd ? "native fd" : "syslog fd",
1143                                   ev->events);
1144                         return -EIO;
1145                 }
1146
1147                 for (;;) {
1148                         struct msghdr msghdr;
1149                         struct iovec iovec;
1150                         struct ucred *ucred = NULL;
1151                         struct timeval *tv = NULL;
1152                         struct cmsghdr *cmsg;
1153                         char *label = NULL;
1154                         size_t label_len = 0;
1155                         union {
1156                                 struct cmsghdr cmsghdr;
1157
1158                                 /* We use NAME_MAX space for the
1159                                  * SELinux label here. The kernel
1160                                  * currently enforces no limit, but
1161                                  * according to suggestions from the
1162                                  * SELinux people this will change and
1163                                  * it will probably be identical to
1164                                  * NAME_MAX. For now we use that, but
1165                                  * this should be updated one day when
1166                                  * the final limit is known.*/
1167                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1168                                             CMSG_SPACE(sizeof(struct timeval)) +
1169                                             CMSG_SPACE(sizeof(int)) + /* fd */
1170                                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
1171                         } control;
1172                         ssize_t n;
1173                         int v;
1174                         int *fds = NULL;
1175                         unsigned n_fds = 0;
1176
1177                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1178                                 log_error("SIOCINQ failed: %m");
1179                                 return -errno;
1180                         }
1181
1182                         if (s->buffer_size < (size_t) v) {
1183                                 void *b;
1184                                 size_t l;
1185
1186                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1187                                 b = realloc(s->buffer, l+1);
1188
1189                                 if (!b) {
1190                                         log_error("Couldn't increase buffer.");
1191                                         return -ENOMEM;
1192                                 }
1193
1194                                 s->buffer_size = l;
1195                                 s->buffer = b;
1196                         }
1197
1198                         zero(iovec);
1199                         iovec.iov_base = s->buffer;
1200                         iovec.iov_len = s->buffer_size;
1201
1202                         zero(control);
1203                         zero(msghdr);
1204                         msghdr.msg_iov = &iovec;
1205                         msghdr.msg_iovlen = 1;
1206                         msghdr.msg_control = &control;
1207                         msghdr.msg_controllen = sizeof(control);
1208
1209                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1210                         if (n < 0) {
1211
1212                                 if (errno == EINTR || errno == EAGAIN)
1213                                         return 1;
1214
1215                                 log_error("recvmsg() failed: %m");
1216                                 return -errno;
1217                         }
1218
1219                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1220
1221                                 if (cmsg->cmsg_level == SOL_SOCKET &&
1222                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
1223                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1224                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
1225                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1226                                          cmsg->cmsg_type == SCM_SECURITY) {
1227                                         label = (char*) CMSG_DATA(cmsg);
1228                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
1229                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1230                                            cmsg->cmsg_type == SO_TIMESTAMP &&
1231                                            cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1232                                         tv = (struct timeval*) CMSG_DATA(cmsg);
1233                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1234                                          cmsg->cmsg_type == SCM_RIGHTS) {
1235                                         fds = (int*) CMSG_DATA(cmsg);
1236                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1237                                 }
1238                         }
1239
1240                         if (ev->data.fd == s->syslog_fd) {
1241                                 if (n > 0 && n_fds == 0) {
1242                                         s->buffer[n] = 0;
1243                                         server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1244                                 } else if (n_fds > 0)
1245                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
1246
1247                         } else {
1248                                 if (n > 0 && n_fds == 0)
1249                                         server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1250                                 else if (n == 0 && n_fds == 1)
1251                                         server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1252                                 else if (n_fds > 0)
1253                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
1254                         }
1255
1256                         close_many(fds, n_fds);
1257                 }
1258
1259                 return 1;
1260
1261         } else if (ev->data.fd == s->stdout_fd) {
1262
1263                 if (ev->events != EPOLLIN) {
1264                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1265                                   "stdout fd", ev->events);
1266                         return -EIO;
1267                 }
1268
1269                 stdout_stream_new(s);
1270                 return 1;
1271
1272         } else {
1273                 StdoutStream *stream;
1274
1275                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1276                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1277                                   "stdout stream", ev->events);
1278                         return -EIO;
1279                 }
1280
1281                 /* If it is none of the well-known fds, it must be an
1282                  * stdout stream fd. Note that this is a bit ugly here
1283                  * (since we rely that none of the well-known fds
1284                  * could be interpreted as pointer), but nonetheless
1285                  * safe, since the well-known fds would never get an
1286                  * fd > 4096, i.e. beyond the first memory page */
1287
1288                 stream = ev->data.ptr;
1289
1290                 if (stdout_stream_process(stream) <= 0)
1291                         stdout_stream_free(stream);
1292
1293                 return 1;
1294         }
1295
1296         log_error("Unknown event.");
1297         return 0;
1298 }
1299
1300 static int open_signalfd(Server *s) {
1301         sigset_t mask;
1302         struct epoll_event ev;
1303
1304         assert(s);
1305
1306         assert_se(sigemptyset(&mask) == 0);
1307         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1308         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1309
1310         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1311         if (s->signal_fd < 0) {
1312                 log_error("signalfd(): %m");
1313                 return -errno;
1314         }
1315
1316         zero(ev);
1317         ev.events = EPOLLIN;
1318         ev.data.fd = s->signal_fd;
1319
1320         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1321                 log_error("epoll_ctl(): %m");
1322                 return -errno;
1323         }
1324
1325         return 0;
1326 }
1327
1328 static int server_parse_proc_cmdline(Server *s) {
1329         _cleanup_free_ char *line = NULL;
1330         char *w, *state;
1331         int r;
1332         size_t l;
1333
1334         if (detect_container(NULL) > 0)
1335                 return 0;
1336
1337         r = read_one_line_file("/proc/cmdline", &line);
1338         if (r < 0) {
1339                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1340                 return 0;
1341         }
1342
1343         FOREACH_WORD_QUOTED(w, l, line, state) {
1344                 _cleanup_free_ char *word;
1345
1346                 word = strndup(w, l);
1347                 if (!word)
1348                         return -ENOMEM;
1349
1350                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1351                         r = parse_boolean(word + 35);
1352                         if (r < 0)
1353                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1354                         else
1355                                 s->forward_to_syslog = r;
1356                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1357                         r = parse_boolean(word + 33);
1358                         if (r < 0)
1359                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1360                         else
1361                                 s->forward_to_kmsg = r;
1362                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1363                         r = parse_boolean(word + 36);
1364                         if (r < 0)
1365                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1366                         else
1367                                 s->forward_to_console = r;
1368                 } else if (startswith(word, "systemd.journald"))
1369                         log_warning("Invalid systemd.journald parameter. Ignoring.");
1370         }
1371
1372         return 0;
1373 }
1374
1375 static int server_parse_config_file(Server *s) {
1376         static const char fn[] = "/etc/systemd/journald.conf";
1377         _cleanup_fclose_ FILE *f = NULL;
1378         int r;
1379
1380         assert(s);
1381
1382         f = fopen(fn, "re");
1383         if (!f) {
1384                 if (errno == ENOENT)
1385                         return 0;
1386
1387                 log_warning("Failed to open configuration file %s: %m", fn);
1388                 return -errno;
1389         }
1390
1391         r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1392                          (void*) journald_gperf_lookup, false, false, s);
1393         if (r < 0)
1394                 log_warning("Failed to parse configuration file: %s", strerror(-r));
1395
1396         return r;
1397 }
1398
1399 static int server_open_sync_timer(Server *s) {
1400         int r;
1401         struct epoll_event ev;
1402
1403         assert(s);
1404
1405         s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1406         if (s->sync_timer_fd < 0)
1407                 return -errno;
1408
1409         zero(ev);
1410         ev.events = EPOLLIN;
1411         ev.data.fd = s->sync_timer_fd;
1412
1413         r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1414         if (r < 0) {
1415                 log_error("Failed to add idle timer fd to epoll object: %m");
1416                 return -errno;
1417         }
1418
1419         return 0;
1420 }
1421
1422 int server_schedule_sync(Server *s, int priority) {
1423         int r;
1424
1425         assert(s);
1426
1427         if (priority <= LOG_CRIT) {
1428                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1429                 server_sync(s);
1430                 return 0;
1431         }
1432
1433         if (s->sync_scheduled)
1434                 return 0;
1435
1436         if (s->sync_interval_usec) {
1437                 struct itimerspec sync_timer_enable = {};
1438
1439                 timespec_store(&sync_timer_enable.it_value, s->sync_interval_usec);
1440
1441                 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1442                 if (r < 0)
1443                         return -errno;
1444         }
1445
1446         s->sync_scheduled = true;
1447
1448         return 0;
1449 }
1450
1451 int server_init(Server *s) {
1452         int n, r, fd;
1453
1454         assert(s);
1455
1456         zero(*s);
1457         s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1458                 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1459         s->compress = true;
1460         s->seal = true;
1461
1462         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1463         s->sync_scheduled = false;
1464
1465         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1466         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1467
1468         s->forward_to_syslog = true;
1469
1470         s->max_level_store = LOG_DEBUG;
1471         s->max_level_syslog = LOG_DEBUG;
1472         s->max_level_kmsg = LOG_NOTICE;
1473         s->max_level_console = LOG_INFO;
1474
1475         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1476         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1477
1478         server_parse_config_file(s);
1479         server_parse_proc_cmdline(s);
1480         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1481                 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1482                           (long long unsigned) s->rate_limit_interval,
1483                           s->rate_limit_burst);
1484                 s->rate_limit_interval = s->rate_limit_burst = 0;
1485         }
1486
1487         mkdir_p("/run/systemd/journal", 0755);
1488
1489         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1490         if (!s->user_journals)
1491                 return log_oom();
1492
1493         s->mmap = mmap_cache_new();
1494         if (!s->mmap)
1495                 return log_oom();
1496
1497         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1498         if (s->epoll_fd < 0) {
1499                 log_error("Failed to create epoll object: %m");
1500                 return -errno;
1501         }
1502
1503         n = sd_listen_fds(true);
1504         if (n < 0) {
1505                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1506                 return n;
1507         }
1508
1509         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1510
1511                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1512
1513                         if (s->native_fd >= 0) {
1514                                 log_error("Too many native sockets passed.");
1515                                 return -EINVAL;
1516                         }
1517
1518                         s->native_fd = fd;
1519
1520                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1521
1522                         if (s->stdout_fd >= 0) {
1523                                 log_error("Too many stdout sockets passed.");
1524                                 return -EINVAL;
1525                         }
1526
1527                         s->stdout_fd = fd;
1528
1529                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1530
1531                         if (s->syslog_fd >= 0) {
1532                                 log_error("Too many /dev/log sockets passed.");
1533                                 return -EINVAL;
1534                         }
1535
1536                         s->syslog_fd = fd;
1537
1538                 } else {
1539                         log_error("Unknown socket passed.");
1540                         return -EINVAL;
1541                 }
1542         }
1543
1544         r = server_open_syslog_socket(s);
1545         if (r < 0)
1546                 return r;
1547
1548         r = server_open_native_socket(s);
1549         if (r < 0)
1550                 return r;
1551
1552         r = server_open_stdout_socket(s);
1553         if (r < 0)
1554                 return r;
1555
1556         r = server_open_dev_kmsg(s);
1557         if (r < 0)
1558                 return r;
1559
1560         r = server_open_kernel_seqnum(s);
1561         if (r < 0)
1562                 return r;
1563
1564         r = server_open_sync_timer(s);
1565         if (r < 0)
1566                 return r;
1567
1568         r = open_signalfd(s);
1569         if (r < 0)
1570                 return r;
1571
1572         s->udev = udev_new();
1573         if (!s->udev)
1574                 return -ENOMEM;
1575
1576         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1577                                                s->rate_limit_burst);
1578         if (!s->rate_limit)
1579                 return -ENOMEM;
1580
1581         r = system_journal_open(s);
1582         if (r < 0)
1583                 return r;
1584
1585         return 0;
1586 }
1587
1588 void server_maybe_append_tags(Server *s) {
1589 #ifdef HAVE_GCRYPT
1590         JournalFile *f;
1591         Iterator i;
1592         usec_t n;
1593
1594         n = now(CLOCK_REALTIME);
1595
1596         if (s->system_journal)
1597                 journal_file_maybe_append_tag(s->system_journal, n);
1598
1599         HASHMAP_FOREACH(f, s->user_journals, i)
1600                 journal_file_maybe_append_tag(f, n);
1601 #endif
1602 }
1603
1604 void server_done(Server *s) {
1605         JournalFile *f;
1606         assert(s);
1607
1608         while (s->stdout_streams)
1609                 stdout_stream_free(s->stdout_streams);
1610
1611         if (s->system_journal)
1612                 journal_file_close(s->system_journal);
1613
1614         if (s->runtime_journal)
1615                 journal_file_close(s->runtime_journal);
1616
1617         while ((f = hashmap_steal_first(s->user_journals)))
1618                 journal_file_close(f);
1619
1620         hashmap_free(s->user_journals);
1621
1622         if (s->epoll_fd >= 0)
1623                 close_nointr_nofail(s->epoll_fd);
1624
1625         if (s->signal_fd >= 0)
1626                 close_nointr_nofail(s->signal_fd);
1627
1628         if (s->syslog_fd >= 0)
1629                 close_nointr_nofail(s->syslog_fd);
1630
1631         if (s->native_fd >= 0)
1632                 close_nointr_nofail(s->native_fd);
1633
1634         if (s->stdout_fd >= 0)
1635                 close_nointr_nofail(s->stdout_fd);
1636
1637         if (s->dev_kmsg_fd >= 0)
1638                 close_nointr_nofail(s->dev_kmsg_fd);
1639
1640         if (s->sync_timer_fd >= 0)
1641                 close_nointr_nofail(s->sync_timer_fd);
1642
1643         if (s->rate_limit)
1644                 journal_rate_limit_free(s->rate_limit);
1645
1646         if (s->kernel_seqnum)
1647                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1648
1649         free(s->buffer);
1650         free(s->tty_path);
1651
1652         if (s->mmap)
1653                 mmap_cache_unref(s->mmap);
1654
1655         if (s->udev)
1656                 udev_unref(s->udev);
1657 }