chiark / gitweb /
security: rework selinux, smack, ima, apparmor detection logic
[elogind.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
26 #include <sys/mman.h>
27 #include <sys/timerfd.h>
28
29 #include <libudev.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
33
34 #include "fileio.h"
35 #include "mkdir.h"
36 #include "hashmap.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
40 #include "list.h"
41 #include "virt.h"
42 #include "missing.h"
43 #include "conf-parser.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-server.h"
48 #include "journald-rate-limit.h"
49 #include "journald-kmsg.h"
50 #include "journald-syslog.h"
51 #include "journald-stream.h"
52 #include "journald-console.h"
53 #include "journald-native.h"
54
55 #ifdef HAVE_ACL
56 #include <sys/acl.h>
57 #include <acl/libacl.h>
58 #include "acl-util.h"
59 #endif
60
61 #ifdef HAVE_SELINUX
62 #include <selinux/selinux.h>
63 #endif
64
65 #define USER_JOURNALS_MAX 1024
66
67 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
68 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
69 #define DEFAULT_RATE_LIMIT_BURST 1000
70
71 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
72
73 static const char* const storage_table[] = {
74         [STORAGE_AUTO] = "auto",
75         [STORAGE_VOLATILE] = "volatile",
76         [STORAGE_PERSISTENT] = "persistent",
77         [STORAGE_NONE] = "none"
78 };
79
80 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
81 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
82
83 static const char* const split_mode_table[] = {
84         [SPLIT_NONE] = "none",
85         [SPLIT_UID] = "uid",
86         [SPLIT_LOGIN] = "login"
87 };
88
89 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
90 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
91
92 static uint64_t available_space(Server *s, bool verbose) {
93         char ids[33];
94         _cleanup_free_ char *p = NULL;
95         sd_id128_t machine;
96         struct statvfs ss;
97         uint64_t sum = 0, ss_avail = 0, avail = 0;
98         int r;
99         _cleanup_closedir_ DIR *d = NULL;
100         usec_t ts;
101         const char *f;
102         JournalMetrics *m;
103
104         ts = now(CLOCK_MONOTONIC);
105
106         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
107             && !verbose)
108                 return s->cached_available_space;
109
110         r = sd_id128_get_machine(&machine);
111         if (r < 0)
112                 return 0;
113
114         if (s->system_journal) {
115                 f = "/var/log/journal/";
116                 m = &s->system_metrics;
117         } else {
118                 f = "/run/log/journal/";
119                 m = &s->runtime_metrics;
120         }
121
122         assert(m);
123
124         p = strappend(f, sd_id128_to_string(machine, ids));
125         if (!p)
126                 return 0;
127
128         d = opendir(p);
129         if (!d)
130                 return 0;
131
132         if (fstatvfs(dirfd(d), &ss) < 0)
133                 return 0;
134
135         for (;;) {
136                 struct stat st;
137                 struct dirent *de;
138                 union dirent_storage buf;
139
140                 r = readdir_r(d, &buf.de, &de);
141                 if (r != 0)
142                         break;
143
144                 if (!de)
145                         break;
146
147                 if (!endswith(de->d_name, ".journal") &&
148                     !endswith(de->d_name, ".journal~"))
149                         continue;
150
151                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
152                         continue;
153
154                 if (!S_ISREG(st.st_mode))
155                         continue;
156
157                 sum += (uint64_t) st.st_blocks * 512UL;
158         }
159
160         ss_avail = ss.f_bsize * ss.f_bavail;
161         avail = ss_avail > m->keep_free ? ss_avail - m->keep_free : 0;
162
163         s->cached_available_space = MIN(m->max_use, avail) > sum ? MIN(m->max_use, avail) - sum : 0;
164         s->cached_available_space_timestamp = ts;
165
166         if (verbose) {
167                 char    fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
168                         fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
169
170                 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
171                                       "%s journal is using %s (max %s, leaving %s of free %s, current limit %s).",
172                                       s->system_journal ? "Permanent" : "Runtime",
173                                       format_bytes(fb1, sizeof(fb1), sum),
174                                       format_bytes(fb2, sizeof(fb2), m->max_use),
175                                       format_bytes(fb3, sizeof(fb3), m->keep_free),
176                                       format_bytes(fb4, sizeof(fb4), ss_avail),
177                                       format_bytes(fb5, sizeof(fb5), MIN(m->max_use, avail)));
178         }
179
180         return s->cached_available_space;
181 }
182
183 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
184         int r;
185 #ifdef HAVE_ACL
186         acl_t acl;
187         acl_entry_t entry;
188         acl_permset_t permset;
189 #endif
190
191         assert(f);
192
193         r = fchmod(f->fd, 0640);
194         if (r < 0)
195                 log_warning("Failed to fix access mode on %s, ignoring: %s", f->path, strerror(-r));
196
197 #ifdef HAVE_ACL
198         if (uid <= 0)
199                 return;
200
201         acl = acl_get_fd(f->fd);
202         if (!acl) {
203                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
204                 return;
205         }
206
207         r = acl_find_uid(acl, uid, &entry);
208         if (r <= 0) {
209
210                 if (acl_create_entry(&acl, &entry) < 0 ||
211                     acl_set_tag_type(entry, ACL_USER) < 0 ||
212                     acl_set_qualifier(entry, &uid) < 0) {
213                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
214                         goto finish;
215                 }
216         }
217
218         /* We do not recalculate the mask unconditionally here,
219          * so that the fchmod() mask above stays intact. */
220         if (acl_get_permset(entry, &permset) < 0 ||
221             acl_add_perm(permset, ACL_READ) < 0 ||
222             calc_acl_mask_if_needed(&acl) < 0) {
223                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
224                 goto finish;
225         }
226
227         if (acl_set_fd(f->fd, acl) < 0)
228                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
229
230 finish:
231         acl_free(acl);
232 #endif
233 }
234
235 static JournalFile* find_journal(Server *s, uid_t uid) {
236         _cleanup_free_ char *p = NULL;
237         int r;
238         JournalFile *f;
239         sd_id128_t machine;
240
241         assert(s);
242
243         /* We split up user logs only on /var, not on /run. If the
244          * runtime file is open, we write to it exclusively, in order
245          * to guarantee proper order as soon as we flush /run to
246          * /var and close the runtime file. */
247
248         if (s->runtime_journal)
249                 return s->runtime_journal;
250
251         if (uid <= 0)
252                 return s->system_journal;
253
254         r = sd_id128_get_machine(&machine);
255         if (r < 0)
256                 return s->system_journal;
257
258         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
259         if (f)
260                 return f;
261
262         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
263                      SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
264                 return s->system_journal;
265
266         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
267                 /* Too many open? Then let's close one */
268                 f = hashmap_steal_first(s->user_journals);
269                 assert(f);
270                 journal_file_close(f);
271         }
272
273         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
274         if (r < 0)
275                 return s->system_journal;
276
277         server_fix_perms(s, f, uid);
278
279         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
280         if (r < 0) {
281                 journal_file_close(f);
282                 return s->system_journal;
283         }
284
285         return f;
286 }
287
288 void server_rotate(Server *s) {
289         JournalFile *f;
290         void *k;
291         Iterator i;
292         int r;
293
294         log_debug("Rotating...");
295
296         if (s->runtime_journal) {
297                 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
298                 if (r < 0)
299                         if (s->runtime_journal)
300                                 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
301                         else
302                                 log_error("Failed to create new runtime journal: %s", strerror(-r));
303                 else
304                         server_fix_perms(s, s->runtime_journal, 0);
305         }
306
307         if (s->system_journal) {
308                 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
309                 if (r < 0)
310                         if (s->system_journal)
311                                 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
312                         else
313                                 log_error("Failed to create new system journal: %s", strerror(-r));
314
315                 else
316                         server_fix_perms(s, s->system_journal, 0);
317         }
318
319         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
320                 r = journal_file_rotate(&f, s->compress, s->seal);
321                 if (r < 0)
322                         if (f)
323                                 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
324                         else {
325                                 log_error("Failed to create user journal: %s", strerror(-r));
326                                 hashmap_remove(s->user_journals, k);
327                         }
328                 else {
329                         hashmap_replace(s->user_journals, k, f);
330                         server_fix_perms(s, f, PTR_TO_UINT32(k));
331                 }
332         }
333 }
334
335 void server_sync(Server *s) {
336         static const struct itimerspec sync_timer_disable = {};
337         JournalFile *f;
338         void *k;
339         Iterator i;
340         int r;
341
342         if (s->system_journal) {
343                 r = journal_file_set_offline(s->system_journal);
344                 if (r < 0)
345                         log_error("Failed to sync system journal: %s", strerror(-r));
346         }
347
348         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
349                 r = journal_file_set_offline(f);
350                 if (r < 0)
351                         log_error("Failed to sync user journal: %s", strerror(-r));
352         }
353
354         r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
355         if (r < 0)
356                 log_error("Failed to disable max timer: %m");
357
358         s->sync_scheduled = false;
359 }
360
361 void server_vacuum(Server *s) {
362         char ids[33];
363         sd_id128_t machine;
364         int r;
365
366         log_debug("Vacuuming...");
367
368         s->oldest_file_usec = 0;
369
370         r = sd_id128_get_machine(&machine);
371         if (r < 0) {
372                 log_error("Failed to get machine ID: %s", strerror(-r));
373                 return;
374         }
375
376         sd_id128_to_string(machine, ids);
377
378         if (s->system_journal) {
379                 char *p = strappenda("/var/log/journal/", ids);
380
381                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
382                 if (r < 0 && r != -ENOENT)
383                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
384         }
385
386         if (s->runtime_journal) {
387                 char *p = strappenda("/run/log/journal/", ids);
388
389                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
390                 if (r < 0 && r != -ENOENT)
391                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
392         }
393
394         s->cached_available_space_timestamp = 0;
395 }
396
397 bool shall_try_append_again(JournalFile *f, int r) {
398
399         /* -E2BIG            Hit configured limit
400            -EFBIG            Hit fs limit
401            -EDQUOT           Quota limit hit
402            -ENOSPC           Disk full
403            -EHOSTDOWN        Other machine
404            -EBUSY            Unclean shutdown
405            -EPROTONOSUPPORT  Unsupported feature
406            -EBADMSG          Corrupted
407            -ENODATA          Truncated
408            -ESHUTDOWN        Already archived */
409
410         if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
411                 log_debug("%s: Allocation limit reached, rotating.", f->path);
412         else if (r == -EHOSTDOWN)
413                 log_info("%s: Journal file from other machine, rotating.", f->path);
414         else if (r == -EBUSY)
415                 log_info("%s: Unclean shutdown, rotating.", f->path);
416         else if (r == -EPROTONOSUPPORT)
417                 log_info("%s: Unsupported feature, rotating.", f->path);
418         else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
419                 log_warning("%s: Journal file corrupted, rotating.", f->path);
420         else
421                 return false;
422
423         return true;
424 }
425
426 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
427         JournalFile *f;
428         bool vacuumed = false;
429         int r;
430
431         assert(s);
432         assert(iovec);
433         assert(n > 0);
434
435         f = find_journal(s, uid);
436         if (!f)
437                 return;
438
439         if (journal_file_rotate_suggested(f, s->max_file_usec)) {
440                 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
441                 server_rotate(s);
442                 server_vacuum(s);
443                 vacuumed = true;
444
445                 f = find_journal(s, uid);
446                 if (!f)
447                         return;
448         }
449
450         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
451         if (r >= 0) {
452                 server_schedule_sync(s, priority);
453                 return;
454         }
455
456         if (vacuumed || !shall_try_append_again(f, r)) {
457                 size_t size = 0;
458                 unsigned i;
459                 for (i = 0; i < n; i++)
460                         size += iovec[i].iov_len;
461
462                 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
463                 return;
464         }
465
466         server_rotate(s);
467         server_vacuum(s);
468
469         f = find_journal(s, uid);
470         if (!f)
471                 return;
472
473         log_debug("Retrying write.");
474         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
475         if (r < 0) {
476                 size_t size = 0;
477                 unsigned i;
478                 for (i = 0; i < n; i++)
479                         size += iovec[i].iov_len;
480
481                 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
482         } else
483                 server_schedule_sync(s, priority);
484 }
485
486 static void dispatch_message_real(
487                 Server *s,
488                 struct iovec *iovec, unsigned n, unsigned m,
489                 struct ucred *ucred,
490                 struct timeval *tv,
491                 const char *label, size_t label_len,
492                 const char *unit_id,
493                 int priority,
494                 pid_t object_pid) {
495
496         char    pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
497                 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
498                 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
499                 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
500                 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
501                 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
502                 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=",
503                 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
504                 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
505                 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
506         uid_t object_uid;
507         gid_t object_gid;
508         char *x;
509         sd_id128_t id;
510         int r;
511         char *t, *c;
512         uid_t realuid = 0, owner = 0, journal_uid;
513         bool owner_valid = false;
514 #ifdef HAVE_AUDIT
515         char    audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
516                 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
517                 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
518                 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
519
520         uint32_t audit;
521         uid_t loginuid;
522 #endif
523
524         assert(s);
525         assert(iovec);
526         assert(n > 0);
527         assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
528
529         if (ucred) {
530                 realuid = ucred->uid;
531
532                 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
533                 IOVEC_SET_STRING(iovec[n++], pid);
534
535                 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
536                 IOVEC_SET_STRING(iovec[n++], uid);
537
538                 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
539                 IOVEC_SET_STRING(iovec[n++], gid);
540
541                 r = get_process_comm(ucred->pid, &t);
542                 if (r >= 0) {
543                         x = strappenda("_COMM=", t);
544                         free(t);
545                         IOVEC_SET_STRING(iovec[n++], x);
546                 }
547
548                 r = get_process_exe(ucred->pid, &t);
549                 if (r >= 0) {
550                         x = strappenda("_EXE=", t);
551                         free(t);
552                         IOVEC_SET_STRING(iovec[n++], x);
553                 }
554
555                 r = get_process_cmdline(ucred->pid, 0, false, &t);
556                 if (r >= 0) {
557                         x = strappenda("_CMDLINE=", t);
558                         free(t);
559                         IOVEC_SET_STRING(iovec[n++], x);
560                 }
561
562                 r = get_process_capeff(ucred->pid, &t);
563                 if (r >= 0) {
564                         x = strappenda("_CAP_EFFECTIVE=", t);
565                         free(t);
566                         IOVEC_SET_STRING(iovec[n++], x);
567                 }
568
569 #ifdef HAVE_AUDIT
570                 r = audit_session_from_pid(ucred->pid, &audit);
571                 if (r >= 0) {
572                         sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
573                         IOVEC_SET_STRING(iovec[n++], audit_session);
574                 }
575
576                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
577                 if (r >= 0) {
578                         sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
579                         IOVEC_SET_STRING(iovec[n++], audit_loginuid);
580                 }
581 #endif
582
583                 r = cg_pid_get_path_shifted(ucred->pid, NULL, &c);
584                 if (r >= 0) {
585                         char *session = NULL;
586
587                         x = strappenda("_SYSTEMD_CGROUP=", c);
588                         IOVEC_SET_STRING(iovec[n++], x);
589
590                         r = cg_path_get_session(c, &t);
591                         if (r >= 0) {
592                                 session = strappenda("_SYSTEMD_SESSION=", t);
593                                 free(t);
594                                 IOVEC_SET_STRING(iovec[n++], session);
595                         }
596
597                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
598                                 owner_valid = true;
599
600                                 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
601                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
602                         }
603
604                         if (cg_path_get_unit(c, &t) >= 0) {
605                                 x = strappenda("_SYSTEMD_UNIT=", t);
606                                 free(t);
607                                 IOVEC_SET_STRING(iovec[n++], x);
608                         } else if (unit_id && !session) {
609                                 x = strappenda("_SYSTEMD_UNIT=", unit_id);
610                                 IOVEC_SET_STRING(iovec[n++], x);
611                         }
612
613                         if (cg_path_get_user_unit(c, &t) >= 0) {
614                                 x = strappenda("_SYSTEMD_USER_UNIT=", t);
615                                 free(t);
616                                 IOVEC_SET_STRING(iovec[n++], x);
617                         } else if (unit_id && session) {
618                                 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
619                                 IOVEC_SET_STRING(iovec[n++], x);
620                         }
621
622                         if (cg_path_get_slice(c, &t) >= 0) {
623                                 x = strappenda("_SYSTEMD_SLICE=", t);
624                                 free(t);
625                                 IOVEC_SET_STRING(iovec[n++], x);
626                         }
627
628                         free(c);
629                 }
630
631 #ifdef HAVE_SELINUX
632                 if (use_selinux()) {
633                         if (label) {
634                                 x = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);
635
636                                 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
637                                 IOVEC_SET_STRING(iovec[n++], x);
638                         } else {
639                                 security_context_t con;
640
641                                 if (getpidcon(ucred->pid, &con) >= 0) {
642                                         x = strappenda("_SELINUX_CONTEXT=", con);
643
644                                         freecon(con);
645                                         IOVEC_SET_STRING(iovec[n++], x);
646                                 }
647                         }
648                 }
649 #endif
650         }
651         assert(n <= m);
652
653         if (object_pid) {
654                 r = get_process_uid(object_pid, &object_uid);
655                 if (r >= 0) {
656                         sprintf(o_uid, "OBJECT_UID=%lu", (unsigned long) object_uid);
657                         IOVEC_SET_STRING(iovec[n++], o_uid);
658                 }
659
660                 r = get_process_gid(object_pid, &object_gid);
661                 if (r >= 0) {
662                         sprintf(o_gid, "OBJECT_GID=%lu", (unsigned long) object_gid);
663                         IOVEC_SET_STRING(iovec[n++], o_gid);
664                 }
665
666                 r = get_process_comm(object_pid, &t);
667                 if (r >= 0) {
668                         x = strappenda("OBJECT_COMM=", t);
669                         free(t);
670                         IOVEC_SET_STRING(iovec[n++], x);
671                 }
672
673                 r = get_process_exe(object_pid, &t);
674                 if (r >= 0) {
675                         x = strappenda("OBJECT_EXE=", t);
676                         free(t);
677                         IOVEC_SET_STRING(iovec[n++], x);
678                 }
679
680                 r = get_process_cmdline(object_pid, 0, false, &t);
681                 if (r >= 0) {
682                         x = strappenda("OBJECT_CMDLINE=", t);
683                         free(t);
684                         IOVEC_SET_STRING(iovec[n++], x);
685                 }
686
687 #ifdef HAVE_AUDIT
688                 r = audit_session_from_pid(object_pid, &audit);
689                 if (r >= 0) {
690                         sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%lu", (unsigned long) audit);
691                         IOVEC_SET_STRING(iovec[n++], o_audit_session);
692                 }
693
694                 r = audit_loginuid_from_pid(object_pid, &loginuid);
695                 if (r >= 0) {
696                         sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
697                         IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
698                 }
699 #endif
700
701                 r = cg_pid_get_path_shifted(object_pid, NULL, &c);
702                 if (r >= 0) {
703                         x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
704                         IOVEC_SET_STRING(iovec[n++], x);
705
706                         r = cg_path_get_session(c, &t);
707                         if (r >= 0) {
708                                 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
709                                 free(t);
710                                 IOVEC_SET_STRING(iovec[n++], x);
711                         }
712
713                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
714                                 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
715                                 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
716                         }
717
718                         if (cg_path_get_unit(c, &t) >= 0) {
719                                 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
720                                 free(t);
721                                 IOVEC_SET_STRING(iovec[n++], x);
722                         }
723
724                         if (cg_path_get_user_unit(c, &t) >= 0) {
725                                 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
726                                 free(t);
727                                 IOVEC_SET_STRING(iovec[n++], x);
728                         }
729
730                         free(c);
731                 }
732         }
733         assert(n <= m);
734
735         if (tv) {
736                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
737                 IOVEC_SET_STRING(iovec[n++], source_time);
738         }
739
740         /* Note that strictly speaking storing the boot id here is
741          * redundant since the entry includes this in-line
742          * anyway. However, we need this indexed, too. */
743         r = sd_id128_get_boot(&id);
744         if (r >= 0) {
745                 sd_id128_to_string(id, boot_id + strlen("_BOOT_ID="));
746                 IOVEC_SET_STRING(iovec[n++], boot_id);
747         }
748
749         r = sd_id128_get_machine(&id);
750         if (r >= 0) {
751                 sd_id128_to_string(id, machine_id + strlen("_MACHINE_ID="));
752                 IOVEC_SET_STRING(iovec[n++], machine_id);
753         }
754
755         t = gethostname_malloc();
756         if (t) {
757                 x = strappenda("_HOSTNAME=", t);
758                 free(t);
759                 IOVEC_SET_STRING(iovec[n++], x);
760         }
761
762         assert(n <= m);
763
764         if (s->split_mode == SPLIT_UID && realuid > 0)
765                 /* Split up strictly by any UID */
766                 journal_uid = realuid;
767         else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
768                 /* Split up by login UIDs, this avoids creation of
769                  * individual journals for system UIDs.  We do this
770                  * only if the realuid is not root, in order not to
771                  * accidentally leak privileged information to the
772                  * user that is logged by a privileged process that is
773                  * part of an unprivileged session.*/
774                 journal_uid = owner;
775         else
776                 journal_uid = 0;
777
778         write_to_journal(s, journal_uid, iovec, n, priority);
779 }
780
781 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
782         char mid[11 + 32 + 1];
783         char buffer[16 + LINE_MAX + 1];
784         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
785         int n = 0;
786         va_list ap;
787         struct ucred ucred = {};
788
789         assert(s);
790         assert(format);
791
792         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
793         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
794
795         memcpy(buffer, "MESSAGE=", 8);
796         va_start(ap, format);
797         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
798         va_end(ap);
799         char_array_0(buffer);
800         IOVEC_SET_STRING(iovec[n++], buffer);
801
802         if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
803                 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
804                 char_array_0(mid);
805                 IOVEC_SET_STRING(iovec[n++], mid);
806         }
807
808         ucred.pid = getpid();
809         ucred.uid = getuid();
810         ucred.gid = getgid();
811
812         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
813 }
814
815 void server_dispatch_message(
816                 Server *s,
817                 struct iovec *iovec, unsigned n, unsigned m,
818                 struct ucred *ucred,
819                 struct timeval *tv,
820                 const char *label, size_t label_len,
821                 const char *unit_id,
822                 int priority,
823                 pid_t object_pid) {
824
825         int rl, r;
826         _cleanup_free_ char *path = NULL;
827         char *c;
828
829         assert(s);
830         assert(iovec || n == 0);
831
832         if (n == 0)
833                 return;
834
835         if (LOG_PRI(priority) > s->max_level_store)
836                 return;
837
838         /* Stop early in case the information will not be stored
839          * in a journal. */
840         if (s->storage == STORAGE_NONE)
841                 return;
842
843         if (!ucred)
844                 goto finish;
845
846         r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
847         if (r < 0)
848                 goto finish;
849
850         /* example: /user/lennart/3/foobar
851          *          /system/dbus.service/foobar
852          *
853          * So let's cut of everything past the third /, since that is
854          * where user directories start */
855
856         c = strchr(path, '/');
857         if (c) {
858                 c = strchr(c+1, '/');
859                 if (c) {
860                         c = strchr(c+1, '/');
861                         if (c)
862                                 *c = 0;
863                 }
864         }
865
866         rl = journal_rate_limit_test(s->rate_limit, path,
867                                      priority & LOG_PRIMASK, available_space(s, false));
868
869         if (rl == 0)
870                 return;
871
872         /* Write a suppression message if we suppressed something */
873         if (rl > 1)
874                 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
875                                       "Suppressed %u messages from %s", rl - 1, path);
876
877 finish:
878         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
879 }
880
881
882 static int system_journal_open(Server *s) {
883         int r;
884         char *fn;
885         sd_id128_t machine;
886         char ids[33];
887
888         r = sd_id128_get_machine(&machine);
889         if (r < 0) {
890                 log_error("Failed to get machine id: %s", strerror(-r));
891                 return r;
892         }
893
894         sd_id128_to_string(machine, ids);
895
896         if (!s->system_journal &&
897             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
898             access("/run/systemd/journal/flushed", F_OK) >= 0) {
899
900                 /* If in auto mode: first try to create the machine
901                  * path, but not the prefix.
902                  *
903                  * If in persistent mode: create /var/log/journal and
904                  * the machine path */
905
906                 if (s->storage == STORAGE_PERSISTENT)
907                         (void) mkdir("/var/log/journal/", 0755);
908
909                 fn = strappenda("/var/log/journal/", ids);
910                 (void) mkdir(fn, 0755);
911
912                 fn = strappenda(fn, "/system.journal");
913                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
914
915                 if (r >= 0)
916                         server_fix_perms(s, s->system_journal, 0);
917                 else if (r < 0) {
918                         if (r != -ENOENT && r != -EROFS)
919                                 log_warning("Failed to open system journal: %s", strerror(-r));
920
921                         r = 0;
922                 }
923         }
924
925         if (!s->runtime_journal &&
926             (s->storage != STORAGE_NONE)) {
927
928                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
929                 if (!fn)
930                         return -ENOMEM;
931
932                 if (s->system_journal) {
933
934                         /* Try to open the runtime journal, but only
935                          * if it already exists, so that we can flush
936                          * it into the system journal */
937
938                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
939                         free(fn);
940
941                         if (r < 0) {
942                                 if (r != -ENOENT)
943                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
944
945                                 r = 0;
946                         }
947
948                 } else {
949
950                         /* OK, we really need the runtime journal, so create
951                          * it if necessary. */
952
953                         (void) mkdir_parents(fn, 0755);
954                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
955                         free(fn);
956
957                         if (r < 0) {
958                                 log_error("Failed to open runtime journal: %s", strerror(-r));
959                                 return r;
960                         }
961                 }
962
963                 if (s->runtime_journal)
964                         server_fix_perms(s, s->runtime_journal, 0);
965         }
966
967         available_space(s, true);
968
969         return r;
970 }
971
972 int server_flush_to_var(Server *s) {
973         int r;
974         sd_id128_t machine;
975         sd_journal *j = NULL;
976
977         assert(s);
978
979         if (s->storage != STORAGE_AUTO &&
980             s->storage != STORAGE_PERSISTENT)
981                 return 0;
982
983         if (!s->runtime_journal)
984                 return 0;
985
986         system_journal_open(s);
987
988         if (!s->system_journal)
989                 return 0;
990
991         log_debug("Flushing to /var...");
992
993         r = sd_id128_get_machine(&machine);
994         if (r < 0)
995                 return r;
996
997         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
998         if (r < 0) {
999                 log_error("Failed to read runtime journal: %s", strerror(-r));
1000                 return r;
1001         }
1002
1003         sd_journal_set_data_threshold(j, 0);
1004
1005         SD_JOURNAL_FOREACH(j) {
1006                 Object *o = NULL;
1007                 JournalFile *f;
1008
1009                 f = j->current_file;
1010                 assert(f && f->current_offset > 0);
1011
1012                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1013                 if (r < 0) {
1014                         log_error("Can't read entry: %s", strerror(-r));
1015                         goto finish;
1016                 }
1017
1018                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1019                 if (r >= 0)
1020                         continue;
1021
1022                 if (!shall_try_append_again(s->system_journal, r)) {
1023                         log_error("Can't write entry: %s", strerror(-r));
1024                         goto finish;
1025                 }
1026
1027                 server_rotate(s);
1028                 server_vacuum(s);
1029
1030                 if (!s->system_journal) {
1031                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1032                         r = -EIO;
1033                         goto finish;
1034                 }
1035
1036                 log_debug("Retrying write.");
1037                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1038                 if (r < 0) {
1039                         log_error("Can't write entry: %s", strerror(-r));
1040                         goto finish;
1041                 }
1042         }
1043
1044 finish:
1045         journal_file_post_change(s->system_journal);
1046
1047         journal_file_close(s->runtime_journal);
1048         s->runtime_journal = NULL;
1049
1050         if (r >= 0)
1051                 rm_rf("/run/log/journal", false, true, false);
1052
1053         sd_journal_close(j);
1054
1055         return r;
1056 }
1057
1058 int process_event(Server *s, struct epoll_event *ev) {
1059         assert(s);
1060         assert(ev);
1061
1062         if (ev->data.fd == s->signal_fd) {
1063                 struct signalfd_siginfo sfsi;
1064                 ssize_t n;
1065
1066                 if (ev->events != EPOLLIN) {
1067                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1068                                   "signal fd", ev->events);
1069                         return -EIO;
1070                 }
1071
1072                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1073                 if (n != sizeof(sfsi)) {
1074
1075                         if (n >= 0)
1076                                 return -EIO;
1077
1078                         if (errno == EINTR || errno == EAGAIN)
1079                                 return 1;
1080
1081                         return -errno;
1082                 }
1083
1084                 if (sfsi.ssi_signo == SIGUSR1) {
1085                         log_info("Received request to flush runtime journal from PID %"PRIu32,
1086                                  sfsi.ssi_pid);
1087                         touch("/run/systemd/journal/flushed");
1088                         server_flush_to_var(s);
1089                         server_sync(s);
1090                         return 1;
1091                 }
1092
1093                 if (sfsi.ssi_signo == SIGUSR2) {
1094                         log_info("Received request to rotate journal from PID %"PRIu32,
1095                                  sfsi.ssi_pid);
1096                         server_rotate(s);
1097                         server_vacuum(s);
1098                         return 1;
1099                 }
1100
1101                 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1102
1103                 return 0;
1104
1105         } else if (ev->data.fd == s->sync_timer_fd) {
1106                 int r;
1107                 uint64_t t;
1108
1109                 log_debug("Got sync request from epoll.");
1110
1111                 r = read(ev->data.fd, (void *)&t, sizeof(t));
1112                 if (r < 0)
1113                         return 0;
1114
1115                 server_sync(s);
1116                 return 1;
1117
1118         } else if (ev->data.fd == s->dev_kmsg_fd) {
1119                 int r;
1120
1121                 if (ev->events & EPOLLERR)
1122                         log_warning("/dev/kmsg buffer overrun, some messages lost.");
1123
1124                 if (!(ev->events & EPOLLIN)) {
1125                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1126                                   "/dev/kmsg", ev->events);
1127                         return -EIO;
1128                 }
1129
1130                 r = server_read_dev_kmsg(s);
1131                 if (r < 0)
1132                         return r;
1133
1134                 return 1;
1135
1136         } else if (ev->data.fd == s->native_fd ||
1137                    ev->data.fd == s->syslog_fd) {
1138
1139                 if (ev->events != EPOLLIN) {
1140                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1141                                   ev->data.fd == s->native_fd ? "native fd" : "syslog fd",
1142                                   ev->events);
1143                         return -EIO;
1144                 }
1145
1146                 for (;;) {
1147                         struct msghdr msghdr;
1148                         struct iovec iovec;
1149                         struct ucred *ucred = NULL;
1150                         struct timeval *tv = NULL;
1151                         struct cmsghdr *cmsg;
1152                         char *label = NULL;
1153                         size_t label_len = 0;
1154                         union {
1155                                 struct cmsghdr cmsghdr;
1156
1157                                 /* We use NAME_MAX space for the
1158                                  * SELinux label here. The kernel
1159                                  * currently enforces no limit, but
1160                                  * according to suggestions from the
1161                                  * SELinux people this will change and
1162                                  * it will probably be identical to
1163                                  * NAME_MAX. For now we use that, but
1164                                  * this should be updated one day when
1165                                  * the final limit is known.*/
1166                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1167                                             CMSG_SPACE(sizeof(struct timeval)) +
1168                                             CMSG_SPACE(sizeof(int)) + /* fd */
1169                                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
1170                         } control;
1171                         ssize_t n;
1172                         int v;
1173                         int *fds = NULL;
1174                         unsigned n_fds = 0;
1175
1176                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1177                                 log_error("SIOCINQ failed: %m");
1178                                 return -errno;
1179                         }
1180
1181                         if (s->buffer_size < (size_t) v) {
1182                                 void *b;
1183                                 size_t l;
1184
1185                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1186                                 b = realloc(s->buffer, l+1);
1187
1188                                 if (!b) {
1189                                         log_error("Couldn't increase buffer.");
1190                                         return -ENOMEM;
1191                                 }
1192
1193                                 s->buffer_size = l;
1194                                 s->buffer = b;
1195                         }
1196
1197                         zero(iovec);
1198                         iovec.iov_base = s->buffer;
1199                         iovec.iov_len = s->buffer_size;
1200
1201                         zero(control);
1202                         zero(msghdr);
1203                         msghdr.msg_iov = &iovec;
1204                         msghdr.msg_iovlen = 1;
1205                         msghdr.msg_control = &control;
1206                         msghdr.msg_controllen = sizeof(control);
1207
1208                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1209                         if (n < 0) {
1210
1211                                 if (errno == EINTR || errno == EAGAIN)
1212                                         return 1;
1213
1214                                 log_error("recvmsg() failed: %m");
1215                                 return -errno;
1216                         }
1217
1218                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1219
1220                                 if (cmsg->cmsg_level == SOL_SOCKET &&
1221                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
1222                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1223                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
1224                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1225                                          cmsg->cmsg_type == SCM_SECURITY) {
1226                                         label = (char*) CMSG_DATA(cmsg);
1227                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
1228                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1229                                            cmsg->cmsg_type == SO_TIMESTAMP &&
1230                                            cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1231                                         tv = (struct timeval*) CMSG_DATA(cmsg);
1232                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1233                                          cmsg->cmsg_type == SCM_RIGHTS) {
1234                                         fds = (int*) CMSG_DATA(cmsg);
1235                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1236                                 }
1237                         }
1238
1239                         if (ev->data.fd == s->syslog_fd) {
1240                                 if (n > 0 && n_fds == 0) {
1241                                         s->buffer[n] = 0;
1242                                         server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1243                                 } else if (n_fds > 0)
1244                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
1245
1246                         } else {
1247                                 if (n > 0 && n_fds == 0)
1248                                         server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1249                                 else if (n == 0 && n_fds == 1)
1250                                         server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1251                                 else if (n_fds > 0)
1252                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
1253                         }
1254
1255                         close_many(fds, n_fds);
1256                 }
1257
1258                 return 1;
1259
1260         } else if (ev->data.fd == s->stdout_fd) {
1261
1262                 if (ev->events != EPOLLIN) {
1263                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1264                                   "stdout fd", ev->events);
1265                         return -EIO;
1266                 }
1267
1268                 stdout_stream_new(s);
1269                 return 1;
1270
1271         } else {
1272                 StdoutStream *stream;
1273
1274                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1275                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1276                                   "stdout stream", ev->events);
1277                         return -EIO;
1278                 }
1279
1280                 /* If it is none of the well-known fds, it must be an
1281                  * stdout stream fd. Note that this is a bit ugly here
1282                  * (since we rely that none of the well-known fds
1283                  * could be interpreted as pointer), but nonetheless
1284                  * safe, since the well-known fds would never get an
1285                  * fd > 4096, i.e. beyond the first memory page */
1286
1287                 stream = ev->data.ptr;
1288
1289                 if (stdout_stream_process(stream) <= 0)
1290                         stdout_stream_free(stream);
1291
1292                 return 1;
1293         }
1294
1295         log_error("Unknown event.");
1296         return 0;
1297 }
1298
1299 static int open_signalfd(Server *s) {
1300         sigset_t mask;
1301         struct epoll_event ev;
1302
1303         assert(s);
1304
1305         assert_se(sigemptyset(&mask) == 0);
1306         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1307         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1308
1309         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1310         if (s->signal_fd < 0) {
1311                 log_error("signalfd(): %m");
1312                 return -errno;
1313         }
1314
1315         zero(ev);
1316         ev.events = EPOLLIN;
1317         ev.data.fd = s->signal_fd;
1318
1319         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1320                 log_error("epoll_ctl(): %m");
1321                 return -errno;
1322         }
1323
1324         return 0;
1325 }
1326
1327 static int server_parse_proc_cmdline(Server *s) {
1328         _cleanup_free_ char *line = NULL;
1329         char *w, *state;
1330         int r;
1331         size_t l;
1332
1333         if (detect_container(NULL) > 0)
1334                 return 0;
1335
1336         r = read_one_line_file("/proc/cmdline", &line);
1337         if (r < 0) {
1338                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1339                 return 0;
1340         }
1341
1342         FOREACH_WORD_QUOTED(w, l, line, state) {
1343                 _cleanup_free_ char *word;
1344
1345                 word = strndup(w, l);
1346                 if (!word)
1347                         return -ENOMEM;
1348
1349                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1350                         r = parse_boolean(word + 35);
1351                         if (r < 0)
1352                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1353                         else
1354                                 s->forward_to_syslog = r;
1355                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1356                         r = parse_boolean(word + 33);
1357                         if (r < 0)
1358                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1359                         else
1360                                 s->forward_to_kmsg = r;
1361                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1362                         r = parse_boolean(word + 36);
1363                         if (r < 0)
1364                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1365                         else
1366                                 s->forward_to_console = r;
1367                 } else if (startswith(word, "systemd.journald"))
1368                         log_warning("Invalid systemd.journald parameter. Ignoring.");
1369         }
1370
1371         return 0;
1372 }
1373
1374 static int server_parse_config_file(Server *s) {
1375         static const char fn[] = "/etc/systemd/journald.conf";
1376         _cleanup_fclose_ FILE *f = NULL;
1377         int r;
1378
1379         assert(s);
1380
1381         f = fopen(fn, "re");
1382         if (!f) {
1383                 if (errno == ENOENT)
1384                         return 0;
1385
1386                 log_warning("Failed to open configuration file %s: %m", fn);
1387                 return -errno;
1388         }
1389
1390         r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1391                          (void*) journald_gperf_lookup, false, false, s);
1392         if (r < 0)
1393                 log_warning("Failed to parse configuration file: %s", strerror(-r));
1394
1395         return r;
1396 }
1397
1398 static int server_open_sync_timer(Server *s) {
1399         int r;
1400         struct epoll_event ev;
1401
1402         assert(s);
1403
1404         s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1405         if (s->sync_timer_fd < 0)
1406                 return -errno;
1407
1408         zero(ev);
1409         ev.events = EPOLLIN;
1410         ev.data.fd = s->sync_timer_fd;
1411
1412         r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1413         if (r < 0) {
1414                 log_error("Failed to add idle timer fd to epoll object: %m");
1415                 return -errno;
1416         }
1417
1418         return 0;
1419 }
1420
1421 int server_schedule_sync(Server *s, int priority) {
1422         int r;
1423
1424         assert(s);
1425
1426         if (priority <= LOG_CRIT) {
1427                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1428                 server_sync(s);
1429                 return 0;
1430         }
1431
1432         if (s->sync_scheduled)
1433                 return 0;
1434
1435         if (s->sync_interval_usec) {
1436                 struct itimerspec sync_timer_enable = {};
1437
1438                 timespec_store(&sync_timer_enable.it_value, s->sync_interval_usec);
1439
1440                 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1441                 if (r < 0)
1442                         return -errno;
1443         }
1444
1445         s->sync_scheduled = true;
1446
1447         return 0;
1448 }
1449
1450 int server_init(Server *s) {
1451         int n, r, fd;
1452
1453         assert(s);
1454
1455         zero(*s);
1456         s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1457                 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1458         s->compress = true;
1459         s->seal = true;
1460
1461         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1462         s->sync_scheduled = false;
1463
1464         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1465         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1466
1467         s->forward_to_syslog = true;
1468
1469         s->max_level_store = LOG_DEBUG;
1470         s->max_level_syslog = LOG_DEBUG;
1471         s->max_level_kmsg = LOG_NOTICE;
1472         s->max_level_console = LOG_INFO;
1473
1474         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1475         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1476
1477         server_parse_config_file(s);
1478         server_parse_proc_cmdline(s);
1479         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1480                 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1481                           (long long unsigned) s->rate_limit_interval,
1482                           s->rate_limit_burst);
1483                 s->rate_limit_interval = s->rate_limit_burst = 0;
1484         }
1485
1486         mkdir_p("/run/systemd/journal", 0755);
1487
1488         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1489         if (!s->user_journals)
1490                 return log_oom();
1491
1492         s->mmap = mmap_cache_new();
1493         if (!s->mmap)
1494                 return log_oom();
1495
1496         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1497         if (s->epoll_fd < 0) {
1498                 log_error("Failed to create epoll object: %m");
1499                 return -errno;
1500         }
1501
1502         n = sd_listen_fds(true);
1503         if (n < 0) {
1504                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1505                 return n;
1506         }
1507
1508         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1509
1510                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1511
1512                         if (s->native_fd >= 0) {
1513                                 log_error("Too many native sockets passed.");
1514                                 return -EINVAL;
1515                         }
1516
1517                         s->native_fd = fd;
1518
1519                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1520
1521                         if (s->stdout_fd >= 0) {
1522                                 log_error("Too many stdout sockets passed.");
1523                                 return -EINVAL;
1524                         }
1525
1526                         s->stdout_fd = fd;
1527
1528                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1529
1530                         if (s->syslog_fd >= 0) {
1531                                 log_error("Too many /dev/log sockets passed.");
1532                                 return -EINVAL;
1533                         }
1534
1535                         s->syslog_fd = fd;
1536
1537                 } else {
1538                         log_error("Unknown socket passed.");
1539                         return -EINVAL;
1540                 }
1541         }
1542
1543         r = server_open_syslog_socket(s);
1544         if (r < 0)
1545                 return r;
1546
1547         r = server_open_native_socket(s);
1548         if (r < 0)
1549                 return r;
1550
1551         r = server_open_stdout_socket(s);
1552         if (r < 0)
1553                 return r;
1554
1555         r = server_open_dev_kmsg(s);
1556         if (r < 0)
1557                 return r;
1558
1559         r = server_open_kernel_seqnum(s);
1560         if (r < 0)
1561                 return r;
1562
1563         r = server_open_sync_timer(s);
1564         if (r < 0)
1565                 return r;
1566
1567         r = open_signalfd(s);
1568         if (r < 0)
1569                 return r;
1570
1571         s->udev = udev_new();
1572         if (!s->udev)
1573                 return -ENOMEM;
1574
1575         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1576                                                s->rate_limit_burst);
1577         if (!s->rate_limit)
1578                 return -ENOMEM;
1579
1580         r = system_journal_open(s);
1581         if (r < 0)
1582                 return r;
1583
1584         return 0;
1585 }
1586
1587 void server_maybe_append_tags(Server *s) {
1588 #ifdef HAVE_GCRYPT
1589         JournalFile *f;
1590         Iterator i;
1591         usec_t n;
1592
1593         n = now(CLOCK_REALTIME);
1594
1595         if (s->system_journal)
1596                 journal_file_maybe_append_tag(s->system_journal, n);
1597
1598         HASHMAP_FOREACH(f, s->user_journals, i)
1599                 journal_file_maybe_append_tag(f, n);
1600 #endif
1601 }
1602
1603 void server_done(Server *s) {
1604         JournalFile *f;
1605         assert(s);
1606
1607         while (s->stdout_streams)
1608                 stdout_stream_free(s->stdout_streams);
1609
1610         if (s->system_journal)
1611                 journal_file_close(s->system_journal);
1612
1613         if (s->runtime_journal)
1614                 journal_file_close(s->runtime_journal);
1615
1616         while ((f = hashmap_steal_first(s->user_journals)))
1617                 journal_file_close(f);
1618
1619         hashmap_free(s->user_journals);
1620
1621         if (s->epoll_fd >= 0)
1622                 close_nointr_nofail(s->epoll_fd);
1623
1624         if (s->signal_fd >= 0)
1625                 close_nointr_nofail(s->signal_fd);
1626
1627         if (s->syslog_fd >= 0)
1628                 close_nointr_nofail(s->syslog_fd);
1629
1630         if (s->native_fd >= 0)
1631                 close_nointr_nofail(s->native_fd);
1632
1633         if (s->stdout_fd >= 0)
1634                 close_nointr_nofail(s->stdout_fd);
1635
1636         if (s->dev_kmsg_fd >= 0)
1637                 close_nointr_nofail(s->dev_kmsg_fd);
1638
1639         if (s->sync_timer_fd >= 0)
1640                 close_nointr_nofail(s->sync_timer_fd);
1641
1642         if (s->rate_limit)
1643                 journal_rate_limit_free(s->rate_limit);
1644
1645         if (s->kernel_seqnum)
1646                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1647
1648         free(s->buffer);
1649         free(s->tty_path);
1650
1651         if (s->mmap)
1652                 mmap_cache_unref(s->mmap);
1653
1654         if (s->udev)
1655                 udev_unref(s->udev);
1656 }