chiark / gitweb /
d431953f0701ef148b0977aef104e3f54ba1a7eb
[elogind.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
32 #include <sys/mman.h>
33
34 #include <systemd/sd-journal.h>
35 #include <systemd/sd-messages.h>
36 #include <systemd/sd-daemon.h>
37
38 #ifdef HAVE_LOGIND
39 #include <systemd/sd-login.h>
40 #endif
41
42 #include "mkdir.h"
43 #include "hashmap.h"
44 #include "journal-file.h"
45 #include "socket-util.h"
46 #include "cgroup-util.h"
47 #include "list.h"
48 #include "journal-rate-limit.h"
49 #include "journal-internal.h"
50 #include "journal-vacuum.h"
51 #include "conf-parser.h"
52 #include "journald.h"
53 #include "virt.h"
54 #include "missing.h"
55
56 #ifdef HAVE_ACL
57 #include <sys/acl.h>
58 #include <acl/libacl.h>
59 #include "acl-util.h"
60 #endif
61
62 #ifdef HAVE_SELINUX
63 #include <selinux/selinux.h>
64 #endif
65
66 #define USER_JOURNALS_MAX 1024
67 #define STDOUT_STREAMS_MAX 4096
68
69 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
70 #define DEFAULT_RATE_LIMIT_BURST 200
71
72 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
73
74 #define N_IOVEC_META_FIELDS 17
75 #define N_IOVEC_KERNEL_FIELDS 64
76
77 #define ENTRY_SIZE_MAX (1024*1024*32)
78
/* States of a stdout stream connection. The first seven values name the
 * header items of a stream in the order they are declared here (identifier,
 * unit id, priority, level-prefix flag and the three forwarding flags);
 * STDOUT_STREAM_RUNNING is the final state. */
typedef enum StdoutStreamState {
        STDOUT_STREAM_IDENTIFIER = 0,
        STDOUT_STREAM_UNIT_ID,
        STDOUT_STREAM_PRIORITY,
        STDOUT_STREAM_LEVEL_PREFIX,
        STDOUT_STREAM_FORWARD_TO_SYSLOG,
        STDOUT_STREAM_FORWARD_TO_KMSG,
        STDOUT_STREAM_FORWARD_TO_CONSOLE,
        STDOUT_STREAM_RUNNING
} StdoutStreamState;
89
90 struct StdoutStream {
91         Server *server;
92         StdoutStreamState state;
93
94         int fd;
95
96         struct ucred ucred;
97 #ifdef HAVE_SELINUX
98         security_context_t security_context;
99 #endif
100
101         char *identifier;
102         char *unit_id;
103         int priority;
104         bool level_prefix:1;
105         bool forward_to_syslog:1;
106         bool forward_to_kmsg:1;
107         bool forward_to_console:1;
108
109         char buffer[LINE_MAX+1];
110         size_t length;
111
112         LIST_FIELDS(StdoutStream, stdout_stream);
113 };
114
115 static const char* const storage_table[] = {
116         [STORAGE_AUTO] = "auto",
117         [STORAGE_VOLATILE] = "volatile",
118         [STORAGE_PERSISTENT] = "persistent",
119         [STORAGE_NONE] = "none"
120 };
121
122 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
123 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
124
125 static uint64_t available_space(Server *s) {
126         char ids[33], *p;
127         const char *f;
128         sd_id128_t machine;
129         struct statvfs ss;
130         uint64_t sum = 0, avail = 0, ss_avail = 0;
131         int r;
132         DIR *d;
133         usec_t ts;
134         JournalMetrics *m;
135
136         ts = now(CLOCK_MONOTONIC);
137
138         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
139                 return s->cached_available_space;
140
141         r = sd_id128_get_machine(&machine);
142         if (r < 0)
143                 return 0;
144
145         if (s->system_journal) {
146                 f = "/var/log/journal/";
147                 m = &s->system_metrics;
148         } else {
149                 f = "/run/log/journal/";
150                 m = &s->runtime_metrics;
151         }
152
153         assert(m);
154
155         p = strappend(f, sd_id128_to_string(machine, ids));
156         if (!p)
157                 return 0;
158
159         d = opendir(p);
160         free(p);
161
162         if (!d)
163                 return 0;
164
165         if (fstatvfs(dirfd(d), &ss) < 0)
166                 goto finish;
167
168         for (;;) {
169                 struct stat st;
170                 struct dirent buf, *de;
171
172                 r = readdir_r(d, &buf, &de);
173                 if (r != 0)
174                         break;
175
176                 if (!de)
177                         break;
178
179                 if (!endswith(de->d_name, ".journal") &&
180                     !endswith(de->d_name, ".journal~"))
181                         continue;
182
183                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
184                         continue;
185
186                 if (!S_ISREG(st.st_mode))
187                         continue;
188
189                 sum += (uint64_t) st.st_blocks * 512UL;
190         }
191
192         avail = sum >= m->max_use ? 0 : m->max_use - sum;
193
194         ss_avail = ss.f_bsize * ss.f_bavail;
195
196         ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
197
198         if (ss_avail < avail)
199                 avail = ss_avail;
200
201         s->cached_available_space = avail;
202         s->cached_available_space_timestamp = ts;
203
204 finish:
205         closedir(d);
206
207         return avail;
208 }
209
210 static void server_read_file_gid(Server *s) {
211         const char *adm = "adm";
212         int r;
213
214         assert(s);
215
216         if (s->file_gid_valid)
217                 return;
218
219         r = get_group_creds(&adm, &s->file_gid);
220         if (r < 0)
221                 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
222
223         /* if we couldn't read the gid, then it will be 0, but that's
224          * fine and we shouldn't try to resolve the group again, so
225          * let's just pretend it worked right-away. */
226         s->file_gid_valid = true;
227 }
228
229 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
230         int r;
231 #ifdef HAVE_ACL
232         acl_t acl;
233         acl_entry_t entry;
234         acl_permset_t permset;
235 #endif
236
237         assert(f);
238
239         server_read_file_gid(s);
240
241         r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
242         if (r < 0)
243                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
244
245 #ifdef HAVE_ACL
246         if (uid <= 0)
247                 return;
248
249         acl = acl_get_fd(f->fd);
250         if (!acl) {
251                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
252                 return;
253         }
254
255         r = acl_find_uid(acl, uid, &entry);
256         if (r <= 0) {
257
258                 if (acl_create_entry(&acl, &entry) < 0 ||
259                     acl_set_tag_type(entry, ACL_USER) < 0 ||
260                     acl_set_qualifier(entry, &uid) < 0) {
261                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
262                         goto finish;
263                 }
264         }
265
266         if (acl_get_permset(entry, &permset) < 0 ||
267             acl_add_perm(permset, ACL_READ) < 0 ||
268             acl_calc_mask(&acl) < 0) {
269                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
270                 goto finish;
271         }
272
273         if (acl_set_fd(f->fd, acl) < 0)
274                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
275
276 finish:
277         acl_free(acl);
278 #endif
279 }
280
281 static JournalFile* find_journal(Server *s, uid_t uid) {
282         char *p;
283         int r;
284         JournalFile *f;
285         sd_id128_t machine;
286
287         assert(s);
288
289         /* We split up user logs only on /var, not on /run. If the
290          * runtime file is open, we write to it exclusively, in order
291          * to guarantee proper order as soon as we flush /run to
292          * /var and close the runtime file. */
293
294         if (s->runtime_journal)
295                 return s->runtime_journal;
296
297         if (uid <= 0)
298                 return s->system_journal;
299
300         r = sd_id128_get_machine(&machine);
301         if (r < 0)
302                 return s->system_journal;
303
304         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
305         if (f)
306                 return f;
307
308         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
309                      SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
310                 return s->system_journal;
311
312         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
313                 /* Too many open? Then let's close one */
314                 f = hashmap_steal_first(s->user_journals);
315                 assert(f);
316                 journal_file_close(f);
317         }
318
319         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, false, &s->system_metrics, s->mmap, s->system_journal, &f);
320         free(p);
321
322         if (r < 0)
323                 return s->system_journal;
324
325         server_fix_perms(s, f, uid);
326
327         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
328         if (r < 0) {
329                 journal_file_close(f);
330                 return s->system_journal;
331         }
332
333         return f;
334 }
335
336 static void server_rotate(Server *s) {
337         JournalFile *f;
338         void *k;
339         Iterator i;
340         int r;
341
342         log_info("Rotating...");
343
344         if (s->runtime_journal) {
345                 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
346                 if (r < 0)
347                         if (s->runtime_journal)
348                                 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
349                         else
350                                 log_error("Failed to create new runtime journal: %s", strerror(-r));
351                 else
352                         server_fix_perms(s, s->runtime_journal, 0);
353         }
354
355         if (s->system_journal) {
356                 r = journal_file_rotate(&s->system_journal, s->compress, true);
357                 if (r < 0)
358                         if (s->system_journal)
359                                 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
360                         else
361                                 log_error("Failed to create new system journal: %s", strerror(-r));
362
363                 else
364                         server_fix_perms(s, s->system_journal, 0);
365         }
366
367         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
368                 r = journal_file_rotate(&f, s->compress, false);
369                 if (r < 0)
370                         if (f->path)
371                                 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
372                         else
373                                 log_error("Failed to create user journal: %s", strerror(-r));
374                 else {
375                         hashmap_replace(s->user_journals, k, f);
376                         server_fix_perms(s, s->system_journal, PTR_TO_UINT32(k));
377                 }
378         }
379 }
380
381 static void server_vacuum(Server *s) {
382         char *p;
383         char ids[33];
384         sd_id128_t machine;
385         int r;
386
387         log_info("Vacuuming...");
388
389         r = sd_id128_get_machine(&machine);
390         if (r < 0) {
391                 log_error("Failed to get machine ID: %s", strerror(-r));
392                 return;
393         }
394
395         sd_id128_to_string(machine, ids);
396
397         if (s->system_journal) {
398                 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
399                         log_oom();
400                         return;
401                 }
402
403                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
404                 if (r < 0 && r != -ENOENT)
405                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
406                 free(p);
407         }
408
409         if (s->runtime_journal) {
410                 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
411                         log_oom();
412                         return;
413                 }
414
415                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
416                 if (r < 0 && r != -ENOENT)
417                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
418                 free(p);
419         }
420
421         s->cached_available_space_timestamp = 0;
422 }
423
424 static char *shortened_cgroup_path(pid_t pid) {
425         int r;
426         char *process_path, *init_path, *path;
427
428         assert(pid > 0);
429
430         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
431         if (r < 0)
432                 return NULL;
433
434         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
435         if (r < 0) {
436                 free(process_path);
437                 return NULL;
438         }
439
440         if (endswith(init_path, "/system"))
441                 init_path[strlen(init_path) - 7] = 0;
442         else if (streq(init_path, "/"))
443                 init_path[0] = 0;
444
445         if (startswith(process_path, init_path)) {
446                 char *p;
447
448                 p = strdup(process_path + strlen(init_path));
449                 if (!p) {
450                         free(process_path);
451                         free(init_path);
452                         return NULL;
453                 }
454                 path = p;
455         } else {
456                 path = process_path;
457                 process_path = NULL;
458         }
459
460         free(process_path);
461         free(init_path);
462
463         return path;
464 }
465
466 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
467         JournalFile *f;
468         bool vacuumed = false;
469         int r;
470
471         assert(s);
472         assert(iovec);
473         assert(n > 0);
474
475         f = find_journal(s, uid);
476         if (!f)
477                 return;
478
479         if (journal_file_rotate_suggested(f)) {
480                 log_info("Journal header limits reached or header out-of-date, rotating.");
481                 server_rotate(s);
482                 server_vacuum(s);
483                 vacuumed = true;
484
485                 f = find_journal(s, uid);
486                 if (!f)
487                         return;
488         }
489
490         for (;;) {
491                 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
492                 if (r >= 0)
493                         return;
494
495                 if (vacuumed ||
496                     (r != -E2BIG && /* hit limit */
497                      r != -EFBIG && /* hit fs limit */
498                      r != -EDQUOT && /* quota hit */
499                      r != -ENOSPC && /* disk full */
500                      r != -EBADMSG && /* corrupted */
501                      r != -ENODATA && /* truncated */
502                      r != -EHOSTDOWN && /* other machine */
503                      r != -EPROTONOSUPPORT && /* unsupported feature */
504                      r != -EBUSY && /* unclean shutdown */
505                      r != -ESHUTDOWN /* already archived */)) {
506                         log_error("Failed to write entry, ignoring: %s", strerror(-r));
507                         return;
508                 }
509
510                 if (r == -E2BIG || r == -EFBIG || r == EDQUOT || r == ENOSPC)
511                         log_info("Allocation limit reached, rotating.");
512                 else if (r == -EHOSTDOWN)
513                         log_info("Journal file from other machine, rotating.");
514                 else if (r == -EBUSY)
515                         log_info("Unlcean shutdown, rotating.");
516                 else
517                         log_warning("Journal file corrupted, rotating.");
518
519                 server_rotate(s);
520                 server_vacuum(s);
521                 vacuumed = true;
522
523                 f = find_journal(s, uid);
524                 if (!f)
525                         return;
526
527                 log_info("Retrying write.");
528         }
529 }
530
531 static void dispatch_message_real(
532                 Server *s,
533                 struct iovec *iovec, unsigned n, unsigned m,
534                 struct ucred *ucred,
535                 struct timeval *tv,
536                 const char *label, size_t label_len,
537                 const char *unit_id) {
538
539         char *pid = NULL, *uid = NULL, *gid = NULL,
540                 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
541                 *comm = NULL, *cmdline = NULL, *hostname = NULL,
542                 *audit_session = NULL, *audit_loginuid = NULL,
543                 *exe = NULL, *cgroup = NULL, *session = NULL,
544                 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
545
546         char idbuf[33];
547         sd_id128_t id;
548         int r;
549         char *t;
550         uid_t loginuid = 0, realuid = 0;
551
552         assert(s);
553         assert(iovec);
554         assert(n > 0);
555         assert(n + N_IOVEC_META_FIELDS <= m);
556
557         if (ucred) {
558                 uint32_t audit;
559 #ifdef HAVE_LOGIND
560                 uid_t owner;
561 #endif
562
563                 realuid = ucred->uid;
564
565                 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
566                         IOVEC_SET_STRING(iovec[n++], pid);
567
568                 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
569                         IOVEC_SET_STRING(iovec[n++], uid);
570
571                 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
572                         IOVEC_SET_STRING(iovec[n++], gid);
573
574                 r = get_process_comm(ucred->pid, &t);
575                 if (r >= 0) {
576                         comm = strappend("_COMM=", t);
577                         free(t);
578
579                         if (comm)
580                                 IOVEC_SET_STRING(iovec[n++], comm);
581                 }
582
583                 r = get_process_exe(ucred->pid, &t);
584                 if (r >= 0) {
585                         exe = strappend("_EXE=", t);
586                         free(t);
587
588                         if (exe)
589                                 IOVEC_SET_STRING(iovec[n++], exe);
590                 }
591
592                 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
593                 if (r >= 0) {
594                         cmdline = strappend("_CMDLINE=", t);
595                         free(t);
596
597                         if (cmdline)
598                                 IOVEC_SET_STRING(iovec[n++], cmdline);
599                 }
600
601                 r = audit_session_from_pid(ucred->pid, &audit);
602                 if (r >= 0)
603                         if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
604                                 IOVEC_SET_STRING(iovec[n++], audit_session);
605
606                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
607                 if (r >= 0)
608                         if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
609                                 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
610
611                 t = shortened_cgroup_path(ucred->pid);
612                 if (t) {
613                         cgroup = strappend("_SYSTEMD_CGROUP=", t);
614                         free(t);
615
616                         if (cgroup)
617                                 IOVEC_SET_STRING(iovec[n++], cgroup);
618                 }
619
620 #ifdef HAVE_LOGIND
621                 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
622                         session = strappend("_SYSTEMD_SESSION=", t);
623                         free(t);
624
625                         if (session)
626                                 IOVEC_SET_STRING(iovec[n++], session);
627                 }
628
629                 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
630                         if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
631                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
632 #endif
633
634                 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
635                         unit = strappend("_SYSTEMD_UNIT=", t);
636                         free(t);
637                 } else if (unit_id)
638                         unit = strappend("_SYSTEMD_UNIT=", unit_id);
639
640                 if (unit)
641                         IOVEC_SET_STRING(iovec[n++], unit);
642
643 #ifdef HAVE_SELINUX
644                 if (label) {
645                         selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
646                         if (selinux_context) {
647                                 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
648                                 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
649                                 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
650                                 IOVEC_SET_STRING(iovec[n++], selinux_context);
651                         }
652                 } else {
653                         security_context_t con;
654
655                         if (getpidcon(ucred->pid, &con) >= 0) {
656                                 selinux_context = strappend("_SELINUX_CONTEXT=", con);
657                                 if (selinux_context)
658                                         IOVEC_SET_STRING(iovec[n++], selinux_context);
659
660                                 freecon(con);
661                         }
662                 }
663 #endif
664         }
665
666         if (tv) {
667                 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
668                              (unsigned long long) timeval_load(tv)) >= 0)
669                         IOVEC_SET_STRING(iovec[n++], source_time);
670         }
671
672         /* Note that strictly speaking storing the boot id here is
673          * redundant since the entry includes this in-line
674          * anyway. However, we need this indexed, too. */
675         r = sd_id128_get_boot(&id);
676         if (r >= 0)
677                 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
678                         IOVEC_SET_STRING(iovec[n++], boot_id);
679
680         r = sd_id128_get_machine(&id);
681         if (r >= 0)
682                 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
683                         IOVEC_SET_STRING(iovec[n++], machine_id);
684
685         t = gethostname_malloc();
686         if (t) {
687                 hostname = strappend("_HOSTNAME=", t);
688                 free(t);
689                 if (hostname)
690                         IOVEC_SET_STRING(iovec[n++], hostname);
691         }
692
693         assert(n <= m);
694
695         write_to_journal(s, realuid == 0 ? 0 : loginuid, iovec, n);
696
697         free(pid);
698         free(uid);
699         free(gid);
700         free(comm);
701         free(exe);
702         free(cmdline);
703         free(source_time);
704         free(boot_id);
705         free(machine_id);
706         free(hostname);
707         free(audit_session);
708         free(audit_loginuid);
709         free(cgroup);
710         free(session);
711         free(owner_uid);
712         free(unit);
713         free(selinux_context);
714 }
715
716 static void driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
717         char mid[11 + 32 + 1];
718         char buffer[16 + LINE_MAX + 1];
719         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
720         int n = 0;
721         va_list ap;
722         struct ucred ucred;
723
724         assert(s);
725         assert(format);
726
727         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
728         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
729
730         memcpy(buffer, "MESSAGE=", 8);
731         va_start(ap, format);
732         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
733         va_end(ap);
734         char_array_0(buffer);
735         IOVEC_SET_STRING(iovec[n++], buffer);
736
737         snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
738         char_array_0(mid);
739         IOVEC_SET_STRING(iovec[n++], mid);
740
741         zero(ucred);
742         ucred.pid = getpid();
743         ucred.uid = getuid();
744         ucred.gid = getgid();
745
746         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
747 }
748
749 static void dispatch_message(Server *s,
750                              struct iovec *iovec, unsigned n, unsigned m,
751                              struct ucred *ucred,
752                              struct timeval *tv,
753                              const char *label, size_t label_len,
754                              const char *unit_id,
755                              int priority) {
756         int rl;
757         char *path = NULL, *c;
758
759         assert(s);
760         assert(iovec || n == 0);
761
762         if (n == 0)
763                 return;
764
765         if (LOG_PRI(priority) > s->max_level_store)
766                 return;
767
768         if (!ucred)
769                 goto finish;
770
771         path = shortened_cgroup_path(ucred->pid);
772         if (!path)
773                 goto finish;
774
775         /* example: /user/lennart/3/foobar
776          *          /system/dbus.service/foobar
777          *
778          * So let's cut of everything past the third /, since that is
779          * wher user directories start */
780
781         c = strchr(path, '/');
782         if (c) {
783                 c = strchr(c+1, '/');
784                 if (c) {
785                         c = strchr(c+1, '/');
786                         if (c)
787                                 *c = 0;
788                 }
789         }
790
791         rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
792
793         if (rl == 0) {
794                 free(path);
795                 return;
796         }
797
798         /* Write a suppression message if we suppressed something */
799         if (rl > 1)
800                 driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
801
802         free(path);
803
804 finish:
805         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
806 }
807
808 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
809         struct msghdr msghdr;
810         struct cmsghdr *cmsg;
811         union {
812                 struct cmsghdr cmsghdr;
813                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
814         } control;
815         union sockaddr_union sa;
816
817         assert(s);
818         assert(iovec);
819         assert(n_iovec > 0);
820
821         zero(msghdr);
822         msghdr.msg_iov = (struct iovec*) iovec;
823         msghdr.msg_iovlen = n_iovec;
824
825         zero(sa);
826         sa.un.sun_family = AF_UNIX;
827         strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
828         msghdr.msg_name = &sa;
829         msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
830
831         if (ucred) {
832                 zero(control);
833                 msghdr.msg_control = &control;
834                 msghdr.msg_controllen = sizeof(control);
835
836                 cmsg = CMSG_FIRSTHDR(&msghdr);
837                 cmsg->cmsg_level = SOL_SOCKET;
838                 cmsg->cmsg_type = SCM_CREDENTIALS;
839                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
840                 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
841                 msghdr.msg_controllen = cmsg->cmsg_len;
842         }
843
844         /* Forward the syslog message we received via /dev/log to
845          * /run/systemd/syslog. Unfortunately we currently can't set
846          * the SO_TIMESTAMP auxiliary data, and hence we don't. */
847
848         if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
849                 return;
850
851         /* The socket is full? I guess the syslog implementation is
852          * too slow, and we shouldn't wait for that... */
853         if (errno == EAGAIN)
854                 return;
855
856         if (ucred && errno == ESRCH) {
857                 struct ucred u;
858
859                 /* Hmm, presumably the sender process vanished
860                  * by now, so let's fix it as good as we
861                  * can, and retry */
862
863                 u = *ucred;
864                 u.pid = getpid();
865                 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
866
867                 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
868                         return;
869
870                 if (errno == EAGAIN)
871                         return;
872         }
873
874         if (errno != ENOENT)
875                 log_debug("Failed to forward syslog message: %m");
876 }
877
878 static void forward_syslog_raw(Server *s, int priority, const char *buffer, struct ucred *ucred, struct timeval *tv) {
879         struct iovec iovec;
880
881         assert(s);
882         assert(buffer);
883
884         if (LOG_PRI(priority) > s->max_level_syslog)
885                 return;
886
887         IOVEC_SET_STRING(iovec, buffer);
888         forward_syslog_iovec(s, &iovec, 1, ucred, tv);
889 }
890
891 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
892         struct iovec iovec[5];
893         char header_priority[6], header_time[64], header_pid[16];
894         int n = 0;
895         time_t t;
896         struct tm *tm;
897         char *ident_buf = NULL;
898
899         assert(s);
900         assert(priority >= 0);
901         assert(priority <= 999);
902         assert(message);
903
904         if (LOG_PRI(priority) > s->max_level_syslog)
905                 return;
906
907         /* First: priority field */
908         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
909         char_array_0(header_priority);
910         IOVEC_SET_STRING(iovec[n++], header_priority);
911
912         /* Second: timestamp */
913         t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
914         tm = localtime(&t);
915         if (!tm)
916                 return;
917         if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
918                 return;
919         IOVEC_SET_STRING(iovec[n++], header_time);
920
921         /* Third: identifier and PID */
922         if (ucred) {
923                 if (!identifier) {
924                         get_process_comm(ucred->pid, &ident_buf);
925                         identifier = ident_buf;
926                 }
927
928                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
929                 char_array_0(header_pid);
930
931                 if (identifier)
932                         IOVEC_SET_STRING(iovec[n++], identifier);
933
934                 IOVEC_SET_STRING(iovec[n++], header_pid);
935         } else if (identifier) {
936                 IOVEC_SET_STRING(iovec[n++], identifier);
937                 IOVEC_SET_STRING(iovec[n++], ": ");
938         }
939
940         /* Fourth: message */
941         IOVEC_SET_STRING(iovec[n++], message);
942
943         forward_syslog_iovec(s, iovec, n, ucred, tv);
944
945         free(ident_buf);
946 }
947
/* Returns the priority unchanged if it carries a non-zero facility;
 * otherwise returns its level bits combined with LOG_USER. */
static int fixup_priority(int priority) {

        if (priority & LOG_FACMASK)
                return priority;

        return LOG_USER | (priority & LOG_PRIMASK);
}
955
956 static void forward_kmsg(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
957         struct iovec iovec[5];
958         char header_priority[6], header_pid[16];
959         int n = 0;
960         char *ident_buf = NULL;
961
962         assert(s);
963         assert(priority >= 0);
964         assert(priority <= 999);
965         assert(message);
966
967         if (_unlikely_(LOG_PRI(priority) > s->max_level_kmsg))
968                 return;
969
970         if (_unlikely_(s->dev_kmsg_fd < 0))
971                 return;
972
973         /* Never allow messages with kernel facility to be written to
974          * kmsg, regardless where the data comes from. */
975         priority = fixup_priority(priority);
976
977         /* First: priority field */
978         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
979         char_array_0(header_priority);
980         IOVEC_SET_STRING(iovec[n++], header_priority);
981
982         /* Second: identifier and PID */
983         if (ucred) {
984                 if (!identifier) {
985                         get_process_comm(ucred->pid, &ident_buf);
986                         identifier = ident_buf;
987                 }
988
989                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
990                 char_array_0(header_pid);
991
992                 if (identifier)
993                         IOVEC_SET_STRING(iovec[n++], identifier);
994
995                 IOVEC_SET_STRING(iovec[n++], header_pid);
996         } else if (identifier) {
997                 IOVEC_SET_STRING(iovec[n++], identifier);
998                 IOVEC_SET_STRING(iovec[n++], ": ");
999         }
1000
1001         /* Fourth: message */
1002         IOVEC_SET_STRING(iovec[n++], message);
1003         IOVEC_SET_STRING(iovec[n++], "\n");
1004
1005         if (writev(s->dev_kmsg_fd, iovec, n) < 0)
1006                 log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
1007
1008         free(ident_buf);
1009 }
1010
1011 static void forward_console(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
1012         struct iovec iovec[4];
1013         char header_pid[16];
1014         int n = 0, fd;
1015         char *ident_buf = NULL;
1016         const char *tty;
1017
1018         assert(s);
1019         assert(message);
1020
1021         if (LOG_PRI(priority) > s->max_level_console)
1022                 return;
1023
1024         /* First: identifier and PID */
1025         if (ucred) {
1026                 if (!identifier) {
1027                         get_process_comm(ucred->pid, &ident_buf);
1028                         identifier = ident_buf;
1029                 }
1030
1031                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
1032                 char_array_0(header_pid);
1033
1034                 if (identifier)
1035                         IOVEC_SET_STRING(iovec[n++], identifier);
1036
1037                 IOVEC_SET_STRING(iovec[n++], header_pid);
1038         } else if (identifier) {
1039                 IOVEC_SET_STRING(iovec[n++], identifier);
1040                 IOVEC_SET_STRING(iovec[n++], ": ");
1041         }
1042
1043         /* Third: message */
1044         IOVEC_SET_STRING(iovec[n++], message);
1045         IOVEC_SET_STRING(iovec[n++], "\n");
1046
1047         tty = s->tty_path ? s->tty_path : "/dev/console";
1048
1049         fd = open_terminal(tty, O_WRONLY|O_NOCTTY|O_CLOEXEC);
1050         if (fd < 0) {
1051                 log_debug("Failed to open %s for logging: %s", tty, strerror(errno));
1052                 goto finish;
1053         }
1054
1055         if (writev(fd, iovec, n) < 0)
1056                 log_debug("Failed to write to %s for logging: %s", tty, strerror(errno));
1057
1058         close_nointr_nofail(fd);
1059
1060 finish:
1061         free(ident_buf);
1062 }
1063
/* Parses an optional "identifier[pid]:" or "identifier:" prefix.
 *
 * Leading whitespace in *buf is skipped and the first
 * whitespace-delimited token is inspected. If it does not end in ':',
 * nothing is changed. Otherwise:
 *
 *   - *identifier is set to a newly allocated copy of the token
 *     without the trailing ':' and without a trailing "[...]" part
 *     (this may be the empty string),
 *   - *pid is set to a newly allocated copy of the text between the
 *     brackets, if a "[...]" part directly precedes the ':',
 *   - *buf is advanced past the token and any whitespace following it.
 *
 * On allocation failure the respective output is left untouched. The
 * caller owns (and must free) the returned strings. */
static void read_identifier(const char **buf, char **identifier, char **pid) {
        const char *p;
        char *t;
        size_t l, e;

        assert(buf);
        assert(identifier);
        assert(pid);

        p = *buf;

        p += strspn(p, WHITESPACE);
        l = strcspn(p, WHITESPACE);

        if (l <= 0 ||
            p[l-1] != ':')
                return;

        /* e is the full token length, l the length without the ':' */
        e = l;
        l--;

        /* The token may consist of the ':' alone, in which case l is
         * 0 now and p[l-1] would lie before the token. Only look for
         * a "[pid]" suffix if there is something left to look at. */
        if (l > 0 && p[l-1] == ']') {
                size_t k = l-1;

                /* Scan backwards for the matching '[' */
                while (k > 0 && p[k] != '[')
                        k--;

                if (p[k] == '[') {
                        t = strndup(p+k+1, l-k-2);
                        if (t)
                                *pid = t;

                        /* The identifier ends where the bracket starts */
                        l = k;
                }
        }

        t = strndup(p, l);
        if (t)
                *identifier = t;

        *buf = p + e;
        *buf += strspn(*buf, WHITESPACE);
}
1113
1114 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv, const char *label, size_t label_len) {
1115         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
1116         struct iovec iovec[N_IOVEC_META_FIELDS + 6];
1117         unsigned n = 0;
1118         int priority = LOG_USER | LOG_INFO;
1119         char *identifier = NULL, *pid = NULL;
1120         const char *orig;
1121
1122         assert(s);
1123         assert(buf);
1124
1125         orig = buf;
1126         parse_syslog_priority((char**) &buf, &priority);
1127
1128         if (s->forward_to_syslog)
1129                 forward_syslog_raw(s, priority, orig, ucred, tv);
1130
1131         skip_syslog_date((char**) &buf);
1132         read_identifier(&buf, &identifier, &pid);
1133
1134         if (s->forward_to_kmsg)
1135                 forward_kmsg(s, priority, identifier, buf, ucred);
1136
1137         if (s->forward_to_console)
1138                 forward_console(s, priority, identifier, buf, ucred);
1139
1140         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
1141
1142         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1143                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1144
1145         if (priority & LOG_FACMASK)
1146                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1147                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1148
1149         if (identifier) {
1150                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1151                 if (syslog_identifier)
1152                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1153         }
1154
1155         if (pid) {
1156                 syslog_pid = strappend("SYSLOG_PID=", pid);
1157                 if (syslog_pid)
1158                         IOVEC_SET_STRING(iovec[n++], syslog_pid);
1159         }
1160
1161         message = strappend("MESSAGE=", buf);
1162         if (message)
1163                 IOVEC_SET_STRING(iovec[n++], message);
1164
1165         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, label, label_len, NULL, priority);
1166
1167         free(message);
1168         free(identifier);
1169         free(pid);
1170         free(syslog_priority);
1171         free(syslog_facility);
1172         free(syslog_identifier);
1173         free(syslog_pid);
1174 }
1175
/* Checks whether the l bytes at p form a field name that is acceptable
 * in a user-supplied entry.
 *
 * This loosely follows the POSIX syntax recommendations for
 * environment variable names, with a few additional restrictions:
 *
 * http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
 *
 * A name is accepted if it is 1..64 bytes long, consists solely of
 * A-Z, 0-9 and '_', and starts with neither a digit nor an underscore
 * (names starting with an underscore are protected). */
static bool valid_user_field(const char *p, size_t l) {
        size_t i;

        /* Neither empty nor overly long names */
        if (l == 0 || l > 64)
                return false;

        /* Protected prefix, or a digit in first position */
        if (p[0] == '_' || (p[0] >= '0' && p[0] <= '9'))
                return false;

        for (i = 0; i < l; i++) {
                char c = p[i];
                bool upper = c >= 'A' && c <= 'Z';
                bool digit = c >= '0' && c <= '9';

                if (!upper && !digit && c != '_')
                        return false;
        }

        return true;
}
1210
1211 static void process_native_message(
1212                 Server *s,
1213                 const void *buffer, size_t buffer_size,
1214                 struct ucred *ucred,
1215                 struct timeval *tv,
1216                 const char *label, size_t label_len) {
1217
1218         struct iovec *iovec = NULL;
1219         unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1220         const char *p;
1221         size_t remaining;
1222         int priority = LOG_INFO;
1223         char *identifier = NULL, *message = NULL;
1224
1225         assert(s);
1226         assert(buffer || buffer_size == 0);
1227
1228         p = buffer;
1229         remaining = buffer_size;
1230
1231         while (remaining > 0) {
1232                 const char *e, *q;
1233
1234                 e = memchr(p, '\n', remaining);
1235
1236                 if (!e) {
1237                         /* Trailing noise, let's ignore it, and flush what we collected */
1238                         log_debug("Received message with trailing noise, ignoring.");
1239                         break;
1240                 }
1241
1242                 if (e == p) {
1243                         /* Entry separator */
1244                         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
1245                         n = 0;
1246                         priority = LOG_INFO;
1247
1248                         p++;
1249                         remaining--;
1250                         continue;
1251                 }
1252
1253                 if (*p == '.' || *p == '#') {
1254                         /* Ignore control commands for now, and
1255                          * comments too. */
1256                         remaining -= (e - p) + 1;
1257                         p = e + 1;
1258                         continue;
1259                 }
1260
1261                 /* A property follows */
1262
1263                 if (n+N_IOVEC_META_FIELDS >= m) {
1264                         struct iovec *c;
1265                         unsigned u;
1266
1267                         u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1268                         c = realloc(iovec, u * sizeof(struct iovec));
1269                         if (!c) {
1270                                 log_oom();
1271                                 break;
1272                         }
1273
1274                         iovec = c;
1275                         m = u;
1276                 }
1277
1278                 q = memchr(p, '=', e - p);
1279                 if (q) {
1280                         if (valid_user_field(p, q - p)) {
1281                                 size_t l;
1282
1283                                 l = e - p;
1284
1285                                 /* If the field name starts with an
1286                                  * underscore, skip the variable,
1287                                  * since that indidates a trusted
1288                                  * field */
1289                                 iovec[n].iov_base = (char*) p;
1290                                 iovec[n].iov_len = l;
1291                                 n++;
1292
1293                                 /* We need to determine the priority
1294                                  * of this entry for the rate limiting
1295                                  * logic */
1296                                 if (l == 10 &&
1297                                     memcmp(p, "PRIORITY=", 9) == 0 &&
1298                                     p[9] >= '0' && p[9] <= '9')
1299                                         priority = (priority & LOG_FACMASK) | (p[9] - '0');
1300
1301                                 else if (l == 17 &&
1302                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1303                                          p[16] >= '0' && p[16] <= '9')
1304                                         priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1305
1306                                 else if (l == 18 &&
1307                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1308                                          p[16] >= '0' && p[16] <= '9' &&
1309                                          p[17] >= '0' && p[17] <= '9')
1310                                         priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1311
1312                                 else if (l >= 19 &&
1313                                          memcmp(p, "SYSLOG_IDENTIFIER=", 18) == 0) {
1314                                         char *t;
1315
1316                                         t = strndup(p + 18, l - 18);
1317                                         if (t) {
1318                                                 free(identifier);
1319                                                 identifier = t;
1320                                         }
1321                                 } else if (l >= 8 &&
1322                                            memcmp(p, "MESSAGE=", 8) == 0) {
1323                                         char *t;
1324
1325                                         t = strndup(p + 8, l - 8);
1326                                         if (t) {
1327                                                 free(message);
1328                                                 message = t;
1329                                         }
1330                                 }
1331                         }
1332
1333                         remaining -= (e - p) + 1;
1334                         p = e + 1;
1335                         continue;
1336                 } else {
1337                         le64_t l_le;
1338                         uint64_t l;
1339                         char *k;
1340
1341                         if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1342                                 log_debug("Failed to parse message, ignoring.");
1343                                 break;
1344                         }
1345
1346                         memcpy(&l_le, e + 1, sizeof(uint64_t));
1347                         l = le64toh(l_le);
1348
1349                         if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1350                             e[1+sizeof(uint64_t)+l] != '\n') {
1351                                 log_debug("Failed to parse message, ignoring.");
1352                                 break;
1353                         }
1354
1355                         k = malloc((e - p) + 1 + l);
1356                         if (!k) {
1357                                 log_oom();
1358                                 break;
1359                         }
1360
1361                         memcpy(k, p, e - p);
1362                         k[e - p] = '=';
1363                         memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1364
1365                         if (valid_user_field(p, e - p)) {
1366                                 iovec[n].iov_base = k;
1367                                 iovec[n].iov_len = (e - p) + 1 + l;
1368                                 n++;
1369                         } else
1370                                 free(k);
1371
1372                         remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1373                         p = e + 1 + sizeof(uint64_t) + l + 1;
1374                 }
1375         }
1376
1377         if (n <= 0)
1378                 goto finish;
1379
1380         tn = n++;
1381         IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1382
1383         if (message) {
1384                 if (s->forward_to_syslog)
1385                         forward_syslog(s, priority, identifier, message, ucred, tv);
1386
1387                 if (s->forward_to_kmsg)
1388                         forward_kmsg(s, priority, identifier, message, ucred);
1389
1390                 if (s->forward_to_console)
1391                         forward_console(s, priority, identifier, message, ucred);
1392         }
1393
1394         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
1395
1396 finish:
1397         for (j = 0; j < n; j++)  {
1398                 if (j == tn)
1399                         continue;
1400
1401                 if (iovec[j].iov_base < buffer ||
1402                     (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1403                         free(iovec[j].iov_base);
1404         }
1405
1406         free(iovec);
1407         free(identifier);
1408         free(message);
1409 }
1410
1411 static void process_native_file(
1412                 Server *s,
1413                 int fd,
1414                 struct ucred *ucred,
1415                 struct timeval *tv,
1416                 const char *label, size_t label_len) {
1417
1418         struct stat st;
1419         void *p;
1420         ssize_t n;
1421
1422         assert(s);
1423         assert(fd >= 0);
1424
1425         /* Data is in the passed file, since it didn't fit in a
1426          * datagram. We can't map the file here, since clients might
1427          * then truncate it and trigger a SIGBUS for us. So let's
1428          * stupidly read it */
1429
1430         if (fstat(fd, &st) < 0) {
1431                 log_error("Failed to stat passed file, ignoring: %m");
1432                 return;
1433         }
1434
1435         if (!S_ISREG(st.st_mode)) {
1436                 log_error("File passed is not regular. Ignoring.");
1437                 return;
1438         }
1439
1440         if (st.st_size <= 0)
1441                 return;
1442
1443         if (st.st_size > ENTRY_SIZE_MAX) {
1444                 log_error("File passed too large. Ignoring.");
1445                 return;
1446         }
1447
1448         p = malloc(st.st_size);
1449         if (!p) {
1450                 log_oom();
1451                 return;
1452         }
1453
1454         n = pread(fd, p, st.st_size, 0);
1455         if (n < 0)
1456                 log_error("Failed to read file, ignoring: %s", strerror(-n));
1457         else if (n > 0)
1458                 process_native_message(s, p, n, ucred, tv, label, label_len);
1459
1460         free(p);
1461 }
1462
1463 static int stdout_stream_log(StdoutStream *s, const char *p) {
1464         struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1465         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1466         unsigned n = 0;
1467         int priority;
1468         char *label = NULL;
1469         size_t label_len = 0;
1470
1471         assert(s);
1472         assert(p);
1473
1474         if (isempty(p))
1475                 return 0;
1476
1477         priority = s->priority;
1478
1479         if (s->level_prefix)
1480                 parse_syslog_priority((char**) &p, &priority);
1481
1482         if (s->forward_to_syslog || s->server->forward_to_syslog)
1483                 forward_syslog(s->server, fixup_priority(priority), s->identifier, p, &s->ucred, NULL);
1484
1485         if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1486                 forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1487
1488         if (s->forward_to_console || s->server->forward_to_console)
1489                 forward_console(s->server, priority, s->identifier, p, &s->ucred);
1490
1491         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1492
1493         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1494                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1495
1496         if (priority & LOG_FACMASK)
1497                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1498                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1499
1500         if (s->identifier) {
1501                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1502                 if (syslog_identifier)
1503                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1504         }
1505
1506         message = strappend("MESSAGE=", p);
1507         if (message)
1508                 IOVEC_SET_STRING(iovec[n++], message);
1509
1510 #ifdef HAVE_SELINUX
1511         if (s->security_context) {
1512                 label = (char*) s->security_context;
1513                 label_len = strlen((char*) s->security_context);
1514         }
1515 #endif
1516
1517         dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, label, label_len, s->unit_id, priority);
1518
1519         free(message);
1520         free(syslog_priority);
1521         free(syslog_facility);
1522         free(syslog_identifier);
1523
1524         return 0;
1525 }
1526
1527 static int stdout_stream_line(StdoutStream *s, char *p) {
1528         int r;
1529
1530         assert(s);
1531         assert(p);
1532
1533         p = strstrip(p);
1534
1535         switch (s->state) {
1536
1537         case STDOUT_STREAM_IDENTIFIER:
1538                 if (isempty(p))
1539                         s->identifier = NULL;
1540                 else  {
1541                         s->identifier = strdup(p);
1542                         if (!s->identifier)
1543                                 return log_oom();
1544                 }
1545
1546                 s->state = STDOUT_STREAM_UNIT_ID;
1547                 return 0;
1548
1549         case STDOUT_STREAM_UNIT_ID:
1550                 if (s->ucred.uid == 0) {
1551                         if (isempty(p))
1552                                 s->unit_id = NULL;
1553                         else  {
1554                                 s->unit_id = strdup(p);
1555                                 if (!s->unit_id)
1556                                         return log_oom();
1557                         }
1558                 }
1559
1560                 s->state = STDOUT_STREAM_PRIORITY;
1561                 return 0;
1562
1563         case STDOUT_STREAM_PRIORITY:
1564                 r = safe_atoi(p, &s->priority);
1565                 if (r < 0 || s->priority <= 0 || s->priority >= 999) {
1566                         log_warning("Failed to parse log priority line.");
1567                         return -EINVAL;
1568                 }
1569
1570                 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1571                 return 0;
1572
1573         case STDOUT_STREAM_LEVEL_PREFIX:
1574                 r = parse_boolean(p);
1575                 if (r < 0) {
1576                         log_warning("Failed to parse level prefix line.");
1577                         return -EINVAL;
1578                 }
1579
1580                 s->level_prefix = !!r;
1581                 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1582                 return 0;
1583
1584         case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1585                 r = parse_boolean(p);
1586                 if (r < 0) {
1587                         log_warning("Failed to parse forward to syslog line.");
1588                         return -EINVAL;
1589                 }
1590
1591                 s->forward_to_syslog = !!r;
1592                 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1593                 return 0;
1594
1595         case STDOUT_STREAM_FORWARD_TO_KMSG:
1596                 r = parse_boolean(p);
1597                 if (r < 0) {
1598                         log_warning("Failed to parse copy to kmsg line.");
1599                         return -EINVAL;
1600                 }
1601
1602                 s->forward_to_kmsg = !!r;
1603                 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1604                 return 0;
1605
1606         case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1607                 r = parse_boolean(p);
1608                 if (r < 0) {
1609                         log_warning("Failed to parse copy to console line.");
1610                         return -EINVAL;
1611                 }
1612
1613                 s->forward_to_console = !!r;
1614                 s->state = STDOUT_STREAM_RUNNING;
1615                 return 0;
1616
1617         case STDOUT_STREAM_RUNNING:
1618                 return stdout_stream_log(s, p);
1619         }
1620
1621         assert_not_reached("Unknown stream state");
1622 }
1623
1624 static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
1625         char *p;
1626         size_t remaining;
1627         int r;
1628
1629         assert(s);
1630
1631         p = s->buffer;
1632         remaining = s->length;
1633         for (;;) {
1634                 char *end;
1635                 size_t skip;
1636
1637                 end = memchr(p, '\n', remaining);
1638                 if (end)
1639                         skip = end - p + 1;
1640                 else if (remaining >= sizeof(s->buffer) - 1) {
1641                         end = p + sizeof(s->buffer) - 1;
1642                         skip = remaining;
1643                 } else
1644                         break;
1645
1646                 *end = 0;
1647
1648                 r = stdout_stream_line(s, p);
1649                 if (r < 0)
1650                         return r;
1651
1652                 remaining -= skip;
1653                 p += skip;
1654         }
1655
1656         if (force_flush && remaining > 0) {
1657                 p[remaining] = 0;
1658                 r = stdout_stream_line(s, p);
1659                 if (r < 0)
1660                         return r;
1661
1662                 p += remaining;
1663                 remaining = 0;
1664         }
1665
1666         if (p > s->buffer) {
1667                 memmove(s->buffer, p, remaining);
1668                 s->length = remaining;
1669         }
1670
1671         return 0;
1672 }
1673
1674 static int stdout_stream_process(StdoutStream *s) {
1675         ssize_t l;
1676         int r;
1677
1678         assert(s);
1679
1680         l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1681         if (l < 0) {
1682
1683                 if (errno == EAGAIN)
1684                         return 0;
1685
1686                 log_warning("Failed to read from stream: %m");
1687                 return -errno;
1688         }
1689
1690         if (l == 0) {
1691                 r = stdout_stream_scan(s, true);
1692                 if (r < 0)
1693                         return r;
1694
1695                 return 0;
1696         }
1697
1698         s->length += l;
1699         r = stdout_stream_scan(s, false);
1700         if (r < 0)
1701                 return r;
1702
1703         return 1;
1704
1705 }
1706
1707 static void stdout_stream_free(StdoutStream *s) {
1708         assert(s);
1709
1710         if (s->server) {
1711                 assert(s->server->n_stdout_streams > 0);
1712                 s->server->n_stdout_streams --;
1713                 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
1714         }
1715
1716         if (s->fd >= 0) {
1717                 if (s->server)
1718                         epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1719
1720                 close_nointr_nofail(s->fd);
1721         }
1722
1723 #ifdef HAVE_SELINUX
1724         if (s->security_context)
1725                 freecon(s->security_context);
1726 #endif
1727
1728         free(s->identifier);
1729         free(s);
1730 }
1731
1732 static int stdout_stream_new(Server *s) {
1733         StdoutStream *stream;
1734         int fd, r;
1735         socklen_t len;
1736         struct epoll_event ev;
1737
1738         assert(s);
1739
1740         fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1741         if (fd < 0) {
1742                 if (errno == EAGAIN)
1743                         return 0;
1744
1745                 log_error("Failed to accept stdout connection: %m");
1746                 return -errno;
1747         }
1748
1749         if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1750                 log_warning("Too many stdout streams, refusing connection.");
1751                 close_nointr_nofail(fd);
1752                 return 0;
1753         }
1754
1755         stream = new0(StdoutStream, 1);
1756         if (!stream) {
1757                 close_nointr_nofail(fd);
1758                 return log_oom();
1759         }
1760
1761         stream->fd = fd;
1762
1763         len = sizeof(stream->ucred);
1764         if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1765                 log_error("Failed to determine peer credentials: %m");
1766                 r = -errno;
1767                 goto fail;
1768         }
1769
1770 #ifdef HAVE_SELINUX
1771         if (getpeercon(fd, &stream->security_context) < 0 && errno != ENOPROTOOPT)
1772                 log_error("Failed to determine peer security context: %m");
1773 #endif
1774
1775         if (shutdown(fd, SHUT_WR) < 0) {
1776                 log_error("Failed to shutdown writing side of socket: %m");
1777                 r = -errno;
1778                 goto fail;
1779         }
1780
1781         zero(ev);
1782         ev.data.ptr = stream;
1783         ev.events = EPOLLIN;
1784         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1785                 log_error("Failed to add stream to event loop: %m");
1786                 r = -errno;
1787                 goto fail;
1788         }
1789
1790         stream->server = s;
1791         LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1792         s->n_stdout_streams ++;
1793
1794         return 0;
1795
1796 fail:
1797         stdout_stream_free(stream);
1798         return r;
1799 }
1800
/* Returns true if the string 'pid' can be parsed by parse_pid() and the
 * resulting PID equals the PID of the calling process. */
static bool is_us(const char *pid) {
        pid_t parsed;

        assert(pid);

        return parse_pid(pid, &parsed) >= 0 && parsed == getpid();
}
1811
1812 static void dev_kmsg_record(Server *s, char *p, size_t l) {
1813         struct iovec iovec[N_IOVEC_META_FIELDS + 7 + N_IOVEC_KERNEL_FIELDS];
1814         char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
1815         int priority, r;
1816         unsigned n = 0, z = 0, j;
1817         usec_t usec;
1818         char *identifier = NULL, *pid = NULL, *e, *f, *k;
1819         uint64_t serial;
1820         size_t pl;
1821
1822         assert(s);
1823         assert(p);
1824
1825         if (l <= 0)
1826                 return;
1827
1828         e = memchr(p, ',', l);
1829         if (!e)
1830                 return;
1831         *e = 0;
1832
1833         r = safe_atoi(p, &priority);
1834         if (r < 0 || priority < 0 || priority > 999)
1835                 return;
1836
1837         if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
1838                 return;
1839
1840         l -= (e - p) + 1;
1841         p = e + 1;
1842         e = memchr(p, ',', l);
1843         if (!e)
1844                 return;
1845         *e = 0;
1846
1847         r = safe_atou64(p, &serial);
1848         if (r < 0)
1849                 return;
1850
1851         if (s->kernel_seqnum) {
1852                 /* We already read this one? */
1853                 if (serial < *s->kernel_seqnum)
1854                         return;
1855
1856                 /* Did we lose any? */
1857                 if (serial > *s->kernel_seqnum)
1858                         driver_message(s, SD_MESSAGE_JOURNAL_MISSED, "Missed %llu kernel messages", (unsigned long long) serial - *s->kernel_seqnum - 1);
1859
1860                 /* Make sure we never read this one again. Note that
1861                  * we always store the next message serial we expect
1862                  * here, simply because this makes handling the first
1863                  * message with serial 0 easy. */
1864                 *s->kernel_seqnum = serial + 1;
1865         }
1866
1867         l -= (e - p) + 1;
1868         p = e + 1;
1869         f = memchr(p, ';', l);
1870         if (!f)
1871                 return;
1872         /* Kernel 3.6 has the flags field, kernel 3.5 lacks that */
1873         e = memchr(p, ',', l);
1874         if (!e || f < e)
1875                 e = f;
1876         *e = 0;
1877
1878         r = parse_usec(p, &usec);
1879         if (r < 0)
1880                 return;
1881
1882         l -= (f - p) + 1;
1883         p = f + 1;
1884         e = memchr(p, '\n', l);
1885         if (!e)
1886                 return;
1887         *e = 0;
1888
1889         pl = e - p;
1890         l -= (e - p) + 1;
1891         k = e + 1;
1892
1893         for (j = 0; l > 0 && j < N_IOVEC_KERNEL_FIELDS; j++) {
1894                 char *m;
1895                 /* Meta data fields attached */
1896
1897                 if (*k != ' ')
1898                         break;
1899
1900                 k ++, l --;
1901
1902                 e = memchr(k, '\n', l);
1903                 if (!e)
1904                         return;
1905
1906                 *e = 0;
1907
1908                 m = cunescape_length_with_prefix(k, e - k, "_KERNEL_");
1909                 if (!m)
1910                         break;
1911
1912                 IOVEC_SET_STRING(iovec[n++], m);
1913                 z++;
1914
1915                 l -= (e - k) + 1;
1916                 k = e + 1;
1917         }
1918
1919         if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
1920                      (unsigned long long) usec) >= 0)
1921                 IOVEC_SET_STRING(iovec[n++], source_time);
1922
1923         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
1924
1925         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1926                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1927
1928         if ((priority & LOG_FACMASK) == LOG_KERN)
1929                 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
1930         else {
1931                 read_identifier((const char**) &p, &identifier, &pid);
1932
1933                 /* Avoid any messages we generated ourselves via
1934                  * log_info() and friends. */
1935                 if (pid && is_us(pid))
1936                         goto finish;
1937
1938                 if (identifier) {
1939                         syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1940                         if (syslog_identifier)
1941                                 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1942                 }
1943
1944                 if (pid) {
1945                         syslog_pid = strappend("SYSLOG_PID=", pid);
1946                         if (syslog_pid)
1947                                 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1948                 }
1949
1950                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1951                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1952         }
1953
1954         message = cunescape_length_with_prefix(p, pl, "MESSAGE=");
1955         if (message)
1956                 IOVEC_SET_STRING(iovec[n++], message);
1957
1958         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, NULL, priority);
1959
1960 finish:
1961         for (j = 0; j < z; j++)
1962                 free(iovec[j].iov_base);
1963
1964         free(message);
1965         free(syslog_priority);
1966         free(syslog_identifier);
1967         free(syslog_pid);
1968         free(syslog_facility);
1969         free(source_time);
1970         free(identifier);
1971         free(pid);
1972 }
1973
1974 static int system_journal_open(Server *s) {
1975         int r;
1976         char *fn;
1977         sd_id128_t machine;
1978         char ids[33];
1979
1980         r = sd_id128_get_machine(&machine);
1981         if (r < 0)
1982                 return r;
1983
1984         sd_id128_to_string(machine, ids);
1985
1986         if (!s->system_journal &&
1987             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
1988             access("/run/systemd/journal/flushed", F_OK) >= 0) {
1989
1990                 /* If in auto mode: first try to create the machine
1991                  * path, but not the prefix.
1992                  *
1993                  * If in persistent mode: create /var/log/journal and
1994                  * the machine path */
1995
1996                 if (s->storage == STORAGE_PERSISTENT)
1997                         (void) mkdir("/var/log/journal/", 0755);
1998
1999                 fn = strappend("/var/log/journal/", ids);
2000                 if (!fn)
2001                         return -ENOMEM;
2002
2003                 (void) mkdir(fn, 0755);
2004                 free(fn);
2005
2006                 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
2007                 if (!fn)
2008                         return -ENOMEM;
2009
2010                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, true, &s->system_metrics, s->mmap, NULL, &s->system_journal);
2011                 free(fn);
2012
2013                 if (r >= 0)
2014                         server_fix_perms(s, s->system_journal, 0);
2015                 else if (r < 0) {
2016
2017                         if (r != -ENOENT && r != -EROFS)
2018                                 log_warning("Failed to open system journal: %s", strerror(-r));
2019
2020                         r = 0;
2021                 }
2022         }
2023
2024         if (!s->runtime_journal &&
2025             (s->storage != STORAGE_NONE)) {
2026
2027                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
2028                 if (!fn)
2029                         return -ENOMEM;
2030
2031                 if (s->system_journal) {
2032
2033                         /* Try to open the runtime journal, but only
2034                          * if it already exists, so that we can flush
2035                          * it into the system journal */
2036
2037                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
2038                         free(fn);
2039
2040                         if (r < 0) {
2041                                 if (r != -ENOENT)
2042                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
2043
2044                                 r = 0;
2045                         }
2046
2047                 } else {
2048
2049                         /* OK, we really need the runtime journal, so create
2050                          * it if necessary. */
2051
2052                         (void) mkdir_parents(fn, 0755);
2053                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
2054                         free(fn);
2055
2056                         if (r < 0) {
2057                                 log_error("Failed to open runtime journal: %s", strerror(-r));
2058                                 return r;
2059                         }
2060                 }
2061
2062                 if (s->runtime_journal)
2063                         server_fix_perms(s, s->runtime_journal, 0);
2064         }
2065
2066         return r;
2067 }
2068
2069 static int server_flush_to_var(Server *s) {
2070         Object *o = NULL;
2071         int r;
2072         sd_id128_t machine;
2073         sd_journal *j;
2074
2075         assert(s);
2076
2077         if (s->storage != STORAGE_AUTO &&
2078             s->storage != STORAGE_PERSISTENT)
2079                 return 0;
2080
2081         if (!s->runtime_journal)
2082                 return 0;
2083
2084         system_journal_open(s);
2085
2086         if (!s->system_journal)
2087                 return 0;
2088
2089         log_info("Flushing to /var...");
2090
2091         r = sd_id128_get_machine(&machine);
2092         if (r < 0) {
2093                 log_error("Failed to get machine id: %s", strerror(-r));
2094                 return r;
2095         }
2096
2097         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
2098         if (r < 0) {
2099                 log_error("Failed to read runtime journal: %s", strerror(-r));
2100                 return r;
2101         }
2102
2103         SD_JOURNAL_FOREACH(j) {
2104                 JournalFile *f;
2105
2106                 f = j->current_file;
2107                 assert(f && f->current_offset > 0);
2108
2109                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2110                 if (r < 0) {
2111                         log_error("Can't read entry: %s", strerror(-r));
2112                         goto finish;
2113                 }
2114
2115                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2116                 if (r == -E2BIG) {
2117                         log_info("Allocation limit reached.");
2118
2119                         journal_file_post_change(s->system_journal);
2120                         server_rotate(s);
2121                         server_vacuum(s);
2122
2123                         r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2124                 }
2125
2126                 if (r < 0) {
2127                         log_error("Can't write entry: %s", strerror(-r));
2128                         goto finish;
2129                 }
2130         }
2131
2132 finish:
2133         journal_file_post_change(s->system_journal);
2134
2135         journal_file_close(s->runtime_journal);
2136         s->runtime_journal = NULL;
2137
2138         if (r >= 0)
2139                 rm_rf("/run/log/journal", false, true, false);
2140
2141         return r;
2142 }
2143
2144 static int server_read_dev_kmsg(Server *s) {
2145         char buffer[8192+1]; /* the kernel-side limit per record is 8K currently */
2146         ssize_t l;
2147
2148         assert(s);
2149         assert(s->dev_kmsg_fd >= 0);
2150
2151         l = read(s->dev_kmsg_fd, buffer, sizeof(buffer) - 1);
2152         if (l == 0)
2153                 return 0;
2154         if (l < 0) {
2155                 /* Old kernels who don't allow reading from /dev/kmsg
2156                  * return EINVAL when we try. So handle this cleanly,
2157                  * but don' try to ever read from it again. */
2158                 if (errno == EINVAL) {
2159                         epoll_ctl(s->epoll_fd, EPOLL_CTL_DEL, s->dev_kmsg_fd, NULL);
2160                         return 0;
2161                 }
2162
2163                 if (errno == EAGAIN || errno == EINTR)
2164                         return 0;
2165
2166                 log_error("Failed to read from kernel: %m");
2167                 return -errno;
2168         }
2169
2170         dev_kmsg_record(s, buffer, l);
2171         return 1;
2172 }
2173
2174 static int server_flush_dev_kmsg(Server *s) {
2175         int r;
2176
2177         assert(s);
2178
2179         if (s->dev_kmsg_fd < 0)
2180                 return 0;
2181
2182         if (!s->dev_kmsg_readable)
2183                 return 0;
2184
2185         log_info("Flushing /dev/kmsg...");
2186
2187         for (;;) {
2188                 r = server_read_dev_kmsg(s);
2189                 if (r < 0)
2190                         return r;
2191
2192                 if (r == 0)
2193                         break;
2194         }
2195
2196         return 0;
2197 }
2198
2199 static int process_event(Server *s, struct epoll_event *ev) {
2200         assert(s);
2201         assert(ev);
2202
2203         if (ev->data.fd == s->signal_fd) {
2204                 struct signalfd_siginfo sfsi;
2205                 ssize_t n;
2206
2207                 if (ev->events != EPOLLIN) {
2208                         log_info("Got invalid event from epoll.");
2209                         return -EIO;
2210                 }
2211
2212                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
2213                 if (n != sizeof(sfsi)) {
2214
2215                         if (n >= 0)
2216                                 return -EIO;
2217
2218                         if (errno == EINTR || errno == EAGAIN)
2219                                 return 1;
2220
2221                         return -errno;
2222                 }
2223
2224                 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
2225
2226                 if (sfsi.ssi_signo == SIGUSR1) {
2227                         touch("/run/systemd/journal/flushed");
2228                         server_flush_to_var(s);
2229                         return 1;
2230                 }
2231
2232                 if (sfsi.ssi_signo == SIGUSR2) {
2233                         server_rotate(s);
2234                         server_vacuum(s);
2235                         return 1;
2236                 }
2237
2238                 return 0;
2239
2240         } else if (ev->data.fd == s->dev_kmsg_fd) {
2241                 int r;
2242
2243                 if (ev->events != EPOLLIN) {
2244                         log_info("Got invalid event from epoll.");
2245                         return -EIO;
2246                 }
2247
2248                 r = server_read_dev_kmsg(s);
2249                 if (r < 0)
2250                         return r;
2251
2252                 return 1;
2253
2254         } else if (ev->data.fd == s->native_fd ||
2255                    ev->data.fd == s->syslog_fd) {
2256
2257                 if (ev->events != EPOLLIN) {
2258                         log_info("Got invalid event from epoll.");
2259                         return -EIO;
2260                 }
2261
2262                 for (;;) {
2263                         struct msghdr msghdr;
2264                         struct iovec iovec;
2265                         struct ucred *ucred = NULL;
2266                         struct timeval *tv = NULL;
2267                         struct cmsghdr *cmsg;
2268                         char *label = NULL;
2269                         size_t label_len = 0;
2270                         union {
2271                                 struct cmsghdr cmsghdr;
2272
2273                                 /* We use NAME_MAX space for the
2274                                  * SELinux label here. The kernel
2275                                  * currently enforces no limit, but
2276                                  * according to suggestions from the
2277                                  * SELinux people this will change and
2278                                  * it will probably be identical to
2279                                  * NAME_MAX. For now we use that, but
2280                                  * this should be updated one day when
2281                                  * the final limit is known.*/
2282                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
2283                                             CMSG_SPACE(sizeof(struct timeval)) +
2284                                             CMSG_SPACE(sizeof(int)) + /* fd */
2285                                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
2286                         } control;
2287                         ssize_t n;
2288                         int v;
2289                         int *fds = NULL;
2290                         unsigned n_fds = 0;
2291
2292                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
2293                                 log_error("SIOCINQ failed: %m");
2294                                 return -errno;
2295                         }
2296
2297                         if (s->buffer_size < (size_t) v) {
2298                                 void *b;
2299                                 size_t l;
2300
2301                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
2302                                 b = realloc(s->buffer, l+1);
2303
2304                                 if (!b) {
2305                                         log_error("Couldn't increase buffer.");
2306                                         return -ENOMEM;
2307                                 }
2308
2309                                 s->buffer_size = l;
2310                                 s->buffer = b;
2311                         }
2312
2313                         zero(iovec);
2314                         iovec.iov_base = s->buffer;
2315                         iovec.iov_len = s->buffer_size;
2316
2317                         zero(control);
2318                         zero(msghdr);
2319                         msghdr.msg_iov = &iovec;
2320                         msghdr.msg_iovlen = 1;
2321                         msghdr.msg_control = &control;
2322                         msghdr.msg_controllen = sizeof(control);
2323
2324                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
2325                         if (n < 0) {
2326
2327                                 if (errno == EINTR || errno == EAGAIN)
2328                                         return 1;
2329
2330                                 log_error("recvmsg() failed: %m");
2331                                 return -errno;
2332                         }
2333
2334                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
2335
2336                                 if (cmsg->cmsg_level == SOL_SOCKET &&
2337                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
2338                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
2339                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
2340                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2341                                          cmsg->cmsg_type == SCM_SECURITY) {
2342                                         label = (char*) CMSG_DATA(cmsg);
2343                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
2344                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
2345                                          cmsg->cmsg_type == SO_TIMESTAMP &&
2346                                          cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
2347                                         tv = (struct timeval*) CMSG_DATA(cmsg);
2348                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2349                                          cmsg->cmsg_type == SCM_RIGHTS) {
2350                                         fds = (int*) CMSG_DATA(cmsg);
2351                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
2352                                 }
2353                         }
2354
2355                         if (ev->data.fd == s->syslog_fd) {
2356                                 char *e;
2357
2358                                 if (n > 0 && n_fds == 0) {
2359                                         e = memchr(s->buffer, '\n', n);
2360                                         if (e)
2361                                                 *e = 0;
2362                                         else
2363                                                 s->buffer[n] = 0;
2364
2365                                         process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
2366                                 } else if (n_fds > 0)
2367                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
2368
2369                         } else {
2370                                 if (n > 0 && n_fds == 0)
2371                                         process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
2372                                 else if (n == 0 && n_fds == 1)
2373                                         process_native_file(s, fds[0], ucred, tv, label, label_len);
2374                                 else if (n_fds > 0)
2375                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
2376                         }
2377
2378                         close_many(fds, n_fds);
2379                 }
2380
2381                 return 1;
2382
2383         } else if (ev->data.fd == s->stdout_fd) {
2384
2385                 if (ev->events != EPOLLIN) {
2386                         log_info("Got invalid event from epoll.");
2387                         return -EIO;
2388                 }
2389
2390                 stdout_stream_new(s);
2391                 return 1;
2392
2393         } else {
2394                 StdoutStream *stream;
2395
2396                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
2397                         log_info("Got invalid event from epoll.");
2398                         return -EIO;
2399                 }
2400
2401                 /* If it is none of the well-known fds, it must be an
2402                  * stdout stream fd. Note that this is a bit ugly here
2403                  * (since we rely that none of the well-known fds
2404                  * could be interpreted as pointer), but nonetheless
2405                  * safe, since the well-known fds would never get an
2406                  * fd > 4096, i.e. beyond the first memory page */
2407
2408                 stream = ev->data.ptr;
2409
2410                 if (stdout_stream_process(stream) <= 0)
2411                         stdout_stream_free(stream);
2412
2413                 return 1;
2414         }
2415
2416         log_error("Unknown event.");
2417         return 0;
2418 }
2419
2420 static int open_syslog_socket(Server *s) {
2421         union sockaddr_union sa;
2422         int one, r;
2423         struct epoll_event ev;
2424
2425         assert(s);
2426
2427         if (s->syslog_fd < 0) {
2428
2429                 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2430                 if (s->syslog_fd < 0) {
2431                         log_error("socket() failed: %m");
2432                         return -errno;
2433                 }
2434
2435                 zero(sa);
2436                 sa.un.sun_family = AF_UNIX;
2437                 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
2438
2439                 unlink(sa.un.sun_path);
2440
2441                 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2442                 if (r < 0) {
2443                         log_error("bind() failed: %m");
2444                         return -errno;
2445                 }
2446
2447                 chmod(sa.un.sun_path, 0666);
2448         } else
2449                 fd_nonblock(s->syslog_fd, 1);
2450
2451         one = 1;
2452         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2453         if (r < 0) {
2454                 log_error("SO_PASSCRED failed: %m");
2455                 return -errno;
2456         }
2457
2458 #ifdef HAVE_SELINUX
2459         one = 1;
2460         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2461         if (r < 0)
2462                 log_warning("SO_PASSSEC failed: %m");
2463 #endif
2464
2465         one = 1;
2466         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2467         if (r < 0) {
2468                 log_error("SO_TIMESTAMP failed: %m");
2469                 return -errno;
2470         }
2471
2472         zero(ev);
2473         ev.events = EPOLLIN;
2474         ev.data.fd = s->syslog_fd;
2475         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2476                 log_error("Failed to add syslog server fd to epoll object: %m");
2477                 return -errno;
2478         }
2479
2480         return 0;
2481 }
2482
2483 static int open_native_socket(Server*s) {
2484         union sockaddr_union sa;
2485         int one, r;
2486         struct epoll_event ev;
2487
2488         assert(s);
2489
2490         if (s->native_fd < 0) {
2491
2492                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2493                 if (s->native_fd < 0) {
2494                         log_error("socket() failed: %m");
2495                         return -errno;
2496                 }
2497
2498                 zero(sa);
2499                 sa.un.sun_family = AF_UNIX;
2500                 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
2501
2502                 unlink(sa.un.sun_path);
2503
2504                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2505                 if (r < 0) {
2506                         log_error("bind() failed: %m");
2507                         return -errno;
2508                 }
2509
2510                 chmod(sa.un.sun_path, 0666);
2511         } else
2512                 fd_nonblock(s->native_fd, 1);
2513
2514         one = 1;
2515         r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2516         if (r < 0) {
2517                 log_error("SO_PASSCRED failed: %m");
2518                 return -errno;
2519         }
2520
2521 #ifdef HAVE_SELINUX
2522         one = 1;
2523         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2524         if (r < 0)
2525                 log_warning("SO_PASSSEC failed: %m");
2526 #endif
2527
2528         one = 1;
2529         r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2530         if (r < 0) {
2531                 log_error("SO_TIMESTAMP failed: %m");
2532                 return -errno;
2533         }
2534
2535         zero(ev);
2536         ev.events = EPOLLIN;
2537         ev.data.fd = s->native_fd;
2538         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2539                 log_error("Failed to add native server fd to epoll object: %m");
2540                 return -errno;
2541         }
2542
2543         return 0;
2544 }
2545
2546 static int open_stdout_socket(Server *s) {
2547         union sockaddr_union sa;
2548         int r;
2549         struct epoll_event ev;
2550
2551         assert(s);
2552
2553         if (s->stdout_fd < 0) {
2554
2555                 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2556                 if (s->stdout_fd < 0) {
2557                         log_error("socket() failed: %m");
2558                         return -errno;
2559                 }
2560
2561                 zero(sa);
2562                 sa.un.sun_family = AF_UNIX;
2563                 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
2564
2565                 unlink(sa.un.sun_path);
2566
2567                 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2568                 if (r < 0) {
2569                         log_error("bind() failed: %m");
2570                         return -errno;
2571                 }
2572
2573                 chmod(sa.un.sun_path, 0666);
2574
2575                 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2576                         log_error("liste() failed: %m");
2577                         return -errno;
2578                 }
2579         } else
2580                 fd_nonblock(s->stdout_fd, 1);
2581
2582         zero(ev);
2583         ev.events = EPOLLIN;
2584         ev.data.fd = s->stdout_fd;
2585         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2586                 log_error("Failed to add stdout server fd to epoll object: %m");
2587                 return -errno;
2588         }
2589
2590         return 0;
2591 }
2592
2593 static int open_dev_kmsg(Server *s) {
2594         struct epoll_event ev;
2595
2596         assert(s);
2597
2598         s->dev_kmsg_fd = open("/dev/kmsg", O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2599         if (s->dev_kmsg_fd < 0) {
2600                 log_warning("Failed to open /dev/kmsg, ignoring: %m");
2601                 return 0;
2602         }
2603
2604         zero(ev);
2605         ev.events = EPOLLIN;
2606         ev.data.fd = s->dev_kmsg_fd;
2607         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->dev_kmsg_fd, &ev) < 0) {
2608
2609                 /* This will fail with EPERM on older kernels where
2610                  * /dev/kmsg is not readable. */
2611                 if (errno == EPERM)
2612                         return 0;
2613
2614                 log_error("Failed to add /dev/kmsg fd to epoll object: %m");
2615                 return -errno;
2616         }
2617
2618         s->dev_kmsg_readable = true;
2619
2620         return 0;
2621 }
2622
2623 static int open_kernel_seqnum(Server *s) {
2624         int fd;
2625         uint64_t *p;
2626
2627         assert(s);
2628
2629         /* We store the seqnum we last read in an mmaped file. That
2630          * way we can just use it like a variable, but it is
2631          * persistant and automatically flushed at reboot. */
2632
2633         fd = open("/run/systemd/journal/kernel-seqnum", O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0644);
2634         if (fd < 0) {
2635                 log_error("Failed to open /run/systemd/journal/kernel-seqnum, ignoring: %m");
2636                 return 0;
2637         }
2638
2639         if (posix_fallocate(fd, 0, sizeof(uint64_t)) < 0) {
2640                 log_error("Failed to allocate sequential number file, ignoring: %m");
2641                 close_nointr_nofail(fd);
2642                 return 0;
2643         }
2644
2645         p = mmap(NULL, sizeof(uint64_t), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2646         if (p == MAP_FAILED) {
2647                 log_error("Failed to map sequential number file, ignoring: %m");
2648                 close_nointr_nofail(fd);
2649                 return 0;
2650         }
2651
2652         close_nointr_nofail(fd);
2653         s->kernel_seqnum = p;
2654
2655         return 0;
2656 }
2657
2658 static int open_signalfd(Server *s) {
2659         sigset_t mask;
2660         struct epoll_event ev;
2661
2662         assert(s);
2663
2664         assert_se(sigemptyset(&mask) == 0);
2665         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
2666         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2667
2668         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2669         if (s->signal_fd < 0) {
2670                 log_error("signalfd(): %m");
2671                 return -errno;
2672         }
2673
2674         zero(ev);
2675         ev.events = EPOLLIN;
2676         ev.data.fd = s->signal_fd;
2677
2678         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2679                 log_error("epoll_ctl(): %m");
2680                 return -errno;
2681         }
2682
2683         return 0;
2684 }
2685
2686 static int server_parse_proc_cmdline(Server *s) {
2687         char *line, *w, *state;
2688         int r;
2689         size_t l;
2690
2691         if (detect_container(NULL) > 0)
2692                 return 0;
2693
2694         r = read_one_line_file("/proc/cmdline", &line);
2695         if (r < 0) {
2696                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2697                 return 0;
2698         }
2699
2700         FOREACH_WORD_QUOTED(w, l, line, state) {
2701                 char *word;
2702
2703                 word = strndup(w, l);
2704                 if (!word) {
2705                         r = -ENOMEM;
2706                         goto finish;
2707                 }
2708
2709                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
2710                         r = parse_boolean(word + 35);
2711                         if (r < 0)
2712                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2713                         else
2714                                 s->forward_to_syslog = r;
2715                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
2716                         r = parse_boolean(word + 33);
2717                         if (r < 0)
2718                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2719                         else
2720                                 s->forward_to_kmsg = r;
2721                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
2722                         r = parse_boolean(word + 36);
2723                         if (r < 0)
2724                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2725                         else
2726                                 s->forward_to_console = r;
2727                 } else if (startswith(word, "systemd.journald"))
2728                         log_warning("Invalid systemd.journald parameter. Ignoring.");
2729
2730                 free(word);
2731         }
2732
2733         r = 0;
2734
2735 finish:
2736         free(line);
2737         return r;
2738 }
2739
2740 static int server_parse_config_file(Server *s) {
2741         FILE *f;
2742         const char *fn;
2743         int r;
2744
2745         assert(s);
2746
2747         fn = "/etc/systemd/journald.conf";
2748         f = fopen(fn, "re");
2749         if (!f) {
2750                 if (errno == ENOENT)
2751                         return 0;
2752
2753                 log_warning("Failed to open configuration file %s: %m", fn);
2754                 return -errno;
2755         }
2756
2757         r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2758         if (r < 0)
2759                 log_warning("Failed to parse configuration file: %s", strerror(-r));
2760
2761         fclose(f);
2762
2763         return r;
2764 }
2765
2766 static int server_init(Server *s) {
2767         int n, r, fd;
2768
2769         assert(s);
2770
2771         zero(*s);
2772         s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
2773         s->compress = true;
2774
2775         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
2776         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
2777
2778         s->forward_to_syslog = true;
2779
2780         s->max_level_store = LOG_DEBUG;
2781         s->max_level_syslog = LOG_DEBUG;
2782         s->max_level_kmsg = LOG_NOTICE;
2783         s->max_level_console = LOG_INFO;
2784
2785         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
2786         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
2787
2788         server_parse_config_file(s);
2789         server_parse_proc_cmdline(s);
2790
2791         mkdir_p("/run/systemd/journal", 0755);
2792
2793         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
2794         if (!s->user_journals)
2795                 return log_oom();
2796
2797         s->mmap = mmap_cache_new(_OBJECT_TYPE_MAX, USER_JOURNALS_MAX + 2);
2798         if (!s->mmap)
2799                 return log_oom();
2800
2801         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2802         if (s->epoll_fd < 0) {
2803                 log_error("Failed to create epoll object: %m");
2804                 return -errno;
2805         }
2806
2807         n = sd_listen_fds(true);
2808         if (n < 0) {
2809                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
2810                 return n;
2811         }
2812
2813         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2814
2815                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
2816
2817                         if (s->native_fd >= 0) {
2818                                 log_error("Too many native sockets passed.");
2819                                 return -EINVAL;
2820                         }
2821
2822                         s->native_fd = fd;
2823
2824                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
2825
2826                         if (s->stdout_fd >= 0) {
2827                                 log_error("Too many stdout sockets passed.");
2828                                 return -EINVAL;
2829                         }
2830
2831                         s->stdout_fd = fd;
2832
2833                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
2834
2835                         if (s->syslog_fd >= 0) {
2836                                 log_error("Too many /dev/log sockets passed.");
2837                                 return -EINVAL;
2838                         }
2839
2840                         s->syslog_fd = fd;
2841
2842                 } else {
2843                         log_error("Unknown socket passed.");
2844                         return -EINVAL;
2845                 }
2846         }
2847
2848         r = open_syslog_socket(s);
2849         if (r < 0)
2850                 return r;
2851
2852         r = open_native_socket(s);
2853         if (r < 0)
2854                 return r;
2855
2856         r = open_stdout_socket(s);
2857         if (r < 0)
2858                 return r;
2859
2860         r = open_dev_kmsg(s);
2861         if (r < 0)
2862                 return r;
2863
2864         r = open_kernel_seqnum(s);
2865         if (r < 0)
2866                 return r;
2867
2868         r = open_signalfd(s);
2869         if (r < 0)
2870                 return r;
2871
2872         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2873         if (!s->rate_limit)
2874                 return -ENOMEM;
2875
2876         r = system_journal_open(s);
2877         if (r < 0)
2878                 return r;
2879
2880         return 0;
2881 }
2882
2883 static void server_done(Server *s) {
2884         JournalFile *f;
2885         assert(s);
2886
2887         while (s->stdout_streams)
2888                 stdout_stream_free(s->stdout_streams);
2889
2890         if (s->system_journal)
2891                 journal_file_close(s->system_journal);
2892
2893         if (s->runtime_journal)
2894                 journal_file_close(s->runtime_journal);
2895
2896         while ((f = hashmap_steal_first(s->user_journals)))
2897                 journal_file_close(f);
2898
2899         hashmap_free(s->user_journals);
2900
2901         if (s->epoll_fd >= 0)
2902                 close_nointr_nofail(s->epoll_fd);
2903
2904         if (s->signal_fd >= 0)
2905                 close_nointr_nofail(s->signal_fd);
2906
2907         if (s->syslog_fd >= 0)
2908                 close_nointr_nofail(s->syslog_fd);
2909
2910         if (s->native_fd >= 0)
2911                 close_nointr_nofail(s->native_fd);
2912
2913         if (s->stdout_fd >= 0)
2914                 close_nointr_nofail(s->stdout_fd);
2915
2916         if (s->dev_kmsg_fd >= 0)
2917                 close_nointr_nofail(s->dev_kmsg_fd);
2918
2919         if (s->rate_limit)
2920                 journal_rate_limit_free(s->rate_limit);
2921
2922         if (s->kernel_seqnum)
2923                 munmap(s->kernel_seqnum, sizeof(uint64_t));
2924
2925         free(s->buffer);
2926         free(s->tty_path);
2927
2928         if (s->mmap)
2929                 mmap_cache_unref(s->mmap);
2930 }
2931
2932 int main(int argc, char *argv[]) {
2933         Server server;
2934         int r;
2935
2936         /* if (getppid() != 1) { */
2937         /*         log_error("This program should be invoked by init only."); */
2938         /*         return EXIT_FAILURE; */
2939         /* } */
2940
2941         if (argc > 1) {
2942                 log_error("This program does not take arguments.");
2943                 return EXIT_FAILURE;
2944         }
2945
2946         log_set_target(LOG_TARGET_SAFE);
2947         log_set_facility(LOG_SYSLOG);
2948         log_set_max_level(LOG_DEBUG);
2949         log_parse_environment();
2950         log_open();
2951
2952         umask(0022);
2953
2954         r = server_init(&server);
2955         if (r < 0)
2956                 goto finish;
2957
2958         server_vacuum(&server);
2959         server_flush_to_var(&server);
2960         server_flush_dev_kmsg(&server);
2961
2962         log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
2963         driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
2964
2965         sd_notify(false,
2966                   "READY=1\n"
2967                   "STATUS=Processing requests...");
2968
2969         for (;;) {
2970                 struct epoll_event event;
2971
2972                 r = epoll_wait(server.epoll_fd, &event, 1, -1);
2973                 if (r < 0) {
2974
2975                         if (errno == EINTR)
2976                                 continue;
2977
2978                         log_error("epoll_wait() failed: %m");
2979                         r = -errno;
2980                         goto finish;
2981                 } else if (r == 0)
2982                         break;
2983
2984                 r = process_event(&server, &event);
2985                 if (r < 0)
2986                         goto finish;
2987                 else if (r == 0)
2988                         break;
2989         }
2990
2991         log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
2992         driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
2993
2994 finish:
2995         sd_notify(false,
2996                   "STATUS=Shutting down...");
2997
2998         server_done(&server);
2999
3000         return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
3001 }