chiark / gitweb /
journald: properly unescape messages from /dev/kmsg
[elogind.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
32
33 #include <systemd/sd-journal.h>
34 #include <systemd/sd-messages.h>
35 #include <systemd/sd-daemon.h>
36
37 #ifdef HAVE_LOGIND
38 #include <systemd/sd-login.h>
39 #endif
40
41 #include "mkdir.h"
42 #include "hashmap.h"
43 #include "journal-file.h"
44 #include "socket-util.h"
45 #include "cgroup-util.h"
46 #include "list.h"
47 #include "journal-rate-limit.h"
48 #include "journal-internal.h"
49 #include "conf-parser.h"
50 #include "journald.h"
51 #include "virt.h"
52 #include "missing.h"
53
54 #ifdef HAVE_ACL
55 #include <sys/acl.h>
56 #include <acl/libacl.h>
57 #include "acl-util.h"
58 #endif
59
60 #ifdef HAVE_SELINUX
61 #include <selinux/selinux.h>
62 #endif
63
/* Upper bound on simultaneously open per-user journal files:
 * find_journal() closes already-open ones while the map holds this
 * many entries or more. */
#define USER_JOURNALS_MAX 1024
/* NOTE(review): not referenced in the visible part of this file;
 * presumably caps the number of concurrent stdout stream
 * connections -- confirm. */
#define STDOUT_STREAMS_MAX 4096

/* NOTE(review): defaults for the rate limiter; the code consuming
 * them is not in the visible part of this file -- confirm. */
#define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 200

/* available_space() hands out its cached result as long as the last
 * computation is younger than this. */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)

/* Number of iovec slots callers must leave free for the metadata
 * fields dispatch_message_real() appends (it asserts
 * n + N_IOVEC_META_FIELDS <= m). */
#define N_IOVEC_META_FIELDS 17
/* NOTE(review): not referenced in the visible part of this file. */
#define N_IOVEC_KERNEL_FIELDS 64

/* NOTE(review): not referenced in the visible part of this file;
 * evaluates to 32 MiB. */
#define ENTRY_SIZE_MAX (1024*1024*32)
76
/* State of a StdoutStream (stored in its 'state' member).
 *
 * NOTE(review): the code driving this state machine is not in the
 * visible part of the file. The names mirror the members of struct
 * StdoutStream, so presumably each value before RUNNING means "the
 * next line received carries this setting" -- confirm against the
 * stream line handler. */
typedef enum StdoutStreamState {
        STDOUT_STREAM_IDENTIFIER,
        STDOUT_STREAM_UNIT_ID,
        STDOUT_STREAM_PRIORITY,
        STDOUT_STREAM_LEVEL_PREFIX,
        STDOUT_STREAM_FORWARD_TO_SYSLOG,
        STDOUT_STREAM_FORWARD_TO_KMSG,
        STDOUT_STREAM_FORWARD_TO_CONSOLE,
        STDOUT_STREAM_RUNNING
} StdoutStreamState;
87
88 struct StdoutStream {
89         Server *server;
90         StdoutStreamState state;
91
92         int fd;
93
94         struct ucred ucred;
95 #ifdef HAVE_SELINUX
96         security_context_t security_context;
97 #endif
98
99         char *identifier;
100         char *unit_id;
101         int priority;
102         bool level_prefix:1;
103         bool forward_to_syslog:1;
104         bool forward_to_kmsg:1;
105         bool forward_to_console:1;
106
107         char buffer[LINE_MAX+1];
108         size_t length;
109
110         LIST_FIELDS(StdoutStream, stdout_stream);
111 };
112
113 static const char* const storage_table[] = {
114         [STORAGE_AUTO] = "auto",
115         [STORAGE_VOLATILE] = "volatile",
116         [STORAGE_PERSISTENT] = "persistent",
117         [STORAGE_NONE] = "none"
118 };
119
120 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
121 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
122
123 static uint64_t available_space(Server *s) {
124         char ids[33], *p;
125         const char *f;
126         sd_id128_t machine;
127         struct statvfs ss;
128         uint64_t sum = 0, avail = 0, ss_avail = 0;
129         int r;
130         DIR *d;
131         usec_t ts;
132         JournalMetrics *m;
133
134         ts = now(CLOCK_MONOTONIC);
135
136         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
137                 return s->cached_available_space;
138
139         r = sd_id128_get_machine(&machine);
140         if (r < 0)
141                 return 0;
142
143         if (s->system_journal) {
144                 f = "/var/log/journal/";
145                 m = &s->system_metrics;
146         } else {
147                 f = "/run/log/journal/";
148                 m = &s->runtime_metrics;
149         }
150
151         assert(m);
152
153         p = strappend(f, sd_id128_to_string(machine, ids));
154         if (!p)
155                 return 0;
156
157         d = opendir(p);
158         free(p);
159
160         if (!d)
161                 return 0;
162
163         if (fstatvfs(dirfd(d), &ss) < 0)
164                 goto finish;
165
166         for (;;) {
167                 struct stat st;
168                 struct dirent buf, *de;
169
170                 r = readdir_r(d, &buf, &de);
171                 if (r != 0)
172                         break;
173
174                 if (!de)
175                         break;
176
177                 if (!endswith(de->d_name, ".journal") &&
178                     !endswith(de->d_name, ".journal~"))
179                         continue;
180
181                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
182                         continue;
183
184                 if (!S_ISREG(st.st_mode))
185                         continue;
186
187                 sum += (uint64_t) st.st_blocks * 512UL;
188         }
189
190         avail = sum >= m->max_use ? 0 : m->max_use - sum;
191
192         ss_avail = ss.f_bsize * ss.f_bavail;
193
194         ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
195
196         if (ss_avail < avail)
197                 avail = ss_avail;
198
199         s->cached_available_space = avail;
200         s->cached_available_space_timestamp = ts;
201
202 finish:
203         closedir(d);
204
205         return avail;
206 }
207
208 static void server_read_file_gid(Server *s) {
209         const char *adm = "adm";
210         int r;
211
212         assert(s);
213
214         if (s->file_gid_valid)
215                 return;
216
217         r = get_group_creds(&adm, &s->file_gid);
218         if (r < 0)
219                 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
220
221         /* if we couldn't read the gid, then it will be 0, but that's
222          * fine and we shouldn't try to resolve the group again, so
223          * let's just pretend it worked right-away. */
224         s->file_gid_valid = true;
225 }
226
227 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
228         int r;
229 #ifdef HAVE_ACL
230         acl_t acl;
231         acl_entry_t entry;
232         acl_permset_t permset;
233 #endif
234
235         assert(f);
236
237         server_read_file_gid(s);
238
239         r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
240         if (r < 0)
241                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
242
243 #ifdef HAVE_ACL
244         if (uid <= 0)
245                 return;
246
247         acl = acl_get_fd(f->fd);
248         if (!acl) {
249                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
250                 return;
251         }
252
253         r = acl_find_uid(acl, uid, &entry);
254         if (r <= 0) {
255
256                 if (acl_create_entry(&acl, &entry) < 0 ||
257                     acl_set_tag_type(entry, ACL_USER) < 0 ||
258                     acl_set_qualifier(entry, &uid) < 0) {
259                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
260                         goto finish;
261                 }
262         }
263
264         if (acl_get_permset(entry, &permset) < 0 ||
265             acl_add_perm(permset, ACL_READ) < 0 ||
266             acl_calc_mask(&acl) < 0) {
267                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
268                 goto finish;
269         }
270
271         if (acl_set_fd(f->fd, acl) < 0)
272                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
273
274 finish:
275         acl_free(acl);
276 #endif
277 }
278
279 static JournalFile* find_journal(Server *s, uid_t uid) {
280         char *p;
281         int r;
282         JournalFile *f;
283         char ids[33];
284         sd_id128_t machine;
285
286         assert(s);
287
288         /* We split up user logs only on /var, not on /run. If the
289          * runtime file is open, we write to it exclusively, in order
290          * to guarantee proper order as soon as we flush /run to
291          * /var and close the runtime file. */
292
293         if (s->runtime_journal)
294                 return s->runtime_journal;
295
296         if (uid <= 0)
297                 return s->system_journal;
298
299         r = sd_id128_get_machine(&machine);
300         if (r < 0)
301                 return s->system_journal;
302
303         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
304         if (f)
305                 return f;
306
307         if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0)
308                 return s->system_journal;
309
310         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
311                 /* Too many open? Then let's close one */
312                 f = hashmap_steal_first(s->user_journals);
313                 assert(f);
314                 journal_file_close(f);
315         }
316
317         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, &s->system_metrics, s->system_journal, &f);
318         free(p);
319
320         if (r < 0)
321                 return s->system_journal;
322
323         server_fix_perms(s, f, uid);
324
325         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
326         if (r < 0) {
327                 journal_file_close(f);
328                 return s->system_journal;
329         }
330
331         return f;
332 }
333
334 static void server_rotate(Server *s) {
335         JournalFile *f;
336         void *k;
337         Iterator i;
338         int r;
339
340         log_info("Rotating...");
341
342         if (s->runtime_journal) {
343                 r = journal_file_rotate(&s->runtime_journal);
344                 if (r < 0)
345                         if (s->runtime_journal)
346                                 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
347                         else
348                                 log_error("Failed to create new runtime journal: %s", strerror(-r));
349                 else
350                         server_fix_perms(s, s->runtime_journal, 0);
351         }
352
353         if (s->system_journal) {
354                 r = journal_file_rotate(&s->system_journal);
355                 if (r < 0)
356                         if (s->system_journal)
357                                 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
358                         else
359                                 log_error("Failed to create new system journal: %s", strerror(-r));
360
361                 else
362                         server_fix_perms(s, s->system_journal, 0);
363         }
364
365         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
366                 r = journal_file_rotate(&f);
367                 if (r < 0)
368                         if (f->path)
369                                 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
370                         else
371                                 log_error("Failed to create user journal: %s", strerror(-r));
372                 else {
373                         hashmap_replace(s->user_journals, k, f);
374                         server_fix_perms(s, s->system_journal, PTR_TO_UINT32(k));
375                 }
376         }
377 }
378
379 static void server_vacuum(Server *s) {
380         char *p;
381         char ids[33];
382         sd_id128_t machine;
383         int r;
384
385         log_info("Vacuuming...");
386
387         r = sd_id128_get_machine(&machine);
388         if (r < 0) {
389                 log_error("Failed to get machine ID: %s", strerror(-r));
390                 return;
391         }
392
393         sd_id128_to_string(machine, ids);
394
395         if (s->system_journal) {
396                 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
397                         log_oom();
398                         return;
399                 }
400
401                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
402                 if (r < 0 && r != -ENOENT)
403                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
404                 free(p);
405         }
406
407         if (s->runtime_journal) {
408                 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
409                         log_oom();
410                         return;
411                 }
412
413                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
414                 if (r < 0 && r != -ENOENT)
415                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
416                 free(p);
417         }
418
419         s->cached_available_space_timestamp = 0;
420 }
421
422 static char *shortened_cgroup_path(pid_t pid) {
423         int r;
424         char *process_path, *init_path, *path;
425
426         assert(pid > 0);
427
428         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
429         if (r < 0)
430                 return NULL;
431
432         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
433         if (r < 0) {
434                 free(process_path);
435                 return NULL;
436         }
437
438         if (endswith(init_path, "/system"))
439                 init_path[strlen(init_path) - 7] = 0;
440         else if (streq(init_path, "/"))
441                 init_path[0] = 0;
442
443         if (startswith(process_path, init_path)) {
444                 char *p;
445
446                 p = strdup(process_path + strlen(init_path));
447                 if (!p) {
448                         free(process_path);
449                         free(init_path);
450                         return NULL;
451                 }
452                 path = p;
453         } else {
454                 path = process_path;
455                 process_path = NULL;
456         }
457
458         free(process_path);
459         free(init_path);
460
461         return path;
462 }
463
464 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
465         JournalFile *f;
466         bool vacuumed = false;
467         int r;
468
469         assert(s);
470         assert(iovec);
471         assert(n > 0);
472
473         f = find_journal(s, uid);
474         if (!f)
475                 return;
476
477         if (journal_file_rotate_suggested(f)) {
478                 log_info("Journal header limits reached or header out-of-date, rotating.");
479                 server_rotate(s);
480                 server_vacuum(s);
481                 vacuumed = true;
482
483                 f = find_journal(s, uid);
484                 if (!f)
485                         return;
486         }
487
488         for (;;) {
489                 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
490                 if (r >= 0)
491                         return;
492
493                 if (vacuumed ||
494                     (r != -E2BIG && /* hit limit */
495                      r != -EFBIG && /* hit fs limit */
496                      r != -EDQUOT && /* quota hit */
497                      r != -ENOSPC && /* disk full */
498                      r != -EBADMSG && /* corrupted */
499                      r != -ENODATA && /* truncated */
500                      r != -EHOSTDOWN && /* other machine */
501                      r != -EPROTONOSUPPORT && /* unsupported feature */
502                      r != -EBUSY && /* unclean shutdown */
503                      r != -ESHUTDOWN /* already archived */)) {
504                         log_error("Failed to write entry, ignoring: %s", strerror(-r));
505                         return;
506                 }
507
508                 if (r == -E2BIG || r == -EFBIG || r == EDQUOT || r == ENOSPC)
509                         log_info("Allocation limit reached, rotating.");
510                 else if (r == -EHOSTDOWN)
511                         log_info("Journal file from other machine, rotating.");
512                 else if (r == -EBUSY)
513                         log_info("Unlcean shutdown, rotating.");
514                 else
515                         log_warning("Journal file corrupted, rotating.");
516
517                 server_rotate(s);
518                 server_vacuum(s);
519                 vacuumed = true;
520
521                 f = find_journal(s, uid);
522                 if (!f)
523                         return;
524
525                 log_info("Retrying write.");
526         }
527 }
528
529 static void dispatch_message_real(
530                 Server *s,
531                 struct iovec *iovec, unsigned n, unsigned m,
532                 struct ucred *ucred,
533                 struct timeval *tv,
534                 const char *label, size_t label_len,
535                 const char *unit_id) {
536
537         char *pid = NULL, *uid = NULL, *gid = NULL,
538                 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
539                 *comm = NULL, *cmdline = NULL, *hostname = NULL,
540                 *audit_session = NULL, *audit_loginuid = NULL,
541                 *exe = NULL, *cgroup = NULL, *session = NULL,
542                 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
543
544         char idbuf[33];
545         sd_id128_t id;
546         int r;
547         char *t;
548         uid_t loginuid = 0, realuid = 0;
549
550         assert(s);
551         assert(iovec);
552         assert(n > 0);
553         assert(n + N_IOVEC_META_FIELDS <= m);
554
555         if (ucred) {
556                 uint32_t audit;
557 #ifdef HAVE_LOGIND
558                 uid_t owner;
559 #endif
560
561                 realuid = ucred->uid;
562
563                 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
564                         IOVEC_SET_STRING(iovec[n++], pid);
565
566                 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
567                         IOVEC_SET_STRING(iovec[n++], uid);
568
569                 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
570                         IOVEC_SET_STRING(iovec[n++], gid);
571
572                 r = get_process_comm(ucred->pid, &t);
573                 if (r >= 0) {
574                         comm = strappend("_COMM=", t);
575                         free(t);
576
577                         if (comm)
578                                 IOVEC_SET_STRING(iovec[n++], comm);
579                 }
580
581                 r = get_process_exe(ucred->pid, &t);
582                 if (r >= 0) {
583                         exe = strappend("_EXE=", t);
584                         free(t);
585
586                         if (exe)
587                                 IOVEC_SET_STRING(iovec[n++], exe);
588                 }
589
590                 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
591                 if (r >= 0) {
592                         cmdline = strappend("_CMDLINE=", t);
593                         free(t);
594
595                         if (cmdline)
596                                 IOVEC_SET_STRING(iovec[n++], cmdline);
597                 }
598
599                 r = audit_session_from_pid(ucred->pid, &audit);
600                 if (r >= 0)
601                         if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
602                                 IOVEC_SET_STRING(iovec[n++], audit_session);
603
604                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
605                 if (r >= 0)
606                         if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
607                                 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
608
609                 t = shortened_cgroup_path(ucred->pid);
610                 if (t) {
611                         cgroup = strappend("_SYSTEMD_CGROUP=", t);
612                         free(t);
613
614                         if (cgroup)
615                                 IOVEC_SET_STRING(iovec[n++], cgroup);
616                 }
617
618 #ifdef HAVE_LOGIND
619                 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
620                         session = strappend("_SYSTEMD_SESSION=", t);
621                         free(t);
622
623                         if (session)
624                                 IOVEC_SET_STRING(iovec[n++], session);
625                 }
626
627                 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
628                         if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
629                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
630 #endif
631
632                 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
633                         unit = strappend("_SYSTEMD_UNIT=", t);
634                         free(t);
635                 } else if (unit_id)
636                         unit = strappend("_SYSTEMD_UNIT=", unit_id);
637
638                 if (unit)
639                         IOVEC_SET_STRING(iovec[n++], unit);
640
641 #ifdef HAVE_SELINUX
642                 if (label) {
643                         selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
644                         if (selinux_context) {
645                                 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
646                                 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
647                                 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
648                                 IOVEC_SET_STRING(iovec[n++], selinux_context);
649                         }
650                 } else {
651                         security_context_t con;
652
653                         if (getpidcon(ucred->pid, &con) >= 0) {
654                                 selinux_context = strappend("_SELINUX_CONTEXT=", con);
655                                 if (selinux_context)
656                                         IOVEC_SET_STRING(iovec[n++], selinux_context);
657
658                                 freecon(con);
659                         }
660                 }
661 #endif
662         }
663
664         if (tv) {
665                 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
666                              (unsigned long long) timeval_load(tv)) >= 0)
667                         IOVEC_SET_STRING(iovec[n++], source_time);
668         }
669
670         /* Note that strictly speaking storing the boot id here is
671          * redundant since the entry includes this in-line
672          * anyway. However, we need this indexed, too. */
673         r = sd_id128_get_boot(&id);
674         if (r >= 0)
675                 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
676                         IOVEC_SET_STRING(iovec[n++], boot_id);
677
678         r = sd_id128_get_machine(&id);
679         if (r >= 0)
680                 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
681                         IOVEC_SET_STRING(iovec[n++], machine_id);
682
683         t = gethostname_malloc();
684         if (t) {
685                 hostname = strappend("_HOSTNAME=", t);
686                 free(t);
687                 if (hostname)
688                         IOVEC_SET_STRING(iovec[n++], hostname);
689         }
690
691         assert(n <= m);
692
693         write_to_journal(s, realuid == 0 ? 0 : loginuid, iovec, n);
694
695         free(pid);
696         free(uid);
697         free(gid);
698         free(comm);
699         free(exe);
700         free(cmdline);
701         free(source_time);
702         free(boot_id);
703         free(machine_id);
704         free(hostname);
705         free(audit_session);
706         free(audit_loginuid);
707         free(cgroup);
708         free(session);
709         free(owner_uid);
710         free(unit);
711         free(selinux_context);
712 }
713
714 static void driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
715         char mid[11 + 32 + 1];
716         char buffer[16 + LINE_MAX + 1];
717         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
718         int n = 0;
719         va_list ap;
720         struct ucred ucred;
721
722         assert(s);
723         assert(format);
724
725         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
726         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
727
728         memcpy(buffer, "MESSAGE=", 8);
729         va_start(ap, format);
730         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
731         va_end(ap);
732         char_array_0(buffer);
733         IOVEC_SET_STRING(iovec[n++], buffer);
734
735         snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
736         char_array_0(mid);
737         IOVEC_SET_STRING(iovec[n++], mid);
738
739         zero(ucred);
740         ucred.pid = getpid();
741         ucred.uid = getuid();
742         ucred.gid = getgid();
743
744         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
745 }
746
747 static void dispatch_message(Server *s,
748                              struct iovec *iovec, unsigned n, unsigned m,
749                              struct ucred *ucred,
750                              struct timeval *tv,
751                              const char *label, size_t label_len,
752                              const char *unit_id,
753                              int priority) {
754         int rl;
755         char *path = NULL, *c;
756
757         assert(s);
758         assert(iovec || n == 0);
759
760         if (n == 0)
761                 return;
762
763         if (LOG_PRI(priority) > s->max_level_store)
764                 return;
765
766         if (!ucred)
767                 goto finish;
768
769         path = shortened_cgroup_path(ucred->pid);
770         if (!path)
771                 goto finish;
772
773         /* example: /user/lennart/3/foobar
774          *          /system/dbus.service/foobar
775          *
776          * So let's cut of everything past the third /, since that is
777          * wher user directories start */
778
779         c = strchr(path, '/');
780         if (c) {
781                 c = strchr(c+1, '/');
782                 if (c) {
783                         c = strchr(c+1, '/');
784                         if (c)
785                                 *c = 0;
786                 }
787         }
788
789         rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
790
791         if (rl == 0) {
792                 free(path);
793                 return;
794         }
795
796         /* Write a suppression message if we suppressed something */
797         if (rl > 1)
798                 driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
799
800         free(path);
801
802 finish:
803         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
804 }
805
806 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
807         struct msghdr msghdr;
808         struct cmsghdr *cmsg;
809         union {
810                 struct cmsghdr cmsghdr;
811                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
812         } control;
813         union sockaddr_union sa;
814
815         assert(s);
816         assert(iovec);
817         assert(n_iovec > 0);
818
819         zero(msghdr);
820         msghdr.msg_iov = (struct iovec*) iovec;
821         msghdr.msg_iovlen = n_iovec;
822
823         zero(sa);
824         sa.un.sun_family = AF_UNIX;
825         strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
826         msghdr.msg_name = &sa;
827         msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
828
829         if (ucred) {
830                 zero(control);
831                 msghdr.msg_control = &control;
832                 msghdr.msg_controllen = sizeof(control);
833
834                 cmsg = CMSG_FIRSTHDR(&msghdr);
835                 cmsg->cmsg_level = SOL_SOCKET;
836                 cmsg->cmsg_type = SCM_CREDENTIALS;
837                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
838                 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
839                 msghdr.msg_controllen = cmsg->cmsg_len;
840         }
841
842         /* Forward the syslog message we received via /dev/log to
843          * /run/systemd/syslog. Unfortunately we currently can't set
844          * the SO_TIMESTAMP auxiliary data, and hence we don't. */
845
846         if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
847                 return;
848
849         /* The socket is full? I guess the syslog implementation is
850          * too slow, and we shouldn't wait for that... */
851         if (errno == EAGAIN)
852                 return;
853
854         if (ucred && errno == ESRCH) {
855                 struct ucred u;
856
857                 /* Hmm, presumably the sender process vanished
858                  * by now, so let's fix it as good as we
859                  * can, and retry */
860
861                 u = *ucred;
862                 u.pid = getpid();
863                 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
864
865                 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
866                         return;
867
868                 if (errno == EAGAIN)
869                         return;
870         }
871
872         if (errno != ENOENT)
873                 log_debug("Failed to forward syslog message: %m");
874 }
875
876 static void forward_syslog_raw(Server *s, int priority, const char *buffer, struct ucred *ucred, struct timeval *tv) {
877         struct iovec iovec;
878
879         assert(s);
880         assert(buffer);
881
882         if (LOG_PRI(priority) > s->max_level_syslog)
883                 return;
884
885         IOVEC_SET_STRING(iovec, buffer);
886         forward_syslog_iovec(s, &iovec, 1, ucred, tv);
887 }
888
889 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
890         struct iovec iovec[5];
891         char header_priority[6], header_time[64], header_pid[16];
892         int n = 0;
893         time_t t;
894         struct tm *tm;
895         char *ident_buf = NULL;
896
897         assert(s);
898         assert(priority >= 0);
899         assert(priority <= 999);
900         assert(message);
901
902         if (LOG_PRI(priority) > s->max_level_syslog)
903                 return;
904
905         /* First: priority field */
906         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
907         char_array_0(header_priority);
908         IOVEC_SET_STRING(iovec[n++], header_priority);
909
910         /* Second: timestamp */
911         t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
912         tm = localtime(&t);
913         if (!tm)
914                 return;
915         if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
916                 return;
917         IOVEC_SET_STRING(iovec[n++], header_time);
918
919         /* Third: identifier and PID */
920         if (ucred) {
921                 if (!identifier) {
922                         get_process_comm(ucred->pid, &ident_buf);
923                         identifier = ident_buf;
924                 }
925
926                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
927                 char_array_0(header_pid);
928
929                 if (identifier)
930                         IOVEC_SET_STRING(iovec[n++], identifier);
931
932                 IOVEC_SET_STRING(iovec[n++], header_pid);
933         } else if (identifier) {
934                 IOVEC_SET_STRING(iovec[n++], identifier);
935                 IOVEC_SET_STRING(iovec[n++], ": ");
936         }
937
938         /* Fourth: message */
939         IOVEC_SET_STRING(iovec[n++], message);
940
941         forward_syslog_iovec(s, iovec, n, ucred, tv);
942
943         free(ident_buf);
944 }
945
/* Returns priority unchanged if it carries a non-zero facility;
 * otherwise returns just its level bits combined with LOG_USER. */
static int fixup_priority(int priority) {
        int has_facility = (priority & LOG_FACMASK) != 0;

        return has_facility ? priority : ((priority & LOG_PRIMASK) | LOG_USER);
}
953
954 static void forward_kmsg(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
955         struct iovec iovec[5];
956         char header_priority[6], header_pid[16];
957         int n = 0;
958         char *ident_buf = NULL;
959
960         assert(s);
961         assert(priority >= 0);
962         assert(priority <= 999);
963         assert(message);
964
965         if (_unlikely_(LOG_PRI(priority) > s->max_level_kmsg))
966                 return;
967
968         if (_unlikely_(s->dev_kmsg_fd < 0))
969                 return;
970
971         /* Never allow messages with kernel facility to be written to
972          * kmsg, regardless where the data comes from. */
973         priority = fixup_priority(priority);
974
975         /* First: priority field */
976         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
977         char_array_0(header_priority);
978         IOVEC_SET_STRING(iovec[n++], header_priority);
979
980         /* Second: identifier and PID */
981         if (ucred) {
982                 if (!identifier) {
983                         get_process_comm(ucred->pid, &ident_buf);
984                         identifier = ident_buf;
985                 }
986
987                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
988                 char_array_0(header_pid);
989
990                 if (identifier)
991                         IOVEC_SET_STRING(iovec[n++], identifier);
992
993                 IOVEC_SET_STRING(iovec[n++], header_pid);
994         } else if (identifier) {
995                 IOVEC_SET_STRING(iovec[n++], identifier);
996                 IOVEC_SET_STRING(iovec[n++], ": ");
997         }
998
999         /* Fourth: message */
1000         IOVEC_SET_STRING(iovec[n++], message);
1001         IOVEC_SET_STRING(iovec[n++], "\n");
1002
1003         if (writev(s->dev_kmsg_fd, iovec, n) < 0)
1004                 log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
1005
1006         free(ident_buf);
1007 }
1008
1009 static void forward_console(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
1010         struct iovec iovec[4];
1011         char header_pid[16];
1012         int n = 0, fd;
1013         char *ident_buf = NULL;
1014         const char *tty;
1015
1016         assert(s);
1017         assert(message);
1018
1019         if (LOG_PRI(priority) > s->max_level_console)
1020                 return;
1021
1022         /* First: identifier and PID */
1023         if (ucred) {
1024                 if (!identifier) {
1025                         get_process_comm(ucred->pid, &ident_buf);
1026                         identifier = ident_buf;
1027                 }
1028
1029                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
1030                 char_array_0(header_pid);
1031
1032                 if (identifier)
1033                         IOVEC_SET_STRING(iovec[n++], identifier);
1034
1035                 IOVEC_SET_STRING(iovec[n++], header_pid);
1036         } else if (identifier) {
1037                 IOVEC_SET_STRING(iovec[n++], identifier);
1038                 IOVEC_SET_STRING(iovec[n++], ": ");
1039         }
1040
1041         /* Third: message */
1042         IOVEC_SET_STRING(iovec[n++], message);
1043         IOVEC_SET_STRING(iovec[n++], "\n");
1044
1045         tty = s->tty_path ? s->tty_path : "/dev/console";
1046
1047         fd = open_terminal(tty, O_WRONLY|O_NOCTTY|O_CLOEXEC);
1048         if (fd < 0) {
1049                 log_debug("Failed to open %s for logging: %s", tty, strerror(errno));
1050                 goto finish;
1051         }
1052
1053         if (writev(fd, iovec, n) < 0)
1054                 log_debug("Failed to write to %s for logging: %s", tty, strerror(errno));
1055
1056         close_nointr_nofail(fd);
1057
1058 finish:
1059         free(ident_buf);
1060 }
1061
1062 static void read_identifier(const char **buf, char **identifier, char **pid) {
1063         const char *p;
1064         char *t;
1065         size_t l, e;
1066
1067         assert(buf);
1068         assert(identifier);
1069         assert(pid);
1070
1071         p = *buf;
1072
1073         p += strspn(p, WHITESPACE);
1074         l = strcspn(p, WHITESPACE);
1075
1076         if (l <= 0 ||
1077             p[l-1] != ':')
1078                 return;
1079
1080         e = l;
1081         l--;
1082
1083         if (p[l-1] == ']') {
1084                 size_t k = l-1;
1085
1086                 for (;;) {
1087
1088                         if (p[k] == '[') {
1089                                 t = strndup(p+k+1, l-k-2);
1090                                 if (t)
1091                                         *pid = t;
1092
1093                                 l = k;
1094                                 break;
1095                         }
1096
1097                         if (k == 0)
1098                                 break;
1099
1100                         k--;
1101                 }
1102         }
1103
1104         t = strndup(p, l);
1105         if (t)
1106                 *identifier = t;
1107
1108         *buf = p + e;
1109         *buf += strspn(*buf, WHITESPACE);
1110 }
1111
1112 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv, const char *label, size_t label_len) {
1113         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
1114         struct iovec iovec[N_IOVEC_META_FIELDS + 6];
1115         unsigned n = 0;
1116         int priority = LOG_USER | LOG_INFO;
1117         char *identifier = NULL, *pid = NULL;
1118         const char *orig;
1119
1120         assert(s);
1121         assert(buf);
1122
1123         orig = buf;
1124         parse_syslog_priority((char**) &buf, &priority);
1125
1126         if (s->forward_to_syslog)
1127                 forward_syslog_raw(s, priority, orig, ucred, tv);
1128
1129         skip_syslog_date((char**) &buf);
1130         read_identifier(&buf, &identifier, &pid);
1131
1132         if (s->forward_to_kmsg)
1133                 forward_kmsg(s, priority, identifier, buf, ucred);
1134
1135         if (s->forward_to_console)
1136                 forward_console(s, priority, identifier, buf, ucred);
1137
1138         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
1139
1140         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1141                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1142
1143         if (priority & LOG_FACMASK)
1144                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1145                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1146
1147         if (identifier) {
1148                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1149                 if (syslog_identifier)
1150                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1151         }
1152
1153         if (pid) {
1154                 syslog_pid = strappend("SYSLOG_PID=", pid);
1155                 if (syslog_pid)
1156                         IOVEC_SET_STRING(iovec[n++], syslog_pid);
1157         }
1158
1159         message = strappend("MESSAGE=", buf);
1160         if (message)
1161                 IOVEC_SET_STRING(iovec[n++], message);
1162
1163         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, label, label_len, NULL, priority);
1164
1165         free(message);
1166         free(identifier);
1167         free(pid);
1168         free(syslog_priority);
1169         free(syslog_facility);
1170         free(syslog_identifier);
1171         free(syslog_pid);
1172 }
1173
/* Checks whether the l bytes at p form a field name that clients are
 * allowed to set. This loosely follows the POSIX recommendations for
 * environment variable names
 * (http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html)
 * with a few extra restrictions:
 *
 *   - the name is 1 to 64 characters long,
 *   - it does not start with '_' (such names are protected),
 *   - it does not start with a digit,
 *   - it consists only of A-Z, 0-9 and '_'.
 */
static bool valid_user_field(const char *p, size_t l) {
        size_t i;

        /* Length limits: not empty, at most 64 characters */
        if (l <= 0 || l > 64)
                return false;

        /* Restrictions on the first character */
        if (p[0] == '_' || (p[0] >= '0' && p[0] <= '9'))
                return false;

        /* Restrictions on all characters */
        for (i = 0; i < l; i++) {
                char c = p[i];
                bool is_upper = c >= 'A' && c <= 'Z';
                bool is_digit = c >= '0' && c <= '9';

                if (!is_upper && !is_digit && c != '_')
                        return false;
        }

        return true;
}
1208
1209 static void process_native_message(
1210                 Server *s,
1211                 const void *buffer, size_t buffer_size,
1212                 struct ucred *ucred,
1213                 struct timeval *tv,
1214                 const char *label, size_t label_len) {
1215
1216         struct iovec *iovec = NULL;
1217         unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1218         const char *p;
1219         size_t remaining;
1220         int priority = LOG_INFO;
1221         char *identifier = NULL, *message = NULL;
1222
1223         assert(s);
1224         assert(buffer || buffer_size == 0);
1225
1226         p = buffer;
1227         remaining = buffer_size;
1228
1229         while (remaining > 0) {
1230                 const char *e, *q;
1231
1232                 e = memchr(p, '\n', remaining);
1233
1234                 if (!e) {
1235                         /* Trailing noise, let's ignore it, and flush what we collected */
1236                         log_debug("Received message with trailing noise, ignoring.");
1237                         break;
1238                 }
1239
1240                 if (e == p) {
1241                         /* Entry separator */
1242                         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
1243                         n = 0;
1244                         priority = LOG_INFO;
1245
1246                         p++;
1247                         remaining--;
1248                         continue;
1249                 }
1250
1251                 if (*p == '.' || *p == '#') {
1252                         /* Ignore control commands for now, and
1253                          * comments too. */
1254                         remaining -= (e - p) + 1;
1255                         p = e + 1;
1256                         continue;
1257                 }
1258
1259                 /* A property follows */
1260
1261                 if (n+N_IOVEC_META_FIELDS >= m) {
1262                         struct iovec *c;
1263                         unsigned u;
1264
1265                         u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1266                         c = realloc(iovec, u * sizeof(struct iovec));
1267                         if (!c) {
1268                                 log_oom();
1269                                 break;
1270                         }
1271
1272                         iovec = c;
1273                         m = u;
1274                 }
1275
1276                 q = memchr(p, '=', e - p);
1277                 if (q) {
1278                         if (valid_user_field(p, q - p)) {
1279                                 size_t l;
1280
1281                                 l = e - p;
1282
1283                                 /* If the field name starts with an
1284                                  * underscore, skip the variable,
1285                                  * since that indidates a trusted
1286                                  * field */
1287                                 iovec[n].iov_base = (char*) p;
1288                                 iovec[n].iov_len = l;
1289                                 n++;
1290
1291                                 /* We need to determine the priority
1292                                  * of this entry for the rate limiting
1293                                  * logic */
1294                                 if (l == 10 &&
1295                                     memcmp(p, "PRIORITY=", 9) == 0 &&
1296                                     p[9] >= '0' && p[9] <= '9')
1297                                         priority = (priority & LOG_FACMASK) | (p[9] - '0');
1298
1299                                 else if (l == 17 &&
1300                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1301                                          p[16] >= '0' && p[16] <= '9')
1302                                         priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1303
1304                                 else if (l == 18 &&
1305                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1306                                          p[16] >= '0' && p[16] <= '9' &&
1307                                          p[17] >= '0' && p[17] <= '9')
1308                                         priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1309
1310                                 else if (l >= 19 &&
1311                                          memcmp(p, "SYSLOG_IDENTIFIER=", 18) == 0) {
1312                                         char *t;
1313
1314                                         t = strndup(p + 18, l - 18);
1315                                         if (t) {
1316                                                 free(identifier);
1317                                                 identifier = t;
1318                                         }
1319                                 } else if (l >= 8 &&
1320                                            memcmp(p, "MESSAGE=", 8) == 0) {
1321                                         char *t;
1322
1323                                         t = strndup(p + 8, l - 8);
1324                                         if (t) {
1325                                                 free(message);
1326                                                 message = t;
1327                                         }
1328                                 }
1329                         }
1330
1331                         remaining -= (e - p) + 1;
1332                         p = e + 1;
1333                         continue;
1334                 } else {
1335                         le64_t l_le;
1336                         uint64_t l;
1337                         char *k;
1338
1339                         if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1340                                 log_debug("Failed to parse message, ignoring.");
1341                                 break;
1342                         }
1343
1344                         memcpy(&l_le, e + 1, sizeof(uint64_t));
1345                         l = le64toh(l_le);
1346
1347                         if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1348                             e[1+sizeof(uint64_t)+l] != '\n') {
1349                                 log_debug("Failed to parse message, ignoring.");
1350                                 break;
1351                         }
1352
1353                         k = malloc((e - p) + 1 + l);
1354                         if (!k) {
1355                                 log_oom();
1356                                 break;
1357                         }
1358
1359                         memcpy(k, p, e - p);
1360                         k[e - p] = '=';
1361                         memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1362
1363                         if (valid_user_field(p, e - p)) {
1364                                 iovec[n].iov_base = k;
1365                                 iovec[n].iov_len = (e - p) + 1 + l;
1366                                 n++;
1367                         } else
1368                                 free(k);
1369
1370                         remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1371                         p = e + 1 + sizeof(uint64_t) + l + 1;
1372                 }
1373         }
1374
1375         if (n <= 0)
1376                 goto finish;
1377
1378         tn = n++;
1379         IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1380
1381         if (message) {
1382                 if (s->forward_to_syslog)
1383                         forward_syslog(s, priority, identifier, message, ucred, tv);
1384
1385                 if (s->forward_to_kmsg)
1386                         forward_kmsg(s, priority, identifier, message, ucred);
1387
1388                 if (s->forward_to_console)
1389                         forward_console(s, priority, identifier, message, ucred);
1390         }
1391
1392         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
1393
1394 finish:
1395         for (j = 0; j < n; j++)  {
1396                 if (j == tn)
1397                         continue;
1398
1399                 if (iovec[j].iov_base < buffer ||
1400                     (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1401                         free(iovec[j].iov_base);
1402         }
1403
1404         free(iovec);
1405         free(identifier);
1406         free(message);
1407 }
1408
1409 static void process_native_file(
1410                 Server *s,
1411                 int fd,
1412                 struct ucred *ucred,
1413                 struct timeval *tv,
1414                 const char *label, size_t label_len) {
1415
1416         struct stat st;
1417         void *p;
1418         ssize_t n;
1419
1420         assert(s);
1421         assert(fd >= 0);
1422
1423         /* Data is in the passed file, since it didn't fit in a
1424          * datagram. We can't map the file here, since clients might
1425          * then truncate it and trigger a SIGBUS for us. So let's
1426          * stupidly read it */
1427
1428         if (fstat(fd, &st) < 0) {
1429                 log_error("Failed to stat passed file, ignoring: %m");
1430                 return;
1431         }
1432
1433         if (!S_ISREG(st.st_mode)) {
1434                 log_error("File passed is not regular. Ignoring.");
1435                 return;
1436         }
1437
1438         if (st.st_size <= 0)
1439                 return;
1440
1441         if (st.st_size > ENTRY_SIZE_MAX) {
1442                 log_error("File passed too large. Ignoring.");
1443                 return;
1444         }
1445
1446         p = malloc(st.st_size);
1447         if (!p) {
1448                 log_oom();
1449                 return;
1450         }
1451
1452         n = pread(fd, p, st.st_size, 0);
1453         if (n < 0)
1454                 log_error("Failed to read file, ignoring: %s", strerror(-n));
1455         else if (n > 0)
1456                 process_native_message(s, p, n, ucred, tv, label, label_len);
1457
1458         free(p);
1459 }
1460
1461 static int stdout_stream_log(StdoutStream *s, const char *p) {
1462         struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1463         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1464         unsigned n = 0;
1465         int priority;
1466         char *label = NULL;
1467         size_t label_len = 0;
1468
1469         assert(s);
1470         assert(p);
1471
1472         if (isempty(p))
1473                 return 0;
1474
1475         priority = s->priority;
1476
1477         if (s->level_prefix)
1478                 parse_syslog_priority((char**) &p, &priority);
1479
1480         if (s->forward_to_syslog || s->server->forward_to_syslog)
1481                 forward_syslog(s->server, fixup_priority(priority), s->identifier, p, &s->ucred, NULL);
1482
1483         if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1484                 forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1485
1486         if (s->forward_to_console || s->server->forward_to_console)
1487                 forward_console(s->server, priority, s->identifier, p, &s->ucred);
1488
1489         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1490
1491         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1492                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1493
1494         if (priority & LOG_FACMASK)
1495                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1496                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1497
1498         if (s->identifier) {
1499                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1500                 if (syslog_identifier)
1501                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1502         }
1503
1504         message = strappend("MESSAGE=", p);
1505         if (message)
1506                 IOVEC_SET_STRING(iovec[n++], message);
1507
1508 #ifdef HAVE_SELINUX
1509         if (s->security_context) {
1510                 label = (char*) s->security_context;
1511                 label_len = strlen((char*) s->security_context);
1512         }
1513 #endif
1514
1515         dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, label, label_len, s->unit_id, priority);
1516
1517         free(message);
1518         free(syslog_priority);
1519         free(syslog_facility);
1520         free(syslog_identifier);
1521
1522         return 0;
1523 }
1524
1525 static int stdout_stream_line(StdoutStream *s, char *p) {
1526         int r;
1527
1528         assert(s);
1529         assert(p);
1530
1531         p = strstrip(p);
1532
1533         switch (s->state) {
1534
1535         case STDOUT_STREAM_IDENTIFIER:
1536                 if (isempty(p))
1537                         s->identifier = NULL;
1538                 else  {
1539                         s->identifier = strdup(p);
1540                         if (!s->identifier)
1541                                 return log_oom();
1542                 }
1543
1544                 s->state = STDOUT_STREAM_UNIT_ID;
1545                 return 0;
1546
1547         case STDOUT_STREAM_UNIT_ID:
1548                 if (s->ucred.uid == 0) {
1549                         if (isempty(p))
1550                                 s->unit_id = NULL;
1551                         else  {
1552                                 s->unit_id = strdup(p);
1553                                 if (!s->unit_id)
1554                                         return log_oom();
1555                         }
1556                 }
1557
1558                 s->state = STDOUT_STREAM_PRIORITY;
1559                 return 0;
1560
1561         case STDOUT_STREAM_PRIORITY:
1562                 r = safe_atoi(p, &s->priority);
1563                 if (r < 0 || s->priority <= 0 || s->priority >= 999) {
1564                         log_warning("Failed to parse log priority line.");
1565                         return -EINVAL;
1566                 }
1567
1568                 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1569                 return 0;
1570
1571         case STDOUT_STREAM_LEVEL_PREFIX:
1572                 r = parse_boolean(p);
1573                 if (r < 0) {
1574                         log_warning("Failed to parse level prefix line.");
1575                         return -EINVAL;
1576                 }
1577
1578                 s->level_prefix = !!r;
1579                 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1580                 return 0;
1581
1582         case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1583                 r = parse_boolean(p);
1584                 if (r < 0) {
1585                         log_warning("Failed to parse forward to syslog line.");
1586                         return -EINVAL;
1587                 }
1588
1589                 s->forward_to_syslog = !!r;
1590                 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1591                 return 0;
1592
1593         case STDOUT_STREAM_FORWARD_TO_KMSG:
1594                 r = parse_boolean(p);
1595                 if (r < 0) {
1596                         log_warning("Failed to parse copy to kmsg line.");
1597                         return -EINVAL;
1598                 }
1599
1600                 s->forward_to_kmsg = !!r;
1601                 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1602                 return 0;
1603
1604         case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1605                 r = parse_boolean(p);
1606                 if (r < 0) {
1607                         log_warning("Failed to parse copy to console line.");
1608                         return -EINVAL;
1609                 }
1610
1611                 s->forward_to_console = !!r;
1612                 s->state = STDOUT_STREAM_RUNNING;
1613                 return 0;
1614
1615         case STDOUT_STREAM_RUNNING:
1616                 return stdout_stream_log(s, p);
1617         }
1618
1619         assert_not_reached("Unknown stream state");
1620 }
1621
1622 static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
1623         char *p;
1624         size_t remaining;
1625         int r;
1626
1627         assert(s);
1628
1629         p = s->buffer;
1630         remaining = s->length;
1631         for (;;) {
1632                 char *end;
1633                 size_t skip;
1634
1635                 end = memchr(p, '\n', remaining);
1636                 if (end)
1637                         skip = end - p + 1;
1638                 else if (remaining >= sizeof(s->buffer) - 1) {
1639                         end = p + sizeof(s->buffer) - 1;
1640                         skip = remaining;
1641                 } else
1642                         break;
1643
1644                 *end = 0;
1645
1646                 r = stdout_stream_line(s, p);
1647                 if (r < 0)
1648                         return r;
1649
1650                 remaining -= skip;
1651                 p += skip;
1652         }
1653
1654         if (force_flush && remaining > 0) {
1655                 p[remaining] = 0;
1656                 r = stdout_stream_line(s, p);
1657                 if (r < 0)
1658                         return r;
1659
1660                 p += remaining;
1661                 remaining = 0;
1662         }
1663
1664         if (p > s->buffer) {
1665                 memmove(s->buffer, p, remaining);
1666                 s->length = remaining;
1667         }
1668
1669         return 0;
1670 }
1671
1672 static int stdout_stream_process(StdoutStream *s) {
1673         ssize_t l;
1674         int r;
1675
1676         assert(s);
1677
1678         l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1679         if (l < 0) {
1680
1681                 if (errno == EAGAIN)
1682                         return 0;
1683
1684                 log_warning("Failed to read from stream: %m");
1685                 return -errno;
1686         }
1687
1688         if (l == 0) {
1689                 r = stdout_stream_scan(s, true);
1690                 if (r < 0)
1691                         return r;
1692
1693                 return 0;
1694         }
1695
1696         s->length += l;
1697         r = stdout_stream_scan(s, false);
1698         if (r < 0)
1699                 return r;
1700
1701         return 1;
1702
1703 }
1704
1705 static void stdout_stream_free(StdoutStream *s) {
1706         assert(s);
1707
1708         if (s->server) {
1709                 assert(s->server->n_stdout_streams > 0);
1710                 s->server->n_stdout_streams --;
1711                 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
1712         }
1713
1714         if (s->fd >= 0) {
1715                 if (s->server)
1716                         epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1717
1718                 close_nointr_nofail(s->fd);
1719         }
1720
1721 #ifdef HAVE_SELINUX
1722         if (s->security_context)
1723                 freecon(s->security_context);
1724 #endif
1725
1726         free(s->identifier);
1727         free(s);
1728 }
1729
1730 static int stdout_stream_new(Server *s) {
1731         StdoutStream *stream;
1732         int fd, r;
1733         socklen_t len;
1734         struct epoll_event ev;
1735
1736         assert(s);
1737
1738         fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1739         if (fd < 0) {
1740                 if (errno == EAGAIN)
1741                         return 0;
1742
1743                 log_error("Failed to accept stdout connection: %m");
1744                 return -errno;
1745         }
1746
1747         if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1748                 log_warning("Too many stdout streams, refusing connection.");
1749                 close_nointr_nofail(fd);
1750                 return 0;
1751         }
1752
1753         stream = new0(StdoutStream, 1);
1754         if (!stream) {
1755                 close_nointr_nofail(fd);
1756                 return log_oom();
1757         }
1758
1759         stream->fd = fd;
1760
1761         len = sizeof(stream->ucred);
1762         if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1763                 log_error("Failed to determine peer credentials: %m");
1764                 r = -errno;
1765                 goto fail;
1766         }
1767
1768 #ifdef HAVE_SELINUX
1769         if (getpeercon(fd, &stream->security_context) < 0 && errno != ENOPROTOOPT)
1770                 log_error("Failed to determine peer security context: %m");
1771 #endif
1772
1773         if (shutdown(fd, SHUT_WR) < 0) {
1774                 log_error("Failed to shutdown writing side of socket: %m");
1775                 r = -errno;
1776                 goto fail;
1777         }
1778
1779         zero(ev);
1780         ev.data.ptr = stream;
1781         ev.events = EPOLLIN;
1782         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1783                 log_error("Failed to add stream to event loop: %m");
1784                 r = -errno;
1785                 goto fail;
1786         }
1787
1788         stream->server = s;
1789         LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1790         s->n_stdout_streams ++;
1791
1792         return 0;
1793
1794 fail:
1795         stdout_stream_free(stream);
1796         return r;
1797 }
1798
/* Tells whether the textual process ID in 'pid' refers to the calling
 * process. Strings that parse_pid() rejects yield false. */
static bool is_us(const char *pid) {
        pid_t parsed;

        assert(pid);

        return parse_pid(pid, &parsed) >= 0 && parsed == getpid();
}
1809
1810 static void dev_kmsg_record(Server *s, char *p, size_t l) {
1811         struct iovec iovec[N_IOVEC_META_FIELDS + 7 + N_IOVEC_KERNEL_FIELDS];
1812         char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
1813         int priority, r;
1814         unsigned n = 0, z = 0, j;
1815         usec_t usec;
1816         char *identifier = NULL, *pid = NULL, *e, *f, *k;
1817         uint64_t serial;
1818         size_t pl;
1819
1820         assert(s);
1821         assert(p);
1822
1823         if (l <= 0)
1824                 return;
1825
1826         e = memchr(p, ',', l);
1827         if (!e)
1828                 return;
1829         *e = 0;
1830
1831         r = safe_atoi(p, &priority);
1832         if (r < 0 || priority < 0 || priority > 999)
1833                 return;
1834
1835         if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
1836                 return;
1837
1838         l -= (e - p) + 1;
1839         p = e + 1;
1840         e = memchr(p, ',', l);
1841         if (!e)
1842                 return;
1843         *e = 0;
1844
1845         r = safe_atou64(p, &serial);
1846         if (r < 0)
1847                 return;
1848
1849         l -= (e - p) + 1;
1850         p = e + 1;
1851         f = memchr(p, ';', l);
1852         if (!f)
1853                 return;
1854         /* Kernel 3.6 has the flags field, kernel 3.5 lacks that */
1855         e = memchr(p, ',', l);
1856         if (!e || f < e)
1857                 e = f;
1858         *e = 0;
1859
1860         r = parse_usec(p, &usec);
1861         if (r < 0)
1862                 return;
1863
1864         l -= (f - p) + 1;
1865         p = f + 1;
1866         e = memchr(p, '\n', l);
1867         if (!e)
1868                 return;
1869         *e = 0;
1870
1871         pl = e - p;
1872         l -= (e - p) + 1;
1873         k = e + 1;
1874
1875         for (j = 0; l > 0 && j < N_IOVEC_KERNEL_FIELDS; j++) {
1876                 char *m;
1877                 /* Meta data fields attached */
1878
1879                 if (*k != ' ')
1880                         break;
1881
1882                 k ++, l --;
1883
1884                 e = memchr(k, '\n', l);
1885                 if (!e)
1886                         return;
1887
1888                 *e = 0;
1889
1890                 m = cunescape_length_with_prefix(k, e - k, "_KERNEL_");
1891                 if (!m)
1892                         break;
1893
1894                 IOVEC_SET_STRING(iovec[n++], m);
1895                 z++;
1896
1897                 l -= (e - k) + 1;
1898                 k = e + 1;
1899         }
1900
1901         if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
1902                      (unsigned long long) usec) >= 0)
1903                 IOVEC_SET_STRING(iovec[n++], source_time);
1904
1905         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
1906
1907         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1908                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1909
1910         if ((priority & LOG_FACMASK) == LOG_KERN)
1911                 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
1912         else {
1913                 read_identifier((const char**) &p, &identifier, &pid);
1914
1915                 /* Avoid any messages we generated ourselves via
1916                  * log_info() and friends. */
1917                 if (pid && is_us(pid))
1918                         goto finish;
1919
1920                 if (identifier) {
1921                         syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1922                         if (syslog_identifier)
1923                                 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1924                 }
1925
1926                 if (pid) {
1927                         syslog_pid = strappend("SYSLOG_PID=", pid);
1928                         if (syslog_pid)
1929                                 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1930                 }
1931
1932                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1933                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1934         }
1935
1936         message = cunescape_length_with_prefix(p, pl, "MESSAGE=");
1937         if (message)
1938                 IOVEC_SET_STRING(iovec[n++], message);
1939
1940         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, NULL, priority);
1941
1942 finish:
1943         for (j = 0; j < z; j++)
1944                 free(iovec[j].iov_base);
1945
1946         free(message);
1947         free(syslog_priority);
1948         free(syslog_identifier);
1949         free(syslog_pid);
1950         free(syslog_facility);
1951         free(source_time);
1952         free(identifier);
1953         free(pid);
1954 }
1955
1956 static int system_journal_open(Server *s) {
1957         int r;
1958         char *fn;
1959         sd_id128_t machine;
1960         char ids[33];
1961
1962         r = sd_id128_get_machine(&machine);
1963         if (r < 0)
1964                 return r;
1965
1966         sd_id128_to_string(machine, ids);
1967
1968         if (!s->system_journal &&
1969             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
1970             access("/run/systemd/journal/flushed", F_OK) >= 0) {
1971
1972                 /* If in auto mode: first try to create the machine
1973                  * path, but not the prefix.
1974                  *
1975                  * If in persistent mode: create /var/log/journal and
1976                  * the machine path */
1977
1978                 if (s->storage == STORAGE_PERSISTENT)
1979                         (void) mkdir("/var/log/journal/", 0755);
1980
1981                 fn = strappend("/var/log/journal/", ids);
1982                 if (!fn)
1983                         return -ENOMEM;
1984
1985                 (void) mkdir(fn, 0755);
1986                 free(fn);
1987
1988                 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
1989                 if (!fn)
1990                         return -ENOMEM;
1991
1992                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, &s->system_metrics, NULL, &s->system_journal);
1993                 free(fn);
1994
1995                 if (r >= 0) {
1996                         s->system_journal->compress = s->compress;
1997
1998                         server_fix_perms(s, s->system_journal, 0);
1999                 } else if (r < 0) {
2000
2001                         if (r != -ENOENT && r != -EROFS)
2002                                 log_warning("Failed to open system journal: %s", strerror(-r));
2003
2004                         r = 0;
2005                 }
2006         }
2007
2008         if (!s->runtime_journal &&
2009             (s->storage != STORAGE_NONE)) {
2010
2011                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
2012                 if (!fn)
2013                         return -ENOMEM;
2014
2015                 if (s->system_journal) {
2016
2017                         /* Try to open the runtime journal, but only
2018                          * if it already exists, so that we can flush
2019                          * it into the system journal */
2020
2021                         r = journal_file_open(fn, O_RDWR, 0640, &s->runtime_metrics, NULL, &s->runtime_journal);
2022                         free(fn);
2023
2024                         if (r < 0) {
2025                                 if (r != -ENOENT)
2026                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
2027
2028                                 r = 0;
2029                         }
2030
2031                 } else {
2032
2033                         /* OK, we really need the runtime journal, so create
2034                          * it if necessary. */
2035
2036                         (void) mkdir_parents(fn, 0755);
2037                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, &s->runtime_metrics, NULL, &s->runtime_journal);
2038                         free(fn);
2039
2040                         if (r < 0) {
2041                                 log_error("Failed to open runtime journal: %s", strerror(-r));
2042                                 return r;
2043                         }
2044                 }
2045
2046                 if (s->runtime_journal) {
2047                         s->runtime_journal->compress = s->compress;
2048
2049                         server_fix_perms(s, s->runtime_journal, 0);
2050                 }
2051         }
2052
2053         return r;
2054 }
2055
2056 static int server_flush_to_var(Server *s) {
2057         Object *o = NULL;
2058         int r;
2059         sd_id128_t machine;
2060         sd_journal *j;
2061
2062         assert(s);
2063
2064         if (s->storage != STORAGE_AUTO &&
2065             s->storage != STORAGE_PERSISTENT)
2066                 return 0;
2067
2068         if (!s->runtime_journal)
2069                 return 0;
2070
2071         system_journal_open(s);
2072
2073         if (!s->system_journal)
2074                 return 0;
2075
2076         log_info("Flushing to /var...");
2077
2078         r = sd_id128_get_machine(&machine);
2079         if (r < 0) {
2080                 log_error("Failed to get machine id: %s", strerror(-r));
2081                 return r;
2082         }
2083
2084         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
2085         if (r < 0) {
2086                 log_error("Failed to read runtime journal: %s", strerror(-r));
2087                 return r;
2088         }
2089
2090         SD_JOURNAL_FOREACH(j) {
2091                 JournalFile *f;
2092
2093                 f = j->current_file;
2094                 assert(f && f->current_offset > 0);
2095
2096                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2097                 if (r < 0) {
2098                         log_error("Can't read entry: %s", strerror(-r));
2099                         goto finish;
2100                 }
2101
2102                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2103                 if (r == -E2BIG) {
2104                         log_info("Allocation limit reached.");
2105
2106                         journal_file_post_change(s->system_journal);
2107                         server_rotate(s);
2108                         server_vacuum(s);
2109
2110                         r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2111                 }
2112
2113                 if (r < 0) {
2114                         log_error("Can't write entry: %s", strerror(-r));
2115                         goto finish;
2116                 }
2117         }
2118
2119 finish:
2120         journal_file_post_change(s->system_journal);
2121
2122         journal_file_close(s->runtime_journal);
2123         s->runtime_journal = NULL;
2124
2125         if (r >= 0)
2126                 rm_rf("/run/log/journal", false, true, false);
2127
2128         return r;
2129 }
2130
2131 static int server_read_dev_kmsg(Server *s) {
2132         char buffer[8192+1]; /* the kernel-side limit per record is 8K currently */
2133         ssize_t l;
2134
2135         assert(s);
2136         assert(s->dev_kmsg_fd >= 0);
2137
2138         l = read(s->dev_kmsg_fd, buffer, sizeof(buffer) - 1);
2139         if (l == 0)
2140                 return 0;
2141         if (l < 0) {
2142                 /* Old kernels who don't allow reading from /dev/kmsg
2143                  * return EINVAL when we try. So handle this cleanly,
2144                  * but don' try to ever read from it again. */
2145                 if (errno == EINVAL) {
2146                         epoll_ctl(s->epoll_fd, EPOLL_CTL_DEL, s->dev_kmsg_fd, NULL);
2147                         return 0;
2148                 }
2149
2150                 if (errno == EAGAIN || errno == EINTR)
2151                         return 0;
2152
2153                 log_error("Failed to read from kernel: %m");
2154                 return -errno;
2155         }
2156
2157         dev_kmsg_record(s, buffer, l);
2158         return 1;
2159 }
2160
2161 static int server_flush_dev_kmsg(Server *s) {
2162         int r;
2163
2164         assert(s);
2165
2166         if (s->dev_kmsg_fd < 0)
2167                 return 0;
2168
2169         if (!s->dev_kmsg_readable)
2170                 return 0;
2171
2172         log_info("Flushing /dev/kmsg...");
2173
2174         for (;;) {
2175                 r = server_read_dev_kmsg(s);
2176                 if (r < 0)
2177                         return r;
2178
2179                 if (r == 0)
2180                         break;
2181         }
2182
2183         return 0;
2184 }
2185
2186 static int process_event(Server *s, struct epoll_event *ev) {
2187         assert(s);
2188         assert(ev);
2189
2190         if (ev->data.fd == s->signal_fd) {
2191                 struct signalfd_siginfo sfsi;
2192                 ssize_t n;
2193
2194                 if (ev->events != EPOLLIN) {
2195                         log_info("Got invalid event from epoll.");
2196                         return -EIO;
2197                 }
2198
2199                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
2200                 if (n != sizeof(sfsi)) {
2201
2202                         if (n >= 0)
2203                                 return -EIO;
2204
2205                         if (errno == EINTR || errno == EAGAIN)
2206                                 return 1;
2207
2208                         return -errno;
2209                 }
2210
2211                 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
2212
2213                 if (sfsi.ssi_signo == SIGUSR1) {
2214                         touch("/run/systemd/journal/flushed");
2215                         server_flush_to_var(s);
2216                         return 1;
2217                 }
2218
2219                 if (sfsi.ssi_signo == SIGUSR2) {
2220                         server_rotate(s);
2221                         server_vacuum(s);
2222                         return 1;
2223                 }
2224
2225                 return 0;
2226
2227         } else if (ev->data.fd == s->dev_kmsg_fd) {
2228                 int r;
2229
2230                 if (ev->events != EPOLLIN) {
2231                         log_info("Got invalid event from epoll.");
2232                         return -EIO;
2233                 }
2234
2235                 r = server_read_dev_kmsg(s);
2236                 if (r < 0)
2237                         return r;
2238
2239                 return 1;
2240
2241         } else if (ev->data.fd == s->native_fd ||
2242                    ev->data.fd == s->syslog_fd) {
2243
2244                 if (ev->events != EPOLLIN) {
2245                         log_info("Got invalid event from epoll.");
2246                         return -EIO;
2247                 }
2248
2249                 for (;;) {
2250                         struct msghdr msghdr;
2251                         struct iovec iovec;
2252                         struct ucred *ucred = NULL;
2253                         struct timeval *tv = NULL;
2254                         struct cmsghdr *cmsg;
2255                         char *label = NULL;
2256                         size_t label_len = 0;
2257                         union {
2258                                 struct cmsghdr cmsghdr;
2259
2260                                 /* We use NAME_MAX space for the
2261                                  * SELinux label here. The kernel
2262                                  * currently enforces no limit, but
2263                                  * according to suggestions from the
2264                                  * SELinux people this will change and
2265                                  * it will probably be identical to
2266                                  * NAME_MAX. For now we use that, but
2267                                  * this should be updated one day when
2268                                  * the final limit is known.*/
2269                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
2270                                             CMSG_SPACE(sizeof(struct timeval)) +
2271                                             CMSG_SPACE(sizeof(int)) + /* fd */
2272                                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
2273                         } control;
2274                         ssize_t n;
2275                         int v;
2276                         int *fds = NULL;
2277                         unsigned n_fds = 0;
2278
2279                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
2280                                 log_error("SIOCINQ failed: %m");
2281                                 return -errno;
2282                         }
2283
2284                         if (s->buffer_size < (size_t) v) {
2285                                 void *b;
2286                                 size_t l;
2287
2288                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
2289                                 b = realloc(s->buffer, l+1);
2290
2291                                 if (!b) {
2292                                         log_error("Couldn't increase buffer.");
2293                                         return -ENOMEM;
2294                                 }
2295
2296                                 s->buffer_size = l;
2297                                 s->buffer = b;
2298                         }
2299
2300                         zero(iovec);
2301                         iovec.iov_base = s->buffer;
2302                         iovec.iov_len = s->buffer_size;
2303
2304                         zero(control);
2305                         zero(msghdr);
2306                         msghdr.msg_iov = &iovec;
2307                         msghdr.msg_iovlen = 1;
2308                         msghdr.msg_control = &control;
2309                         msghdr.msg_controllen = sizeof(control);
2310
2311                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
2312                         if (n < 0) {
2313
2314                                 if (errno == EINTR || errno == EAGAIN)
2315                                         return 1;
2316
2317                                 log_error("recvmsg() failed: %m");
2318                                 return -errno;
2319                         }
2320
2321                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
2322
2323                                 if (cmsg->cmsg_level == SOL_SOCKET &&
2324                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
2325                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
2326                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
2327                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2328                                          cmsg->cmsg_type == SCM_SECURITY) {
2329                                         label = (char*) CMSG_DATA(cmsg);
2330                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
2331                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
2332                                          cmsg->cmsg_type == SO_TIMESTAMP &&
2333                                          cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
2334                                         tv = (struct timeval*) CMSG_DATA(cmsg);
2335                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2336                                          cmsg->cmsg_type == SCM_RIGHTS) {
2337                                         fds = (int*) CMSG_DATA(cmsg);
2338                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
2339                                 }
2340                         }
2341
2342                         if (ev->data.fd == s->syslog_fd) {
2343                                 char *e;
2344
2345                                 if (n > 0 && n_fds == 0) {
2346                                         e = memchr(s->buffer, '\n', n);
2347                                         if (e)
2348                                                 *e = 0;
2349                                         else
2350                                                 s->buffer[n] = 0;
2351
2352                                         process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
2353                                 } else if (n_fds > 0)
2354                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
2355
2356                         } else {
2357                                 if (n > 0 && n_fds == 0)
2358                                         process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
2359                                 else if (n == 0 && n_fds == 1)
2360                                         process_native_file(s, fds[0], ucred, tv, label, label_len);
2361                                 else if (n_fds > 0)
2362                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
2363                         }
2364
2365                         close_many(fds, n_fds);
2366                 }
2367
2368                 return 1;
2369
2370         } else if (ev->data.fd == s->stdout_fd) {
2371
2372                 if (ev->events != EPOLLIN) {
2373                         log_info("Got invalid event from epoll.");
2374                         return -EIO;
2375                 }
2376
2377                 stdout_stream_new(s);
2378                 return 1;
2379
2380         } else {
2381                 StdoutStream *stream;
2382
2383                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
2384                         log_info("Got invalid event from epoll.");
2385                         return -EIO;
2386                 }
2387
2388                 /* If it is none of the well-known fds, it must be an
2389                  * stdout stream fd. Note that this is a bit ugly here
2390                  * (since we rely that none of the well-known fds
2391                  * could be interpreted as pointer), but nonetheless
2392                  * safe, since the well-known fds would never get an
2393                  * fd > 4096, i.e. beyond the first memory page */
2394
2395                 stream = ev->data.ptr;
2396
2397                 if (stdout_stream_process(stream) <= 0)
2398                         stdout_stream_free(stream);
2399
2400                 return 1;
2401         }
2402
2403         log_error("Unknown event.");
2404         return 0;
2405 }
2406
2407 static int open_syslog_socket(Server *s) {
2408         union sockaddr_union sa;
2409         int one, r;
2410         struct epoll_event ev;
2411
2412         assert(s);
2413
2414         if (s->syslog_fd < 0) {
2415
2416                 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2417                 if (s->syslog_fd < 0) {
2418                         log_error("socket() failed: %m");
2419                         return -errno;
2420                 }
2421
2422                 zero(sa);
2423                 sa.un.sun_family = AF_UNIX;
2424                 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
2425
2426                 unlink(sa.un.sun_path);
2427
2428                 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2429                 if (r < 0) {
2430                         log_error("bind() failed: %m");
2431                         return -errno;
2432                 }
2433
2434                 chmod(sa.un.sun_path, 0666);
2435         } else
2436                 fd_nonblock(s->syslog_fd, 1);
2437
2438         one = 1;
2439         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2440         if (r < 0) {
2441                 log_error("SO_PASSCRED failed: %m");
2442                 return -errno;
2443         }
2444
2445 #ifdef HAVE_SELINUX
2446         one = 1;
2447         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2448         if (r < 0)
2449                 log_warning("SO_PASSSEC failed: %m");
2450 #endif
2451
2452         one = 1;
2453         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2454         if (r < 0) {
2455                 log_error("SO_TIMESTAMP failed: %m");
2456                 return -errno;
2457         }
2458
2459         zero(ev);
2460         ev.events = EPOLLIN;
2461         ev.data.fd = s->syslog_fd;
2462         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2463                 log_error("Failed to add syslog server fd to epoll object: %m");
2464                 return -errno;
2465         }
2466
2467         return 0;
2468 }
2469
2470 static int open_native_socket(Server*s) {
2471         union sockaddr_union sa;
2472         int one, r;
2473         struct epoll_event ev;
2474
2475         assert(s);
2476
2477         if (s->native_fd < 0) {
2478
2479                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2480                 if (s->native_fd < 0) {
2481                         log_error("socket() failed: %m");
2482                         return -errno;
2483                 }
2484
2485                 zero(sa);
2486                 sa.un.sun_family = AF_UNIX;
2487                 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
2488
2489                 unlink(sa.un.sun_path);
2490
2491                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2492                 if (r < 0) {
2493                         log_error("bind() failed: %m");
2494                         return -errno;
2495                 }
2496
2497                 chmod(sa.un.sun_path, 0666);
2498         } else
2499                 fd_nonblock(s->native_fd, 1);
2500
2501         one = 1;
2502         r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2503         if (r < 0) {
2504                 log_error("SO_PASSCRED failed: %m");
2505                 return -errno;
2506         }
2507
2508 #ifdef HAVE_SELINUX
2509         one = 1;
2510         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2511         if (r < 0)
2512                 log_warning("SO_PASSSEC failed: %m");
2513 #endif
2514
2515         one = 1;
2516         r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2517         if (r < 0) {
2518                 log_error("SO_TIMESTAMP failed: %m");
2519                 return -errno;
2520         }
2521
2522         zero(ev);
2523         ev.events = EPOLLIN;
2524         ev.data.fd = s->native_fd;
2525         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2526                 log_error("Failed to add native server fd to epoll object: %m");
2527                 return -errno;
2528         }
2529
2530         return 0;
2531 }
2532
2533 static int open_stdout_socket(Server *s) {
2534         union sockaddr_union sa;
2535         int r;
2536         struct epoll_event ev;
2537
2538         assert(s);
2539
2540         if (s->stdout_fd < 0) {
2541
2542                 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2543                 if (s->stdout_fd < 0) {
2544                         log_error("socket() failed: %m");
2545                         return -errno;
2546                 }
2547
2548                 zero(sa);
2549                 sa.un.sun_family = AF_UNIX;
2550                 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
2551
2552                 unlink(sa.un.sun_path);
2553
2554                 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2555                 if (r < 0) {
2556                         log_error("bind() failed: %m");
2557                         return -errno;
2558                 }
2559
2560                 chmod(sa.un.sun_path, 0666);
2561
2562                 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2563                         log_error("liste() failed: %m");
2564                         return -errno;
2565                 }
2566         } else
2567                 fd_nonblock(s->stdout_fd, 1);
2568
2569         zero(ev);
2570         ev.events = EPOLLIN;
2571         ev.data.fd = s->stdout_fd;
2572         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2573                 log_error("Failed to add stdout server fd to epoll object: %m");
2574                 return -errno;
2575         }
2576
2577         return 0;
2578 }
2579
2580 static int open_dev_kmsg(Server *s) {
2581         struct epoll_event ev;
2582
2583         assert(s);
2584
2585         s->dev_kmsg_fd = open("/dev/kmsg", O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2586         if (s->dev_kmsg_fd < 0) {
2587                 log_warning("Failed to open /dev/kmsg, ignoring: %m");
2588                 return 0;
2589         }
2590
2591         zero(ev);
2592         ev.events = EPOLLIN;
2593         ev.data.fd = s->dev_kmsg_fd;
2594         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->dev_kmsg_fd, &ev) < 0) {
2595
2596                 /* This will fail with EPERM on older kernels where
2597                  * /dev/kmsg is not readable. */
2598                 if (errno == EPERM)
2599                         return 0;
2600
2601                 log_error("Failed to add /dev/kmsg fd to epoll object: %m");
2602                 return -errno;
2603         }
2604
2605         s->dev_kmsg_readable = true;
2606
2607         return 0;
2608 }
2609
2610 static int open_signalfd(Server *s) {
2611         sigset_t mask;
2612         struct epoll_event ev;
2613
2614         assert(s);
2615
2616         assert_se(sigemptyset(&mask) == 0);
2617         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
2618         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2619
2620         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2621         if (s->signal_fd < 0) {
2622                 log_error("signalfd(): %m");
2623                 return -errno;
2624         }
2625
2626         zero(ev);
2627         ev.events = EPOLLIN;
2628         ev.data.fd = s->signal_fd;
2629
2630         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2631                 log_error("epoll_ctl(): %m");
2632                 return -errno;
2633         }
2634
2635         return 0;
2636 }
2637
2638 static int server_parse_proc_cmdline(Server *s) {
2639         char *line, *w, *state;
2640         int r;
2641         size_t l;
2642
2643         if (detect_container(NULL) > 0)
2644                 return 0;
2645
2646         r = read_one_line_file("/proc/cmdline", &line);
2647         if (r < 0) {
2648                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2649                 return 0;
2650         }
2651
2652         FOREACH_WORD_QUOTED(w, l, line, state) {
2653                 char *word;
2654
2655                 word = strndup(w, l);
2656                 if (!word) {
2657                         r = -ENOMEM;
2658                         goto finish;
2659                 }
2660
2661                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
2662                         r = parse_boolean(word + 35);
2663                         if (r < 0)
2664                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2665                         else
2666                                 s->forward_to_syslog = r;
2667                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
2668                         r = parse_boolean(word + 33);
2669                         if (r < 0)
2670                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2671                         else
2672                                 s->forward_to_kmsg = r;
2673                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
2674                         r = parse_boolean(word + 36);
2675                         if (r < 0)
2676                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2677                         else
2678                                 s->forward_to_console = r;
2679                 } else if (startswith(word, "systemd.journald"))
2680                         log_warning("Invalid systemd.journald parameter. Ignoring.");
2681
2682                 free(word);
2683         }
2684
2685         r = 0;
2686
2687 finish:
2688         free(line);
2689         return r;
2690 }
2691
2692 static int server_parse_config_file(Server *s) {
2693         FILE *f;
2694         const char *fn;
2695         int r;
2696
2697         assert(s);
2698
2699         fn = "/etc/systemd/journald.conf";
2700         f = fopen(fn, "re");
2701         if (!f) {
2702                 if (errno == ENOENT)
2703                         return 0;
2704
2705                 log_warning("Failed to open configuration file %s: %m", fn);
2706                 return -errno;
2707         }
2708
2709         r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2710         if (r < 0)
2711                 log_warning("Failed to parse configuration file: %s", strerror(-r));
2712
2713         fclose(f);
2714
2715         return r;
2716 }
2717
2718 static int server_init(Server *s) {
2719         int n, r, fd;
2720
2721         assert(s);
2722
2723         zero(*s);
2724         s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
2725         s->compress = true;
2726
2727         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
2728         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
2729
2730         s->forward_to_syslog = true;
2731
2732         s->max_level_store = LOG_DEBUG;
2733         s->max_level_syslog = LOG_DEBUG;
2734         s->max_level_kmsg = LOG_NOTICE;
2735         s->max_level_console = LOG_INFO;
2736
2737         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
2738         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
2739
2740         server_parse_config_file(s);
2741         server_parse_proc_cmdline(s);
2742
2743         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
2744         if (!s->user_journals)
2745                 return log_oom();
2746
2747         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2748         if (s->epoll_fd < 0) {
2749                 log_error("Failed to create epoll object: %m");
2750                 return -errno;
2751         }
2752
2753         n = sd_listen_fds(true);
2754         if (n < 0) {
2755                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
2756                 return n;
2757         }
2758
2759         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2760
2761                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
2762
2763                         if (s->native_fd >= 0) {
2764                                 log_error("Too many native sockets passed.");
2765                                 return -EINVAL;
2766                         }
2767
2768                         s->native_fd = fd;
2769
2770                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
2771
2772                         if (s->stdout_fd >= 0) {
2773                                 log_error("Too many stdout sockets passed.");
2774                                 return -EINVAL;
2775                         }
2776
2777                         s->stdout_fd = fd;
2778
2779                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
2780
2781                         if (s->syslog_fd >= 0) {
2782                                 log_error("Too many /dev/log sockets passed.");
2783                                 return -EINVAL;
2784                         }
2785
2786                         s->syslog_fd = fd;
2787
2788                 } else {
2789                         log_error("Unknown socket passed.");
2790                         return -EINVAL;
2791                 }
2792         }
2793
2794         r = open_syslog_socket(s);
2795         if (r < 0)
2796                 return r;
2797
2798         r = open_native_socket(s);
2799         if (r < 0)
2800                 return r;
2801
2802         r = open_stdout_socket(s);
2803         if (r < 0)
2804                 return r;
2805
2806         r = open_dev_kmsg(s);
2807         if (r < 0)
2808                 return r;
2809
2810         r = open_signalfd(s);
2811         if (r < 0)
2812                 return r;
2813
2814         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2815         if (!s->rate_limit)
2816                 return -ENOMEM;
2817
2818         r = system_journal_open(s);
2819         if (r < 0)
2820                 return r;
2821
2822         return 0;
2823 }
2824
2825 static void server_done(Server *s) {
2826         JournalFile *f;
2827         assert(s);
2828
2829         while (s->stdout_streams)
2830                 stdout_stream_free(s->stdout_streams);
2831
2832         if (s->system_journal)
2833                 journal_file_close(s->system_journal);
2834
2835         if (s->runtime_journal)
2836                 journal_file_close(s->runtime_journal);
2837
2838         while ((f = hashmap_steal_first(s->user_journals)))
2839                 journal_file_close(f);
2840
2841         hashmap_free(s->user_journals);
2842
2843         if (s->epoll_fd >= 0)
2844                 close_nointr_nofail(s->epoll_fd);
2845
2846         if (s->signal_fd >= 0)
2847                 close_nointr_nofail(s->signal_fd);
2848
2849         if (s->syslog_fd >= 0)
2850                 close_nointr_nofail(s->syslog_fd);
2851
2852         if (s->native_fd >= 0)
2853                 close_nointr_nofail(s->native_fd);
2854
2855         if (s->stdout_fd >= 0)
2856                 close_nointr_nofail(s->stdout_fd);
2857
2858         if (s->dev_kmsg_fd >= 0)
2859                 close_nointr_nofail(s->dev_kmsg_fd);
2860
2861         if (s->rate_limit)
2862                 journal_rate_limit_free(s->rate_limit);
2863
2864         free(s->buffer);
2865         free(s->tty_path);
2866 }
2867
2868 int main(int argc, char *argv[]) {
2869         Server server;
2870         int r;
2871
2872         /* if (getppid() != 1) { */
2873         /*         log_error("This program should be invoked by init only."); */
2874         /*         return EXIT_FAILURE; */
2875         /* } */
2876
2877         if (argc > 1) {
2878                 log_error("This program does not take arguments.");
2879                 return EXIT_FAILURE;
2880         }
2881
2882         log_set_target(LOG_TARGET_SAFE);
2883         log_set_facility(LOG_SYSLOG);
2884         log_parse_environment();
2885         log_open();
2886
2887         umask(0022);
2888
2889         r = server_init(&server);
2890         if (r < 0)
2891                 goto finish;
2892
2893         server_vacuum(&server);
2894         server_flush_to_var(&server);
2895         server_flush_dev_kmsg(&server);
2896
2897         log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
2898         driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
2899
2900         sd_notify(false,
2901                   "READY=1\n"
2902                   "STATUS=Processing requests...");
2903
2904         for (;;) {
2905                 struct epoll_event event;
2906
2907                 r = epoll_wait(server.epoll_fd, &event, 1, -1);
2908                 if (r < 0) {
2909
2910                         if (errno == EINTR)
2911                                 continue;
2912
2913                         log_error("epoll_wait() failed: %m");
2914                         r = -errno;
2915                         goto finish;
2916                 } else if (r == 0)
2917                         break;
2918
2919                 r = process_event(&server, &event);
2920                 if (r < 0)
2921                         goto finish;
2922                 else if (r == 0)
2923                         break;
2924         }
2925
2926         log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
2927         driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
2928
2929 finish:
2930         sd_notify(false,
2931                   "STATUS=Shutting down...");
2932
2933         server_done(&server);
2934
2935         return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
2936 }