chiark / gitweb /
journald: also parse kernel key/value fields and store them prefixed with _KERNEL_...
[elogind.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
32
33 #include <systemd/sd-journal.h>
34 #include <systemd/sd-messages.h>
35 #include <systemd/sd-daemon.h>
36
37 #ifdef HAVE_LOGIND
38 #include <systemd/sd-login.h>
39 #endif
40
41 #include "mkdir.h"
42 #include "hashmap.h"
43 #include "journal-file.h"
44 #include "socket-util.h"
45 #include "cgroup-util.h"
46 #include "list.h"
47 #include "journal-rate-limit.h"
48 #include "journal-internal.h"
49 #include "conf-parser.h"
50 #include "journald.h"
51 #include "virt.h"
52 #include "missing.h"
53
54 #ifdef HAVE_ACL
55 #include <sys/acl.h>
56 #include <acl/libacl.h>
57 #include "acl-util.h"
58 #endif
59
60 #ifdef HAVE_SELINUX
61 #include <selinux/selinux.h>
62 #endif
63
/* find_journal() closes already-open per-user journal files until fewer
 * than this many remain before it opens another one. */
#define USER_JOURNALS_MAX 1024
/* NOTE(review): presumably the cap on concurrently connected stdout
 * streams; it is not referenced in this part of the file -- confirm. */
#define STDOUT_STREAMS_MAX 4096

/* NOTE(review): presumably the defaults handed to the rate limiter
 * (interval in usec, messages per interval); the code that consumes them
 * is not in view -- confirm. */
#define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 200

/* available_space() keeps returning its cached result for this long
 * before it rescans the journal directory. */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)

/* Number of iovec slots dispatch_message_real() asserts are free beyond
 * the caller's fields, for the metadata fields it appends. */
#define N_IOVEC_META_FIELDS 17
/* NOTE(review): presumably the slot budget for parsed kernel key/value
 * fields; not referenced in this part of the file -- confirm. */
#define N_IOVEC_KERNEL_FIELDS 64

/* 32 MiB. NOTE(review): presumably the largest entry accepted from a
 * client; the check is not in view -- confirm. */
#define ENTRY_SIZE_MAX (1024*1024*32)
76
/* State of a StdoutStream (stored in its 'state' member).
 *
 * NOTE(review): the names mirror the StdoutStream members (identifier,
 * unit_id, priority, level_prefix, forward_to_*), so this is presumably
 * the order in which header lines are consumed before the stream starts
 * carrying log lines (RUNNING); the parser is not in view -- confirm. */
typedef enum StdoutStreamState {
        STDOUT_STREAM_IDENTIFIER,
        STDOUT_STREAM_UNIT_ID,
        STDOUT_STREAM_PRIORITY,
        STDOUT_STREAM_LEVEL_PREFIX,
        STDOUT_STREAM_FORWARD_TO_SYSLOG,
        STDOUT_STREAM_FORWARD_TO_KMSG,
        STDOUT_STREAM_FORWARD_TO_CONSOLE,
        STDOUT_STREAM_RUNNING
} StdoutStreamState;
87
/* Per-connection bookkeeping for one stdout stream.
 *
 * NOTE(review): the code that fills and consumes these members is outside
 * this part of the file; the notes below are read off names and types
 * only -- confirm against the stream handling code. */
struct StdoutStream {
        Server *server;                 /* owning server object */
        StdoutStreamState state;        /* see StdoutStreamState */

        int fd;                         /* presumably the connection's file descriptor */

        struct ucred ucred;             /* presumably the peer's credentials */
#ifdef HAVE_SELINUX
        security_context_t security_context;
#endif

        char *identifier;
        char *unit_id;
        int priority;
        bool level_prefix:1;
        bool forward_to_syslog:1;
        bool forward_to_kmsg:1;
        bool forward_to_console:1;

        /* Room for LINE_MAX bytes plus one extra byte; 'length' presumably
         * counts the bytes currently held in 'buffer'. */
        char buffer[LINE_MAX+1];
        size_t length;

        /* Linkage for a list of StdoutStream objects (list.h). */
        LIST_FIELDS(StdoutStream, stdout_stream);
};
112
/* String names of the Storage enum values, indexed by enum value. */
static const char* const storage_table[] = {
        [STORAGE_AUTO] = "auto",
        [STORAGE_VOLATILE] = "volatile",
        [STORAGE_PERSISTENT] = "persistent",
        [STORAGE_NONE] = "none"
};

/* Macro-generated helpers built on storage_table: string<->enum lookup
 * and the config parser callback config_parse_storage. NOTE(review): the
 * exact functions emitted are defined by the macros, which are not in
 * view. */
DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
122
123 static uint64_t available_space(Server *s) {
124         char ids[33], *p;
125         const char *f;
126         sd_id128_t machine;
127         struct statvfs ss;
128         uint64_t sum = 0, avail = 0, ss_avail = 0;
129         int r;
130         DIR *d;
131         usec_t ts;
132         JournalMetrics *m;
133
134         ts = now(CLOCK_MONOTONIC);
135
136         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
137                 return s->cached_available_space;
138
139         r = sd_id128_get_machine(&machine);
140         if (r < 0)
141                 return 0;
142
143         if (s->system_journal) {
144                 f = "/var/log/journal/";
145                 m = &s->system_metrics;
146         } else {
147                 f = "/run/log/journal/";
148                 m = &s->runtime_metrics;
149         }
150
151         assert(m);
152
153         p = strappend(f, sd_id128_to_string(machine, ids));
154         if (!p)
155                 return 0;
156
157         d = opendir(p);
158         free(p);
159
160         if (!d)
161                 return 0;
162
163         if (fstatvfs(dirfd(d), &ss) < 0)
164                 goto finish;
165
166         for (;;) {
167                 struct stat st;
168                 struct dirent buf, *de;
169
170                 r = readdir_r(d, &buf, &de);
171                 if (r != 0)
172                         break;
173
174                 if (!de)
175                         break;
176
177                 if (!endswith(de->d_name, ".journal") &&
178                     !endswith(de->d_name, ".journal~"))
179                         continue;
180
181                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
182                         continue;
183
184                 if (!S_ISREG(st.st_mode))
185                         continue;
186
187                 sum += (uint64_t) st.st_blocks * 512UL;
188         }
189
190         avail = sum >= m->max_use ? 0 : m->max_use - sum;
191
192         ss_avail = ss.f_bsize * ss.f_bavail;
193
194         ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
195
196         if (ss_avail < avail)
197                 avail = ss_avail;
198
199         s->cached_available_space = avail;
200         s->cached_available_space_timestamp = ts;
201
202 finish:
203         closedir(d);
204
205         return avail;
206 }
207
208 static void server_read_file_gid(Server *s) {
209         const char *adm = "adm";
210         int r;
211
212         assert(s);
213
214         if (s->file_gid_valid)
215                 return;
216
217         r = get_group_creds(&adm, &s->file_gid);
218         if (r < 0)
219                 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
220
221         /* if we couldn't read the gid, then it will be 0, but that's
222          * fine and we shouldn't try to resolve the group again, so
223          * let's just pretend it worked right-away. */
224         s->file_gid_valid = true;
225 }
226
227 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
228         int r;
229 #ifdef HAVE_ACL
230         acl_t acl;
231         acl_entry_t entry;
232         acl_permset_t permset;
233 #endif
234
235         assert(f);
236
237         server_read_file_gid(s);
238
239         r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
240         if (r < 0)
241                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
242
243 #ifdef HAVE_ACL
244         if (uid <= 0)
245                 return;
246
247         acl = acl_get_fd(f->fd);
248         if (!acl) {
249                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
250                 return;
251         }
252
253         r = acl_find_uid(acl, uid, &entry);
254         if (r <= 0) {
255
256                 if (acl_create_entry(&acl, &entry) < 0 ||
257                     acl_set_tag_type(entry, ACL_USER) < 0 ||
258                     acl_set_qualifier(entry, &uid) < 0) {
259                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
260                         goto finish;
261                 }
262         }
263
264         if (acl_get_permset(entry, &permset) < 0 ||
265             acl_add_perm(permset, ACL_READ) < 0 ||
266             acl_calc_mask(&acl) < 0) {
267                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
268                 goto finish;
269         }
270
271         if (acl_set_fd(f->fd, acl) < 0)
272                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
273
274 finish:
275         acl_free(acl);
276 #endif
277 }
278
279 static JournalFile* find_journal(Server *s, uid_t uid) {
280         char *p;
281         int r;
282         JournalFile *f;
283         char ids[33];
284         sd_id128_t machine;
285
286         assert(s);
287
288         /* We split up user logs only on /var, not on /run. If the
289          * runtime file is open, we write to it exclusively, in order
290          * to guarantee proper order as soon as we flush /run to
291          * /var and close the runtime file. */
292
293         if (s->runtime_journal)
294                 return s->runtime_journal;
295
296         if (uid <= 0)
297                 return s->system_journal;
298
299         r = sd_id128_get_machine(&machine);
300         if (r < 0)
301                 return s->system_journal;
302
303         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
304         if (f)
305                 return f;
306
307         if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0)
308                 return s->system_journal;
309
310         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
311                 /* Too many open? Then let's close one */
312                 f = hashmap_steal_first(s->user_journals);
313                 assert(f);
314                 journal_file_close(f);
315         }
316
317         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, &s->system_metrics, s->system_journal, &f);
318         free(p);
319
320         if (r < 0)
321                 return s->system_journal;
322
323         server_fix_perms(s, f, uid);
324
325         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
326         if (r < 0) {
327                 journal_file_close(f);
328                 return s->system_journal;
329         }
330
331         return f;
332 }
333
334 static void server_rotate(Server *s) {
335         JournalFile *f;
336         void *k;
337         Iterator i;
338         int r;
339
340         log_info("Rotating...");
341
342         if (s->runtime_journal) {
343                 r = journal_file_rotate(&s->runtime_journal);
344                 if (r < 0)
345                         if (s->runtime_journal)
346                                 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
347                         else
348                                 log_error("Failed to create new runtime journal: %s", strerror(-r));
349                 else
350                         server_fix_perms(s, s->runtime_journal, 0);
351         }
352
353         if (s->system_journal) {
354                 r = journal_file_rotate(&s->system_journal);
355                 if (r < 0)
356                         if (s->system_journal)
357                                 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
358                         else
359                                 log_error("Failed to create new system journal: %s", strerror(-r));
360
361                 else
362                         server_fix_perms(s, s->system_journal, 0);
363         }
364
365         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
366                 r = journal_file_rotate(&f);
367                 if (r < 0)
368                         if (f->path)
369                                 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
370                         else
371                                 log_error("Failed to create user journal: %s", strerror(-r));
372                 else {
373                         hashmap_replace(s->user_journals, k, f);
374                         server_fix_perms(s, s->system_journal, PTR_TO_UINT32(k));
375                 }
376         }
377 }
378
379 static void server_vacuum(Server *s) {
380         char *p;
381         char ids[33];
382         sd_id128_t machine;
383         int r;
384
385         log_info("Vacuuming...");
386
387         r = sd_id128_get_machine(&machine);
388         if (r < 0) {
389                 log_error("Failed to get machine ID: %s", strerror(-r));
390                 return;
391         }
392
393         sd_id128_to_string(machine, ids);
394
395         if (s->system_journal) {
396                 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
397                         log_oom();
398                         return;
399                 }
400
401                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
402                 if (r < 0 && r != -ENOENT)
403                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
404                 free(p);
405         }
406
407         if (s->runtime_journal) {
408                 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
409                         log_oom();
410                         return;
411                 }
412
413                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
414                 if (r < 0 && r != -ENOENT)
415                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
416                 free(p);
417         }
418
419         s->cached_available_space_timestamp = 0;
420 }
421
422 static char *shortened_cgroup_path(pid_t pid) {
423         int r;
424         char *process_path, *init_path, *path;
425
426         assert(pid > 0);
427
428         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
429         if (r < 0)
430                 return NULL;
431
432         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
433         if (r < 0) {
434                 free(process_path);
435                 return NULL;
436         }
437
438         if (endswith(init_path, "/system"))
439                 init_path[strlen(init_path) - 7] = 0;
440         else if (streq(init_path, "/"))
441                 init_path[0] = 0;
442
443         if (startswith(process_path, init_path)) {
444                 char *p;
445
446                 p = strdup(process_path + strlen(init_path));
447                 if (!p) {
448                         free(process_path);
449                         free(init_path);
450                         return NULL;
451                 }
452                 path = p;
453         } else {
454                 path = process_path;
455                 process_path = NULL;
456         }
457
458         free(process_path);
459         free(init_path);
460
461         return path;
462 }
463
464 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
465         JournalFile *f;
466         bool vacuumed = false;
467         int r;
468
469         assert(s);
470         assert(iovec);
471         assert(n > 0);
472
473         f = find_journal(s, uid);
474         if (!f)
475                 return;
476
477         if (journal_file_rotate_suggested(f)) {
478                 log_info("Journal header limits reached or header out-of-date, rotating.");
479                 server_rotate(s);
480                 server_vacuum(s);
481                 vacuumed = true;
482
483                 f = find_journal(s, uid);
484                 if (!f)
485                         return;
486         }
487
488         for (;;) {
489                 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
490                 if (r >= 0)
491                         return;
492
493                 if (vacuumed ||
494                     (r != -E2BIG && /* hit limit */
495                      r != -EFBIG && /* hit fs limit */
496                      r != -EDQUOT && /* quota hit */
497                      r != -ENOSPC && /* disk full */
498                      r != -EBADMSG && /* corrupted */
499                      r != -ENODATA && /* truncated */
500                      r != -EHOSTDOWN && /* other machine */
501                      r != -EPROTONOSUPPORT && /* unsupported feature */
502                      r != -EBUSY && /* unclean shutdown */
503                      r != -ESHUTDOWN /* already archived */)) {
504                         log_error("Failed to write entry, ignoring: %s", strerror(-r));
505                         return;
506                 }
507
508                 if (r == -E2BIG || r == -EFBIG || r == EDQUOT || r == ENOSPC)
509                         log_info("Allocation limit reached, rotating.");
510                 else if (r == -EHOSTDOWN)
511                         log_info("Journal file from other machine, rotating.");
512                 else if (r == -EBUSY)
513                         log_info("Unlcean shutdown, rotating.");
514                 else
515                         log_warning("Journal file corrupted, rotating.");
516
517                 server_rotate(s);
518                 server_vacuum(s);
519                 vacuumed = true;
520
521                 f = find_journal(s, uid);
522                 if (!f)
523                         return;
524
525                 log_info("Retrying write.");
526         }
527 }
528
529 static void dispatch_message_real(
530                 Server *s,
531                 struct iovec *iovec, unsigned n, unsigned m,
532                 struct ucred *ucred,
533                 struct timeval *tv,
534                 const char *label, size_t label_len,
535                 const char *unit_id) {
536
537         char *pid = NULL, *uid = NULL, *gid = NULL,
538                 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
539                 *comm = NULL, *cmdline = NULL, *hostname = NULL,
540                 *audit_session = NULL, *audit_loginuid = NULL,
541                 *exe = NULL, *cgroup = NULL, *session = NULL,
542                 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
543
544         char idbuf[33];
545         sd_id128_t id;
546         int r;
547         char *t;
548         uid_t loginuid = 0, realuid = 0;
549
550         assert(s);
551         assert(iovec);
552         assert(n > 0);
553         assert(n + N_IOVEC_META_FIELDS <= m);
554
555         if (ucred) {
556                 uint32_t audit;
557 #ifdef HAVE_LOGIND
558                 uid_t owner;
559 #endif
560
561                 realuid = ucred->uid;
562
563                 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
564                         IOVEC_SET_STRING(iovec[n++], pid);
565
566                 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
567                         IOVEC_SET_STRING(iovec[n++], uid);
568
569                 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
570                         IOVEC_SET_STRING(iovec[n++], gid);
571
572                 r = get_process_comm(ucred->pid, &t);
573                 if (r >= 0) {
574                         comm = strappend("_COMM=", t);
575                         free(t);
576
577                         if (comm)
578                                 IOVEC_SET_STRING(iovec[n++], comm);
579                 }
580
581                 r = get_process_exe(ucred->pid, &t);
582                 if (r >= 0) {
583                         exe = strappend("_EXE=", t);
584                         free(t);
585
586                         if (exe)
587                                 IOVEC_SET_STRING(iovec[n++], exe);
588                 }
589
590                 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
591                 if (r >= 0) {
592                         cmdline = strappend("_CMDLINE=", t);
593                         free(t);
594
595                         if (cmdline)
596                                 IOVEC_SET_STRING(iovec[n++], cmdline);
597                 }
598
599                 r = audit_session_from_pid(ucred->pid, &audit);
600                 if (r >= 0)
601                         if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
602                                 IOVEC_SET_STRING(iovec[n++], audit_session);
603
604                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
605                 if (r >= 0)
606                         if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
607                                 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
608
609                 t = shortened_cgroup_path(ucred->pid);
610                 if (t) {
611                         cgroup = strappend("_SYSTEMD_CGROUP=", t);
612                         free(t);
613
614                         if (cgroup)
615                                 IOVEC_SET_STRING(iovec[n++], cgroup);
616                 }
617
618 #ifdef HAVE_LOGIND
619                 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
620                         session = strappend("_SYSTEMD_SESSION=", t);
621                         free(t);
622
623                         if (session)
624                                 IOVEC_SET_STRING(iovec[n++], session);
625                 }
626
627                 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
628                         if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
629                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
630 #endif
631
632                 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
633                         unit = strappend("_SYSTEMD_UNIT=", t);
634                         free(t);
635                 } else if (unit_id)
636                         unit = strappend("_SYSTEMD_UNIT=", unit_id);
637
638                 if (unit)
639                         IOVEC_SET_STRING(iovec[n++], unit);
640
641 #ifdef HAVE_SELINUX
642                 if (label) {
643                         selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
644                         if (selinux_context) {
645                                 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
646                                 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
647                                 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
648                                 IOVEC_SET_STRING(iovec[n++], selinux_context);
649                         }
650                 } else {
651                         security_context_t con;
652
653                         if (getpidcon(ucred->pid, &con) >= 0) {
654                                 selinux_context = strappend("_SELINUX_CONTEXT=", con);
655                                 if (selinux_context)
656                                         IOVEC_SET_STRING(iovec[n++], selinux_context);
657
658                                 freecon(con);
659                         }
660                 }
661 #endif
662         }
663
664         if (tv) {
665                 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
666                              (unsigned long long) timeval_load(tv)) >= 0)
667                         IOVEC_SET_STRING(iovec[n++], source_time);
668         }
669
670         /* Note that strictly speaking storing the boot id here is
671          * redundant since the entry includes this in-line
672          * anyway. However, we need this indexed, too. */
673         r = sd_id128_get_boot(&id);
674         if (r >= 0)
675                 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
676                         IOVEC_SET_STRING(iovec[n++], boot_id);
677
678         r = sd_id128_get_machine(&id);
679         if (r >= 0)
680                 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
681                         IOVEC_SET_STRING(iovec[n++], machine_id);
682
683         t = gethostname_malloc();
684         if (t) {
685                 hostname = strappend("_HOSTNAME=", t);
686                 free(t);
687                 if (hostname)
688                         IOVEC_SET_STRING(iovec[n++], hostname);
689         }
690
691         assert(n <= m);
692
693         write_to_journal(s, realuid == 0 ? 0 : loginuid, iovec, n);
694
695         free(pid);
696         free(uid);
697         free(gid);
698         free(comm);
699         free(exe);
700         free(cmdline);
701         free(source_time);
702         free(boot_id);
703         free(machine_id);
704         free(hostname);
705         free(audit_session);
706         free(audit_loginuid);
707         free(cgroup);
708         free(session);
709         free(owner_uid);
710         free(unit);
711         free(selinux_context);
712 }
713
714 static void driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
715         char mid[11 + 32 + 1];
716         char buffer[16 + LINE_MAX + 1];
717         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
718         int n = 0;
719         va_list ap;
720         struct ucred ucred;
721
722         assert(s);
723         assert(format);
724
725         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
726         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
727
728         memcpy(buffer, "MESSAGE=", 8);
729         va_start(ap, format);
730         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
731         va_end(ap);
732         char_array_0(buffer);
733         IOVEC_SET_STRING(iovec[n++], buffer);
734
735         snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
736         char_array_0(mid);
737         IOVEC_SET_STRING(iovec[n++], mid);
738
739         zero(ucred);
740         ucred.pid = getpid();
741         ucred.uid = getuid();
742         ucred.gid = getgid();
743
744         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
745 }
746
747 static void dispatch_message(Server *s,
748                              struct iovec *iovec, unsigned n, unsigned m,
749                              struct ucred *ucred,
750                              struct timeval *tv,
751                              const char *label, size_t label_len,
752                              const char *unit_id,
753                              int priority) {
754         int rl;
755         char *path = NULL, *c;
756
757         assert(s);
758         assert(iovec || n == 0);
759
760         if (n == 0)
761                 return;
762
763         if (LOG_PRI(priority) > s->max_level_store)
764                 return;
765
766         if (!ucred)
767                 goto finish;
768
769         path = shortened_cgroup_path(ucred->pid);
770         if (!path)
771                 goto finish;
772
773         /* example: /user/lennart/3/foobar
774          *          /system/dbus.service/foobar
775          *
776          * So let's cut of everything past the third /, since that is
777          * wher user directories start */
778
779         c = strchr(path, '/');
780         if (c) {
781                 c = strchr(c+1, '/');
782                 if (c) {
783                         c = strchr(c+1, '/');
784                         if (c)
785                                 *c = 0;
786                 }
787         }
788
789         rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
790
791         if (rl == 0) {
792                 free(path);
793                 return;
794         }
795
796         /* Write a suppression message if we suppressed something */
797         if (rl > 1)
798                 driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
799
800         free(path);
801
802 finish:
803         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
804 }
805
806 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
807         struct msghdr msghdr;
808         struct cmsghdr *cmsg;
809         union {
810                 struct cmsghdr cmsghdr;
811                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
812         } control;
813         union sockaddr_union sa;
814
815         assert(s);
816         assert(iovec);
817         assert(n_iovec > 0);
818
819         zero(msghdr);
820         msghdr.msg_iov = (struct iovec*) iovec;
821         msghdr.msg_iovlen = n_iovec;
822
823         zero(sa);
824         sa.un.sun_family = AF_UNIX;
825         strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
826         msghdr.msg_name = &sa;
827         msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
828
829         if (ucred) {
830                 zero(control);
831                 msghdr.msg_control = &control;
832                 msghdr.msg_controllen = sizeof(control);
833
834                 cmsg = CMSG_FIRSTHDR(&msghdr);
835                 cmsg->cmsg_level = SOL_SOCKET;
836                 cmsg->cmsg_type = SCM_CREDENTIALS;
837                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
838                 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
839                 msghdr.msg_controllen = cmsg->cmsg_len;
840         }
841
842         /* Forward the syslog message we received via /dev/log to
843          * /run/systemd/syslog. Unfortunately we currently can't set
844          * the SO_TIMESTAMP auxiliary data, and hence we don't. */
845
846         if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
847                 return;
848
849         /* The socket is full? I guess the syslog implementation is
850          * too slow, and we shouldn't wait for that... */
851         if (errno == EAGAIN)
852                 return;
853
854         if (ucred && errno == ESRCH) {
855                 struct ucred u;
856
857                 /* Hmm, presumably the sender process vanished
858                  * by now, so let's fix it as good as we
859                  * can, and retry */
860
861                 u = *ucred;
862                 u.pid = getpid();
863                 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
864
865                 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
866                         return;
867
868                 if (errno == EAGAIN)
869                         return;
870         }
871
872         if (errno != ENOENT)
873                 log_debug("Failed to forward syslog message: %m");
874 }
875
876 static void forward_syslog_raw(Server *s, int priority, const char *buffer, struct ucred *ucred, struct timeval *tv) {
877         struct iovec iovec;
878
879         assert(s);
880         assert(buffer);
881
882         if (LOG_PRI(priority) > s->max_level_syslog)
883                 return;
884
885         IOVEC_SET_STRING(iovec, buffer);
886         forward_syslog_iovec(s, &iovec, 1, ucred, tv);
887 }
888
889 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
890         struct iovec iovec[5];
891         char header_priority[6], header_time[64], header_pid[16];
892         int n = 0;
893         time_t t;
894         struct tm *tm;
895         char *ident_buf = NULL;
896
897         assert(s);
898         assert(priority >= 0);
899         assert(priority <= 999);
900         assert(message);
901
902         if (LOG_PRI(priority) > s->max_level_syslog)
903                 return;
904
905         /* First: priority field */
906         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
907         char_array_0(header_priority);
908         IOVEC_SET_STRING(iovec[n++], header_priority);
909
910         /* Second: timestamp */
911         t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
912         tm = localtime(&t);
913         if (!tm)
914                 return;
915         if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
916                 return;
917         IOVEC_SET_STRING(iovec[n++], header_time);
918
919         /* Third: identifier and PID */
920         if (ucred) {
921                 if (!identifier) {
922                         get_process_comm(ucred->pid, &ident_buf);
923                         identifier = ident_buf;
924                 }
925
926                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
927                 char_array_0(header_pid);
928
929                 if (identifier)
930                         IOVEC_SET_STRING(iovec[n++], identifier);
931
932                 IOVEC_SET_STRING(iovec[n++], header_pid);
933         } else if (identifier) {
934                 IOVEC_SET_STRING(iovec[n++], identifier);
935                 IOVEC_SET_STRING(iovec[n++], ": ");
936         }
937
938         /* Fourth: message */
939         IOVEC_SET_STRING(iovec[n++], message);
940
941         forward_syslog_iovec(s, iovec, n, ucred, tv);
942
943         free(ident_buf);
944 }
945
/* Makes sure a priority value carries a facility: if the facility bits
 * are all zero, the level bits are kept and LOG_USER is substituted as
 * facility. Any other value is returned unchanged. */
static int fixup_priority(int priority) {
        return (priority & LOG_FACMASK)
                ? priority
                : ((priority & LOG_PRIMASK) | LOG_USER);
}
953
954 static void forward_kmsg(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
955         struct iovec iovec[5];
956         char header_priority[6], header_pid[16];
957         int n = 0;
958         char *ident_buf = NULL;
959
960         assert(s);
961         assert(priority >= 0);
962         assert(priority <= 999);
963         assert(message);
964
965         if (_unlikely_(LOG_PRI(priority) > s->max_level_kmsg))
966                 return;
967
968         if (_unlikely_(s->dev_kmsg_fd < 0))
969                 return;
970
971         /* Never allow messages with kernel facility to be written to
972          * kmsg, regardless where the data comes from. */
973         priority = fixup_priority(priority);
974
975         /* First: priority field */
976         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
977         char_array_0(header_priority);
978         IOVEC_SET_STRING(iovec[n++], header_priority);
979
980         /* Second: identifier and PID */
981         if (ucred) {
982                 if (!identifier) {
983                         get_process_comm(ucred->pid, &ident_buf);
984                         identifier = ident_buf;
985                 }
986
987                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
988                 char_array_0(header_pid);
989
990                 if (identifier)
991                         IOVEC_SET_STRING(iovec[n++], identifier);
992
993                 IOVEC_SET_STRING(iovec[n++], header_pid);
994         } else if (identifier) {
995                 IOVEC_SET_STRING(iovec[n++], identifier);
996                 IOVEC_SET_STRING(iovec[n++], ": ");
997         }
998
999         /* Fourth: message */
1000         IOVEC_SET_STRING(iovec[n++], message);
1001         IOVEC_SET_STRING(iovec[n++], "\n");
1002
1003         if (writev(s->dev_kmsg_fd, iovec, n) < 0)
1004                 log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
1005
1006         free(ident_buf);
1007 }
1008
1009 static void forward_console(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
1010         struct iovec iovec[4];
1011         char header_pid[16];
1012         int n = 0, fd;
1013         char *ident_buf = NULL;
1014         const char *tty;
1015
1016         assert(s);
1017         assert(message);
1018
1019         if (LOG_PRI(priority) > s->max_level_console)
1020                 return;
1021
1022         /* First: identifier and PID */
1023         if (ucred) {
1024                 if (!identifier) {
1025                         get_process_comm(ucred->pid, &ident_buf);
1026                         identifier = ident_buf;
1027                 }
1028
1029                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
1030                 char_array_0(header_pid);
1031
1032                 if (identifier)
1033                         IOVEC_SET_STRING(iovec[n++], identifier);
1034
1035                 IOVEC_SET_STRING(iovec[n++], header_pid);
1036         } else if (identifier) {
1037                 IOVEC_SET_STRING(iovec[n++], identifier);
1038                 IOVEC_SET_STRING(iovec[n++], ": ");
1039         }
1040
1041         /* Third: message */
1042         IOVEC_SET_STRING(iovec[n++], message);
1043         IOVEC_SET_STRING(iovec[n++], "\n");
1044
1045         tty = s->tty_path ? s->tty_path : "/dev/console";
1046
1047         fd = open_terminal(tty, O_WRONLY|O_NOCTTY|O_CLOEXEC);
1048         if (fd < 0) {
1049                 log_debug("Failed to open %s for logging: %s", tty, strerror(errno));
1050                 goto finish;
1051         }
1052
1053         if (writev(fd, iovec, n) < 0)
1054                 log_debug("Failed to write to %s for logging: %s", tty, strerror(errno));
1055
1056         close_nointr_nofail(fd);
1057
1058 finish:
1059         free(ident_buf);
1060 }
1061
1062 static void read_identifier(const char **buf, char **identifier, char **pid) {
1063         const char *p;
1064         char *t;
1065         size_t l, e;
1066
1067         assert(buf);
1068         assert(identifier);
1069         assert(pid);
1070
1071         p = *buf;
1072
1073         p += strspn(p, WHITESPACE);
1074         l = strcspn(p, WHITESPACE);
1075
1076         if (l <= 0 ||
1077             p[l-1] != ':')
1078                 return;
1079
1080         e = l;
1081         l--;
1082
1083         if (p[l-1] == ']') {
1084                 size_t k = l-1;
1085
1086                 for (;;) {
1087
1088                         if (p[k] == '[') {
1089                                 t = strndup(p+k+1, l-k-2);
1090                                 if (t)
1091                                         *pid = t;
1092
1093                                 l = k;
1094                                 break;
1095                         }
1096
1097                         if (k == 0)
1098                                 break;
1099
1100                         k--;
1101                 }
1102         }
1103
1104         t = strndup(p, l);
1105         if (t)
1106                 *identifier = t;
1107
1108         *buf = p + e;
1109         *buf += strspn(*buf, WHITESPACE);
1110 }
1111
1112 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv, const char *label, size_t label_len) {
1113         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
1114         struct iovec iovec[N_IOVEC_META_FIELDS + 6];
1115         unsigned n = 0;
1116         int priority = LOG_USER | LOG_INFO;
1117         char *identifier = NULL, *pid = NULL;
1118         const char *orig;
1119
1120         assert(s);
1121         assert(buf);
1122
1123         orig = buf;
1124         parse_syslog_priority((char**) &buf, &priority);
1125
1126         if (s->forward_to_syslog)
1127                 forward_syslog_raw(s, priority, orig, ucred, tv);
1128
1129         skip_syslog_date((char**) &buf);
1130         read_identifier(&buf, &identifier, &pid);
1131
1132         if (s->forward_to_kmsg)
1133                 forward_kmsg(s, priority, identifier, buf, ucred);
1134
1135         if (s->forward_to_console)
1136                 forward_console(s, priority, identifier, buf, ucred);
1137
1138         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
1139
1140         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1141                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1142
1143         if (priority & LOG_FACMASK)
1144                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1145                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1146
1147         if (identifier) {
1148                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1149                 if (syslog_identifier)
1150                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1151         }
1152
1153         if (pid) {
1154                 syslog_pid = strappend("SYSLOG_PID=", pid);
1155                 if (syslog_pid)
1156                         IOVEC_SET_STRING(iovec[n++], syslog_pid);
1157         }
1158
1159         message = strappend("MESSAGE=", buf);
1160         if (message)
1161                 IOVEC_SET_STRING(iovec[n++], message);
1162
1163         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, label, label_len, NULL, priority);
1164
1165         free(message);
1166         free(identifier);
1167         free(pid);
1168         free(syslog_priority);
1169         free(syslog_facility);
1170         free(syslog_identifier);
1171         free(syslog_pid);
1172 }
1173
/* Checks whether the l bytes at p form a field name that clients are
 * allowed to set.
 *
 * This roughly follows the POSIX syntax recommendations for environment
 * variable names, with a few extra restrictions:
 *
 * http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
 *
 * A name is accepted if it is 1 to 64 characters long, consists solely
 * of 'A'-'Z', '0'-'9' and '_', and starts with neither a digit nor an
 * underscore (names with a leading underscore are protected). */
static bool valid_user_field(const char *p, size_t l) {
        size_t i;

        /* Neither empty nor overly long names */
        if (l == 0 || l > 64)
                return false;

        /* First character: no underscore (protected), no digit */
        if (p[0] == '_' || (p[0] >= '0' && p[0] <= '9'))
                return false;

        /* Every character must be an upper case letter, a digit or '_' */
        for (i = 0; i < l; i++) {
                char c = p[i];
                bool upper = c >= 'A' && c <= 'Z';
                bool digit = c >= '0' && c <= '9';

                if (!upper && !digit && c != '_')
                        return false;
        }

        return true;
}
1208
1209 static void process_native_message(
1210                 Server *s,
1211                 const void *buffer, size_t buffer_size,
1212                 struct ucred *ucred,
1213                 struct timeval *tv,
1214                 const char *label, size_t label_len) {
1215
1216         struct iovec *iovec = NULL;
1217         unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1218         const char *p;
1219         size_t remaining;
1220         int priority = LOG_INFO;
1221         char *identifier = NULL, *message = NULL;
1222
1223         assert(s);
1224         assert(buffer || buffer_size == 0);
1225
1226         p = buffer;
1227         remaining = buffer_size;
1228
1229         while (remaining > 0) {
1230                 const char *e, *q;
1231
1232                 e = memchr(p, '\n', remaining);
1233
1234                 if (!e) {
1235                         /* Trailing noise, let's ignore it, and flush what we collected */
1236                         log_debug("Received message with trailing noise, ignoring.");
1237                         break;
1238                 }
1239
1240                 if (e == p) {
1241                         /* Entry separator */
1242                         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
1243                         n = 0;
1244                         priority = LOG_INFO;
1245
1246                         p++;
1247                         remaining--;
1248                         continue;
1249                 }
1250
1251                 if (*p == '.' || *p == '#') {
1252                         /* Ignore control commands for now, and
1253                          * comments too. */
1254                         remaining -= (e - p) + 1;
1255                         p = e + 1;
1256                         continue;
1257                 }
1258
1259                 /* A property follows */
1260
1261                 if (n+N_IOVEC_META_FIELDS >= m) {
1262                         struct iovec *c;
1263                         unsigned u;
1264
1265                         u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1266                         c = realloc(iovec, u * sizeof(struct iovec));
1267                         if (!c) {
1268                                 log_oom();
1269                                 break;
1270                         }
1271
1272                         iovec = c;
1273                         m = u;
1274                 }
1275
1276                 q = memchr(p, '=', e - p);
1277                 if (q) {
1278                         if (valid_user_field(p, q - p)) {
1279                                 size_t l;
1280
1281                                 l = e - p;
1282
1283                                 /* If the field name starts with an
1284                                  * underscore, skip the variable,
1285                                  * since that indidates a trusted
1286                                  * field */
1287                                 iovec[n].iov_base = (char*) p;
1288                                 iovec[n].iov_len = l;
1289                                 n++;
1290
1291                                 /* We need to determine the priority
1292                                  * of this entry for the rate limiting
1293                                  * logic */
1294                                 if (l == 10 &&
1295                                     memcmp(p, "PRIORITY=", 9) == 0 &&
1296                                     p[9] >= '0' && p[9] <= '9')
1297                                         priority = (priority & LOG_FACMASK) | (p[9] - '0');
1298
1299                                 else if (l == 17 &&
1300                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1301                                          p[16] >= '0' && p[16] <= '9')
1302                                         priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1303
1304                                 else if (l == 18 &&
1305                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1306                                          p[16] >= '0' && p[16] <= '9' &&
1307                                          p[17] >= '0' && p[17] <= '9')
1308                                         priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1309
1310                                 else if (l >= 19 &&
1311                                          memcmp(p, "SYSLOG_IDENTIFIER=", 18) == 0) {
1312                                         char *t;
1313
1314                                         t = strndup(p + 18, l - 18);
1315                                         if (t) {
1316                                                 free(identifier);
1317                                                 identifier = t;
1318                                         }
1319                                 } else if (l >= 8 &&
1320                                            memcmp(p, "MESSAGE=", 8) == 0) {
1321                                         char *t;
1322
1323                                         t = strndup(p + 8, l - 8);
1324                                         if (t) {
1325                                                 free(message);
1326                                                 message = t;
1327                                         }
1328                                 }
1329                         }
1330
1331                         remaining -= (e - p) + 1;
1332                         p = e + 1;
1333                         continue;
1334                 } else {
1335                         le64_t l_le;
1336                         uint64_t l;
1337                         char *k;
1338
1339                         if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1340                                 log_debug("Failed to parse message, ignoring.");
1341                                 break;
1342                         }
1343
1344                         memcpy(&l_le, e + 1, sizeof(uint64_t));
1345                         l = le64toh(l_le);
1346
1347                         if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1348                             e[1+sizeof(uint64_t)+l] != '\n') {
1349                                 log_debug("Failed to parse message, ignoring.");
1350                                 break;
1351                         }
1352
1353                         k = malloc((e - p) + 1 + l);
1354                         if (!k) {
1355                                 log_oom();
1356                                 break;
1357                         }
1358
1359                         memcpy(k, p, e - p);
1360                         k[e - p] = '=';
1361                         memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1362
1363                         if (valid_user_field(p, e - p)) {
1364                                 iovec[n].iov_base = k;
1365                                 iovec[n].iov_len = (e - p) + 1 + l;
1366                                 n++;
1367                         } else
1368                                 free(k);
1369
1370                         remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1371                         p = e + 1 + sizeof(uint64_t) + l + 1;
1372                 }
1373         }
1374
1375         if (n <= 0)
1376                 goto finish;
1377
1378         tn = n++;
1379         IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1380
1381         if (message) {
1382                 if (s->forward_to_syslog)
1383                         forward_syslog(s, priority, identifier, message, ucred, tv);
1384
1385                 if (s->forward_to_kmsg)
1386                         forward_kmsg(s, priority, identifier, message, ucred);
1387
1388                 if (s->forward_to_console)
1389                         forward_console(s, priority, identifier, message, ucred);
1390         }
1391
1392         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
1393
1394 finish:
1395         for (j = 0; j < n; j++)  {
1396                 if (j == tn)
1397                         continue;
1398
1399                 if (iovec[j].iov_base < buffer ||
1400                     (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1401                         free(iovec[j].iov_base);
1402         }
1403
1404         free(iovec);
1405         free(identifier);
1406         free(message);
1407 }
1408
1409 static void process_native_file(
1410                 Server *s,
1411                 int fd,
1412                 struct ucred *ucred,
1413                 struct timeval *tv,
1414                 const char *label, size_t label_len) {
1415
1416         struct stat st;
1417         void *p;
1418         ssize_t n;
1419
1420         assert(s);
1421         assert(fd >= 0);
1422
1423         /* Data is in the passed file, since it didn't fit in a
1424          * datagram. We can't map the file here, since clients might
1425          * then truncate it and trigger a SIGBUS for us. So let's
1426          * stupidly read it */
1427
1428         if (fstat(fd, &st) < 0) {
1429                 log_error("Failed to stat passed file, ignoring: %m");
1430                 return;
1431         }
1432
1433         if (!S_ISREG(st.st_mode)) {
1434                 log_error("File passed is not regular. Ignoring.");
1435                 return;
1436         }
1437
1438         if (st.st_size <= 0)
1439                 return;
1440
1441         if (st.st_size > ENTRY_SIZE_MAX) {
1442                 log_error("File passed too large. Ignoring.");
1443                 return;
1444         }
1445
1446         p = malloc(st.st_size);
1447         if (!p) {
1448                 log_oom();
1449                 return;
1450         }
1451
1452         n = pread(fd, p, st.st_size, 0);
1453         if (n < 0)
1454                 log_error("Failed to read file, ignoring: %s", strerror(-n));
1455         else if (n > 0)
1456                 process_native_message(s, p, n, ucred, tv, label, label_len);
1457
1458         free(p);
1459 }
1460
1461 static int stdout_stream_log(StdoutStream *s, const char *p) {
1462         struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1463         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1464         unsigned n = 0;
1465         int priority;
1466         char *label = NULL;
1467         size_t label_len = 0;
1468
1469         assert(s);
1470         assert(p);
1471
1472         if (isempty(p))
1473                 return 0;
1474
1475         priority = s->priority;
1476
1477         if (s->level_prefix)
1478                 parse_syslog_priority((char**) &p, &priority);
1479
1480         if (s->forward_to_syslog || s->server->forward_to_syslog)
1481                 forward_syslog(s->server, fixup_priority(priority), s->identifier, p, &s->ucred, NULL);
1482
1483         if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1484                 forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1485
1486         if (s->forward_to_console || s->server->forward_to_console)
1487                 forward_console(s->server, priority, s->identifier, p, &s->ucred);
1488
1489         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1490
1491         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1492                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1493
1494         if (priority & LOG_FACMASK)
1495                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1496                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1497
1498         if (s->identifier) {
1499                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1500                 if (syslog_identifier)
1501                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1502         }
1503
1504         message = strappend("MESSAGE=", p);
1505         if (message)
1506                 IOVEC_SET_STRING(iovec[n++], message);
1507
1508 #ifdef HAVE_SELINUX
1509         if (s->security_context) {
1510                 label = (char*) s->security_context;
1511                 label_len = strlen((char*) s->security_context);
1512         }
1513 #endif
1514
1515         dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, label, label_len, s->unit_id, priority);
1516
1517         free(message);
1518         free(syslog_priority);
1519         free(syslog_facility);
1520         free(syslog_identifier);
1521
1522         return 0;
1523 }
1524
1525 static int stdout_stream_line(StdoutStream *s, char *p) {
1526         int r;
1527
1528         assert(s);
1529         assert(p);
1530
1531         p = strstrip(p);
1532
1533         switch (s->state) {
1534
1535         case STDOUT_STREAM_IDENTIFIER:
1536                 if (isempty(p))
1537                         s->identifier = NULL;
1538                 else  {
1539                         s->identifier = strdup(p);
1540                         if (!s->identifier)
1541                                 return log_oom();
1542                 }
1543
1544                 s->state = STDOUT_STREAM_UNIT_ID;
1545                 return 0;
1546
1547         case STDOUT_STREAM_UNIT_ID:
1548                 if (s->ucred.uid == 0) {
1549                         if (isempty(p))
1550                                 s->unit_id = NULL;
1551                         else  {
1552                                 s->unit_id = strdup(p);
1553                                 if (!s->unit_id)
1554                                         return log_oom();
1555                         }
1556                 }
1557
1558                 s->state = STDOUT_STREAM_PRIORITY;
1559                 return 0;
1560
1561         case STDOUT_STREAM_PRIORITY:
1562                 r = safe_atoi(p, &s->priority);
1563                 if (r < 0 || s->priority <= 0 || s->priority >= 999) {
1564                         log_warning("Failed to parse log priority line.");
1565                         return -EINVAL;
1566                 }
1567
1568                 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1569                 return 0;
1570
1571         case STDOUT_STREAM_LEVEL_PREFIX:
1572                 r = parse_boolean(p);
1573                 if (r < 0) {
1574                         log_warning("Failed to parse level prefix line.");
1575                         return -EINVAL;
1576                 }
1577
1578                 s->level_prefix = !!r;
1579                 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1580                 return 0;
1581
1582         case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1583                 r = parse_boolean(p);
1584                 if (r < 0) {
1585                         log_warning("Failed to parse forward to syslog line.");
1586                         return -EINVAL;
1587                 }
1588
1589                 s->forward_to_syslog = !!r;
1590                 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1591                 return 0;
1592
1593         case STDOUT_STREAM_FORWARD_TO_KMSG:
1594                 r = parse_boolean(p);
1595                 if (r < 0) {
1596                         log_warning("Failed to parse copy to kmsg line.");
1597                         return -EINVAL;
1598                 }
1599
1600                 s->forward_to_kmsg = !!r;
1601                 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1602                 return 0;
1603
1604         case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1605                 r = parse_boolean(p);
1606                 if (r < 0) {
1607                         log_warning("Failed to parse copy to console line.");
1608                         return -EINVAL;
1609                 }
1610
1611                 s->forward_to_console = !!r;
1612                 s->state = STDOUT_STREAM_RUNNING;
1613                 return 0;
1614
1615         case STDOUT_STREAM_RUNNING:
1616                 return stdout_stream_log(s, p);
1617         }
1618
1619         assert_not_reached("Unknown stream state");
1620 }
1621
1622 static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
1623         char *p;
1624         size_t remaining;
1625         int r;
1626
1627         assert(s);
1628
1629         p = s->buffer;
1630         remaining = s->length;
1631         for (;;) {
1632                 char *end;
1633                 size_t skip;
1634
1635                 end = memchr(p, '\n', remaining);
1636                 if (end)
1637                         skip = end - p + 1;
1638                 else if (remaining >= sizeof(s->buffer) - 1) {
1639                         end = p + sizeof(s->buffer) - 1;
1640                         skip = remaining;
1641                 } else
1642                         break;
1643
1644                 *end = 0;
1645
1646                 r = stdout_stream_line(s, p);
1647                 if (r < 0)
1648                         return r;
1649
1650                 remaining -= skip;
1651                 p += skip;
1652         }
1653
1654         if (force_flush && remaining > 0) {
1655                 p[remaining] = 0;
1656                 r = stdout_stream_line(s, p);
1657                 if (r < 0)
1658                         return r;
1659
1660                 p += remaining;
1661                 remaining = 0;
1662         }
1663
1664         if (p > s->buffer) {
1665                 memmove(s->buffer, p, remaining);
1666                 s->length = remaining;
1667         }
1668
1669         return 0;
1670 }
1671
1672 static int stdout_stream_process(StdoutStream *s) {
1673         ssize_t l;
1674         int r;
1675
1676         assert(s);
1677
1678         l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1679         if (l < 0) {
1680
1681                 if (errno == EAGAIN)
1682                         return 0;
1683
1684                 log_warning("Failed to read from stream: %m");
1685                 return -errno;
1686         }
1687
1688         if (l == 0) {
1689                 r = stdout_stream_scan(s, true);
1690                 if (r < 0)
1691                         return r;
1692
1693                 return 0;
1694         }
1695
1696         s->length += l;
1697         r = stdout_stream_scan(s, false);
1698         if (r < 0)
1699                 return r;
1700
1701         return 1;
1702
1703 }
1704
1705 static void stdout_stream_free(StdoutStream *s) {
1706         assert(s);
1707
1708         if (s->server) {
1709                 assert(s->server->n_stdout_streams > 0);
1710                 s->server->n_stdout_streams --;
1711                 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
1712         }
1713
1714         if (s->fd >= 0) {
1715                 if (s->server)
1716                         epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1717
1718                 close_nointr_nofail(s->fd);
1719         }
1720
1721 #ifdef HAVE_SELINUX
1722         if (s->security_context)
1723                 freecon(s->security_context);
1724 #endif
1725
1726         free(s->identifier);
1727         free(s);
1728 }
1729
1730 static int stdout_stream_new(Server *s) {
1731         StdoutStream *stream;
1732         int fd, r;
1733         socklen_t len;
1734         struct epoll_event ev;
1735
1736         assert(s);
1737
1738         fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1739         if (fd < 0) {
1740                 if (errno == EAGAIN)
1741                         return 0;
1742
1743                 log_error("Failed to accept stdout connection: %m");
1744                 return -errno;
1745         }
1746
1747         if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1748                 log_warning("Too many stdout streams, refusing connection.");
1749                 close_nointr_nofail(fd);
1750                 return 0;
1751         }
1752
1753         stream = new0(StdoutStream, 1);
1754         if (!stream) {
1755                 close_nointr_nofail(fd);
1756                 return log_oom();
1757         }
1758
1759         stream->fd = fd;
1760
1761         len = sizeof(stream->ucred);
1762         if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1763                 log_error("Failed to determine peer credentials: %m");
1764                 r = -errno;
1765                 goto fail;
1766         }
1767
1768 #ifdef HAVE_SELINUX
1769         if (getpeercon(fd, &stream->security_context) < 0 && errno != ENOPROTOOPT)
1770                 log_error("Failed to determine peer security context: %m");
1771 #endif
1772
1773         if (shutdown(fd, SHUT_WR) < 0) {
1774                 log_error("Failed to shutdown writing side of socket: %m");
1775                 r = -errno;
1776                 goto fail;
1777         }
1778
1779         zero(ev);
1780         ev.data.ptr = stream;
1781         ev.events = EPOLLIN;
1782         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1783                 log_error("Failed to add stream to event loop: %m");
1784                 r = -errno;
1785                 goto fail;
1786         }
1787
1788         stream->server = s;
1789         LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1790         s->n_stdout_streams ++;
1791
1792         return 0;
1793
1794 fail:
1795         stdout_stream_free(stream);
1796         return r;
1797 }
1798
/* Tells whether the textual PID in "pid" denotes this very process.
 * Strings that parse_pid() rejects never match. */
static bool is_us(const char *pid) {
        pid_t parsed;

        assert(pid);

        return parse_pid(pid, &parsed) >= 0 && parsed == getpid();
}
1809
1810 static void dev_kmsg_record(Server *s, char *p, size_t l) {
1811         struct iovec iovec[N_IOVEC_META_FIELDS + 7 + N_IOVEC_KERNEL_FIELDS];
1812         char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
1813         int priority, r;
1814         unsigned n = 0, z = 0, j;
1815         usec_t usec;
1816         char *identifier = NULL, *pid = NULL, *e, *f, *k;
1817         uint64_t serial;
1818
1819         assert(s);
1820         assert(p);
1821
1822         if (l <= 0)
1823                 return;
1824
1825         e = memchr(p, ',', l);
1826         if (!e)
1827                 return;
1828         *e = 0;
1829
1830         r = safe_atoi(p, &priority);
1831         if (r < 0 || priority < 0 || priority > 999)
1832                 return;
1833
1834         if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
1835                 return;
1836
1837         l -= (e - p) + 1;
1838         p = e + 1;
1839         e = memchr(p, ',', l);
1840         if (!e)
1841                 return;
1842         *e = 0;
1843
1844         r = safe_atou64(p, &serial);
1845         if (r < 0)
1846                 return;
1847
1848         l -= (e - p) + 1;
1849         p = e + 1;
1850         f = memchr(p, ';', l);
1851         if (!f)
1852                 return;
1853         /* Kernel 3.6 has the flags field, kernel 3.5 lacks that */
1854         e = memchr(p, ',', l);
1855         if (!e || f < e)
1856                 e = f;
1857         *e = 0;
1858
1859         r = parse_usec(p, &usec);
1860         if (r < 0)
1861                 return;
1862
1863         l -= (f - p) + 1;
1864         p = f + 1;
1865         e = memchr(p, '\n', l);
1866         if (!e)
1867                 return;
1868         *e = 0;
1869
1870         l -= (e - p) + 1;
1871         k = e + 1;
1872
1873         for (j = 0; l > 0 && j < N_IOVEC_KERNEL_FIELDS; j++) {
1874                 char *m;
1875                 /* Meta data fields attached */
1876
1877                 if (*k != ' ')
1878                         break;
1879
1880                 k ++, l --;
1881
1882                 e = memchr(k, '\n', l);
1883                 if (!e)
1884                         return;
1885
1886                 *e = 0;
1887
1888                 m = new(char, sizeof("_KERNEL_") - 1 + e - k);
1889                 if (!m)
1890                         break;
1891
1892                 memcpy(m, "_KERNEL_", sizeof("_KERNEL_") - 1);
1893                 memcpy(m + sizeof("_KERNEL_") - 1, k, e - k);
1894
1895                 iovec[n].iov_base = m;
1896                 iovec[n].iov_len = sizeof("_KERNEL_") - 1 + e - k;
1897                 n++, z++;
1898
1899                 l -= (e - k) + 1;
1900                 k = e + 1;
1901         }
1902
1903         if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
1904                      (unsigned long long) usec) >= 0)
1905                 IOVEC_SET_STRING(iovec[n++], source_time);
1906
1907         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
1908
1909         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1910                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1911
1912         if ((priority & LOG_FACMASK) == LOG_KERN)
1913                 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
1914         else {
1915                 read_identifier((const char**) &p, &identifier, &pid);
1916
1917                 /* Avoid any messages we generated ourselves via
1918                  * log_info() and friends. */
1919                 if (pid && is_us(pid))
1920                         goto finish;
1921
1922                 if (identifier) {
1923                         syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1924                         if (syslog_identifier)
1925                                 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1926                 }
1927
1928                 if (pid) {
1929                         syslog_pid = strappend("SYSLOG_PID=", pid);
1930                         if (syslog_pid)
1931                                 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1932                 }
1933
1934                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1935                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1936         }
1937
1938         message = strappend("MESSAGE=", p);
1939         if (message)
1940                 IOVEC_SET_STRING(iovec[n++], message);
1941
1942         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, NULL, priority);
1943
1944 finish:
1945         for (j = 0; j < z; j++)
1946                 free(iovec[j].iov_base);
1947
1948         free(message);
1949         free(syslog_priority);
1950         free(syslog_identifier);
1951         free(syslog_pid);
1952         free(syslog_facility);
1953         free(source_time);
1954         free(identifier);
1955         free(pid);
1956 }
1957
1958 static int system_journal_open(Server *s) {
1959         int r;
1960         char *fn;
1961         sd_id128_t machine;
1962         char ids[33];
1963
1964         r = sd_id128_get_machine(&machine);
1965         if (r < 0)
1966                 return r;
1967
1968         sd_id128_to_string(machine, ids);
1969
1970         if (!s->system_journal &&
1971             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
1972             access("/run/systemd/journal/flushed", F_OK) >= 0) {
1973
1974                 /* If in auto mode: first try to create the machine
1975                  * path, but not the prefix.
1976                  *
1977                  * If in persistent mode: create /var/log/journal and
1978                  * the machine path */
1979
1980                 if (s->storage == STORAGE_PERSISTENT)
1981                         (void) mkdir("/var/log/journal/", 0755);
1982
1983                 fn = strappend("/var/log/journal/", ids);
1984                 if (!fn)
1985                         return -ENOMEM;
1986
1987                 (void) mkdir(fn, 0755);
1988                 free(fn);
1989
1990                 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
1991                 if (!fn)
1992                         return -ENOMEM;
1993
1994                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, &s->system_metrics, NULL, &s->system_journal);
1995                 free(fn);
1996
1997                 if (r >= 0) {
1998                         s->system_journal->compress = s->compress;
1999
2000                         server_fix_perms(s, s->system_journal, 0);
2001                 } else if (r < 0) {
2002
2003                         if (r != -ENOENT && r != -EROFS)
2004                                 log_warning("Failed to open system journal: %s", strerror(-r));
2005
2006                         r = 0;
2007                 }
2008         }
2009
2010         if (!s->runtime_journal &&
2011             (s->storage != STORAGE_NONE)) {
2012
2013                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
2014                 if (!fn)
2015                         return -ENOMEM;
2016
2017                 if (s->system_journal) {
2018
2019                         /* Try to open the runtime journal, but only
2020                          * if it already exists, so that we can flush
2021                          * it into the system journal */
2022
2023                         r = journal_file_open(fn, O_RDWR, 0640, &s->runtime_metrics, NULL, &s->runtime_journal);
2024                         free(fn);
2025
2026                         if (r < 0) {
2027                                 if (r != -ENOENT)
2028                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
2029
2030                                 r = 0;
2031                         }
2032
2033                 } else {
2034
2035                         /* OK, we really need the runtime journal, so create
2036                          * it if necessary. */
2037
2038                         (void) mkdir_parents(fn, 0755);
2039                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, &s->runtime_metrics, NULL, &s->runtime_journal);
2040                         free(fn);
2041
2042                         if (r < 0) {
2043                                 log_error("Failed to open runtime journal: %s", strerror(-r));
2044                                 return r;
2045                         }
2046                 }
2047
2048                 if (s->runtime_journal) {
2049                         s->runtime_journal->compress = s->compress;
2050
2051                         server_fix_perms(s, s->runtime_journal, 0);
2052                 }
2053         }
2054
2055         return r;
2056 }
2057
2058 static int server_flush_to_var(Server *s) {
2059         Object *o = NULL;
2060         int r;
2061         sd_id128_t machine;
2062         sd_journal *j;
2063
2064         assert(s);
2065
2066         if (s->storage != STORAGE_AUTO &&
2067             s->storage != STORAGE_PERSISTENT)
2068                 return 0;
2069
2070         if (!s->runtime_journal)
2071                 return 0;
2072
2073         system_journal_open(s);
2074
2075         if (!s->system_journal)
2076                 return 0;
2077
2078         log_info("Flushing to /var...");
2079
2080         r = sd_id128_get_machine(&machine);
2081         if (r < 0) {
2082                 log_error("Failed to get machine id: %s", strerror(-r));
2083                 return r;
2084         }
2085
2086         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
2087         if (r < 0) {
2088                 log_error("Failed to read runtime journal: %s", strerror(-r));
2089                 return r;
2090         }
2091
2092         SD_JOURNAL_FOREACH(j) {
2093                 JournalFile *f;
2094
2095                 f = j->current_file;
2096                 assert(f && f->current_offset > 0);
2097
2098                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2099                 if (r < 0) {
2100                         log_error("Can't read entry: %s", strerror(-r));
2101                         goto finish;
2102                 }
2103
2104                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2105                 if (r == -E2BIG) {
2106                         log_info("Allocation limit reached.");
2107
2108                         journal_file_post_change(s->system_journal);
2109                         server_rotate(s);
2110                         server_vacuum(s);
2111
2112                         r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2113                 }
2114
2115                 if (r < 0) {
2116                         log_error("Can't write entry: %s", strerror(-r));
2117                         goto finish;
2118                 }
2119         }
2120
2121 finish:
2122         journal_file_post_change(s->system_journal);
2123
2124         journal_file_close(s->runtime_journal);
2125         s->runtime_journal = NULL;
2126
2127         if (r >= 0)
2128                 rm_rf("/run/log/journal", false, true, false);
2129
2130         return r;
2131 }
2132
2133 static int server_read_dev_kmsg(Server *s) {
2134         char buffer[8192+1]; /* the kernel-side limit per record is 8K currently */
2135         ssize_t l;
2136
2137         assert(s);
2138         assert(s->dev_kmsg_fd >= 0);
2139
2140         l = read(s->dev_kmsg_fd, buffer, sizeof(buffer) - 1);
2141         if (l == 0)
2142                 return 0;
2143         if (l < 0) {
2144                 /* Old kernels who don't allow reading from /dev/kmsg
2145                  * return EINVAL when we try. So handle this cleanly,
2146                  * but don' try to ever read from it again. */
2147                 if (errno == EINVAL) {
2148                         epoll_ctl(s->epoll_fd, EPOLL_CTL_DEL, s->dev_kmsg_fd, NULL);
2149                         return 0;
2150                 }
2151
2152                 if (errno == EAGAIN || errno == EINTR)
2153                         return 0;
2154
2155                 log_error("Failed to read from kernel: %m");
2156                 return -errno;
2157         }
2158
2159         dev_kmsg_record(s, buffer, l);
2160         return 1;
2161 }
2162
2163 static int server_flush_dev_kmsg(Server *s) {
2164         int r;
2165
2166         assert(s);
2167
2168         if (s->dev_kmsg_fd < 0)
2169                 return 0;
2170
2171         if (!s->dev_kmsg_readable)
2172                 return 0;
2173
2174         log_info("Flushing /dev/kmsg...");
2175
2176         for (;;) {
2177                 r = server_read_dev_kmsg(s);
2178                 if (r < 0)
2179                         return r;
2180
2181                 if (r == 0)
2182                         break;
2183         }
2184
2185         return 0;
2186 }
2187
2188 static int process_event(Server *s, struct epoll_event *ev) {
2189         assert(s);
2190         assert(ev);
2191
2192         if (ev->data.fd == s->signal_fd) {
2193                 struct signalfd_siginfo sfsi;
2194                 ssize_t n;
2195
2196                 if (ev->events != EPOLLIN) {
2197                         log_info("Got invalid event from epoll.");
2198                         return -EIO;
2199                 }
2200
2201                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
2202                 if (n != sizeof(sfsi)) {
2203
2204                         if (n >= 0)
2205                                 return -EIO;
2206
2207                         if (errno == EINTR || errno == EAGAIN)
2208                                 return 1;
2209
2210                         return -errno;
2211                 }
2212
2213                 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
2214
2215                 if (sfsi.ssi_signo == SIGUSR1) {
2216                         touch("/run/systemd/journal/flushed");
2217                         server_flush_to_var(s);
2218                         return 1;
2219                 }
2220
2221                 if (sfsi.ssi_signo == SIGUSR2) {
2222                         server_rotate(s);
2223                         server_vacuum(s);
2224                         return 1;
2225                 }
2226
2227                 return 0;
2228
2229         } else if (ev->data.fd == s->dev_kmsg_fd) {
2230                 int r;
2231
2232                 if (ev->events != EPOLLIN) {
2233                         log_info("Got invalid event from epoll.");
2234                         return -EIO;
2235                 }
2236
2237                 r = server_read_dev_kmsg(s);
2238                 if (r < 0)
2239                         return r;
2240
2241                 return 1;
2242
2243         } else if (ev->data.fd == s->native_fd ||
2244                    ev->data.fd == s->syslog_fd) {
2245
2246                 if (ev->events != EPOLLIN) {
2247                         log_info("Got invalid event from epoll.");
2248                         return -EIO;
2249                 }
2250
2251                 for (;;) {
2252                         struct msghdr msghdr;
2253                         struct iovec iovec;
2254                         struct ucred *ucred = NULL;
2255                         struct timeval *tv = NULL;
2256                         struct cmsghdr *cmsg;
2257                         char *label = NULL;
2258                         size_t label_len = 0;
2259                         union {
2260                                 struct cmsghdr cmsghdr;
2261
2262                                 /* We use NAME_MAX space for the
2263                                  * SELinux label here. The kernel
2264                                  * currently enforces no limit, but
2265                                  * according to suggestions from the
2266                                  * SELinux people this will change and
2267                                  * it will probably be identical to
2268                                  * NAME_MAX. For now we use that, but
2269                                  * this should be updated one day when
2270                                  * the final limit is known.*/
2271                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
2272                                             CMSG_SPACE(sizeof(struct timeval)) +
2273                                             CMSG_SPACE(sizeof(int)) + /* fd */
2274                                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
2275                         } control;
2276                         ssize_t n;
2277                         int v;
2278                         int *fds = NULL;
2279                         unsigned n_fds = 0;
2280
2281                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
2282                                 log_error("SIOCINQ failed: %m");
2283                                 return -errno;
2284                         }
2285
2286                         if (s->buffer_size < (size_t) v) {
2287                                 void *b;
2288                                 size_t l;
2289
2290                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
2291                                 b = realloc(s->buffer, l+1);
2292
2293                                 if (!b) {
2294                                         log_error("Couldn't increase buffer.");
2295                                         return -ENOMEM;
2296                                 }
2297
2298                                 s->buffer_size = l;
2299                                 s->buffer = b;
2300                         }
2301
2302                         zero(iovec);
2303                         iovec.iov_base = s->buffer;
2304                         iovec.iov_len = s->buffer_size;
2305
2306                         zero(control);
2307                         zero(msghdr);
2308                         msghdr.msg_iov = &iovec;
2309                         msghdr.msg_iovlen = 1;
2310                         msghdr.msg_control = &control;
2311                         msghdr.msg_controllen = sizeof(control);
2312
2313                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
2314                         if (n < 0) {
2315
2316                                 if (errno == EINTR || errno == EAGAIN)
2317                                         return 1;
2318
2319                                 log_error("recvmsg() failed: %m");
2320                                 return -errno;
2321                         }
2322
2323                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
2324
2325                                 if (cmsg->cmsg_level == SOL_SOCKET &&
2326                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
2327                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
2328                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
2329                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2330                                          cmsg->cmsg_type == SCM_SECURITY) {
2331                                         label = (char*) CMSG_DATA(cmsg);
2332                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
2333                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
2334                                          cmsg->cmsg_type == SO_TIMESTAMP &&
2335                                          cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
2336                                         tv = (struct timeval*) CMSG_DATA(cmsg);
2337                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2338                                          cmsg->cmsg_type == SCM_RIGHTS) {
2339                                         fds = (int*) CMSG_DATA(cmsg);
2340                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
2341                                 }
2342                         }
2343
2344                         if (ev->data.fd == s->syslog_fd) {
2345                                 char *e;
2346
2347                                 if (n > 0 && n_fds == 0) {
2348                                         e = memchr(s->buffer, '\n', n);
2349                                         if (e)
2350                                                 *e = 0;
2351                                         else
2352                                                 s->buffer[n] = 0;
2353
2354                                         process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
2355                                 } else if (n_fds > 0)
2356                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
2357
2358                         } else {
2359                                 if (n > 0 && n_fds == 0)
2360                                         process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
2361                                 else if (n == 0 && n_fds == 1)
2362                                         process_native_file(s, fds[0], ucred, tv, label, label_len);
2363                                 else if (n_fds > 0)
2364                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
2365                         }
2366
2367                         close_many(fds, n_fds);
2368                 }
2369
2370                 return 1;
2371
2372         } else if (ev->data.fd == s->stdout_fd) {
2373
2374                 if (ev->events != EPOLLIN) {
2375                         log_info("Got invalid event from epoll.");
2376                         return -EIO;
2377                 }
2378
2379                 stdout_stream_new(s);
2380                 return 1;
2381
2382         } else {
2383                 StdoutStream *stream;
2384
2385                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
2386                         log_info("Got invalid event from epoll.");
2387                         return -EIO;
2388                 }
2389
2390                 /* If it is none of the well-known fds, it must be an
2391                  * stdout stream fd. Note that this is a bit ugly here
2392                  * (since we rely that none of the well-known fds
2393                  * could be interpreted as pointer), but nonetheless
2394                  * safe, since the well-known fds would never get an
2395                  * fd > 4096, i.e. beyond the first memory page */
2396
2397                 stream = ev->data.ptr;
2398
2399                 if (stdout_stream_process(stream) <= 0)
2400                         stdout_stream_free(stream);
2401
2402                 return 1;
2403         }
2404
2405         log_error("Unknown event.");
2406         return 0;
2407 }
2408
2409 static int open_syslog_socket(Server *s) {
2410         union sockaddr_union sa;
2411         int one, r;
2412         struct epoll_event ev;
2413
2414         assert(s);
2415
2416         if (s->syslog_fd < 0) {
2417
2418                 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2419                 if (s->syslog_fd < 0) {
2420                         log_error("socket() failed: %m");
2421                         return -errno;
2422                 }
2423
2424                 zero(sa);
2425                 sa.un.sun_family = AF_UNIX;
2426                 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
2427
2428                 unlink(sa.un.sun_path);
2429
2430                 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2431                 if (r < 0) {
2432                         log_error("bind() failed: %m");
2433                         return -errno;
2434                 }
2435
2436                 chmod(sa.un.sun_path, 0666);
2437         } else
2438                 fd_nonblock(s->syslog_fd, 1);
2439
2440         one = 1;
2441         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2442         if (r < 0) {
2443                 log_error("SO_PASSCRED failed: %m");
2444                 return -errno;
2445         }
2446
2447 #ifdef HAVE_SELINUX
2448         one = 1;
2449         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2450         if (r < 0)
2451                 log_warning("SO_PASSSEC failed: %m");
2452 #endif
2453
2454         one = 1;
2455         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2456         if (r < 0) {
2457                 log_error("SO_TIMESTAMP failed: %m");
2458                 return -errno;
2459         }
2460
2461         zero(ev);
2462         ev.events = EPOLLIN;
2463         ev.data.fd = s->syslog_fd;
2464         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2465                 log_error("Failed to add syslog server fd to epoll object: %m");
2466                 return -errno;
2467         }
2468
2469         return 0;
2470 }
2471
2472 static int open_native_socket(Server*s) {
2473         union sockaddr_union sa;
2474         int one, r;
2475         struct epoll_event ev;
2476
2477         assert(s);
2478
2479         if (s->native_fd < 0) {
2480
2481                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2482                 if (s->native_fd < 0) {
2483                         log_error("socket() failed: %m");
2484                         return -errno;
2485                 }
2486
2487                 zero(sa);
2488                 sa.un.sun_family = AF_UNIX;
2489                 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
2490
2491                 unlink(sa.un.sun_path);
2492
2493                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2494                 if (r < 0) {
2495                         log_error("bind() failed: %m");
2496                         return -errno;
2497                 }
2498
2499                 chmod(sa.un.sun_path, 0666);
2500         } else
2501                 fd_nonblock(s->native_fd, 1);
2502
2503         one = 1;
2504         r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2505         if (r < 0) {
2506                 log_error("SO_PASSCRED failed: %m");
2507                 return -errno;
2508         }
2509
2510 #ifdef HAVE_SELINUX
2511         one = 1;
2512         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2513         if (r < 0)
2514                 log_warning("SO_PASSSEC failed: %m");
2515 #endif
2516
2517         one = 1;
2518         r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2519         if (r < 0) {
2520                 log_error("SO_TIMESTAMP failed: %m");
2521                 return -errno;
2522         }
2523
2524         zero(ev);
2525         ev.events = EPOLLIN;
2526         ev.data.fd = s->native_fd;
2527         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2528                 log_error("Failed to add native server fd to epoll object: %m");
2529                 return -errno;
2530         }
2531
2532         return 0;
2533 }
2534
2535 static int open_stdout_socket(Server *s) {
2536         union sockaddr_union sa;
2537         int r;
2538         struct epoll_event ev;
2539
2540         assert(s);
2541
2542         if (s->stdout_fd < 0) {
2543
2544                 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2545                 if (s->stdout_fd < 0) {
2546                         log_error("socket() failed: %m");
2547                         return -errno;
2548                 }
2549
2550                 zero(sa);
2551                 sa.un.sun_family = AF_UNIX;
2552                 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
2553
2554                 unlink(sa.un.sun_path);
2555
2556                 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2557                 if (r < 0) {
2558                         log_error("bind() failed: %m");
2559                         return -errno;
2560                 }
2561
2562                 chmod(sa.un.sun_path, 0666);
2563
2564                 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2565                         log_error("liste() failed: %m");
2566                         return -errno;
2567                 }
2568         } else
2569                 fd_nonblock(s->stdout_fd, 1);
2570
2571         zero(ev);
2572         ev.events = EPOLLIN;
2573         ev.data.fd = s->stdout_fd;
2574         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2575                 log_error("Failed to add stdout server fd to epoll object: %m");
2576                 return -errno;
2577         }
2578
2579         return 0;
2580 }
2581
2582 static int open_dev_kmsg(Server *s) {
2583         struct epoll_event ev;
2584
2585         assert(s);
2586
2587         s->dev_kmsg_fd = open("/dev/kmsg", O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2588         if (s->dev_kmsg_fd < 0) {
2589                 log_warning("Failed to open /dev/kmsg, ignoring: %m");
2590                 return 0;
2591         }
2592
2593         zero(ev);
2594         ev.events = EPOLLIN;
2595         ev.data.fd = s->dev_kmsg_fd;
2596         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->dev_kmsg_fd, &ev) < 0) {
2597
2598                 /* This will fail with EPERM on older kernels where
2599                  * /dev/kmsg is not readable. */
2600                 if (errno == EPERM)
2601                         return 0;
2602
2603                 log_error("Failed to add /dev/kmsg fd to epoll object: %m");
2604                 return -errno;
2605         }
2606
2607         s->dev_kmsg_readable = true;
2608
2609         return 0;
2610 }
2611
2612 static int open_signalfd(Server *s) {
2613         sigset_t mask;
2614         struct epoll_event ev;
2615
2616         assert(s);
2617
2618         assert_se(sigemptyset(&mask) == 0);
2619         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
2620         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2621
2622         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2623         if (s->signal_fd < 0) {
2624                 log_error("signalfd(): %m");
2625                 return -errno;
2626         }
2627
2628         zero(ev);
2629         ev.events = EPOLLIN;
2630         ev.data.fd = s->signal_fd;
2631
2632         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2633                 log_error("epoll_ctl(): %m");
2634                 return -errno;
2635         }
2636
2637         return 0;
2638 }
2639
2640 static int server_parse_proc_cmdline(Server *s) {
2641         char *line, *w, *state;
2642         int r;
2643         size_t l;
2644
2645         if (detect_container(NULL) > 0)
2646                 return 0;
2647
2648         r = read_one_line_file("/proc/cmdline", &line);
2649         if (r < 0) {
2650                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2651                 return 0;
2652         }
2653
2654         FOREACH_WORD_QUOTED(w, l, line, state) {
2655                 char *word;
2656
2657                 word = strndup(w, l);
2658                 if (!word) {
2659                         r = -ENOMEM;
2660                         goto finish;
2661                 }
2662
2663                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
2664                         r = parse_boolean(word + 35);
2665                         if (r < 0)
2666                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2667                         else
2668                                 s->forward_to_syslog = r;
2669                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
2670                         r = parse_boolean(word + 33);
2671                         if (r < 0)
2672                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2673                         else
2674                                 s->forward_to_kmsg = r;
2675                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
2676                         r = parse_boolean(word + 36);
2677                         if (r < 0)
2678                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2679                         else
2680                                 s->forward_to_console = r;
2681                 } else if (startswith(word, "systemd.journald"))
2682                         log_warning("Invalid systemd.journald parameter. Ignoring.");
2683
2684                 free(word);
2685         }
2686
2687         r = 0;
2688
2689 finish:
2690         free(line);
2691         return r;
2692 }
2693
2694 static int server_parse_config_file(Server *s) {
2695         FILE *f;
2696         const char *fn;
2697         int r;
2698
2699         assert(s);
2700
2701         fn = "/etc/systemd/journald.conf";
2702         f = fopen(fn, "re");
2703         if (!f) {
2704                 if (errno == ENOENT)
2705                         return 0;
2706
2707                 log_warning("Failed to open configuration file %s: %m", fn);
2708                 return -errno;
2709         }
2710
2711         r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2712         if (r < 0)
2713                 log_warning("Failed to parse configuration file: %s", strerror(-r));
2714
2715         fclose(f);
2716
2717         return r;
2718 }
2719
2720 static int server_init(Server *s) {
2721         int n, r, fd;
2722
2723         assert(s);
2724
2725         zero(*s);
2726         s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
2727         s->compress = true;
2728
2729         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
2730         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
2731
2732         s->forward_to_syslog = true;
2733
2734         s->max_level_store = LOG_DEBUG;
2735         s->max_level_syslog = LOG_DEBUG;
2736         s->max_level_kmsg = LOG_NOTICE;
2737         s->max_level_console = LOG_INFO;
2738
2739         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
2740         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
2741
2742         server_parse_config_file(s);
2743         server_parse_proc_cmdline(s);
2744
2745         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
2746         if (!s->user_journals)
2747                 return log_oom();
2748
2749         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2750         if (s->epoll_fd < 0) {
2751                 log_error("Failed to create epoll object: %m");
2752                 return -errno;
2753         }
2754
2755         n = sd_listen_fds(true);
2756         if (n < 0) {
2757                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
2758                 return n;
2759         }
2760
2761         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2762
2763                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
2764
2765                         if (s->native_fd >= 0) {
2766                                 log_error("Too many native sockets passed.");
2767                                 return -EINVAL;
2768                         }
2769
2770                         s->native_fd = fd;
2771
2772                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
2773
2774                         if (s->stdout_fd >= 0) {
2775                                 log_error("Too many stdout sockets passed.");
2776                                 return -EINVAL;
2777                         }
2778
2779                         s->stdout_fd = fd;
2780
2781                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
2782
2783                         if (s->syslog_fd >= 0) {
2784                                 log_error("Too many /dev/log sockets passed.");
2785                                 return -EINVAL;
2786                         }
2787
2788                         s->syslog_fd = fd;
2789
2790                 } else {
2791                         log_error("Unknown socket passed.");
2792                         return -EINVAL;
2793                 }
2794         }
2795
2796         r = open_syslog_socket(s);
2797         if (r < 0)
2798                 return r;
2799
2800         r = open_native_socket(s);
2801         if (r < 0)
2802                 return r;
2803
2804         r = open_stdout_socket(s);
2805         if (r < 0)
2806                 return r;
2807
2808         r = open_dev_kmsg(s);
2809         if (r < 0)
2810                 return r;
2811
2812         r = open_signalfd(s);
2813         if (r < 0)
2814                 return r;
2815
2816         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2817         if (!s->rate_limit)
2818                 return -ENOMEM;
2819
2820         r = system_journal_open(s);
2821         if (r < 0)
2822                 return r;
2823
2824         return 0;
2825 }
2826
2827 static void server_done(Server *s) {
2828         JournalFile *f;
2829         assert(s);
2830
2831         while (s->stdout_streams)
2832                 stdout_stream_free(s->stdout_streams);
2833
2834         if (s->system_journal)
2835                 journal_file_close(s->system_journal);
2836
2837         if (s->runtime_journal)
2838                 journal_file_close(s->runtime_journal);
2839
2840         while ((f = hashmap_steal_first(s->user_journals)))
2841                 journal_file_close(f);
2842
2843         hashmap_free(s->user_journals);
2844
2845         if (s->epoll_fd >= 0)
2846                 close_nointr_nofail(s->epoll_fd);
2847
2848         if (s->signal_fd >= 0)
2849                 close_nointr_nofail(s->signal_fd);
2850
2851         if (s->syslog_fd >= 0)
2852                 close_nointr_nofail(s->syslog_fd);
2853
2854         if (s->native_fd >= 0)
2855                 close_nointr_nofail(s->native_fd);
2856
2857         if (s->stdout_fd >= 0)
2858                 close_nointr_nofail(s->stdout_fd);
2859
2860         if (s->dev_kmsg_fd >= 0)
2861                 close_nointr_nofail(s->dev_kmsg_fd);
2862
2863         if (s->rate_limit)
2864                 journal_rate_limit_free(s->rate_limit);
2865
2866         free(s->buffer);
2867         free(s->tty_path);
2868 }
2869
2870 int main(int argc, char *argv[]) {
2871         Server server;
2872         int r;
2873
2874         /* if (getppid() != 1) { */
2875         /*         log_error("This program should be invoked by init only."); */
2876         /*         return EXIT_FAILURE; */
2877         /* } */
2878
2879         if (argc > 1) {
2880                 log_error("This program does not take arguments.");
2881                 return EXIT_FAILURE;
2882         }
2883
2884         log_set_target(LOG_TARGET_SAFE);
2885         log_set_facility(LOG_SYSLOG);
2886         log_parse_environment();
2887         log_open();
2888
2889         umask(0022);
2890
2891         r = server_init(&server);
2892         if (r < 0)
2893                 goto finish;
2894
2895         server_vacuum(&server);
2896         server_flush_to_var(&server);
2897         server_flush_dev_kmsg(&server);
2898
2899         log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
2900         driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
2901
2902         sd_notify(false,
2903                   "READY=1\n"
2904                   "STATUS=Processing requests...");
2905
2906         for (;;) {
2907                 struct epoll_event event;
2908
2909                 r = epoll_wait(server.epoll_fd, &event, 1, -1);
2910                 if (r < 0) {
2911
2912                         if (errno == EINTR)
2913                                 continue;
2914
2915                         log_error("epoll_wait() failed: %m");
2916                         r = -errno;
2917                         goto finish;
2918                 } else if (r == 0)
2919                         break;
2920
2921                 r = process_event(&server, &event);
2922                 if (r < 0)
2923                         goto finish;
2924                 else if (r == 0)
2925                         break;
2926         }
2927
2928         log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
2929         driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
2930
2931 finish:
2932         sd_notify(false,
2933                   "STATUS=Shutting down...");
2934
2935         server_done(&server);
2936
2937         return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
2938 }