chiark / gitweb /
33865b812075187b55191dbdc48edf640338ec7a
[elogind.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 2 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
32
33 #include <systemd/sd-journal.h>
34 #include <systemd/sd-login.h>
35 #include <systemd/sd-messages.h>
36 #include <systemd/sd-daemon.h>
37
38 #include "hashmap.h"
39 #include "journal-file.h"
40 #include "socket-util.h"
41 #include "cgroup-util.h"
42 #include "list.h"
43 #include "journal-rate-limit.h"
44 #include "journal-internal.h"
45 #include "conf-parser.h"
46 #include "journald.h"
47 #include "virt.h"
48
49 #ifdef HAVE_ACL
50 #include <sys/acl.h>
51 #include <acl/libacl.h>
52 #include "acl-util.h"
53 #endif
54
/* Upper bound on the number of per-user journal files kept open at once;
 * find_journal() closes entries until the count drops below this. */
#define USER_JOURNALS_MAX 1024
/* NOTE(review): presumably the cap on concurrently open stdout stream
 * connections; the code using it is not part of this excerpt. */
#define STDOUT_STREAMS_MAX 4096

/* NOTE(review): presumably the defaults handed to the rate limiter
 * (interval in usec, burst as a message count) -- confirm at the point
 * where the rate limiter is created. */
#define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 200

/* How long available_space() keeps returning its cached result before
 * re-scanning the journal directory. */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)

/* NOTE(review): user is outside this excerpt; by name, the interval
 * between checks whether /var has become available. */
#define RECHECK_VAR_AVAILABLE_USEC (30*USEC_PER_SEC)

/* NOTE(review): user is outside this excerpt; by name, a timeout applied
 * to the syslog socket. */
#define SYSLOG_TIMEOUT_USEC (250*USEC_PER_MSEC)

/* Number of spare iovec slots dispatch_message_real() requires beyond the
 * caller's own fields; callers size their arrays as
 * N_IOVEC_META_FIELDS + <own field count>. */
#define N_IOVEC_META_FIELDS 16
68
/* State of a StdoutStream (stored in StdoutStream.state).
 *
 * NOTE(review): the code that advances through these states is not part
 * of this excerpt. Judging by the names, the first six values correspond
 * to header fields a client sends (they mirror the identically named
 * members of struct StdoutStream) before the stream reaches
 * STDOUT_STREAM_RUNNING -- confirm against the stream parser. */
typedef enum StdoutStreamState {
        STDOUT_STREAM_IDENTIFIER,
        STDOUT_STREAM_PRIORITY,
        STDOUT_STREAM_LEVEL_PREFIX,
        STDOUT_STREAM_FORWARD_TO_SYSLOG,
        STDOUT_STREAM_FORWARD_TO_KMSG,
        STDOUT_STREAM_FORWARD_TO_CONSOLE,
        STDOUT_STREAM_RUNNING
} StdoutStreamState;
78
/* Per-connection state of one stdout stream client.
 *
 * NOTE(review): the functions operating on this structure are outside
 * this excerpt; field descriptions below are derived from names and
 * types only and should be confirmed there. */
struct StdoutStream {
        Server *server;             /* back pointer to the owning server */
        StdoutStreamState state;    /* current StdoutStreamState */

        int fd;                     /* file descriptor of the connection */

        struct ucred ucred;         /* credentials associated with the stream */

        char *identifier;           /* presumably the identifier supplied by the client */
        int priority;               /* presumably the priority for lines from this stream */
        bool level_prefix:1;
        bool forward_to_syslog:1;
        bool forward_to_kmsg:1;
        bool forward_to_console:1;

        char buffer[LINE_MAX+1];    /* input buffer, room for LINE_MAX bytes plus one */
        size_t length;              /* presumably the number of bytes used in buffer */

        LIST_FIELDS(StdoutStream, stdout_stream);
};
99
100 static int server_flush_to_var(Server *s);
101
102 static uint64_t available_space(Server *s) {
103         char ids[33], *p;
104         const char *f;
105         sd_id128_t machine;
106         struct statvfs ss;
107         uint64_t sum = 0, avail = 0, ss_avail = 0;
108         int r;
109         DIR *d;
110         usec_t ts;
111         JournalMetrics *m;
112
113         ts = now(CLOCK_MONOTONIC);
114
115         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
116                 return s->cached_available_space;
117
118         r = sd_id128_get_machine(&machine);
119         if (r < 0)
120                 return 0;
121
122         if (s->system_journal) {
123                 f = "/var/log/journal/";
124                 m = &s->system_metrics;
125         } else {
126                 f = "/run/log/journal/";
127                 m = &s->runtime_metrics;
128         }
129
130         assert(m);
131
132         p = strappend(f, sd_id128_to_string(machine, ids));
133         if (!p)
134                 return 0;
135
136         d = opendir(p);
137         free(p);
138
139         if (!d)
140                 return 0;
141
142         if (fstatvfs(dirfd(d), &ss) < 0)
143                 goto finish;
144
145         for (;;) {
146                 struct stat st;
147                 struct dirent buf, *de;
148                 int k;
149
150                 k = readdir_r(d, &buf, &de);
151                 if (k != 0) {
152                         r = -k;
153                         goto finish;
154                 }
155
156                 if (!de)
157                         break;
158
159                 if (!dirent_is_file_with_suffix(de, ".journal"))
160                         continue;
161
162                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
163                         continue;
164
165                 sum += (uint64_t) st.st_blocks * (uint64_t) st.st_blksize;
166         }
167
168         avail = sum >= m->max_use ? 0 : m->max_use - sum;
169
170         ss_avail = ss.f_bsize * ss.f_bavail;
171
172         ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
173
174         if (ss_avail < avail)
175                 avail = ss_avail;
176
177         s->cached_available_space = avail;
178         s->cached_available_space_timestamp = ts;
179
180 finish:
181         closedir(d);
182
183         return avail;
184 }
185
186 static void server_read_file_gid(Server *s) {
187         const char *adm = "adm";
188         int r;
189
190         assert(s);
191
192         if (s->file_gid_valid)
193                 return;
194
195         r = get_group_creds(&adm, &s->file_gid);
196         if (r < 0)
197                 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
198
199         /* if we couldn't read the gid, then it will be 0, but that's
200          * fine and we shouldn't try to resolve the group again, so
201          * let's just pretend it worked right-away. */
202         s->file_gid_valid = true;
203 }
204
205 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
206         int r;
207 #ifdef HAVE_ACL
208         acl_t acl;
209         acl_entry_t entry;
210         acl_permset_t permset;
211 #endif
212
213         assert(f);
214
215         server_read_file_gid(s);
216
217         r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
218         if (r < 0)
219                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
220
221 #ifdef HAVE_ACL
222         if (uid <= 0)
223                 return;
224
225         acl = acl_get_fd(f->fd);
226         if (!acl) {
227                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
228                 return;
229         }
230
231         r = acl_find_uid(acl, uid, &entry);
232         if (r <= 0) {
233
234                 if (acl_create_entry(&acl, &entry) < 0 ||
235                     acl_set_tag_type(entry, ACL_USER) < 0 ||
236                     acl_set_qualifier(entry, &uid) < 0) {
237                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
238                         goto finish;
239                 }
240         }
241
242         if (acl_get_permset(entry, &permset) < 0 ||
243             acl_add_perm(permset, ACL_READ) < 0 ||
244             acl_calc_mask(&acl) < 0) {
245                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
246                 goto finish;
247         }
248
249         if (acl_set_fd(f->fd, acl) < 0)
250                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
251
252 finish:
253         acl_free(acl);
254 #endif
255 }
256
257 static JournalFile* find_journal(Server *s, uid_t uid) {
258         char *p;
259         int r;
260         JournalFile *f;
261         char ids[33];
262         sd_id128_t machine;
263
264         assert(s);
265
266         /* We split up user logs only on /var, not on /run. If the
267          * runtime file is open, we write to it exclusively, in order
268          * to guarantee proper order as soon as we flush /run to
269          * /var and close the runtime file. */
270
271         if (s->runtime_journal)
272                 return s->runtime_journal;
273
274         if (uid <= 0)
275                 return s->system_journal;
276
277         r = sd_id128_get_machine(&machine);
278         if (r < 0)
279                 return s->system_journal;
280
281         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
282         if (f)
283                 return f;
284
285         if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0)
286                 return s->system_journal;
287
288         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
289                 /* Too many open? Then let's close one */
290                 f = hashmap_steal_first(s->user_journals);
291                 assert(f);
292                 journal_file_close(f);
293         }
294
295         r = journal_file_open(p, O_RDWR|O_CREAT, 0640, s->system_journal, &f);
296         free(p);
297
298         if (r < 0)
299                 return s->system_journal;
300
301         server_fix_perms(s, f, uid);
302         f->metrics = s->system_metrics;
303         f->compress = s->compress;
304
305         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
306         if (r < 0) {
307                 journal_file_close(f);
308                 return s->system_journal;
309         }
310
311         return f;
312 }
313
314 static void server_rotate(Server *s) {
315         JournalFile *f;
316         void *k;
317         Iterator i;
318         int r;
319
320         log_info("Rotating...");
321
322         if (s->runtime_journal) {
323                 r = journal_file_rotate(&s->runtime_journal);
324                 if (r < 0)
325                         log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
326         }
327
328         if (s->system_journal) {
329                 r = journal_file_rotate(&s->system_journal);
330                 if (r < 0)
331                         log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
332         }
333
334         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
335                 r = journal_file_rotate(&f);
336                 if (r < 0)
337                         log_error("Failed to rotate %s: %s", f->path, strerror(-r));
338                 else
339                         hashmap_replace(s->user_journals, k, f);
340         }
341 }
342
343 static void server_vacuum(Server *s) {
344         char *p;
345         char ids[33];
346         sd_id128_t machine;
347         int r;
348
349         log_info("Vacuuming...");
350
351         r = sd_id128_get_machine(&machine);
352         if (r < 0) {
353                 log_error("Failed to get machine ID: %s", strerror(-r));
354                 return;
355         }
356
357         sd_id128_to_string(machine, ids);
358
359         if (s->system_journal) {
360                 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
361                         log_error("Out of memory.");
362                         return;
363                 }
364
365                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
366                 if (r < 0 && r != -ENOENT)
367                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
368                 free(p);
369         }
370
371
372         if (s->runtime_journal) {
373                 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
374                         log_error("Out of memory.");
375                         return;
376                 }
377
378                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
379                 if (r < 0 && r != -ENOENT)
380                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
381                 free(p);
382         }
383
384         s->cached_available_space_timestamp = 0;
385 }
386
387 static char *shortened_cgroup_path(pid_t pid) {
388         int r;
389         char *process_path, *init_path, *path;
390
391         assert(pid > 0);
392
393         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
394         if (r < 0)
395                 return NULL;
396
397         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
398         if (r < 0) {
399                 free(process_path);
400                 return NULL;
401         }
402
403         if (endswith(init_path, "/system"))
404                 init_path[strlen(init_path) - 7] = 0;
405         else if (streq(init_path, "/"))
406                 init_path[0] = 0;
407
408         if (startswith(process_path, init_path)) {
409                 char *p;
410
411                 p = strdup(process_path + strlen(init_path));
412                 if (!p) {
413                         free(process_path);
414                         free(init_path);
415                         return NULL;
416                 }
417                 path = p;
418         } else {
419                 path = process_path;
420                 process_path = NULL;
421         }
422
423         free(process_path);
424         free(init_path);
425
426         return path;
427 }
428
429 static void dispatch_message_real(Server *s,
430                              struct iovec *iovec, unsigned n, unsigned m,
431                              struct ucred *ucred,
432                              struct timeval *tv) {
433
434         char *pid = NULL, *uid = NULL, *gid = NULL,
435                 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
436                 *comm = NULL, *cmdline = NULL, *hostname = NULL,
437                 *audit_session = NULL, *audit_loginuid = NULL,
438                 *exe = NULL, *cgroup = NULL, *session = NULL,
439                 *owner_uid = NULL, *unit = NULL;
440
441         char idbuf[33];
442         sd_id128_t id;
443         int r;
444         char *t;
445         uid_t loginuid = 0, realuid = 0;
446         JournalFile *f;
447         bool vacuumed = false;
448
449         assert(s);
450         assert(iovec);
451         assert(n > 0);
452         assert(n + N_IOVEC_META_FIELDS <= m);
453
454         if (ucred) {
455                 uint32_t audit;
456                 uid_t owner;
457
458                 realuid = ucred->uid;
459
460                 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
461                         IOVEC_SET_STRING(iovec[n++], pid);
462
463                 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
464                         IOVEC_SET_STRING(iovec[n++], uid);
465
466                 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
467                         IOVEC_SET_STRING(iovec[n++], gid);
468
469                 r = get_process_comm(ucred->pid, &t);
470                 if (r >= 0) {
471                         comm = strappend("_COMM=", t);
472                         free(t);
473
474                         if (comm)
475                                 IOVEC_SET_STRING(iovec[n++], comm);
476                 }
477
478                 r = get_process_exe(ucred->pid, &t);
479                 if (r >= 0) {
480                         exe = strappend("_EXE=", t);
481                         free(t);
482
483                         if (comm)
484                                 IOVEC_SET_STRING(iovec[n++], exe);
485                 }
486
487                 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
488                 if (r >= 0) {
489                         cmdline = strappend("_CMDLINE=", t);
490                         free(t);
491
492                         if (cmdline)
493                                 IOVEC_SET_STRING(iovec[n++], cmdline);
494                 }
495
496                 r = audit_session_from_pid(ucred->pid, &audit);
497                 if (r >= 0)
498                         if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
499                                 IOVEC_SET_STRING(iovec[n++], audit_session);
500
501                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
502                 if (r >= 0)
503                         if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
504                                 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
505
506                 t = shortened_cgroup_path(ucred->pid);
507                 if (t) {
508                         cgroup = strappend("_SYSTEMD_CGROUP=", t);
509                         free(t);
510
511                         if (cgroup)
512                                 IOVEC_SET_STRING(iovec[n++], cgroup);
513                 }
514
515                 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
516                         session = strappend("_SYSTEMD_SESSION=", t);
517                         free(t);
518
519                         if (session)
520                                 IOVEC_SET_STRING(iovec[n++], session);
521                 }
522
523                 if (sd_pid_get_unit(ucred->pid, &t) >= 0) {
524                         unit = strappend("_SYSTEMD_UNIT=", t);
525                         free(t);
526
527                         if (unit)
528                                 IOVEC_SET_STRING(iovec[n++], unit);
529                 }
530
531                 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
532                         if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
533                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
534         }
535
536         if (tv) {
537                 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
538                              (unsigned long long) timeval_load(tv)) >= 0)
539                         IOVEC_SET_STRING(iovec[n++], source_time);
540         }
541
542         /* Note that strictly speaking storing the boot id here is
543          * redundant since the entry includes this in-line
544          * anyway. However, we need this indexed, too. */
545         r = sd_id128_get_boot(&id);
546         if (r >= 0)
547                 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
548                         IOVEC_SET_STRING(iovec[n++], boot_id);
549
550         r = sd_id128_get_machine(&id);
551         if (r >= 0)
552                 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
553                         IOVEC_SET_STRING(iovec[n++], machine_id);
554
555         t = gethostname_malloc();
556         if (t) {
557                 hostname = strappend("_HOSTNAME=", t);
558                 free(t);
559                 if (hostname)
560                         IOVEC_SET_STRING(iovec[n++], hostname);
561         }
562
563         assert(n <= m);
564
565         server_flush_to_var(s);
566
567 retry:
568         f = find_journal(s, realuid == 0 ? 0 : loginuid);
569         if (!f)
570                 log_warning("Dropping message, as we can't find a place to store the data.");
571         else {
572                 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
573
574                 if (r == -E2BIG && !vacuumed) {
575                         log_info("Allocation limit reached.");
576
577                         server_rotate(s);
578                         server_vacuum(s);
579                         vacuumed = true;
580
581                         log_info("Retrying write.");
582                         goto retry;
583                 }
584
585                 if (r < 0)
586                         log_error("Failed to write entry, ignoring: %s", strerror(-r));
587         }
588
589         free(pid);
590         free(uid);
591         free(gid);
592         free(comm);
593         free(exe);
594         free(cmdline);
595         free(source_time);
596         free(boot_id);
597         free(machine_id);
598         free(hostname);
599         free(audit_session);
600         free(audit_loginuid);
601         free(cgroup);
602         free(session);
603         free(owner_uid);
604         free(unit);
605 }
606
607 static void driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
608         char mid[11 + 32 + 1];
609         char buffer[16 + LINE_MAX + 1];
610         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
611         int n = 0;
612         va_list ap;
613         struct ucred ucred;
614
615         assert(s);
616         assert(format);
617
618         IOVEC_SET_STRING(iovec[n++], "PRIORITY=5");
619         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
620
621         memcpy(buffer, "MESSAGE=", 8);
622         va_start(ap, format);
623         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
624         va_end(ap);
625         char_array_0(buffer);
626         IOVEC_SET_STRING(iovec[n++], buffer);
627
628         snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
629         char_array_0(mid);
630         IOVEC_SET_STRING(iovec[n++], mid);
631
632         zero(ucred);
633         ucred.pid = getpid();
634         ucred.uid = getuid();
635         ucred.gid = getgid();
636
637         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL);
638 }
639
640 static void dispatch_message(Server *s,
641                              struct iovec *iovec, unsigned n, unsigned m,
642                              struct ucred *ucred,
643                              struct timeval *tv,
644                              int priority) {
645         int rl;
646         char *path = NULL, *c;
647
648         assert(s);
649         assert(iovec || n == 0);
650
651         if (n == 0)
652                 return;
653
654         if (!ucred)
655                 goto finish;
656
657         path = shortened_cgroup_path(ucred->pid);
658         if (!path)
659                 goto finish;
660
661         /* example: /user/lennart/3/foobar
662          *          /system/dbus.service/foobar
663          *
664          * So let's cut of everything past the third /, since that is
665          * wher user directories start */
666
667         c = strchr(path, '/');
668         if (c) {
669                 c = strchr(c+1, '/');
670                 if (c) {
671                         c = strchr(c+1, '/');
672                         if (c)
673                                 *c = 0;
674                 }
675         }
676
677         rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
678
679         if (rl == 0) {
680                 free(path);
681                 return;
682         }
683
684         /* Write a suppression message if we suppressed something */
685         if (rl > 1)
686                 driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
687
688         free(path);
689
690 finish:
691         dispatch_message_real(s, iovec, n, m, ucred, tv);
692 }
693
694 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
695         struct msghdr msghdr;
696         struct cmsghdr *cmsg;
697         union {
698                 struct cmsghdr cmsghdr;
699                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
700         } control;
701         union sockaddr_union sa;
702
703         assert(s);
704         assert(iovec);
705         assert(n_iovec > 0);
706
707         zero(msghdr);
708         msghdr.msg_iov = (struct iovec*) iovec;
709         msghdr.msg_iovlen = n_iovec;
710
711         zero(sa);
712         sa.un.sun_family = AF_UNIX;
713         strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
714         msghdr.msg_name = &sa;
715         msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
716
717         if (ucred) {
718                 zero(control);
719                 msghdr.msg_control = &control;
720                 msghdr.msg_controllen = sizeof(control);
721
722                 cmsg = CMSG_FIRSTHDR(&msghdr);
723                 cmsg->cmsg_level = SOL_SOCKET;
724                 cmsg->cmsg_type = SCM_CREDENTIALS;
725                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
726                 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
727                 msghdr.msg_controllen = cmsg->cmsg_len;
728         }
729
730         /* Forward the syslog message we received via /dev/log to
731          * /run/systemd/syslog. Unfortunately we currently can't set
732          * the SO_TIMESTAMP auxiliary data, and hence we don't. */
733
734         if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
735                 return;
736
737         if (ucred && errno == ESRCH) {
738                 struct ucred u;
739
740                 /* Hmm, presumably the sender process vanished
741                  * by now, so let's fix it as good as we
742                  * can, and retry */
743
744                 u = *ucred;
745                 u.pid = getpid();
746                 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
747
748                 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
749                         return;
750         }
751
752         log_debug("Failed to forward syslog message: %m");
753 }
754
755 static void forward_syslog_raw(Server *s, const char *buffer, struct ucred *ucred, struct timeval *tv) {
756         struct iovec iovec;
757
758         assert(s);
759         assert(buffer);
760
761         IOVEC_SET_STRING(iovec, buffer);
762         forward_syslog_iovec(s, &iovec, 1, ucred, tv);
763 }
764
765 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
766         struct iovec iovec[5];
767         char header_priority[6], header_time[64], header_pid[16];
768         int n = 0;
769         time_t t;
770         struct tm *tm;
771         char *ident_buf = NULL;
772
773         assert(s);
774         assert(priority >= 0);
775         assert(priority <= 999);
776         assert(message);
777
778         /* First: priority field */
779         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
780         char_array_0(header_priority);
781         IOVEC_SET_STRING(iovec[n++], header_priority);
782
783         /* Second: timestamp */
784         t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
785         tm = localtime(&t);
786         if (!tm)
787                 return;
788         if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
789                 return;
790         IOVEC_SET_STRING(iovec[n++], header_time);
791
792         /* Third: identifier and PID */
793         if (ucred) {
794                 if (!identifier) {
795                         get_process_comm(ucred->pid, &ident_buf);
796                         identifier = ident_buf;
797                 }
798
799                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
800                 char_array_0(header_pid);
801
802                 if (identifier)
803                         IOVEC_SET_STRING(iovec[n++], identifier);
804
805                 IOVEC_SET_STRING(iovec[n++], header_pid);
806         } else if (identifier) {
807                 IOVEC_SET_STRING(iovec[n++], identifier);
808                 IOVEC_SET_STRING(iovec[n++], ": ");
809         }
810
811         /* Fourth: message */
812         IOVEC_SET_STRING(iovec[n++], message);
813
814         forward_syslog_iovec(s, iovec, n, ucred, tv);
815
816         free(ident_buf);
817 }
818
/* Returns priority unchanged if it carries a non-zero facility; otherwise
 * (facility bits all zero, which is the kernel facility) returns its
 * level bits combined with LOG_USER. */
static int fixup_priority(int priority) {
        int facility = priority & LOG_FACMASK;
        int level = priority & LOG_PRIMASK;

        return facility != 0 ? priority : (level | LOG_USER);
}
826
827 static void forward_kmsg(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
828         struct iovec iovec[5];
829         char header_priority[6], header_pid[16];
830         int n = 0;
831         char *ident_buf = NULL;
832         int fd;
833
834         assert(s);
835         assert(priority >= 0);
836         assert(priority <= 999);
837         assert(message);
838
839         /* Never allow messages with kernel facility to be written to
840          * kmsg, regardless where the data comes from. */
841         priority = fixup_priority(priority);
842
843         /* First: priority field */
844         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
845         char_array_0(header_priority);
846         IOVEC_SET_STRING(iovec[n++], header_priority);
847
848         /* Second: identifier and PID */
849         if (ucred) {
850                 if (!identifier) {
851                         get_process_comm(ucred->pid, &ident_buf);
852                         identifier = ident_buf;
853                 }
854
855                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
856                 char_array_0(header_pid);
857
858                 if (identifier)
859                         IOVEC_SET_STRING(iovec[n++], identifier);
860
861                 IOVEC_SET_STRING(iovec[n++], header_pid);
862         } else if (identifier) {
863                 IOVEC_SET_STRING(iovec[n++], identifier);
864                 IOVEC_SET_STRING(iovec[n++], ": ");
865         }
866
867         /* Fourth: message */
868         IOVEC_SET_STRING(iovec[n++], message);
869         IOVEC_SET_STRING(iovec[n++], "\n");
870
871         fd = open("/dev/kmsg", O_WRONLY|O_NOCTTY|O_CLOEXEC);
872         if (fd < 0) {
873                 log_debug("Failed to open /dev/kmsg for logging: %s", strerror(errno));
874                 goto finish;
875         }
876
877         if (writev(fd, iovec, n) < 0)
878                 log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
879
880         close_nointr_nofail(fd);
881
882 finish:
883         free(ident_buf);
884 }
885
886 static void forward_console(Server *s, const char *identifier, const char *message, struct ucred *ucred) {
887         struct iovec iovec[4];
888         char header_pid[16];
889         int n = 0, fd;
890         char *ident_buf = NULL;
891
892         assert(s);
893         assert(message);
894
895         /* First: identifier and PID */
896         if (ucred) {
897                 if (!identifier) {
898                         get_process_comm(ucred->pid, &ident_buf);
899                         identifier = ident_buf;
900                 }
901
902                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
903                 char_array_0(header_pid);
904
905                 if (identifier)
906                         IOVEC_SET_STRING(iovec[n++], identifier);
907
908                 IOVEC_SET_STRING(iovec[n++], header_pid);
909         } else if (identifier) {
910                 IOVEC_SET_STRING(iovec[n++], identifier);
911                 IOVEC_SET_STRING(iovec[n++], ": ");
912         }
913
914         /* Third: message */
915         IOVEC_SET_STRING(iovec[n++], message);
916         IOVEC_SET_STRING(iovec[n++], "\n");
917
918         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
919         if (fd < 0) {
920                 log_debug("Failed to open /dev/console for logging: %s", strerror(errno));
921                 goto finish;
922         }
923
924         if (writev(fd, iovec, n) < 0)
925                 log_debug("Failed to write to /dev/console for logging: %s", strerror(errno));
926
927         close_nointr_nofail(fd);
928
929 finish:
930         free(ident_buf);
931 }
932
933 static void read_identifier(const char **buf, char **identifier, char **pid) {
934         const char *p;
935         char *t;
936         size_t l, e;
937
938         assert(buf);
939         assert(identifier);
940         assert(pid);
941
942         p = *buf;
943
944         p += strspn(p, WHITESPACE);
945         l = strcspn(p, WHITESPACE);
946
947         if (l <= 0 ||
948             p[l-1] != ':')
949                 return;
950
951         e = l;
952         l--;
953
954         if (p[l-1] == ']') {
955                 size_t k = l-1;
956
957                 for (;;) {
958
959                         if (p[k] == '[') {
960                                 t = strndup(p+k+1, l-k-2);
961                                 if (t)
962                                         *pid = t;
963
964                                 l = k;
965                                 break;
966                         }
967
968                         if (k == 0)
969                                 break;
970
971                         k--;
972                 }
973         }
974
975         t = strndup(p, l);
976         if (t)
977                 *identifier = t;
978
979         *buf = p + e;
980         *buf += strspn(*buf, WHITESPACE);
981 }
982
983 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv) {
984         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
985         struct iovec iovec[N_IOVEC_META_FIELDS + 6];
986         unsigned n = 0;
987         int priority = LOG_USER | LOG_INFO;
988         char *identifier = NULL, *pid = NULL;
989
990         assert(s);
991         assert(buf);
992
993         if (s->forward_to_syslog)
994                 forward_syslog_raw(s, buf, ucred, tv);
995
996         parse_syslog_priority((char**) &buf, &priority);
997         skip_syslog_date((char**) &buf);
998         read_identifier(&buf, &identifier, &pid);
999
1000         if (s->forward_to_kmsg)
1001                 forward_kmsg(s, priority, identifier, buf, ucred);
1002
1003         if (s->forward_to_console)
1004                 forward_console(s, identifier, buf, ucred);
1005
1006         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
1007
1008         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1009                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1010
1011         if (priority & LOG_FACMASK)
1012                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1013                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1014
1015         if (identifier) {
1016                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1017                 if (syslog_identifier)
1018                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1019         }
1020
1021         if (pid) {
1022                 syslog_pid = strappend("SYSLOG_PID=", pid);
1023                 if (syslog_pid)
1024                         IOVEC_SET_STRING(iovec[n++], syslog_pid);
1025         }
1026
1027         message = strappend("MESSAGE=", buf);
1028         if (message)
1029                 IOVEC_SET_STRING(iovec[n++], message);
1030
1031         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, priority);
1032
1033         free(message);
1034         free(identifier);
1035         free(pid);
1036         free(syslog_priority);
1037         free(syslog_facility);
1038         free(syslog_identifier);
1039 }
1040
/* Checks whether the l bytes at p form a field name that clients are
 * allowed to set.
 *
 * This roughly follows the POSIX syntax recommendations for environment
 * variable names, with a few extra restrictions:
 *
 *   http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
 *
 * A name is accepted if it is 1..64 bytes long, consists solely of
 * 'A'-'Z', '0'-'9' and '_', and starts with neither a digit nor an
 * underscore (names with a leading underscore are protected). */
static bool valid_user_field(const char *p, size_t l) {
        size_t i;

        /* Neither empty nor longer than 64 characters */
        if (l == 0 || l > 64)
                return false;

        /* Protected names start with '_'; digits may not lead either */
        if (p[0] == '_')
                return false;

        if (p[0] >= '0' && p[0] <= '9')
                return false;

        /* Every character must be from the set A-Z, 0-9, '_' */
        for (i = 0; i < l; i++) {
                char c = p[i];
                bool upper = c >= 'A' && c <= 'Z';
                bool digit = c >= '0' && c <= '9';

                if (!upper && !digit && c != '_')
                        return false;
        }

        return true;
}
1075
1076 static void process_native_message(Server *s, const void *buffer, size_t buffer_size, struct ucred *ucred, struct timeval *tv) {
1077         struct iovec *iovec = NULL;
1078         unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1079         const char *p;
1080         size_t remaining;
1081         int priority = LOG_INFO;
1082         char *identifier = NULL, *message = NULL;
1083
1084         assert(s);
1085         assert(buffer || n == 0);
1086
1087         p = buffer;
1088         remaining = buffer_size;
1089
1090         while (remaining > 0) {
1091                 const char *e, *q;
1092
1093                 e = memchr(p, '\n', remaining);
1094
1095                 if (!e) {
1096                         /* Trailing noise, let's ignore it, and flush what we collected */
1097                         log_debug("Received message with trailing noise, ignoring.");
1098                         break;
1099                 }
1100
1101                 if (e == p) {
1102                         /* Entry separator */
1103                         dispatch_message(s, iovec, n, m, ucred, tv, priority);
1104                         n = 0;
1105                         priority = LOG_INFO;
1106
1107                         p++;
1108                         remaining--;
1109                         continue;
1110                 }
1111
1112                 if (*p == '.' || *p == '#') {
1113                         /* Ignore control commands for now, and
1114                          * comments too. */
1115                         remaining -= (e - p) + 1;
1116                         p = e + 1;
1117                         continue;
1118                 }
1119
1120                 /* A property follows */
1121
1122                 if (n+N_IOVEC_META_FIELDS >= m) {
1123                         struct iovec *c;
1124                         unsigned u;
1125
1126                         u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1127                         c = realloc(iovec, u * sizeof(struct iovec));
1128                         if (!c) {
1129                                 log_error("Out of memory");
1130                                 break;
1131                         }
1132
1133                         iovec = c;
1134                         m = u;
1135                 }
1136
1137                 q = memchr(p, '=', e - p);
1138                 if (q) {
1139                         if (valid_user_field(p, q - p)) {
1140                                 size_t l;
1141
1142                                 l = e - p;
1143
1144                                 /* If the field name starts with an
1145                                  * underscore, skip the variable,
1146                                  * since that indidates a trusted
1147                                  * field */
1148                                 iovec[n].iov_base = (char*) p;
1149                                 iovec[n].iov_len = l;
1150                                 n++;
1151
1152                                 /* We need to determine the priority
1153                                  * of this entry for the rate limiting
1154                                  * logic */
1155                                 if (l == 10 &&
1156                                     memcmp(p, "PRIORITY=", 9) == 0 &&
1157                                     p[9] >= '0' && p[9] <= '9')
1158                                         priority = (priority & LOG_FACMASK) | (p[9] - '0');
1159
1160                                 else if (l == 17 &&
1161                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1162                                          p[16] >= '0' && p[16] <= '9')
1163                                         priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1164
1165                                 else if (l == 18 &&
1166                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1167                                          p[16] >= '0' && p[16] <= '9' &&
1168                                          p[17] >= '0' && p[17] <= '9')
1169                                         priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1170
1171                                 else if (l >= 12 &&
1172                                          memcmp(p, "SYSLOG_IDENTIFIER=", 11) == 0) {
1173                                         char *t;
1174
1175                                         t = strndup(p + 11, l - 11);
1176                                         if (t) {
1177                                                 free(identifier);
1178                                                 identifier = t;
1179                                         }
1180                                 } else if (l >= 8 &&
1181                                            memcmp(p, "MESSAGE=", 8) == 0) {
1182                                         char *t;
1183
1184                                         t = strndup(p + 8, l - 8);
1185                                         if (t) {
1186                                                 free(message);
1187                                                 message = t;
1188                                         }
1189                                 }
1190                         }
1191
1192                         remaining -= (e - p) + 1;
1193                         p = e + 1;
1194                         continue;
1195                 } else {
1196                         uint64_t l;
1197                         char *k;
1198
1199                         if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1200                                 log_debug("Failed to parse message, ignoring.");
1201                                 break;
1202                         }
1203
1204                         memcpy(&l, e + 1, sizeof(uint64_t));
1205                         l = le64toh(l);
1206
1207                         if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1208                             e[1+sizeof(uint64_t)+l] != '\n') {
1209                                 log_debug("Failed to parse message, ignoring.");
1210                                 break;
1211                         }
1212
1213                         k = malloc((e - p) + 1 + l);
1214                         if (!k) {
1215                                 log_error("Out of memory");
1216                                 break;
1217                         }
1218
1219                         memcpy(k, p, e - p);
1220                         k[e - p] = '=';
1221                         memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1222
1223                         if (valid_user_field(p, e - p)) {
1224                                 iovec[n].iov_base = k;
1225                                 iovec[n].iov_len = (e - p) + 1 + l;
1226                                 n++;
1227                         } else
1228                                 free(k);
1229
1230                         remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1231                         p = e + 1 + sizeof(uint64_t) + l + 1;
1232                 }
1233         }
1234
1235         if (n <= 0)
1236                 goto finish;
1237
1238         tn = n++;
1239         IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1240
1241         if (message) {
1242                 if (s->forward_to_syslog)
1243                         forward_syslog(s, priority, identifier, message, ucred, tv);
1244
1245                 if (s->forward_to_kmsg)
1246                         forward_kmsg(s, priority, identifier, message, ucred);
1247
1248                 if (s->forward_to_console)
1249                         forward_console(s, identifier, message, ucred);
1250         }
1251
1252         dispatch_message(s, iovec, n, m, ucred, tv, priority);
1253
1254 finish:
1255         for (j = 0; j < n; j++)  {
1256                 if (j == tn)
1257                         continue;
1258
1259                 if (iovec[j].iov_base < buffer ||
1260                     (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1261                         free(iovec[j].iov_base);
1262         }
1263
1264         free(identifier);
1265         free(message);
1266 }
1267
1268 static int stdout_stream_log(StdoutStream *s, const char *p) {
1269         struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1270         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1271         unsigned n = 0;
1272         int priority;
1273
1274         assert(s);
1275         assert(p);
1276
1277         priority = s->priority;
1278
1279         if (s->level_prefix)
1280                 parse_syslog_priority((char**) &p, &priority);
1281
1282         if (s->forward_to_syslog || s->server->forward_to_syslog)
1283                 forward_syslog(s->server, fixup_priority(priority), s->identifier, p, &s->ucred, NULL);
1284
1285         if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1286                 forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1287
1288         if (s->forward_to_console || s->server->forward_to_console)
1289                 forward_console(s->server, s->identifier, p, &s->ucred);
1290
1291         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1292
1293         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1294                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1295
1296         if (priority & LOG_FACMASK)
1297                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1298                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1299
1300         if (s->identifier) {
1301                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1302                 if (syslog_identifier)
1303                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1304         }
1305
1306         message = strappend("MESSAGE=", p);
1307         if (message)
1308                 IOVEC_SET_STRING(iovec[n++], message);
1309
1310         dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, priority);
1311
1312         free(message);
1313         free(syslog_priority);
1314         free(syslog_facility);
1315         free(syslog_identifier);
1316
1317         return 0;
1318 }
1319
1320 static int stdout_stream_line(StdoutStream *s, char *p) {
1321         int r;
1322
1323         assert(s);
1324         assert(p);
1325
1326         p = strstrip(p);
1327
1328         switch (s->state) {
1329
1330         case STDOUT_STREAM_IDENTIFIER:
1331                 s->identifier = strdup(p);
1332                 if (!s->identifier) {
1333                         log_error("Out of memory");
1334                         return -ENOMEM;
1335                 }
1336
1337                 s->state = STDOUT_STREAM_PRIORITY;
1338                 return 0;
1339
1340         case STDOUT_STREAM_PRIORITY:
1341                 r = safe_atoi(p, &s->priority);
1342                 if (r < 0 || s->priority <= 0 || s->priority >= 999) {
1343                         log_warning("Failed to parse log priority line.");
1344                         return -EINVAL;
1345                 }
1346
1347                 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1348                 return 0;
1349
1350         case STDOUT_STREAM_LEVEL_PREFIX:
1351                 r = parse_boolean(p);
1352                 if (r < 0) {
1353                         log_warning("Failed to parse level prefix line.");
1354                         return -EINVAL;
1355                 }
1356
1357                 s->level_prefix = !!r;
1358                 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1359                 return 0;
1360
1361         case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1362                 r = parse_boolean(p);
1363                 if (r < 0) {
1364                         log_warning("Failed to parse forward to syslog line.");
1365                         return -EINVAL;
1366                 }
1367
1368                 s->forward_to_syslog = !!r;
1369                 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1370                 return 0;
1371
1372         case STDOUT_STREAM_FORWARD_TO_KMSG:
1373                 r = parse_boolean(p);
1374                 if (r < 0) {
1375                         log_warning("Failed to parse copy to kmsg line.");
1376                         return -EINVAL;
1377                 }
1378
1379                 s->forward_to_kmsg = !!r;
1380                 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1381                 return 0;
1382
1383         case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1384                 r = parse_boolean(p);
1385                 if (r < 0) {
1386                         log_warning("Failed to parse copy to console line.");
1387                         return -EINVAL;
1388                 }
1389
1390                 s->forward_to_console = !!r;
1391                 s->state = STDOUT_STREAM_RUNNING;
1392                 return 0;
1393
1394         case STDOUT_STREAM_RUNNING:
1395                 return stdout_stream_log(s, p);
1396         }
1397
1398         assert_not_reached("Unknown stream state");
1399 }
1400
1401 static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
1402         char *p;
1403         size_t remaining;
1404         int r;
1405
1406         assert(s);
1407
1408         p = s->buffer;
1409         remaining = s->length;
1410         for (;;) {
1411                 char *end;
1412                 size_t skip;
1413
1414                 end = memchr(p, '\n', remaining);
1415                 if (end)
1416                         skip = end - p + 1;
1417                 else if (remaining >= sizeof(s->buffer) - 1) {
1418                         end = p + sizeof(s->buffer) - 1;
1419                         skip = remaining;
1420                 } else
1421                         break;
1422
1423                 *end = 0;
1424
1425                 r = stdout_stream_line(s, p);
1426                 if (r < 0)
1427                         return r;
1428
1429                 remaining -= skip;
1430                 p += skip;
1431         }
1432
1433         if (force_flush && remaining > 0) {
1434                 p[remaining] = 0;
1435                 r = stdout_stream_line(s, p);
1436                 if (r < 0)
1437                         return r;
1438
1439                 p += remaining;
1440                 remaining = 0;
1441         }
1442
1443         if (p > s->buffer) {
1444                 memmove(s->buffer, p, remaining);
1445                 s->length = remaining;
1446         }
1447
1448         return 0;
1449 }
1450
1451 static int stdout_stream_process(StdoutStream *s) {
1452         ssize_t l;
1453         int r;
1454
1455         assert(s);
1456
1457         l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1458         if (l < 0) {
1459
1460                 if (errno == EAGAIN)
1461                         return 0;
1462
1463                 log_warning("Failed to read from stream: %m");
1464                 return -errno;
1465         }
1466
1467         if (l == 0) {
1468                 r = stdout_stream_scan(s, true);
1469                 if (r < 0)
1470                         return r;
1471
1472                 return 0;
1473         }
1474
1475         s->length += l;
1476         r = stdout_stream_scan(s, false);
1477         if (r < 0)
1478                 return r;
1479
1480         return 1;
1481
1482 }
1483
1484 static void stdout_stream_free(StdoutStream *s) {
1485         assert(s);
1486
1487         if (s->server) {
1488                 assert(s->server->n_stdout_streams > 0);
1489                 s->server->n_stdout_streams --;
1490                 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
1491         }
1492
1493         if (s->fd >= 0) {
1494                 if (s->server)
1495                         epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1496
1497                 close_nointr_nofail(s->fd);
1498         }
1499
1500         free(s->identifier);
1501         free(s);
1502 }
1503
1504 static int stdout_stream_new(Server *s) {
1505         StdoutStream *stream;
1506         int fd, r;
1507         socklen_t len;
1508         struct epoll_event ev;
1509
1510         assert(s);
1511
1512         fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1513         if (fd < 0) {
1514                 if (errno == EAGAIN)
1515                         return 0;
1516
1517                 log_error("Failed to accept stdout connection: %m");
1518                 return -errno;
1519         }
1520
1521         if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1522                 log_warning("Too many stdout streams, refusing connection.");
1523                 close_nointr_nofail(fd);
1524                 return 0;
1525         }
1526
1527         stream = new0(StdoutStream, 1);
1528         if (!stream) {
1529                 log_error("Out of memory.");
1530                 close_nointr_nofail(fd);
1531                 return -ENOMEM;
1532         }
1533
1534         stream->fd = fd;
1535
1536         len = sizeof(stream->ucred);
1537         if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1538                 log_error("Failed to determine peer credentials: %m");
1539                 r = -errno;
1540                 goto fail;
1541         }
1542
1543         if (shutdown(fd, SHUT_WR) < 0) {
1544                 log_error("Failed to shutdown writing side of socket: %m");
1545                 r = -errno;
1546                 goto fail;
1547         }
1548
1549         zero(ev);
1550         ev.data.ptr = stream;
1551         ev.events = EPOLLIN;
1552         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1553                 log_error("Failed to add stream to event loop: %m");
1554                 r = -errno;
1555                 goto fail;
1556         }
1557
1558         stream->server = s;
1559         LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1560         s->n_stdout_streams ++;
1561
1562         return 0;
1563
1564 fail:
1565         stdout_stream_free(stream);
1566         return r;
1567 }
1568
1569 static int parse_kernel_timestamp(char **_p, usec_t *t) {
1570         usec_t r;
1571         int k, i;
1572         char *p;
1573
1574         assert(_p);
1575         assert(*_p);
1576         assert(t);
1577
1578         p = *_p;
1579
1580         if (strlen(p) < 14 || p[0] != '[' || p[13] != ']' || p[6] != '.')
1581                 return 0;
1582
1583         r = 0;
1584
1585         for (i = 1; i <= 5; i++) {
1586                 r *= 10;
1587
1588                 if (p[i] == ' ')
1589                         continue;
1590
1591                 k = undecchar(p[i]);
1592                 if (k < 0)
1593                         return 0;
1594
1595                 r += k;
1596         }
1597
1598         for (i = 7; i <= 12; i++) {
1599                 r *= 10;
1600
1601                 k = undecchar(p[i]);
1602                 if (k < 0)
1603                         return 0;
1604
1605                 r += k;
1606         }
1607
1608         *t = r;
1609         *_p += 14;
1610         *_p += strspn(*_p, WHITESPACE);
1611
1612         return 1;
1613 }
1614
1615 static void proc_kmsg_line(Server *s, const char *p) {
1616         struct iovec iovec[N_IOVEC_META_FIELDS + 7];
1617         char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
1618         int priority = LOG_KERN | LOG_INFO;
1619         unsigned n = 0;
1620         usec_t usec;
1621         char *identifier = NULL, *pid = NULL;
1622
1623         assert(s);
1624         assert(p);
1625
1626         parse_syslog_priority((char **) &p, &priority);
1627
1628         if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
1629                 return;
1630
1631         if (parse_kernel_timestamp((char **) &p, &usec) > 0) {
1632                 if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
1633                              (unsigned long long) usec) >= 0)
1634                         IOVEC_SET_STRING(iovec[n++], source_time);
1635         }
1636
1637         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
1638
1639         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1640                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1641
1642         if ((priority & LOG_FACMASK) == LOG_KERN) {
1643
1644                 if (s->forward_to_syslog)
1645                         forward_syslog(s, priority, "kernel", p, NULL, NULL);
1646
1647                 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
1648         } else {
1649                 read_identifier(&p, &identifier, &pid);
1650
1651                 if (s->forward_to_syslog)
1652                         forward_syslog(s, priority, identifier, p, NULL, NULL);
1653
1654                 if (identifier) {
1655                         syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1656                         if (syslog_identifier)
1657                                 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1658                 }
1659
1660                 if (pid) {
1661                         syslog_pid = strappend("SYSLOG_PID=", pid);
1662                         if (syslog_pid)
1663                                 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1664                 }
1665
1666                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1667                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1668         }
1669
1670         message = strappend("MESSAGE=", p);
1671         if (message)
1672                 IOVEC_SET_STRING(iovec[n++], message);
1673
1674
1675         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, priority);
1676
1677         free(message);
1678         free(syslog_priority);
1679         free(syslog_identifier);
1680         free(syslog_pid);
1681         free(syslog_facility);
1682         free(source_time);
1683         free(identifier);
1684         free(pid);
1685 }
1686
1687 static void proc_kmsg_scan(Server *s) {
1688         char *p;
1689         size_t remaining;
1690
1691         assert(s);
1692
1693         p = s->proc_kmsg_buffer;
1694         remaining = s->proc_kmsg_length;
1695         for (;;) {
1696                 char *end;
1697                 size_t skip;
1698
1699                 end = memchr(p, '\n', remaining);
1700                 if (end)
1701                         skip = end - p + 1;
1702                 else if (remaining >= sizeof(s->proc_kmsg_buffer) - 1) {
1703                         end = p + sizeof(s->proc_kmsg_buffer) - 1;
1704                         skip = remaining;
1705                 } else
1706                         break;
1707
1708                 *end = 0;
1709
1710                 proc_kmsg_line(s, p);
1711
1712                 remaining -= skip;
1713                 p += skip;
1714         }
1715
1716         if (p > s->proc_kmsg_buffer) {
1717                 memmove(s->proc_kmsg_buffer, p, remaining);
1718                 s->proc_kmsg_length = remaining;
1719         }
1720 }
1721
1722 static int system_journal_open(Server *s) {
1723         int r;
1724         char *fn;
1725         sd_id128_t machine;
1726         char ids[33];
1727
1728         r = sd_id128_get_machine(&machine);
1729         if (r < 0)
1730                 return r;
1731
1732         sd_id128_to_string(machine, ids);
1733
1734         if (!s->system_journal) {
1735
1736                 /* First try to create the machine path, but not the prefix */
1737                 fn = strappend("/var/log/journal/", ids);
1738                 if (!fn)
1739                         return -ENOMEM;
1740                 (void) mkdir(fn, 0755);
1741                 free(fn);
1742
1743                 /* The create the system journal file */
1744                 fn = join("/var/log/journal/", ids, "/system.journal", NULL);
1745                 if (!fn)
1746                         return -ENOMEM;
1747
1748                 r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, NULL, &s->system_journal);
1749                 free(fn);
1750
1751                 if (r >= 0) {
1752                         journal_default_metrics(&s->system_metrics, s->system_journal->fd);
1753
1754                         s->system_journal->metrics = s->system_metrics;
1755                         s->system_journal->compress = s->compress;
1756
1757                         server_fix_perms(s, s->system_journal, 0);
1758                 } else if (r < 0) {
1759
1760                         if (r != -ENOENT && r != -EROFS)
1761                                 log_warning("Failed to open system journal: %s", strerror(-r));
1762
1763                         r = 0;
1764                 }
1765         }
1766
1767         if (!s->runtime_journal) {
1768
1769                 fn = join("/run/log/journal/", ids, "/system.journal", NULL);
1770                 if (!fn)
1771                         return -ENOMEM;
1772
1773                 if (s->system_journal) {
1774
1775                         /* Try to open the runtime journal, but only
1776                          * if it already exists, so that we can flush
1777                          * it into the system journal */
1778
1779                         r = journal_file_open(fn, O_RDWR, 0640, NULL, &s->runtime_journal);
1780                         free(fn);
1781
1782                         if (r < 0) {
1783                                 if (r != -ENOENT)
1784                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
1785
1786                                 r = 0;
1787                         }
1788
1789                 } else {
1790
1791                         /* OK, we really need the runtime journal, so create
1792                          * it if necessary. */
1793
1794                         (void) mkdir_parents(fn, 0755);
1795                         r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, NULL, &s->runtime_journal);
1796                         free(fn);
1797
1798                         if (r < 0) {
1799                                 log_error("Failed to open runtime journal: %s", strerror(-r));
1800                                 return r;
1801                         }
1802                 }
1803
1804                 if (s->runtime_journal) {
1805                         journal_default_metrics(&s->runtime_metrics, s->runtime_journal->fd);
1806
1807                         s->runtime_journal->metrics = s->runtime_metrics;
1808                         s->runtime_journal->compress = s->compress;
1809
1810                         server_fix_perms(s, s->runtime_journal, 0);
1811                 }
1812         }
1813
1814         return r;
1815 }
1816
1817 static int server_flush_to_var(Server *s) {
1818         char path[] = "/run/log/journal/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
1819         Object *o = NULL;
1820         int r;
1821         sd_id128_t machine;
1822         sd_journal *j;
1823         usec_t ts;
1824
1825         assert(s);
1826
1827         if (!s->runtime_journal)
1828                 return 0;
1829
1830         ts = now(CLOCK_MONOTONIC);
1831         if (s->var_available_timestamp + RECHECK_VAR_AVAILABLE_USEC > ts)
1832                 return 0;
1833
1834         s->var_available_timestamp = ts;
1835
1836         system_journal_open(s);
1837
1838         if (!s->system_journal)
1839                 return 0;
1840
1841         log_info("Flushing to /var...");
1842
1843         r = sd_id128_get_machine(&machine);
1844         if (r < 0) {
1845                 log_error("Failed to get machine id: %s", strerror(-r));
1846                 return r;
1847         }
1848
1849         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1850         if (r < 0) {
1851                 log_error("Failed to read runtime journal: %s", strerror(-r));
1852                 return r;
1853         }
1854
1855         SD_JOURNAL_FOREACH(j) {
1856                 JournalFile *f;
1857
1858                 f = j->current_file;
1859                 assert(f && f->current_offset > 0);
1860
1861                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1862                 if (r < 0) {
1863                         log_error("Can't read entry: %s", strerror(-r));
1864                         goto finish;
1865                 }
1866
1867                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1868                 if (r == -E2BIG) {
1869                         log_info("Allocation limit reached.");
1870
1871                         journal_file_post_change(s->system_journal);
1872                         server_rotate(s);
1873                         server_vacuum(s);
1874
1875                         r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1876                 }
1877
1878                 if (r < 0) {
1879                         log_error("Can't write entry: %s", strerror(-r));
1880                         goto finish;
1881                 }
1882         }
1883
1884 finish:
1885         journal_file_post_change(s->system_journal);
1886
1887         journal_file_close(s->runtime_journal);
1888         s->runtime_journal = NULL;
1889
1890         if (r >= 0) {
1891                 sd_id128_to_string(machine, path + 17);
1892                 rm_rf(path, false, true, false);
1893         }
1894
1895         return r;
1896 }
1897
1898 static int server_read_proc_kmsg(Server *s) {
1899         ssize_t l;
1900         assert(s);
1901         assert(s->proc_kmsg_fd >= 0);
1902
1903         l = read(s->proc_kmsg_fd, s->proc_kmsg_buffer + s->proc_kmsg_length, sizeof(s->proc_kmsg_buffer) - 1 - s->proc_kmsg_length);
1904         if (l < 0) {
1905
1906                 if (errno == EAGAIN || errno == EINTR)
1907                         return 0;
1908
1909                 log_error("Failed to read from kernel: %m");
1910                 return -errno;
1911         }
1912
1913         s->proc_kmsg_length += l;
1914
1915         proc_kmsg_scan(s);
1916         return 1;
1917 }
1918
1919 static int server_flush_proc_kmsg(Server *s) {
1920         int r;
1921
1922         assert(s);
1923
1924         if (s->proc_kmsg_fd < 0)
1925                 return 0;
1926
1927         log_info("Flushing /proc/kmsg...");
1928
1929         for (;;) {
1930                 r = server_read_proc_kmsg(s);
1931                 if (r < 0)
1932                         return r;
1933
1934                 if (r == 0)
1935                         break;
1936         }
1937
1938         return 0;
1939 }
1940
1941 static int process_event(Server *s, struct epoll_event *ev) {
1942         assert(s);
1943
1944         if (ev->data.fd == s->signal_fd) {
1945                 struct signalfd_siginfo sfsi;
1946                 ssize_t n;
1947
1948                 if (ev->events != EPOLLIN) {
1949                         log_info("Got invalid event from epoll.");
1950                         return -EIO;
1951                 }
1952
1953                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1954                 if (n != sizeof(sfsi)) {
1955
1956                         if (n >= 0)
1957                                 return -EIO;
1958
1959                         if (errno == EINTR || errno == EAGAIN)
1960                                 return 1;
1961
1962                         return -errno;
1963                 }
1964
1965                 if (sfsi.ssi_signo == SIGUSR1) {
1966                         server_flush_to_var(s);
1967                         return 0;
1968                 }
1969
1970                 log_debug("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1971                 return 0;
1972
1973         } else if (ev->data.fd == s->proc_kmsg_fd) {
1974                 int r;
1975
1976                 if (ev->events != EPOLLIN) {
1977                         log_info("Got invalid event from epoll.");
1978                         return -EIO;
1979                 }
1980
1981                 r = server_read_proc_kmsg(s);
1982                 if (r < 0)
1983                         return r;
1984
1985                 return 1;
1986
1987         } else if (ev->data.fd == s->native_fd ||
1988                    ev->data.fd == s->syslog_fd) {
1989
1990                 if (ev->events != EPOLLIN) {
1991                         log_info("Got invalid event from epoll.");
1992                         return -EIO;
1993                 }
1994
1995                 for (;;) {
1996                         struct msghdr msghdr;
1997                         struct iovec iovec;
1998                         struct ucred *ucred = NULL;
1999                         struct timeval *tv = NULL;
2000                         struct cmsghdr *cmsg;
2001                         union {
2002                                 struct cmsghdr cmsghdr;
2003                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
2004                                             CMSG_SPACE(sizeof(struct timeval))];
2005                         } control;
2006                         ssize_t n;
2007                         int v;
2008
2009                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
2010                                 log_error("SIOCINQ failed: %m");
2011                                 return -errno;
2012                         }
2013
2014                         if (v <= 0)
2015                                 return 1;
2016
2017                         if (s->buffer_size < (size_t) v) {
2018                                 void *b;
2019                                 size_t l;
2020
2021                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
2022                                 b = realloc(s->buffer, l+1);
2023
2024                                 if (!b) {
2025                                         log_error("Couldn't increase buffer.");
2026                                         return -ENOMEM;
2027                                 }
2028
2029                                 s->buffer_size = l;
2030                                 s->buffer = b;
2031                         }
2032
2033                         zero(iovec);
2034                         iovec.iov_base = s->buffer;
2035                         iovec.iov_len = s->buffer_size;
2036
2037                         zero(control);
2038                         zero(msghdr);
2039                         msghdr.msg_iov = &iovec;
2040                         msghdr.msg_iovlen = 1;
2041                         msghdr.msg_control = &control;
2042                         msghdr.msg_controllen = sizeof(control);
2043
2044                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT);
2045                         if (n < 0) {
2046
2047                                 if (errno == EINTR || errno == EAGAIN)
2048                                         return 1;
2049
2050                                 log_error("recvmsg() failed: %m");
2051                                 return -errno;
2052                         }
2053
2054                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
2055
2056                                 if (cmsg->cmsg_level == SOL_SOCKET &&
2057                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
2058                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
2059                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
2060                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2061                                          cmsg->cmsg_type == SO_TIMESTAMP &&
2062                                          cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
2063                                         tv = (struct timeval*) CMSG_DATA(cmsg);
2064                         }
2065
2066                         if (ev->data.fd == s->syslog_fd) {
2067                                 char *e;
2068
2069                                 e = memchr(s->buffer, '\n', n);
2070                                 if (e)
2071                                         *e = 0;
2072                                 else
2073                                         s->buffer[n] = 0;
2074
2075                                 process_syslog_message(s, strstrip(s->buffer), ucred, tv);
2076                         } else
2077                                 process_native_message(s, s->buffer, n, ucred, tv);
2078                 }
2079
2080                 return 1;
2081
2082         } else if (ev->data.fd == s->stdout_fd) {
2083
2084                 if (ev->events != EPOLLIN) {
2085                         log_info("Got invalid event from epoll.");
2086                         return -EIO;
2087                 }
2088
2089                 stdout_stream_new(s);
2090                 return 1;
2091
2092         } else {
2093                 StdoutStream *stream;
2094
2095                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
2096                         log_info("Got invalid event from epoll.");
2097                         return -EIO;
2098                 }
2099
2100                 /* If it is none of the well-known fds, it must be an
2101                  * stdout stream fd. Note that this is a bit ugly here
2102                  * (since we rely that none of the well-known fds
2103                  * could be interpreted as pointer), but nonetheless
2104                  * safe, since the well-known fds would never get an
2105                  * fd > 4096, i.e. beyond the first memory page */
2106
2107                 stream = ev->data.ptr;
2108
2109                 if (stdout_stream_process(stream) <= 0)
2110                         stdout_stream_free(stream);
2111
2112                 return 1;
2113         }
2114
2115         log_error("Unknown event.");
2116         return 0;
2117 }
2118
2119 static int open_syslog_socket(Server *s) {
2120         union sockaddr_union sa;
2121         int one, r;
2122         struct epoll_event ev;
2123         struct timeval tv;
2124
2125         assert(s);
2126
2127         if (s->syslog_fd < 0) {
2128
2129                 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2130                 if (s->syslog_fd < 0) {
2131                         log_error("socket() failed: %m");
2132                         return -errno;
2133                 }
2134
2135                 zero(sa);
2136                 sa.un.sun_family = AF_UNIX;
2137                 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
2138
2139                 unlink(sa.un.sun_path);
2140
2141                 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2142                 if (r < 0) {
2143                         log_error("bind() failed: %m");
2144                         return -errno;
2145                 }
2146
2147                 chmod(sa.un.sun_path, 0666);
2148         }
2149
2150         one = 1;
2151         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2152         if (r < 0) {
2153                 log_error("SO_PASSCRED failed: %m");
2154                 return -errno;
2155         }
2156
2157         one = 1;
2158         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2159         if (r < 0) {
2160                 log_error("SO_TIMESTAMP failed: %m");
2161                 return -errno;
2162         }
2163
2164         /* Since we use the same socket for forwarding this to some
2165          * other syslog implementation, make sure we don't hang
2166          * forever */
2167         timeval_store(&tv, SYSLOG_TIMEOUT_USEC);
2168         if (setsockopt(s->syslog_fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)) < 0) {
2169                 log_error("SO_SNDTIMEO failed: %m");
2170                 return -errno;
2171         }
2172
2173         zero(ev);
2174         ev.events = EPOLLIN;
2175         ev.data.fd = s->syslog_fd;
2176         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2177                 log_error("Failed to add syslog server fd to epoll object: %m");
2178                 return -errno;
2179         }
2180
2181         return 0;
2182 }
2183
2184 static int open_native_socket(Server*s) {
2185         union sockaddr_union sa;
2186         int one, r;
2187         struct epoll_event ev;
2188
2189         assert(s);
2190
2191         if (s->native_fd < 0) {
2192
2193                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2194                 if (s->native_fd < 0) {
2195                         log_error("socket() failed: %m");
2196                         return -errno;
2197                 }
2198
2199                 zero(sa);
2200                 sa.un.sun_family = AF_UNIX;
2201                 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
2202
2203                 unlink(sa.un.sun_path);
2204
2205                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2206                 if (r < 0) {
2207                         log_error("bind() failed: %m");
2208                         return -errno;
2209                 }
2210
2211                 chmod(sa.un.sun_path, 0666);
2212         }
2213
2214         one = 1;
2215         r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2216         if (r < 0) {
2217                 log_error("SO_PASSCRED failed: %m");
2218                 return -errno;
2219         }
2220
2221         one = 1;
2222         r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2223         if (r < 0) {
2224                 log_error("SO_TIMESTAMP failed: %m");
2225                 return -errno;
2226         }
2227
2228         zero(ev);
2229         ev.events = EPOLLIN;
2230         ev.data.fd = s->native_fd;
2231         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2232                 log_error("Failed to add native server fd to epoll object: %m");
2233                 return -errno;
2234         }
2235
2236         return 0;
2237 }
2238
2239 static int open_stdout_socket(Server *s) {
2240         union sockaddr_union sa;
2241         int r;
2242         struct epoll_event ev;
2243
2244         assert(s);
2245
2246         if (s->stdout_fd < 0) {
2247
2248                 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2249                 if (s->stdout_fd < 0) {
2250                         log_error("socket() failed: %m");
2251                         return -errno;
2252                 }
2253
2254                 zero(sa);
2255                 sa.un.sun_family = AF_UNIX;
2256                 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
2257
2258                 unlink(sa.un.sun_path);
2259
2260                 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2261                 if (r < 0) {
2262                         log_error("bind() failed: %m");
2263                         return -errno;
2264                 }
2265
2266                 chmod(sa.un.sun_path, 0666);
2267
2268                 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2269                         log_error("liste() failed: %m");
2270                         return -errno;
2271                 }
2272         }
2273
2274         zero(ev);
2275         ev.events = EPOLLIN;
2276         ev.data.fd = s->stdout_fd;
2277         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2278                 log_error("Failed to add stdout server fd to epoll object: %m");
2279                 return -errno;
2280         }
2281
2282         return 0;
2283 }
2284
2285 static int open_proc_kmsg(Server *s) {
2286         struct epoll_event ev;
2287
2288         assert(s);
2289
2290         if (!s->import_proc_kmsg)
2291                 return 0;
2292
2293
2294         s->proc_kmsg_fd = open("/proc/kmsg", O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2295         if (s->proc_kmsg_fd < 0) {
2296                 log_warning("Failed to open /proc/kmsg, ignoring: %m");
2297                 return 0;
2298         }
2299
2300         zero(ev);
2301         ev.events = EPOLLIN;
2302         ev.data.fd = s->proc_kmsg_fd;
2303         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->proc_kmsg_fd, &ev) < 0) {
2304                 log_error("Failed to add /proc/kmsg fd to epoll object: %m");
2305                 return -errno;
2306         }
2307
2308         return 0;
2309 }
2310
2311 static int open_signalfd(Server *s) {
2312         sigset_t mask;
2313         struct epoll_event ev;
2314
2315         assert(s);
2316
2317         assert_se(sigemptyset(&mask) == 0);
2318         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, -1);
2319         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2320
2321         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2322         if (s->signal_fd < 0) {
2323                 log_error("signalfd(): %m");
2324                 return -errno;
2325         }
2326
2327         zero(ev);
2328         ev.events = EPOLLIN;
2329         ev.data.fd = s->signal_fd;
2330
2331         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2332                 log_error("epoll_ctl(): %m");
2333                 return -errno;
2334         }
2335
2336         return 0;
2337 }
2338
2339 static int server_parse_proc_cmdline(Server *s) {
2340         char *line, *w, *state;
2341         int r;
2342         size_t l;
2343
2344         if (detect_container(NULL) > 0)
2345                 return 0;
2346
2347         r = read_one_line_file("/proc/cmdline", &line);
2348         if (r < 0) {
2349                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2350                 return 0;
2351         }
2352
2353         FOREACH_WORD_QUOTED(w, l, line, state) {
2354                 char *word;
2355
2356                 word = strndup(w, l);
2357                 if (!word) {
2358                         r = -ENOMEM;
2359                         goto finish;
2360                 }
2361
2362                 if (startswith(word, "systemd_journald.forward_to_syslog=")) {
2363                         r = parse_boolean(word + 35);
2364                         if (r < 0)
2365                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2366                         else
2367                                 s->forward_to_syslog = r;
2368                 } else if (startswith(word, "systemd_journald.forward_to_kmsg=")) {
2369                         r = parse_boolean(word + 33);
2370                         if (r < 0)
2371                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2372                         else
2373                                 s->forward_to_kmsg = r;
2374                 } else if (startswith(word, "systemd_journald.forward_to_console=")) {
2375                         r = parse_boolean(word + 36);
2376                         if (r < 0)
2377                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2378                         else
2379                                 s->forward_to_console = r;
2380                 }
2381
2382                 free(word);
2383         }
2384
2385         r = 0;
2386
2387 finish:
2388         free(line);
2389         return r;
2390 }
2391
2392 static int server_parse_config_file(Server *s) {
2393         FILE *f;
2394         const char *fn;
2395         int r;
2396
2397         assert(s);
2398
2399         fn = "/etc/systemd/systemd-journald.conf";
2400         f = fopen(fn, "re");
2401         if (!f) {
2402                 if (errno == ENOENT)
2403                         return 0;
2404
2405                 log_warning("Failed to open configuration file %s: %m", fn);
2406                 return -errno;
2407         }
2408
2409         r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2410         if (r < 0)
2411                 log_warning("Failed to parse configuration file: %s", strerror(-r));
2412
2413         fclose(f);
2414
2415         return r;
2416 }
2417
2418 static int server_init(Server *s) {
2419         int n, r, fd;
2420
2421         assert(s);
2422
2423         zero(*s);
2424         s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->proc_kmsg_fd = -1;
2425         s->compress = true;
2426
2427         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
2428         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
2429
2430         s->forward_to_syslog = true;
2431         s->import_proc_kmsg = true;
2432
2433         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
2434         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
2435
2436         server_parse_config_file(s);
2437         server_parse_proc_cmdline(s);
2438
2439         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
2440         if (!s->user_journals) {
2441                 log_error("Out of memory.");
2442                 return -ENOMEM;
2443         }
2444
2445         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2446         if (s->epoll_fd < 0) {
2447                 log_error("Failed to create epoll object: %m");
2448                 return -errno;
2449         }
2450
2451         n = sd_listen_fds(true);
2452         if (n < 0) {
2453                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
2454                 return n;
2455         }
2456
2457         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2458
2459                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
2460
2461                         if (s->native_fd >= 0) {
2462                                 log_error("Too many native sockets passed.");
2463                                 return -EINVAL;
2464                         }
2465
2466                         s->native_fd = fd;
2467
2468                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
2469
2470                         if (s->stdout_fd >= 0) {
2471                                 log_error("Too many stdout sockets passed.");
2472                                 return -EINVAL;
2473                         }
2474
2475                         s->stdout_fd = fd;
2476
2477                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
2478
2479                         if (s->syslog_fd >= 0) {
2480                                 log_error("Too many /dev/log sockets passed.");
2481                                 return -EINVAL;
2482                         }
2483
2484                         s->syslog_fd = fd;
2485
2486                 } else {
2487                         log_error("Unknown socket passed.");
2488                         return -EINVAL;
2489                 }
2490         }
2491
2492         r = open_syslog_socket(s);
2493         if (r < 0)
2494                 return r;
2495
2496         r = open_native_socket(s);
2497         if (r < 0)
2498                 return r;
2499
2500         r = open_stdout_socket(s);
2501         if (r < 0)
2502                 return r;
2503
2504         r = open_proc_kmsg(s);
2505         if (r < 0)
2506                 return r;
2507
2508         r = system_journal_open(s);
2509         if (r < 0)
2510                 return r;
2511
2512         r = open_signalfd(s);
2513         if (r < 0)
2514                 return r;
2515
2516         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2517         if (!s->rate_limit)
2518                 return -ENOMEM;
2519
2520         return 0;
2521 }
2522
2523 static void server_done(Server *s) {
2524         JournalFile *f;
2525         assert(s);
2526
2527         while (s->stdout_streams)
2528                 stdout_stream_free(s->stdout_streams);
2529
2530         if (s->system_journal)
2531                 journal_file_close(s->system_journal);
2532
2533         if (s->runtime_journal)
2534                 journal_file_close(s->runtime_journal);
2535
2536         while ((f = hashmap_steal_first(s->user_journals)))
2537                 journal_file_close(f);
2538
2539         hashmap_free(s->user_journals);
2540
2541         if (s->epoll_fd >= 0)
2542                 close_nointr_nofail(s->epoll_fd);
2543
2544         if (s->signal_fd >= 0)
2545                 close_nointr_nofail(s->signal_fd);
2546
2547         if (s->syslog_fd >= 0)
2548                 close_nointr_nofail(s->syslog_fd);
2549
2550         if (s->native_fd >= 0)
2551                 close_nointr_nofail(s->native_fd);
2552
2553         if (s->stdout_fd >= 0)
2554                 close_nointr_nofail(s->stdout_fd);
2555
2556         if (s->proc_kmsg_fd >= 0)
2557                 close_nointr_nofail(s->proc_kmsg_fd);
2558
2559         if (s->rate_limit)
2560                 journal_rate_limit_free(s->rate_limit);
2561
2562         free(s->buffer);
2563 }
2564
2565 int main(int argc, char *argv[]) {
2566         Server server;
2567         int r;
2568
2569         /* if (getppid() != 1) { */
2570         /*         log_error("This program should be invoked by init only."); */
2571         /*         return EXIT_FAILURE; */
2572         /* } */
2573
2574         if (argc > 1) {
2575                 log_error("This program does not take arguments.");
2576                 return EXIT_FAILURE;
2577         }
2578
2579         log_set_target(LOG_TARGET_CONSOLE);
2580         log_parse_environment();
2581         log_open();
2582
2583         umask(0022);
2584
2585         r = server_init(&server);
2586         if (r < 0)
2587                 goto finish;
2588
2589         server_vacuum(&server);
2590         server_flush_to_var(&server);
2591         server_flush_proc_kmsg(&server);
2592
2593         log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
2594         driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
2595
2596         sd_notify(false,
2597                   "READY=1\n"
2598                   "STATUS=Processing requests...");
2599
2600         for (;;) {
2601                 struct epoll_event event;
2602
2603                 r = epoll_wait(server.epoll_fd, &event, 1, -1);
2604                 if (r < 0) {
2605
2606                         if (errno == EINTR)
2607                                 continue;
2608
2609                         log_error("epoll_wait() failed: %m");
2610                         r = -errno;
2611                         goto finish;
2612                 } else if (r == 0)
2613                         break;
2614
2615                 r = process_event(&server, &event);
2616                 if (r < 0)
2617                         goto finish;
2618                 else if (r == 0)
2619                         break;
2620         }
2621
2622         log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
2623         driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
2624
2625 finish:
2626         sd_notify(false,
2627                   "STATUS=Shutting down...");
2628
2629         server_done(&server);
2630
2631         return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
2632 }