chiark / gitweb /
0194a1b8c9e60b32d3ecc03a9543f3442b4e2472
[elogind.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 2 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
32
33 #include <systemd/sd-journal.h>
34 #include <systemd/sd-login.h>
35 #include <systemd/sd-messages.h>
36 #include <systemd/sd-daemon.h>
37
38 #include "hashmap.h"
39 #include "journal-file.h"
40 #include "socket-util.h"
41 #include "cgroup-util.h"
42 #include "list.h"
43 #include "journal-rate-limit.h"
44 #include "journal-internal.h"
45 #include "conf-parser.h"
46 #include "journald.h"
47 #include "virt.h"
48
49 #ifdef HAVE_ACL
50 #include <sys/acl.h>
51 #include <acl/libacl.h>
52 #include "acl-util.h"
53 #endif
54
/* Maximum number of per-user journal files kept open at the same
 * time; find_journal() closes older ones once this is reached. */
#define USER_JOURNALS_MAX 1024

/* NOTE(review): presumably the cap on concurrently connected stdout
 * streams -- the user is not in this part of the file, confirm. */
#define STDOUT_STREAMS_MAX 4096

/* Default rate limiting parameters (interval and burst size). */
#define DEFAULT_RATE_LIMIT_INTERVAL (10 * USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 200

/* How long the result of available_space() stays cached. */
#define RECHECK_AVAILABLE_SPACE_USEC (30 * USEC_PER_SEC)

#define RECHECK_VAR_AVAILABLE_USEC (30 * USEC_PER_SEC)

#define SYSLOG_TIMEOUT_USEC (250 * USEC_PER_MSEC)

/* Number of iovec slots dispatch_message_real() requires callers to
 * leave free for the metadata fields it appends. */
#define N_IOVEC_META_FIELDS 16
68
/* States of a stdout stream connection. The order of the enumerators
 * is significant: the numeric values start at 0 and increase by one.
 * NOTE(review): presumably every state before STDOUT_STREAM_RUNNING
 * names the header field expected next from the client -- the state
 * machine itself is outside this part of the file, confirm. */
typedef enum StdoutStreamState {
        STDOUT_STREAM_IDENTIFIER,
        STDOUT_STREAM_PRIORITY,
        STDOUT_STREAM_LEVEL_PREFIX,
        STDOUT_STREAM_FORWARD_TO_SYSLOG,
        STDOUT_STREAM_FORWARD_TO_KMSG,
        STDOUT_STREAM_FORWARD_TO_CONSOLE,
        STDOUT_STREAM_RUNNING
} StdoutStreamState;
78
79 struct StdoutStream {
80         Server *server;
81         StdoutStreamState state;
82
83         int fd;
84
85         struct ucred ucred;
86
87         char *identifier;
88         int priority;
89         bool level_prefix:1;
90         bool forward_to_syslog:1;
91         bool forward_to_kmsg:1;
92         bool forward_to_console:1;
93
94         char buffer[LINE_MAX+1];
95         size_t length;
96
97         LIST_FIELDS(StdoutStream, stdout_stream);
98 };
99
100 static int server_flush_to_var(Server *s);
101
102 static uint64_t available_space(Server *s) {
103         char ids[33], *p;
104         const char *f;
105         sd_id128_t machine;
106         struct statvfs ss;
107         uint64_t sum = 0, avail = 0, ss_avail = 0;
108         int r;
109         DIR *d;
110         usec_t ts;
111         JournalMetrics *m;
112
113         ts = now(CLOCK_MONOTONIC);
114
115         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
116                 return s->cached_available_space;
117
118         r = sd_id128_get_machine(&machine);
119         if (r < 0)
120                 return 0;
121
122         if (s->system_journal) {
123                 f = "/var/log/journal/";
124                 m = &s->system_metrics;
125         } else {
126                 f = "/run/log/journal/";
127                 m = &s->runtime_metrics;
128         }
129
130         assert(m);
131
132         p = strappend(f, sd_id128_to_string(machine, ids));
133         if (!p)
134                 return 0;
135
136         d = opendir(p);
137         free(p);
138
139         if (!d)
140                 return 0;
141
142         if (fstatvfs(dirfd(d), &ss) < 0)
143                 goto finish;
144
145         for (;;) {
146                 struct stat st;
147                 struct dirent buf, *de;
148                 int k;
149
150                 k = readdir_r(d, &buf, &de);
151                 if (k != 0) {
152                         r = -k;
153                         goto finish;
154                 }
155
156                 if (!de)
157                         break;
158
159                 if (!dirent_is_file_with_suffix(de, ".journal"))
160                         continue;
161
162                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
163                         continue;
164
165                 sum += (uint64_t) st.st_blocks * (uint64_t) st.st_blksize;
166         }
167
168         avail = sum >= m->max_use ? 0 : m->max_use - sum;
169
170         ss_avail = ss.f_bsize * ss.f_bavail;
171
172         ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
173
174         if (ss_avail < avail)
175                 avail = ss_avail;
176
177         s->cached_available_space = avail;
178         s->cached_available_space_timestamp = ts;
179
180 finish:
181         closedir(d);
182
183         return avail;
184 }
185
186 static void fix_perms(JournalFile *f, uid_t uid) {
187         int r;
188 #ifdef HAVE_ACL
189         acl_t acl;
190         acl_entry_t entry;
191         acl_permset_t permset;
192 #endif
193
194         assert(f);
195
196         r = fchmod_and_fchown(f->fd, 0640, 0, 0);
197         if (r < 0)
198                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
199
200 #ifdef HAVE_ACL
201         if (uid <= 0)
202                 return;
203
204         acl = acl_get_fd(f->fd);
205         if (!acl) {
206                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
207                 return;
208         }
209
210         r = acl_find_uid(acl, uid, &entry);
211         if (r <= 0) {
212
213                 if (acl_create_entry(&acl, &entry) < 0 ||
214                     acl_set_tag_type(entry, ACL_USER) < 0 ||
215                     acl_set_qualifier(entry, &uid) < 0) {
216                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
217                         goto finish;
218                 }
219         }
220
221         if (acl_get_permset(entry, &permset) < 0 ||
222             acl_add_perm(permset, ACL_READ) < 0 ||
223             acl_calc_mask(&acl) < 0) {
224                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
225                 goto finish;
226         }
227
228         if (acl_set_fd(f->fd, acl) < 0)
229                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
230
231 finish:
232         acl_free(acl);
233 #endif
234 }
235
236 static JournalFile* find_journal(Server *s, uid_t uid) {
237         char *p;
238         int r;
239         JournalFile *f;
240         char ids[33];
241         sd_id128_t machine;
242
243         assert(s);
244
245         /* We split up user logs only on /var, not on /run. If the
246          * runtime file is open, we write to it exclusively, in order
247          * to guarantee proper order as soon as we flush /run to
248          * /var and close the runtime file. */
249
250         if (s->runtime_journal)
251                 return s->runtime_journal;
252
253         if (uid <= 0)
254                 return s->system_journal;
255
256         r = sd_id128_get_machine(&machine);
257         if (r < 0)
258                 return s->system_journal;
259
260         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
261         if (f)
262                 return f;
263
264         if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0)
265                 return s->system_journal;
266
267         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
268                 /* Too many open? Then let's close one */
269                 f = hashmap_steal_first(s->user_journals);
270                 assert(f);
271                 journal_file_close(f);
272         }
273
274         r = journal_file_open(p, O_RDWR|O_CREAT, 0640, s->system_journal, &f);
275         free(p);
276
277         if (r < 0)
278                 return s->system_journal;
279
280         fix_perms(f, uid);
281         f->metrics = s->system_metrics;
282         f->compress = s->compress;
283
284         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
285         if (r < 0) {
286                 journal_file_close(f);
287                 return s->system_journal;
288         }
289
290         return f;
291 }
292
293 static void server_rotate(Server *s) {
294         JournalFile *f;
295         void *k;
296         Iterator i;
297         int r;
298
299         log_info("Rotating...");
300
301         if (s->runtime_journal) {
302                 r = journal_file_rotate(&s->runtime_journal);
303                 if (r < 0)
304                         log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
305         }
306
307         if (s->system_journal) {
308                 r = journal_file_rotate(&s->system_journal);
309                 if (r < 0)
310                         log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
311         }
312
313         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
314                 r = journal_file_rotate(&f);
315                 if (r < 0)
316                         log_error("Failed to rotate %s: %s", f->path, strerror(-r));
317                 else
318                         hashmap_replace(s->user_journals, k, f);
319         }
320 }
321
322 static void server_vacuum(Server *s) {
323         char *p;
324         char ids[33];
325         sd_id128_t machine;
326         int r;
327
328         log_info("Vacuuming...");
329
330         r = sd_id128_get_machine(&machine);
331         if (r < 0) {
332                 log_error("Failed to get machine ID: %s", strerror(-r));
333                 return;
334         }
335
336         sd_id128_to_string(machine, ids);
337
338         if (s->system_journal) {
339                 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
340                         log_error("Out of memory.");
341                         return;
342                 }
343
344                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
345                 if (r < 0 && r != -ENOENT)
346                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
347                 free(p);
348         }
349
350
351         if (s->runtime_journal) {
352                 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
353                         log_error("Out of memory.");
354                         return;
355                 }
356
357                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
358                 if (r < 0 && r != -ENOENT)
359                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
360                 free(p);
361         }
362
363         s->cached_available_space_timestamp = 0;
364 }
365
366 static char *shortened_cgroup_path(pid_t pid) {
367         int r;
368         char *process_path, *init_path, *path;
369
370         assert(pid > 0);
371
372         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
373         if (r < 0)
374                 return NULL;
375
376         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
377         if (r < 0) {
378                 free(process_path);
379                 return NULL;
380         }
381
382         if (endswith(init_path, "/system"))
383                 init_path[strlen(init_path) - 7] = 0;
384         else if (streq(init_path, "/"))
385                 init_path[0] = 0;
386
387         if (startswith(process_path, init_path)) {
388                 char *p;
389
390                 p = strdup(process_path + strlen(init_path));
391                 if (!p) {
392                         free(process_path);
393                         free(init_path);
394                         return NULL;
395                 }
396                 path = p;
397         } else {
398                 path = process_path;
399                 process_path = NULL;
400         }
401
402         free(process_path);
403         free(init_path);
404
405         return path;
406 }
407
408 static void dispatch_message_real(Server *s,
409                              struct iovec *iovec, unsigned n, unsigned m,
410                              struct ucred *ucred,
411                              struct timeval *tv) {
412
413         char *pid = NULL, *uid = NULL, *gid = NULL,
414                 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
415                 *comm = NULL, *cmdline = NULL, *hostname = NULL,
416                 *audit_session = NULL, *audit_loginuid = NULL,
417                 *exe = NULL, *cgroup = NULL, *session = NULL,
418                 *owner_uid = NULL, *unit = NULL;
419
420         char idbuf[33];
421         sd_id128_t id;
422         int r;
423         char *t;
424         uid_t loginuid = 0, realuid = 0;
425         JournalFile *f;
426         bool vacuumed = false;
427
428         assert(s);
429         assert(iovec);
430         assert(n > 0);
431         assert(n + N_IOVEC_META_FIELDS <= m);
432
433         if (ucred) {
434                 uint32_t audit;
435                 uid_t owner;
436
437                 realuid = ucred->uid;
438
439                 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
440                         IOVEC_SET_STRING(iovec[n++], pid);
441
442                 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
443                         IOVEC_SET_STRING(iovec[n++], uid);
444
445                 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
446                         IOVEC_SET_STRING(iovec[n++], gid);
447
448                 r = get_process_comm(ucred->pid, &t);
449                 if (r >= 0) {
450                         comm = strappend("_COMM=", t);
451                         free(t);
452
453                         if (comm)
454                                 IOVEC_SET_STRING(iovec[n++], comm);
455                 }
456
457                 r = get_process_exe(ucred->pid, &t);
458                 if (r >= 0) {
459                         exe = strappend("_EXE=", t);
460                         free(t);
461
462                         if (comm)
463                                 IOVEC_SET_STRING(iovec[n++], exe);
464                 }
465
466                 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
467                 if (r >= 0) {
468                         cmdline = strappend("_CMDLINE=", t);
469                         free(t);
470
471                         if (cmdline)
472                                 IOVEC_SET_STRING(iovec[n++], cmdline);
473                 }
474
475                 r = audit_session_from_pid(ucred->pid, &audit);
476                 if (r >= 0)
477                         if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
478                                 IOVEC_SET_STRING(iovec[n++], audit_session);
479
480                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
481                 if (r >= 0)
482                         if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
483                                 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
484
485                 t = shortened_cgroup_path(ucred->pid);
486                 if (t) {
487                         cgroup = strappend("_SYSTEMD_CGROUP=", t);
488                         free(t);
489
490                         if (cgroup)
491                                 IOVEC_SET_STRING(iovec[n++], cgroup);
492                 }
493
494                 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
495                         session = strappend("_SYSTEMD_SESSION=", t);
496                         free(t);
497
498                         if (session)
499                                 IOVEC_SET_STRING(iovec[n++], session);
500                 }
501
502                 if (sd_pid_get_unit(ucred->pid, &t) >= 0) {
503                         unit = strappend("_SYSTEMD_UNIT=", t);
504                         free(t);
505
506                         if (unit)
507                                 IOVEC_SET_STRING(iovec[n++], unit);
508                 }
509
510                 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
511                         if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
512                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
513         }
514
515         if (tv) {
516                 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
517                              (unsigned long long) timeval_load(tv)) >= 0)
518                         IOVEC_SET_STRING(iovec[n++], source_time);
519         }
520
521         /* Note that strictly speaking storing the boot id here is
522          * redundant since the entry includes this in-line
523          * anyway. However, we need this indexed, too. */
524         r = sd_id128_get_boot(&id);
525         if (r >= 0)
526                 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
527                         IOVEC_SET_STRING(iovec[n++], boot_id);
528
529         r = sd_id128_get_machine(&id);
530         if (r >= 0)
531                 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
532                         IOVEC_SET_STRING(iovec[n++], machine_id);
533
534         t = gethostname_malloc();
535         if (t) {
536                 hostname = strappend("_HOSTNAME=", t);
537                 free(t);
538                 if (hostname)
539                         IOVEC_SET_STRING(iovec[n++], hostname);
540         }
541
542         assert(n <= m);
543
544         server_flush_to_var(s);
545
546 retry:
547         f = find_journal(s, realuid == 0 ? 0 : loginuid);
548         if (!f)
549                 log_warning("Dropping message, as we can't find a place to store the data.");
550         else {
551                 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
552
553                 if (r == -E2BIG && !vacuumed) {
554                         log_info("Allocation limit reached.");
555
556                         server_rotate(s);
557                         server_vacuum(s);
558                         vacuumed = true;
559
560                         log_info("Retrying write.");
561                         goto retry;
562                 }
563
564                 if (r < 0)
565                         log_error("Failed to write entry, ignoring: %s", strerror(-r));
566         }
567
568         free(pid);
569         free(uid);
570         free(gid);
571         free(comm);
572         free(exe);
573         free(cmdline);
574         free(source_time);
575         free(boot_id);
576         free(machine_id);
577         free(hostname);
578         free(audit_session);
579         free(audit_loginuid);
580         free(cgroup);
581         free(session);
582         free(owner_uid);
583         free(unit);
584 }
585
586 static void driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
587         char mid[11 + 32 + 1];
588         char buffer[16 + LINE_MAX + 1];
589         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
590         int n = 0;
591         va_list ap;
592         struct ucred ucred;
593
594         assert(s);
595         assert(format);
596
597         IOVEC_SET_STRING(iovec[n++], "PRIORITY=5");
598         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
599
600         memcpy(buffer, "MESSAGE=", 8);
601         va_start(ap, format);
602         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
603         va_end(ap);
604         char_array_0(buffer);
605         IOVEC_SET_STRING(iovec[n++], buffer);
606
607         snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
608         char_array_0(mid);
609         IOVEC_SET_STRING(iovec[n++], mid);
610
611         zero(ucred);
612         ucred.pid = getpid();
613         ucred.uid = getuid();
614         ucred.gid = getgid();
615
616         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL);
617 }
618
619 static void dispatch_message(Server *s,
620                              struct iovec *iovec, unsigned n, unsigned m,
621                              struct ucred *ucred,
622                              struct timeval *tv,
623                              int priority) {
624         int rl;
625         char *path = NULL, *c;
626
627         assert(s);
628         assert(iovec || n == 0);
629
630         if (n == 0)
631                 return;
632
633         if (!ucred)
634                 goto finish;
635
636         path = shortened_cgroup_path(ucred->pid);
637         if (!path)
638                 goto finish;
639
640         /* example: /user/lennart/3/foobar
641          *          /system/dbus.service/foobar
642          *
643          * So let's cut of everything past the third /, since that is
644          * wher user directories start */
645
646         c = strchr(path, '/');
647         if (c) {
648                 c = strchr(c+1, '/');
649                 if (c) {
650                         c = strchr(c+1, '/');
651                         if (c)
652                                 *c = 0;
653                 }
654         }
655
656         rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
657
658         if (rl == 0) {
659                 free(path);
660                 return;
661         }
662
663         /* Write a suppression message if we suppressed something */
664         if (rl > 1)
665                 driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
666
667         free(path);
668
669 finish:
670         dispatch_message_real(s, iovec, n, m, ucred, tv);
671 }
672
673 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
674         struct msghdr msghdr;
675         struct cmsghdr *cmsg;
676         union {
677                 struct cmsghdr cmsghdr;
678                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
679         } control;
680         union sockaddr_union sa;
681
682         assert(s);
683         assert(iovec);
684         assert(n_iovec > 0);
685
686         zero(msghdr);
687         msghdr.msg_iov = (struct iovec*) iovec;
688         msghdr.msg_iovlen = n_iovec;
689
690         zero(sa);
691         sa.un.sun_family = AF_UNIX;
692         strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
693         msghdr.msg_name = &sa;
694         msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
695
696         if (ucred) {
697                 zero(control);
698                 msghdr.msg_control = &control;
699                 msghdr.msg_controllen = sizeof(control);
700
701                 cmsg = CMSG_FIRSTHDR(&msghdr);
702                 cmsg->cmsg_level = SOL_SOCKET;
703                 cmsg->cmsg_type = SCM_CREDENTIALS;
704                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
705                 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
706                 msghdr.msg_controllen = cmsg->cmsg_len;
707         }
708
709         /* Forward the syslog message we received via /dev/log to
710          * /run/systemd/syslog. Unfortunately we currently can't set
711          * the SO_TIMESTAMP auxiliary data, and hence we don't. */
712
713         if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
714                 return;
715
716         if (ucred && errno == ESRCH) {
717                 struct ucred u;
718
719                 /* Hmm, presumably the sender process vanished
720                  * by now, so let's fix it as good as we
721                  * can, and retry */
722
723                 u = *ucred;
724                 u.pid = getpid();
725                 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
726
727                 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
728                         return;
729         }
730
731         log_debug("Failed to forward syslog message: %m");
732 }
733
734 static void forward_syslog_raw(Server *s, const char *buffer, struct ucred *ucred, struct timeval *tv) {
735         struct iovec iovec;
736
737         assert(s);
738         assert(buffer);
739
740         IOVEC_SET_STRING(iovec, buffer);
741         forward_syslog_iovec(s, &iovec, 1, ucred, tv);
742 }
743
744 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
745         struct iovec iovec[5];
746         char header_priority[6], header_time[64], header_pid[16];
747         int n = 0;
748         time_t t;
749         struct tm *tm;
750         char *ident_buf = NULL;
751
752         assert(s);
753         assert(priority >= 0);
754         assert(priority <= 999);
755         assert(message);
756
757         /* First: priority field */
758         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
759         char_array_0(header_priority);
760         IOVEC_SET_STRING(iovec[n++], header_priority);
761
762         /* Second: timestamp */
763         t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
764         tm = localtime(&t);
765         if (!tm)
766                 return;
767         if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
768                 return;
769         IOVEC_SET_STRING(iovec[n++], header_time);
770
771         /* Third: identifier and PID */
772         if (ucred) {
773                 if (!identifier) {
774                         get_process_comm(ucred->pid, &ident_buf);
775                         identifier = ident_buf;
776                 }
777
778                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
779                 char_array_0(header_pid);
780
781                 if (identifier)
782                         IOVEC_SET_STRING(iovec[n++], identifier);
783
784                 IOVEC_SET_STRING(iovec[n++], header_pid);
785         } else if (identifier) {
786                 IOVEC_SET_STRING(iovec[n++], identifier);
787                 IOVEC_SET_STRING(iovec[n++], ": ");
788         }
789
790         /* Fourth: message */
791         IOVEC_SET_STRING(iovec[n++], message);
792
793         forward_syslog_iovec(s, iovec, n, ucred, tv);
794
795         free(ident_buf);
796 }
797
/* Makes sure a syslog priority value carries a facility: if the
 * facility bits are all zero, the level bits are combined with
 * LOG_USER; otherwise the value is returned unchanged. */
static int fixup_priority(int priority) {
        return (priority & LOG_FACMASK) != 0
                ? priority
                : ((priority & LOG_PRIMASK) | LOG_USER);
}
805
806 static void forward_kmsg(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
807         struct iovec iovec[5];
808         char header_priority[6], header_pid[16];
809         int n = 0;
810         char *ident_buf = NULL;
811         int fd;
812
813         assert(s);
814         assert(priority >= 0);
815         assert(priority <= 999);
816         assert(message);
817
818         /* Never allow messages with kernel facility to be written to
819          * kmsg, regardless where the data comes from. */
820         priority = fixup_priority(priority);
821
822         /* First: priority field */
823         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
824         char_array_0(header_priority);
825         IOVEC_SET_STRING(iovec[n++], header_priority);
826
827         /* Second: identifier and PID */
828         if (ucred) {
829                 if (!identifier) {
830                         get_process_comm(ucred->pid, &ident_buf);
831                         identifier = ident_buf;
832                 }
833
834                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
835                 char_array_0(header_pid);
836
837                 if (identifier)
838                         IOVEC_SET_STRING(iovec[n++], identifier);
839
840                 IOVEC_SET_STRING(iovec[n++], header_pid);
841         } else if (identifier) {
842                 IOVEC_SET_STRING(iovec[n++], identifier);
843                 IOVEC_SET_STRING(iovec[n++], ": ");
844         }
845
846         /* Fourth: message */
847         IOVEC_SET_STRING(iovec[n++], message);
848         IOVEC_SET_STRING(iovec[n++], "\n");
849
850         fd = open("/dev/kmsg", O_WRONLY|O_NOCTTY|O_CLOEXEC);
851         if (fd < 0) {
852                 log_debug("Failed to open /dev/kmsg for logging: %s", strerror(errno));
853                 goto finish;
854         }
855
856         if (writev(fd, iovec, n) < 0)
857                 log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
858
859         close_nointr_nofail(fd);
860
861 finish:
862         free(ident_buf);
863 }
864
865 static void forward_console(Server *s, const char *identifier, const char *message, struct ucred *ucred) {
866         struct iovec iovec[4];
867         char header_pid[16];
868         int n = 0, fd;
869         char *ident_buf = NULL;
870
871         assert(s);
872         assert(message);
873
874         /* First: identifier and PID */
875         if (ucred) {
876                 if (!identifier) {
877                         get_process_comm(ucred->pid, &ident_buf);
878                         identifier = ident_buf;
879                 }
880
881                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
882                 char_array_0(header_pid);
883
884                 if (identifier)
885                         IOVEC_SET_STRING(iovec[n++], identifier);
886
887                 IOVEC_SET_STRING(iovec[n++], header_pid);
888         } else if (identifier) {
889                 IOVEC_SET_STRING(iovec[n++], identifier);
890                 IOVEC_SET_STRING(iovec[n++], ": ");
891         }
892
893         /* Third: message */
894         IOVEC_SET_STRING(iovec[n++], message);
895         IOVEC_SET_STRING(iovec[n++], "\n");
896
897         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
898         if (fd < 0) {
899                 log_debug("Failed to open /dev/console for logging: %s", strerror(errno));
900                 goto finish;
901         }
902
903         if (writev(fd, iovec, n) < 0)
904                 log_debug("Failed to write to /dev/console for logging: %s", strerror(errno));
905
906         close_nointr_nofail(fd);
907
908 finish:
909         free(ident_buf);
910 }
911
912 static void read_identifier(const char **buf, char **identifier, char **pid) {
913         const char *p;
914         char *t;
915         size_t l, e;
916
917         assert(buf);
918         assert(identifier);
919         assert(pid);
920
921         p = *buf;
922
923         p += strspn(p, WHITESPACE);
924         l = strcspn(p, WHITESPACE);
925
926         if (l <= 0 ||
927             p[l-1] != ':')
928                 return;
929
930         e = l;
931         l--;
932
933         if (p[l-1] == ']') {
934                 size_t k = l-1;
935
936                 for (;;) {
937
938                         if (p[k] == '[') {
939                                 t = strndup(p+k+1, l-k-2);
940                                 if (t)
941                                         *pid = t;
942
943                                 l = k;
944                                 break;
945                         }
946
947                         if (k == 0)
948                                 break;
949
950                         k--;
951                 }
952         }
953
954         t = strndup(p, l);
955         if (t)
956                 *identifier = t;
957
958         *buf = p + e;
959         *buf += strspn(*buf, WHITESPACE);
960 }
961
962 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv) {
963         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
964         struct iovec iovec[N_IOVEC_META_FIELDS + 6];
965         unsigned n = 0;
966         int priority = LOG_USER | LOG_INFO;
967         char *identifier = NULL, *pid = NULL;
968
969         assert(s);
970         assert(buf);
971
972         if (s->forward_to_syslog)
973                 forward_syslog_raw(s, buf, ucred, tv);
974
975         parse_syslog_priority((char**) &buf, &priority);
976         skip_syslog_date((char**) &buf);
977         read_identifier(&buf, &identifier, &pid);
978
979         if (s->forward_to_kmsg)
980                 forward_kmsg(s, priority, identifier, buf, ucred);
981
982         if (s->forward_to_console)
983                 forward_console(s, identifier, buf, ucred);
984
985         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
986
987         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
988                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
989
990         if (priority & LOG_FACMASK)
991                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
992                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
993
994         if (identifier) {
995                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
996                 if (syslog_identifier)
997                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
998         }
999
1000         if (pid) {
1001                 syslog_pid = strappend("SYSLOG_PID=", pid);
1002                 if (syslog_pid)
1003                         IOVEC_SET_STRING(iovec[n++], syslog_pid);
1004         }
1005
1006         message = strappend("MESSAGE=", buf);
1007         if (message)
1008                 IOVEC_SET_STRING(iovec[n++], message);
1009
1010         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, priority);
1011
1012         free(message);
1013         free(identifier);
1014         free(pid);
1015         free(syslog_priority);
1016         free(syslog_facility);
1017         free(syslog_identifier);
1018 }
1019
/* Checks whether the first l bytes of p form a field name that clients
 * are allowed to set.
 *
 * This loosely follows the POSIX syntax recommendations for
 * environment variable names, with a few additional restrictions:
 *
 * http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
 *
 * Returns true if the name is acceptable, false otherwise. */
static bool valid_user_field(const char *p, size_t l) {
        size_t i;

        /* The name must be non-empty and at most 64 characters long */
        if (l == 0 || l > 64)
                return false;

        /* A leading underscore marks protected fields, and a leading
         * digit is not permitted either */
        if (p[0] == '_' || (p[0] >= '0' && p[0] <= '9'))
                return false;

        /* Everything must be from the set A-Z, 0-9 and '_' */
        for (i = 0; i < l; i++) {
                char c = p[i];
                bool is_upper = c >= 'A' && c <= 'Z';
                bool is_digit = c >= '0' && c <= '9';

                if (!is_upper && !is_digit && c != '_')
                        return false;
        }

        return true;
}
1054
1055 static void process_native_message(Server *s, const void *buffer, size_t buffer_size, struct ucred *ucred, struct timeval *tv) {
1056         struct iovec *iovec = NULL;
1057         unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1058         const char *p;
1059         size_t remaining;
1060         int priority = LOG_INFO;
1061         char *identifier = NULL, *message = NULL;
1062
1063         assert(s);
1064         assert(buffer || n == 0);
1065
1066         p = buffer;
1067         remaining = buffer_size;
1068
1069         while (remaining > 0) {
1070                 const char *e, *q;
1071
1072                 e = memchr(p, '\n', remaining);
1073
1074                 if (!e) {
1075                         /* Trailing noise, let's ignore it, and flush what we collected */
1076                         log_debug("Received message with trailing noise, ignoring.");
1077                         break;
1078                 }
1079
1080                 if (e == p) {
1081                         /* Entry separator */
1082                         dispatch_message(s, iovec, n, m, ucred, tv, priority);
1083                         n = 0;
1084                         priority = LOG_INFO;
1085
1086                         p++;
1087                         remaining--;
1088                         continue;
1089                 }
1090
1091                 if (*p == '.' || *p == '#') {
1092                         /* Ignore control commands for now, and
1093                          * comments too. */
1094                         remaining -= (e - p) + 1;
1095                         p = e + 1;
1096                         continue;
1097                 }
1098
1099                 /* A property follows */
1100
1101                 if (n+N_IOVEC_META_FIELDS >= m) {
1102                         struct iovec *c;
1103                         unsigned u;
1104
1105                         u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1106                         c = realloc(iovec, u * sizeof(struct iovec));
1107                         if (!c) {
1108                                 log_error("Out of memory");
1109                                 break;
1110                         }
1111
1112                         iovec = c;
1113                         m = u;
1114                 }
1115
1116                 q = memchr(p, '=', e - p);
1117                 if (q) {
1118                         if (valid_user_field(p, q - p)) {
1119                                 size_t l;
1120
1121                                 l = e - p;
1122
1123                                 /* If the field name starts with an
1124                                  * underscore, skip the variable,
1125                                  * since that indidates a trusted
1126                                  * field */
1127                                 iovec[n].iov_base = (char*) p;
1128                                 iovec[n].iov_len = l;
1129                                 n++;
1130
1131                                 /* We need to determine the priority
1132                                  * of this entry for the rate limiting
1133                                  * logic */
1134                                 if (l == 10 &&
1135                                     memcmp(p, "PRIORITY=", 9) == 0 &&
1136                                     p[9] >= '0' && p[9] <= '9')
1137                                         priority = (priority & LOG_FACMASK) | (p[9] - '0');
1138
1139                                 else if (l == 17 &&
1140                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1141                                          p[16] >= '0' && p[16] <= '9')
1142                                         priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1143
1144                                 else if (l == 18 &&
1145                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1146                                          p[16] >= '0' && p[16] <= '9' &&
1147                                          p[17] >= '0' && p[17] <= '9')
1148                                         priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1149
1150                                 else if (l >= 12 &&
1151                                          memcmp(p, "SYSLOG_IDENTIFIER=", 11) == 0) {
1152                                         char *t;
1153
1154                                         t = strndup(p + 11, l - 11);
1155                                         if (t) {
1156                                                 free(identifier);
1157                                                 identifier = t;
1158                                         }
1159                                 } else if (l >= 8 &&
1160                                            memcmp(p, "MESSAGE=", 8) == 0) {
1161                                         char *t;
1162
1163                                         t = strndup(p + 8, l - 8);
1164                                         if (t) {
1165                                                 free(message);
1166                                                 message = t;
1167                                         }
1168                                 }
1169                         }
1170
1171                         remaining -= (e - p) + 1;
1172                         p = e + 1;
1173                         continue;
1174                 } else {
1175                         uint64_t l;
1176                         char *k;
1177
1178                         if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1179                                 log_debug("Failed to parse message, ignoring.");
1180                                 break;
1181                         }
1182
1183                         memcpy(&l, e + 1, sizeof(uint64_t));
1184                         l = le64toh(l);
1185
1186                         if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1187                             e[1+sizeof(uint64_t)+l] != '\n') {
1188                                 log_debug("Failed to parse message, ignoring.");
1189                                 break;
1190                         }
1191
1192                         k = malloc((e - p) + 1 + l);
1193                         if (!k) {
1194                                 log_error("Out of memory");
1195                                 break;
1196                         }
1197
1198                         memcpy(k, p, e - p);
1199                         k[e - p] = '=';
1200                         memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1201
1202                         if (valid_user_field(p, e - p)) {
1203                                 iovec[n].iov_base = k;
1204                                 iovec[n].iov_len = (e - p) + 1 + l;
1205                                 n++;
1206                         } else
1207                                 free(k);
1208
1209                         remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1210                         p = e + 1 + sizeof(uint64_t) + l + 1;
1211                 }
1212         }
1213
1214         if (n <= 0)
1215                 goto finish;
1216
1217         tn = n++;
1218         IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1219
1220         if (message) {
1221                 if (s->forward_to_syslog)
1222                         forward_syslog(s, priority, identifier, message, ucred, tv);
1223
1224                 if (s->forward_to_kmsg)
1225                         forward_kmsg(s, priority, identifier, message, ucred);
1226
1227                 if (s->forward_to_console)
1228                         forward_console(s, identifier, message, ucred);
1229         }
1230
1231         dispatch_message(s, iovec, n, m, ucred, tv, priority);
1232
1233 finish:
1234         for (j = 0; j < n; j++)  {
1235                 if (j == tn)
1236                         continue;
1237
1238                 if (iovec[j].iov_base < buffer ||
1239                     (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1240                         free(iovec[j].iov_base);
1241         }
1242
1243         free(identifier);
1244         free(message);
1245 }
1246
1247 static int stdout_stream_log(StdoutStream *s, const char *p) {
1248         struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1249         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1250         unsigned n = 0;
1251         int priority;
1252
1253         assert(s);
1254         assert(p);
1255
1256         priority = s->priority;
1257
1258         if (s->level_prefix)
1259                 parse_syslog_priority((char**) &p, &priority);
1260
1261         if (s->forward_to_syslog || s->server->forward_to_syslog)
1262                 forward_syslog(s->server, fixup_priority(priority), s->identifier, p, &s->ucred, NULL);
1263
1264         if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1265                 forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1266
1267         if (s->forward_to_console || s->server->forward_to_console)
1268                 forward_console(s->server, s->identifier, p, &s->ucred);
1269
1270         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1271
1272         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1273                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1274
1275         if (priority & LOG_FACMASK)
1276                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1277                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1278
1279         if (s->identifier) {
1280                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1281                 if (syslog_identifier)
1282                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1283         }
1284
1285         message = strappend("MESSAGE=", p);
1286         if (message)
1287                 IOVEC_SET_STRING(iovec[n++], message);
1288
1289         dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, priority);
1290
1291         free(message);
1292         free(syslog_priority);
1293         free(syslog_facility);
1294         free(syslog_identifier);
1295
1296         return 0;
1297 }
1298
1299 static int stdout_stream_line(StdoutStream *s, char *p) {
1300         int r;
1301
1302         assert(s);
1303         assert(p);
1304
1305         p = strstrip(p);
1306
1307         switch (s->state) {
1308
1309         case STDOUT_STREAM_IDENTIFIER:
1310                 s->identifier = strdup(p);
1311                 if (!s->identifier) {
1312                         log_error("Out of memory");
1313                         return -ENOMEM;
1314                 }
1315
1316                 s->state = STDOUT_STREAM_PRIORITY;
1317                 return 0;
1318
1319         case STDOUT_STREAM_PRIORITY:
1320                 r = safe_atoi(p, &s->priority);
1321                 if (r < 0 || s->priority <= 0 || s->priority >= 999) {
1322                         log_warning("Failed to parse log priority line.");
1323                         return -EINVAL;
1324                 }
1325
1326                 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1327                 return 0;
1328
1329         case STDOUT_STREAM_LEVEL_PREFIX:
1330                 r = parse_boolean(p);
1331                 if (r < 0) {
1332                         log_warning("Failed to parse level prefix line.");
1333                         return -EINVAL;
1334                 }
1335
1336                 s->level_prefix = !!r;
1337                 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1338                 return 0;
1339
1340         case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1341                 r = parse_boolean(p);
1342                 if (r < 0) {
1343                         log_warning("Failed to parse forward to syslog line.");
1344                         return -EINVAL;
1345                 }
1346
1347                 s->forward_to_syslog = !!r;
1348                 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1349                 return 0;
1350
1351         case STDOUT_STREAM_FORWARD_TO_KMSG:
1352                 r = parse_boolean(p);
1353                 if (r < 0) {
1354                         log_warning("Failed to parse copy to kmsg line.");
1355                         return -EINVAL;
1356                 }
1357
1358                 s->forward_to_kmsg = !!r;
1359                 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1360                 return 0;
1361
1362         case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1363                 r = parse_boolean(p);
1364                 if (r < 0) {
1365                         log_warning("Failed to parse copy to console line.");
1366                         return -EINVAL;
1367                 }
1368
1369                 s->forward_to_console = !!r;
1370                 s->state = STDOUT_STREAM_RUNNING;
1371                 return 0;
1372
1373         case STDOUT_STREAM_RUNNING:
1374                 return stdout_stream_log(s, p);
1375         }
1376
1377         assert_not_reached("Unknown stream state");
1378 }
1379
1380 static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
1381         char *p;
1382         size_t remaining;
1383         int r;
1384
1385         assert(s);
1386
1387         p = s->buffer;
1388         remaining = s->length;
1389         for (;;) {
1390                 char *end;
1391                 size_t skip;
1392
1393                 end = memchr(p, '\n', remaining);
1394                 if (end)
1395                         skip = end - p + 1;
1396                 else if (remaining >= sizeof(s->buffer) - 1) {
1397                         end = p + sizeof(s->buffer) - 1;
1398                         skip = remaining;
1399                 } else
1400                         break;
1401
1402                 *end = 0;
1403
1404                 r = stdout_stream_line(s, p);
1405                 if (r < 0)
1406                         return r;
1407
1408                 remaining -= skip;
1409                 p += skip;
1410         }
1411
1412         if (force_flush && remaining > 0) {
1413                 p[remaining] = 0;
1414                 r = stdout_stream_line(s, p);
1415                 if (r < 0)
1416                         return r;
1417
1418                 p += remaining;
1419                 remaining = 0;
1420         }
1421
1422         if (p > s->buffer) {
1423                 memmove(s->buffer, p, remaining);
1424                 s->length = remaining;
1425         }
1426
1427         return 0;
1428 }
1429
1430 static int stdout_stream_process(StdoutStream *s) {
1431         ssize_t l;
1432         int r;
1433
1434         assert(s);
1435
1436         l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1437         if (l < 0) {
1438
1439                 if (errno == EAGAIN)
1440                         return 0;
1441
1442                 log_warning("Failed to read from stream: %m");
1443                 return -errno;
1444         }
1445
1446         if (l == 0) {
1447                 r = stdout_stream_scan(s, true);
1448                 if (r < 0)
1449                         return r;
1450
1451                 return 0;
1452         }
1453
1454         s->length += l;
1455         r = stdout_stream_scan(s, false);
1456         if (r < 0)
1457                 return r;
1458
1459         return 1;
1460
1461 }
1462
1463 static void stdout_stream_free(StdoutStream *s) {
1464         assert(s);
1465
1466         if (s->server) {
1467                 assert(s->server->n_stdout_streams > 0);
1468                 s->server->n_stdout_streams --;
1469                 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
1470         }
1471
1472         if (s->fd >= 0) {
1473                 if (s->server)
1474                         epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1475
1476                 close_nointr_nofail(s->fd);
1477         }
1478
1479         free(s->identifier);
1480         free(s);
1481 }
1482
1483 static int stdout_stream_new(Server *s) {
1484         StdoutStream *stream;
1485         int fd, r;
1486         socklen_t len;
1487         struct epoll_event ev;
1488
1489         assert(s);
1490
1491         fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1492         if (fd < 0) {
1493                 if (errno == EAGAIN)
1494                         return 0;
1495
1496                 log_error("Failed to accept stdout connection: %m");
1497                 return -errno;
1498         }
1499
1500         if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1501                 log_warning("Too many stdout streams, refusing connection.");
1502                 close_nointr_nofail(fd);
1503                 return 0;
1504         }
1505
1506         stream = new0(StdoutStream, 1);
1507         if (!stream) {
1508                 log_error("Out of memory.");
1509                 close_nointr_nofail(fd);
1510                 return -ENOMEM;
1511         }
1512
1513         stream->fd = fd;
1514
1515         len = sizeof(stream->ucred);
1516         if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1517                 log_error("Failed to determine peer credentials: %m");
1518                 r = -errno;
1519                 goto fail;
1520         }
1521
1522         if (shutdown(fd, SHUT_WR) < 0) {
1523                 log_error("Failed to shutdown writing side of socket: %m");
1524                 r = -errno;
1525                 goto fail;
1526         }
1527
1528         zero(ev);
1529         ev.data.ptr = stream;
1530         ev.events = EPOLLIN;
1531         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1532                 log_error("Failed to add stream to event loop: %m");
1533                 r = -errno;
1534                 goto fail;
1535         }
1536
1537         stream->server = s;
1538         LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1539         s->n_stdout_streams ++;
1540
1541         return 0;
1542
1543 fail:
1544         stdout_stream_free(stream);
1545         return r;
1546 }
1547
1548 static int parse_kernel_timestamp(char **_p, usec_t *t) {
1549         usec_t r;
1550         int k, i;
1551         char *p;
1552
1553         assert(_p);
1554         assert(*_p);
1555         assert(t);
1556
1557         p = *_p;
1558
1559         if (strlen(p) < 14 || p[0] != '[' || p[13] != ']' || p[6] != '.')
1560                 return 0;
1561
1562         r = 0;
1563
1564         for (i = 1; i <= 5; i++) {
1565                 r *= 10;
1566
1567                 if (p[i] == ' ')
1568                         continue;
1569
1570                 k = undecchar(p[i]);
1571                 if (k < 0)
1572                         return 0;
1573
1574                 r += k;
1575         }
1576
1577         for (i = 7; i <= 12; i++) {
1578                 r *= 10;
1579
1580                 k = undecchar(p[i]);
1581                 if (k < 0)
1582                         return 0;
1583
1584                 r += k;
1585         }
1586
1587         *t = r;
1588         *_p += 14;
1589         *_p += strspn(*_p, WHITESPACE);
1590
1591         return 1;
1592 }
1593
1594 static void proc_kmsg_line(Server *s, const char *p) {
1595         struct iovec iovec[N_IOVEC_META_FIELDS + 7];
1596         char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
1597         int priority = LOG_KERN | LOG_INFO;
1598         unsigned n = 0;
1599         usec_t usec;
1600         char *identifier = NULL, *pid = NULL;
1601
1602         assert(s);
1603         assert(p);
1604
1605         parse_syslog_priority((char **) &p, &priority);
1606
1607         if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
1608                 return;
1609
1610         if (parse_kernel_timestamp((char **) &p, &usec) > 0) {
1611                 if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
1612                              (unsigned long long) usec) >= 0)
1613                         IOVEC_SET_STRING(iovec[n++], source_time);
1614         }
1615
1616         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
1617
1618         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1619                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1620
1621         if ((priority & LOG_FACMASK) == LOG_KERN) {
1622
1623                 if (s->forward_to_syslog)
1624                         forward_syslog(s, priority, "kernel", p, NULL, NULL);
1625
1626                 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
1627         } else {
1628                 read_identifier(&p, &identifier, &pid);
1629
1630                 if (s->forward_to_syslog)
1631                         forward_syslog(s, priority, identifier, p, NULL, NULL);
1632
1633                 if (identifier) {
1634                         syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1635                         if (syslog_identifier)
1636                                 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1637                 }
1638
1639                 if (pid) {
1640                         syslog_pid = strappend("SYSLOG_PID=", pid);
1641                         if (syslog_pid)
1642                                 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1643                 }
1644
1645                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1646                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1647         }
1648
1649         message = strappend("MESSAGE=", p);
1650         if (message)
1651                 IOVEC_SET_STRING(iovec[n++], message);
1652
1653
1654         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, priority);
1655
1656         free(message);
1657         free(syslog_priority);
1658         free(syslog_identifier);
1659         free(syslog_pid);
1660         free(syslog_facility);
1661         free(source_time);
1662         free(identifier);
1663         free(pid);
1664 }
1665
1666 static void proc_kmsg_scan(Server *s) {
1667         char *p;
1668         size_t remaining;
1669
1670         assert(s);
1671
1672         p = s->proc_kmsg_buffer;
1673         remaining = s->proc_kmsg_length;
1674         for (;;) {
1675                 char *end;
1676                 size_t skip;
1677
1678                 end = memchr(p, '\n', remaining);
1679                 if (end)
1680                         skip = end - p + 1;
1681                 else if (remaining >= sizeof(s->proc_kmsg_buffer) - 1) {
1682                         end = p + sizeof(s->proc_kmsg_buffer) - 1;
1683                         skip = remaining;
1684                 } else
1685                         break;
1686
1687                 *end = 0;
1688
1689                 proc_kmsg_line(s, p);
1690
1691                 remaining -= skip;
1692                 p += skip;
1693         }
1694
1695         if (p > s->proc_kmsg_buffer) {
1696                 memmove(s->proc_kmsg_buffer, p, remaining);
1697                 s->proc_kmsg_length = remaining;
1698         }
1699 }
1700
1701 static int system_journal_open(Server *s) {
1702         int r;
1703         char *fn;
1704         sd_id128_t machine;
1705         char ids[33];
1706
1707         r = sd_id128_get_machine(&machine);
1708         if (r < 0)
1709                 return r;
1710
1711         sd_id128_to_string(machine, ids);
1712
1713         if (!s->system_journal) {
1714
1715                 /* First try to create the machine path, but not the prefix */
1716                 fn = strappend("/var/log/journal/", ids);
1717                 if (!fn)
1718                         return -ENOMEM;
1719                 (void) mkdir(fn, 0755);
1720                 free(fn);
1721
1722                 /* The create the system journal file */
1723                 fn = join("/var/log/journal/", ids, "/system.journal", NULL);
1724                 if (!fn)
1725                         return -ENOMEM;
1726
1727                 r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, NULL, &s->system_journal);
1728                 free(fn);
1729
1730                 if (r >= 0) {
1731                         journal_default_metrics(&s->system_metrics, s->system_journal->fd);
1732
1733                         s->system_journal->metrics = s->system_metrics;
1734                         s->system_journal->compress = s->compress;
1735
1736                         fix_perms(s->system_journal, 0);
1737                 } else if (r < 0) {
1738
1739                         if (r != -ENOENT && r != -EROFS)
1740                                 log_warning("Failed to open system journal: %s", strerror(-r));
1741
1742                         r = 0;
1743                 }
1744         }
1745
1746         if (!s->runtime_journal) {
1747
1748                 fn = join("/run/log/journal/", ids, "/system.journal", NULL);
1749                 if (!fn)
1750                         return -ENOMEM;
1751
1752                 if (s->system_journal) {
1753
1754                         /* Try to open the runtime journal, but only
1755                          * if it already exists, so that we can flush
1756                          * it into the system journal */
1757
1758                         r = journal_file_open(fn, O_RDWR, 0640, NULL, &s->runtime_journal);
1759                         free(fn);
1760
1761                         if (r < 0) {
1762                                 if (r != -ENOENT)
1763                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
1764
1765                                 r = 0;
1766                         }
1767
1768                 } else {
1769
1770                         /* OK, we really need the runtime journal, so create
1771                          * it if necessary. */
1772
1773                         (void) mkdir_parents(fn, 0755);
1774                         r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, NULL, &s->runtime_journal);
1775                         free(fn);
1776
1777                         if (r < 0) {
1778                                 log_error("Failed to open runtime journal: %s", strerror(-r));
1779                                 return r;
1780                         }
1781                 }
1782
1783                 if (s->runtime_journal) {
1784                         journal_default_metrics(&s->runtime_metrics, s->runtime_journal->fd);
1785
1786                         s->runtime_journal->metrics = s->runtime_metrics;
1787                         s->runtime_journal->compress = s->compress;
1788
1789                         fix_perms(s->runtime_journal, 0);
1790                 }
1791         }
1792
1793         return r;
1794 }
1795
1796 static int server_flush_to_var(Server *s) {
1797         char path[] = "/run/log/journal/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
1798         Object *o = NULL;
1799         int r;
1800         sd_id128_t machine;
1801         sd_journal *j;
1802         usec_t ts;
1803
1804         assert(s);
1805
1806         if (!s->runtime_journal)
1807                 return 0;
1808
1809         ts = now(CLOCK_MONOTONIC);
1810         if (s->var_available_timestamp + RECHECK_VAR_AVAILABLE_USEC > ts)
1811                 return 0;
1812
1813         s->var_available_timestamp = ts;
1814
1815         system_journal_open(s);
1816
1817         if (!s->system_journal)
1818                 return 0;
1819
1820         log_info("Flushing to /var...");
1821
1822         r = sd_id128_get_machine(&machine);
1823         if (r < 0) {
1824                 log_error("Failed to get machine id: %s", strerror(-r));
1825                 return r;
1826         }
1827
1828         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1829         if (r < 0) {
1830                 log_error("Failed to read runtime journal: %s", strerror(-r));
1831                 return r;
1832         }
1833
1834         SD_JOURNAL_FOREACH(j) {
1835                 JournalFile *f;
1836
1837                 f = j->current_file;
1838                 assert(f && f->current_offset > 0);
1839
1840                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1841                 if (r < 0) {
1842                         log_error("Can't read entry: %s", strerror(-r));
1843                         goto finish;
1844                 }
1845
1846                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1847                 if (r == -E2BIG) {
1848                         log_info("Allocation limit reached.");
1849
1850                         journal_file_post_change(s->system_journal);
1851                         server_rotate(s);
1852                         server_vacuum(s);
1853
1854                         r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1855                 }
1856
1857                 if (r < 0) {
1858                         log_error("Can't write entry: %s", strerror(-r));
1859                         goto finish;
1860                 }
1861         }
1862
1863 finish:
1864         journal_file_post_change(s->system_journal);
1865
1866         journal_file_close(s->runtime_journal);
1867         s->runtime_journal = NULL;
1868
1869         if (r >= 0) {
1870                 sd_id128_to_string(machine, path + 17);
1871                 rm_rf(path, false, true, false);
1872         }
1873
1874         return r;
1875 }
1876
1877 static int server_read_proc_kmsg(Server *s) {
1878         ssize_t l;
1879         assert(s);
1880         assert(s->proc_kmsg_fd >= 0);
1881
1882         l = read(s->proc_kmsg_fd, s->proc_kmsg_buffer + s->proc_kmsg_length, sizeof(s->proc_kmsg_buffer) - 1 - s->proc_kmsg_length);
1883         if (l < 0) {
1884
1885                 if (errno == EAGAIN || errno == EINTR)
1886                         return 0;
1887
1888                 log_error("Failed to read from kernel: %m");
1889                 return -errno;
1890         }
1891
1892         s->proc_kmsg_length += l;
1893
1894         proc_kmsg_scan(s);
1895         return 1;
1896 }
1897
1898 static int server_flush_proc_kmsg(Server *s) {
1899         int r;
1900
1901         assert(s);
1902
1903         if (s->proc_kmsg_fd < 0)
1904                 return 0;
1905
1906         log_info("Flushing /proc/kmsg...");
1907
1908         for (;;) {
1909                 r = server_read_proc_kmsg(s);
1910                 if (r < 0)
1911                         return r;
1912
1913                 if (r == 0)
1914                         break;
1915         }
1916
1917         return 0;
1918 }
1919
1920 static int process_event(Server *s, struct epoll_event *ev) {
1921         assert(s);
1922
1923         if (ev->data.fd == s->signal_fd) {
1924                 struct signalfd_siginfo sfsi;
1925                 ssize_t n;
1926
1927                 if (ev->events != EPOLLIN) {
1928                         log_info("Got invalid event from epoll.");
1929                         return -EIO;
1930                 }
1931
1932                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1933                 if (n != sizeof(sfsi)) {
1934
1935                         if (n >= 0)
1936                                 return -EIO;
1937
1938                         if (errno == EINTR || errno == EAGAIN)
1939                                 return 1;
1940
1941                         return -errno;
1942                 }
1943
1944                 if (sfsi.ssi_signo == SIGUSR1) {
1945                         server_flush_to_var(s);
1946                         return 0;
1947                 }
1948
1949                 log_debug("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1950                 return 0;
1951
1952         } else if (ev->data.fd == s->proc_kmsg_fd) {
1953                 int r;
1954
1955                 if (ev->events != EPOLLIN) {
1956                         log_info("Got invalid event from epoll.");
1957                         return -EIO;
1958                 }
1959
1960                 r = server_read_proc_kmsg(s);
1961                 if (r < 0)
1962                         return r;
1963
1964                 return 1;
1965
1966         } else if (ev->data.fd == s->native_fd ||
1967                    ev->data.fd == s->syslog_fd) {
1968
1969                 if (ev->events != EPOLLIN) {
1970                         log_info("Got invalid event from epoll.");
1971                         return -EIO;
1972                 }
1973
1974                 for (;;) {
1975                         struct msghdr msghdr;
1976                         struct iovec iovec;
1977                         struct ucred *ucred = NULL;
1978                         struct timeval *tv = NULL;
1979                         struct cmsghdr *cmsg;
1980                         union {
1981                                 struct cmsghdr cmsghdr;
1982                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1983                                             CMSG_SPACE(sizeof(struct timeval))];
1984                         } control;
1985                         ssize_t n;
1986                         int v;
1987
1988                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1989                                 log_error("SIOCINQ failed: %m");
1990                                 return -errno;
1991                         }
1992
1993                         if (v <= 0)
1994                                 return 1;
1995
1996                         if (s->buffer_size < (size_t) v) {
1997                                 void *b;
1998                                 size_t l;
1999
2000                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
2001                                 b = realloc(s->buffer, l+1);
2002
2003                                 if (!b) {
2004                                         log_error("Couldn't increase buffer.");
2005                                         return -ENOMEM;
2006                                 }
2007
2008                                 s->buffer_size = l;
2009                                 s->buffer = b;
2010                         }
2011
2012                         zero(iovec);
2013                         iovec.iov_base = s->buffer;
2014                         iovec.iov_len = s->buffer_size;
2015
2016                         zero(control);
2017                         zero(msghdr);
2018                         msghdr.msg_iov = &iovec;
2019                         msghdr.msg_iovlen = 1;
2020                         msghdr.msg_control = &control;
2021                         msghdr.msg_controllen = sizeof(control);
2022
2023                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT);
2024                         if (n < 0) {
2025
2026                                 if (errno == EINTR || errno == EAGAIN)
2027                                         return 1;
2028
2029                                 log_error("recvmsg() failed: %m");
2030                                 return -errno;
2031                         }
2032
2033                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
2034
2035                                 if (cmsg->cmsg_level == SOL_SOCKET &&
2036                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
2037                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
2038                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
2039                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2040                                          cmsg->cmsg_type == SO_TIMESTAMP &&
2041                                          cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
2042                                         tv = (struct timeval*) CMSG_DATA(cmsg);
2043                         }
2044
2045                         if (ev->data.fd == s->syslog_fd) {
2046                                 char *e;
2047
2048                                 e = memchr(s->buffer, '\n', n);
2049                                 if (e)
2050                                         *e = 0;
2051                                 else
2052                                         s->buffer[n] = 0;
2053
2054                                 process_syslog_message(s, strstrip(s->buffer), ucred, tv);
2055                         } else
2056                                 process_native_message(s, s->buffer, n, ucred, tv);
2057                 }
2058
2059                 return 1;
2060
2061         } else if (ev->data.fd == s->stdout_fd) {
2062
2063                 if (ev->events != EPOLLIN) {
2064                         log_info("Got invalid event from epoll.");
2065                         return -EIO;
2066                 }
2067
2068                 stdout_stream_new(s);
2069                 return 1;
2070
2071         } else {
2072                 StdoutStream *stream;
2073
2074                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
2075                         log_info("Got invalid event from epoll.");
2076                         return -EIO;
2077                 }
2078
2079                 /* If it is none of the well-known fds, it must be an
2080                  * stdout stream fd. Note that this is a bit ugly here
2081                  * (since we rely that none of the well-known fds
2082                  * could be interpreted as pointer), but nonetheless
2083                  * safe, since the well-known fds would never get an
2084                  * fd > 4096, i.e. beyond the first memory page */
2085
2086                 stream = ev->data.ptr;
2087
2088                 if (stdout_stream_process(stream) <= 0)
2089                         stdout_stream_free(stream);
2090
2091                 return 1;
2092         }
2093
2094         log_error("Unknown event.");
2095         return 0;
2096 }
2097
2098 static int open_syslog_socket(Server *s) {
2099         union sockaddr_union sa;
2100         int one, r;
2101         struct epoll_event ev;
2102         struct timeval tv;
2103
2104         assert(s);
2105
2106         if (s->syslog_fd < 0) {
2107
2108                 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2109                 if (s->syslog_fd < 0) {
2110                         log_error("socket() failed: %m");
2111                         return -errno;
2112                 }
2113
2114                 zero(sa);
2115                 sa.un.sun_family = AF_UNIX;
2116                 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
2117
2118                 unlink(sa.un.sun_path);
2119
2120                 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2121                 if (r < 0) {
2122                         log_error("bind() failed: %m");
2123                         return -errno;
2124                 }
2125
2126                 chmod(sa.un.sun_path, 0666);
2127         }
2128
2129         one = 1;
2130         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2131         if (r < 0) {
2132                 log_error("SO_PASSCRED failed: %m");
2133                 return -errno;
2134         }
2135
2136         one = 1;
2137         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2138         if (r < 0) {
2139                 log_error("SO_TIMESTAMP failed: %m");
2140                 return -errno;
2141         }
2142
2143         /* Since we use the same socket for forwarding this to some
2144          * other syslog implementation, make sure we don't hang
2145          * forever */
2146         timeval_store(&tv, SYSLOG_TIMEOUT_USEC);
2147         if (setsockopt(s->syslog_fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)) < 0) {
2148                 log_error("SO_SNDTIMEO failed: %m");
2149                 return -errno;
2150         }
2151
2152         zero(ev);
2153         ev.events = EPOLLIN;
2154         ev.data.fd = s->syslog_fd;
2155         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2156                 log_error("Failed to add syslog server fd to epoll object: %m");
2157                 return -errno;
2158         }
2159
2160         return 0;
2161 }
2162
2163 static int open_native_socket(Server*s) {
2164         union sockaddr_union sa;
2165         int one, r;
2166         struct epoll_event ev;
2167
2168         assert(s);
2169
2170         if (s->native_fd < 0) {
2171
2172                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2173                 if (s->native_fd < 0) {
2174                         log_error("socket() failed: %m");
2175                         return -errno;
2176                 }
2177
2178                 zero(sa);
2179                 sa.un.sun_family = AF_UNIX;
2180                 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
2181
2182                 unlink(sa.un.sun_path);
2183
2184                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2185                 if (r < 0) {
2186                         log_error("bind() failed: %m");
2187                         return -errno;
2188                 }
2189
2190                 chmod(sa.un.sun_path, 0666);
2191         }
2192
2193         one = 1;
2194         r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2195         if (r < 0) {
2196                 log_error("SO_PASSCRED failed: %m");
2197                 return -errno;
2198         }
2199
2200         one = 1;
2201         r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2202         if (r < 0) {
2203                 log_error("SO_TIMESTAMP failed: %m");
2204                 return -errno;
2205         }
2206
2207         zero(ev);
2208         ev.events = EPOLLIN;
2209         ev.data.fd = s->native_fd;
2210         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2211                 log_error("Failed to add native server fd to epoll object: %m");
2212                 return -errno;
2213         }
2214
2215         return 0;
2216 }
2217
2218 static int open_stdout_socket(Server *s) {
2219         union sockaddr_union sa;
2220         int r;
2221         struct epoll_event ev;
2222
2223         assert(s);
2224
2225         if (s->stdout_fd < 0) {
2226
2227                 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2228                 if (s->stdout_fd < 0) {
2229                         log_error("socket() failed: %m");
2230                         return -errno;
2231                 }
2232
2233                 zero(sa);
2234                 sa.un.sun_family = AF_UNIX;
2235                 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
2236
2237                 unlink(sa.un.sun_path);
2238
2239                 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2240                 if (r < 0) {
2241                         log_error("bind() failed: %m");
2242                         return -errno;
2243                 }
2244
2245                 chmod(sa.un.sun_path, 0666);
2246
2247                 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2248                         log_error("liste() failed: %m");
2249                         return -errno;
2250                 }
2251         }
2252
2253         zero(ev);
2254         ev.events = EPOLLIN;
2255         ev.data.fd = s->stdout_fd;
2256         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2257                 log_error("Failed to add stdout server fd to epoll object: %m");
2258                 return -errno;
2259         }
2260
2261         return 0;
2262 }
2263
2264 static int open_proc_kmsg(Server *s) {
2265         struct epoll_event ev;
2266
2267         assert(s);
2268
2269         if (!s->import_proc_kmsg)
2270                 return 0;
2271
2272
2273         s->proc_kmsg_fd = open("/proc/kmsg", O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2274         if (s->proc_kmsg_fd < 0) {
2275                 log_warning("Failed to open /proc/kmsg, ignoring: %m");
2276                 return 0;
2277         }
2278
2279         zero(ev);
2280         ev.events = EPOLLIN;
2281         ev.data.fd = s->proc_kmsg_fd;
2282         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->proc_kmsg_fd, &ev) < 0) {
2283                 log_error("Failed to add /proc/kmsg fd to epoll object: %m");
2284                 return -errno;
2285         }
2286
2287         return 0;
2288 }
2289
2290 static int open_signalfd(Server *s) {
2291         sigset_t mask;
2292         struct epoll_event ev;
2293
2294         assert(s);
2295
2296         assert_se(sigemptyset(&mask) == 0);
2297         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, -1);
2298         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2299
2300         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2301         if (s->signal_fd < 0) {
2302                 log_error("signalfd(): %m");
2303                 return -errno;
2304         }
2305
2306         zero(ev);
2307         ev.events = EPOLLIN;
2308         ev.data.fd = s->signal_fd;
2309
2310         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2311                 log_error("epoll_ctl(): %m");
2312                 return -errno;
2313         }
2314
2315         return 0;
2316 }
2317
2318 static int server_parse_proc_cmdline(Server *s) {
2319         char *line, *w, *state;
2320         int r;
2321         size_t l;
2322
2323         if (detect_container(NULL) > 0)
2324                 return 0;
2325
2326         r = read_one_line_file("/proc/cmdline", &line);
2327         if (r < 0) {
2328                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2329                 return 0;
2330         }
2331
2332         FOREACH_WORD_QUOTED(w, l, line, state) {
2333                 char *word;
2334
2335                 word = strndup(w, l);
2336                 if (!word) {
2337                         r = -ENOMEM;
2338                         goto finish;
2339                 }
2340
2341                 if (startswith(word, "systemd_journald.forward_to_syslog=")) {
2342                         r = parse_boolean(word + 35);
2343                         if (r < 0)
2344                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2345                         else
2346                                 s->forward_to_syslog = r;
2347                 } else if (startswith(word, "systemd_journald.forward_to_kmsg=")) {
2348                         r = parse_boolean(word + 33);
2349                         if (r < 0)
2350                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2351                         else
2352                                 s->forward_to_kmsg = r;
2353                 } else if (startswith(word, "systemd_journald.forward_to_console=")) {
2354                         r = parse_boolean(word + 36);
2355                         if (r < 0)
2356                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2357                         else
2358                                 s->forward_to_console = r;
2359                 }
2360
2361                 free(word);
2362         }
2363
2364         r = 0;
2365
2366 finish:
2367         free(line);
2368         return r;
2369 }
2370
2371 static int server_parse_config_file(Server *s) {
2372         FILE *f;
2373         const char *fn;
2374         int r;
2375
2376         assert(s);
2377
2378         fn = "/etc/systemd/systemd-journald.conf";
2379         f = fopen(fn, "re");
2380         if (!f) {
2381                 if (errno == ENOENT)
2382                         return 0;
2383
2384                 log_warning("Failed to open configuration file %s: %m", fn);
2385                 return -errno;
2386         }
2387
2388         r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2389         if (r < 0)
2390                 log_warning("Failed to parse configuration file: %s", strerror(-r));
2391
2392         fclose(f);
2393
2394         return r;
2395 }
2396
2397 static int server_init(Server *s) {
2398         int n, r, fd;
2399
2400         assert(s);
2401
2402         zero(*s);
2403         s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->proc_kmsg_fd = -1;
2404         s->compress = true;
2405
2406         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
2407         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
2408
2409         s->forward_to_syslog = true;
2410         s->import_proc_kmsg = true;
2411
2412         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
2413         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
2414
2415         server_parse_config_file(s);
2416         server_parse_proc_cmdline(s);
2417
2418         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
2419         if (!s->user_journals) {
2420                 log_error("Out of memory.");
2421                 return -ENOMEM;
2422         }
2423
2424         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2425         if (s->epoll_fd < 0) {
2426                 log_error("Failed to create epoll object: %m");
2427                 return -errno;
2428         }
2429
2430         n = sd_listen_fds(true);
2431         if (n < 0) {
2432                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
2433                 return n;
2434         }
2435
2436         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2437
2438                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
2439
2440                         if (s->native_fd >= 0) {
2441                                 log_error("Too many native sockets passed.");
2442                                 return -EINVAL;
2443                         }
2444
2445                         s->native_fd = fd;
2446
2447                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
2448
2449                         if (s->stdout_fd >= 0) {
2450                                 log_error("Too many stdout sockets passed.");
2451                                 return -EINVAL;
2452                         }
2453
2454                         s->stdout_fd = fd;
2455
2456                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
2457
2458                         if (s->syslog_fd >= 0) {
2459                                 log_error("Too many /dev/log sockets passed.");
2460                                 return -EINVAL;
2461                         }
2462
2463                         s->syslog_fd = fd;
2464
2465                 } else {
2466                         log_error("Unknown socket passed.");
2467                         return -EINVAL;
2468                 }
2469         }
2470
2471         r = open_syslog_socket(s);
2472         if (r < 0)
2473                 return r;
2474
2475         r = open_native_socket(s);
2476         if (r < 0)
2477                 return r;
2478
2479         r = open_stdout_socket(s);
2480         if (r < 0)
2481                 return r;
2482
2483         r = open_proc_kmsg(s);
2484         if (r < 0)
2485                 return r;
2486
2487         r = system_journal_open(s);
2488         if (r < 0)
2489                 return r;
2490
2491         r = open_signalfd(s);
2492         if (r < 0)
2493                 return r;
2494
2495         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2496         if (!s->rate_limit)
2497                 return -ENOMEM;
2498
2499         return 0;
2500 }
2501
2502 static void server_done(Server *s) {
2503         JournalFile *f;
2504         assert(s);
2505
2506         while (s->stdout_streams)
2507                 stdout_stream_free(s->stdout_streams);
2508
2509         if (s->system_journal)
2510                 journal_file_close(s->system_journal);
2511
2512         if (s->runtime_journal)
2513                 journal_file_close(s->runtime_journal);
2514
2515         while ((f = hashmap_steal_first(s->user_journals)))
2516                 journal_file_close(f);
2517
2518         hashmap_free(s->user_journals);
2519
2520         if (s->epoll_fd >= 0)
2521                 close_nointr_nofail(s->epoll_fd);
2522
2523         if (s->signal_fd >= 0)
2524                 close_nointr_nofail(s->signal_fd);
2525
2526         if (s->syslog_fd >= 0)
2527                 close_nointr_nofail(s->syslog_fd);
2528
2529         if (s->native_fd >= 0)
2530                 close_nointr_nofail(s->native_fd);
2531
2532         if (s->stdout_fd >= 0)
2533                 close_nointr_nofail(s->stdout_fd);
2534
2535         if (s->proc_kmsg_fd >= 0)
2536                 close_nointr_nofail(s->proc_kmsg_fd);
2537
2538         if (s->rate_limit)
2539                 journal_rate_limit_free(s->rate_limit);
2540
2541         free(s->buffer);
2542 }
2543
2544 int main(int argc, char *argv[]) {
2545         Server server;
2546         int r;
2547
2548         /* if (getppid() != 1) { */
2549         /*         log_error("This program should be invoked by init only."); */
2550         /*         return EXIT_FAILURE; */
2551         /* } */
2552
2553         if (argc > 1) {
2554                 log_error("This program does not take arguments.");
2555                 return EXIT_FAILURE;
2556         }
2557
2558         log_set_target(LOG_TARGET_CONSOLE);
2559         log_parse_environment();
2560         log_open();
2561
2562         umask(0022);
2563
2564         r = server_init(&server);
2565         if (r < 0)
2566                 goto finish;
2567
2568         server_vacuum(&server);
2569         server_flush_to_var(&server);
2570         server_flush_proc_kmsg(&server);
2571
2572         log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
2573         driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
2574
2575         sd_notify(false,
2576                   "READY=1\n"
2577                   "STATUS=Processing requests...");
2578
2579         for (;;) {
2580                 struct epoll_event event;
2581
2582                 r = epoll_wait(server.epoll_fd, &event, 1, -1);
2583                 if (r < 0) {
2584
2585                         if (errno == EINTR)
2586                                 continue;
2587
2588                         log_error("epoll_wait() failed: %m");
2589                         r = -errno;
2590                         goto finish;
2591                 } else if (r == 0)
2592                         break;
2593
2594                 r = process_event(&server, &event);
2595                 if (r < 0)
2596                         goto finish;
2597                 else if (r == 0)
2598                         break;
2599         }
2600
2601         log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
2602         driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
2603
2604 finish:
2605         sd_notify(false,
2606                   "STATUS=Shutting down...");
2607
2608         server_done(&server);
2609
2610         return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
2611 }