chiark / gitweb /
fb298be758f91058028feb46aa140becfcc6485b
[elogind.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 2 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
32 #include <sys/user.h>
33
34 #include <systemd/sd-journal.h>
35 #include <systemd/sd-login.h>
36 #include <systemd/sd-messages.h>
37 #include <systemd/sd-daemon.h>
38
39 #include "hashmap.h"
40 #include "journal-file.h"
41 #include "socket-util.h"
42 #include "cgroup-util.h"
43 #include "list.h"
44 #include "journal-rate-limit.h"
45 #include "journal-internal.h"
46 #include "conf-parser.h"
47 #include "journald.h"
48 #include "virt.h"
49 #include "missing.h"
50
51 #ifdef HAVE_ACL
52 #include <sys/acl.h>
53 #include <acl/libacl.h>
54 #include "acl-util.h"
55 #endif
56
57 #ifdef HAVE_SELINUX
58 #include <selinux/selinux.h>
59 #endif
60
/* Upper bound on the number of per-user journal files kept open at once;
 * find_journal() closes entries from the user journal hashmap until the
 * count is below this before opening another one. */
#define USER_JOURNALS_MAX 1024
/* NOTE(review): presumably the cap on concurrently connected stdout
 * streams; the user of this constant is outside this part of the file. */
#define STDOUT_STREAMS_MAX 4096

/* NOTE(review): presumably the defaults handed to the rate limiter when
 * nothing else is configured -- confirm against the server setup code. */
#define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 200

/* How long available_space() trusts its cached result before it scans the
 * journal directory again. */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)

/* NOTE(review): looks like the re-check interval for /var becoming
 * available (see server_flush_to_var()) -- not referenced in the code
 * visible here, confirm. */
#define RECHECK_VAR_AVAILABLE_USEC (30*USEC_PER_SEC)

/* Number of spare iovec slots a caller must leave at the end of the array
 * it passes to dispatch_message_real(), which appends up to this many
 * metadata fields (_PID=, _UID=, ..., _HOSTNAME=). */
#define N_IOVEC_META_FIELDS 17

/* NOTE(review): presumably the largest entry accepted from a client, in
 * bytes (32 MiB) -- the check itself is outside this part of the file. */
#define ENTRY_SIZE_MAX (1024*1024*32)
74
/* Parser state of a StdoutStream.
 *
 * NOTE(review): the names suggest that a client first sends a series of
 * header values (identifier, priority, level-prefix flag and the three
 * forwarding flags) and that the stream then switches to RUNNING for the
 * payload -- the state machine itself is not part of the code visible
 * here, so confirm against the stream line handler. */
typedef enum StdoutStreamState {
        STDOUT_STREAM_IDENTIFIER,
        STDOUT_STREAM_PRIORITY,
        STDOUT_STREAM_LEVEL_PREFIX,
        STDOUT_STREAM_FORWARD_TO_SYSLOG,
        STDOUT_STREAM_FORWARD_TO_KMSG,
        STDOUT_STREAM_FORWARD_TO_CONSOLE,
        STDOUT_STREAM_RUNNING
} StdoutStreamState;
84
/* Per-connection state of one stdout stream client.
 *
 * NOTE(review): the field descriptions below are inferred from names and
 * types only; the code that fills and consumes this structure is outside
 * this part of the file. */
struct StdoutStream {
        Server *server;                 /* back pointer to the owning server */
        StdoutStreamState state;        /* current parser state, see StdoutStreamState */

        int fd;                         /* presumably the connection's socket */

        struct ucred ucred;             /* presumably the peer's credentials */
#ifdef HAVE_SELINUX
        security_context_t security_context; /* presumably the peer's SELinux label */
#endif

        /* Presumably the per-stream settings transmitted in the header */
        char *identifier;
        int priority;
        bool level_prefix:1;
        bool forward_to_syslog:1;
        bool forward_to_kmsg:1;
        bool forward_to_console:1;

        /* Input buffer with room for LINE_MAX bytes plus one extra byte;
         * presumably 'length' is the number of bytes currently buffered. */
        char buffer[LINE_MAX+1];
        size_t length;

        /* Linkage for the list of streams this object is a member of */
        LIST_FIELDS(StdoutStream, stdout_stream);
};
108
109 static int server_flush_to_var(Server *s);
110
111 static uint64_t available_space(Server *s) {
112         char ids[33], *p;
113         const char *f;
114         sd_id128_t machine;
115         struct statvfs ss;
116         uint64_t sum = 0, avail = 0, ss_avail = 0;
117         int r;
118         DIR *d;
119         usec_t ts;
120         JournalMetrics *m;
121
122         ts = now(CLOCK_MONOTONIC);
123
124         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
125                 return s->cached_available_space;
126
127         r = sd_id128_get_machine(&machine);
128         if (r < 0)
129                 return 0;
130
131         if (s->system_journal) {
132                 f = "/var/log/journal/";
133                 m = &s->system_metrics;
134         } else {
135                 f = "/run/log/journal/";
136                 m = &s->runtime_metrics;
137         }
138
139         assert(m);
140
141         p = strappend(f, sd_id128_to_string(machine, ids));
142         if (!p)
143                 return 0;
144
145         d = opendir(p);
146         free(p);
147
148         if (!d)
149                 return 0;
150
151         if (fstatvfs(dirfd(d), &ss) < 0)
152                 goto finish;
153
154         for (;;) {
155                 struct stat st;
156                 struct dirent buf, *de;
157                 int k;
158
159                 k = readdir_r(d, &buf, &de);
160                 if (k != 0) {
161                         r = -k;
162                         goto finish;
163                 }
164
165                 if (!de)
166                         break;
167
168                 if (!dirent_is_file_with_suffix(de, ".journal"))
169                         continue;
170
171                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
172                         continue;
173
174                 sum += (uint64_t) st.st_blocks * (uint64_t) st.st_blksize;
175         }
176
177         avail = sum >= m->max_use ? 0 : m->max_use - sum;
178
179         ss_avail = ss.f_bsize * ss.f_bavail;
180
181         ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
182
183         if (ss_avail < avail)
184                 avail = ss_avail;
185
186         s->cached_available_space = avail;
187         s->cached_available_space_timestamp = ts;
188
189 finish:
190         closedir(d);
191
192         return avail;
193 }
194
195 static void server_read_file_gid(Server *s) {
196         const char *adm = "adm";
197         int r;
198
199         assert(s);
200
201         if (s->file_gid_valid)
202                 return;
203
204         r = get_group_creds(&adm, &s->file_gid);
205         if (r < 0)
206                 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
207
208         /* if we couldn't read the gid, then it will be 0, but that's
209          * fine and we shouldn't try to resolve the group again, so
210          * let's just pretend it worked right-away. */
211         s->file_gid_valid = true;
212 }
213
214 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
215         int r;
216 #ifdef HAVE_ACL
217         acl_t acl;
218         acl_entry_t entry;
219         acl_permset_t permset;
220 #endif
221
222         assert(f);
223
224         server_read_file_gid(s);
225
226         r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
227         if (r < 0)
228                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
229
230 #ifdef HAVE_ACL
231         if (uid <= 0)
232                 return;
233
234         acl = acl_get_fd(f->fd);
235         if (!acl) {
236                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
237                 return;
238         }
239
240         r = acl_find_uid(acl, uid, &entry);
241         if (r <= 0) {
242
243                 if (acl_create_entry(&acl, &entry) < 0 ||
244                     acl_set_tag_type(entry, ACL_USER) < 0 ||
245                     acl_set_qualifier(entry, &uid) < 0) {
246                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
247                         goto finish;
248                 }
249         }
250
251         if (acl_get_permset(entry, &permset) < 0 ||
252             acl_add_perm(permset, ACL_READ) < 0 ||
253             acl_calc_mask(&acl) < 0) {
254                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
255                 goto finish;
256         }
257
258         if (acl_set_fd(f->fd, acl) < 0)
259                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
260
261 finish:
262         acl_free(acl);
263 #endif
264 }
265
266 static JournalFile* find_journal(Server *s, uid_t uid) {
267         char *p;
268         int r;
269         JournalFile *f;
270         char ids[33];
271         sd_id128_t machine;
272
273         assert(s);
274
275         /* We split up user logs only on /var, not on /run. If the
276          * runtime file is open, we write to it exclusively, in order
277          * to guarantee proper order as soon as we flush /run to
278          * /var and close the runtime file. */
279
280         if (s->runtime_journal)
281                 return s->runtime_journal;
282
283         if (uid <= 0)
284                 return s->system_journal;
285
286         r = sd_id128_get_machine(&machine);
287         if (r < 0)
288                 return s->system_journal;
289
290         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
291         if (f)
292                 return f;
293
294         if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0)
295                 return s->system_journal;
296
297         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
298                 /* Too many open? Then let's close one */
299                 f = hashmap_steal_first(s->user_journals);
300                 assert(f);
301                 journal_file_close(f);
302         }
303
304         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->system_journal, &f);
305         free(p);
306
307         if (r < 0)
308                 return s->system_journal;
309
310         server_fix_perms(s, f, uid);
311
312         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
313         if (r < 0) {
314                 journal_file_close(f);
315                 return s->system_journal;
316         }
317
318         return f;
319 }
320
321 static void server_rotate(Server *s) {
322         JournalFile *f;
323         void *k;
324         Iterator i;
325         int r;
326
327         log_info("Rotating...");
328
329         if (s->runtime_journal) {
330                 r = journal_file_rotate(&s->runtime_journal);
331                 if (r < 0)
332                         log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
333         }
334
335         if (s->system_journal) {
336                 r = journal_file_rotate(&s->system_journal);
337                 if (r < 0)
338                         log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
339         }
340
341         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
342                 r = journal_file_rotate(&f);
343                 if (r < 0)
344                         log_error("Failed to rotate %s: %s", f->path, strerror(-r));
345                 else
346                         hashmap_replace(s->user_journals, k, f);
347         }
348 }
349
350 static void server_vacuum(Server *s) {
351         char *p;
352         char ids[33];
353         sd_id128_t machine;
354         int r;
355
356         log_info("Vacuuming...");
357
358         r = sd_id128_get_machine(&machine);
359         if (r < 0) {
360                 log_error("Failed to get machine ID: %s", strerror(-r));
361                 return;
362         }
363
364         sd_id128_to_string(machine, ids);
365
366         if (s->system_journal) {
367                 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
368                         log_error("Out of memory.");
369                         return;
370                 }
371
372                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
373                 if (r < 0 && r != -ENOENT)
374                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
375                 free(p);
376         }
377
378
379         if (s->runtime_journal) {
380                 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
381                         log_error("Out of memory.");
382                         return;
383                 }
384
385                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
386                 if (r < 0 && r != -ENOENT)
387                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
388                 free(p);
389         }
390
391         s->cached_available_space_timestamp = 0;
392 }
393
394 static char *shortened_cgroup_path(pid_t pid) {
395         int r;
396         char *process_path, *init_path, *path;
397
398         assert(pid > 0);
399
400         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
401         if (r < 0)
402                 return NULL;
403
404         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
405         if (r < 0) {
406                 free(process_path);
407                 return NULL;
408         }
409
410         if (endswith(init_path, "/system"))
411                 init_path[strlen(init_path) - 7] = 0;
412         else if (streq(init_path, "/"))
413                 init_path[0] = 0;
414
415         if (startswith(process_path, init_path)) {
416                 char *p;
417
418                 p = strdup(process_path + strlen(init_path));
419                 if (!p) {
420                         free(process_path);
421                         free(init_path);
422                         return NULL;
423                 }
424                 path = p;
425         } else {
426                 path = process_path;
427                 process_path = NULL;
428         }
429
430         free(process_path);
431         free(init_path);
432
433         return path;
434 }
435
436 static void dispatch_message_real(
437                 Server *s,
438                 struct iovec *iovec, unsigned n, unsigned m,
439                 struct ucred *ucred,
440                 struct timeval *tv,
441                 const char *label, size_t label_len) {
442
443         char *pid = NULL, *uid = NULL, *gid = NULL,
444                 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
445                 *comm = NULL, *cmdline = NULL, *hostname = NULL,
446                 *audit_session = NULL, *audit_loginuid = NULL,
447                 *exe = NULL, *cgroup = NULL, *session = NULL,
448                 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
449
450         char idbuf[33];
451         sd_id128_t id;
452         int r;
453         char *t;
454         uid_t loginuid = 0, realuid = 0;
455         JournalFile *f;
456         bool vacuumed = false;
457
458         assert(s);
459         assert(iovec);
460         assert(n > 0);
461         assert(n + N_IOVEC_META_FIELDS <= m);
462
463         if (ucred) {
464                 uint32_t audit;
465                 uid_t owner;
466
467                 realuid = ucred->uid;
468
469                 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
470                         IOVEC_SET_STRING(iovec[n++], pid);
471
472                 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
473                         IOVEC_SET_STRING(iovec[n++], uid);
474
475                 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
476                         IOVEC_SET_STRING(iovec[n++], gid);
477
478                 r = get_process_comm(ucred->pid, &t);
479                 if (r >= 0) {
480                         comm = strappend("_COMM=", t);
481                         free(t);
482
483                         if (comm)
484                                 IOVEC_SET_STRING(iovec[n++], comm);
485                 }
486
487                 r = get_process_exe(ucred->pid, &t);
488                 if (r >= 0) {
489                         exe = strappend("_EXE=", t);
490                         free(t);
491
492                         if (exe)
493                                 IOVEC_SET_STRING(iovec[n++], exe);
494                 }
495
496                 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
497                 if (r >= 0) {
498                         cmdline = strappend("_CMDLINE=", t);
499                         free(t);
500
501                         if (cmdline)
502                                 IOVEC_SET_STRING(iovec[n++], cmdline);
503                 }
504
505                 r = audit_session_from_pid(ucred->pid, &audit);
506                 if (r >= 0)
507                         if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
508                                 IOVEC_SET_STRING(iovec[n++], audit_session);
509
510                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
511                 if (r >= 0)
512                         if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
513                                 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
514
515                 t = shortened_cgroup_path(ucred->pid);
516                 if (t) {
517                         cgroup = strappend("_SYSTEMD_CGROUP=", t);
518                         free(t);
519
520                         if (cgroup)
521                                 IOVEC_SET_STRING(iovec[n++], cgroup);
522                 }
523
524                 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
525                         session = strappend("_SYSTEMD_SESSION=", t);
526                         free(t);
527
528                         if (session)
529                                 IOVEC_SET_STRING(iovec[n++], session);
530                 }
531
532                 if (sd_pid_get_unit(ucred->pid, &t) >= 0) {
533                         unit = strappend("_SYSTEMD_UNIT=", t);
534                         free(t);
535
536                         if (unit)
537                                 IOVEC_SET_STRING(iovec[n++], unit);
538                 }
539
540                 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
541                         if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
542                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
543
544 #ifdef HAVE_SELINUX
545                 if (label) {
546                         selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
547                         if (selinux_context) {
548                                 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
549                                 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
550                                 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
551                                 IOVEC_SET_STRING(iovec[n++], selinux_context);
552                         }
553                 } else {
554                         security_context_t con;
555
556                         if (getpidcon(ucred->pid, &con) >= 0) {
557                                 selinux_context = strappend("_SELINUX_CONTEXT=", con);
558                                 if (selinux_context)
559                                         IOVEC_SET_STRING(iovec[n++], selinux_context);
560
561                                 freecon(con);
562                         }
563                 }
564 #endif
565         }
566
567         if (tv) {
568                 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
569                              (unsigned long long) timeval_load(tv)) >= 0)
570                         IOVEC_SET_STRING(iovec[n++], source_time);
571         }
572
573         /* Note that strictly speaking storing the boot id here is
574          * redundant since the entry includes this in-line
575          * anyway. However, we need this indexed, too. */
576         r = sd_id128_get_boot(&id);
577         if (r >= 0)
578                 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
579                         IOVEC_SET_STRING(iovec[n++], boot_id);
580
581         r = sd_id128_get_machine(&id);
582         if (r >= 0)
583                 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
584                         IOVEC_SET_STRING(iovec[n++], machine_id);
585
586         t = gethostname_malloc();
587         if (t) {
588                 hostname = strappend("_HOSTNAME=", t);
589                 free(t);
590                 if (hostname)
591                         IOVEC_SET_STRING(iovec[n++], hostname);
592         }
593
594         assert(n <= m);
595
596         server_flush_to_var(s);
597
598 retry:
599         f = find_journal(s, realuid == 0 ? 0 : loginuid);
600         if (!f)
601                 log_warning("Dropping message, as we can't find a place to store the data.");
602         else {
603                 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
604
605                 if ((r == -EBADMSG || r == -E2BIG) && !vacuumed) {
606
607                         if (r == -E2BIG)
608                                 log_info("Allocation limit reached, rotating.");
609                         else
610                                 log_warning("Journal file corrupted, rotating.");
611
612                         server_rotate(s);
613                         server_vacuum(s);
614                         vacuumed = true;
615
616                         log_info("Retrying write.");
617                         goto retry;
618                 }
619
620                 if (r < 0)
621                         log_error("Failed to write entry, ignoring: %s", strerror(-r));
622         }
623
624         free(pid);
625         free(uid);
626         free(gid);
627         free(comm);
628         free(exe);
629         free(cmdline);
630         free(source_time);
631         free(boot_id);
632         free(machine_id);
633         free(hostname);
634         free(audit_session);
635         free(audit_loginuid);
636         free(cgroup);
637         free(session);
638         free(owner_uid);
639         free(unit);
640         free(selinux_context);
641 }
642
643 static void driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
644         char mid[11 + 32 + 1];
645         char buffer[16 + LINE_MAX + 1];
646         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
647         int n = 0;
648         va_list ap;
649         struct ucred ucred;
650
651         assert(s);
652         assert(format);
653
654         IOVEC_SET_STRING(iovec[n++], "PRIORITY=5");
655         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
656
657         memcpy(buffer, "MESSAGE=", 8);
658         va_start(ap, format);
659         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
660         va_end(ap);
661         char_array_0(buffer);
662         IOVEC_SET_STRING(iovec[n++], buffer);
663
664         snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
665         char_array_0(mid);
666         IOVEC_SET_STRING(iovec[n++], mid);
667
668         zero(ucred);
669         ucred.pid = getpid();
670         ucred.uid = getuid();
671         ucred.gid = getgid();
672
673         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0);
674 }
675
676 static void dispatch_message(Server *s,
677                              struct iovec *iovec, unsigned n, unsigned m,
678                              struct ucred *ucred,
679                              struct timeval *tv,
680                              const char *label, size_t label_len,
681                              int priority) {
682         int rl;
683         char *path = NULL, *c;
684
685         assert(s);
686         assert(iovec || n == 0);
687
688         if (n == 0)
689                 return;
690
691         if (!ucred)
692                 goto finish;
693
694         path = shortened_cgroup_path(ucred->pid);
695         if (!path)
696                 goto finish;
697
698         /* example: /user/lennart/3/foobar
699          *          /system/dbus.service/foobar
700          *
701          * So let's cut of everything past the third /, since that is
702          * wher user directories start */
703
704         c = strchr(path, '/');
705         if (c) {
706                 c = strchr(c+1, '/');
707                 if (c) {
708                         c = strchr(c+1, '/');
709                         if (c)
710                                 *c = 0;
711                 }
712         }
713
714         rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
715
716         if (rl == 0) {
717                 free(path);
718                 return;
719         }
720
721         /* Write a suppression message if we suppressed something */
722         if (rl > 1)
723                 driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
724
725         free(path);
726
727 finish:
728         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len);
729 }
730
731 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
732         struct msghdr msghdr;
733         struct cmsghdr *cmsg;
734         union {
735                 struct cmsghdr cmsghdr;
736                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
737         } control;
738         union sockaddr_union sa;
739
740         assert(s);
741         assert(iovec);
742         assert(n_iovec > 0);
743
744         zero(msghdr);
745         msghdr.msg_iov = (struct iovec*) iovec;
746         msghdr.msg_iovlen = n_iovec;
747
748         zero(sa);
749         sa.un.sun_family = AF_UNIX;
750         strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
751         msghdr.msg_name = &sa;
752         msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
753
754         if (ucred) {
755                 zero(control);
756                 msghdr.msg_control = &control;
757                 msghdr.msg_controllen = sizeof(control);
758
759                 cmsg = CMSG_FIRSTHDR(&msghdr);
760                 cmsg->cmsg_level = SOL_SOCKET;
761                 cmsg->cmsg_type = SCM_CREDENTIALS;
762                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
763                 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
764                 msghdr.msg_controllen = cmsg->cmsg_len;
765         }
766
767         /* Forward the syslog message we received via /dev/log to
768          * /run/systemd/syslog. Unfortunately we currently can't set
769          * the SO_TIMESTAMP auxiliary data, and hence we don't. */
770
771         if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
772                 return;
773
774         /* The socket is full? I guess the syslog implementation is
775          * too slow, and we shouldn't wait for that... */
776         if (errno == EAGAIN)
777                 return;
778
779         if (ucred && errno == ESRCH) {
780                 struct ucred u;
781
782                 /* Hmm, presumably the sender process vanished
783                  * by now, so let's fix it as good as we
784                  * can, and retry */
785
786                 u = *ucred;
787                 u.pid = getpid();
788                 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
789
790                 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
791                         return;
792
793                 if (errno == EAGAIN)
794                         return;
795         }
796
797         log_debug("Failed to forward syslog message: %m");
798 }
799
800 static void forward_syslog_raw(Server *s, const char *buffer, struct ucred *ucred, struct timeval *tv) {
801         struct iovec iovec;
802
803         assert(s);
804         assert(buffer);
805
806         IOVEC_SET_STRING(iovec, buffer);
807         forward_syslog_iovec(s, &iovec, 1, ucred, tv);
808 }
809
810 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
811         struct iovec iovec[5];
812         char header_priority[6], header_time[64], header_pid[16];
813         int n = 0;
814         time_t t;
815         struct tm *tm;
816         char *ident_buf = NULL;
817
818         assert(s);
819         assert(priority >= 0);
820         assert(priority <= 999);
821         assert(message);
822
823         /* First: priority field */
824         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
825         char_array_0(header_priority);
826         IOVEC_SET_STRING(iovec[n++], header_priority);
827
828         /* Second: timestamp */
829         t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
830         tm = localtime(&t);
831         if (!tm)
832                 return;
833         if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
834                 return;
835         IOVEC_SET_STRING(iovec[n++], header_time);
836
837         /* Third: identifier and PID */
838         if (ucred) {
839                 if (!identifier) {
840                         get_process_comm(ucred->pid, &ident_buf);
841                         identifier = ident_buf;
842                 }
843
844                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
845                 char_array_0(header_pid);
846
847                 if (identifier)
848                         IOVEC_SET_STRING(iovec[n++], identifier);
849
850                 IOVEC_SET_STRING(iovec[n++], header_pid);
851         } else if (identifier) {
852                 IOVEC_SET_STRING(iovec[n++], identifier);
853                 IOVEC_SET_STRING(iovec[n++], ": ");
854         }
855
856         /* Fourth: message */
857         IOVEC_SET_STRING(iovec[n++], message);
858
859         forward_syslog_iovec(s, iovec, n, ucred, tv);
860
861         free(ident_buf);
862 }
863
/* Returns priority unchanged if it carries a non-zero facility. Otherwise
 * (facility 0, i.e. the kernel facility) only the level bits are kept and
 * the facility is replaced by LOG_USER. */
static int fixup_priority(int priority) {
        return (priority & LOG_FACMASK) != 0
                ? priority
                : ((priority & LOG_PRIMASK) | LOG_USER);
}
871
872 static void forward_kmsg(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
873         struct iovec iovec[5];
874         char header_priority[6], header_pid[16];
875         int n = 0;
876         char *ident_buf = NULL;
877         int fd;
878
879         assert(s);
880         assert(priority >= 0);
881         assert(priority <= 999);
882         assert(message);
883
884         /* Never allow messages with kernel facility to be written to
885          * kmsg, regardless where the data comes from. */
886         priority = fixup_priority(priority);
887
888         /* First: priority field */
889         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
890         char_array_0(header_priority);
891         IOVEC_SET_STRING(iovec[n++], header_priority);
892
893         /* Second: identifier and PID */
894         if (ucred) {
895                 if (!identifier) {
896                         get_process_comm(ucred->pid, &ident_buf);
897                         identifier = ident_buf;
898                 }
899
900                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
901                 char_array_0(header_pid);
902
903                 if (identifier)
904                         IOVEC_SET_STRING(iovec[n++], identifier);
905
906                 IOVEC_SET_STRING(iovec[n++], header_pid);
907         } else if (identifier) {
908                 IOVEC_SET_STRING(iovec[n++], identifier);
909                 IOVEC_SET_STRING(iovec[n++], ": ");
910         }
911
912         /* Fourth: message */
913         IOVEC_SET_STRING(iovec[n++], message);
914         IOVEC_SET_STRING(iovec[n++], "\n");
915
916         fd = open("/dev/kmsg", O_WRONLY|O_NOCTTY|O_CLOEXEC);
917         if (fd < 0) {
918                 log_debug("Failed to open /dev/kmsg for logging: %s", strerror(errno));
919                 goto finish;
920         }
921
922         if (writev(fd, iovec, n) < 0)
923                 log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
924
925         close_nointr_nofail(fd);
926
927 finish:
928         free(ident_buf);
929 }
930
931 static void forward_console(Server *s, const char *identifier, const char *message, struct ucred *ucred) {
932         struct iovec iovec[4];
933         char header_pid[16];
934         int n = 0, fd;
935         char *ident_buf = NULL;
936
937         assert(s);
938         assert(message);
939
940         /* First: identifier and PID */
941         if (ucred) {
942                 if (!identifier) {
943                         get_process_comm(ucred->pid, &ident_buf);
944                         identifier = ident_buf;
945                 }
946
947                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
948                 char_array_0(header_pid);
949
950                 if (identifier)
951                         IOVEC_SET_STRING(iovec[n++], identifier);
952
953                 IOVEC_SET_STRING(iovec[n++], header_pid);
954         } else if (identifier) {
955                 IOVEC_SET_STRING(iovec[n++], identifier);
956                 IOVEC_SET_STRING(iovec[n++], ": ");
957         }
958
959         /* Third: message */
960         IOVEC_SET_STRING(iovec[n++], message);
961         IOVEC_SET_STRING(iovec[n++], "\n");
962
963         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
964         if (fd < 0) {
965                 log_debug("Failed to open /dev/console for logging: %s", strerror(errno));
966                 goto finish;
967         }
968
969         if (writev(fd, iovec, n) < 0)
970                 log_debug("Failed to write to /dev/console for logging: %s", strerror(errno));
971
972         close_nointr_nofail(fd);
973
974 finish:
975         free(ident_buf);
976 }
977
978 static void read_identifier(const char **buf, char **identifier, char **pid) {
979         const char *p;
980         char *t;
981         size_t l, e;
982
983         assert(buf);
984         assert(identifier);
985         assert(pid);
986
987         p = *buf;
988
989         p += strspn(p, WHITESPACE);
990         l = strcspn(p, WHITESPACE);
991
992         if (l <= 0 ||
993             p[l-1] != ':')
994                 return;
995
996         e = l;
997         l--;
998
999         if (p[l-1] == ']') {
1000                 size_t k = l-1;
1001
1002                 for (;;) {
1003
1004                         if (p[k] == '[') {
1005                                 t = strndup(p+k+1, l-k-2);
1006                                 if (t)
1007                                         *pid = t;
1008
1009                                 l = k;
1010                                 break;
1011                         }
1012
1013                         if (k == 0)
1014                                 break;
1015
1016                         k--;
1017                 }
1018         }
1019
1020         t = strndup(p, l);
1021         if (t)
1022                 *identifier = t;
1023
1024         *buf = p + e;
1025         *buf += strspn(*buf, WHITESPACE);
1026 }
1027
1028 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv, const char *label, size_t label_len) {
1029         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
1030         struct iovec iovec[N_IOVEC_META_FIELDS + 6];
1031         unsigned n = 0;
1032         int priority = LOG_USER | LOG_INFO;
1033         char *identifier = NULL, *pid = NULL;
1034
1035         assert(s);
1036         assert(buf);
1037
1038         if (s->forward_to_syslog)
1039                 forward_syslog_raw(s, buf, ucred, tv);
1040
1041         parse_syslog_priority((char**) &buf, &priority);
1042         skip_syslog_date((char**) &buf);
1043         read_identifier(&buf, &identifier, &pid);
1044
1045         if (s->forward_to_kmsg)
1046                 forward_kmsg(s, priority, identifier, buf, ucred);
1047
1048         if (s->forward_to_console)
1049                 forward_console(s, identifier, buf, ucred);
1050
1051         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
1052
1053         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1054                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1055
1056         if (priority & LOG_FACMASK)
1057                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1058                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1059
1060         if (identifier) {
1061                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1062                 if (syslog_identifier)
1063                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1064         }
1065
1066         if (pid) {
1067                 syslog_pid = strappend("SYSLOG_PID=", pid);
1068                 if (syslog_pid)
1069                         IOVEC_SET_STRING(iovec[n++], syslog_pid);
1070         }
1071
1072         message = strappend("MESSAGE=", buf);
1073         if (message)
1074                 IOVEC_SET_STRING(iovec[n++], message);
1075
1076         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, label, label_len, priority);
1077
1078         free(message);
1079         free(identifier);
1080         free(pid);
1081         free(syslog_priority);
1082         free(syslog_facility);
1083         free(syslog_identifier);
1084 }
1085
/* Checks whether the l bytes at p form a field name that clients are
 * allowed to set.
 *
 * The rules roughly follow the POSIX syntax recommendations for
 * environment variable names, plus a few extra restrictions:
 *
 * http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
 *
 * A valid name is 1..64 characters long, consists solely of 'A'-'Z',
 * '0'-'9' and '_', and starts with neither a digit nor an underscore
 * (names with a leading underscore are protected). */
static bool valid_user_field(const char *p, size_t l) {
        size_t i;

        /* Reject empty names as well as names longer than 64 chars */
        if (l == 0 || l > 64)
                return false;

        /* Protected (leading underscore) */
        if (p[0] == '_')
                return false;

        /* A digit may not come first */
        if (p[0] >= '0' && p[0] <= '9')
                return false;

        /* Every character must be one of A-Z, 0-9 or '_' */
        for (i = 0; i < l; i++) {
                char c = p[i];
                bool is_upper = c >= 'A' && c <= 'Z';
                bool is_digit = c >= '0' && c <= '9';

                if (!is_upper && !is_digit && c != '_')
                        return false;
        }

        return true;
}
1120
1121 static void process_native_message(
1122                 Server *s,
1123                 const void *buffer, size_t buffer_size,
1124                 struct ucred *ucred,
1125                 struct timeval *tv,
1126                 const char *label, size_t label_len) {
1127
1128         struct iovec *iovec = NULL;
1129         unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1130         const char *p;
1131         size_t remaining;
1132         int priority = LOG_INFO;
1133         char *identifier = NULL, *message = NULL;
1134
1135         assert(s);
1136         assert(buffer || n == 0);
1137
1138         p = buffer;
1139         remaining = buffer_size;
1140
1141         while (remaining > 0) {
1142                 const char *e, *q;
1143
1144                 e = memchr(p, '\n', remaining);
1145
1146                 if (!e) {
1147                         /* Trailing noise, let's ignore it, and flush what we collected */
1148                         log_debug("Received message with trailing noise, ignoring.");
1149                         break;
1150                 }
1151
1152                 if (e == p) {
1153                         /* Entry separator */
1154                         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, priority);
1155                         n = 0;
1156                         priority = LOG_INFO;
1157
1158                         p++;
1159                         remaining--;
1160                         continue;
1161                 }
1162
1163                 if (*p == '.' || *p == '#') {
1164                         /* Ignore control commands for now, and
1165                          * comments too. */
1166                         remaining -= (e - p) + 1;
1167                         p = e + 1;
1168                         continue;
1169                 }
1170
1171                 /* A property follows */
1172
1173                 if (n+N_IOVEC_META_FIELDS >= m) {
1174                         struct iovec *c;
1175                         unsigned u;
1176
1177                         u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1178                         c = realloc(iovec, u * sizeof(struct iovec));
1179                         if (!c) {
1180                                 log_error("Out of memory");
1181                                 break;
1182                         }
1183
1184                         iovec = c;
1185                         m = u;
1186                 }
1187
1188                 q = memchr(p, '=', e - p);
1189                 if (q) {
1190                         if (valid_user_field(p, q - p)) {
1191                                 size_t l;
1192
1193                                 l = e - p;
1194
1195                                 /* If the field name starts with an
1196                                  * underscore, skip the variable,
1197                                  * since that indidates a trusted
1198                                  * field */
1199                                 iovec[n].iov_base = (char*) p;
1200                                 iovec[n].iov_len = l;
1201                                 n++;
1202
1203                                 /* We need to determine the priority
1204                                  * of this entry for the rate limiting
1205                                  * logic */
1206                                 if (l == 10 &&
1207                                     memcmp(p, "PRIORITY=", 9) == 0 &&
1208                                     p[9] >= '0' && p[9] <= '9')
1209                                         priority = (priority & LOG_FACMASK) | (p[9] - '0');
1210
1211                                 else if (l == 17 &&
1212                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1213                                          p[16] >= '0' && p[16] <= '9')
1214                                         priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1215
1216                                 else if (l == 18 &&
1217                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1218                                          p[16] >= '0' && p[16] <= '9' &&
1219                                          p[17] >= '0' && p[17] <= '9')
1220                                         priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1221
1222                                 else if (l >= 12 &&
1223                                          memcmp(p, "SYSLOG_IDENTIFIER=", 11) == 0) {
1224                                         char *t;
1225
1226                                         t = strndup(p + 11, l - 11);
1227                                         if (t) {
1228                                                 free(identifier);
1229                                                 identifier = t;
1230                                         }
1231                                 } else if (l >= 8 &&
1232                                            memcmp(p, "MESSAGE=", 8) == 0) {
1233                                         char *t;
1234
1235                                         t = strndup(p + 8, l - 8);
1236                                         if (t) {
1237                                                 free(message);
1238                                                 message = t;
1239                                         }
1240                                 }
1241                         }
1242
1243                         remaining -= (e - p) + 1;
1244                         p = e + 1;
1245                         continue;
1246                 } else {
1247                         uint64_t l;
1248                         char *k;
1249
1250                         if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1251                                 log_debug("Failed to parse message, ignoring.");
1252                                 break;
1253                         }
1254
1255                         memcpy(&l, e + 1, sizeof(uint64_t));
1256                         l = le64toh(l);
1257
1258                         if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1259                             e[1+sizeof(uint64_t)+l] != '\n') {
1260                                 log_debug("Failed to parse message, ignoring.");
1261                                 break;
1262                         }
1263
1264                         k = malloc((e - p) + 1 + l);
1265                         if (!k) {
1266                                 log_error("Out of memory");
1267                                 break;
1268                         }
1269
1270                         memcpy(k, p, e - p);
1271                         k[e - p] = '=';
1272                         memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1273
1274                         if (valid_user_field(p, e - p)) {
1275                                 iovec[n].iov_base = k;
1276                                 iovec[n].iov_len = (e - p) + 1 + l;
1277                                 n++;
1278                         } else
1279                                 free(k);
1280
1281                         remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1282                         p = e + 1 + sizeof(uint64_t) + l + 1;
1283                 }
1284         }
1285
1286         if (n <= 0)
1287                 goto finish;
1288
1289         tn = n++;
1290         IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1291
1292         if (message) {
1293                 if (s->forward_to_syslog)
1294                         forward_syslog(s, priority, identifier, message, ucred, tv);
1295
1296                 if (s->forward_to_kmsg)
1297                         forward_kmsg(s, priority, identifier, message, ucred);
1298
1299                 if (s->forward_to_console)
1300                         forward_console(s, identifier, message, ucred);
1301         }
1302
1303         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, priority);
1304
1305 finish:
1306         for (j = 0; j < n; j++)  {
1307                 if (j == tn)
1308                         continue;
1309
1310                 if (iovec[j].iov_base < buffer ||
1311                     (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1312                         free(iovec[j].iov_base);
1313         }
1314
1315         free(iovec);
1316         free(identifier);
1317         free(message);
1318 }
1319
1320 static void process_native_file(
1321                 Server *s,
1322                 int fd,
1323                 struct ucred *ucred,
1324                 struct timeval *tv,
1325                 const char *label, size_t label_len) {
1326
1327         struct stat st;
1328         void *p;
1329         ssize_t n;
1330
1331         assert(s);
1332         assert(fd >= 0);
1333
1334         /* Data is in the passed file, since it didn't fit in a
1335          * datagram. We can't map the file here, since clients might
1336          * then truncate it and trigger a SIGBUS for us. So let's
1337          * stupidly read it */
1338
1339         if (fstat(fd, &st) < 0) {
1340                 log_error("Failed to stat passed file, ignoring: %m");
1341                 return;
1342         }
1343
1344         if (!S_ISREG(st.st_mode)) {
1345                 log_error("File passed is not regular. Ignoring.");
1346                 return;
1347         }
1348
1349         if (st.st_size <= 0)
1350                 return;
1351
1352         if (st.st_size > ENTRY_SIZE_MAX) {
1353                 log_error("File passed too large. Ignoring.");
1354                 return;
1355         }
1356
1357         p = malloc(st.st_size);
1358         if (!p) {
1359                 log_error("Out of memory");
1360                 return;
1361         }
1362
1363         n = pread(fd, p, st.st_size, 0);
1364         if (n < 0)
1365                 log_error("Failed to read file, ignoring: %s", strerror(-n));
1366         else if (n > 0)
1367                 process_native_message(s, p, n, ucred, tv, label, label_len);
1368
1369         free(p);
1370 }
1371
1372 static int stdout_stream_log(StdoutStream *s, const char *p) {
1373         struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1374         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1375         unsigned n = 0;
1376         int priority;
1377         char *label = NULL;
1378         size_t label_len = 0;
1379
1380         assert(s);
1381         assert(p);
1382
1383         if (isempty(p))
1384                 return 0;
1385
1386         priority = s->priority;
1387
1388         if (s->level_prefix)
1389                 parse_syslog_priority((char**) &p, &priority);
1390
1391         if (s->forward_to_syslog || s->server->forward_to_syslog)
1392                 forward_syslog(s->server, fixup_priority(priority), s->identifier, p, &s->ucred, NULL);
1393
1394         if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1395                 forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1396
1397         if (s->forward_to_console || s->server->forward_to_console)
1398                 forward_console(s->server, s->identifier, p, &s->ucred);
1399
1400         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1401
1402         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1403                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1404
1405         if (priority & LOG_FACMASK)
1406                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1407                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1408
1409         if (s->identifier) {
1410                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1411                 if (syslog_identifier)
1412                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1413         }
1414
1415         message = strappend("MESSAGE=", p);
1416         if (message)
1417                 IOVEC_SET_STRING(iovec[n++], message);
1418
1419 #ifdef HAVE_SELINUX
1420         if (s->security_context) {
1421                 label = (char*) s->security_context;
1422                 label_len = strlen((char*) s->security_context);
1423         }
1424 #endif
1425
1426         dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, label, label_len, priority);
1427
1428         free(message);
1429         free(syslog_priority);
1430         free(syslog_facility);
1431         free(syslog_identifier);
1432
1433         return 0;
1434 }
1435
1436 static int stdout_stream_line(StdoutStream *s, char *p) {
1437         int r;
1438
1439         assert(s);
1440         assert(p);
1441
1442         p = strstrip(p);
1443
1444         switch (s->state) {
1445
1446         case STDOUT_STREAM_IDENTIFIER:
1447                 if (isempty(p))
1448                         s->identifier = NULL;
1449                 else  {
1450                         s->identifier = strdup(p);
1451                         if (!s->identifier) {
1452                                 log_error("Out of memory");
1453                                 return -ENOMEM;
1454                         }
1455                 }
1456
1457                 s->state = STDOUT_STREAM_PRIORITY;
1458                 return 0;
1459
1460         case STDOUT_STREAM_PRIORITY:
1461                 r = safe_atoi(p, &s->priority);
1462                 if (r < 0 || s->priority <= 0 || s->priority >= 999) {
1463                         log_warning("Failed to parse log priority line.");
1464                         return -EINVAL;
1465                 }
1466
1467                 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1468                 return 0;
1469
1470         case STDOUT_STREAM_LEVEL_PREFIX:
1471                 r = parse_boolean(p);
1472                 if (r < 0) {
1473                         log_warning("Failed to parse level prefix line.");
1474                         return -EINVAL;
1475                 }
1476
1477                 s->level_prefix = !!r;
1478                 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1479                 return 0;
1480
1481         case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1482                 r = parse_boolean(p);
1483                 if (r < 0) {
1484                         log_warning("Failed to parse forward to syslog line.");
1485                         return -EINVAL;
1486                 }
1487
1488                 s->forward_to_syslog = !!r;
1489                 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1490                 return 0;
1491
1492         case STDOUT_STREAM_FORWARD_TO_KMSG:
1493                 r = parse_boolean(p);
1494                 if (r < 0) {
1495                         log_warning("Failed to parse copy to kmsg line.");
1496                         return -EINVAL;
1497                 }
1498
1499                 s->forward_to_kmsg = !!r;
1500                 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1501                 return 0;
1502
1503         case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1504                 r = parse_boolean(p);
1505                 if (r < 0) {
1506                         log_warning("Failed to parse copy to console line.");
1507                         return -EINVAL;
1508                 }
1509
1510                 s->forward_to_console = !!r;
1511                 s->state = STDOUT_STREAM_RUNNING;
1512                 return 0;
1513
1514         case STDOUT_STREAM_RUNNING:
1515                 return stdout_stream_log(s, p);
1516         }
1517
1518         assert_not_reached("Unknown stream state");
1519 }
1520
1521 static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
1522         char *p;
1523         size_t remaining;
1524         int r;
1525
1526         assert(s);
1527
1528         p = s->buffer;
1529         remaining = s->length;
1530         for (;;) {
1531                 char *end;
1532                 size_t skip;
1533
1534                 end = memchr(p, '\n', remaining);
1535                 if (end)
1536                         skip = end - p + 1;
1537                 else if (remaining >= sizeof(s->buffer) - 1) {
1538                         end = p + sizeof(s->buffer) - 1;
1539                         skip = remaining;
1540                 } else
1541                         break;
1542
1543                 *end = 0;
1544
1545                 r = stdout_stream_line(s, p);
1546                 if (r < 0)
1547                         return r;
1548
1549                 remaining -= skip;
1550                 p += skip;
1551         }
1552
1553         if (force_flush && remaining > 0) {
1554                 p[remaining] = 0;
1555                 r = stdout_stream_line(s, p);
1556                 if (r < 0)
1557                         return r;
1558
1559                 p += remaining;
1560                 remaining = 0;
1561         }
1562
1563         if (p > s->buffer) {
1564                 memmove(s->buffer, p, remaining);
1565                 s->length = remaining;
1566         }
1567
1568         return 0;
1569 }
1570
1571 static int stdout_stream_process(StdoutStream *s) {
1572         ssize_t l;
1573         int r;
1574
1575         assert(s);
1576
1577         l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1578         if (l < 0) {
1579
1580                 if (errno == EAGAIN)
1581                         return 0;
1582
1583                 log_warning("Failed to read from stream: %m");
1584                 return -errno;
1585         }
1586
1587         if (l == 0) {
1588                 r = stdout_stream_scan(s, true);
1589                 if (r < 0)
1590                         return r;
1591
1592                 return 0;
1593         }
1594
1595         s->length += l;
1596         r = stdout_stream_scan(s, false);
1597         if (r < 0)
1598                 return r;
1599
1600         return 1;
1601
1602 }
1603
1604 static void stdout_stream_free(StdoutStream *s) {
1605         assert(s);
1606
1607         if (s->server) {
1608                 assert(s->server->n_stdout_streams > 0);
1609                 s->server->n_stdout_streams --;
1610                 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
1611         }
1612
1613         if (s->fd >= 0) {
1614                 if (s->server)
1615                         epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1616
1617                 close_nointr_nofail(s->fd);
1618         }
1619
1620 #ifdef HAVE_SELINUX
1621         if (s->security_context)
1622                 freecon(s->security_context);
1623 #endif
1624
1625         free(s->identifier);
1626         free(s);
1627 }
1628
1629 static int stdout_stream_new(Server *s) {
1630         StdoutStream *stream;
1631         int fd, r;
1632         socklen_t len;
1633         struct epoll_event ev;
1634
1635         assert(s);
1636
1637         fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1638         if (fd < 0) {
1639                 if (errno == EAGAIN)
1640                         return 0;
1641
1642                 log_error("Failed to accept stdout connection: %m");
1643                 return -errno;
1644         }
1645
1646         if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1647                 log_warning("Too many stdout streams, refusing connection.");
1648                 close_nointr_nofail(fd);
1649                 return 0;
1650         }
1651
1652         stream = new0(StdoutStream, 1);
1653         if (!stream) {
1654                 log_error("Out of memory.");
1655                 close_nointr_nofail(fd);
1656                 return -ENOMEM;
1657         }
1658
1659         stream->fd = fd;
1660
1661         len = sizeof(stream->ucred);
1662         if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1663                 log_error("Failed to determine peer credentials: %m");
1664                 r = -errno;
1665                 goto fail;
1666         }
1667
1668 #ifdef HAVE_SELINUX
1669         if (getpeercon(fd, &stream->security_context) < 0)
1670                 log_error("Failed to determine peer security context.");
1671 #endif
1672
1673         if (shutdown(fd, SHUT_WR) < 0) {
1674                 log_error("Failed to shutdown writing side of socket: %m");
1675                 r = -errno;
1676                 goto fail;
1677         }
1678
1679         zero(ev);
1680         ev.data.ptr = stream;
1681         ev.events = EPOLLIN;
1682         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1683                 log_error("Failed to add stream to event loop: %m");
1684                 r = -errno;
1685                 goto fail;
1686         }
1687
1688         stream->server = s;
1689         LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1690         s->n_stdout_streams ++;
1691
1692         return 0;
1693
1694 fail:
1695         stdout_stream_free(stream);
1696         return r;
1697 }
1698
1699 static int parse_kernel_timestamp(char **_p, usec_t *t) {
1700         usec_t r;
1701         int k, i;
1702         char *p;
1703
1704         assert(_p);
1705         assert(*_p);
1706         assert(t);
1707
1708         p = *_p;
1709
1710         if (strlen(p) < 14 || p[0] != '[' || p[13] != ']' || p[6] != '.')
1711                 return 0;
1712
1713         r = 0;
1714
1715         for (i = 1; i <= 5; i++) {
1716                 r *= 10;
1717
1718                 if (p[i] == ' ')
1719                         continue;
1720
1721                 k = undecchar(p[i]);
1722                 if (k < 0)
1723                         return 0;
1724
1725                 r += k;
1726         }
1727
1728         for (i = 7; i <= 12; i++) {
1729                 r *= 10;
1730
1731                 k = undecchar(p[i]);
1732                 if (k < 0)
1733                         return 0;
1734
1735                 r += k;
1736         }
1737
1738         *t = r;
1739         *_p += 14;
1740         *_p += strspn(*_p, WHITESPACE);
1741
1742         return 1;
1743 }
1744
1745 static void proc_kmsg_line(Server *s, const char *p) {
1746         struct iovec iovec[N_IOVEC_META_FIELDS + 7];
1747         char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
1748         int priority = LOG_KERN | LOG_INFO;
1749         unsigned n = 0;
1750         usec_t usec;
1751         char *identifier = NULL, *pid = NULL;
1752
1753         assert(s);
1754         assert(p);
1755
1756         if (isempty(p))
1757                 return;
1758
1759         parse_syslog_priority((char **) &p, &priority);
1760
1761         if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
1762                 return;
1763
1764         if (parse_kernel_timestamp((char **) &p, &usec) > 0) {
1765                 if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
1766                              (unsigned long long) usec) >= 0)
1767                         IOVEC_SET_STRING(iovec[n++], source_time);
1768         }
1769
1770         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
1771
1772         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1773                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1774
1775         if ((priority & LOG_FACMASK) == LOG_KERN) {
1776
1777                 if (s->forward_to_syslog)
1778                         forward_syslog(s, priority, "kernel", p, NULL, NULL);
1779
1780                 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
1781         } else {
1782                 read_identifier(&p, &identifier, &pid);
1783
1784                 if (s->forward_to_syslog)
1785                         forward_syslog(s, priority, identifier, p, NULL, NULL);
1786
1787                 if (identifier) {
1788                         syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1789                         if (syslog_identifier)
1790                                 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1791                 }
1792
1793                 if (pid) {
1794                         syslog_pid = strappend("SYSLOG_PID=", pid);
1795                         if (syslog_pid)
1796                                 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1797                 }
1798
1799                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1800                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1801         }
1802
1803         message = strappend("MESSAGE=", p);
1804         if (message)
1805                 IOVEC_SET_STRING(iovec[n++], message);
1806
1807         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, priority);
1808
1809         free(message);
1810         free(syslog_priority);
1811         free(syslog_identifier);
1812         free(syslog_pid);
1813         free(syslog_facility);
1814         free(source_time);
1815         free(identifier);
1816         free(pid);
1817 }
1818
1819 static void proc_kmsg_scan(Server *s) {
1820         char *p;
1821         size_t remaining;
1822
1823         assert(s);
1824
1825         p = s->proc_kmsg_buffer;
1826         remaining = s->proc_kmsg_length;
1827         for (;;) {
1828                 char *end;
1829                 size_t skip;
1830
1831                 end = memchr(p, '\n', remaining);
1832                 if (end)
1833                         skip = end - p + 1;
1834                 else if (remaining >= sizeof(s->proc_kmsg_buffer) - 1) {
1835                         end = p + sizeof(s->proc_kmsg_buffer) - 1;
1836                         skip = remaining;
1837                 } else
1838                         break;
1839
1840                 *end = 0;
1841
1842                 proc_kmsg_line(s, p);
1843
1844                 remaining -= skip;
1845                 p += skip;
1846         }
1847
1848         if (p > s->proc_kmsg_buffer) {
1849                 memmove(s->proc_kmsg_buffer, p, remaining);
1850                 s->proc_kmsg_length = remaining;
1851         }
1852 }
1853
1854 static int system_journal_open(Server *s) {
1855         int r;
1856         char *fn;
1857         sd_id128_t machine;
1858         char ids[33];
1859
1860         r = sd_id128_get_machine(&machine);
1861         if (r < 0)
1862                 return r;
1863
1864         sd_id128_to_string(machine, ids);
1865
1866         if (!s->system_journal) {
1867
1868                 /* First try to create the machine path, but not the prefix */
1869                 fn = strappend("/var/log/journal/", ids);
1870                 if (!fn)
1871                         return -ENOMEM;
1872                 (void) mkdir(fn, 0755);
1873                 free(fn);
1874
1875                 /* The create the system journal file */
1876                 fn = join("/var/log/journal/", ids, "/system.journal", NULL);
1877                 if (!fn)
1878                         return -ENOMEM;
1879
1880                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, NULL, &s->system_journal);
1881                 free(fn);
1882
1883                 if (r >= 0) {
1884                         journal_default_metrics(&s->system_metrics, s->system_journal->fd);
1885
1886                         s->system_journal->metrics = s->system_metrics;
1887                         s->system_journal->compress = s->compress;
1888
1889                         server_fix_perms(s, s->system_journal, 0);
1890                 } else if (r < 0) {
1891
1892                         if (r != -ENOENT && r != -EROFS)
1893                                 log_warning("Failed to open system journal: %s", strerror(-r));
1894
1895                         r = 0;
1896                 }
1897         }
1898
1899         if (!s->runtime_journal) {
1900
1901                 fn = join("/run/log/journal/", ids, "/system.journal", NULL);
1902                 if (!fn)
1903                         return -ENOMEM;
1904
1905                 if (s->system_journal) {
1906
1907                         /* Try to open the runtime journal, but only
1908                          * if it already exists, so that we can flush
1909                          * it into the system journal */
1910
1911                         r = journal_file_open_reliably(fn, O_RDWR, 0640, NULL, &s->runtime_journal);
1912                         free(fn);
1913
1914                         if (r < 0) {
1915                                 if (r != -ENOENT)
1916                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
1917
1918                                 r = 0;
1919                         }
1920
1921                 } else {
1922
1923                         /* OK, we really need the runtime journal, so create
1924                          * it if necessary. */
1925
1926                         (void) mkdir_parents(fn, 0755);
1927                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, NULL, &s->runtime_journal);
1928                         free(fn);
1929
1930                         if (r < 0) {
1931                                 log_error("Failed to open runtime journal: %s", strerror(-r));
1932                                 return r;
1933                         }
1934                 }
1935
1936                 if (s->runtime_journal) {
1937                         journal_default_metrics(&s->runtime_metrics, s->runtime_journal->fd);
1938
1939                         s->runtime_journal->metrics = s->runtime_metrics;
1940                         s->runtime_journal->compress = s->compress;
1941
1942                         server_fix_perms(s, s->runtime_journal, 0);
1943                 }
1944         }
1945
1946         return r;
1947 }
1948
1949 static int server_flush_to_var(Server *s) {
1950         char path[] = "/run/log/journal/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
1951         Object *o = NULL;
1952         int r;
1953         sd_id128_t machine;
1954         sd_journal *j;
1955         usec_t ts;
1956
1957         assert(s);
1958
1959         if (!s->runtime_journal)
1960                 return 0;
1961
1962         ts = now(CLOCK_MONOTONIC);
1963         if (s->var_available_timestamp + RECHECK_VAR_AVAILABLE_USEC > ts)
1964                 return 0;
1965
1966         s->var_available_timestamp = ts;
1967
1968         system_journal_open(s);
1969
1970         if (!s->system_journal)
1971                 return 0;
1972
1973         log_info("Flushing to /var...");
1974
1975         r = sd_id128_get_machine(&machine);
1976         if (r < 0) {
1977                 log_error("Failed to get machine id: %s", strerror(-r));
1978                 return r;
1979         }
1980
1981         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1982         if (r < 0) {
1983                 log_error("Failed to read runtime journal: %s", strerror(-r));
1984                 return r;
1985         }
1986
1987         SD_JOURNAL_FOREACH(j) {
1988                 JournalFile *f;
1989
1990                 f = j->current_file;
1991                 assert(f && f->current_offset > 0);
1992
1993                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1994                 if (r < 0) {
1995                         log_error("Can't read entry: %s", strerror(-r));
1996                         goto finish;
1997                 }
1998
1999                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2000                 if (r == -E2BIG) {
2001                         log_info("Allocation limit reached.");
2002
2003                         journal_file_post_change(s->system_journal);
2004                         server_rotate(s);
2005                         server_vacuum(s);
2006
2007                         r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2008                 }
2009
2010                 if (r < 0) {
2011                         log_error("Can't write entry: %s", strerror(-r));
2012                         goto finish;
2013                 }
2014         }
2015
2016 finish:
2017         journal_file_post_change(s->system_journal);
2018
2019         journal_file_close(s->runtime_journal);
2020         s->runtime_journal = NULL;
2021
2022         if (r >= 0) {
2023                 sd_id128_to_string(machine, path + 17);
2024                 rm_rf(path, false, true, false);
2025         }
2026
2027         return r;
2028 }
2029
2030 static int server_read_proc_kmsg(Server *s) {
2031         ssize_t l;
2032         assert(s);
2033         assert(s->proc_kmsg_fd >= 0);
2034
2035         l = read(s->proc_kmsg_fd, s->proc_kmsg_buffer + s->proc_kmsg_length, sizeof(s->proc_kmsg_buffer) - 1 - s->proc_kmsg_length);
2036         if (l < 0) {
2037
2038                 if (errno == EAGAIN || errno == EINTR)
2039                         return 0;
2040
2041                 log_error("Failed to read from kernel: %m");
2042                 return -errno;
2043         }
2044
2045         s->proc_kmsg_length += l;
2046
2047         proc_kmsg_scan(s);
2048         return 1;
2049 }
2050
2051 static int server_flush_proc_kmsg(Server *s) {
2052         int r;
2053
2054         assert(s);
2055
2056         if (s->proc_kmsg_fd < 0)
2057                 return 0;
2058
2059         log_info("Flushing /proc/kmsg...");
2060
2061         for (;;) {
2062                 r = server_read_proc_kmsg(s);
2063                 if (r < 0)
2064                         return r;
2065
2066                 if (r == 0)
2067                         break;
2068         }
2069
2070         return 0;
2071 }
2072
2073 static int process_event(Server *s, struct epoll_event *ev) {
2074         assert(s);
2075
2076         if (ev->data.fd == s->signal_fd) {
2077                 struct signalfd_siginfo sfsi;
2078                 ssize_t n;
2079
2080                 if (ev->events != EPOLLIN) {
2081                         log_info("Got invalid event from epoll.");
2082                         return -EIO;
2083                 }
2084
2085                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
2086                 if (n != sizeof(sfsi)) {
2087
2088                         if (n >= 0)
2089                                 return -EIO;
2090
2091                         if (errno == EINTR || errno == EAGAIN)
2092                                 return 1;
2093
2094                         return -errno;
2095                 }
2096
2097                 if (sfsi.ssi_signo == SIGUSR1) {
2098                         server_flush_to_var(s);
2099                         return 0;
2100                 }
2101
2102                 log_debug("Received SIG%s", signal_to_string(sfsi.ssi_signo));
2103                 return 0;
2104
2105         } else if (ev->data.fd == s->proc_kmsg_fd) {
2106                 int r;
2107
2108                 if (ev->events != EPOLLIN) {
2109                         log_info("Got invalid event from epoll.");
2110                         return -EIO;
2111                 }
2112
2113                 r = server_read_proc_kmsg(s);
2114                 if (r < 0)
2115                         return r;
2116
2117                 return 1;
2118
2119         } else if (ev->data.fd == s->native_fd ||
2120                    ev->data.fd == s->syslog_fd) {
2121
2122                 if (ev->events != EPOLLIN) {
2123                         log_info("Got invalid event from epoll.");
2124                         return -EIO;
2125                 }
2126
2127                 for (;;) {
2128                         struct msghdr msghdr;
2129                         struct iovec iovec;
2130                         struct ucred *ucred = NULL;
2131                         struct timeval *tv = NULL;
2132                         struct cmsghdr *cmsg;
2133                         char *label = NULL;
2134                         size_t label_len = 0;
2135                         union {
2136                                 struct cmsghdr cmsghdr;
2137                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
2138                                             CMSG_SPACE(sizeof(struct timeval)) +
2139                                             CMSG_SPACE(sizeof(int)) +
2140                                             CMSG_SPACE(PAGE_SIZE)]; /* selinux label */
2141                         } control;
2142                         ssize_t n;
2143                         int v;
2144                         int *fds = NULL;
2145                         unsigned n_fds = 0;
2146
2147                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
2148                                 log_error("SIOCINQ failed: %m");
2149                                 return -errno;
2150                         }
2151
2152                         if (s->buffer_size < (size_t) v) {
2153                                 void *b;
2154                                 size_t l;
2155
2156                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
2157                                 b = realloc(s->buffer, l+1);
2158
2159                                 if (!b) {
2160                                         log_error("Couldn't increase buffer.");
2161                                         return -ENOMEM;
2162                                 }
2163
2164                                 s->buffer_size = l;
2165                                 s->buffer = b;
2166                         }
2167
2168                         zero(iovec);
2169                         iovec.iov_base = s->buffer;
2170                         iovec.iov_len = s->buffer_size;
2171
2172                         zero(control);
2173                         zero(msghdr);
2174                         msghdr.msg_iov = &iovec;
2175                         msghdr.msg_iovlen = 1;
2176                         msghdr.msg_control = &control;
2177                         msghdr.msg_controllen = sizeof(control);
2178
2179                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
2180                         if (n < 0) {
2181
2182                                 if (errno == EINTR || errno == EAGAIN)
2183                                         return 1;
2184
2185                                 log_error("recvmsg() failed: %m");
2186                                 return -errno;
2187                         }
2188
2189                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
2190
2191                                 if (cmsg->cmsg_level == SOL_SOCKET &&
2192                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
2193                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
2194                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
2195                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2196                                          cmsg->cmsg_type == SCM_SECURITY) {
2197                                         label = (char*) CMSG_DATA(cmsg);
2198                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
2199                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
2200                                          cmsg->cmsg_type == SO_TIMESTAMP &&
2201                                          cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
2202                                         tv = (struct timeval*) CMSG_DATA(cmsg);
2203                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2204                                          cmsg->cmsg_type == SCM_RIGHTS) {
2205                                         fds = (int*) CMSG_DATA(cmsg);
2206                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
2207                                 }
2208                         }
2209
2210                         if (ev->data.fd == s->syslog_fd) {
2211                                 char *e;
2212
2213                                 if (n > 0 && n_fds == 0) {
2214                                         e = memchr(s->buffer, '\n', n);
2215                                         if (e)
2216                                                 *e = 0;
2217                                         else
2218                                                 s->buffer[n] = 0;
2219
2220                                         process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
2221                                 } else if (n_fds > 0)
2222                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
2223
2224                         } else {
2225                                 if (n > 0 && n_fds == 0)
2226                                         process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
2227                                 else if (n == 0 && n_fds == 1)
2228                                         process_native_file(s, fds[0], ucred, tv, label, label_len);
2229                                 else if (n_fds > 0)
2230                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
2231                         }
2232
2233                         close_many(fds, n_fds);
2234                 }
2235
2236                 return 1;
2237
2238         } else if (ev->data.fd == s->stdout_fd) {
2239
2240                 if (ev->events != EPOLLIN) {
2241                         log_info("Got invalid event from epoll.");
2242                         return -EIO;
2243                 }
2244
2245                 stdout_stream_new(s);
2246                 return 1;
2247
2248         } else {
2249                 StdoutStream *stream;
2250
2251                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
2252                         log_info("Got invalid event from epoll.");
2253                         return -EIO;
2254                 }
2255
2256                 /* If it is none of the well-known fds, it must be an
2257                  * stdout stream fd. Note that this is a bit ugly here
2258                  * (since we rely that none of the well-known fds
2259                  * could be interpreted as pointer), but nonetheless
2260                  * safe, since the well-known fds would never get an
2261                  * fd > 4096, i.e. beyond the first memory page */
2262
2263                 stream = ev->data.ptr;
2264
2265                 if (stdout_stream_process(stream) <= 0)
2266                         stdout_stream_free(stream);
2267
2268                 return 1;
2269         }
2270
2271         log_error("Unknown event.");
2272         return 0;
2273 }
2274
2275 static int open_syslog_socket(Server *s) {
2276         union sockaddr_union sa;
2277         int one, r;
2278         struct epoll_event ev;
2279
2280         assert(s);
2281
2282         if (s->syslog_fd < 0) {
2283
2284                 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2285                 if (s->syslog_fd < 0) {
2286                         log_error("socket() failed: %m");
2287                         return -errno;
2288                 }
2289
2290                 zero(sa);
2291                 sa.un.sun_family = AF_UNIX;
2292                 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
2293
2294                 unlink(sa.un.sun_path);
2295
2296                 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2297                 if (r < 0) {
2298                         log_error("bind() failed: %m");
2299                         return -errno;
2300                 }
2301
2302                 chmod(sa.un.sun_path, 0666);
2303         } else
2304                 fd_nonblock(s->syslog_fd, 1);
2305
2306         one = 1;
2307         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2308         if (r < 0) {
2309                 log_error("SO_PASSCRED failed: %m");
2310                 return -errno;
2311         }
2312
2313 #ifdef HAVE_SELINUX
2314         one = 1;
2315         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2316         if (r < 0)
2317                 log_warning("SO_PASSSEC failed: %m");
2318 #endif
2319
2320         one = 1;
2321         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2322         if (r < 0) {
2323                 log_error("SO_TIMESTAMP failed: %m");
2324                 return -errno;
2325         }
2326
2327         zero(ev);
2328         ev.events = EPOLLIN;
2329         ev.data.fd = s->syslog_fd;
2330         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2331                 log_error("Failed to add syslog server fd to epoll object: %m");
2332                 return -errno;
2333         }
2334
2335         return 0;
2336 }
2337
2338 static int open_native_socket(Server*s) {
2339         union sockaddr_union sa;
2340         int one, r;
2341         struct epoll_event ev;
2342
2343         assert(s);
2344
2345         if (s->native_fd < 0) {
2346
2347                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2348                 if (s->native_fd < 0) {
2349                         log_error("socket() failed: %m");
2350                         return -errno;
2351                 }
2352
2353                 zero(sa);
2354                 sa.un.sun_family = AF_UNIX;
2355                 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
2356
2357                 unlink(sa.un.sun_path);
2358
2359                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2360                 if (r < 0) {
2361                         log_error("bind() failed: %m");
2362                         return -errno;
2363                 }
2364
2365                 chmod(sa.un.sun_path, 0666);
2366         } else
2367                 fd_nonblock(s->native_fd, 1);
2368
2369         one = 1;
2370         r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2371         if (r < 0) {
2372                 log_error("SO_PASSCRED failed: %m");
2373                 return -errno;
2374         }
2375
2376 #ifdef HAVE_SELINUX
2377         one = 1;
2378         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2379         if (r < 0)
2380                 log_warning("SO_PASSSEC failed: %m");
2381 #endif
2382
2383         one = 1;
2384         r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2385         if (r < 0) {
2386                 log_error("SO_TIMESTAMP failed: %m");
2387                 return -errno;
2388         }
2389
2390         zero(ev);
2391         ev.events = EPOLLIN;
2392         ev.data.fd = s->native_fd;
2393         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2394                 log_error("Failed to add native server fd to epoll object: %m");
2395                 return -errno;
2396         }
2397
2398         return 0;
2399 }
2400
2401 static int open_stdout_socket(Server *s) {
2402         union sockaddr_union sa;
2403         int r;
2404         struct epoll_event ev;
2405
2406         assert(s);
2407
2408         if (s->stdout_fd < 0) {
2409
2410                 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2411                 if (s->stdout_fd < 0) {
2412                         log_error("socket() failed: %m");
2413                         return -errno;
2414                 }
2415
2416                 zero(sa);
2417                 sa.un.sun_family = AF_UNIX;
2418                 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
2419
2420                 unlink(sa.un.sun_path);
2421
2422                 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2423                 if (r < 0) {
2424                         log_error("bind() failed: %m");
2425                         return -errno;
2426                 }
2427
2428                 chmod(sa.un.sun_path, 0666);
2429
2430                 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2431                         log_error("liste() failed: %m");
2432                         return -errno;
2433                 }
2434         } else
2435                 fd_nonblock(s->stdout_fd, 1);
2436
2437         zero(ev);
2438         ev.events = EPOLLIN;
2439         ev.data.fd = s->stdout_fd;
2440         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2441                 log_error("Failed to add stdout server fd to epoll object: %m");
2442                 return -errno;
2443         }
2444
2445         return 0;
2446 }
2447
2448 static int open_proc_kmsg(Server *s) {
2449         struct epoll_event ev;
2450
2451         assert(s);
2452
2453         if (!s->import_proc_kmsg)
2454                 return 0;
2455
2456
2457         s->proc_kmsg_fd = open("/proc/kmsg", O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2458         if (s->proc_kmsg_fd < 0) {
2459                 log_warning("Failed to open /proc/kmsg, ignoring: %m");
2460                 return 0;
2461         }
2462
2463         zero(ev);
2464         ev.events = EPOLLIN;
2465         ev.data.fd = s->proc_kmsg_fd;
2466         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->proc_kmsg_fd, &ev) < 0) {
2467                 log_error("Failed to add /proc/kmsg fd to epoll object: %m");
2468                 return -errno;
2469         }
2470
2471         return 0;
2472 }
2473
2474 static int open_signalfd(Server *s) {
2475         sigset_t mask;
2476         struct epoll_event ev;
2477
2478         assert(s);
2479
2480         assert_se(sigemptyset(&mask) == 0);
2481         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, -1);
2482         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2483
2484         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2485         if (s->signal_fd < 0) {
2486                 log_error("signalfd(): %m");
2487                 return -errno;
2488         }
2489
2490         zero(ev);
2491         ev.events = EPOLLIN;
2492         ev.data.fd = s->signal_fd;
2493
2494         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2495                 log_error("epoll_ctl(): %m");
2496                 return -errno;
2497         }
2498
2499         return 0;
2500 }
2501
2502 static int server_parse_proc_cmdline(Server *s) {
2503         char *line, *w, *state;
2504         int r;
2505         size_t l;
2506
2507         if (detect_container(NULL) > 0)
2508                 return 0;
2509
2510         r = read_one_line_file("/proc/cmdline", &line);
2511         if (r < 0) {
2512                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2513                 return 0;
2514         }
2515
2516         FOREACH_WORD_QUOTED(w, l, line, state) {
2517                 char *word;
2518
2519                 word = strndup(w, l);
2520                 if (!word) {
2521                         r = -ENOMEM;
2522                         goto finish;
2523                 }
2524
2525                 if (startswith(word, "systemd_journald.forward_to_syslog=")) {
2526                         r = parse_boolean(word + 35);
2527                         if (r < 0)
2528                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2529                         else
2530                                 s->forward_to_syslog = r;
2531                 } else if (startswith(word, "systemd_journald.forward_to_kmsg=")) {
2532                         r = parse_boolean(word + 33);
2533                         if (r < 0)
2534                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2535                         else
2536                                 s->forward_to_kmsg = r;
2537                 } else if (startswith(word, "systemd_journald.forward_to_console=")) {
2538                         r = parse_boolean(word + 36);
2539                         if (r < 0)
2540                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2541                         else
2542                                 s->forward_to_console = r;
2543                 }
2544
2545                 free(word);
2546         }
2547
2548         r = 0;
2549
2550 finish:
2551         free(line);
2552         return r;
2553 }
2554
2555 static int server_parse_config_file(Server *s) {
2556         FILE *f;
2557         const char *fn;
2558         int r;
2559
2560         assert(s);
2561
2562         fn = "/etc/systemd/systemd-journald.conf";
2563         f = fopen(fn, "re");
2564         if (!f) {
2565                 if (errno == ENOENT)
2566                         return 0;
2567
2568                 log_warning("Failed to open configuration file %s: %m", fn);
2569                 return -errno;
2570         }
2571
2572         r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2573         if (r < 0)
2574                 log_warning("Failed to parse configuration file: %s", strerror(-r));
2575
2576         fclose(f);
2577
2578         return r;
2579 }
2580
2581 static int server_init(Server *s) {
2582         int n, r, fd;
2583
2584         assert(s);
2585
2586         zero(*s);
2587         s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->proc_kmsg_fd = -1;
2588         s->compress = true;
2589
2590         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
2591         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
2592
2593         s->forward_to_syslog = true;
2594         s->import_proc_kmsg = true;
2595
2596         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
2597         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
2598
2599         server_parse_config_file(s);
2600         server_parse_proc_cmdline(s);
2601
2602         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
2603         if (!s->user_journals) {
2604                 log_error("Out of memory.");
2605                 return -ENOMEM;
2606         }
2607
2608         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2609         if (s->epoll_fd < 0) {
2610                 log_error("Failed to create epoll object: %m");
2611                 return -errno;
2612         }
2613
2614         n = sd_listen_fds(true);
2615         if (n < 0) {
2616                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
2617                 return n;
2618         }
2619
2620         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2621
2622                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
2623
2624                         if (s->native_fd >= 0) {
2625                                 log_error("Too many native sockets passed.");
2626                                 return -EINVAL;
2627                         }
2628
2629                         s->native_fd = fd;
2630
2631                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
2632
2633                         if (s->stdout_fd >= 0) {
2634                                 log_error("Too many stdout sockets passed.");
2635                                 return -EINVAL;
2636                         }
2637
2638                         s->stdout_fd = fd;
2639
2640                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
2641
2642                         if (s->syslog_fd >= 0) {
2643                                 log_error("Too many /dev/log sockets passed.");
2644                                 return -EINVAL;
2645                         }
2646
2647                         s->syslog_fd = fd;
2648
2649                 } else {
2650                         log_error("Unknown socket passed.");
2651                         return -EINVAL;
2652                 }
2653         }
2654
2655         r = open_syslog_socket(s);
2656         if (r < 0)
2657                 return r;
2658
2659         r = open_native_socket(s);
2660         if (r < 0)
2661                 return r;
2662
2663         r = open_stdout_socket(s);
2664         if (r < 0)
2665                 return r;
2666
2667         r = open_proc_kmsg(s);
2668         if (r < 0)
2669                 return r;
2670
2671         r = open_signalfd(s);
2672         if (r < 0)
2673                 return r;
2674
2675         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2676         if (!s->rate_limit)
2677                 return -ENOMEM;
2678
2679         r = system_journal_open(s);
2680         if (r < 0)
2681                 return r;
2682
2683         return 0;
2684 }
2685
2686 static void server_done(Server *s) {
2687         JournalFile *f;
2688         assert(s);
2689
2690         while (s->stdout_streams)
2691                 stdout_stream_free(s->stdout_streams);
2692
2693         if (s->system_journal)
2694                 journal_file_close(s->system_journal);
2695
2696         if (s->runtime_journal)
2697                 journal_file_close(s->runtime_journal);
2698
2699         while ((f = hashmap_steal_first(s->user_journals)))
2700                 journal_file_close(f);
2701
2702         hashmap_free(s->user_journals);
2703
2704         if (s->epoll_fd >= 0)
2705                 close_nointr_nofail(s->epoll_fd);
2706
2707         if (s->signal_fd >= 0)
2708                 close_nointr_nofail(s->signal_fd);
2709
2710         if (s->syslog_fd >= 0)
2711                 close_nointr_nofail(s->syslog_fd);
2712
2713         if (s->native_fd >= 0)
2714                 close_nointr_nofail(s->native_fd);
2715
2716         if (s->stdout_fd >= 0)
2717                 close_nointr_nofail(s->stdout_fd);
2718
2719         if (s->proc_kmsg_fd >= 0)
2720                 close_nointr_nofail(s->proc_kmsg_fd);
2721
2722         if (s->rate_limit)
2723                 journal_rate_limit_free(s->rate_limit);
2724
2725         free(s->buffer);
2726 }
2727
2728 int main(int argc, char *argv[]) {
2729         Server server;
2730         int r;
2731
2732         /* if (getppid() != 1) { */
2733         /*         log_error("This program should be invoked by init only."); */
2734         /*         return EXIT_FAILURE; */
2735         /* } */
2736
2737         if (argc > 1) {
2738                 log_error("This program does not take arguments.");
2739                 return EXIT_FAILURE;
2740         }
2741
2742         log_set_target(LOG_TARGET_CONSOLE);
2743         log_parse_environment();
2744         log_open();
2745
2746         umask(0022);
2747
2748         r = server_init(&server);
2749         if (r < 0)
2750                 goto finish;
2751
2752         server_vacuum(&server);
2753         server_flush_to_var(&server);
2754         server_flush_proc_kmsg(&server);
2755
2756         log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
2757         driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
2758
2759         sd_notify(false,
2760                   "READY=1\n"
2761                   "STATUS=Processing requests...");
2762
2763         for (;;) {
2764                 struct epoll_event event;
2765
2766                 r = epoll_wait(server.epoll_fd, &event, 1, -1);
2767                 if (r < 0) {
2768
2769                         if (errno == EINTR)
2770                                 continue;
2771
2772                         log_error("epoll_wait() failed: %m");
2773                         r = -errno;
2774                         goto finish;
2775                 } else if (r == 0)
2776                         break;
2777
2778                 r = process_event(&server, &event);
2779                 if (r < 0)
2780                         goto finish;
2781                 else if (r == 0)
2782                         break;
2783         }
2784
2785         log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
2786         driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
2787
2788 finish:
2789         sd_notify(false,
2790                   "STATUS=Shutting down...");
2791
2792         server_done(&server);
2793
2794         return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
2795 }