chiark / gitweb /
50f66be9d9c59a79592150b5c8360a8a91019e9a
[elogind.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 2 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
32 #include <sys/user.h>
33
34 #include <systemd/sd-journal.h>
35 #include <systemd/sd-login.h>
36 #include <systemd/sd-messages.h>
37 #include <systemd/sd-daemon.h>
38
39 #include "hashmap.h"
40 #include "journal-file.h"
41 #include "socket-util.h"
42 #include "cgroup-util.h"
43 #include "list.h"
44 #include "journal-rate-limit.h"
45 #include "journal-internal.h"
46 #include "conf-parser.h"
47 #include "journald.h"
48 #include "virt.h"
49 #include "missing.h"
50
51 #ifdef HAVE_ACL
52 #include <sys/acl.h>
53 #include <acl/libacl.h>
54 #include "acl-util.h"
55 #endif
56
57 #ifdef HAVE_SELINUX
58 #include <selinux/selinux.h>
59 #endif
60
/* Upper bound on simultaneously open per-user journal files: once the
 * user_journals hashmap reaches this size, find_journal() closes an
 * already open file before opening another one. */
#define USER_JOURNALS_MAX 1024
#define STDOUT_STREAMS_MAX 4096

#define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 200

/* How long a result of available_space() is served from the cache before
 * the journal directory is scanned again. */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)

#define RECHECK_VAR_AVAILABLE_USEC (30*USEC_PER_SEC)

/* Number of spare iovec slots callers must leave free for the metadata
 * fields that dispatch_message_real() appends to every entry. */
#define N_IOVEC_META_FIELDS 17

#define ENTRY_SIZE_MAX (1024*1024*32)
74
/* State of a StdoutStream. Every state except STDOUT_STREAM_RUNNING is
 * named after one of the per-stream settings stored in struct
 * StdoutStream (identifier, priority, level_prefix, forward_to_*).
 * NOTE(review): presumably these are header fields read in this order
 * before the stream switches to STDOUT_STREAM_RUNNING -- the code that
 * drives the state machine is not part of this excerpt, confirm there. */
typedef enum StdoutStreamState {
        STDOUT_STREAM_IDENTIFIER,
        STDOUT_STREAM_PRIORITY,
        STDOUT_STREAM_LEVEL_PREFIX,
        STDOUT_STREAM_FORWARD_TO_SYSLOG,
        STDOUT_STREAM_FORWARD_TO_KMSG,
        STDOUT_STREAM_FORWARD_TO_CONSOLE,
        STDOUT_STREAM_RUNNING
} StdoutStreamState;
84
/* Per-connection bookkeeping for one stdout stream.
 * NOTE(review): the code that fills and consumes these fields lies
 * outside this excerpt; the field notes below are derived from names and
 * types only and should be confirmed against it. */
struct StdoutStream {
        Server *server;                 /* server this stream belongs to */
        StdoutStreamState state;        /* current state, see StdoutStreamState */

        int fd;                         /* file descriptor of the stream */

        struct ucred ucred;             /* credentials (pid/uid/gid) associated with the stream */
#ifdef HAVE_SELINUX
        security_context_t security_context; /* SELinux context, only present in SELinux builds */
#endif

        /* Per-stream settings; each has a matching StdoutStreamState */
        char *identifier;
        int priority;
        bool level_prefix:1;
        bool forward_to_syslog:1;
        bool forward_to_kmsg:1;
        bool forward_to_console:1;

        char buffer[LINE_MAX+1];        /* data buffer, sized for LINE_MAX bytes plus one extra byte */
        size_t length;                  /* presumably the number of valid bytes in buffer -- confirm */

        LIST_FIELDS(StdoutStream, stdout_stream); /* list linkage for chaining streams together */
};
108
109 static int server_flush_to_var(Server *s);
110
111 static uint64_t available_space(Server *s) {
112         char ids[33], *p;
113         const char *f;
114         sd_id128_t machine;
115         struct statvfs ss;
116         uint64_t sum = 0, avail = 0, ss_avail = 0;
117         int r;
118         DIR *d;
119         usec_t ts;
120         JournalMetrics *m;
121
122         ts = now(CLOCK_MONOTONIC);
123
124         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
125                 return s->cached_available_space;
126
127         r = sd_id128_get_machine(&machine);
128         if (r < 0)
129                 return 0;
130
131         if (s->system_journal) {
132                 f = "/var/log/journal/";
133                 m = &s->system_metrics;
134         } else {
135                 f = "/run/log/journal/";
136                 m = &s->runtime_metrics;
137         }
138
139         assert(m);
140
141         p = strappend(f, sd_id128_to_string(machine, ids));
142         if (!p)
143                 return 0;
144
145         d = opendir(p);
146         free(p);
147
148         if (!d)
149                 return 0;
150
151         if (fstatvfs(dirfd(d), &ss) < 0)
152                 goto finish;
153
154         for (;;) {
155                 struct stat st;
156                 struct dirent buf, *de;
157                 int k;
158
159                 k = readdir_r(d, &buf, &de);
160                 if (k != 0) {
161                         r = -k;
162                         goto finish;
163                 }
164
165                 if (!de)
166                         break;
167
168                 if (!dirent_is_file_with_suffix(de, ".journal"))
169                         continue;
170
171                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
172                         continue;
173
174                 sum += (uint64_t) st.st_blocks * (uint64_t) st.st_blksize;
175         }
176
177         avail = sum >= m->max_use ? 0 : m->max_use - sum;
178
179         ss_avail = ss.f_bsize * ss.f_bavail;
180
181         ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
182
183         if (ss_avail < avail)
184                 avail = ss_avail;
185
186         s->cached_available_space = avail;
187         s->cached_available_space_timestamp = ts;
188
189 finish:
190         closedir(d);
191
192         return avail;
193 }
194
195 static void server_read_file_gid(Server *s) {
196         const char *adm = "adm";
197         int r;
198
199         assert(s);
200
201         if (s->file_gid_valid)
202                 return;
203
204         r = get_group_creds(&adm, &s->file_gid);
205         if (r < 0)
206                 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
207
208         /* if we couldn't read the gid, then it will be 0, but that's
209          * fine and we shouldn't try to resolve the group again, so
210          * let's just pretend it worked right-away. */
211         s->file_gid_valid = true;
212 }
213
214 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
215         int r;
216 #ifdef HAVE_ACL
217         acl_t acl;
218         acl_entry_t entry;
219         acl_permset_t permset;
220 #endif
221
222         assert(f);
223
224         server_read_file_gid(s);
225
226         r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
227         if (r < 0)
228                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
229
230 #ifdef HAVE_ACL
231         if (uid <= 0)
232                 return;
233
234         acl = acl_get_fd(f->fd);
235         if (!acl) {
236                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
237                 return;
238         }
239
240         r = acl_find_uid(acl, uid, &entry);
241         if (r <= 0) {
242
243                 if (acl_create_entry(&acl, &entry) < 0 ||
244                     acl_set_tag_type(entry, ACL_USER) < 0 ||
245                     acl_set_qualifier(entry, &uid) < 0) {
246                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
247                         goto finish;
248                 }
249         }
250
251         if (acl_get_permset(entry, &permset) < 0 ||
252             acl_add_perm(permset, ACL_READ) < 0 ||
253             acl_calc_mask(&acl) < 0) {
254                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
255                 goto finish;
256         }
257
258         if (acl_set_fd(f->fd, acl) < 0)
259                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
260
261 finish:
262         acl_free(acl);
263 #endif
264 }
265
266 static JournalFile* find_journal(Server *s, uid_t uid) {
267         char *p;
268         int r;
269         JournalFile *f;
270         char ids[33];
271         sd_id128_t machine;
272
273         assert(s);
274
275         /* We split up user logs only on /var, not on /run. If the
276          * runtime file is open, we write to it exclusively, in order
277          * to guarantee proper order as soon as we flush /run to
278          * /var and close the runtime file. */
279
280         if (s->runtime_journal)
281                 return s->runtime_journal;
282
283         if (uid <= 0)
284                 return s->system_journal;
285
286         r = sd_id128_get_machine(&machine);
287         if (r < 0)
288                 return s->system_journal;
289
290         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
291         if (f)
292                 return f;
293
294         if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0)
295                 return s->system_journal;
296
297         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
298                 /* Too many open? Then let's close one */
299                 f = hashmap_steal_first(s->user_journals);
300                 assert(f);
301                 journal_file_close(f);
302         }
303
304         r = journal_file_open(p, O_RDWR|O_CREAT, 0640, s->system_journal, &f);
305         free(p);
306
307         if (r < 0)
308                 return s->system_journal;
309
310         server_fix_perms(s, f, uid);
311         f->metrics = s->system_metrics;
312         f->compress = s->compress;
313
314         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
315         if (r < 0) {
316                 journal_file_close(f);
317                 return s->system_journal;
318         }
319
320         return f;
321 }
322
323 static void server_rotate(Server *s) {
324         JournalFile *f;
325         void *k;
326         Iterator i;
327         int r;
328
329         log_info("Rotating...");
330
331         if (s->runtime_journal) {
332                 r = journal_file_rotate(&s->runtime_journal);
333                 if (r < 0)
334                         log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
335         }
336
337         if (s->system_journal) {
338                 r = journal_file_rotate(&s->system_journal);
339                 if (r < 0)
340                         log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
341         }
342
343         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
344                 r = journal_file_rotate(&f);
345                 if (r < 0)
346                         log_error("Failed to rotate %s: %s", f->path, strerror(-r));
347                 else
348                         hashmap_replace(s->user_journals, k, f);
349         }
350 }
351
352 static void server_vacuum(Server *s) {
353         char *p;
354         char ids[33];
355         sd_id128_t machine;
356         int r;
357
358         log_info("Vacuuming...");
359
360         r = sd_id128_get_machine(&machine);
361         if (r < 0) {
362                 log_error("Failed to get machine ID: %s", strerror(-r));
363                 return;
364         }
365
366         sd_id128_to_string(machine, ids);
367
368         if (s->system_journal) {
369                 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
370                         log_error("Out of memory.");
371                         return;
372                 }
373
374                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
375                 if (r < 0 && r != -ENOENT)
376                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
377                 free(p);
378         }
379
380
381         if (s->runtime_journal) {
382                 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
383                         log_error("Out of memory.");
384                         return;
385                 }
386
387                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
388                 if (r < 0 && r != -ENOENT)
389                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
390                 free(p);
391         }
392
393         s->cached_available_space_timestamp = 0;
394 }
395
396 static char *shortened_cgroup_path(pid_t pid) {
397         int r;
398         char *process_path, *init_path, *path;
399
400         assert(pid > 0);
401
402         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
403         if (r < 0)
404                 return NULL;
405
406         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
407         if (r < 0) {
408                 free(process_path);
409                 return NULL;
410         }
411
412         if (endswith(init_path, "/system"))
413                 init_path[strlen(init_path) - 7] = 0;
414         else if (streq(init_path, "/"))
415                 init_path[0] = 0;
416
417         if (startswith(process_path, init_path)) {
418                 char *p;
419
420                 p = strdup(process_path + strlen(init_path));
421                 if (!p) {
422                         free(process_path);
423                         free(init_path);
424                         return NULL;
425                 }
426                 path = p;
427         } else {
428                 path = process_path;
429                 process_path = NULL;
430         }
431
432         free(process_path);
433         free(init_path);
434
435         return path;
436 }
437
438 static void dispatch_message_real(
439                 Server *s,
440                 struct iovec *iovec, unsigned n, unsigned m,
441                 struct ucred *ucred,
442                 struct timeval *tv,
443                 const char *label, size_t label_len) {
444
445         char *pid = NULL, *uid = NULL, *gid = NULL,
446                 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
447                 *comm = NULL, *cmdline = NULL, *hostname = NULL,
448                 *audit_session = NULL, *audit_loginuid = NULL,
449                 *exe = NULL, *cgroup = NULL, *session = NULL,
450                 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
451
452         char idbuf[33];
453         sd_id128_t id;
454         int r;
455         char *t;
456         uid_t loginuid = 0, realuid = 0;
457         JournalFile *f;
458         bool vacuumed = false;
459
460         assert(s);
461         assert(iovec);
462         assert(n > 0);
463         assert(n + N_IOVEC_META_FIELDS <= m);
464
465         if (ucred) {
466                 uint32_t audit;
467                 uid_t owner;
468
469                 realuid = ucred->uid;
470
471                 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
472                         IOVEC_SET_STRING(iovec[n++], pid);
473
474                 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
475                         IOVEC_SET_STRING(iovec[n++], uid);
476
477                 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
478                         IOVEC_SET_STRING(iovec[n++], gid);
479
480                 r = get_process_comm(ucred->pid, &t);
481                 if (r >= 0) {
482                         comm = strappend("_COMM=", t);
483                         free(t);
484
485                         if (comm)
486                                 IOVEC_SET_STRING(iovec[n++], comm);
487                 }
488
489                 r = get_process_exe(ucred->pid, &t);
490                 if (r >= 0) {
491                         exe = strappend("_EXE=", t);
492                         free(t);
493
494                         if (exe)
495                                 IOVEC_SET_STRING(iovec[n++], exe);
496                 }
497
498                 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
499                 if (r >= 0) {
500                         cmdline = strappend("_CMDLINE=", t);
501                         free(t);
502
503                         if (cmdline)
504                                 IOVEC_SET_STRING(iovec[n++], cmdline);
505                 }
506
507                 r = audit_session_from_pid(ucred->pid, &audit);
508                 if (r >= 0)
509                         if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
510                                 IOVEC_SET_STRING(iovec[n++], audit_session);
511
512                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
513                 if (r >= 0)
514                         if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
515                                 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
516
517                 t = shortened_cgroup_path(ucred->pid);
518                 if (t) {
519                         cgroup = strappend("_SYSTEMD_CGROUP=", t);
520                         free(t);
521
522                         if (cgroup)
523                                 IOVEC_SET_STRING(iovec[n++], cgroup);
524                 }
525
526                 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
527                         session = strappend("_SYSTEMD_SESSION=", t);
528                         free(t);
529
530                         if (session)
531                                 IOVEC_SET_STRING(iovec[n++], session);
532                 }
533
534                 if (sd_pid_get_unit(ucred->pid, &t) >= 0) {
535                         unit = strappend("_SYSTEMD_UNIT=", t);
536                         free(t);
537
538                         if (unit)
539                                 IOVEC_SET_STRING(iovec[n++], unit);
540                 }
541
542                 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
543                         if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
544                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
545
546 #ifdef HAVE_SELINUX
547                 if (label) {
548                         selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
549                         if (selinux_context) {
550                                 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
551                                 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
552                                 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
553                                 IOVEC_SET_STRING(iovec[n++], selinux_context);
554                         }
555                 } else {
556                         security_context_t con;
557
558                         if (getpidcon(ucred->pid, &con) >= 0) {
559                                 selinux_context = strappend("_SELINUX_CONTEXT=", con);
560                                 if (selinux_context)
561                                         IOVEC_SET_STRING(iovec[n++], selinux_context);
562
563                                 freecon(con);
564                         }
565                 }
566 #endif
567         }
568
569         if (tv) {
570                 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
571                              (unsigned long long) timeval_load(tv)) >= 0)
572                         IOVEC_SET_STRING(iovec[n++], source_time);
573         }
574
575         /* Note that strictly speaking storing the boot id here is
576          * redundant since the entry includes this in-line
577          * anyway. However, we need this indexed, too. */
578         r = sd_id128_get_boot(&id);
579         if (r >= 0)
580                 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
581                         IOVEC_SET_STRING(iovec[n++], boot_id);
582
583         r = sd_id128_get_machine(&id);
584         if (r >= 0)
585                 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
586                         IOVEC_SET_STRING(iovec[n++], machine_id);
587
588         t = gethostname_malloc();
589         if (t) {
590                 hostname = strappend("_HOSTNAME=", t);
591                 free(t);
592                 if (hostname)
593                         IOVEC_SET_STRING(iovec[n++], hostname);
594         }
595
596         assert(n <= m);
597
598         server_flush_to_var(s);
599
600 retry:
601         f = find_journal(s, realuid == 0 ? 0 : loginuid);
602         if (!f)
603                 log_warning("Dropping message, as we can't find a place to store the data.");
604         else {
605                 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
606
607                 if (r == -E2BIG && !vacuumed) {
608                         log_info("Allocation limit reached.");
609
610                         server_rotate(s);
611                         server_vacuum(s);
612                         vacuumed = true;
613
614                         log_info("Retrying write.");
615                         goto retry;
616                 }
617
618                 if (r < 0)
619                         log_error("Failed to write entry, ignoring: %s", strerror(-r));
620         }
621
622         free(pid);
623         free(uid);
624         free(gid);
625         free(comm);
626         free(exe);
627         free(cmdline);
628         free(source_time);
629         free(boot_id);
630         free(machine_id);
631         free(hostname);
632         free(audit_session);
633         free(audit_loginuid);
634         free(cgroup);
635         free(session);
636         free(owner_uid);
637         free(unit);
638         free(selinux_context);
639 }
640
641 static void driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
642         char mid[11 + 32 + 1];
643         char buffer[16 + LINE_MAX + 1];
644         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
645         int n = 0;
646         va_list ap;
647         struct ucred ucred;
648
649         assert(s);
650         assert(format);
651
652         IOVEC_SET_STRING(iovec[n++], "PRIORITY=5");
653         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
654
655         memcpy(buffer, "MESSAGE=", 8);
656         va_start(ap, format);
657         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
658         va_end(ap);
659         char_array_0(buffer);
660         IOVEC_SET_STRING(iovec[n++], buffer);
661
662         snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
663         char_array_0(mid);
664         IOVEC_SET_STRING(iovec[n++], mid);
665
666         zero(ucred);
667         ucred.pid = getpid();
668         ucred.uid = getuid();
669         ucred.gid = getgid();
670
671         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0);
672 }
673
674 static void dispatch_message(Server *s,
675                              struct iovec *iovec, unsigned n, unsigned m,
676                              struct ucred *ucred,
677                              struct timeval *tv,
678                              const char *label, size_t label_len,
679                              int priority) {
680         int rl;
681         char *path = NULL, *c;
682
683         assert(s);
684         assert(iovec || n == 0);
685
686         if (n == 0)
687                 return;
688
689         if (!ucred)
690                 goto finish;
691
692         path = shortened_cgroup_path(ucred->pid);
693         if (!path)
694                 goto finish;
695
696         /* example: /user/lennart/3/foobar
697          *          /system/dbus.service/foobar
698          *
699          * So let's cut of everything past the third /, since that is
700          * wher user directories start */
701
702         c = strchr(path, '/');
703         if (c) {
704                 c = strchr(c+1, '/');
705                 if (c) {
706                         c = strchr(c+1, '/');
707                         if (c)
708                                 *c = 0;
709                 }
710         }
711
712         rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
713
714         if (rl == 0) {
715                 free(path);
716                 return;
717         }
718
719         /* Write a suppression message if we suppressed something */
720         if (rl > 1)
721                 driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
722
723         free(path);
724
725 finish:
726         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len);
727 }
728
729 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
730         struct msghdr msghdr;
731         struct cmsghdr *cmsg;
732         union {
733                 struct cmsghdr cmsghdr;
734                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
735         } control;
736         union sockaddr_union sa;
737
738         assert(s);
739         assert(iovec);
740         assert(n_iovec > 0);
741
742         zero(msghdr);
743         msghdr.msg_iov = (struct iovec*) iovec;
744         msghdr.msg_iovlen = n_iovec;
745
746         zero(sa);
747         sa.un.sun_family = AF_UNIX;
748         strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
749         msghdr.msg_name = &sa;
750         msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
751
752         if (ucred) {
753                 zero(control);
754                 msghdr.msg_control = &control;
755                 msghdr.msg_controllen = sizeof(control);
756
757                 cmsg = CMSG_FIRSTHDR(&msghdr);
758                 cmsg->cmsg_level = SOL_SOCKET;
759                 cmsg->cmsg_type = SCM_CREDENTIALS;
760                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
761                 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
762                 msghdr.msg_controllen = cmsg->cmsg_len;
763         }
764
765         /* Forward the syslog message we received via /dev/log to
766          * /run/systemd/syslog. Unfortunately we currently can't set
767          * the SO_TIMESTAMP auxiliary data, and hence we don't. */
768
769         if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
770                 return;
771
772         /* The socket is full? I guess the syslog implementation is
773          * too slow, and we shouldn't wait for that... */
774         if (errno == EAGAIN)
775                 return;
776
777         if (ucred && errno == ESRCH) {
778                 struct ucred u;
779
780                 /* Hmm, presumably the sender process vanished
781                  * by now, so let's fix it as good as we
782                  * can, and retry */
783
784                 u = *ucred;
785                 u.pid = getpid();
786                 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
787
788                 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
789                         return;
790
791                 if (errno == EAGAIN)
792                         return;
793         }
794
795         log_debug("Failed to forward syslog message: %m");
796 }
797
798 static void forward_syslog_raw(Server *s, const char *buffer, struct ucred *ucred, struct timeval *tv) {
799         struct iovec iovec;
800
801         assert(s);
802         assert(buffer);
803
804         IOVEC_SET_STRING(iovec, buffer);
805         forward_syslog_iovec(s, &iovec, 1, ucred, tv);
806 }
807
808 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
809         struct iovec iovec[5];
810         char header_priority[6], header_time[64], header_pid[16];
811         int n = 0;
812         time_t t;
813         struct tm *tm;
814         char *ident_buf = NULL;
815
816         assert(s);
817         assert(priority >= 0);
818         assert(priority <= 999);
819         assert(message);
820
821         /* First: priority field */
822         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
823         char_array_0(header_priority);
824         IOVEC_SET_STRING(iovec[n++], header_priority);
825
826         /* Second: timestamp */
827         t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
828         tm = localtime(&t);
829         if (!tm)
830                 return;
831         if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
832                 return;
833         IOVEC_SET_STRING(iovec[n++], header_time);
834
835         /* Third: identifier and PID */
836         if (ucred) {
837                 if (!identifier) {
838                         get_process_comm(ucred->pid, &ident_buf);
839                         identifier = ident_buf;
840                 }
841
842                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
843                 char_array_0(header_pid);
844
845                 if (identifier)
846                         IOVEC_SET_STRING(iovec[n++], identifier);
847
848                 IOVEC_SET_STRING(iovec[n++], header_pid);
849         } else if (identifier) {
850                 IOVEC_SET_STRING(iovec[n++], identifier);
851                 IOVEC_SET_STRING(iovec[n++], ": ");
852         }
853
854         /* Fourth: message */
855         IOVEC_SET_STRING(iovec[n++], message);
856
857         forward_syslog_iovec(s, iovec, n, ucred, tv);
858
859         free(ident_buf);
860 }
861
/* Returns priority unchanged if it carries a non-zero facility; otherwise
 * keeps only the level bits and assigns the LOG_USER facility. Used
 * before writing to kmsg so that no message is logged with the kernel
 * (zero) facility. */
static int fixup_priority(int priority) {
        int facility = priority & LOG_FACMASK;

        return facility != 0 ? priority : ((priority & LOG_PRIMASK) | LOG_USER);
}
869
870 static void forward_kmsg(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
871         struct iovec iovec[5];
872         char header_priority[6], header_pid[16];
873         int n = 0;
874         char *ident_buf = NULL;
875         int fd;
876
877         assert(s);
878         assert(priority >= 0);
879         assert(priority <= 999);
880         assert(message);
881
882         /* Never allow messages with kernel facility to be written to
883          * kmsg, regardless where the data comes from. */
884         priority = fixup_priority(priority);
885
886         /* First: priority field */
887         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
888         char_array_0(header_priority);
889         IOVEC_SET_STRING(iovec[n++], header_priority);
890
891         /* Second: identifier and PID */
892         if (ucred) {
893                 if (!identifier) {
894                         get_process_comm(ucred->pid, &ident_buf);
895                         identifier = ident_buf;
896                 }
897
898                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
899                 char_array_0(header_pid);
900
901                 if (identifier)
902                         IOVEC_SET_STRING(iovec[n++], identifier);
903
904                 IOVEC_SET_STRING(iovec[n++], header_pid);
905         } else if (identifier) {
906                 IOVEC_SET_STRING(iovec[n++], identifier);
907                 IOVEC_SET_STRING(iovec[n++], ": ");
908         }
909
910         /* Fourth: message */
911         IOVEC_SET_STRING(iovec[n++], message);
912         IOVEC_SET_STRING(iovec[n++], "\n");
913
914         fd = open("/dev/kmsg", O_WRONLY|O_NOCTTY|O_CLOEXEC);
915         if (fd < 0) {
916                 log_debug("Failed to open /dev/kmsg for logging: %s", strerror(errno));
917                 goto finish;
918         }
919
920         if (writev(fd, iovec, n) < 0)
921                 log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
922
923         close_nointr_nofail(fd);
924
925 finish:
926         free(ident_buf);
927 }
928
929 static void forward_console(Server *s, const char *identifier, const char *message, struct ucred *ucred) {
930         struct iovec iovec[4];
931         char header_pid[16];
932         int n = 0, fd;
933         char *ident_buf = NULL;
934
935         assert(s);
936         assert(message);
937
938         /* First: identifier and PID */
939         if (ucred) {
940                 if (!identifier) {
941                         get_process_comm(ucred->pid, &ident_buf);
942                         identifier = ident_buf;
943                 }
944
945                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
946                 char_array_0(header_pid);
947
948                 if (identifier)
949                         IOVEC_SET_STRING(iovec[n++], identifier);
950
951                 IOVEC_SET_STRING(iovec[n++], header_pid);
952         } else if (identifier) {
953                 IOVEC_SET_STRING(iovec[n++], identifier);
954                 IOVEC_SET_STRING(iovec[n++], ": ");
955         }
956
957         /* Third: message */
958         IOVEC_SET_STRING(iovec[n++], message);
959         IOVEC_SET_STRING(iovec[n++], "\n");
960
961         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
962         if (fd < 0) {
963                 log_debug("Failed to open /dev/console for logging: %s", strerror(errno));
964                 goto finish;
965         }
966
967         if (writev(fd, iovec, n) < 0)
968                 log_debug("Failed to write to /dev/console for logging: %s", strerror(errno));
969
970         close_nointr_nofail(fd);
971
972 finish:
973         free(ident_buf);
974 }
975
976 static void read_identifier(const char **buf, char **identifier, char **pid) {
977         const char *p;
978         char *t;
979         size_t l, e;
980
981         assert(buf);
982         assert(identifier);
983         assert(pid);
984
985         p = *buf;
986
987         p += strspn(p, WHITESPACE);
988         l = strcspn(p, WHITESPACE);
989
990         if (l <= 0 ||
991             p[l-1] != ':')
992                 return;
993
994         e = l;
995         l--;
996
997         if (p[l-1] == ']') {
998                 size_t k = l-1;
999
1000                 for (;;) {
1001
1002                         if (p[k] == '[') {
1003                                 t = strndup(p+k+1, l-k-2);
1004                                 if (t)
1005                                         *pid = t;
1006
1007                                 l = k;
1008                                 break;
1009                         }
1010
1011                         if (k == 0)
1012                                 break;
1013
1014                         k--;
1015                 }
1016         }
1017
1018         t = strndup(p, l);
1019         if (t)
1020                 *identifier = t;
1021
1022         *buf = p + e;
1023         *buf += strspn(*buf, WHITESPACE);
1024 }
1025
1026 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv, const char *label, size_t label_len) {
1027         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
1028         struct iovec iovec[N_IOVEC_META_FIELDS + 6];
1029         unsigned n = 0;
1030         int priority = LOG_USER | LOG_INFO;
1031         char *identifier = NULL, *pid = NULL;
1032
1033         assert(s);
1034         assert(buf);
1035
1036         if (s->forward_to_syslog)
1037                 forward_syslog_raw(s, buf, ucred, tv);
1038
1039         parse_syslog_priority((char**) &buf, &priority);
1040         skip_syslog_date((char**) &buf);
1041         read_identifier(&buf, &identifier, &pid);
1042
1043         if (s->forward_to_kmsg)
1044                 forward_kmsg(s, priority, identifier, buf, ucred);
1045
1046         if (s->forward_to_console)
1047                 forward_console(s, identifier, buf, ucred);
1048
1049         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
1050
1051         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1052                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1053
1054         if (priority & LOG_FACMASK)
1055                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1056                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1057
1058         if (identifier) {
1059                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1060                 if (syslog_identifier)
1061                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1062         }
1063
1064         if (pid) {
1065                 syslog_pid = strappend("SYSLOG_PID=", pid);
1066                 if (syslog_pid)
1067                         IOVEC_SET_STRING(iovec[n++], syslog_pid);
1068         }
1069
1070         message = strappend("MESSAGE=", buf);
1071         if (message)
1072                 IOVEC_SET_STRING(iovec[n++], message);
1073
1074         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, label, label_len, priority);
1075
1076         free(message);
1077         free(identifier);
1078         free(pid);
1079         free(syslog_priority);
1080         free(syslog_facility);
1081         free(syslog_identifier);
1082 }
1083
/* Checks whether the l bytes at p form a field name that clients are
 * allowed to set.
 *
 * This loosely follows the POSIX recommendations for environment
 * variable names, with a few extra restrictions:
 *
 * http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
 *
 * A name is accepted if it is 1..64 characters long, consists only of
 * 'A'-'Z', '0'-'9' and '_', and starts with neither a digit nor an
 * underscore (a leading underscore marks protected fields).
 *
 * Returns true if the name is acceptable, false otherwise. */
static bool valid_user_field(const char *p, size_t l) {
        size_t i;

        /* Neither empty names nor names longer than 64 characters */
        if (l == 0 || l > 64)
                return false;

        /* The first character may be neither '_' (protected) nor a digit */
        if (p[0] == '_' || (p[0] >= '0' && p[0] <= '9'))
                return false;

        /* All characters have to come from A-Z, 0-9 and '_' */
        for (i = 0; i < l; i++) {
                char c = p[i];
                bool is_upper = c >= 'A' && c <= 'Z';
                bool is_digit = c >= '0' && c <= '9';

                if (!is_upper && !is_digit && c != '_')
                        return false;
        }

        return true;
}
1118
1119 static void process_native_message(
1120                 Server *s,
1121                 const void *buffer, size_t buffer_size,
1122                 struct ucred *ucred,
1123                 struct timeval *tv,
1124                 const char *label, size_t label_len) {
1125
1126         struct iovec *iovec = NULL;
1127         unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1128         const char *p;
1129         size_t remaining;
1130         int priority = LOG_INFO;
1131         char *identifier = NULL, *message = NULL;
1132
1133         assert(s);
1134         assert(buffer || n == 0);
1135
1136         p = buffer;
1137         remaining = buffer_size;
1138
1139         while (remaining > 0) {
1140                 const char *e, *q;
1141
1142                 e = memchr(p, '\n', remaining);
1143
1144                 if (!e) {
1145                         /* Trailing noise, let's ignore it, and flush what we collected */
1146                         log_debug("Received message with trailing noise, ignoring.");
1147                         break;
1148                 }
1149
1150                 if (e == p) {
1151                         /* Entry separator */
1152                         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, priority);
1153                         n = 0;
1154                         priority = LOG_INFO;
1155
1156                         p++;
1157                         remaining--;
1158                         continue;
1159                 }
1160
1161                 if (*p == '.' || *p == '#') {
1162                         /* Ignore control commands for now, and
1163                          * comments too. */
1164                         remaining -= (e - p) + 1;
1165                         p = e + 1;
1166                         continue;
1167                 }
1168
1169                 /* A property follows */
1170
1171                 if (n+N_IOVEC_META_FIELDS >= m) {
1172                         struct iovec *c;
1173                         unsigned u;
1174
1175                         u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1176                         c = realloc(iovec, u * sizeof(struct iovec));
1177                         if (!c) {
1178                                 log_error("Out of memory");
1179                                 break;
1180                         }
1181
1182                         iovec = c;
1183                         m = u;
1184                 }
1185
1186                 q = memchr(p, '=', e - p);
1187                 if (q) {
1188                         if (valid_user_field(p, q - p)) {
1189                                 size_t l;
1190
1191                                 l = e - p;
1192
1193                                 /* If the field name starts with an
1194                                  * underscore, skip the variable,
1195                                  * since that indidates a trusted
1196                                  * field */
1197                                 iovec[n].iov_base = (char*) p;
1198                                 iovec[n].iov_len = l;
1199                                 n++;
1200
1201                                 /* We need to determine the priority
1202                                  * of this entry for the rate limiting
1203                                  * logic */
1204                                 if (l == 10 &&
1205                                     memcmp(p, "PRIORITY=", 9) == 0 &&
1206                                     p[9] >= '0' && p[9] <= '9')
1207                                         priority = (priority & LOG_FACMASK) | (p[9] - '0');
1208
1209                                 else if (l == 17 &&
1210                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1211                                          p[16] >= '0' && p[16] <= '9')
1212                                         priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1213
1214                                 else if (l == 18 &&
1215                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1216                                          p[16] >= '0' && p[16] <= '9' &&
1217                                          p[17] >= '0' && p[17] <= '9')
1218                                         priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1219
1220                                 else if (l >= 12 &&
1221                                          memcmp(p, "SYSLOG_IDENTIFIER=", 11) == 0) {
1222                                         char *t;
1223
1224                                         t = strndup(p + 11, l - 11);
1225                                         if (t) {
1226                                                 free(identifier);
1227                                                 identifier = t;
1228                                         }
1229                                 } else if (l >= 8 &&
1230                                            memcmp(p, "MESSAGE=", 8) == 0) {
1231                                         char *t;
1232
1233                                         t = strndup(p + 8, l - 8);
1234                                         if (t) {
1235                                                 free(message);
1236                                                 message = t;
1237                                         }
1238                                 }
1239                         }
1240
1241                         remaining -= (e - p) + 1;
1242                         p = e + 1;
1243                         continue;
1244                 } else {
1245                         uint64_t l;
1246                         char *k;
1247
1248                         if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1249                                 log_debug("Failed to parse message, ignoring.");
1250                                 break;
1251                         }
1252
1253                         memcpy(&l, e + 1, sizeof(uint64_t));
1254                         l = le64toh(l);
1255
1256                         if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1257                             e[1+sizeof(uint64_t)+l] != '\n') {
1258                                 log_debug("Failed to parse message, ignoring.");
1259                                 break;
1260                         }
1261
1262                         k = malloc((e - p) + 1 + l);
1263                         if (!k) {
1264                                 log_error("Out of memory");
1265                                 break;
1266                         }
1267
1268                         memcpy(k, p, e - p);
1269                         k[e - p] = '=';
1270                         memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1271
1272                         if (valid_user_field(p, e - p)) {
1273                                 iovec[n].iov_base = k;
1274                                 iovec[n].iov_len = (e - p) + 1 + l;
1275                                 n++;
1276                         } else
1277                                 free(k);
1278
1279                         remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1280                         p = e + 1 + sizeof(uint64_t) + l + 1;
1281                 }
1282         }
1283
1284         if (n <= 0)
1285                 goto finish;
1286
1287         tn = n++;
1288         IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1289
1290         if (message) {
1291                 if (s->forward_to_syslog)
1292                         forward_syslog(s, priority, identifier, message, ucred, tv);
1293
1294                 if (s->forward_to_kmsg)
1295                         forward_kmsg(s, priority, identifier, message, ucred);
1296
1297                 if (s->forward_to_console)
1298                         forward_console(s, identifier, message, ucred);
1299         }
1300
1301         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, priority);
1302
1303 finish:
1304         for (j = 0; j < n; j++)  {
1305                 if (j == tn)
1306                         continue;
1307
1308                 if (iovec[j].iov_base < buffer ||
1309                     (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1310                         free(iovec[j].iov_base);
1311         }
1312
1313         free(iovec);
1314         free(identifier);
1315         free(message);
1316 }
1317
1318 static void process_native_file(
1319                 Server *s,
1320                 int fd,
1321                 struct ucred *ucred,
1322                 struct timeval *tv,
1323                 const char *label, size_t label_len) {
1324
1325         struct stat st;
1326         void *p;
1327         ssize_t n;
1328
1329         assert(s);
1330         assert(fd >= 0);
1331
1332         /* Data is in the passed file, since it didn't fit in a
1333          * datagram. We can't map the file here, since clients might
1334          * then truncate it and trigger a SIGBUS for us. So let's
1335          * stupidly read it */
1336
1337         if (fstat(fd, &st) < 0) {
1338                 log_error("Failed to stat passed file, ignoring: %m");
1339                 return;
1340         }
1341
1342         if (!S_ISREG(st.st_mode)) {
1343                 log_error("File passed is not regular. Ignoring.");
1344                 return;
1345         }
1346
1347         if (st.st_size <= 0)
1348                 return;
1349
1350         if (st.st_size > ENTRY_SIZE_MAX) {
1351                 log_error("File passed too large. Ignoring.");
1352                 return;
1353         }
1354
1355         p = malloc(st.st_size);
1356         if (!p) {
1357                 log_error("Out of memory");
1358                 return;
1359         }
1360
1361         n = pread(fd, p, st.st_size, 0);
1362         if (n < 0)
1363                 log_error("Failed to read file, ignoring: %s", strerror(-n));
1364         else if (n > 0)
1365                 process_native_message(s, p, n, ucred, tv, label, label_len);
1366
1367         free(p);
1368 }
1369
1370 static int stdout_stream_log(StdoutStream *s, const char *p) {
1371         struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1372         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1373         unsigned n = 0;
1374         int priority;
1375         char *label = NULL;
1376         size_t label_len = 0;
1377
1378         assert(s);
1379         assert(p);
1380
1381         if (isempty(p))
1382                 return 0;
1383
1384         priority = s->priority;
1385
1386         if (s->level_prefix)
1387                 parse_syslog_priority((char**) &p, &priority);
1388
1389         if (s->forward_to_syslog || s->server->forward_to_syslog)
1390                 forward_syslog(s->server, fixup_priority(priority), s->identifier, p, &s->ucred, NULL);
1391
1392         if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1393                 forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1394
1395         if (s->forward_to_console || s->server->forward_to_console)
1396                 forward_console(s->server, s->identifier, p, &s->ucred);
1397
1398         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1399
1400         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1401                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1402
1403         if (priority & LOG_FACMASK)
1404                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1405                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1406
1407         if (s->identifier) {
1408                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1409                 if (syslog_identifier)
1410                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1411         }
1412
1413         message = strappend("MESSAGE=", p);
1414         if (message)
1415                 IOVEC_SET_STRING(iovec[n++], message);
1416
1417 #ifdef HAVE_SELINUX
1418         if (s->security_context) {
1419                 label = (char*) s->security_context;
1420                 label_len = strlen((char*) s->security_context);
1421         }
1422 #endif
1423
1424         dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, label, label_len, priority);
1425
1426         free(message);
1427         free(syslog_priority);
1428         free(syslog_facility);
1429         free(syslog_identifier);
1430
1431         return 0;
1432 }
1433
1434 static int stdout_stream_line(StdoutStream *s, char *p) {
1435         int r;
1436
1437         assert(s);
1438         assert(p);
1439
1440         p = strstrip(p);
1441
1442         switch (s->state) {
1443
1444         case STDOUT_STREAM_IDENTIFIER:
1445                 if (isempty(p))
1446                         s->identifier = NULL;
1447                 else  {
1448                         s->identifier = strdup(p);
1449                         if (!s->identifier) {
1450                                 log_error("Out of memory");
1451                                 return -ENOMEM;
1452                         }
1453                 }
1454
1455                 s->state = STDOUT_STREAM_PRIORITY;
1456                 return 0;
1457
1458         case STDOUT_STREAM_PRIORITY:
1459                 r = safe_atoi(p, &s->priority);
1460                 if (r < 0 || s->priority <= 0 || s->priority >= 999) {
1461                         log_warning("Failed to parse log priority line.");
1462                         return -EINVAL;
1463                 }
1464
1465                 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1466                 return 0;
1467
1468         case STDOUT_STREAM_LEVEL_PREFIX:
1469                 r = parse_boolean(p);
1470                 if (r < 0) {
1471                         log_warning("Failed to parse level prefix line.");
1472                         return -EINVAL;
1473                 }
1474
1475                 s->level_prefix = !!r;
1476                 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1477                 return 0;
1478
1479         case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1480                 r = parse_boolean(p);
1481                 if (r < 0) {
1482                         log_warning("Failed to parse forward to syslog line.");
1483                         return -EINVAL;
1484                 }
1485
1486                 s->forward_to_syslog = !!r;
1487                 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1488                 return 0;
1489
1490         case STDOUT_STREAM_FORWARD_TO_KMSG:
1491                 r = parse_boolean(p);
1492                 if (r < 0) {
1493                         log_warning("Failed to parse copy to kmsg line.");
1494                         return -EINVAL;
1495                 }
1496
1497                 s->forward_to_kmsg = !!r;
1498                 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1499                 return 0;
1500
1501         case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1502                 r = parse_boolean(p);
1503                 if (r < 0) {
1504                         log_warning("Failed to parse copy to console line.");
1505                         return -EINVAL;
1506                 }
1507
1508                 s->forward_to_console = !!r;
1509                 s->state = STDOUT_STREAM_RUNNING;
1510                 return 0;
1511
1512         case STDOUT_STREAM_RUNNING:
1513                 return stdout_stream_log(s, p);
1514         }
1515
1516         assert_not_reached("Unknown stream state");
1517 }
1518
1519 static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
1520         char *p;
1521         size_t remaining;
1522         int r;
1523
1524         assert(s);
1525
1526         p = s->buffer;
1527         remaining = s->length;
1528         for (;;) {
1529                 char *end;
1530                 size_t skip;
1531
1532                 end = memchr(p, '\n', remaining);
1533                 if (end)
1534                         skip = end - p + 1;
1535                 else if (remaining >= sizeof(s->buffer) - 1) {
1536                         end = p + sizeof(s->buffer) - 1;
1537                         skip = remaining;
1538                 } else
1539                         break;
1540
1541                 *end = 0;
1542
1543                 r = stdout_stream_line(s, p);
1544                 if (r < 0)
1545                         return r;
1546
1547                 remaining -= skip;
1548                 p += skip;
1549         }
1550
1551         if (force_flush && remaining > 0) {
1552                 p[remaining] = 0;
1553                 r = stdout_stream_line(s, p);
1554                 if (r < 0)
1555                         return r;
1556
1557                 p += remaining;
1558                 remaining = 0;
1559         }
1560
1561         if (p > s->buffer) {
1562                 memmove(s->buffer, p, remaining);
1563                 s->length = remaining;
1564         }
1565
1566         return 0;
1567 }
1568
1569 static int stdout_stream_process(StdoutStream *s) {
1570         ssize_t l;
1571         int r;
1572
1573         assert(s);
1574
1575         l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1576         if (l < 0) {
1577
1578                 if (errno == EAGAIN)
1579                         return 0;
1580
1581                 log_warning("Failed to read from stream: %m");
1582                 return -errno;
1583         }
1584
1585         if (l == 0) {
1586                 r = stdout_stream_scan(s, true);
1587                 if (r < 0)
1588                         return r;
1589
1590                 return 0;
1591         }
1592
1593         s->length += l;
1594         r = stdout_stream_scan(s, false);
1595         if (r < 0)
1596                 return r;
1597
1598         return 1;
1599
1600 }
1601
1602 static void stdout_stream_free(StdoutStream *s) {
1603         assert(s);
1604
1605         if (s->server) {
1606                 assert(s->server->n_stdout_streams > 0);
1607                 s->server->n_stdout_streams --;
1608                 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
1609         }
1610
1611         if (s->fd >= 0) {
1612                 if (s->server)
1613                         epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1614
1615                 close_nointr_nofail(s->fd);
1616         }
1617
1618 #ifdef HAVE_SELINUX
1619         if (s->security_context)
1620                 freecon(s->security_context);
1621 #endif
1622
1623         free(s->identifier);
1624         free(s);
1625 }
1626
1627 static int stdout_stream_new(Server *s) {
1628         StdoutStream *stream;
1629         int fd, r;
1630         socklen_t len;
1631         struct epoll_event ev;
1632
1633         assert(s);
1634
1635         fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1636         if (fd < 0) {
1637                 if (errno == EAGAIN)
1638                         return 0;
1639
1640                 log_error("Failed to accept stdout connection: %m");
1641                 return -errno;
1642         }
1643
1644         if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1645                 log_warning("Too many stdout streams, refusing connection.");
1646                 close_nointr_nofail(fd);
1647                 return 0;
1648         }
1649
1650         stream = new0(StdoutStream, 1);
1651         if (!stream) {
1652                 log_error("Out of memory.");
1653                 close_nointr_nofail(fd);
1654                 return -ENOMEM;
1655         }
1656
1657         stream->fd = fd;
1658
1659         len = sizeof(stream->ucred);
1660         if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1661                 log_error("Failed to determine peer credentials: %m");
1662                 r = -errno;
1663                 goto fail;
1664         }
1665
1666 #ifdef HAVE_SELINUX
1667         if (getpeercon(fd, &stream->security_context) < 0)
1668                 log_error("Failed to determine peer security context.");
1669 #endif
1670
1671         if (shutdown(fd, SHUT_WR) < 0) {
1672                 log_error("Failed to shutdown writing side of socket: %m");
1673                 r = -errno;
1674                 goto fail;
1675         }
1676
1677         zero(ev);
1678         ev.data.ptr = stream;
1679         ev.events = EPOLLIN;
1680         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1681                 log_error("Failed to add stream to event loop: %m");
1682                 r = -errno;
1683                 goto fail;
1684         }
1685
1686         stream->server = s;
1687         LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1688         s->n_stdout_streams ++;
1689
1690         return 0;
1691
1692 fail:
1693         stdout_stream_free(stream);
1694         return r;
1695 }
1696
1697 static int parse_kernel_timestamp(char **_p, usec_t *t) {
1698         usec_t r;
1699         int k, i;
1700         char *p;
1701
1702         assert(_p);
1703         assert(*_p);
1704         assert(t);
1705
1706         p = *_p;
1707
1708         if (strlen(p) < 14 || p[0] != '[' || p[13] != ']' || p[6] != '.')
1709                 return 0;
1710
1711         r = 0;
1712
1713         for (i = 1; i <= 5; i++) {
1714                 r *= 10;
1715
1716                 if (p[i] == ' ')
1717                         continue;
1718
1719                 k = undecchar(p[i]);
1720                 if (k < 0)
1721                         return 0;
1722
1723                 r += k;
1724         }
1725
1726         for (i = 7; i <= 12; i++) {
1727                 r *= 10;
1728
1729                 k = undecchar(p[i]);
1730                 if (k < 0)
1731                         return 0;
1732
1733                 r += k;
1734         }
1735
1736         *t = r;
1737         *_p += 14;
1738         *_p += strspn(*_p, WHITESPACE);
1739
1740         return 1;
1741 }
1742
1743 static void proc_kmsg_line(Server *s, const char *p) {
1744         struct iovec iovec[N_IOVEC_META_FIELDS + 7];
1745         char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
1746         int priority = LOG_KERN | LOG_INFO;
1747         unsigned n = 0;
1748         usec_t usec;
1749         char *identifier = NULL, *pid = NULL;
1750
1751         assert(s);
1752         assert(p);
1753
1754         if (isempty(p))
1755                 return;
1756
1757         parse_syslog_priority((char **) &p, &priority);
1758
1759         if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
1760                 return;
1761
1762         if (parse_kernel_timestamp((char **) &p, &usec) > 0) {
1763                 if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
1764                              (unsigned long long) usec) >= 0)
1765                         IOVEC_SET_STRING(iovec[n++], source_time);
1766         }
1767
1768         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
1769
1770         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1771                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1772
1773         if ((priority & LOG_FACMASK) == LOG_KERN) {
1774
1775                 if (s->forward_to_syslog)
1776                         forward_syslog(s, priority, "kernel", p, NULL, NULL);
1777
1778                 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
1779         } else {
1780                 read_identifier(&p, &identifier, &pid);
1781
1782                 if (s->forward_to_syslog)
1783                         forward_syslog(s, priority, identifier, p, NULL, NULL);
1784
1785                 if (identifier) {
1786                         syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1787                         if (syslog_identifier)
1788                                 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1789                 }
1790
1791                 if (pid) {
1792                         syslog_pid = strappend("SYSLOG_PID=", pid);
1793                         if (syslog_pid)
1794                                 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1795                 }
1796
1797                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1798                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1799         }
1800
1801         message = strappend("MESSAGE=", p);
1802         if (message)
1803                 IOVEC_SET_STRING(iovec[n++], message);
1804
1805         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, priority);
1806
1807         free(message);
1808         free(syslog_priority);
1809         free(syslog_identifier);
1810         free(syslog_pid);
1811         free(syslog_facility);
1812         free(source_time);
1813         free(identifier);
1814         free(pid);
1815 }
1816
1817 static void proc_kmsg_scan(Server *s) {
1818         char *p;
1819         size_t remaining;
1820
1821         assert(s);
1822
1823         p = s->proc_kmsg_buffer;
1824         remaining = s->proc_kmsg_length;
1825         for (;;) {
1826                 char *end;
1827                 size_t skip;
1828
1829                 end = memchr(p, '\n', remaining);
1830                 if (end)
1831                         skip = end - p + 1;
1832                 else if (remaining >= sizeof(s->proc_kmsg_buffer) - 1) {
1833                         end = p + sizeof(s->proc_kmsg_buffer) - 1;
1834                         skip = remaining;
1835                 } else
1836                         break;
1837
1838                 *end = 0;
1839
1840                 proc_kmsg_line(s, p);
1841
1842                 remaining -= skip;
1843                 p += skip;
1844         }
1845
1846         if (p > s->proc_kmsg_buffer) {
1847                 memmove(s->proc_kmsg_buffer, p, remaining);
1848                 s->proc_kmsg_length = remaining;
1849         }
1850 }
1851
1852 static int system_journal_open(Server *s) {
1853         int r;
1854         char *fn;
1855         sd_id128_t machine;
1856         char ids[33];
1857
1858         r = sd_id128_get_machine(&machine);
1859         if (r < 0)
1860                 return r;
1861
1862         sd_id128_to_string(machine, ids);
1863
1864         if (!s->system_journal) {
1865
1866                 /* First try to create the machine path, but not the prefix */
1867                 fn = strappend("/var/log/journal/", ids);
1868                 if (!fn)
1869                         return -ENOMEM;
1870                 (void) mkdir(fn, 0755);
1871                 free(fn);
1872
1873                 /* The create the system journal file */
1874                 fn = join("/var/log/journal/", ids, "/system.journal", NULL);
1875                 if (!fn)
1876                         return -ENOMEM;
1877
1878                 r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, NULL, &s->system_journal);
1879                 free(fn);
1880
1881                 if (r >= 0) {
1882                         journal_default_metrics(&s->system_metrics, s->system_journal->fd);
1883
1884                         s->system_journal->metrics = s->system_metrics;
1885                         s->system_journal->compress = s->compress;
1886
1887                         server_fix_perms(s, s->system_journal, 0);
1888                 } else if (r < 0) {
1889
1890                         if (r != -ENOENT && r != -EROFS)
1891                                 log_warning("Failed to open system journal: %s", strerror(-r));
1892
1893                         r = 0;
1894                 }
1895         }
1896
1897         if (!s->runtime_journal) {
1898
1899                 fn = join("/run/log/journal/", ids, "/system.journal", NULL);
1900                 if (!fn)
1901                         return -ENOMEM;
1902
1903                 if (s->system_journal) {
1904
1905                         /* Try to open the runtime journal, but only
1906                          * if it already exists, so that we can flush
1907                          * it into the system journal */
1908
1909                         r = journal_file_open(fn, O_RDWR, 0640, NULL, &s->runtime_journal);
1910                         free(fn);
1911
1912                         if (r < 0) {
1913                                 if (r != -ENOENT)
1914                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
1915
1916                                 r = 0;
1917                         }
1918
1919                 } else {
1920
1921                         /* OK, we really need the runtime journal, so create
1922                          * it if necessary. */
1923
1924                         (void) mkdir_parents(fn, 0755);
1925                         r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, NULL, &s->runtime_journal);
1926                         free(fn);
1927
1928                         if (r < 0) {
1929                                 log_error("Failed to open runtime journal: %s", strerror(-r));
1930                                 return r;
1931                         }
1932                 }
1933
1934                 if (s->runtime_journal) {
1935                         journal_default_metrics(&s->runtime_metrics, s->runtime_journal->fd);
1936
1937                         s->runtime_journal->metrics = s->runtime_metrics;
1938                         s->runtime_journal->compress = s->compress;
1939
1940                         server_fix_perms(s, s->runtime_journal, 0);
1941                 }
1942         }
1943
1944         return r;
1945 }
1946
1947 static int server_flush_to_var(Server *s) {
1948         char path[] = "/run/log/journal/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
1949         Object *o = NULL;
1950         int r;
1951         sd_id128_t machine;
1952         sd_journal *j;
1953         usec_t ts;
1954
1955         assert(s);
1956
1957         if (!s->runtime_journal)
1958                 return 0;
1959
1960         ts = now(CLOCK_MONOTONIC);
1961         if (s->var_available_timestamp + RECHECK_VAR_AVAILABLE_USEC > ts)
1962                 return 0;
1963
1964         s->var_available_timestamp = ts;
1965
1966         system_journal_open(s);
1967
1968         if (!s->system_journal)
1969                 return 0;
1970
1971         log_info("Flushing to /var...");
1972
1973         r = sd_id128_get_machine(&machine);
1974         if (r < 0) {
1975                 log_error("Failed to get machine id: %s", strerror(-r));
1976                 return r;
1977         }
1978
1979         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1980         if (r < 0) {
1981                 log_error("Failed to read runtime journal: %s", strerror(-r));
1982                 return r;
1983         }
1984
1985         SD_JOURNAL_FOREACH(j) {
1986                 JournalFile *f;
1987
1988                 f = j->current_file;
1989                 assert(f && f->current_offset > 0);
1990
1991                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1992                 if (r < 0) {
1993                         log_error("Can't read entry: %s", strerror(-r));
1994                         goto finish;
1995                 }
1996
1997                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1998                 if (r == -E2BIG) {
1999                         log_info("Allocation limit reached.");
2000
2001                         journal_file_post_change(s->system_journal);
2002                         server_rotate(s);
2003                         server_vacuum(s);
2004
2005                         r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2006                 }
2007
2008                 if (r < 0) {
2009                         log_error("Can't write entry: %s", strerror(-r));
2010                         goto finish;
2011                 }
2012         }
2013
2014 finish:
2015         journal_file_post_change(s->system_journal);
2016
2017         journal_file_close(s->runtime_journal);
2018         s->runtime_journal = NULL;
2019
2020         if (r >= 0) {
2021                 sd_id128_to_string(machine, path + 17);
2022                 rm_rf(path, false, true, false);
2023         }
2024
2025         return r;
2026 }
2027
2028 static int server_read_proc_kmsg(Server *s) {
2029         ssize_t l;
2030         assert(s);
2031         assert(s->proc_kmsg_fd >= 0);
2032
2033         l = read(s->proc_kmsg_fd, s->proc_kmsg_buffer + s->proc_kmsg_length, sizeof(s->proc_kmsg_buffer) - 1 - s->proc_kmsg_length);
2034         if (l < 0) {
2035
2036                 if (errno == EAGAIN || errno == EINTR)
2037                         return 0;
2038
2039                 log_error("Failed to read from kernel: %m");
2040                 return -errno;
2041         }
2042
2043         s->proc_kmsg_length += l;
2044
2045         proc_kmsg_scan(s);
2046         return 1;
2047 }
2048
2049 static int server_flush_proc_kmsg(Server *s) {
2050         int r;
2051
2052         assert(s);
2053
2054         if (s->proc_kmsg_fd < 0)
2055                 return 0;
2056
2057         log_info("Flushing /proc/kmsg...");
2058
2059         for (;;) {
2060                 r = server_read_proc_kmsg(s);
2061                 if (r < 0)
2062                         return r;
2063
2064                 if (r == 0)
2065                         break;
2066         }
2067
2068         return 0;
2069 }
2070
2071 static int process_event(Server *s, struct epoll_event *ev) {
2072         assert(s);
2073
2074         if (ev->data.fd == s->signal_fd) {
2075                 struct signalfd_siginfo sfsi;
2076                 ssize_t n;
2077
2078                 if (ev->events != EPOLLIN) {
2079                         log_info("Got invalid event from epoll.");
2080                         return -EIO;
2081                 }
2082
2083                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
2084                 if (n != sizeof(sfsi)) {
2085
2086                         if (n >= 0)
2087                                 return -EIO;
2088
2089                         if (errno == EINTR || errno == EAGAIN)
2090                                 return 1;
2091
2092                         return -errno;
2093                 }
2094
2095                 if (sfsi.ssi_signo == SIGUSR1) {
2096                         server_flush_to_var(s);
2097                         return 0;
2098                 }
2099
2100                 log_debug("Received SIG%s", signal_to_string(sfsi.ssi_signo));
2101                 return 0;
2102
2103         } else if (ev->data.fd == s->proc_kmsg_fd) {
2104                 int r;
2105
2106                 if (ev->events != EPOLLIN) {
2107                         log_info("Got invalid event from epoll.");
2108                         return -EIO;
2109                 }
2110
2111                 r = server_read_proc_kmsg(s);
2112                 if (r < 0)
2113                         return r;
2114
2115                 return 1;
2116
2117         } else if (ev->data.fd == s->native_fd ||
2118                    ev->data.fd == s->syslog_fd) {
2119
2120                 if (ev->events != EPOLLIN) {
2121                         log_info("Got invalid event from epoll.");
2122                         return -EIO;
2123                 }
2124
2125                 for (;;) {
2126                         struct msghdr msghdr;
2127                         struct iovec iovec;
2128                         struct ucred *ucred = NULL;
2129                         struct timeval *tv = NULL;
2130                         struct cmsghdr *cmsg;
2131                         char *label = NULL;
2132                         size_t label_len = 0;
2133                         union {
2134                                 struct cmsghdr cmsghdr;
2135                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
2136                                             CMSG_SPACE(sizeof(struct timeval)) +
2137                                             CMSG_SPACE(sizeof(int)) +
2138                                             CMSG_SPACE(PAGE_SIZE)]; /* selinux label */
2139                         } control;
2140                         ssize_t n;
2141                         int v;
2142                         int *fds = NULL;
2143                         unsigned n_fds = 0;
2144
2145                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
2146                                 log_error("SIOCINQ failed: %m");
2147                                 return -errno;
2148                         }
2149
2150                         if (s->buffer_size < (size_t) v) {
2151                                 void *b;
2152                                 size_t l;
2153
2154                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
2155                                 b = realloc(s->buffer, l+1);
2156
2157                                 if (!b) {
2158                                         log_error("Couldn't increase buffer.");
2159                                         return -ENOMEM;
2160                                 }
2161
2162                                 s->buffer_size = l;
2163                                 s->buffer = b;
2164                         }
2165
2166                         zero(iovec);
2167                         iovec.iov_base = s->buffer;
2168                         iovec.iov_len = s->buffer_size;
2169
2170                         zero(control);
2171                         zero(msghdr);
2172                         msghdr.msg_iov = &iovec;
2173                         msghdr.msg_iovlen = 1;
2174                         msghdr.msg_control = &control;
2175                         msghdr.msg_controllen = sizeof(control);
2176
2177                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
2178                         if (n < 0) {
2179
2180                                 if (errno == EINTR || errno == EAGAIN)
2181                                         return 1;
2182
2183                                 log_error("recvmsg() failed: %m");
2184                                 return -errno;
2185                         }
2186
2187                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
2188
2189                                 if (cmsg->cmsg_level == SOL_SOCKET &&
2190                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
2191                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
2192                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
2193                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2194                                          cmsg->cmsg_type == SCM_SECURITY) {
2195                                         label = (char*) CMSG_DATA(cmsg);
2196                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
2197                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
2198                                          cmsg->cmsg_type == SO_TIMESTAMP &&
2199                                          cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
2200                                         tv = (struct timeval*) CMSG_DATA(cmsg);
2201                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2202                                          cmsg->cmsg_type == SCM_RIGHTS) {
2203                                         fds = (int*) CMSG_DATA(cmsg);
2204                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
2205                                 }
2206                         }
2207
2208                         if (ev->data.fd == s->syslog_fd) {
2209                                 char *e;
2210
2211                                 if (n > 0 && n_fds == 0) {
2212                                         e = memchr(s->buffer, '\n', n);
2213                                         if (e)
2214                                                 *e = 0;
2215                                         else
2216                                                 s->buffer[n] = 0;
2217
2218                                         process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
2219                                 } else if (n_fds > 0)
2220                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
2221
2222                         } else {
2223                                 if (n > 0 && n_fds == 0)
2224                                         process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
2225                                 else if (n == 0 && n_fds == 1)
2226                                         process_native_file(s, fds[0], ucred, tv, label, label_len);
2227                                 else if (n_fds > 0)
2228                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
2229                         }
2230
2231                         close_many(fds, n_fds);
2232                 }
2233
2234                 return 1;
2235
2236         } else if (ev->data.fd == s->stdout_fd) {
2237
2238                 if (ev->events != EPOLLIN) {
2239                         log_info("Got invalid event from epoll.");
2240                         return -EIO;
2241                 }
2242
2243                 stdout_stream_new(s);
2244                 return 1;
2245
2246         } else {
2247                 StdoutStream *stream;
2248
2249                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
2250                         log_info("Got invalid event from epoll.");
2251                         return -EIO;
2252                 }
2253
2254                 /* If it is none of the well-known fds, it must be an
2255                  * stdout stream fd. Note that this is a bit ugly here
2256                  * (since we rely that none of the well-known fds
2257                  * could be interpreted as pointer), but nonetheless
2258                  * safe, since the well-known fds would never get an
2259                  * fd > 4096, i.e. beyond the first memory page */
2260
2261                 stream = ev->data.ptr;
2262
2263                 if (stdout_stream_process(stream) <= 0)
2264                         stdout_stream_free(stream);
2265
2266                 return 1;
2267         }
2268
2269         log_error("Unknown event.");
2270         return 0;
2271 }
2272
2273 static int open_syslog_socket(Server *s) {
2274         union sockaddr_union sa;
2275         int one, r;
2276         struct epoll_event ev;
2277
2278         assert(s);
2279
2280         if (s->syslog_fd < 0) {
2281
2282                 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2283                 if (s->syslog_fd < 0) {
2284                         log_error("socket() failed: %m");
2285                         return -errno;
2286                 }
2287
2288                 zero(sa);
2289                 sa.un.sun_family = AF_UNIX;
2290                 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
2291
2292                 unlink(sa.un.sun_path);
2293
2294                 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2295                 if (r < 0) {
2296                         log_error("bind() failed: %m");
2297                         return -errno;
2298                 }
2299
2300                 chmod(sa.un.sun_path, 0666);
2301         } else
2302                 fd_nonblock(s->syslog_fd, 1);
2303
2304         one = 1;
2305         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2306         if (r < 0) {
2307                 log_error("SO_PASSCRED failed: %m");
2308                 return -errno;
2309         }
2310
2311 #ifdef HAVE_SE
2312         one = 1;
2313         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2314         if (r < 0)
2315                 log_warning("SO_PASSSEC failed: %m");
2316 #endif
2317
2318         one = 1;
2319         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2320         if (r < 0) {
2321                 log_error("SO_TIMESTAMP failed: %m");
2322                 return -errno;
2323         }
2324
2325         zero(ev);
2326         ev.events = EPOLLIN;
2327         ev.data.fd = s->syslog_fd;
2328         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2329                 log_error("Failed to add syslog server fd to epoll object: %m");
2330                 return -errno;
2331         }
2332
2333         return 0;
2334 }
2335
2336 static int open_native_socket(Server*s) {
2337         union sockaddr_union sa;
2338         int one, r;
2339         struct epoll_event ev;
2340
2341         assert(s);
2342
2343         if (s->native_fd < 0) {
2344
2345                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2346                 if (s->native_fd < 0) {
2347                         log_error("socket() failed: %m");
2348                         return -errno;
2349                 }
2350
2351                 zero(sa);
2352                 sa.un.sun_family = AF_UNIX;
2353                 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
2354
2355                 unlink(sa.un.sun_path);
2356
2357                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2358                 if (r < 0) {
2359                         log_error("bind() failed: %m");
2360                         return -errno;
2361                 }
2362
2363                 chmod(sa.un.sun_path, 0666);
2364         } else
2365                 fd_nonblock(s->native_fd, 1);
2366
2367         one = 1;
2368         r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2369         if (r < 0) {
2370                 log_error("SO_PASSCRED failed: %m");
2371                 return -errno;
2372         }
2373
2374 #ifdef HAVE_SELINUX
2375         one = 1;
2376         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2377         if (r < 0)
2378                 log_warning("SO_PASSSEC failed: %m");
2379 #endif
2380
2381         one = 1;
2382         r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2383         if (r < 0) {
2384                 log_error("SO_TIMESTAMP failed: %m");
2385                 return -errno;
2386         }
2387
2388         zero(ev);
2389         ev.events = EPOLLIN;
2390         ev.data.fd = s->native_fd;
2391         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2392                 log_error("Failed to add native server fd to epoll object: %m");
2393                 return -errno;
2394         }
2395
2396         return 0;
2397 }
2398
2399 static int open_stdout_socket(Server *s) {
2400         union sockaddr_union sa;
2401         int r;
2402         struct epoll_event ev;
2403
2404         assert(s);
2405
2406         if (s->stdout_fd < 0) {
2407
2408                 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2409                 if (s->stdout_fd < 0) {
2410                         log_error("socket() failed: %m");
2411                         return -errno;
2412                 }
2413
2414                 zero(sa);
2415                 sa.un.sun_family = AF_UNIX;
2416                 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
2417
2418                 unlink(sa.un.sun_path);
2419
2420                 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2421                 if (r < 0) {
2422                         log_error("bind() failed: %m");
2423                         return -errno;
2424                 }
2425
2426                 chmod(sa.un.sun_path, 0666);
2427
2428                 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2429                         log_error("liste() failed: %m");
2430                         return -errno;
2431                 }
2432         } else
2433                 fd_nonblock(s->stdout_fd, 1);
2434
2435         zero(ev);
2436         ev.events = EPOLLIN;
2437         ev.data.fd = s->stdout_fd;
2438         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2439                 log_error("Failed to add stdout server fd to epoll object: %m");
2440                 return -errno;
2441         }
2442
2443         return 0;
2444 }
2445
2446 static int open_proc_kmsg(Server *s) {
2447         struct epoll_event ev;
2448
2449         assert(s);
2450
2451         if (!s->import_proc_kmsg)
2452                 return 0;
2453
2454
2455         s->proc_kmsg_fd = open("/proc/kmsg", O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2456         if (s->proc_kmsg_fd < 0) {
2457                 log_warning("Failed to open /proc/kmsg, ignoring: %m");
2458                 return 0;
2459         }
2460
2461         zero(ev);
2462         ev.events = EPOLLIN;
2463         ev.data.fd = s->proc_kmsg_fd;
2464         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->proc_kmsg_fd, &ev) < 0) {
2465                 log_error("Failed to add /proc/kmsg fd to epoll object: %m");
2466                 return -errno;
2467         }
2468
2469         return 0;
2470 }
2471
2472 static int open_signalfd(Server *s) {
2473         sigset_t mask;
2474         struct epoll_event ev;
2475
2476         assert(s);
2477
2478         assert_se(sigemptyset(&mask) == 0);
2479         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, -1);
2480         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2481
2482         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2483         if (s->signal_fd < 0) {
2484                 log_error("signalfd(): %m");
2485                 return -errno;
2486         }
2487
2488         zero(ev);
2489         ev.events = EPOLLIN;
2490         ev.data.fd = s->signal_fd;
2491
2492         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2493                 log_error("epoll_ctl(): %m");
2494                 return -errno;
2495         }
2496
2497         return 0;
2498 }
2499
2500 static int server_parse_proc_cmdline(Server *s) {
2501         char *line, *w, *state;
2502         int r;
2503         size_t l;
2504
2505         if (detect_container(NULL) > 0)
2506                 return 0;
2507
2508         r = read_one_line_file("/proc/cmdline", &line);
2509         if (r < 0) {
2510                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2511                 return 0;
2512         }
2513
2514         FOREACH_WORD_QUOTED(w, l, line, state) {
2515                 char *word;
2516
2517                 word = strndup(w, l);
2518                 if (!word) {
2519                         r = -ENOMEM;
2520                         goto finish;
2521                 }
2522
2523                 if (startswith(word, "systemd_journald.forward_to_syslog=")) {
2524                         r = parse_boolean(word + 35);
2525                         if (r < 0)
2526                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2527                         else
2528                                 s->forward_to_syslog = r;
2529                 } else if (startswith(word, "systemd_journald.forward_to_kmsg=")) {
2530                         r = parse_boolean(word + 33);
2531                         if (r < 0)
2532                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2533                         else
2534                                 s->forward_to_kmsg = r;
2535                 } else if (startswith(word, "systemd_journald.forward_to_console=")) {
2536                         r = parse_boolean(word + 36);
2537                         if (r < 0)
2538                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2539                         else
2540                                 s->forward_to_console = r;
2541                 }
2542
2543                 free(word);
2544         }
2545
2546         r = 0;
2547
2548 finish:
2549         free(line);
2550         return r;
2551 }
2552
2553 static int server_parse_config_file(Server *s) {
2554         FILE *f;
2555         const char *fn;
2556         int r;
2557
2558         assert(s);
2559
2560         fn = "/etc/systemd/systemd-journald.conf";
2561         f = fopen(fn, "re");
2562         if (!f) {
2563                 if (errno == ENOENT)
2564                         return 0;
2565
2566                 log_warning("Failed to open configuration file %s: %m", fn);
2567                 return -errno;
2568         }
2569
2570         r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2571         if (r < 0)
2572                 log_warning("Failed to parse configuration file: %s", strerror(-r));
2573
2574         fclose(f);
2575
2576         return r;
2577 }
2578
2579 static int server_init(Server *s) {
2580         int n, r, fd;
2581
2582         assert(s);
2583
2584         zero(*s);
2585         s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->proc_kmsg_fd = -1;
2586         s->compress = true;
2587
2588         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
2589         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
2590
2591         s->forward_to_syslog = true;
2592         s->import_proc_kmsg = true;
2593
2594         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
2595         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
2596
2597         server_parse_config_file(s);
2598         server_parse_proc_cmdline(s);
2599
2600         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
2601         if (!s->user_journals) {
2602                 log_error("Out of memory.");
2603                 return -ENOMEM;
2604         }
2605
2606         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2607         if (s->epoll_fd < 0) {
2608                 log_error("Failed to create epoll object: %m");
2609                 return -errno;
2610         }
2611
2612         n = sd_listen_fds(true);
2613         if (n < 0) {
2614                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
2615                 return n;
2616         }
2617
2618         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2619
2620                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
2621
2622                         if (s->native_fd >= 0) {
2623                                 log_error("Too many native sockets passed.");
2624                                 return -EINVAL;
2625                         }
2626
2627                         s->native_fd = fd;
2628
2629                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
2630
2631                         if (s->stdout_fd >= 0) {
2632                                 log_error("Too many stdout sockets passed.");
2633                                 return -EINVAL;
2634                         }
2635
2636                         s->stdout_fd = fd;
2637
2638                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
2639
2640                         if (s->syslog_fd >= 0) {
2641                                 log_error("Too many /dev/log sockets passed.");
2642                                 return -EINVAL;
2643                         }
2644
2645                         s->syslog_fd = fd;
2646
2647                 } else {
2648                         log_error("Unknown socket passed.");
2649                         return -EINVAL;
2650                 }
2651         }
2652
2653         r = open_syslog_socket(s);
2654         if (r < 0)
2655                 return r;
2656
2657         r = open_native_socket(s);
2658         if (r < 0)
2659                 return r;
2660
2661         r = open_stdout_socket(s);
2662         if (r < 0)
2663                 return r;
2664
2665         r = open_proc_kmsg(s);
2666         if (r < 0)
2667                 return r;
2668
2669         r = system_journal_open(s);
2670         if (r < 0)
2671                 return r;
2672
2673         r = open_signalfd(s);
2674         if (r < 0)
2675                 return r;
2676
2677         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2678         if (!s->rate_limit)
2679                 return -ENOMEM;
2680
2681         return 0;
2682 }
2683
2684 static void server_done(Server *s) {
2685         JournalFile *f;
2686         assert(s);
2687
2688         while (s->stdout_streams)
2689                 stdout_stream_free(s->stdout_streams);
2690
2691         if (s->system_journal)
2692                 journal_file_close(s->system_journal);
2693
2694         if (s->runtime_journal)
2695                 journal_file_close(s->runtime_journal);
2696
2697         while ((f = hashmap_steal_first(s->user_journals)))
2698                 journal_file_close(f);
2699
2700         hashmap_free(s->user_journals);
2701
2702         if (s->epoll_fd >= 0)
2703                 close_nointr_nofail(s->epoll_fd);
2704
2705         if (s->signal_fd >= 0)
2706                 close_nointr_nofail(s->signal_fd);
2707
2708         if (s->syslog_fd >= 0)
2709                 close_nointr_nofail(s->syslog_fd);
2710
2711         if (s->native_fd >= 0)
2712                 close_nointr_nofail(s->native_fd);
2713
2714         if (s->stdout_fd >= 0)
2715                 close_nointr_nofail(s->stdout_fd);
2716
2717         if (s->proc_kmsg_fd >= 0)
2718                 close_nointr_nofail(s->proc_kmsg_fd);
2719
2720         if (s->rate_limit)
2721                 journal_rate_limit_free(s->rate_limit);
2722
2723         free(s->buffer);
2724 }
2725
2726 int main(int argc, char *argv[]) {
2727         Server server;
2728         int r;
2729
2730         /* if (getppid() != 1) { */
2731         /*         log_error("This program should be invoked by init only."); */
2732         /*         return EXIT_FAILURE; */
2733         /* } */
2734
2735         if (argc > 1) {
2736                 log_error("This program does not take arguments.");
2737                 return EXIT_FAILURE;
2738         }
2739
2740         log_set_target(LOG_TARGET_CONSOLE);
2741         log_parse_environment();
2742         log_open();
2743
2744         umask(0022);
2745
2746         r = server_init(&server);
2747         if (r < 0)
2748                 goto finish;
2749
2750         server_vacuum(&server);
2751         server_flush_to_var(&server);
2752         server_flush_proc_kmsg(&server);
2753
2754         log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
2755         driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
2756
2757         sd_notify(false,
2758                   "READY=1\n"
2759                   "STATUS=Processing requests...");
2760
2761         for (;;) {
2762                 struct epoll_event event;
2763
2764                 r = epoll_wait(server.epoll_fd, &event, 1, -1);
2765                 if (r < 0) {
2766
2767                         if (errno == EINTR)
2768                                 continue;
2769
2770                         log_error("epoll_wait() failed: %m");
2771                         r = -errno;
2772                         goto finish;
2773                 } else if (r == 0)
2774                         break;
2775
2776                 r = process_event(&server, &event);
2777                 if (r < 0)
2778                         goto finish;
2779                 else if (r == 0)
2780                         break;
2781         }
2782
2783         log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
2784         driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
2785
2786 finish:
2787         sd_notify(false,
2788                   "STATUS=Shutting down...");
2789
2790         server_done(&server);
2791
2792         return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
2793 }