chiark / gitweb /
mount: fix assertion
[elogind.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 2 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
32
33 #include <systemd/sd-journal.h>
34 #include <systemd/sd-login.h>
35 #include <systemd/sd-messages.h>
36 #include <systemd/sd-daemon.h>
37
38 #include "hashmap.h"
39 #include "journal-file.h"
40 #include "socket-util.h"
41 #include "cgroup-util.h"
42 #include "list.h"
43 #include "journal-rate-limit.h"
44 #include "journal-internal.h"
45 #include "conf-parser.h"
46 #include "journald.h"
47 #include "virt.h"
48 #include "missing.h"
49
50 #ifdef HAVE_ACL
51 #include <sys/acl.h>
52 #include <acl/libacl.h>
53 #include "acl-util.h"
54 #endif
55
56 #ifdef HAVE_SELINUX
57 #include <selinux/selinux.h>
58 #endif
59
/* Upper bound on simultaneously open per-user journal files; find_journal()
 * closes entries from the user_journals map until the count is below this. */
#define USER_JOURNALS_MAX 1024
/* NOTE(review): presumably the cap on concurrently connected stdout
 * streams; not referenced in this part of the file -- confirm. */
#define STDOUT_STREAMS_MAX 4096

/* NOTE(review): presumably the default rate-limiter settings; not
 * referenced in this part of the file -- confirm. */
#define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 200

/* How long a value computed by available_space() stays cached before the
 * journal directory is scanned again. */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)

/* NOTE(review): not referenced in this part of the file; by its name the
 * interval between checks for /var becoming available -- confirm. */
#define RECHECK_VAR_AVAILABLE_USEC (30*USEC_PER_SEC)

/* Maximum number of metadata fields dispatch_message_real() may append to
 * an entry; callers must leave this many free iovec slots. */
#define N_IOVEC_META_FIELDS 17

/* NOTE(review): presumably the maximum accepted entry size in bytes
 * (32 MiB); not referenced in this part of the file -- confirm. */
#define ENTRY_SIZE_MAX (1024*1024*32)
73
/* Parsing state of a StdoutStream connection.
 *
 * NOTE(review): the code that drives this state machine is outside this
 * part of the file. Judging by the names, each state before
 * STDOUT_STREAM_RUNNING corresponds to one header field that is expected
 * from the client, in this order, before payload lines -- confirm. */
typedef enum StdoutStreamState {
        STDOUT_STREAM_IDENTIFIER,
        STDOUT_STREAM_PRIORITY,
        STDOUT_STREAM_LEVEL_PREFIX,
        STDOUT_STREAM_FORWARD_TO_SYSLOG,
        STDOUT_STREAM_FORWARD_TO_KMSG,
        STDOUT_STREAM_FORWARD_TO_CONSOLE,
        STDOUT_STREAM_RUNNING
} StdoutStreamState;
83
/* Per-connection state of one stdout stream client.
 *
 * NOTE(review): the functions that fill in and consume these fields are
 * outside this part of the file; the field notes below are derived from
 * names and types only and should be confirmed there. */
struct StdoutStream {
        Server *server;              /* owning server */
        StdoutStreamState state;     /* current parsing state */

        int fd;                      /* connection file descriptor */

        struct ucred ucred;          /* credentials of the peer */
#ifdef HAVE_SELINUX
        security_context_t security_context;
#endif

        /* Per-stream settings; these mirror the pre-RUNNING states of
         * StdoutStreamState one-to-one. */
        char *identifier;
        int priority;
        bool level_prefix:1;
        bool forward_to_syslog:1;
        bool forward_to_kmsg:1;
        bool forward_to_console:1;

        /* Input buffer with room for LINE_MAX bytes plus one extra byte,
         * and (presumably) the number of bytes currently held in it. */
        char buffer[LINE_MAX+1];
        size_t length;

        /* Linkage for a list of StdoutStream objects (see list.h). */
        LIST_FIELDS(StdoutStream, stdout_stream);
};
107
/* Forward declaration: called from dispatch_message_real() before each
 * write; the definition is not in this part of the file. */
static int server_flush_to_var(Server *s);
109
110 static uint64_t available_space(Server *s) {
111         char ids[33], *p;
112         const char *f;
113         sd_id128_t machine;
114         struct statvfs ss;
115         uint64_t sum = 0, avail = 0, ss_avail = 0;
116         int r;
117         DIR *d;
118         usec_t ts;
119         JournalMetrics *m;
120
121         ts = now(CLOCK_MONOTONIC);
122
123         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
124                 return s->cached_available_space;
125
126         r = sd_id128_get_machine(&machine);
127         if (r < 0)
128                 return 0;
129
130         if (s->system_journal) {
131                 f = "/var/log/journal/";
132                 m = &s->system_metrics;
133         } else {
134                 f = "/run/log/journal/";
135                 m = &s->runtime_metrics;
136         }
137
138         assert(m);
139
140         p = strappend(f, sd_id128_to_string(machine, ids));
141         if (!p)
142                 return 0;
143
144         d = opendir(p);
145         free(p);
146
147         if (!d)
148                 return 0;
149
150         if (fstatvfs(dirfd(d), &ss) < 0)
151                 goto finish;
152
153         for (;;) {
154                 struct stat st;
155                 struct dirent buf, *de;
156                 int k;
157
158                 k = readdir_r(d, &buf, &de);
159                 if (k != 0) {
160                         r = -k;
161                         goto finish;
162                 }
163
164                 if (!de)
165                         break;
166
167                 if (!dirent_is_file_with_suffix(de, ".journal"))
168                         continue;
169
170                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
171                         continue;
172
173                 sum += (uint64_t) st.st_blocks * (uint64_t) st.st_blksize;
174         }
175
176         avail = sum >= m->max_use ? 0 : m->max_use - sum;
177
178         ss_avail = ss.f_bsize * ss.f_bavail;
179
180         ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
181
182         if (ss_avail < avail)
183                 avail = ss_avail;
184
185         s->cached_available_space = avail;
186         s->cached_available_space_timestamp = ts;
187
188 finish:
189         closedir(d);
190
191         return avail;
192 }
193
194 static void server_read_file_gid(Server *s) {
195         const char *adm = "adm";
196         int r;
197
198         assert(s);
199
200         if (s->file_gid_valid)
201                 return;
202
203         r = get_group_creds(&adm, &s->file_gid);
204         if (r < 0)
205                 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
206
207         /* if we couldn't read the gid, then it will be 0, but that's
208          * fine and we shouldn't try to resolve the group again, so
209          * let's just pretend it worked right-away. */
210         s->file_gid_valid = true;
211 }
212
213 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
214         int r;
215 #ifdef HAVE_ACL
216         acl_t acl;
217         acl_entry_t entry;
218         acl_permset_t permset;
219 #endif
220
221         assert(f);
222
223         server_read_file_gid(s);
224
225         r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
226         if (r < 0)
227                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
228
229 #ifdef HAVE_ACL
230         if (uid <= 0)
231                 return;
232
233         acl = acl_get_fd(f->fd);
234         if (!acl) {
235                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
236                 return;
237         }
238
239         r = acl_find_uid(acl, uid, &entry);
240         if (r <= 0) {
241
242                 if (acl_create_entry(&acl, &entry) < 0 ||
243                     acl_set_tag_type(entry, ACL_USER) < 0 ||
244                     acl_set_qualifier(entry, &uid) < 0) {
245                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
246                         goto finish;
247                 }
248         }
249
250         if (acl_get_permset(entry, &permset) < 0 ||
251             acl_add_perm(permset, ACL_READ) < 0 ||
252             acl_calc_mask(&acl) < 0) {
253                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
254                 goto finish;
255         }
256
257         if (acl_set_fd(f->fd, acl) < 0)
258                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
259
260 finish:
261         acl_free(acl);
262 #endif
263 }
264
265 static JournalFile* find_journal(Server *s, uid_t uid) {
266         char *p;
267         int r;
268         JournalFile *f;
269         char ids[33];
270         sd_id128_t machine;
271
272         assert(s);
273
274         /* We split up user logs only on /var, not on /run. If the
275          * runtime file is open, we write to it exclusively, in order
276          * to guarantee proper order as soon as we flush /run to
277          * /var and close the runtime file. */
278
279         if (s->runtime_journal)
280                 return s->runtime_journal;
281
282         if (uid <= 0)
283                 return s->system_journal;
284
285         r = sd_id128_get_machine(&machine);
286         if (r < 0)
287                 return s->system_journal;
288
289         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
290         if (f)
291                 return f;
292
293         if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0)
294                 return s->system_journal;
295
296         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
297                 /* Too many open? Then let's close one */
298                 f = hashmap_steal_first(s->user_journals);
299                 assert(f);
300                 journal_file_close(f);
301         }
302
303         r = journal_file_open(p, O_RDWR|O_CREAT, 0640, s->system_journal, &f);
304         free(p);
305
306         if (r < 0)
307                 return s->system_journal;
308
309         server_fix_perms(s, f, uid);
310         f->metrics = s->system_metrics;
311         f->compress = s->compress;
312
313         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
314         if (r < 0) {
315                 journal_file_close(f);
316                 return s->system_journal;
317         }
318
319         return f;
320 }
321
322 static void server_rotate(Server *s) {
323         JournalFile *f;
324         void *k;
325         Iterator i;
326         int r;
327
328         log_info("Rotating...");
329
330         if (s->runtime_journal) {
331                 r = journal_file_rotate(&s->runtime_journal);
332                 if (r < 0)
333                         log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
334         }
335
336         if (s->system_journal) {
337                 r = journal_file_rotate(&s->system_journal);
338                 if (r < 0)
339                         log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
340         }
341
342         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
343                 r = journal_file_rotate(&f);
344                 if (r < 0)
345                         log_error("Failed to rotate %s: %s", f->path, strerror(-r));
346                 else
347                         hashmap_replace(s->user_journals, k, f);
348         }
349 }
350
351 static void server_vacuum(Server *s) {
352         char *p;
353         char ids[33];
354         sd_id128_t machine;
355         int r;
356
357         log_info("Vacuuming...");
358
359         r = sd_id128_get_machine(&machine);
360         if (r < 0) {
361                 log_error("Failed to get machine ID: %s", strerror(-r));
362                 return;
363         }
364
365         sd_id128_to_string(machine, ids);
366
367         if (s->system_journal) {
368                 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
369                         log_error("Out of memory.");
370                         return;
371                 }
372
373                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
374                 if (r < 0 && r != -ENOENT)
375                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
376                 free(p);
377         }
378
379
380         if (s->runtime_journal) {
381                 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
382                         log_error("Out of memory.");
383                         return;
384                 }
385
386                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
387                 if (r < 0 && r != -ENOENT)
388                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
389                 free(p);
390         }
391
392         s->cached_available_space_timestamp = 0;
393 }
394
395 static char *shortened_cgroup_path(pid_t pid) {
396         int r;
397         char *process_path, *init_path, *path;
398
399         assert(pid > 0);
400
401         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
402         if (r < 0)
403                 return NULL;
404
405         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
406         if (r < 0) {
407                 free(process_path);
408                 return NULL;
409         }
410
411         if (endswith(init_path, "/system"))
412                 init_path[strlen(init_path) - 7] = 0;
413         else if (streq(init_path, "/"))
414                 init_path[0] = 0;
415
416         if (startswith(process_path, init_path)) {
417                 char *p;
418
419                 p = strdup(process_path + strlen(init_path));
420                 if (!p) {
421                         free(process_path);
422                         free(init_path);
423                         return NULL;
424                 }
425                 path = p;
426         } else {
427                 path = process_path;
428                 process_path = NULL;
429         }
430
431         free(process_path);
432         free(init_path);
433
434         return path;
435 }
436
437 static void dispatch_message_real(
438                 Server *s,
439                 struct iovec *iovec, unsigned n, unsigned m,
440                 struct ucred *ucred,
441                 struct timeval *tv,
442                 const char *label, size_t label_len) {
443
444         char *pid = NULL, *uid = NULL, *gid = NULL,
445                 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
446                 *comm = NULL, *cmdline = NULL, *hostname = NULL,
447                 *audit_session = NULL, *audit_loginuid = NULL,
448                 *exe = NULL, *cgroup = NULL, *session = NULL,
449                 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
450
451         char idbuf[33];
452         sd_id128_t id;
453         int r;
454         char *t;
455         uid_t loginuid = 0, realuid = 0;
456         JournalFile *f;
457         bool vacuumed = false;
458
459         assert(s);
460         assert(iovec);
461         assert(n > 0);
462         assert(n + N_IOVEC_META_FIELDS <= m);
463
464         if (ucred) {
465                 uint32_t audit;
466                 uid_t owner;
467
468                 realuid = ucred->uid;
469
470                 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
471                         IOVEC_SET_STRING(iovec[n++], pid);
472
473                 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
474                         IOVEC_SET_STRING(iovec[n++], uid);
475
476                 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
477                         IOVEC_SET_STRING(iovec[n++], gid);
478
479                 r = get_process_comm(ucred->pid, &t);
480                 if (r >= 0) {
481                         comm = strappend("_COMM=", t);
482                         free(t);
483
484                         if (comm)
485                                 IOVEC_SET_STRING(iovec[n++], comm);
486                 }
487
488                 r = get_process_exe(ucred->pid, &t);
489                 if (r >= 0) {
490                         exe = strappend("_EXE=", t);
491                         free(t);
492
493                         if (exe)
494                                 IOVEC_SET_STRING(iovec[n++], exe);
495                 }
496
497                 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
498                 if (r >= 0) {
499                         cmdline = strappend("_CMDLINE=", t);
500                         free(t);
501
502                         if (cmdline)
503                                 IOVEC_SET_STRING(iovec[n++], cmdline);
504                 }
505
506                 r = audit_session_from_pid(ucred->pid, &audit);
507                 if (r >= 0)
508                         if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
509                                 IOVEC_SET_STRING(iovec[n++], audit_session);
510
511                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
512                 if (r >= 0)
513                         if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
514                                 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
515
516                 t = shortened_cgroup_path(ucred->pid);
517                 if (t) {
518                         cgroup = strappend("_SYSTEMD_CGROUP=", t);
519                         free(t);
520
521                         if (cgroup)
522                                 IOVEC_SET_STRING(iovec[n++], cgroup);
523                 }
524
525                 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
526                         session = strappend("_SYSTEMD_SESSION=", t);
527                         free(t);
528
529                         if (session)
530                                 IOVEC_SET_STRING(iovec[n++], session);
531                 }
532
533                 if (sd_pid_get_unit(ucred->pid, &t) >= 0) {
534                         unit = strappend("_SYSTEMD_UNIT=", t);
535                         free(t);
536
537                         if (unit)
538                                 IOVEC_SET_STRING(iovec[n++], unit);
539                 }
540
541                 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
542                         if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
543                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
544
545 #ifdef HAVE_SELINUX
546                 if (label) {
547                         selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
548                         if (selinux_context) {
549                                 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
550                                 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
551                                 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
552                                 IOVEC_SET_STRING(iovec[n++], selinux_context);
553                         }
554                 } else {
555                         security_context_t con;
556
557                         if (getpidcon(ucred->pid, &con) >= 0) {
558                                 selinux_context = strappend("_SELINUX_CONTEXT=", con);
559                                 if (selinux_context)
560                                         IOVEC_SET_STRING(iovec[n++], selinux_context);
561
562                                 freecon(con);
563                         }
564                 }
565 #endif
566         }
567
568         if (tv) {
569                 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
570                              (unsigned long long) timeval_load(tv)) >= 0)
571                         IOVEC_SET_STRING(iovec[n++], source_time);
572         }
573
574         /* Note that strictly speaking storing the boot id here is
575          * redundant since the entry includes this in-line
576          * anyway. However, we need this indexed, too. */
577         r = sd_id128_get_boot(&id);
578         if (r >= 0)
579                 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
580                         IOVEC_SET_STRING(iovec[n++], boot_id);
581
582         r = sd_id128_get_machine(&id);
583         if (r >= 0)
584                 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
585                         IOVEC_SET_STRING(iovec[n++], machine_id);
586
587         t = gethostname_malloc();
588         if (t) {
589                 hostname = strappend("_HOSTNAME=", t);
590                 free(t);
591                 if (hostname)
592                         IOVEC_SET_STRING(iovec[n++], hostname);
593         }
594
595         assert(n <= m);
596
597         server_flush_to_var(s);
598
599 retry:
600         f = find_journal(s, realuid == 0 ? 0 : loginuid);
601         if (!f)
602                 log_warning("Dropping message, as we can't find a place to store the data.");
603         else {
604                 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
605
606                 if (r == -E2BIG && !vacuumed) {
607                         log_info("Allocation limit reached.");
608
609                         server_rotate(s);
610                         server_vacuum(s);
611                         vacuumed = true;
612
613                         log_info("Retrying write.");
614                         goto retry;
615                 }
616
617                 if (r < 0)
618                         log_error("Failed to write entry, ignoring: %s", strerror(-r));
619         }
620
621         free(pid);
622         free(uid);
623         free(gid);
624         free(comm);
625         free(exe);
626         free(cmdline);
627         free(source_time);
628         free(boot_id);
629         free(machine_id);
630         free(hostname);
631         free(audit_session);
632         free(audit_loginuid);
633         free(cgroup);
634         free(session);
635         free(owner_uid);
636         free(unit);
637         free(selinux_context);
638 }
639
640 static void driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
641         char mid[11 + 32 + 1];
642         char buffer[16 + LINE_MAX + 1];
643         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
644         int n = 0;
645         va_list ap;
646         struct ucred ucred;
647
648         assert(s);
649         assert(format);
650
651         IOVEC_SET_STRING(iovec[n++], "PRIORITY=5");
652         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
653
654         memcpy(buffer, "MESSAGE=", 8);
655         va_start(ap, format);
656         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
657         va_end(ap);
658         char_array_0(buffer);
659         IOVEC_SET_STRING(iovec[n++], buffer);
660
661         snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
662         char_array_0(mid);
663         IOVEC_SET_STRING(iovec[n++], mid);
664
665         zero(ucred);
666         ucred.pid = getpid();
667         ucred.uid = getuid();
668         ucred.gid = getgid();
669
670         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0);
671 }
672
673 static void dispatch_message(Server *s,
674                              struct iovec *iovec, unsigned n, unsigned m,
675                              struct ucred *ucred,
676                              struct timeval *tv,
677                              const char *label, size_t label_len,
678                              int priority) {
679         int rl;
680         char *path = NULL, *c;
681
682         assert(s);
683         assert(iovec || n == 0);
684
685         if (n == 0)
686                 return;
687
688         if (!ucred)
689                 goto finish;
690
691         path = shortened_cgroup_path(ucred->pid);
692         if (!path)
693                 goto finish;
694
695         /* example: /user/lennart/3/foobar
696          *          /system/dbus.service/foobar
697          *
698          * So let's cut of everything past the third /, since that is
699          * wher user directories start */
700
701         c = strchr(path, '/');
702         if (c) {
703                 c = strchr(c+1, '/');
704                 if (c) {
705                         c = strchr(c+1, '/');
706                         if (c)
707                                 *c = 0;
708                 }
709         }
710
711         rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
712
713         if (rl == 0) {
714                 free(path);
715                 return;
716         }
717
718         /* Write a suppression message if we suppressed something */
719         if (rl > 1)
720                 driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
721
722         free(path);
723
724 finish:
725         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len);
726 }
727
728 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
729         struct msghdr msghdr;
730         struct cmsghdr *cmsg;
731         union {
732                 struct cmsghdr cmsghdr;
733                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
734         } control;
735         union sockaddr_union sa;
736
737         assert(s);
738         assert(iovec);
739         assert(n_iovec > 0);
740
741         zero(msghdr);
742         msghdr.msg_iov = (struct iovec*) iovec;
743         msghdr.msg_iovlen = n_iovec;
744
745         zero(sa);
746         sa.un.sun_family = AF_UNIX;
747         strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
748         msghdr.msg_name = &sa;
749         msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
750
751         if (ucred) {
752                 zero(control);
753                 msghdr.msg_control = &control;
754                 msghdr.msg_controllen = sizeof(control);
755
756                 cmsg = CMSG_FIRSTHDR(&msghdr);
757                 cmsg->cmsg_level = SOL_SOCKET;
758                 cmsg->cmsg_type = SCM_CREDENTIALS;
759                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
760                 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
761                 msghdr.msg_controllen = cmsg->cmsg_len;
762         }
763
764         /* Forward the syslog message we received via /dev/log to
765          * /run/systemd/syslog. Unfortunately we currently can't set
766          * the SO_TIMESTAMP auxiliary data, and hence we don't. */
767
768         if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
769                 return;
770
771         /* The socket is full? I guess the syslog implementation is
772          * too slow, and we shouldn't wait for that... */
773         if (errno == EAGAIN)
774                 return;
775
776         if (ucred && errno == ESRCH) {
777                 struct ucred u;
778
779                 /* Hmm, presumably the sender process vanished
780                  * by now, so let's fix it as good as we
781                  * can, and retry */
782
783                 u = *ucred;
784                 u.pid = getpid();
785                 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
786
787                 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
788                         return;
789
790                 if (errno == EAGAIN)
791                         return;
792         }
793
794         log_debug("Failed to forward syslog message: %m");
795 }
796
797 static void forward_syslog_raw(Server *s, const char *buffer, struct ucred *ucred, struct timeval *tv) {
798         struct iovec iovec;
799
800         assert(s);
801         assert(buffer);
802
803         IOVEC_SET_STRING(iovec, buffer);
804         forward_syslog_iovec(s, &iovec, 1, ucred, tv);
805 }
806
807 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
808         struct iovec iovec[5];
809         char header_priority[6], header_time[64], header_pid[16];
810         int n = 0;
811         time_t t;
812         struct tm *tm;
813         char *ident_buf = NULL;
814
815         assert(s);
816         assert(priority >= 0);
817         assert(priority <= 999);
818         assert(message);
819
820         /* First: priority field */
821         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
822         char_array_0(header_priority);
823         IOVEC_SET_STRING(iovec[n++], header_priority);
824
825         /* Second: timestamp */
826         t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
827         tm = localtime(&t);
828         if (!tm)
829                 return;
830         if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
831                 return;
832         IOVEC_SET_STRING(iovec[n++], header_time);
833
834         /* Third: identifier and PID */
835         if (ucred) {
836                 if (!identifier) {
837                         get_process_comm(ucred->pid, &ident_buf);
838                         identifier = ident_buf;
839                 }
840
841                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
842                 char_array_0(header_pid);
843
844                 if (identifier)
845                         IOVEC_SET_STRING(iovec[n++], identifier);
846
847                 IOVEC_SET_STRING(iovec[n++], header_pid);
848         } else if (identifier) {
849                 IOVEC_SET_STRING(iovec[n++], identifier);
850                 IOVEC_SET_STRING(iovec[n++], ": ");
851         }
852
853         /* Fourth: message */
854         IOVEC_SET_STRING(iovec[n++], message);
855
856         forward_syslog_iovec(s, iovec, n, ucred, tv);
857
858         free(ident_buf);
859 }
860
/* Makes sure a syslog priority value carries a facility: values whose
 * facility bits are all zero (the kernel facility) are moved to LOG_USER
 * with their level kept; all other values are returned unchanged. */
static int fixup_priority(int priority) {
        return (priority & LOG_FACMASK) != 0
                ? priority
                : ((priority & LOG_PRIMASK) | LOG_USER);
}
868
869 static void forward_kmsg(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
870         struct iovec iovec[5];
871         char header_priority[6], header_pid[16];
872         int n = 0;
873         char *ident_buf = NULL;
874         int fd;
875
876         assert(s);
877         assert(priority >= 0);
878         assert(priority <= 999);
879         assert(message);
880
881         /* Never allow messages with kernel facility to be written to
882          * kmsg, regardless where the data comes from. */
883         priority = fixup_priority(priority);
884
885         /* First: priority field */
886         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
887         char_array_0(header_priority);
888         IOVEC_SET_STRING(iovec[n++], header_priority);
889
890         /* Second: identifier and PID */
891         if (ucred) {
892                 if (!identifier) {
893                         get_process_comm(ucred->pid, &ident_buf);
894                         identifier = ident_buf;
895                 }
896
897                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
898                 char_array_0(header_pid);
899
900                 if (identifier)
901                         IOVEC_SET_STRING(iovec[n++], identifier);
902
903                 IOVEC_SET_STRING(iovec[n++], header_pid);
904         } else if (identifier) {
905                 IOVEC_SET_STRING(iovec[n++], identifier);
906                 IOVEC_SET_STRING(iovec[n++], ": ");
907         }
908
909         /* Fourth: message */
910         IOVEC_SET_STRING(iovec[n++], message);
911         IOVEC_SET_STRING(iovec[n++], "\n");
912
913         fd = open("/dev/kmsg", O_WRONLY|O_NOCTTY|O_CLOEXEC);
914         if (fd < 0) {
915                 log_debug("Failed to open /dev/kmsg for logging: %s", strerror(errno));
916                 goto finish;
917         }
918
919         if (writev(fd, iovec, n) < 0)
920                 log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
921
922         close_nointr_nofail(fd);
923
924 finish:
925         free(ident_buf);
926 }
927
928 static void forward_console(Server *s, const char *identifier, const char *message, struct ucred *ucred) {
929         struct iovec iovec[4];
930         char header_pid[16];
931         int n = 0, fd;
932         char *ident_buf = NULL;
933
934         assert(s);
935         assert(message);
936
937         /* First: identifier and PID */
938         if (ucred) {
939                 if (!identifier) {
940                         get_process_comm(ucred->pid, &ident_buf);
941                         identifier = ident_buf;
942                 }
943
944                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
945                 char_array_0(header_pid);
946
947                 if (identifier)
948                         IOVEC_SET_STRING(iovec[n++], identifier);
949
950                 IOVEC_SET_STRING(iovec[n++], header_pid);
951         } else if (identifier) {
952                 IOVEC_SET_STRING(iovec[n++], identifier);
953                 IOVEC_SET_STRING(iovec[n++], ": ");
954         }
955
956         /* Third: message */
957         IOVEC_SET_STRING(iovec[n++], message);
958         IOVEC_SET_STRING(iovec[n++], "\n");
959
960         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
961         if (fd < 0) {
962                 log_debug("Failed to open /dev/console for logging: %s", strerror(errno));
963                 goto finish;
964         }
965
966         if (writev(fd, iovec, n) < 0)
967                 log_debug("Failed to write to /dev/console for logging: %s", strerror(errno));
968
969         close_nointr_nofail(fd);
970
971 finish:
972         free(ident_buf);
973 }
974
975 static void read_identifier(const char **buf, char **identifier, char **pid) {
976         const char *p;
977         char *t;
978         size_t l, e;
979
980         assert(buf);
981         assert(identifier);
982         assert(pid);
983
984         p = *buf;
985
986         p += strspn(p, WHITESPACE);
987         l = strcspn(p, WHITESPACE);
988
989         if (l <= 0 ||
990             p[l-1] != ':')
991                 return;
992
993         e = l;
994         l--;
995
996         if (p[l-1] == ']') {
997                 size_t k = l-1;
998
999                 for (;;) {
1000
1001                         if (p[k] == '[') {
1002                                 t = strndup(p+k+1, l-k-2);
1003                                 if (t)
1004                                         *pid = t;
1005
1006                                 l = k;
1007                                 break;
1008                         }
1009
1010                         if (k == 0)
1011                                 break;
1012
1013                         k--;
1014                 }
1015         }
1016
1017         t = strndup(p, l);
1018         if (t)
1019                 *identifier = t;
1020
1021         *buf = p + e;
1022         *buf += strspn(*buf, WHITESPACE);
1023 }
1024
1025 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv, const char *label, size_t label_len) {
1026         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
1027         struct iovec iovec[N_IOVEC_META_FIELDS + 6];
1028         unsigned n = 0;
1029         int priority = LOG_USER | LOG_INFO;
1030         char *identifier = NULL, *pid = NULL;
1031
1032         assert(s);
1033         assert(buf);
1034
1035         if (s->forward_to_syslog)
1036                 forward_syslog_raw(s, buf, ucred, tv);
1037
1038         parse_syslog_priority((char**) &buf, &priority);
1039         skip_syslog_date((char**) &buf);
1040         read_identifier(&buf, &identifier, &pid);
1041
1042         if (s->forward_to_kmsg)
1043                 forward_kmsg(s, priority, identifier, buf, ucred);
1044
1045         if (s->forward_to_console)
1046                 forward_console(s, identifier, buf, ucred);
1047
1048         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
1049
1050         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1051                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1052
1053         if (priority & LOG_FACMASK)
1054                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1055                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1056
1057         if (identifier) {
1058                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1059                 if (syslog_identifier)
1060                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1061         }
1062
1063         if (pid) {
1064                 syslog_pid = strappend("SYSLOG_PID=", pid);
1065                 if (syslog_pid)
1066                         IOVEC_SET_STRING(iovec[n++], syslog_pid);
1067         }
1068
1069         message = strappend("MESSAGE=", buf);
1070         if (message)
1071                 IOVEC_SET_STRING(iovec[n++], message);
1072
1073         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, label, label_len, priority);
1074
1075         free(message);
1076         free(identifier);
1077         free(pid);
1078         free(syslog_priority);
1079         free(syslog_facility);
1080         free(syslog_identifier);
1081 }
1082
/* Checks whether the first l bytes at p form a field name that clients
 * are allowed to set.
 *
 * This roughly enforces the POSIX syntax recommendations for
 * environment variable names, with a few extra restrictions:
 *
 * http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
 *
 * A name is accepted if it is 1..64 bytes long, consists only of
 * A-Z, 0-9 and '_', and starts with neither a digit nor an underscore
 * (names with a leading underscore are protected).
 *
 * Returns true if the name is acceptable, false otherwise. */
static bool valid_user_field(const char *p, size_t l) {
        size_t i;

        /* Neither empty names nor names longer than 64 chars */
        if (l == 0 || l > 64)
                return false;

        /* A leading underscore marks a protected field, and a leading
         * digit is not allowed either */
        if (p[0] == '_' || (p[0] >= '0' && p[0] <= '9'))
                return false;

        /* Everything has to be an upper case letter, a digit or '_' */
        for (i = 0; i < l; i++) {
                char c = p[i];
                bool is_upper = c >= 'A' && c <= 'Z';
                bool is_digit = c >= '0' && c <= '9';

                if (!is_upper && !is_digit && c != '_')
                        return false;
        }

        return true;
}
1117
1118 static void process_native_message(
1119                 Server *s,
1120                 const void *buffer, size_t buffer_size,
1121                 struct ucred *ucred,
1122                 struct timeval *tv,
1123                 const char *label, size_t label_len) {
1124
1125         struct iovec *iovec = NULL;
1126         unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1127         const char *p;
1128         size_t remaining;
1129         int priority = LOG_INFO;
1130         char *identifier = NULL, *message = NULL;
1131
1132         assert(s);
1133         assert(buffer || n == 0);
1134
1135         p = buffer;
1136         remaining = buffer_size;
1137
1138         while (remaining > 0) {
1139                 const char *e, *q;
1140
1141                 e = memchr(p, '\n', remaining);
1142
1143                 if (!e) {
1144                         /* Trailing noise, let's ignore it, and flush what we collected */
1145                         log_debug("Received message with trailing noise, ignoring.");
1146                         break;
1147                 }
1148
1149                 if (e == p) {
1150                         /* Entry separator */
1151                         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, priority);
1152                         n = 0;
1153                         priority = LOG_INFO;
1154
1155                         p++;
1156                         remaining--;
1157                         continue;
1158                 }
1159
1160                 if (*p == '.' || *p == '#') {
1161                         /* Ignore control commands for now, and
1162                          * comments too. */
1163                         remaining -= (e - p) + 1;
1164                         p = e + 1;
1165                         continue;
1166                 }
1167
1168                 /* A property follows */
1169
1170                 if (n+N_IOVEC_META_FIELDS >= m) {
1171                         struct iovec *c;
1172                         unsigned u;
1173
1174                         u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1175                         c = realloc(iovec, u * sizeof(struct iovec));
1176                         if (!c) {
1177                                 log_error("Out of memory");
1178                                 break;
1179                         }
1180
1181                         iovec = c;
1182                         m = u;
1183                 }
1184
1185                 q = memchr(p, '=', e - p);
1186                 if (q) {
1187                         if (valid_user_field(p, q - p)) {
1188                                 size_t l;
1189
1190                                 l = e - p;
1191
1192                                 /* If the field name starts with an
1193                                  * underscore, skip the variable,
1194                                  * since that indidates a trusted
1195                                  * field */
1196                                 iovec[n].iov_base = (char*) p;
1197                                 iovec[n].iov_len = l;
1198                                 n++;
1199
1200                                 /* We need to determine the priority
1201                                  * of this entry for the rate limiting
1202                                  * logic */
1203                                 if (l == 10 &&
1204                                     memcmp(p, "PRIORITY=", 9) == 0 &&
1205                                     p[9] >= '0' && p[9] <= '9')
1206                                         priority = (priority & LOG_FACMASK) | (p[9] - '0');
1207
1208                                 else if (l == 17 &&
1209                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1210                                          p[16] >= '0' && p[16] <= '9')
1211                                         priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1212
1213                                 else if (l == 18 &&
1214                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1215                                          p[16] >= '0' && p[16] <= '9' &&
1216                                          p[17] >= '0' && p[17] <= '9')
1217                                         priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1218
1219                                 else if (l >= 12 &&
1220                                          memcmp(p, "SYSLOG_IDENTIFIER=", 11) == 0) {
1221                                         char *t;
1222
1223                                         t = strndup(p + 11, l - 11);
1224                                         if (t) {
1225                                                 free(identifier);
1226                                                 identifier = t;
1227                                         }
1228                                 } else if (l >= 8 &&
1229                                            memcmp(p, "MESSAGE=", 8) == 0) {
1230                                         char *t;
1231
1232                                         t = strndup(p + 8, l - 8);
1233                                         if (t) {
1234                                                 free(message);
1235                                                 message = t;
1236                                         }
1237                                 }
1238                         }
1239
1240                         remaining -= (e - p) + 1;
1241                         p = e + 1;
1242                         continue;
1243                 } else {
1244                         uint64_t l;
1245                         char *k;
1246
1247                         if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1248                                 log_debug("Failed to parse message, ignoring.");
1249                                 break;
1250                         }
1251
1252                         memcpy(&l, e + 1, sizeof(uint64_t));
1253                         l = le64toh(l);
1254
1255                         if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1256                             e[1+sizeof(uint64_t)+l] != '\n') {
1257                                 log_debug("Failed to parse message, ignoring.");
1258                                 break;
1259                         }
1260
1261                         k = malloc((e - p) + 1 + l);
1262                         if (!k) {
1263                                 log_error("Out of memory");
1264                                 break;
1265                         }
1266
1267                         memcpy(k, p, e - p);
1268                         k[e - p] = '=';
1269                         memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1270
1271                         if (valid_user_field(p, e - p)) {
1272                                 iovec[n].iov_base = k;
1273                                 iovec[n].iov_len = (e - p) + 1 + l;
1274                                 n++;
1275                         } else
1276                                 free(k);
1277
1278                         remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1279                         p = e + 1 + sizeof(uint64_t) + l + 1;
1280                 }
1281         }
1282
1283         if (n <= 0)
1284                 goto finish;
1285
1286         tn = n++;
1287         IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1288
1289         if (message) {
1290                 if (s->forward_to_syslog)
1291                         forward_syslog(s, priority, identifier, message, ucred, tv);
1292
1293                 if (s->forward_to_kmsg)
1294                         forward_kmsg(s, priority, identifier, message, ucred);
1295
1296                 if (s->forward_to_console)
1297                         forward_console(s, identifier, message, ucred);
1298         }
1299
1300         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, priority);
1301
1302 finish:
1303         for (j = 0; j < n; j++)  {
1304                 if (j == tn)
1305                         continue;
1306
1307                 if (iovec[j].iov_base < buffer ||
1308                     (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1309                         free(iovec[j].iov_base);
1310         }
1311
1312         free(identifier);
1313         free(message);
1314 }
1315
1316 static void process_native_file(
1317                 Server *s,
1318                 int fd,
1319                 struct ucred *ucred,
1320                 struct timeval *tv,
1321                 const char *label, size_t label_len) {
1322
1323         struct stat st;
1324         void *p;
1325         ssize_t n;
1326
1327         assert(s);
1328         assert(fd >= 0);
1329
1330         /* Data is in the passed file, since it didn't fit in a
1331          * datagram. We can't map the file here, since clients might
1332          * then truncate it and trigger a SIGBUS for us. So let's
1333          * stupidly read it */
1334
1335         if (fstat(fd, &st) < 0) {
1336                 log_error("Failed to stat passed file, ignoring: %m");
1337                 return;
1338         }
1339
1340         if (!S_ISREG(st.st_mode)) {
1341                 log_error("File passed is not regular. Ignoring.");
1342                 return;
1343         }
1344
1345         if (st.st_size <= 0)
1346                 return;
1347
1348         if (st.st_size > ENTRY_SIZE_MAX) {
1349                 log_error("File passed too large. Ignoring.");
1350                 return;
1351         }
1352
1353         p = malloc(st.st_size);
1354         if (!p) {
1355                 log_error("Out of memory");
1356                 return;
1357         }
1358
1359         n = pread(fd, p, st.st_size, 0);
1360         if (n < 0)
1361                 log_error("Failed to read file, ignoring: %s", strerror(-n));
1362         else if (n > 0)
1363                 process_native_message(s, p, n, ucred, tv, label, label_len);
1364
1365         free(p);
1366 }
1367
1368 static int stdout_stream_log(StdoutStream *s, const char *p) {
1369         struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1370         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1371         unsigned n = 0;
1372         int priority;
1373         char *label = NULL;
1374         size_t label_len = 0;
1375
1376         assert(s);
1377         assert(p);
1378
1379         if (isempty(p))
1380                 return 0;
1381
1382         priority = s->priority;
1383
1384         if (s->level_prefix)
1385                 parse_syslog_priority((char**) &p, &priority);
1386
1387         if (s->forward_to_syslog || s->server->forward_to_syslog)
1388                 forward_syslog(s->server, fixup_priority(priority), s->identifier, p, &s->ucred, NULL);
1389
1390         if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1391                 forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1392
1393         if (s->forward_to_console || s->server->forward_to_console)
1394                 forward_console(s->server, s->identifier, p, &s->ucred);
1395
1396         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1397
1398         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1399                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1400
1401         if (priority & LOG_FACMASK)
1402                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1403                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1404
1405         if (s->identifier) {
1406                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1407                 if (syslog_identifier)
1408                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1409         }
1410
1411         message = strappend("MESSAGE=", p);
1412         if (message)
1413                 IOVEC_SET_STRING(iovec[n++], message);
1414
1415 #ifdef HAVE_SELINUX
1416         if (s->security_context) {
1417                 label = (char*) s->security_context;
1418                 label_len = strlen((char*) s->security_context);
1419         }
1420 #endif
1421
1422         dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, label, label_len, priority);
1423
1424         free(message);
1425         free(syslog_priority);
1426         free(syslog_facility);
1427         free(syslog_identifier);
1428
1429         return 0;
1430 }
1431
1432 static int stdout_stream_line(StdoutStream *s, char *p) {
1433         int r;
1434
1435         assert(s);
1436         assert(p);
1437
1438         p = strstrip(p);
1439
1440         switch (s->state) {
1441
1442         case STDOUT_STREAM_IDENTIFIER:
1443                 if (isempty(p))
1444                         s->identifier = NULL;
1445                 else  {
1446                         s->identifier = strdup(p);
1447                         if (!s->identifier) {
1448                                 log_error("Out of memory");
1449                                 return -ENOMEM;
1450                         }
1451                 }
1452
1453                 s->state = STDOUT_STREAM_PRIORITY;
1454                 return 0;
1455
1456         case STDOUT_STREAM_PRIORITY:
1457                 r = safe_atoi(p, &s->priority);
1458                 if (r < 0 || s->priority <= 0 || s->priority >= 999) {
1459                         log_warning("Failed to parse log priority line.");
1460                         return -EINVAL;
1461                 }
1462
1463                 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1464                 return 0;
1465
1466         case STDOUT_STREAM_LEVEL_PREFIX:
1467                 r = parse_boolean(p);
1468                 if (r < 0) {
1469                         log_warning("Failed to parse level prefix line.");
1470                         return -EINVAL;
1471                 }
1472
1473                 s->level_prefix = !!r;
1474                 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1475                 return 0;
1476
1477         case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1478                 r = parse_boolean(p);
1479                 if (r < 0) {
1480                         log_warning("Failed to parse forward to syslog line.");
1481                         return -EINVAL;
1482                 }
1483
1484                 s->forward_to_syslog = !!r;
1485                 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1486                 return 0;
1487
1488         case STDOUT_STREAM_FORWARD_TO_KMSG:
1489                 r = parse_boolean(p);
1490                 if (r < 0) {
1491                         log_warning("Failed to parse copy to kmsg line.");
1492                         return -EINVAL;
1493                 }
1494
1495                 s->forward_to_kmsg = !!r;
1496                 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1497                 return 0;
1498
1499         case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1500                 r = parse_boolean(p);
1501                 if (r < 0) {
1502                         log_warning("Failed to parse copy to console line.");
1503                         return -EINVAL;
1504                 }
1505
1506                 s->forward_to_console = !!r;
1507                 s->state = STDOUT_STREAM_RUNNING;
1508                 return 0;
1509
1510         case STDOUT_STREAM_RUNNING:
1511                 return stdout_stream_log(s, p);
1512         }
1513
1514         assert_not_reached("Unknown stream state");
1515 }
1516
1517 static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
1518         char *p;
1519         size_t remaining;
1520         int r;
1521
1522         assert(s);
1523
1524         p = s->buffer;
1525         remaining = s->length;
1526         for (;;) {
1527                 char *end;
1528                 size_t skip;
1529
1530                 end = memchr(p, '\n', remaining);
1531                 if (end)
1532                         skip = end - p + 1;
1533                 else if (remaining >= sizeof(s->buffer) - 1) {
1534                         end = p + sizeof(s->buffer) - 1;
1535                         skip = remaining;
1536                 } else
1537                         break;
1538
1539                 *end = 0;
1540
1541                 r = stdout_stream_line(s, p);
1542                 if (r < 0)
1543                         return r;
1544
1545                 remaining -= skip;
1546                 p += skip;
1547         }
1548
1549         if (force_flush && remaining > 0) {
1550                 p[remaining] = 0;
1551                 r = stdout_stream_line(s, p);
1552                 if (r < 0)
1553                         return r;
1554
1555                 p += remaining;
1556                 remaining = 0;
1557         }
1558
1559         if (p > s->buffer) {
1560                 memmove(s->buffer, p, remaining);
1561                 s->length = remaining;
1562         }
1563
1564         return 0;
1565 }
1566
1567 static int stdout_stream_process(StdoutStream *s) {
1568         ssize_t l;
1569         int r;
1570
1571         assert(s);
1572
1573         l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1574         if (l < 0) {
1575
1576                 if (errno == EAGAIN)
1577                         return 0;
1578
1579                 log_warning("Failed to read from stream: %m");
1580                 return -errno;
1581         }
1582
1583         if (l == 0) {
1584                 r = stdout_stream_scan(s, true);
1585                 if (r < 0)
1586                         return r;
1587
1588                 return 0;
1589         }
1590
1591         s->length += l;
1592         r = stdout_stream_scan(s, false);
1593         if (r < 0)
1594                 return r;
1595
1596         return 1;
1597
1598 }
1599
1600 static void stdout_stream_free(StdoutStream *s) {
1601         assert(s);
1602
1603         if (s->server) {
1604                 assert(s->server->n_stdout_streams > 0);
1605                 s->server->n_stdout_streams --;
1606                 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
1607         }
1608
1609         if (s->fd >= 0) {
1610                 if (s->server)
1611                         epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1612
1613                 close_nointr_nofail(s->fd);
1614         }
1615
1616 #ifdef HAVE_SELINUX
1617         if (s->security_context)
1618                 freecon(s->security_context);
1619 #endif
1620
1621         free(s->identifier);
1622         free(s);
1623 }
1624
1625 static int stdout_stream_new(Server *s) {
1626         StdoutStream *stream;
1627         int fd, r;
1628         socklen_t len;
1629         struct epoll_event ev;
1630
1631         assert(s);
1632
1633         fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1634         if (fd < 0) {
1635                 if (errno == EAGAIN)
1636                         return 0;
1637
1638                 log_error("Failed to accept stdout connection: %m");
1639                 return -errno;
1640         }
1641
1642         if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1643                 log_warning("Too many stdout streams, refusing connection.");
1644                 close_nointr_nofail(fd);
1645                 return 0;
1646         }
1647
1648         stream = new0(StdoutStream, 1);
1649         if (!stream) {
1650                 log_error("Out of memory.");
1651                 close_nointr_nofail(fd);
1652                 return -ENOMEM;
1653         }
1654
1655         stream->fd = fd;
1656
1657         len = sizeof(stream->ucred);
1658         if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1659                 log_error("Failed to determine peer credentials: %m");
1660                 r = -errno;
1661                 goto fail;
1662         }
1663
1664 #ifdef HAVE_SELINUX
1665         if (getpeercon(fd, &stream->security_context) < 0)
1666                 log_error("Failed to determine peer security context.");
1667 #endif
1668
1669         if (shutdown(fd, SHUT_WR) < 0) {
1670                 log_error("Failed to shutdown writing side of socket: %m");
1671                 r = -errno;
1672                 goto fail;
1673         }
1674
1675         zero(ev);
1676         ev.data.ptr = stream;
1677         ev.events = EPOLLIN;
1678         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1679                 log_error("Failed to add stream to event loop: %m");
1680                 r = -errno;
1681                 goto fail;
1682         }
1683
1684         stream->server = s;
1685         LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1686         s->n_stdout_streams ++;
1687
1688         return 0;
1689
1690 fail:
1691         stdout_stream_free(stream);
1692         return r;
1693 }
1694
1695 static int parse_kernel_timestamp(char **_p, usec_t *t) {
1696         usec_t r;
1697         int k, i;
1698         char *p;
1699
1700         assert(_p);
1701         assert(*_p);
1702         assert(t);
1703
1704         p = *_p;
1705
1706         if (strlen(p) < 14 || p[0] != '[' || p[13] != ']' || p[6] != '.')
1707                 return 0;
1708
1709         r = 0;
1710
1711         for (i = 1; i <= 5; i++) {
1712                 r *= 10;
1713
1714                 if (p[i] == ' ')
1715                         continue;
1716
1717                 k = undecchar(p[i]);
1718                 if (k < 0)
1719                         return 0;
1720
1721                 r += k;
1722         }
1723
1724         for (i = 7; i <= 12; i++) {
1725                 r *= 10;
1726
1727                 k = undecchar(p[i]);
1728                 if (k < 0)
1729                         return 0;
1730
1731                 r += k;
1732         }
1733
1734         *t = r;
1735         *_p += 14;
1736         *_p += strspn(*_p, WHITESPACE);
1737
1738         return 1;
1739 }
1740
1741 static void proc_kmsg_line(Server *s, const char *p) {
1742         struct iovec iovec[N_IOVEC_META_FIELDS + 7];
1743         char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
1744         int priority = LOG_KERN | LOG_INFO;
1745         unsigned n = 0;
1746         usec_t usec;
1747         char *identifier = NULL, *pid = NULL;
1748
1749         assert(s);
1750         assert(p);
1751
1752         if (isempty(p))
1753                 return;
1754
1755         parse_syslog_priority((char **) &p, &priority);
1756
1757         if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
1758                 return;
1759
1760         if (parse_kernel_timestamp((char **) &p, &usec) > 0) {
1761                 if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
1762                              (unsigned long long) usec) >= 0)
1763                         IOVEC_SET_STRING(iovec[n++], source_time);
1764         }
1765
1766         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
1767
1768         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1769                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1770
1771         if ((priority & LOG_FACMASK) == LOG_KERN) {
1772
1773                 if (s->forward_to_syslog)
1774                         forward_syslog(s, priority, "kernel", p, NULL, NULL);
1775
1776                 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
1777         } else {
1778                 read_identifier(&p, &identifier, &pid);
1779
1780                 if (s->forward_to_syslog)
1781                         forward_syslog(s, priority, identifier, p, NULL, NULL);
1782
1783                 if (identifier) {
1784                         syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1785                         if (syslog_identifier)
1786                                 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1787                 }
1788
1789                 if (pid) {
1790                         syslog_pid = strappend("SYSLOG_PID=", pid);
1791                         if (syslog_pid)
1792                                 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1793                 }
1794
1795                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1796                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1797         }
1798
1799         message = strappend("MESSAGE=", p);
1800         if (message)
1801                 IOVEC_SET_STRING(iovec[n++], message);
1802
1803         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, priority);
1804
1805         free(message);
1806         free(syslog_priority);
1807         free(syslog_identifier);
1808         free(syslog_pid);
1809         free(syslog_facility);
1810         free(source_time);
1811         free(identifier);
1812         free(pid);
1813 }
1814
1815 static void proc_kmsg_scan(Server *s) {
1816         char *p;
1817         size_t remaining;
1818
1819         assert(s);
1820
1821         p = s->proc_kmsg_buffer;
1822         remaining = s->proc_kmsg_length;
1823         for (;;) {
1824                 char *end;
1825                 size_t skip;
1826
1827                 end = memchr(p, '\n', remaining);
1828                 if (end)
1829                         skip = end - p + 1;
1830                 else if (remaining >= sizeof(s->proc_kmsg_buffer) - 1) {
1831                         end = p + sizeof(s->proc_kmsg_buffer) - 1;
1832                         skip = remaining;
1833                 } else
1834                         break;
1835
1836                 *end = 0;
1837
1838                 proc_kmsg_line(s, p);
1839
1840                 remaining -= skip;
1841                 p += skip;
1842         }
1843
1844         if (p > s->proc_kmsg_buffer) {
1845                 memmove(s->proc_kmsg_buffer, p, remaining);
1846                 s->proc_kmsg_length = remaining;
1847         }
1848 }
1849
1850 static int system_journal_open(Server *s) {
1851         int r;
1852         char *fn;
1853         sd_id128_t machine;
1854         char ids[33];
1855
1856         r = sd_id128_get_machine(&machine);
1857         if (r < 0)
1858                 return r;
1859
1860         sd_id128_to_string(machine, ids);
1861
1862         if (!s->system_journal) {
1863
1864                 /* First try to create the machine path, but not the prefix */
1865                 fn = strappend("/var/log/journal/", ids);
1866                 if (!fn)
1867                         return -ENOMEM;
1868                 (void) mkdir(fn, 0755);
1869                 free(fn);
1870
1871                 /* The create the system journal file */
1872                 fn = join("/var/log/journal/", ids, "/system.journal", NULL);
1873                 if (!fn)
1874                         return -ENOMEM;
1875
1876                 r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, NULL, &s->system_journal);
1877                 free(fn);
1878
1879                 if (r >= 0) {
1880                         journal_default_metrics(&s->system_metrics, s->system_journal->fd);
1881
1882                         s->system_journal->metrics = s->system_metrics;
1883                         s->system_journal->compress = s->compress;
1884
1885                         server_fix_perms(s, s->system_journal, 0);
1886                 } else if (r < 0) {
1887
1888                         if (r != -ENOENT && r != -EROFS)
1889                                 log_warning("Failed to open system journal: %s", strerror(-r));
1890
1891                         r = 0;
1892                 }
1893         }
1894
1895         if (!s->runtime_journal) {
1896
1897                 fn = join("/run/log/journal/", ids, "/system.journal", NULL);
1898                 if (!fn)
1899                         return -ENOMEM;
1900
1901                 if (s->system_journal) {
1902
1903                         /* Try to open the runtime journal, but only
1904                          * if it already exists, so that we can flush
1905                          * it into the system journal */
1906
1907                         r = journal_file_open(fn, O_RDWR, 0640, NULL, &s->runtime_journal);
1908                         free(fn);
1909
1910                         if (r < 0) {
1911                                 if (r != -ENOENT)
1912                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
1913
1914                                 r = 0;
1915                         }
1916
1917                 } else {
1918
1919                         /* OK, we really need the runtime journal, so create
1920                          * it if necessary. */
1921
1922                         (void) mkdir_parents(fn, 0755);
1923                         r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, NULL, &s->runtime_journal);
1924                         free(fn);
1925
1926                         if (r < 0) {
1927                                 log_error("Failed to open runtime journal: %s", strerror(-r));
1928                                 return r;
1929                         }
1930                 }
1931
1932                 if (s->runtime_journal) {
1933                         journal_default_metrics(&s->runtime_metrics, s->runtime_journal->fd);
1934
1935                         s->runtime_journal->metrics = s->runtime_metrics;
1936                         s->runtime_journal->compress = s->compress;
1937
1938                         server_fix_perms(s, s->runtime_journal, 0);
1939                 }
1940         }
1941
1942         return r;
1943 }
1944
1945 static int server_flush_to_var(Server *s) {
1946         char path[] = "/run/log/journal/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
1947         Object *o = NULL;
1948         int r;
1949         sd_id128_t machine;
1950         sd_journal *j;
1951         usec_t ts;
1952
1953         assert(s);
1954
1955         if (!s->runtime_journal)
1956                 return 0;
1957
1958         ts = now(CLOCK_MONOTONIC);
1959         if (s->var_available_timestamp + RECHECK_VAR_AVAILABLE_USEC > ts)
1960                 return 0;
1961
1962         s->var_available_timestamp = ts;
1963
1964         system_journal_open(s);
1965
1966         if (!s->system_journal)
1967                 return 0;
1968
1969         log_info("Flushing to /var...");
1970
1971         r = sd_id128_get_machine(&machine);
1972         if (r < 0) {
1973                 log_error("Failed to get machine id: %s", strerror(-r));
1974                 return r;
1975         }
1976
1977         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1978         if (r < 0) {
1979                 log_error("Failed to read runtime journal: %s", strerror(-r));
1980                 return r;
1981         }
1982
1983         SD_JOURNAL_FOREACH(j) {
1984                 JournalFile *f;
1985
1986                 f = j->current_file;
1987                 assert(f && f->current_offset > 0);
1988
1989                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1990                 if (r < 0) {
1991                         log_error("Can't read entry: %s", strerror(-r));
1992                         goto finish;
1993                 }
1994
1995                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1996                 if (r == -E2BIG) {
1997                         log_info("Allocation limit reached.");
1998
1999                         journal_file_post_change(s->system_journal);
2000                         server_rotate(s);
2001                         server_vacuum(s);
2002
2003                         r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2004                 }
2005
2006                 if (r < 0) {
2007                         log_error("Can't write entry: %s", strerror(-r));
2008                         goto finish;
2009                 }
2010         }
2011
2012 finish:
2013         journal_file_post_change(s->system_journal);
2014
2015         journal_file_close(s->runtime_journal);
2016         s->runtime_journal = NULL;
2017
2018         if (r >= 0) {
2019                 sd_id128_to_string(machine, path + 17);
2020                 rm_rf(path, false, true, false);
2021         }
2022
2023         return r;
2024 }
2025
2026 static int server_read_proc_kmsg(Server *s) {
2027         ssize_t l;
2028         assert(s);
2029         assert(s->proc_kmsg_fd >= 0);
2030
2031         l = read(s->proc_kmsg_fd, s->proc_kmsg_buffer + s->proc_kmsg_length, sizeof(s->proc_kmsg_buffer) - 1 - s->proc_kmsg_length);
2032         if (l < 0) {
2033
2034                 if (errno == EAGAIN || errno == EINTR)
2035                         return 0;
2036
2037                 log_error("Failed to read from kernel: %m");
2038                 return -errno;
2039         }
2040
2041         s->proc_kmsg_length += l;
2042
2043         proc_kmsg_scan(s);
2044         return 1;
2045 }
2046
2047 static int server_flush_proc_kmsg(Server *s) {
2048         int r;
2049
2050         assert(s);
2051
2052         if (s->proc_kmsg_fd < 0)
2053                 return 0;
2054
2055         log_info("Flushing /proc/kmsg...");
2056
2057         for (;;) {
2058                 r = server_read_proc_kmsg(s);
2059                 if (r < 0)
2060                         return r;
2061
2062                 if (r == 0)
2063                         break;
2064         }
2065
2066         return 0;
2067 }
2068
2069 static int process_event(Server *s, struct epoll_event *ev) {
2070         assert(s);
2071
2072         if (ev->data.fd == s->signal_fd) {
2073                 struct signalfd_siginfo sfsi;
2074                 ssize_t n;
2075
2076                 if (ev->events != EPOLLIN) {
2077                         log_info("Got invalid event from epoll.");
2078                         return -EIO;
2079                 }
2080
2081                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
2082                 if (n != sizeof(sfsi)) {
2083
2084                         if (n >= 0)
2085                                 return -EIO;
2086
2087                         if (errno == EINTR || errno == EAGAIN)
2088                                 return 1;
2089
2090                         return -errno;
2091                 }
2092
2093                 if (sfsi.ssi_signo == SIGUSR1) {
2094                         server_flush_to_var(s);
2095                         return 0;
2096                 }
2097
2098                 log_debug("Received SIG%s", signal_to_string(sfsi.ssi_signo));
2099                 return 0;
2100
2101         } else if (ev->data.fd == s->proc_kmsg_fd) {
2102                 int r;
2103
2104                 if (ev->events != EPOLLIN) {
2105                         log_info("Got invalid event from epoll.");
2106                         return -EIO;
2107                 }
2108
2109                 r = server_read_proc_kmsg(s);
2110                 if (r < 0)
2111                         return r;
2112
2113                 return 1;
2114
2115         } else if (ev->data.fd == s->native_fd ||
2116                    ev->data.fd == s->syslog_fd) {
2117
2118                 if (ev->events != EPOLLIN) {
2119                         log_info("Got invalid event from epoll.");
2120                         return -EIO;
2121                 }
2122
2123                 for (;;) {
2124                         struct msghdr msghdr;
2125                         struct iovec iovec;
2126                         struct ucred *ucred = NULL;
2127                         struct timeval *tv = NULL;
2128                         struct cmsghdr *cmsg;
2129                         char *label = NULL;
2130                         size_t label_len = 0;
2131                         union {
2132                                 struct cmsghdr cmsghdr;
2133                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
2134                                             CMSG_SPACE(sizeof(struct timeval)) +
2135                                             CMSG_SPACE(sizeof(int)) +
2136                                             CMSG_SPACE(LINE_MAX)]; /* selinux label */
2137                         } control;
2138                         ssize_t n;
2139                         int v;
2140                         int *fds = NULL;
2141                         unsigned n_fds = 0;
2142
2143                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
2144                                 log_error("SIOCINQ failed: %m");
2145                                 return -errno;
2146                         }
2147
2148                         if (s->buffer_size < (size_t) v) {
2149                                 void *b;
2150                                 size_t l;
2151
2152                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
2153                                 b = realloc(s->buffer, l+1);
2154
2155                                 if (!b) {
2156                                         log_error("Couldn't increase buffer.");
2157                                         return -ENOMEM;
2158                                 }
2159
2160                                 s->buffer_size = l;
2161                                 s->buffer = b;
2162                         }
2163
2164                         zero(iovec);
2165                         iovec.iov_base = s->buffer;
2166                         iovec.iov_len = s->buffer_size;
2167
2168                         zero(control);
2169                         zero(msghdr);
2170                         msghdr.msg_iov = &iovec;
2171                         msghdr.msg_iovlen = 1;
2172                         msghdr.msg_control = &control;
2173                         msghdr.msg_controllen = sizeof(control);
2174
2175                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
2176                         if (n < 0) {
2177
2178                                 if (errno == EINTR || errno == EAGAIN)
2179                                         return 1;
2180
2181                                 log_error("recvmsg() failed: %m");
2182                                 return -errno;
2183                         }
2184
2185                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
2186
2187                                 if (cmsg->cmsg_level == SOL_SOCKET &&
2188                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
2189                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
2190                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
2191                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2192                                          cmsg->cmsg_type == SCM_SECURITY) {
2193                                         label = (char*) CMSG_DATA(cmsg);
2194                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
2195                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
2196                                          cmsg->cmsg_type == SO_TIMESTAMP &&
2197                                          cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
2198                                         tv = (struct timeval*) CMSG_DATA(cmsg);
2199                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2200                                          cmsg->cmsg_type == SCM_RIGHTS) {
2201                                         fds = (int*) CMSG_DATA(cmsg);
2202                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
2203                                 }
2204                         }
2205
2206                         if (ev->data.fd == s->syslog_fd) {
2207                                 char *e;
2208
2209                                 if (n > 0 && n_fds == 0) {
2210                                         e = memchr(s->buffer, '\n', n);
2211                                         if (e)
2212                                                 *e = 0;
2213                                         else
2214                                                 s->buffer[n] = 0;
2215
2216                                         process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
2217                                 } else if (n_fds > 0)
2218                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
2219
2220                         } else {
2221                                 if (n > 0 && n_fds == 0)
2222                                         process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
2223                                 else if (n == 0 && n_fds == 1)
2224                                         process_native_file(s, fds[0], ucred, tv, label, label_len);
2225                                 else if (n_fds > 0)
2226                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
2227                         }
2228
2229                         close_many(fds, n_fds);
2230                 }
2231
2232                 return 1;
2233
2234         } else if (ev->data.fd == s->stdout_fd) {
2235
2236                 if (ev->events != EPOLLIN) {
2237                         log_info("Got invalid event from epoll.");
2238                         return -EIO;
2239                 }
2240
2241                 stdout_stream_new(s);
2242                 return 1;
2243
2244         } else {
2245                 StdoutStream *stream;
2246
2247                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
2248                         log_info("Got invalid event from epoll.");
2249                         return -EIO;
2250                 }
2251
2252                 /* If it is none of the well-known fds, it must be an
2253                  * stdout stream fd. Note that this is a bit ugly here
2254                  * (since we rely that none of the well-known fds
2255                  * could be interpreted as pointer), but nonetheless
2256                  * safe, since the well-known fds would never get an
2257                  * fd > 4096, i.e. beyond the first memory page */
2258
2259                 stream = ev->data.ptr;
2260
2261                 if (stdout_stream_process(stream) <= 0)
2262                         stdout_stream_free(stream);
2263
2264                 return 1;
2265         }
2266
2267         log_error("Unknown event.");
2268         return 0;
2269 }
2270
2271 static int open_syslog_socket(Server *s) {
2272         union sockaddr_union sa;
2273         int one, r;
2274         struct epoll_event ev;
2275
2276         assert(s);
2277
2278         if (s->syslog_fd < 0) {
2279
2280                 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2281                 if (s->syslog_fd < 0) {
2282                         log_error("socket() failed: %m");
2283                         return -errno;
2284                 }
2285
2286                 zero(sa);
2287                 sa.un.sun_family = AF_UNIX;
2288                 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
2289
2290                 unlink(sa.un.sun_path);
2291
2292                 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2293                 if (r < 0) {
2294                         log_error("bind() failed: %m");
2295                         return -errno;
2296                 }
2297
2298                 chmod(sa.un.sun_path, 0666);
2299         } else
2300                 fd_nonblock(s->syslog_fd, 1);
2301
2302         one = 1;
2303         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2304         if (r < 0) {
2305                 log_error("SO_PASSCRED failed: %m");
2306                 return -errno;
2307         }
2308
2309         one = 1;
2310         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2311         if (r < 0)
2312                 log_warning("SO_PASSSEC failed: %m");
2313
2314         one = 1;
2315         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2316         if (r < 0) {
2317                 log_error("SO_TIMESTAMP failed: %m");
2318                 return -errno;
2319         }
2320
2321         zero(ev);
2322         ev.events = EPOLLIN;
2323         ev.data.fd = s->syslog_fd;
2324         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2325                 log_error("Failed to add syslog server fd to epoll object: %m");
2326                 return -errno;
2327         }
2328
2329         return 0;
2330 }
2331
2332 static int open_native_socket(Server*s) {
2333         union sockaddr_union sa;
2334         int one, r;
2335         struct epoll_event ev;
2336
2337         assert(s);
2338
2339         if (s->native_fd < 0) {
2340
2341                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2342                 if (s->native_fd < 0) {
2343                         log_error("socket() failed: %m");
2344                         return -errno;
2345                 }
2346
2347                 zero(sa);
2348                 sa.un.sun_family = AF_UNIX;
2349                 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
2350
2351                 unlink(sa.un.sun_path);
2352
2353                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2354                 if (r < 0) {
2355                         log_error("bind() failed: %m");
2356                         return -errno;
2357                 }
2358
2359                 chmod(sa.un.sun_path, 0666);
2360         } else
2361                 fd_nonblock(s->native_fd, 1);
2362
2363         one = 1;
2364         r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2365         if (r < 0) {
2366                 log_error("SO_PASSCRED failed: %m");
2367                 return -errno;
2368         }
2369
2370         one = 1;
2371         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2372         if (r < 0)
2373                 log_warning("SO_PASSSEC failed: %m");
2374
2375         one = 1;
2376         r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2377         if (r < 0) {
2378                 log_error("SO_TIMESTAMP failed: %m");
2379                 return -errno;
2380         }
2381
2382         zero(ev);
2383         ev.events = EPOLLIN;
2384         ev.data.fd = s->native_fd;
2385         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2386                 log_error("Failed to add native server fd to epoll object: %m");
2387                 return -errno;
2388         }
2389
2390         return 0;
2391 }
2392
2393 static int open_stdout_socket(Server *s) {
2394         union sockaddr_union sa;
2395         int r;
2396         struct epoll_event ev;
2397
2398         assert(s);
2399
2400         if (s->stdout_fd < 0) {
2401
2402                 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2403                 if (s->stdout_fd < 0) {
2404                         log_error("socket() failed: %m");
2405                         return -errno;
2406                 }
2407
2408                 zero(sa);
2409                 sa.un.sun_family = AF_UNIX;
2410                 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
2411
2412                 unlink(sa.un.sun_path);
2413
2414                 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2415                 if (r < 0) {
2416                         log_error("bind() failed: %m");
2417                         return -errno;
2418                 }
2419
2420                 chmod(sa.un.sun_path, 0666);
2421
2422                 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2423                         log_error("liste() failed: %m");
2424                         return -errno;
2425                 }
2426         } else
2427                 fd_nonblock(s->stdout_fd, 1);
2428
2429         zero(ev);
2430         ev.events = EPOLLIN;
2431         ev.data.fd = s->stdout_fd;
2432         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2433                 log_error("Failed to add stdout server fd to epoll object: %m");
2434                 return -errno;
2435         }
2436
2437         return 0;
2438 }
2439
2440 static int open_proc_kmsg(Server *s) {
2441         struct epoll_event ev;
2442
2443         assert(s);
2444
2445         if (!s->import_proc_kmsg)
2446                 return 0;
2447
2448
2449         s->proc_kmsg_fd = open("/proc/kmsg", O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2450         if (s->proc_kmsg_fd < 0) {
2451                 log_warning("Failed to open /proc/kmsg, ignoring: %m");
2452                 return 0;
2453         }
2454
2455         zero(ev);
2456         ev.events = EPOLLIN;
2457         ev.data.fd = s->proc_kmsg_fd;
2458         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->proc_kmsg_fd, &ev) < 0) {
2459                 log_error("Failed to add /proc/kmsg fd to epoll object: %m");
2460                 return -errno;
2461         }
2462
2463         return 0;
2464 }
2465
2466 static int open_signalfd(Server *s) {
2467         sigset_t mask;
2468         struct epoll_event ev;
2469
2470         assert(s);
2471
2472         assert_se(sigemptyset(&mask) == 0);
2473         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, -1);
2474         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2475
2476         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2477         if (s->signal_fd < 0) {
2478                 log_error("signalfd(): %m");
2479                 return -errno;
2480         }
2481
2482         zero(ev);
2483         ev.events = EPOLLIN;
2484         ev.data.fd = s->signal_fd;
2485
2486         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2487                 log_error("epoll_ctl(): %m");
2488                 return -errno;
2489         }
2490
2491         return 0;
2492 }
2493
2494 static int server_parse_proc_cmdline(Server *s) {
2495         char *line, *w, *state;
2496         int r;
2497         size_t l;
2498
2499         if (detect_container(NULL) > 0)
2500                 return 0;
2501
2502         r = read_one_line_file("/proc/cmdline", &line);
2503         if (r < 0) {
2504                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2505                 return 0;
2506         }
2507
2508         FOREACH_WORD_QUOTED(w, l, line, state) {
2509                 char *word;
2510
2511                 word = strndup(w, l);
2512                 if (!word) {
2513                         r = -ENOMEM;
2514                         goto finish;
2515                 }
2516
2517                 if (startswith(word, "systemd_journald.forward_to_syslog=")) {
2518                         r = parse_boolean(word + 35);
2519                         if (r < 0)
2520                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2521                         else
2522                                 s->forward_to_syslog = r;
2523                 } else if (startswith(word, "systemd_journald.forward_to_kmsg=")) {
2524                         r = parse_boolean(word + 33);
2525                         if (r < 0)
2526                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2527                         else
2528                                 s->forward_to_kmsg = r;
2529                 } else if (startswith(word, "systemd_journald.forward_to_console=")) {
2530                         r = parse_boolean(word + 36);
2531                         if (r < 0)
2532                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2533                         else
2534                                 s->forward_to_console = r;
2535                 }
2536
2537                 free(word);
2538         }
2539
2540         r = 0;
2541
2542 finish:
2543         free(line);
2544         return r;
2545 }
2546
2547 static int server_parse_config_file(Server *s) {
2548         FILE *f;
2549         const char *fn;
2550         int r;
2551
2552         assert(s);
2553
2554         fn = "/etc/systemd/systemd-journald.conf";
2555         f = fopen(fn, "re");
2556         if (!f) {
2557                 if (errno == ENOENT)
2558                         return 0;
2559
2560                 log_warning("Failed to open configuration file %s: %m", fn);
2561                 return -errno;
2562         }
2563
2564         r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2565         if (r < 0)
2566                 log_warning("Failed to parse configuration file: %s", strerror(-r));
2567
2568         fclose(f);
2569
2570         return r;
2571 }
2572
2573 static int server_init(Server *s) {
2574         int n, r, fd;
2575
2576         assert(s);
2577
2578         zero(*s);
2579         s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->proc_kmsg_fd = -1;
2580         s->compress = true;
2581
2582         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
2583         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
2584
2585         s->forward_to_syslog = true;
2586         s->import_proc_kmsg = true;
2587
2588         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
2589         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
2590
2591         server_parse_config_file(s);
2592         server_parse_proc_cmdline(s);
2593
2594         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
2595         if (!s->user_journals) {
2596                 log_error("Out of memory.");
2597                 return -ENOMEM;
2598         }
2599
2600         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2601         if (s->epoll_fd < 0) {
2602                 log_error("Failed to create epoll object: %m");
2603                 return -errno;
2604         }
2605
2606         n = sd_listen_fds(true);
2607         if (n < 0) {
2608                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
2609                 return n;
2610         }
2611
2612         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2613
2614                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
2615
2616                         if (s->native_fd >= 0) {
2617                                 log_error("Too many native sockets passed.");
2618                                 return -EINVAL;
2619                         }
2620
2621                         s->native_fd = fd;
2622
2623                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
2624
2625                         if (s->stdout_fd >= 0) {
2626                                 log_error("Too many stdout sockets passed.");
2627                                 return -EINVAL;
2628                         }
2629
2630                         s->stdout_fd = fd;
2631
2632                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
2633
2634                         if (s->syslog_fd >= 0) {
2635                                 log_error("Too many /dev/log sockets passed.");
2636                                 return -EINVAL;
2637                         }
2638
2639                         s->syslog_fd = fd;
2640
2641                 } else {
2642                         log_error("Unknown socket passed.");
2643                         return -EINVAL;
2644                 }
2645         }
2646
2647         r = open_syslog_socket(s);
2648         if (r < 0)
2649                 return r;
2650
2651         r = open_native_socket(s);
2652         if (r < 0)
2653                 return r;
2654
2655         r = open_stdout_socket(s);
2656         if (r < 0)
2657                 return r;
2658
2659         r = open_proc_kmsg(s);
2660         if (r < 0)
2661                 return r;
2662
2663         r = system_journal_open(s);
2664         if (r < 0)
2665                 return r;
2666
2667         r = open_signalfd(s);
2668         if (r < 0)
2669                 return r;
2670
2671         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2672         if (!s->rate_limit)
2673                 return -ENOMEM;
2674
2675         return 0;
2676 }
2677
2678 static void server_done(Server *s) {
2679         JournalFile *f;
2680         assert(s);
2681
2682         while (s->stdout_streams)
2683                 stdout_stream_free(s->stdout_streams);
2684
2685         if (s->system_journal)
2686                 journal_file_close(s->system_journal);
2687
2688         if (s->runtime_journal)
2689                 journal_file_close(s->runtime_journal);
2690
2691         while ((f = hashmap_steal_first(s->user_journals)))
2692                 journal_file_close(f);
2693
2694         hashmap_free(s->user_journals);
2695
2696         if (s->epoll_fd >= 0)
2697                 close_nointr_nofail(s->epoll_fd);
2698
2699         if (s->signal_fd >= 0)
2700                 close_nointr_nofail(s->signal_fd);
2701
2702         if (s->syslog_fd >= 0)
2703                 close_nointr_nofail(s->syslog_fd);
2704
2705         if (s->native_fd >= 0)
2706                 close_nointr_nofail(s->native_fd);
2707
2708         if (s->stdout_fd >= 0)
2709                 close_nointr_nofail(s->stdout_fd);
2710
2711         if (s->proc_kmsg_fd >= 0)
2712                 close_nointr_nofail(s->proc_kmsg_fd);
2713
2714         if (s->rate_limit)
2715                 journal_rate_limit_free(s->rate_limit);
2716
2717         free(s->buffer);
2718 }
2719
2720 int main(int argc, char *argv[]) {
2721         Server server;
2722         int r;
2723
2724         /* if (getppid() != 1) { */
2725         /*         log_error("This program should be invoked by init only."); */
2726         /*         return EXIT_FAILURE; */
2727         /* } */
2728
2729         if (argc > 1) {
2730                 log_error("This program does not take arguments.");
2731                 return EXIT_FAILURE;
2732         }
2733
2734         log_set_target(LOG_TARGET_CONSOLE);
2735         log_parse_environment();
2736         log_open();
2737
2738         umask(0022);
2739
2740         r = server_init(&server);
2741         if (r < 0)
2742                 goto finish;
2743
2744         server_vacuum(&server);
2745         server_flush_to_var(&server);
2746         server_flush_proc_kmsg(&server);
2747
2748         log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
2749         driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
2750
2751         sd_notify(false,
2752                   "READY=1\n"
2753                   "STATUS=Processing requests...");
2754
2755         for (;;) {
2756                 struct epoll_event event;
2757
2758                 r = epoll_wait(server.epoll_fd, &event, 1, -1);
2759                 if (r < 0) {
2760
2761                         if (errno == EINTR)
2762                                 continue;
2763
2764                         log_error("epoll_wait() failed: %m");
2765                         r = -errno;
2766                         goto finish;
2767                 } else if (r == 0)
2768                         break;
2769
2770                 r = process_event(&server, &event);
2771                 if (r < 0)
2772                         goto finish;
2773                 else if (r == 0)
2774                         break;
2775         }
2776
2777         log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
2778         driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
2779
2780 finish:
2781         sd_notify(false,
2782                   "STATUS=Shutting down...");
2783
2784         server_done(&server);
2785
2786         return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
2787 }