chiark / gitweb /
journal: allow setting of a cutoff log level for disk storage, syslog, kmsg, console...
[elogind.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
32
33 #include <systemd/sd-journal.h>
34 #include <systemd/sd-messages.h>
35 #include <systemd/sd-daemon.h>
36
37 #ifdef HAVE_LOGIND
38 #include <systemd/sd-login.h>
39 #endif
40
41 #include "mkdir.h"
42 #include "hashmap.h"
43 #include "journal-file.h"
44 #include "socket-util.h"
45 #include "cgroup-util.h"
46 #include "list.h"
47 #include "journal-rate-limit.h"
48 #include "journal-internal.h"
49 #include "conf-parser.h"
50 #include "journald.h"
51 #include "virt.h"
52 #include "missing.h"
53
54 #ifdef HAVE_ACL
55 #include <sys/acl.h>
56 #include <acl/libacl.h>
57 #include "acl-util.h"
58 #endif
59
60 #ifdef HAVE_SELINUX
61 #include <selinux/selinux.h>
62 #endif
63
64 #define USER_JOURNALS_MAX 1024
65 #define STDOUT_STREAMS_MAX 4096
66
67 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
68 #define DEFAULT_RATE_LIMIT_BURST 200
69
70 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
71
72 #define RECHECK_VAR_AVAILABLE_USEC (30*USEC_PER_SEC)
73
74 #define N_IOVEC_META_FIELDS 17
75
76 #define ENTRY_SIZE_MAX (1024*1024*32)
77
/* State of a stdout stream connection (see struct StdoutStream).
 *
 * NOTE(review): all states except STDOUT_STREAM_RUNNING are named after
 * the per-stream settings stored in struct StdoutStream (identifier,
 * priority, level_prefix, forward_to_*), so they presumably track which
 * header field is expected next before payload lines are processed --
 * confirm against the stream parser, which is not part of this excerpt. */
typedef enum StdoutStreamState {
        STDOUT_STREAM_IDENTIFIER,
        STDOUT_STREAM_PRIORITY,
        STDOUT_STREAM_LEVEL_PREFIX,
        STDOUT_STREAM_FORWARD_TO_SYSLOG,
        STDOUT_STREAM_FORWARD_TO_KMSG,
        STDOUT_STREAM_FORWARD_TO_CONSOLE,
        STDOUT_STREAM_RUNNING
} StdoutStreamState;
87
/* Per-connection bookkeeping for one stdout stream client. */
struct StdoutStream {
        /* Owning server and current state of this stream */
        Server *server;
        StdoutStreamState state;

        /* File descriptor of the connection */
        int fd;

        /* Credentials associated with the stream; presumably those of the
         * connecting peer -- TODO confirm where they are filled in */
        struct ucred ucred;
#ifdef HAVE_SELINUX
        security_context_t security_context;
#endif

        /* Per-stream settings; the names mirror the StdoutStreamState
         * values of the same name */
        char *identifier;
        int priority;
        bool level_prefix:1;
        bool forward_to_syslog:1;
        bool forward_to_kmsg:1;
        bool forward_to_console:1;

        /* Input buffer of LINE_MAX bytes plus one spare byte, and the
         * number of bytes currently used in it (presumably -- verify) */
        char buffer[LINE_MAX+1];
        size_t length;

        /* Linkage for the list of stdout streams */
        LIST_FIELDS(StdoutStream, stdout_stream);
};
111
112 static int server_flush_to_var(Server *s);
113
114 static uint64_t available_space(Server *s) {
115         char ids[33], *p;
116         const char *f;
117         sd_id128_t machine;
118         struct statvfs ss;
119         uint64_t sum = 0, avail = 0, ss_avail = 0;
120         int r;
121         DIR *d;
122         usec_t ts;
123         JournalMetrics *m;
124
125         ts = now(CLOCK_MONOTONIC);
126
127         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
128                 return s->cached_available_space;
129
130         r = sd_id128_get_machine(&machine);
131         if (r < 0)
132                 return 0;
133
134         if (s->system_journal) {
135                 f = "/var/log/journal/";
136                 m = &s->system_metrics;
137         } else {
138                 f = "/run/log/journal/";
139                 m = &s->runtime_metrics;
140         }
141
142         assert(m);
143
144         p = strappend(f, sd_id128_to_string(machine, ids));
145         if (!p)
146                 return 0;
147
148         d = opendir(p);
149         free(p);
150
151         if (!d)
152                 return 0;
153
154         if (fstatvfs(dirfd(d), &ss) < 0)
155                 goto finish;
156
157         for (;;) {
158                 struct stat st;
159                 struct dirent buf, *de;
160
161                 r = readdir_r(d, &buf, &de);
162                 if (r != 0)
163                         break;
164
165                 if (!de)
166                         break;
167
168                 if (!endswith(de->d_name, ".journal") &&
169                     !endswith(de->d_name, ".journal~"))
170                         continue;
171
172                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
173                         continue;
174
175                 if (!S_ISREG(st.st_mode))
176                         continue;
177
178                 sum += (uint64_t) st.st_blocks * 512UL;
179         }
180
181         avail = sum >= m->max_use ? 0 : m->max_use - sum;
182
183         ss_avail = ss.f_bsize * ss.f_bavail;
184
185         ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
186
187         if (ss_avail < avail)
188                 avail = ss_avail;
189
190         s->cached_available_space = avail;
191         s->cached_available_space_timestamp = ts;
192
193 finish:
194         closedir(d);
195
196         return avail;
197 }
198
199 static void server_read_file_gid(Server *s) {
200         const char *adm = "adm";
201         int r;
202
203         assert(s);
204
205         if (s->file_gid_valid)
206                 return;
207
208         r = get_group_creds(&adm, &s->file_gid);
209         if (r < 0)
210                 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
211
212         /* if we couldn't read the gid, then it will be 0, but that's
213          * fine and we shouldn't try to resolve the group again, so
214          * let's just pretend it worked right-away. */
215         s->file_gid_valid = true;
216 }
217
218 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
219         int r;
220 #ifdef HAVE_ACL
221         acl_t acl;
222         acl_entry_t entry;
223         acl_permset_t permset;
224 #endif
225
226         assert(f);
227
228         server_read_file_gid(s);
229
230         r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
231         if (r < 0)
232                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
233
234 #ifdef HAVE_ACL
235         if (uid <= 0)
236                 return;
237
238         acl = acl_get_fd(f->fd);
239         if (!acl) {
240                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
241                 return;
242         }
243
244         r = acl_find_uid(acl, uid, &entry);
245         if (r <= 0) {
246
247                 if (acl_create_entry(&acl, &entry) < 0 ||
248                     acl_set_tag_type(entry, ACL_USER) < 0 ||
249                     acl_set_qualifier(entry, &uid) < 0) {
250                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
251                         goto finish;
252                 }
253         }
254
255         if (acl_get_permset(entry, &permset) < 0 ||
256             acl_add_perm(permset, ACL_READ) < 0 ||
257             acl_calc_mask(&acl) < 0) {
258                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
259                 goto finish;
260         }
261
262         if (acl_set_fd(f->fd, acl) < 0)
263                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
264
265 finish:
266         acl_free(acl);
267 #endif
268 }
269
270 static JournalFile* find_journal(Server *s, uid_t uid) {
271         char *p;
272         int r;
273         JournalFile *f;
274         char ids[33];
275         sd_id128_t machine;
276
277         assert(s);
278
279         /* We split up user logs only on /var, not on /run. If the
280          * runtime file is open, we write to it exclusively, in order
281          * to guarantee proper order as soon as we flush /run to
282          * /var and close the runtime file. */
283
284         if (s->runtime_journal)
285                 return s->runtime_journal;
286
287         if (uid <= 0)
288                 return s->system_journal;
289
290         r = sd_id128_get_machine(&machine);
291         if (r < 0)
292                 return s->system_journal;
293
294         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
295         if (f)
296                 return f;
297
298         if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0)
299                 return s->system_journal;
300
301         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
302                 /* Too many open? Then let's close one */
303                 f = hashmap_steal_first(s->user_journals);
304                 assert(f);
305                 journal_file_close(f);
306         }
307
308         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->system_journal, &f);
309         free(p);
310
311         if (r < 0)
312                 return s->system_journal;
313
314         server_fix_perms(s, f, uid);
315
316         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
317         if (r < 0) {
318                 journal_file_close(f);
319                 return s->system_journal;
320         }
321
322         return f;
323 }
324
325 static void server_rotate(Server *s) {
326         JournalFile *f;
327         void *k;
328         Iterator i;
329         int r;
330
331         log_info("Rotating...");
332
333         if (s->runtime_journal) {
334                 r = journal_file_rotate(&s->runtime_journal);
335                 if (r < 0)
336                         if (s->runtime_journal)
337                                 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
338                         else
339                                 log_error("Failed to create new runtime journal: %s", strerror(-r));
340                 else
341                         server_fix_perms(s, s->runtime_journal, 0);
342         }
343
344         if (s->system_journal) {
345                 r = journal_file_rotate(&s->system_journal);
346                 if (r < 0)
347                         if (s->system_journal)
348                                 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
349                         else
350                                 log_error("Failed to create new system journal: %s", strerror(-r));
351
352                 else
353                         server_fix_perms(s, s->system_journal, 0);
354         }
355
356         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
357                 r = journal_file_rotate(&f);
358                 if (r < 0)
359                         if (f->path)
360                                 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
361                         else
362                                 log_error("Failed to create user journal: %s", strerror(-r));
363                 else {
364                         hashmap_replace(s->user_journals, k, f);
365                         server_fix_perms(s, s->system_journal, PTR_TO_UINT32(k));
366                 }
367         }
368 }
369
370 static void server_vacuum(Server *s) {
371         char *p;
372         char ids[33];
373         sd_id128_t machine;
374         int r;
375
376         log_info("Vacuuming...");
377
378         r = sd_id128_get_machine(&machine);
379         if (r < 0) {
380                 log_error("Failed to get machine ID: %s", strerror(-r));
381                 return;
382         }
383
384         sd_id128_to_string(machine, ids);
385
386         if (s->system_journal) {
387                 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
388                         log_error("Out of memory.");
389                         return;
390                 }
391
392                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
393                 if (r < 0 && r != -ENOENT)
394                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
395                 free(p);
396         }
397
398
399         if (s->runtime_journal) {
400                 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
401                         log_error("Out of memory.");
402                         return;
403                 }
404
405                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
406                 if (r < 0 && r != -ENOENT)
407                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
408                 free(p);
409         }
410
411         s->cached_available_space_timestamp = 0;
412 }
413
414 static char *shortened_cgroup_path(pid_t pid) {
415         int r;
416         char *process_path, *init_path, *path;
417
418         assert(pid > 0);
419
420         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
421         if (r < 0)
422                 return NULL;
423
424         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
425         if (r < 0) {
426                 free(process_path);
427                 return NULL;
428         }
429
430         if (endswith(init_path, "/system"))
431                 init_path[strlen(init_path) - 7] = 0;
432         else if (streq(init_path, "/"))
433                 init_path[0] = 0;
434
435         if (startswith(process_path, init_path)) {
436                 char *p;
437
438                 p = strdup(process_path + strlen(init_path));
439                 if (!p) {
440                         free(process_path);
441                         free(init_path);
442                         return NULL;
443                 }
444                 path = p;
445         } else {
446                 path = process_path;
447                 process_path = NULL;
448         }
449
450         free(process_path);
451         free(init_path);
452
453         return path;
454 }
455
456 static void dispatch_message_real(
457                 Server *s,
458                 struct iovec *iovec, unsigned n, unsigned m,
459                 struct ucred *ucred,
460                 struct timeval *tv,
461                 const char *label, size_t label_len) {
462
463         char *pid = NULL, *uid = NULL, *gid = NULL,
464                 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
465                 *comm = NULL, *cmdline = NULL, *hostname = NULL,
466                 *audit_session = NULL, *audit_loginuid = NULL,
467                 *exe = NULL, *cgroup = NULL, *session = NULL,
468                 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
469
470         char idbuf[33];
471         sd_id128_t id;
472         int r;
473         char *t;
474         uid_t loginuid = 0, realuid = 0;
475         JournalFile *f;
476         bool vacuumed = false;
477
478         assert(s);
479         assert(iovec);
480         assert(n > 0);
481         assert(n + N_IOVEC_META_FIELDS <= m);
482
483         if (ucred) {
484                 uint32_t audit;
485 #ifdef HAVE_LOGIND
486                 uid_t owner;
487 #endif
488
489                 realuid = ucred->uid;
490
491                 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
492                         IOVEC_SET_STRING(iovec[n++], pid);
493
494                 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
495                         IOVEC_SET_STRING(iovec[n++], uid);
496
497                 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
498                         IOVEC_SET_STRING(iovec[n++], gid);
499
500                 r = get_process_comm(ucred->pid, &t);
501                 if (r >= 0) {
502                         comm = strappend("_COMM=", t);
503                         free(t);
504
505                         if (comm)
506                                 IOVEC_SET_STRING(iovec[n++], comm);
507                 }
508
509                 r = get_process_exe(ucred->pid, &t);
510                 if (r >= 0) {
511                         exe = strappend("_EXE=", t);
512                         free(t);
513
514                         if (exe)
515                                 IOVEC_SET_STRING(iovec[n++], exe);
516                 }
517
518                 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
519                 if (r >= 0) {
520                         cmdline = strappend("_CMDLINE=", t);
521                         free(t);
522
523                         if (cmdline)
524                                 IOVEC_SET_STRING(iovec[n++], cmdline);
525                 }
526
527                 r = audit_session_from_pid(ucred->pid, &audit);
528                 if (r >= 0)
529                         if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
530                                 IOVEC_SET_STRING(iovec[n++], audit_session);
531
532                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
533                 if (r >= 0)
534                         if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
535                                 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
536
537                 t = shortened_cgroup_path(ucred->pid);
538                 if (t) {
539                         cgroup = strappend("_SYSTEMD_CGROUP=", t);
540                         free(t);
541
542                         if (cgroup)
543                                 IOVEC_SET_STRING(iovec[n++], cgroup);
544                 }
545
546 #ifdef HAVE_LOGIND
547                 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
548                         session = strappend("_SYSTEMD_SESSION=", t);
549                         free(t);
550
551                         if (session)
552                                 IOVEC_SET_STRING(iovec[n++], session);
553                 }
554
555                 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
556                         if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
557                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
558 #endif
559
560                 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
561                         unit = strappend("_SYSTEMD_UNIT=", t);
562                         free(t);
563
564                         if (unit)
565                                 IOVEC_SET_STRING(iovec[n++], unit);
566                 }
567
568 #ifdef HAVE_SELINUX
569                 if (label) {
570                         selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
571                         if (selinux_context) {
572                                 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
573                                 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
574                                 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
575                                 IOVEC_SET_STRING(iovec[n++], selinux_context);
576                         }
577                 } else {
578                         security_context_t con;
579
580                         if (getpidcon(ucred->pid, &con) >= 0) {
581                                 selinux_context = strappend("_SELINUX_CONTEXT=", con);
582                                 if (selinux_context)
583                                         IOVEC_SET_STRING(iovec[n++], selinux_context);
584
585                                 freecon(con);
586                         }
587                 }
588 #endif
589         }
590
591         if (tv) {
592                 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
593                              (unsigned long long) timeval_load(tv)) >= 0)
594                         IOVEC_SET_STRING(iovec[n++], source_time);
595         }
596
597         /* Note that strictly speaking storing the boot id here is
598          * redundant since the entry includes this in-line
599          * anyway. However, we need this indexed, too. */
600         r = sd_id128_get_boot(&id);
601         if (r >= 0)
602                 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
603                         IOVEC_SET_STRING(iovec[n++], boot_id);
604
605         r = sd_id128_get_machine(&id);
606         if (r >= 0)
607                 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
608                         IOVEC_SET_STRING(iovec[n++], machine_id);
609
610         t = gethostname_malloc();
611         if (t) {
612                 hostname = strappend("_HOSTNAME=", t);
613                 free(t);
614                 if (hostname)
615                         IOVEC_SET_STRING(iovec[n++], hostname);
616         }
617
618         assert(n <= m);
619
620         server_flush_to_var(s);
621
622 retry:
623         f = find_journal(s, realuid == 0 ? 0 : loginuid);
624         if (!f)
625                 log_warning("Dropping message, as we can't find a place to store the data.");
626         else {
627                 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
628
629                 if ((r == -E2BIG || /* hit limit */
630                      r == -EFBIG || /* hit fs limit */
631                      r == -EDQUOT || /* quota hit */
632                      r == -ENOSPC || /* disk full */
633                      r == -EBADMSG || /* corrupted */
634                      r == -ENODATA || /* truncated */
635                      r == -EHOSTDOWN || /* other machine */
636                      r == -EPROTONOSUPPORT) && /* unsupported feature */
637                     !vacuumed) {
638
639                         if (r == -E2BIG)
640                                 log_info("Allocation limit reached, rotating.");
641                         else
642                                 log_warning("Journal file corrupted, rotating.");
643
644                         server_rotate(s);
645                         server_vacuum(s);
646                         vacuumed = true;
647
648                         log_info("Retrying write.");
649                         goto retry;
650                 }
651
652                 if (r < 0)
653                         log_error("Failed to write entry, ignoring: %s", strerror(-r));
654         }
655
656         free(pid);
657         free(uid);
658         free(gid);
659         free(comm);
660         free(exe);
661         free(cmdline);
662         free(source_time);
663         free(boot_id);
664         free(machine_id);
665         free(hostname);
666         free(audit_session);
667         free(audit_loginuid);
668         free(cgroup);
669         free(session);
670         free(owner_uid);
671         free(unit);
672         free(selinux_context);
673 }
674
675 static void driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
676         char mid[11 + 32 + 1];
677         char buffer[16 + LINE_MAX + 1];
678         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
679         int n = 0;
680         va_list ap;
681         struct ucred ucred;
682
683         assert(s);
684         assert(format);
685
686         IOVEC_SET_STRING(iovec[n++], "PRIORITY=5");
687         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
688
689         memcpy(buffer, "MESSAGE=", 8);
690         va_start(ap, format);
691         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
692         va_end(ap);
693         char_array_0(buffer);
694         IOVEC_SET_STRING(iovec[n++], buffer);
695
696         snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
697         char_array_0(mid);
698         IOVEC_SET_STRING(iovec[n++], mid);
699
700         zero(ucred);
701         ucred.pid = getpid();
702         ucred.uid = getuid();
703         ucred.gid = getgid();
704
705         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0);
706 }
707
708 static void dispatch_message(Server *s,
709                              struct iovec *iovec, unsigned n, unsigned m,
710                              struct ucred *ucred,
711                              struct timeval *tv,
712                              const char *label, size_t label_len,
713                              int priority) {
714         int rl;
715         char *path = NULL, *c;
716
717         assert(s);
718         assert(iovec || n == 0);
719
720         if (n == 0)
721                 return;
722
723         if (LOG_PRI(priority) > s->max_level_store)
724                 return;
725
726         if (!ucred)
727                 goto finish;
728
729         path = shortened_cgroup_path(ucred->pid);
730         if (!path)
731                 goto finish;
732
733         /* example: /user/lennart/3/foobar
734          *          /system/dbus.service/foobar
735          *
736          * So let's cut of everything past the third /, since that is
737          * wher user directories start */
738
739         c = strchr(path, '/');
740         if (c) {
741                 c = strchr(c+1, '/');
742                 if (c) {
743                         c = strchr(c+1, '/');
744                         if (c)
745                                 *c = 0;
746                 }
747         }
748
749         rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
750
751         if (rl == 0) {
752                 free(path);
753                 return;
754         }
755
756         /* Write a suppression message if we suppressed something */
757         if (rl > 1)
758                 driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
759
760         free(path);
761
762 finish:
763         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len);
764 }
765
766 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
767         struct msghdr msghdr;
768         struct cmsghdr *cmsg;
769         union {
770                 struct cmsghdr cmsghdr;
771                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
772         } control;
773         union sockaddr_union sa;
774
775         assert(s);
776         assert(iovec);
777         assert(n_iovec > 0);
778
779         zero(msghdr);
780         msghdr.msg_iov = (struct iovec*) iovec;
781         msghdr.msg_iovlen = n_iovec;
782
783         zero(sa);
784         sa.un.sun_family = AF_UNIX;
785         strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
786         msghdr.msg_name = &sa;
787         msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
788
789         if (ucred) {
790                 zero(control);
791                 msghdr.msg_control = &control;
792                 msghdr.msg_controllen = sizeof(control);
793
794                 cmsg = CMSG_FIRSTHDR(&msghdr);
795                 cmsg->cmsg_level = SOL_SOCKET;
796                 cmsg->cmsg_type = SCM_CREDENTIALS;
797                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
798                 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
799                 msghdr.msg_controllen = cmsg->cmsg_len;
800         }
801
802         /* Forward the syslog message we received via /dev/log to
803          * /run/systemd/syslog. Unfortunately we currently can't set
804          * the SO_TIMESTAMP auxiliary data, and hence we don't. */
805
806         if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
807                 return;
808
809         /* The socket is full? I guess the syslog implementation is
810          * too slow, and we shouldn't wait for that... */
811         if (errno == EAGAIN)
812                 return;
813
814         if (ucred && errno == ESRCH) {
815                 struct ucred u;
816
817                 /* Hmm, presumably the sender process vanished
818                  * by now, so let's fix it as good as we
819                  * can, and retry */
820
821                 u = *ucred;
822                 u.pid = getpid();
823                 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
824
825                 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
826                         return;
827
828                 if (errno == EAGAIN)
829                         return;
830         }
831
832         log_debug("Failed to forward syslog message: %m");
833 }
834
835 static void forward_syslog_raw(Server *s, int priority, const char *buffer, struct ucred *ucred, struct timeval *tv) {
836         struct iovec iovec;
837
838         assert(s);
839         assert(buffer);
840
841         if (LOG_PRI(priority) > s->max_level_syslog)
842                 return;
843
844         IOVEC_SET_STRING(iovec, buffer);
845         forward_syslog_iovec(s, &iovec, 1, ucred, tv);
846 }
847
848 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
849         struct iovec iovec[5];
850         char header_priority[6], header_time[64], header_pid[16];
851         int n = 0;
852         time_t t;
853         struct tm *tm;
854         char *ident_buf = NULL;
855
856         assert(s);
857         assert(priority >= 0);
858         assert(priority <= 999);
859         assert(message);
860
861         if (LOG_PRI(priority) > s->max_level_syslog)
862                 return;
863
864         /* First: priority field */
865         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
866         char_array_0(header_priority);
867         IOVEC_SET_STRING(iovec[n++], header_priority);
868
869         /* Second: timestamp */
870         t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
871         tm = localtime(&t);
872         if (!tm)
873                 return;
874         if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
875                 return;
876         IOVEC_SET_STRING(iovec[n++], header_time);
877
878         /* Third: identifier and PID */
879         if (ucred) {
880                 if (!identifier) {
881                         get_process_comm(ucred->pid, &ident_buf);
882                         identifier = ident_buf;
883                 }
884
885                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
886                 char_array_0(header_pid);
887
888                 if (identifier)
889                         IOVEC_SET_STRING(iovec[n++], identifier);
890
891                 IOVEC_SET_STRING(iovec[n++], header_pid);
892         } else if (identifier) {
893                 IOVEC_SET_STRING(iovec[n++], identifier);
894                 IOVEC_SET_STRING(iovec[n++], ": ");
895         }
896
897         /* Fourth: message */
898         IOVEC_SET_STRING(iovec[n++], message);
899
900         forward_syslog_iovec(s, iovec, n, ucred, tv);
901
902         free(ident_buf);
903 }
904
/* Returns the priority unchanged if it carries a non-zero facility.
 * Otherwise (facility 0, i.e. the kernel facility) the level bits are
 * kept and the facility is replaced by LOG_USER. */
static int fixup_priority(int priority) {
        return (priority & LOG_FACMASK) != 0
                ? priority
                : ((priority & LOG_PRIMASK) | LOG_USER);
}
912
913 static void forward_kmsg(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
914         struct iovec iovec[5];
915         char header_priority[6], header_pid[16];
916         int n = 0;
917         char *ident_buf = NULL;
918         int fd;
919
920         assert(s);
921         assert(priority >= 0);
922         assert(priority <= 999);
923         assert(message);
924
925         if (LOG_PRI(priority) > s->max_level_kmsg)
926                 return;
927
928         /* Never allow messages with kernel facility to be written to
929          * kmsg, regardless where the data comes from. */
930         priority = fixup_priority(priority);
931
932         /* First: priority field */
933         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
934         char_array_0(header_priority);
935         IOVEC_SET_STRING(iovec[n++], header_priority);
936
937         /* Second: identifier and PID */
938         if (ucred) {
939                 if (!identifier) {
940                         get_process_comm(ucred->pid, &ident_buf);
941                         identifier = ident_buf;
942                 }
943
944                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
945                 char_array_0(header_pid);
946
947                 if (identifier)
948                         IOVEC_SET_STRING(iovec[n++], identifier);
949
950                 IOVEC_SET_STRING(iovec[n++], header_pid);
951         } else if (identifier) {
952                 IOVEC_SET_STRING(iovec[n++], identifier);
953                 IOVEC_SET_STRING(iovec[n++], ": ");
954         }
955
956         /* Fourth: message */
957         IOVEC_SET_STRING(iovec[n++], message);
958         IOVEC_SET_STRING(iovec[n++], "\n");
959
960         fd = open("/dev/kmsg", O_WRONLY|O_NOCTTY|O_CLOEXEC);
961         if (fd < 0) {
962                 log_debug("Failed to open /dev/kmsg for logging: %s", strerror(errno));
963                 goto finish;
964         }
965
966         if (writev(fd, iovec, n) < 0)
967                 log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
968
969         close_nointr_nofail(fd);
970
971 finish:
972         free(ident_buf);
973 }
974
975 static void forward_console(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
976         struct iovec iovec[4];
977         char header_pid[16];
978         int n = 0, fd;
979         char *ident_buf = NULL;
980         const char *tty;
981
982         assert(s);
983         assert(message);
984
985         if (LOG_PRI(priority) > s->max_level_console)
986                 return;
987
988         /* First: identifier and PID */
989         if (ucred) {
990                 if (!identifier) {
991                         get_process_comm(ucred->pid, &ident_buf);
992                         identifier = ident_buf;
993                 }
994
995                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
996                 char_array_0(header_pid);
997
998                 if (identifier)
999                         IOVEC_SET_STRING(iovec[n++], identifier);
1000
1001                 IOVEC_SET_STRING(iovec[n++], header_pid);
1002         } else if (identifier) {
1003                 IOVEC_SET_STRING(iovec[n++], identifier);
1004                 IOVEC_SET_STRING(iovec[n++], ": ");
1005         }
1006
1007         /* Third: message */
1008         IOVEC_SET_STRING(iovec[n++], message);
1009         IOVEC_SET_STRING(iovec[n++], "\n");
1010
1011         tty = s->tty_path ? s->tty_path : "/dev/console";
1012
1013         fd = open_terminal(tty, O_WRONLY|O_NOCTTY|O_CLOEXEC);
1014         if (fd < 0) {
1015                 log_debug("Failed to open %s for logging: %s", tty, strerror(errno));
1016                 goto finish;
1017         }
1018
1019         if (writev(fd, iovec, n) < 0)
1020                 log_debug("Failed to write to %s for logging: %s", tty, strerror(errno));
1021
1022         close_nointr_nofail(fd);
1023
1024 finish:
1025         free(ident_buf);
1026 }
1027
1028 static void read_identifier(const char **buf, char **identifier, char **pid) {
1029         const char *p;
1030         char *t;
1031         size_t l, e;
1032
1033         assert(buf);
1034         assert(identifier);
1035         assert(pid);
1036
1037         p = *buf;
1038
1039         p += strspn(p, WHITESPACE);
1040         l = strcspn(p, WHITESPACE);
1041
1042         if (l <= 0 ||
1043             p[l-1] != ':')
1044                 return;
1045
1046         e = l;
1047         l--;
1048
1049         if (p[l-1] == ']') {
1050                 size_t k = l-1;
1051
1052                 for (;;) {
1053
1054                         if (p[k] == '[') {
1055                                 t = strndup(p+k+1, l-k-2);
1056                                 if (t)
1057                                         *pid = t;
1058
1059                                 l = k;
1060                                 break;
1061                         }
1062
1063                         if (k == 0)
1064                                 break;
1065
1066                         k--;
1067                 }
1068         }
1069
1070         t = strndup(p, l);
1071         if (t)
1072                 *identifier = t;
1073
1074         *buf = p + e;
1075         *buf += strspn(*buf, WHITESPACE);
1076 }
1077
1078 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv, const char *label, size_t label_len) {
1079         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
1080         struct iovec iovec[N_IOVEC_META_FIELDS + 6];
1081         unsigned n = 0;
1082         int priority = LOG_USER | LOG_INFO;
1083         char *identifier = NULL, *pid = NULL;
1084         const char *orig;
1085
1086         assert(s);
1087         assert(buf);
1088
1089         orig = buf;
1090         parse_syslog_priority((char**) &buf, &priority);
1091
1092         if (s->forward_to_syslog)
1093                 forward_syslog_raw(s, priority, orig, ucred, tv);
1094
1095         skip_syslog_date((char**) &buf);
1096         read_identifier(&buf, &identifier, &pid);
1097
1098         if (s->forward_to_kmsg)
1099                 forward_kmsg(s, priority, identifier, buf, ucred);
1100
1101         if (s->forward_to_console)
1102                 forward_console(s, priority, identifier, buf, ucred);
1103
1104         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
1105
1106         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1107                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1108
1109         if (priority & LOG_FACMASK)
1110                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1111                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1112
1113         if (identifier) {
1114                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1115                 if (syslog_identifier)
1116                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1117         }
1118
1119         if (pid) {
1120                 syslog_pid = strappend("SYSLOG_PID=", pid);
1121                 if (syslog_pid)
1122                         IOVEC_SET_STRING(iovec[n++], syslog_pid);
1123         }
1124
1125         message = strappend("MESSAGE=", buf);
1126         if (message)
1127                 IOVEC_SET_STRING(iovec[n++], message);
1128
1129         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, label, label_len, priority);
1130
1131         free(message);
1132         free(identifier);
1133         free(pid);
1134         free(syslog_priority);
1135         free(syslog_facility);
1136         free(syslog_identifier);
1137 }
1138
/* Checks whether the l bytes at p form a field name that clients are
 * allowed to set.
 *
 * This loosely follows the POSIX recommendations for environment variable
 * names, with a few extra restrictions:
 *
 *   http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
 *
 * A valid name is 1..64 characters long, consists only of A-Z, 0-9 and
 * '_', and starts with neither a digit nor an underscore (names with a
 * leading underscore are protected). */
static bool valid_user_field(const char *p, size_t l) {
        size_t i;

        /* Neither empty nor longer than 64 characters */
        if (l == 0 || l > 64)
                return false;

        /* A leading underscore marks a protected field */
        if (p[0] == '_')
                return false;

        /* No digit in first position */
        if (p[0] >= '0' && p[0] <= '9')
                return false;

        /* Everything must be an upper-case letter, a digit or '_' */
        for (i = 0; i < l; i++) {
                char c = p[i];
                bool is_upper = c >= 'A' && c <= 'Z';
                bool is_digit = c >= '0' && c <= '9';

                if (!is_upper && !is_digit && c != '_')
                        return false;
        }

        return true;
}
1173
1174 static void process_native_message(
1175                 Server *s,
1176                 const void *buffer, size_t buffer_size,
1177                 struct ucred *ucred,
1178                 struct timeval *tv,
1179                 const char *label, size_t label_len) {
1180
1181         struct iovec *iovec = NULL;
1182         unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1183         const char *p;
1184         size_t remaining;
1185         int priority = LOG_INFO;
1186         char *identifier = NULL, *message = NULL;
1187
1188         assert(s);
1189         assert(buffer || buffer_size == 0);
1190
1191         p = buffer;
1192         remaining = buffer_size;
1193
1194         while (remaining > 0) {
1195                 const char *e, *q;
1196
1197                 e = memchr(p, '\n', remaining);
1198
1199                 if (!e) {
1200                         /* Trailing noise, let's ignore it, and flush what we collected */
1201                         log_debug("Received message with trailing noise, ignoring.");
1202                         break;
1203                 }
1204
1205                 if (e == p) {
1206                         /* Entry separator */
1207                         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, priority);
1208                         n = 0;
1209                         priority = LOG_INFO;
1210
1211                         p++;
1212                         remaining--;
1213                         continue;
1214                 }
1215
1216                 if (*p == '.' || *p == '#') {
1217                         /* Ignore control commands for now, and
1218                          * comments too. */
1219                         remaining -= (e - p) + 1;
1220                         p = e + 1;
1221                         continue;
1222                 }
1223
1224                 /* A property follows */
1225
1226                 if (n+N_IOVEC_META_FIELDS >= m) {
1227                         struct iovec *c;
1228                         unsigned u;
1229
1230                         u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1231                         c = realloc(iovec, u * sizeof(struct iovec));
1232                         if (!c) {
1233                                 log_error("Out of memory");
1234                                 break;
1235                         }
1236
1237                         iovec = c;
1238                         m = u;
1239                 }
1240
1241                 q = memchr(p, '=', e - p);
1242                 if (q) {
1243                         if (valid_user_field(p, q - p)) {
1244                                 size_t l;
1245
1246                                 l = e - p;
1247
1248                                 /* If the field name starts with an
1249                                  * underscore, skip the variable,
1250                                  * since that indidates a trusted
1251                                  * field */
1252                                 iovec[n].iov_base = (char*) p;
1253                                 iovec[n].iov_len = l;
1254                                 n++;
1255
1256                                 /* We need to determine the priority
1257                                  * of this entry for the rate limiting
1258                                  * logic */
1259                                 if (l == 10 &&
1260                                     memcmp(p, "PRIORITY=", 9) == 0 &&
1261                                     p[9] >= '0' && p[9] <= '9')
1262                                         priority = (priority & LOG_FACMASK) | (p[9] - '0');
1263
1264                                 else if (l == 17 &&
1265                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1266                                          p[16] >= '0' && p[16] <= '9')
1267                                         priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1268
1269                                 else if (l == 18 &&
1270                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1271                                          p[16] >= '0' && p[16] <= '9' &&
1272                                          p[17] >= '0' && p[17] <= '9')
1273                                         priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1274
1275                                 else if (l >= 19 &&
1276                                          memcmp(p, "SYSLOG_IDENTIFIER=", 18) == 0) {
1277                                         char *t;
1278
1279                                         t = strndup(p + 18, l - 18);
1280                                         if (t) {
1281                                                 free(identifier);
1282                                                 identifier = t;
1283                                         }
1284                                 } else if (l >= 8 &&
1285                                            memcmp(p, "MESSAGE=", 8) == 0) {
1286                                         char *t;
1287
1288                                         t = strndup(p + 8, l - 8);
1289                                         if (t) {
1290                                                 free(message);
1291                                                 message = t;
1292                                         }
1293                                 }
1294                         }
1295
1296                         remaining -= (e - p) + 1;
1297                         p = e + 1;
1298                         continue;
1299                 } else {
1300                         le64_t l_le;
1301                         uint64_t l;
1302                         char *k;
1303
1304                         if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1305                                 log_debug("Failed to parse message, ignoring.");
1306                                 break;
1307                         }
1308
1309                         memcpy(&l_le, e + 1, sizeof(uint64_t));
1310                         l = le64toh(l_le);
1311
1312                         if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1313                             e[1+sizeof(uint64_t)+l] != '\n') {
1314                                 log_debug("Failed to parse message, ignoring.");
1315                                 break;
1316                         }
1317
1318                         k = malloc((e - p) + 1 + l);
1319                         if (!k) {
1320                                 log_error("Out of memory");
1321                                 break;
1322                         }
1323
1324                         memcpy(k, p, e - p);
1325                         k[e - p] = '=';
1326                         memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1327
1328                         if (valid_user_field(p, e - p)) {
1329                                 iovec[n].iov_base = k;
1330                                 iovec[n].iov_len = (e - p) + 1 + l;
1331                                 n++;
1332                         } else
1333                                 free(k);
1334
1335                         remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1336                         p = e + 1 + sizeof(uint64_t) + l + 1;
1337                 }
1338         }
1339
1340         if (n <= 0)
1341                 goto finish;
1342
1343         tn = n++;
1344         IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1345
1346         if (message) {
1347                 if (s->forward_to_syslog)
1348                         forward_syslog(s, priority, identifier, message, ucred, tv);
1349
1350                 if (s->forward_to_kmsg)
1351                         forward_kmsg(s, priority, identifier, message, ucred);
1352
1353                 if (s->forward_to_console)
1354                         forward_console(s, priority, identifier, message, ucred);
1355         }
1356
1357         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, priority);
1358
1359 finish:
1360         for (j = 0; j < n; j++)  {
1361                 if (j == tn)
1362                         continue;
1363
1364                 if (iovec[j].iov_base < buffer ||
1365                     (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1366                         free(iovec[j].iov_base);
1367         }
1368
1369         free(iovec);
1370         free(identifier);
1371         free(message);
1372 }
1373
1374 static void process_native_file(
1375                 Server *s,
1376                 int fd,
1377                 struct ucred *ucred,
1378                 struct timeval *tv,
1379                 const char *label, size_t label_len) {
1380
1381         struct stat st;
1382         void *p;
1383         ssize_t n;
1384
1385         assert(s);
1386         assert(fd >= 0);
1387
1388         /* Data is in the passed file, since it didn't fit in a
1389          * datagram. We can't map the file here, since clients might
1390          * then truncate it and trigger a SIGBUS for us. So let's
1391          * stupidly read it */
1392
1393         if (fstat(fd, &st) < 0) {
1394                 log_error("Failed to stat passed file, ignoring: %m");
1395                 return;
1396         }
1397
1398         if (!S_ISREG(st.st_mode)) {
1399                 log_error("File passed is not regular. Ignoring.");
1400                 return;
1401         }
1402
1403         if (st.st_size <= 0)
1404                 return;
1405
1406         if (st.st_size > ENTRY_SIZE_MAX) {
1407                 log_error("File passed too large. Ignoring.");
1408                 return;
1409         }
1410
1411         p = malloc(st.st_size);
1412         if (!p) {
1413                 log_error("Out of memory");
1414                 return;
1415         }
1416
1417         n = pread(fd, p, st.st_size, 0);
1418         if (n < 0)
1419                 log_error("Failed to read file, ignoring: %s", strerror(-n));
1420         else if (n > 0)
1421                 process_native_message(s, p, n, ucred, tv, label, label_len);
1422
1423         free(p);
1424 }
1425
1426 static int stdout_stream_log(StdoutStream *s, const char *p) {
1427         struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1428         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1429         unsigned n = 0;
1430         int priority;
1431         char *label = NULL;
1432         size_t label_len = 0;
1433
1434         assert(s);
1435         assert(p);
1436
1437         if (isempty(p))
1438                 return 0;
1439
1440         priority = s->priority;
1441
1442         if (s->level_prefix)
1443                 parse_syslog_priority((char**) &p, &priority);
1444
1445         if (s->forward_to_syslog || s->server->forward_to_syslog)
1446                 forward_syslog(s->server, fixup_priority(priority), s->identifier, p, &s->ucred, NULL);
1447
1448         if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1449                 forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1450
1451         if (s->forward_to_console || s->server->forward_to_console)
1452                 forward_console(s->server, priority, s->identifier, p, &s->ucred);
1453
1454         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1455
1456         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1457                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1458
1459         if (priority & LOG_FACMASK)
1460                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1461                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1462
1463         if (s->identifier) {
1464                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1465                 if (syslog_identifier)
1466                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1467         }
1468
1469         message = strappend("MESSAGE=", p);
1470         if (message)
1471                 IOVEC_SET_STRING(iovec[n++], message);
1472
1473 #ifdef HAVE_SELINUX
1474         if (s->security_context) {
1475                 label = (char*) s->security_context;
1476                 label_len = strlen((char*) s->security_context);
1477         }
1478 #endif
1479
1480         dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, label, label_len, priority);
1481
1482         free(message);
1483         free(syslog_priority);
1484         free(syslog_facility);
1485         free(syslog_identifier);
1486
1487         return 0;
1488 }
1489
1490 static int stdout_stream_line(StdoutStream *s, char *p) {
1491         int r;
1492
1493         assert(s);
1494         assert(p);
1495
1496         p = strstrip(p);
1497
1498         switch (s->state) {
1499
1500         case STDOUT_STREAM_IDENTIFIER:
1501                 if (isempty(p))
1502                         s->identifier = NULL;
1503                 else  {
1504                         s->identifier = strdup(p);
1505                         if (!s->identifier) {
1506                                 log_error("Out of memory");
1507                                 return -ENOMEM;
1508                         }
1509                 }
1510
1511                 s->state = STDOUT_STREAM_PRIORITY;
1512                 return 0;
1513
1514         case STDOUT_STREAM_PRIORITY:
1515                 r = safe_atoi(p, &s->priority);
1516                 if (r < 0 || s->priority <= 0 || s->priority >= 999) {
1517                         log_warning("Failed to parse log priority line.");
1518                         return -EINVAL;
1519                 }
1520
1521                 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1522                 return 0;
1523
1524         case STDOUT_STREAM_LEVEL_PREFIX:
1525                 r = parse_boolean(p);
1526                 if (r < 0) {
1527                         log_warning("Failed to parse level prefix line.");
1528                         return -EINVAL;
1529                 }
1530
1531                 s->level_prefix = !!r;
1532                 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1533                 return 0;
1534
1535         case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1536                 r = parse_boolean(p);
1537                 if (r < 0) {
1538                         log_warning("Failed to parse forward to syslog line.");
1539                         return -EINVAL;
1540                 }
1541
1542                 s->forward_to_syslog = !!r;
1543                 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1544                 return 0;
1545
1546         case STDOUT_STREAM_FORWARD_TO_KMSG:
1547                 r = parse_boolean(p);
1548                 if (r < 0) {
1549                         log_warning("Failed to parse copy to kmsg line.");
1550                         return -EINVAL;
1551                 }
1552
1553                 s->forward_to_kmsg = !!r;
1554                 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1555                 return 0;
1556
1557         case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1558                 r = parse_boolean(p);
1559                 if (r < 0) {
1560                         log_warning("Failed to parse copy to console line.");
1561                         return -EINVAL;
1562                 }
1563
1564                 s->forward_to_console = !!r;
1565                 s->state = STDOUT_STREAM_RUNNING;
1566                 return 0;
1567
1568         case STDOUT_STREAM_RUNNING:
1569                 return stdout_stream_log(s, p);
1570         }
1571
1572         assert_not_reached("Unknown stream state");
1573 }
1574
1575 static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
1576         char *p;
1577         size_t remaining;
1578         int r;
1579
1580         assert(s);
1581
1582         p = s->buffer;
1583         remaining = s->length;
1584         for (;;) {
1585                 char *end;
1586                 size_t skip;
1587
1588                 end = memchr(p, '\n', remaining);
1589                 if (end)
1590                         skip = end - p + 1;
1591                 else if (remaining >= sizeof(s->buffer) - 1) {
1592                         end = p + sizeof(s->buffer) - 1;
1593                         skip = remaining;
1594                 } else
1595                         break;
1596
1597                 *end = 0;
1598
1599                 r = stdout_stream_line(s, p);
1600                 if (r < 0)
1601                         return r;
1602
1603                 remaining -= skip;
1604                 p += skip;
1605         }
1606
1607         if (force_flush && remaining > 0) {
1608                 p[remaining] = 0;
1609                 r = stdout_stream_line(s, p);
1610                 if (r < 0)
1611                         return r;
1612
1613                 p += remaining;
1614                 remaining = 0;
1615         }
1616
1617         if (p > s->buffer) {
1618                 memmove(s->buffer, p, remaining);
1619                 s->length = remaining;
1620         }
1621
1622         return 0;
1623 }
1624
1625 static int stdout_stream_process(StdoutStream *s) {
1626         ssize_t l;
1627         int r;
1628
1629         assert(s);
1630
1631         l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1632         if (l < 0) {
1633
1634                 if (errno == EAGAIN)
1635                         return 0;
1636
1637                 log_warning("Failed to read from stream: %m");
1638                 return -errno;
1639         }
1640
1641         if (l == 0) {
1642                 r = stdout_stream_scan(s, true);
1643                 if (r < 0)
1644                         return r;
1645
1646                 return 0;
1647         }
1648
1649         s->length += l;
1650         r = stdout_stream_scan(s, false);
1651         if (r < 0)
1652                 return r;
1653
1654         return 1;
1655
1656 }
1657
1658 static void stdout_stream_free(StdoutStream *s) {
1659         assert(s);
1660
1661         if (s->server) {
1662                 assert(s->server->n_stdout_streams > 0);
1663                 s->server->n_stdout_streams --;
1664                 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
1665         }
1666
1667         if (s->fd >= 0) {
1668                 if (s->server)
1669                         epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1670
1671                 close_nointr_nofail(s->fd);
1672         }
1673
1674 #ifdef HAVE_SELINUX
1675         if (s->security_context)
1676                 freecon(s->security_context);
1677 #endif
1678
1679         free(s->identifier);
1680         free(s);
1681 }
1682
1683 static int stdout_stream_new(Server *s) {
1684         StdoutStream *stream;
1685         int fd, r;
1686         socklen_t len;
1687         struct epoll_event ev;
1688
1689         assert(s);
1690
1691         fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1692         if (fd < 0) {
1693                 if (errno == EAGAIN)
1694                         return 0;
1695
1696                 log_error("Failed to accept stdout connection: %m");
1697                 return -errno;
1698         }
1699
1700         if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1701                 log_warning("Too many stdout streams, refusing connection.");
1702                 close_nointr_nofail(fd);
1703                 return 0;
1704         }
1705
1706         stream = new0(StdoutStream, 1);
1707         if (!stream) {
1708                 log_error("Out of memory.");
1709                 close_nointr_nofail(fd);
1710                 return -ENOMEM;
1711         }
1712
1713         stream->fd = fd;
1714
1715         len = sizeof(stream->ucred);
1716         if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1717                 log_error("Failed to determine peer credentials: %m");
1718                 r = -errno;
1719                 goto fail;
1720         }
1721
1722 #ifdef HAVE_SELINUX
1723         if (getpeercon(fd, &stream->security_context) < 0 && errno != ENOPROTOOPT)
1724                 log_error("Failed to determine peer security context: %m");
1725 #endif
1726
1727         if (shutdown(fd, SHUT_WR) < 0) {
1728                 log_error("Failed to shutdown writing side of socket: %m");
1729                 r = -errno;
1730                 goto fail;
1731         }
1732
1733         zero(ev);
1734         ev.data.ptr = stream;
1735         ev.events = EPOLLIN;
1736         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1737                 log_error("Failed to add stream to event loop: %m");
1738                 r = -errno;
1739                 goto fail;
1740         }
1741
1742         stream->server = s;
1743         LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1744         s->n_stdout_streams ++;
1745
1746         return 0;
1747
1748 fail:
1749         stdout_stream_free(stream);
1750         return r;
1751 }
1752
1753 static int parse_kernel_timestamp(char **_p, usec_t *t) {
1754         usec_t r;
1755         int k, i;
1756         char *p;
1757
1758         assert(_p);
1759         assert(*_p);
1760         assert(t);
1761
1762         p = *_p;
1763
1764         if (strlen(p) < 14 || p[0] != '[' || p[13] != ']' || p[6] != '.')
1765                 return 0;
1766
1767         r = 0;
1768
1769         for (i = 1; i <= 5; i++) {
1770                 r *= 10;
1771
1772                 if (p[i] == ' ')
1773                         continue;
1774
1775                 k = undecchar(p[i]);
1776                 if (k < 0)
1777                         return 0;
1778
1779                 r += k;
1780         }
1781
1782         for (i = 7; i <= 12; i++) {
1783                 r *= 10;
1784
1785                 k = undecchar(p[i]);
1786                 if (k < 0)
1787                         return 0;
1788
1789                 r += k;
1790         }
1791
1792         *t = r;
1793         *_p += 14;
1794         *_p += strspn(*_p, WHITESPACE);
1795
1796         return 1;
1797 }
1798
/* Returns true if the string 'pid' parses as a PID and that PID is the
 * one of the calling process, false otherwise. */
static bool is_us(const char *pid) {
        pid_t parsed;

        assert(pid);

        return parse_pid(pid, &parsed) >= 0 && parsed == getpid();
}
1809
1810 static void proc_kmsg_line(Server *s, const char *p) {
1811         struct iovec iovec[N_IOVEC_META_FIELDS + 7];
1812         char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
1813         int priority = LOG_KERN | LOG_INFO;
1814         unsigned n = 0;
1815         usec_t usec;
1816         char *identifier = NULL, *pid = NULL;
1817
1818         assert(s);
1819         assert(p);
1820
1821         if (isempty(p))
1822                 return;
1823
1824         parse_syslog_priority((char **) &p, &priority);
1825
1826         if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
1827                 return;
1828
1829         if (parse_kernel_timestamp((char **) &p, &usec) > 0) {
1830                 if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
1831                              (unsigned long long) usec) >= 0)
1832                         IOVEC_SET_STRING(iovec[n++], source_time);
1833         }
1834
1835         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
1836
1837         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1838                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1839
1840         if ((priority & LOG_FACMASK) == LOG_KERN) {
1841
1842                 if (s->forward_to_syslog)
1843                         forward_syslog(s, priority, "kernel", p, NULL, NULL);
1844
1845                 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
1846         } else {
1847                 read_identifier(&p, &identifier, &pid);
1848
1849                 /* Avoid any messages we generated ourselves via
1850                  * log_info() and friends. */
1851                 if (pid && is_us(pid))
1852                         goto finish;
1853
1854                 if (s->forward_to_syslog)
1855                         forward_syslog(s, priority, identifier, p, NULL, NULL);
1856
1857                 if (identifier) {
1858                         syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1859                         if (syslog_identifier)
1860                                 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1861                 }
1862
1863                 if (pid) {
1864                         syslog_pid = strappend("SYSLOG_PID=", pid);
1865                         if (syslog_pid)
1866                                 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1867                 }
1868
1869                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1870                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1871         }
1872
1873         message = strappend("MESSAGE=", p);
1874         if (message)
1875                 IOVEC_SET_STRING(iovec[n++], message);
1876
1877         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, priority);
1878
1879 finish:
1880         free(message);
1881         free(syslog_priority);
1882         free(syslog_identifier);
1883         free(syslog_pid);
1884         free(syslog_facility);
1885         free(source_time);
1886         free(identifier);
1887         free(pid);
1888 }
1889
1890 static void proc_kmsg_scan(Server *s) {
1891         char *p;
1892         size_t remaining;
1893
1894         assert(s);
1895
1896         p = s->proc_kmsg_buffer;
1897         remaining = s->proc_kmsg_length;
1898         for (;;) {
1899                 char *end;
1900                 size_t skip;
1901
1902                 end = memchr(p, '\n', remaining);
1903                 if (end)
1904                         skip = end - p + 1;
1905                 else if (remaining >= sizeof(s->proc_kmsg_buffer) - 1) {
1906                         end = p + sizeof(s->proc_kmsg_buffer) - 1;
1907                         skip = remaining;
1908                 } else
1909                         break;
1910
1911                 *end = 0;
1912
1913                 proc_kmsg_line(s, p);
1914
1915                 remaining -= skip;
1916                 p += skip;
1917         }
1918
1919         if (p > s->proc_kmsg_buffer) {
1920                 memmove(s->proc_kmsg_buffer, p, remaining);
1921                 s->proc_kmsg_length = remaining;
1922         }
1923 }
1924
1925 static int system_journal_open(Server *s) {
1926         int r;
1927         char *fn;
1928         sd_id128_t machine;
1929         char ids[33];
1930
1931         r = sd_id128_get_machine(&machine);
1932         if (r < 0)
1933                 return r;
1934
1935         sd_id128_to_string(machine, ids);
1936
1937         if (!s->system_journal) {
1938
1939                 /* First try to create the machine path, but not the prefix */
1940                 fn = strappend("/var/log/journal/", ids);
1941                 if (!fn)
1942                         return -ENOMEM;
1943                 (void) mkdir(fn, 0755);
1944                 free(fn);
1945
1946                 /* The create the system journal file */
1947                 fn = join("/var/log/journal/", ids, "/system.journal", NULL);
1948                 if (!fn)
1949                         return -ENOMEM;
1950
1951                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, NULL, &s->system_journal);
1952                 free(fn);
1953
1954                 if (r >= 0) {
1955                         journal_default_metrics(&s->system_metrics, s->system_journal->fd);
1956
1957                         s->system_journal->metrics = s->system_metrics;
1958                         s->system_journal->compress = s->compress;
1959
1960                         server_fix_perms(s, s->system_journal, 0);
1961                 } else if (r < 0) {
1962
1963                         if (r != -ENOENT && r != -EROFS)
1964                                 log_warning("Failed to open system journal: %s", strerror(-r));
1965
1966                         r = 0;
1967                 }
1968         }
1969
1970         if (!s->runtime_journal) {
1971
1972                 fn = join("/run/log/journal/", ids, "/system.journal", NULL);
1973                 if (!fn)
1974                         return -ENOMEM;
1975
1976                 if (s->system_journal) {
1977
1978                         /* Try to open the runtime journal, but only
1979                          * if it already exists, so that we can flush
1980                          * it into the system journal */
1981
1982                         r = journal_file_open(fn, O_RDWR, 0640, NULL, &s->runtime_journal);
1983                         free(fn);
1984
1985                         if (r < 0) {
1986                                 if (r != -ENOENT)
1987                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
1988
1989                                 r = 0;
1990                         }
1991
1992                 } else {
1993
1994                         /* OK, we really need the runtime journal, so create
1995                          * it if necessary. */
1996
1997                         (void) mkdir_parents_label(fn, 0755);
1998                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, NULL, &s->runtime_journal);
1999                         free(fn);
2000
2001                         if (r < 0) {
2002                                 log_error("Failed to open runtime journal: %s", strerror(-r));
2003                                 return r;
2004                         }
2005                 }
2006
2007                 if (s->runtime_journal) {
2008                         journal_default_metrics(&s->runtime_metrics, s->runtime_journal->fd);
2009
2010                         s->runtime_journal->metrics = s->runtime_metrics;
2011                         s->runtime_journal->compress = s->compress;
2012
2013                         server_fix_perms(s, s->runtime_journal, 0);
2014                 }
2015         }
2016
2017         return r;
2018 }
2019
2020 static int server_flush_to_var(Server *s) {
2021         char path[] = "/run/log/journal/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
2022         Object *o = NULL;
2023         int r;
2024         sd_id128_t machine;
2025         sd_journal *j;
2026         usec_t ts;
2027
2028         assert(s);
2029
2030         if (!s->runtime_journal)
2031                 return 0;
2032
2033         ts = now(CLOCK_MONOTONIC);
2034         if (s->var_available_timestamp + RECHECK_VAR_AVAILABLE_USEC > ts)
2035                 return 0;
2036
2037         s->var_available_timestamp = ts;
2038
2039         system_journal_open(s);
2040
2041         if (!s->system_journal)
2042                 return 0;
2043
2044         log_info("Flushing to /var...");
2045
2046         r = sd_id128_get_machine(&machine);
2047         if (r < 0) {
2048                 log_error("Failed to get machine id: %s", strerror(-r));
2049                 return r;
2050         }
2051
2052         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
2053         if (r < 0) {
2054                 log_error("Failed to read runtime journal: %s", strerror(-r));
2055                 return r;
2056         }
2057
2058         SD_JOURNAL_FOREACH(j) {
2059                 JournalFile *f;
2060
2061                 f = j->current_file;
2062                 assert(f && f->current_offset > 0);
2063
2064                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2065                 if (r < 0) {
2066                         log_error("Can't read entry: %s", strerror(-r));
2067                         goto finish;
2068                 }
2069
2070                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2071                 if (r == -E2BIG) {
2072                         log_info("Allocation limit reached.");
2073
2074                         journal_file_post_change(s->system_journal);
2075                         server_rotate(s);
2076                         server_vacuum(s);
2077
2078                         r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2079                 }
2080
2081                 if (r < 0) {
2082                         log_error("Can't write entry: %s", strerror(-r));
2083                         goto finish;
2084                 }
2085         }
2086
2087 finish:
2088         journal_file_post_change(s->system_journal);
2089
2090         journal_file_close(s->runtime_journal);
2091         s->runtime_journal = NULL;
2092
2093         if (r >= 0) {
2094                 sd_id128_to_string(machine, path + 17);
2095                 rm_rf(path, false, true, false);
2096         }
2097
2098         return r;
2099 }
2100
2101 static int server_read_proc_kmsg(Server *s) {
2102         ssize_t l;
2103         assert(s);
2104         assert(s->proc_kmsg_fd >= 0);
2105
2106         l = read(s->proc_kmsg_fd, s->proc_kmsg_buffer + s->proc_kmsg_length, sizeof(s->proc_kmsg_buffer) - 1 - s->proc_kmsg_length);
2107         if (l < 0) {
2108
2109                 if (errno == EAGAIN || errno == EINTR)
2110                         return 0;
2111
2112                 log_error("Failed to read from kernel: %m");
2113                 return -errno;
2114         }
2115
2116         s->proc_kmsg_length += l;
2117
2118         proc_kmsg_scan(s);
2119         return 1;
2120 }
2121
2122 static int server_flush_proc_kmsg(Server *s) {
2123         int r;
2124
2125         assert(s);
2126
2127         if (s->proc_kmsg_fd < 0)
2128                 return 0;
2129
2130         log_info("Flushing /proc/kmsg...");
2131
2132         for (;;) {
2133                 r = server_read_proc_kmsg(s);
2134                 if (r < 0)
2135                         return r;
2136
2137                 if (r == 0)
2138                         break;
2139         }
2140
2141         return 0;
2142 }
2143
2144 static int process_event(Server *s, struct epoll_event *ev) {
2145         assert(s);
2146
2147         if (ev->data.fd == s->signal_fd) {
2148                 struct signalfd_siginfo sfsi;
2149                 ssize_t n;
2150
2151                 if (ev->events != EPOLLIN) {
2152                         log_info("Got invalid event from epoll.");
2153                         return -EIO;
2154                 }
2155
2156                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
2157                 if (n != sizeof(sfsi)) {
2158
2159                         if (n >= 0)
2160                                 return -EIO;
2161
2162                         if (errno == EINTR || errno == EAGAIN)
2163                                 return 1;
2164
2165                         return -errno;
2166                 }
2167
2168                 if (sfsi.ssi_signo == SIGUSR1) {
2169                         server_flush_to_var(s);
2170                         return 0;
2171                 }
2172
2173                 log_debug("Received SIG%s", signal_to_string(sfsi.ssi_signo));
2174                 return 0;
2175
2176         } else if (ev->data.fd == s->proc_kmsg_fd) {
2177                 int r;
2178
2179                 if (ev->events != EPOLLIN) {
2180                         log_info("Got invalid event from epoll.");
2181                         return -EIO;
2182                 }
2183
2184                 r = server_read_proc_kmsg(s);
2185                 if (r < 0)
2186                         return r;
2187
2188                 return 1;
2189
2190         } else if (ev->data.fd == s->native_fd ||
2191                    ev->data.fd == s->syslog_fd) {
2192
2193                 if (ev->events != EPOLLIN) {
2194                         log_info("Got invalid event from epoll.");
2195                         return -EIO;
2196                 }
2197
2198                 for (;;) {
2199                         struct msghdr msghdr;
2200                         struct iovec iovec;
2201                         struct ucred *ucred = NULL;
2202                         struct timeval *tv = NULL;
2203                         struct cmsghdr *cmsg;
2204                         char *label = NULL;
2205                         size_t label_len = 0;
2206                         union {
2207                                 struct cmsghdr cmsghdr;
2208
2209                                 /* We use NAME_MAX space for the
2210                                  * SELinux label here. The kernel
2211                                  * currently enforces no limit, but
2212                                  * according to suggestions from the
2213                                  * SELinux people this will change and
2214                                  * it will probably be identical to
2215                                  * NAME_MAX. For now we use that, but
2216                                  * this should be updated one day when
2217                                  * the final limit is known.*/
2218                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
2219                                             CMSG_SPACE(sizeof(struct timeval)) +
2220                                             CMSG_SPACE(sizeof(int)) + /* fd */
2221                                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
2222                         } control;
2223                         ssize_t n;
2224                         int v;
2225                         int *fds = NULL;
2226                         unsigned n_fds = 0;
2227
2228                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
2229                                 log_error("SIOCINQ failed: %m");
2230                                 return -errno;
2231                         }
2232
2233                         if (s->buffer_size < (size_t) v) {
2234                                 void *b;
2235                                 size_t l;
2236
2237                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
2238                                 b = realloc(s->buffer, l+1);
2239
2240                                 if (!b) {
2241                                         log_error("Couldn't increase buffer.");
2242                                         return -ENOMEM;
2243                                 }
2244
2245                                 s->buffer_size = l;
2246                                 s->buffer = b;
2247                         }
2248
2249                         zero(iovec);
2250                         iovec.iov_base = s->buffer;
2251                         iovec.iov_len = s->buffer_size;
2252
2253                         zero(control);
2254                         zero(msghdr);
2255                         msghdr.msg_iov = &iovec;
2256                         msghdr.msg_iovlen = 1;
2257                         msghdr.msg_control = &control;
2258                         msghdr.msg_controllen = sizeof(control);
2259
2260                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
2261                         if (n < 0) {
2262
2263                                 if (errno == EINTR || errno == EAGAIN)
2264                                         return 1;
2265
2266                                 log_error("recvmsg() failed: %m");
2267                                 return -errno;
2268                         }
2269
2270                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
2271
2272                                 if (cmsg->cmsg_level == SOL_SOCKET &&
2273                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
2274                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
2275                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
2276                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2277                                          cmsg->cmsg_type == SCM_SECURITY) {
2278                                         label = (char*) CMSG_DATA(cmsg);
2279                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
2280                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
2281                                          cmsg->cmsg_type == SO_TIMESTAMP &&
2282                                          cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
2283                                         tv = (struct timeval*) CMSG_DATA(cmsg);
2284                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2285                                          cmsg->cmsg_type == SCM_RIGHTS) {
2286                                         fds = (int*) CMSG_DATA(cmsg);
2287                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
2288                                 }
2289                         }
2290
2291                         if (ev->data.fd == s->syslog_fd) {
2292                                 char *e;
2293
2294                                 if (n > 0 && n_fds == 0) {
2295                                         e = memchr(s->buffer, '\n', n);
2296                                         if (e)
2297                                                 *e = 0;
2298                                         else
2299                                                 s->buffer[n] = 0;
2300
2301                                         process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
2302                                 } else if (n_fds > 0)
2303                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
2304
2305                         } else {
2306                                 if (n > 0 && n_fds == 0)
2307                                         process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
2308                                 else if (n == 0 && n_fds == 1)
2309                                         process_native_file(s, fds[0], ucred, tv, label, label_len);
2310                                 else if (n_fds > 0)
2311                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
2312                         }
2313
2314                         close_many(fds, n_fds);
2315                 }
2316
2317                 return 1;
2318
2319         } else if (ev->data.fd == s->stdout_fd) {
2320
2321                 if (ev->events != EPOLLIN) {
2322                         log_info("Got invalid event from epoll.");
2323                         return -EIO;
2324                 }
2325
2326                 stdout_stream_new(s);
2327                 return 1;
2328
2329         } else {
2330                 StdoutStream *stream;
2331
2332                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
2333                         log_info("Got invalid event from epoll.");
2334                         return -EIO;
2335                 }
2336
2337                 /* If it is none of the well-known fds, it must be an
2338                  * stdout stream fd. Note that this is a bit ugly here
2339                  * (since we rely that none of the well-known fds
2340                  * could be interpreted as pointer), but nonetheless
2341                  * safe, since the well-known fds would never get an
2342                  * fd > 4096, i.e. beyond the first memory page */
2343
2344                 stream = ev->data.ptr;
2345
2346                 if (stdout_stream_process(stream) <= 0)
2347                         stdout_stream_free(stream);
2348
2349                 return 1;
2350         }
2351
2352         log_error("Unknown event.");
2353         return 0;
2354 }
2355
2356 static int open_syslog_socket(Server *s) {
2357         union sockaddr_union sa;
2358         int one, r;
2359         struct epoll_event ev;
2360
2361         assert(s);
2362
2363         if (s->syslog_fd < 0) {
2364
2365                 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2366                 if (s->syslog_fd < 0) {
2367                         log_error("socket() failed: %m");
2368                         return -errno;
2369                 }
2370
2371                 zero(sa);
2372                 sa.un.sun_family = AF_UNIX;
2373                 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
2374
2375                 unlink(sa.un.sun_path);
2376
2377                 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2378                 if (r < 0) {
2379                         log_error("bind() failed: %m");
2380                         return -errno;
2381                 }
2382
2383                 chmod(sa.un.sun_path, 0666);
2384         } else
2385                 fd_nonblock(s->syslog_fd, 1);
2386
2387         one = 1;
2388         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2389         if (r < 0) {
2390                 log_error("SO_PASSCRED failed: %m");
2391                 return -errno;
2392         }
2393
2394 #ifdef HAVE_SELINUX
2395         one = 1;
2396         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2397         if (r < 0)
2398                 log_warning("SO_PASSSEC failed: %m");
2399 #endif
2400
2401         one = 1;
2402         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2403         if (r < 0) {
2404                 log_error("SO_TIMESTAMP failed: %m");
2405                 return -errno;
2406         }
2407
2408         zero(ev);
2409         ev.events = EPOLLIN;
2410         ev.data.fd = s->syslog_fd;
2411         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2412                 log_error("Failed to add syslog server fd to epoll object: %m");
2413                 return -errno;
2414         }
2415
2416         return 0;
2417 }
2418
2419 static int open_native_socket(Server*s) {
2420         union sockaddr_union sa;
2421         int one, r;
2422         struct epoll_event ev;
2423
2424         assert(s);
2425
2426         if (s->native_fd < 0) {
2427
2428                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2429                 if (s->native_fd < 0) {
2430                         log_error("socket() failed: %m");
2431                         return -errno;
2432                 }
2433
2434                 zero(sa);
2435                 sa.un.sun_family = AF_UNIX;
2436                 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
2437
2438                 unlink(sa.un.sun_path);
2439
2440                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2441                 if (r < 0) {
2442                         log_error("bind() failed: %m");
2443                         return -errno;
2444                 }
2445
2446                 chmod(sa.un.sun_path, 0666);
2447         } else
2448                 fd_nonblock(s->native_fd, 1);
2449
2450         one = 1;
2451         r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2452         if (r < 0) {
2453                 log_error("SO_PASSCRED failed: %m");
2454                 return -errno;
2455         }
2456
2457 #ifdef HAVE_SELINUX
2458         one = 1;
2459         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2460         if (r < 0)
2461                 log_warning("SO_PASSSEC failed: %m");
2462 #endif
2463
2464         one = 1;
2465         r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2466         if (r < 0) {
2467                 log_error("SO_TIMESTAMP failed: %m");
2468                 return -errno;
2469         }
2470
2471         zero(ev);
2472         ev.events = EPOLLIN;
2473         ev.data.fd = s->native_fd;
2474         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2475                 log_error("Failed to add native server fd to epoll object: %m");
2476                 return -errno;
2477         }
2478
2479         return 0;
2480 }
2481
2482 static int open_stdout_socket(Server *s) {
2483         union sockaddr_union sa;
2484         int r;
2485         struct epoll_event ev;
2486
2487         assert(s);
2488
2489         if (s->stdout_fd < 0) {
2490
2491                 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2492                 if (s->stdout_fd < 0) {
2493                         log_error("socket() failed: %m");
2494                         return -errno;
2495                 }
2496
2497                 zero(sa);
2498                 sa.un.sun_family = AF_UNIX;
2499                 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
2500
2501                 unlink(sa.un.sun_path);
2502
2503                 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2504                 if (r < 0) {
2505                         log_error("bind() failed: %m");
2506                         return -errno;
2507                 }
2508
2509                 chmod(sa.un.sun_path, 0666);
2510
2511                 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2512                         log_error("liste() failed: %m");
2513                         return -errno;
2514                 }
2515         } else
2516                 fd_nonblock(s->stdout_fd, 1);
2517
2518         zero(ev);
2519         ev.events = EPOLLIN;
2520         ev.data.fd = s->stdout_fd;
2521         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2522                 log_error("Failed to add stdout server fd to epoll object: %m");
2523                 return -errno;
2524         }
2525
2526         return 0;
2527 }
2528
2529 static int open_proc_kmsg(Server *s) {
2530         struct epoll_event ev;
2531
2532         assert(s);
2533
2534         if (!s->import_proc_kmsg)
2535                 return 0;
2536
2537         s->proc_kmsg_fd = open("/proc/kmsg", O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2538         if (s->proc_kmsg_fd < 0) {
2539                 log_warning("Failed to open /proc/kmsg, ignoring: %m");
2540                 return 0;
2541         }
2542
2543         zero(ev);
2544         ev.events = EPOLLIN;
2545         ev.data.fd = s->proc_kmsg_fd;
2546         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->proc_kmsg_fd, &ev) < 0) {
2547                 log_error("Failed to add /proc/kmsg fd to epoll object: %m");
2548                 return -errno;
2549         }
2550
2551         return 0;
2552 }
2553
2554 static int open_signalfd(Server *s) {
2555         sigset_t mask;
2556         struct epoll_event ev;
2557
2558         assert(s);
2559
2560         assert_se(sigemptyset(&mask) == 0);
2561         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, -1);
2562         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2563
2564         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2565         if (s->signal_fd < 0) {
2566                 log_error("signalfd(): %m");
2567                 return -errno;
2568         }
2569
2570         zero(ev);
2571         ev.events = EPOLLIN;
2572         ev.data.fd = s->signal_fd;
2573
2574         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2575                 log_error("epoll_ctl(): %m");
2576                 return -errno;
2577         }
2578
2579         return 0;
2580 }
2581
2582 static int server_parse_proc_cmdline(Server *s) {
2583         char *line, *w, *state;
2584         int r;
2585         size_t l;
2586
2587         if (detect_container(NULL) > 0)
2588                 return 0;
2589
2590         r = read_one_line_file("/proc/cmdline", &line);
2591         if (r < 0) {
2592                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2593                 return 0;
2594         }
2595
2596         FOREACH_WORD_QUOTED(w, l, line, state) {
2597                 char *word;
2598
2599                 word = strndup(w, l);
2600                 if (!word) {
2601                         r = -ENOMEM;
2602                         goto finish;
2603                 }
2604
2605                 if (startswith(word, "systemd_journald.forward_to_syslog=")) {
2606                         r = parse_boolean(word + 35);
2607                         if (r < 0)
2608                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2609                         else
2610                                 s->forward_to_syslog = r;
2611                 } else if (startswith(word, "systemd_journald.forward_to_kmsg=")) {
2612                         r = parse_boolean(word + 33);
2613                         if (r < 0)
2614                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2615                         else
2616                                 s->forward_to_kmsg = r;
2617                 } else if (startswith(word, "systemd_journald.forward_to_console=")) {
2618                         r = parse_boolean(word + 36);
2619                         if (r < 0)
2620                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2621                         else
2622                                 s->forward_to_console = r;
2623                 }
2624
2625                 free(word);
2626         }
2627
2628         r = 0;
2629
2630 finish:
2631         free(line);
2632         return r;
2633 }
2634
2635 static int server_parse_config_file(Server *s) {
2636         FILE *f;
2637         const char *fn;
2638         int r;
2639
2640         assert(s);
2641
2642         fn = "/etc/systemd/journald.conf";
2643         f = fopen(fn, "re");
2644         if (!f) {
2645                 if (errno == ENOENT)
2646                         return 0;
2647
2648                 log_warning("Failed to open configuration file %s: %m", fn);
2649                 return -errno;
2650         }
2651
2652         r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2653         if (r < 0)
2654                 log_warning("Failed to parse configuration file: %s", strerror(-r));
2655
2656         fclose(f);
2657
2658         return r;
2659 }
2660
2661 static int server_init(Server *s) {
2662         int n, r, fd;
2663
2664         assert(s);
2665
2666         zero(*s);
2667         s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->proc_kmsg_fd = -1;
2668         s->compress = true;
2669
2670         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
2671         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
2672
2673         s->forward_to_syslog = true;
2674         s->import_proc_kmsg = true;
2675
2676         s->max_level_store = LOG_DEBUG;
2677         s->max_level_syslog = LOG_DEBUG;
2678         s->max_level_kmsg = LOG_NOTICE;
2679         s->max_level_console = LOG_INFO;
2680
2681         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
2682         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
2683
2684         server_parse_config_file(s);
2685         server_parse_proc_cmdline(s);
2686
2687         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
2688         if (!s->user_journals) {
2689                 log_error("Out of memory.");
2690                 return -ENOMEM;
2691         }
2692
2693         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2694         if (s->epoll_fd < 0) {
2695                 log_error("Failed to create epoll object: %m");
2696                 return -errno;
2697         }
2698
2699         n = sd_listen_fds(true);
2700         if (n < 0) {
2701                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
2702                 return n;
2703         }
2704
2705         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2706
2707                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
2708
2709                         if (s->native_fd >= 0) {
2710                                 log_error("Too many native sockets passed.");
2711                                 return -EINVAL;
2712                         }
2713
2714                         s->native_fd = fd;
2715
2716                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
2717
2718                         if (s->stdout_fd >= 0) {
2719                                 log_error("Too many stdout sockets passed.");
2720                                 return -EINVAL;
2721                         }
2722
2723                         s->stdout_fd = fd;
2724
2725                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
2726
2727                         if (s->syslog_fd >= 0) {
2728                                 log_error("Too many /dev/log sockets passed.");
2729                                 return -EINVAL;
2730                         }
2731
2732                         s->syslog_fd = fd;
2733
2734                 } else {
2735                         log_error("Unknown socket passed.");
2736                         return -EINVAL;
2737                 }
2738         }
2739
2740         r = open_syslog_socket(s);
2741         if (r < 0)
2742                 return r;
2743
2744         r = open_native_socket(s);
2745         if (r < 0)
2746                 return r;
2747
2748         r = open_stdout_socket(s);
2749         if (r < 0)
2750                 return r;
2751
2752         r = open_proc_kmsg(s);
2753         if (r < 0)
2754                 return r;
2755
2756         r = open_signalfd(s);
2757         if (r < 0)
2758                 return r;
2759
2760         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2761         if (!s->rate_limit)
2762                 return -ENOMEM;
2763
2764         r = system_journal_open(s);
2765         if (r < 0)
2766                 return r;
2767
2768         return 0;
2769 }
2770
2771 static void server_done(Server *s) {
2772         JournalFile *f;
2773         assert(s);
2774
2775         while (s->stdout_streams)
2776                 stdout_stream_free(s->stdout_streams);
2777
2778         if (s->system_journal)
2779                 journal_file_close(s->system_journal);
2780
2781         if (s->runtime_journal)
2782                 journal_file_close(s->runtime_journal);
2783
2784         while ((f = hashmap_steal_first(s->user_journals)))
2785                 journal_file_close(f);
2786
2787         hashmap_free(s->user_journals);
2788
2789         if (s->epoll_fd >= 0)
2790                 close_nointr_nofail(s->epoll_fd);
2791
2792         if (s->signal_fd >= 0)
2793                 close_nointr_nofail(s->signal_fd);
2794
2795         if (s->syslog_fd >= 0)
2796                 close_nointr_nofail(s->syslog_fd);
2797
2798         if (s->native_fd >= 0)
2799                 close_nointr_nofail(s->native_fd);
2800
2801         if (s->stdout_fd >= 0)
2802                 close_nointr_nofail(s->stdout_fd);
2803
2804         if (s->proc_kmsg_fd >= 0)
2805                 close_nointr_nofail(s->proc_kmsg_fd);
2806
2807         if (s->rate_limit)
2808                 journal_rate_limit_free(s->rate_limit);
2809
2810         free(s->buffer);
2811         free(s->tty_path);
2812 }
2813
2814 int main(int argc, char *argv[]) {
2815         Server server;
2816         int r;
2817
2818         /* if (getppid() != 1) { */
2819         /*         log_error("This program should be invoked by init only."); */
2820         /*         return EXIT_FAILURE; */
2821         /* } */
2822
2823         if (argc > 1) {
2824                 log_error("This program does not take arguments.");
2825                 return EXIT_FAILURE;
2826         }
2827
2828         log_set_target(LOG_TARGET_SAFE);
2829         log_set_facility(LOG_SYSLOG);
2830         log_parse_environment();
2831         log_open();
2832
2833         umask(0022);
2834
2835         r = server_init(&server);
2836         if (r < 0)
2837                 goto finish;
2838
2839         server_vacuum(&server);
2840         server_flush_to_var(&server);
2841         server_flush_proc_kmsg(&server);
2842
2843         log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
2844         driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
2845
2846         sd_notify(false,
2847                   "READY=1\n"
2848                   "STATUS=Processing requests...");
2849
2850         for (;;) {
2851                 struct epoll_event event;
2852
2853                 r = epoll_wait(server.epoll_fd, &event, 1, -1);
2854                 if (r < 0) {
2855
2856                         if (errno == EINTR)
2857                                 continue;
2858
2859                         log_error("epoll_wait() failed: %m");
2860                         r = -errno;
2861                         goto finish;
2862                 } else if (r == 0)
2863                         break;
2864
2865                 r = process_event(&server, &event);
2866                 if (r < 0)
2867                         goto finish;
2868                 else if (r == 0)
2869                         break;
2870         }
2871
2872         log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
2873         driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
2874
2875 finish:
2876         sd_notify(false,
2877                   "STATUS=Shutting down...");
2878
2879         server_done(&server);
2880
2881         return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
2882 }