chiark / gitweb /
journald: ignore messages read from /proc/kmsg that we generated via /dev/kmsg
[elogind.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
32
33 #include <systemd/sd-journal.h>
34 #include <systemd/sd-login.h>
35 #include <systemd/sd-messages.h>
36 #include <systemd/sd-daemon.h>
37
38 #include "mkdir.h"
39 #include "hashmap.h"
40 #include "journal-file.h"
41 #include "socket-util.h"
42 #include "cgroup-util.h"
43 #include "list.h"
44 #include "journal-rate-limit.h"
45 #include "journal-internal.h"
46 #include "conf-parser.h"
47 #include "journald.h"
48 #include "virt.h"
49 #include "missing.h"
50
51 #ifdef HAVE_ACL
52 #include <sys/acl.h>
53 #include <acl/libacl.h>
54 #include "acl-util.h"
55 #endif
56
57 #ifdef HAVE_SELINUX
58 #include <selinux/selinux.h>
59 #endif
60
61 #define USER_JOURNALS_MAX 1024
62 #define STDOUT_STREAMS_MAX 4096
63
64 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
65 #define DEFAULT_RATE_LIMIT_BURST 200
66
67 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
68
69 #define RECHECK_VAR_AVAILABLE_USEC (30*USEC_PER_SEC)
70
71 #define N_IOVEC_META_FIELDS 17
72
73 #define ENTRY_SIZE_MAX (1024*1024*32)
74
/* Parser state of a stdout stream connection. The names of all states
 * but the last mirror the per-stream settings stored in struct
 * StdoutStream; STDOUT_STREAM_RUNNING is the final state. */
typedef enum StdoutStreamState {
        STDOUT_STREAM_IDENTIFIER = 0,
        STDOUT_STREAM_PRIORITY,
        STDOUT_STREAM_LEVEL_PREFIX,
        STDOUT_STREAM_FORWARD_TO_SYSLOG,
        STDOUT_STREAM_FORWARD_TO_KMSG,
        STDOUT_STREAM_FORWARD_TO_CONSOLE,
        STDOUT_STREAM_RUNNING
} StdoutStreamState;
84
85 struct StdoutStream {
86         Server *server;
87         StdoutStreamState state;
88
89         int fd;
90
91         struct ucred ucred;
92 #ifdef HAVE_SELINUX
93         security_context_t security_context;
94 #endif
95
96         char *identifier;
97         int priority;
98         bool level_prefix:1;
99         bool forward_to_syslog:1;
100         bool forward_to_kmsg:1;
101         bool forward_to_console:1;
102
103         char buffer[LINE_MAX+1];
104         size_t length;
105
106         LIST_FIELDS(StdoutStream, stdout_stream);
107 };
108
109 static int server_flush_to_var(Server *s);
110
111 static uint64_t available_space(Server *s) {
112         char ids[33], *p;
113         const char *f;
114         sd_id128_t machine;
115         struct statvfs ss;
116         uint64_t sum = 0, avail = 0, ss_avail = 0;
117         int r;
118         DIR *d;
119         usec_t ts;
120         JournalMetrics *m;
121
122         ts = now(CLOCK_MONOTONIC);
123
124         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
125                 return s->cached_available_space;
126
127         r = sd_id128_get_machine(&machine);
128         if (r < 0)
129                 return 0;
130
131         if (s->system_journal) {
132                 f = "/var/log/journal/";
133                 m = &s->system_metrics;
134         } else {
135                 f = "/run/log/journal/";
136                 m = &s->runtime_metrics;
137         }
138
139         assert(m);
140
141         p = strappend(f, sd_id128_to_string(machine, ids));
142         if (!p)
143                 return 0;
144
145         d = opendir(p);
146         free(p);
147
148         if (!d)
149                 return 0;
150
151         if (fstatvfs(dirfd(d), &ss) < 0)
152                 goto finish;
153
154         for (;;) {
155                 struct stat st;
156                 struct dirent buf, *de;
157
158                 r = readdir_r(d, &buf, &de);
159                 if (r != 0)
160                         break;
161
162                 if (!de)
163                         break;
164
165                 if (!endswith(de->d_name, ".journal") &&
166                     !endswith(de->d_name, ".journal~"))
167                         continue;
168
169                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
170                         continue;
171
172                 if (!S_ISREG(st.st_mode))
173                         continue;
174
175                 sum += (uint64_t) st.st_blocks * 512UL;
176         }
177
178         avail = sum >= m->max_use ? 0 : m->max_use - sum;
179
180         ss_avail = ss.f_bsize * ss.f_bavail;
181
182         ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
183
184         if (ss_avail < avail)
185                 avail = ss_avail;
186
187         s->cached_available_space = avail;
188         s->cached_available_space_timestamp = ts;
189
190 finish:
191         closedir(d);
192
193         return avail;
194 }
195
196 static void server_read_file_gid(Server *s) {
197         const char *adm = "adm";
198         int r;
199
200         assert(s);
201
202         if (s->file_gid_valid)
203                 return;
204
205         r = get_group_creds(&adm, &s->file_gid);
206         if (r < 0)
207                 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
208
209         /* if we couldn't read the gid, then it will be 0, but that's
210          * fine and we shouldn't try to resolve the group again, so
211          * let's just pretend it worked right-away. */
212         s->file_gid_valid = true;
213 }
214
215 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
216         int r;
217 #ifdef HAVE_ACL
218         acl_t acl;
219         acl_entry_t entry;
220         acl_permset_t permset;
221 #endif
222
223         assert(f);
224
225         server_read_file_gid(s);
226
227         r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
228         if (r < 0)
229                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
230
231 #ifdef HAVE_ACL
232         if (uid <= 0)
233                 return;
234
235         acl = acl_get_fd(f->fd);
236         if (!acl) {
237                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
238                 return;
239         }
240
241         r = acl_find_uid(acl, uid, &entry);
242         if (r <= 0) {
243
244                 if (acl_create_entry(&acl, &entry) < 0 ||
245                     acl_set_tag_type(entry, ACL_USER) < 0 ||
246                     acl_set_qualifier(entry, &uid) < 0) {
247                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
248                         goto finish;
249                 }
250         }
251
252         if (acl_get_permset(entry, &permset) < 0 ||
253             acl_add_perm(permset, ACL_READ) < 0 ||
254             acl_calc_mask(&acl) < 0) {
255                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
256                 goto finish;
257         }
258
259         if (acl_set_fd(f->fd, acl) < 0)
260                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
261
262 finish:
263         acl_free(acl);
264 #endif
265 }
266
267 static JournalFile* find_journal(Server *s, uid_t uid) {
268         char *p;
269         int r;
270         JournalFile *f;
271         char ids[33];
272         sd_id128_t machine;
273
274         assert(s);
275
276         /* We split up user logs only on /var, not on /run. If the
277          * runtime file is open, we write to it exclusively, in order
278          * to guarantee proper order as soon as we flush /run to
279          * /var and close the runtime file. */
280
281         if (s->runtime_journal)
282                 return s->runtime_journal;
283
284         if (uid <= 0)
285                 return s->system_journal;
286
287         r = sd_id128_get_machine(&machine);
288         if (r < 0)
289                 return s->system_journal;
290
291         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
292         if (f)
293                 return f;
294
295         if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0)
296                 return s->system_journal;
297
298         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
299                 /* Too many open? Then let's close one */
300                 f = hashmap_steal_first(s->user_journals);
301                 assert(f);
302                 journal_file_close(f);
303         }
304
305         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->system_journal, &f);
306         free(p);
307
308         if (r < 0)
309                 return s->system_journal;
310
311         server_fix_perms(s, f, uid);
312
313         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
314         if (r < 0) {
315                 journal_file_close(f);
316                 return s->system_journal;
317         }
318
319         return f;
320 }
321
322 static void server_rotate(Server *s) {
323         JournalFile *f;
324         void *k;
325         Iterator i;
326         int r;
327
328         log_info("Rotating...");
329
330         if (s->runtime_journal) {
331                 r = journal_file_rotate(&s->runtime_journal);
332                 if (r < 0)
333                         if (s->runtime_journal)
334                                 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
335                         else
336                                 log_error("Failed to create new runtime journal: %s", strerror(-r));
337                 else
338                         server_fix_perms(s, s->runtime_journal, 0);
339         }
340
341         if (s->system_journal) {
342                 r = journal_file_rotate(&s->system_journal);
343                 if (r < 0)
344                         if (s->system_journal)
345                                 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
346                         else
347                                 log_error("Failed to create new system journal: %s", strerror(-r));
348
349                 else
350                         server_fix_perms(s, s->system_journal, 0);
351         }
352
353         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
354                 r = journal_file_rotate(&f);
355                 if (r < 0)
356                         if (f->path)
357                                 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
358                         else
359                                 log_error("Failed to create user journal: %s", strerror(-r));
360                 else {
361                         hashmap_replace(s->user_journals, k, f);
362                         server_fix_perms(s, s->system_journal, PTR_TO_UINT32(k));
363                 }
364         }
365 }
366
367 static void server_vacuum(Server *s) {
368         char *p;
369         char ids[33];
370         sd_id128_t machine;
371         int r;
372
373         log_info("Vacuuming...");
374
375         r = sd_id128_get_machine(&machine);
376         if (r < 0) {
377                 log_error("Failed to get machine ID: %s", strerror(-r));
378                 return;
379         }
380
381         sd_id128_to_string(machine, ids);
382
383         if (s->system_journal) {
384                 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
385                         log_error("Out of memory.");
386                         return;
387                 }
388
389                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
390                 if (r < 0 && r != -ENOENT)
391                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
392                 free(p);
393         }
394
395
396         if (s->runtime_journal) {
397                 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
398                         log_error("Out of memory.");
399                         return;
400                 }
401
402                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
403                 if (r < 0 && r != -ENOENT)
404                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
405                 free(p);
406         }
407
408         s->cached_available_space_timestamp = 0;
409 }
410
411 static char *shortened_cgroup_path(pid_t pid) {
412         int r;
413         char *process_path, *init_path, *path;
414
415         assert(pid > 0);
416
417         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
418         if (r < 0)
419                 return NULL;
420
421         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
422         if (r < 0) {
423                 free(process_path);
424                 return NULL;
425         }
426
427         if (endswith(init_path, "/system"))
428                 init_path[strlen(init_path) - 7] = 0;
429         else if (streq(init_path, "/"))
430                 init_path[0] = 0;
431
432         if (startswith(process_path, init_path)) {
433                 char *p;
434
435                 p = strdup(process_path + strlen(init_path));
436                 if (!p) {
437                         free(process_path);
438                         free(init_path);
439                         return NULL;
440                 }
441                 path = p;
442         } else {
443                 path = process_path;
444                 process_path = NULL;
445         }
446
447         free(process_path);
448         free(init_path);
449
450         return path;
451 }
452
453 static void dispatch_message_real(
454                 Server *s,
455                 struct iovec *iovec, unsigned n, unsigned m,
456                 struct ucred *ucred,
457                 struct timeval *tv,
458                 const char *label, size_t label_len) {
459
460         char *pid = NULL, *uid = NULL, *gid = NULL,
461                 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
462                 *comm = NULL, *cmdline = NULL, *hostname = NULL,
463                 *audit_session = NULL, *audit_loginuid = NULL,
464                 *exe = NULL, *cgroup = NULL, *session = NULL,
465                 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
466
467         char idbuf[33];
468         sd_id128_t id;
469         int r;
470         char *t;
471         uid_t loginuid = 0, realuid = 0;
472         JournalFile *f;
473         bool vacuumed = false;
474
475         assert(s);
476         assert(iovec);
477         assert(n > 0);
478         assert(n + N_IOVEC_META_FIELDS <= m);
479
480         if (ucred) {
481                 uint32_t audit;
482                 uid_t owner;
483
484                 realuid = ucred->uid;
485
486                 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
487                         IOVEC_SET_STRING(iovec[n++], pid);
488
489                 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
490                         IOVEC_SET_STRING(iovec[n++], uid);
491
492                 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
493                         IOVEC_SET_STRING(iovec[n++], gid);
494
495                 r = get_process_comm(ucred->pid, &t);
496                 if (r >= 0) {
497                         comm = strappend("_COMM=", t);
498                         free(t);
499
500                         if (comm)
501                                 IOVEC_SET_STRING(iovec[n++], comm);
502                 }
503
504                 r = get_process_exe(ucred->pid, &t);
505                 if (r >= 0) {
506                         exe = strappend("_EXE=", t);
507                         free(t);
508
509                         if (exe)
510                                 IOVEC_SET_STRING(iovec[n++], exe);
511                 }
512
513                 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
514                 if (r >= 0) {
515                         cmdline = strappend("_CMDLINE=", t);
516                         free(t);
517
518                         if (cmdline)
519                                 IOVEC_SET_STRING(iovec[n++], cmdline);
520                 }
521
522                 r = audit_session_from_pid(ucred->pid, &audit);
523                 if (r >= 0)
524                         if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
525                                 IOVEC_SET_STRING(iovec[n++], audit_session);
526
527                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
528                 if (r >= 0)
529                         if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
530                                 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
531
532                 t = shortened_cgroup_path(ucred->pid);
533                 if (t) {
534                         cgroup = strappend("_SYSTEMD_CGROUP=", t);
535                         free(t);
536
537                         if (cgroup)
538                                 IOVEC_SET_STRING(iovec[n++], cgroup);
539                 }
540
541                 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
542                         session = strappend("_SYSTEMD_SESSION=", t);
543                         free(t);
544
545                         if (session)
546                                 IOVEC_SET_STRING(iovec[n++], session);
547                 }
548
549                 if (sd_pid_get_unit(ucred->pid, &t) >= 0) {
550                         unit = strappend("_SYSTEMD_UNIT=", t);
551                         free(t);
552
553                         if (unit)
554                                 IOVEC_SET_STRING(iovec[n++], unit);
555                 }
556
557                 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
558                         if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
559                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
560
561 #ifdef HAVE_SELINUX
562                 if (label) {
563                         selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
564                         if (selinux_context) {
565                                 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
566                                 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
567                                 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
568                                 IOVEC_SET_STRING(iovec[n++], selinux_context);
569                         }
570                 } else {
571                         security_context_t con;
572
573                         if (getpidcon(ucred->pid, &con) >= 0) {
574                                 selinux_context = strappend("_SELINUX_CONTEXT=", con);
575                                 if (selinux_context)
576                                         IOVEC_SET_STRING(iovec[n++], selinux_context);
577
578                                 freecon(con);
579                         }
580                 }
581 #endif
582         }
583
584         if (tv) {
585                 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
586                              (unsigned long long) timeval_load(tv)) >= 0)
587                         IOVEC_SET_STRING(iovec[n++], source_time);
588         }
589
590         /* Note that strictly speaking storing the boot id here is
591          * redundant since the entry includes this in-line
592          * anyway. However, we need this indexed, too. */
593         r = sd_id128_get_boot(&id);
594         if (r >= 0)
595                 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
596                         IOVEC_SET_STRING(iovec[n++], boot_id);
597
598         r = sd_id128_get_machine(&id);
599         if (r >= 0)
600                 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
601                         IOVEC_SET_STRING(iovec[n++], machine_id);
602
603         t = gethostname_malloc();
604         if (t) {
605                 hostname = strappend("_HOSTNAME=", t);
606                 free(t);
607                 if (hostname)
608                         IOVEC_SET_STRING(iovec[n++], hostname);
609         }
610
611         assert(n <= m);
612
613         server_flush_to_var(s);
614
615 retry:
616         f = find_journal(s, realuid == 0 ? 0 : loginuid);
617         if (!f)
618                 log_warning("Dropping message, as we can't find a place to store the data.");
619         else {
620                 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
621
622                 if ((r == -E2BIG || /* hit limit */
623                      r == -EFBIG || /* hit fs limit */
624                      r == -EDQUOT || /* quota hit */
625                      r == -ENOSPC || /* disk full */
626                      r == -EBADMSG || /* corrupted */
627                      r == -ENODATA || /* truncated */
628                      r == -EHOSTDOWN || /* other machine */
629                      r == -EPROTONOSUPPORT) && /* unsupported feature */
630                     !vacuumed) {
631
632                         if (r == -E2BIG)
633                                 log_info("Allocation limit reached, rotating.");
634                         else
635                                 log_warning("Journal file corrupted, rotating.");
636
637                         server_rotate(s);
638                         server_vacuum(s);
639                         vacuumed = true;
640
641                         log_info("Retrying write.");
642                         goto retry;
643                 }
644
645                 if (r < 0)
646                         log_error("Failed to write entry, ignoring: %s", strerror(-r));
647         }
648
649         free(pid);
650         free(uid);
651         free(gid);
652         free(comm);
653         free(exe);
654         free(cmdline);
655         free(source_time);
656         free(boot_id);
657         free(machine_id);
658         free(hostname);
659         free(audit_session);
660         free(audit_loginuid);
661         free(cgroup);
662         free(session);
663         free(owner_uid);
664         free(unit);
665         free(selinux_context);
666 }
667
668 static void driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
669         char mid[11 + 32 + 1];
670         char buffer[16 + LINE_MAX + 1];
671         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
672         int n = 0;
673         va_list ap;
674         struct ucred ucred;
675
676         assert(s);
677         assert(format);
678
679         IOVEC_SET_STRING(iovec[n++], "PRIORITY=5");
680         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
681
682         memcpy(buffer, "MESSAGE=", 8);
683         va_start(ap, format);
684         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
685         va_end(ap);
686         char_array_0(buffer);
687         IOVEC_SET_STRING(iovec[n++], buffer);
688
689         snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
690         char_array_0(mid);
691         IOVEC_SET_STRING(iovec[n++], mid);
692
693         zero(ucred);
694         ucred.pid = getpid();
695         ucred.uid = getuid();
696         ucred.gid = getgid();
697
698         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0);
699 }
700
701 static void dispatch_message(Server *s,
702                              struct iovec *iovec, unsigned n, unsigned m,
703                              struct ucred *ucred,
704                              struct timeval *tv,
705                              const char *label, size_t label_len,
706                              int priority) {
707         int rl;
708         char *path = NULL, *c;
709
710         assert(s);
711         assert(iovec || n == 0);
712
713         if (n == 0)
714                 return;
715
716         if (!ucred)
717                 goto finish;
718
719         path = shortened_cgroup_path(ucred->pid);
720         if (!path)
721                 goto finish;
722
723         /* example: /user/lennart/3/foobar
724          *          /system/dbus.service/foobar
725          *
726          * So let's cut of everything past the third /, since that is
727          * wher user directories start */
728
729         c = strchr(path, '/');
730         if (c) {
731                 c = strchr(c+1, '/');
732                 if (c) {
733                         c = strchr(c+1, '/');
734                         if (c)
735                                 *c = 0;
736                 }
737         }
738
739         rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
740
741         if (rl == 0) {
742                 free(path);
743                 return;
744         }
745
746         /* Write a suppression message if we suppressed something */
747         if (rl > 1)
748                 driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
749
750         free(path);
751
752 finish:
753         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len);
754 }
755
756 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
757         struct msghdr msghdr;
758         struct cmsghdr *cmsg;
759         union {
760                 struct cmsghdr cmsghdr;
761                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
762         } control;
763         union sockaddr_union sa;
764
765         assert(s);
766         assert(iovec);
767         assert(n_iovec > 0);
768
769         zero(msghdr);
770         msghdr.msg_iov = (struct iovec*) iovec;
771         msghdr.msg_iovlen = n_iovec;
772
773         zero(sa);
774         sa.un.sun_family = AF_UNIX;
775         strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
776         msghdr.msg_name = &sa;
777         msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
778
779         if (ucred) {
780                 zero(control);
781                 msghdr.msg_control = &control;
782                 msghdr.msg_controllen = sizeof(control);
783
784                 cmsg = CMSG_FIRSTHDR(&msghdr);
785                 cmsg->cmsg_level = SOL_SOCKET;
786                 cmsg->cmsg_type = SCM_CREDENTIALS;
787                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
788                 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
789                 msghdr.msg_controllen = cmsg->cmsg_len;
790         }
791
792         /* Forward the syslog message we received via /dev/log to
793          * /run/systemd/syslog. Unfortunately we currently can't set
794          * the SO_TIMESTAMP auxiliary data, and hence we don't. */
795
796         if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
797                 return;
798
799         /* The socket is full? I guess the syslog implementation is
800          * too slow, and we shouldn't wait for that... */
801         if (errno == EAGAIN)
802                 return;
803
804         if (ucred && errno == ESRCH) {
805                 struct ucred u;
806
807                 /* Hmm, presumably the sender process vanished
808                  * by now, so let's fix it as good as we
809                  * can, and retry */
810
811                 u = *ucred;
812                 u.pid = getpid();
813                 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
814
815                 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
816                         return;
817
818                 if (errno == EAGAIN)
819                         return;
820         }
821
822         log_debug("Failed to forward syslog message: %m");
823 }
824
825 static void forward_syslog_raw(Server *s, const char *buffer, struct ucred *ucred, struct timeval *tv) {
826         struct iovec iovec;
827
828         assert(s);
829         assert(buffer);
830
831         IOVEC_SET_STRING(iovec, buffer);
832         forward_syslog_iovec(s, &iovec, 1, ucred, tv);
833 }
834
835 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
836         struct iovec iovec[5];
837         char header_priority[6], header_time[64], header_pid[16];
838         int n = 0;
839         time_t t;
840         struct tm *tm;
841         char *ident_buf = NULL;
842
843         assert(s);
844         assert(priority >= 0);
845         assert(priority <= 999);
846         assert(message);
847
848         /* First: priority field */
849         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
850         char_array_0(header_priority);
851         IOVEC_SET_STRING(iovec[n++], header_priority);
852
853         /* Second: timestamp */
854         t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
855         tm = localtime(&t);
856         if (!tm)
857                 return;
858         if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
859                 return;
860         IOVEC_SET_STRING(iovec[n++], header_time);
861
862         /* Third: identifier and PID */
863         if (ucred) {
864                 if (!identifier) {
865                         get_process_comm(ucred->pid, &ident_buf);
866                         identifier = ident_buf;
867                 }
868
869                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
870                 char_array_0(header_pid);
871
872                 if (identifier)
873                         IOVEC_SET_STRING(iovec[n++], identifier);
874
875                 IOVEC_SET_STRING(iovec[n++], header_pid);
876         } else if (identifier) {
877                 IOVEC_SET_STRING(iovec[n++], identifier);
878                 IOVEC_SET_STRING(iovec[n++], ": ");
879         }
880
881         /* Fourth: message */
882         IOVEC_SET_STRING(iovec[n++], message);
883
884         forward_syslog_iovec(s, iovec, n, ucred, tv);
885
886         free(ident_buf);
887 }
888
/* Returns 'priority' unchanged if it carries a non-zero facility.
 * Otherwise (facility bits all zero, i.e. the kernel facility) returns
 * its level bits combined with the LOG_USER facility. */
static int fixup_priority(int priority) {
        return (priority & LOG_FACMASK) != 0
                ? priority
                : ((priority & LOG_PRIMASK) | LOG_USER);
}
896
897 static void forward_kmsg(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
898         struct iovec iovec[5];
899         char header_priority[6], header_pid[16];
900         int n = 0;
901         char *ident_buf = NULL;
902         int fd;
903
904         assert(s);
905         assert(priority >= 0);
906         assert(priority <= 999);
907         assert(message);
908
909         /* Never allow messages with kernel facility to be written to
910          * kmsg, regardless where the data comes from. */
911         priority = fixup_priority(priority);
912
913         /* First: priority field */
914         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
915         char_array_0(header_priority);
916         IOVEC_SET_STRING(iovec[n++], header_priority);
917
918         /* Second: identifier and PID */
919         if (ucred) {
920                 if (!identifier) {
921                         get_process_comm(ucred->pid, &ident_buf);
922                         identifier = ident_buf;
923                 }
924
925                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
926                 char_array_0(header_pid);
927
928                 if (identifier)
929                         IOVEC_SET_STRING(iovec[n++], identifier);
930
931                 IOVEC_SET_STRING(iovec[n++], header_pid);
932         } else if (identifier) {
933                 IOVEC_SET_STRING(iovec[n++], identifier);
934                 IOVEC_SET_STRING(iovec[n++], ": ");
935         }
936
937         /* Fourth: message */
938         IOVEC_SET_STRING(iovec[n++], message);
939         IOVEC_SET_STRING(iovec[n++], "\n");
940
941         fd = open("/dev/kmsg", O_WRONLY|O_NOCTTY|O_CLOEXEC);
942         if (fd < 0) {
943                 log_debug("Failed to open /dev/kmsg for logging: %s", strerror(errno));
944                 goto finish;
945         }
946
947         if (writev(fd, iovec, n) < 0)
948                 log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
949
950         close_nointr_nofail(fd);
951
952 finish:
953         free(ident_buf);
954 }
955
956 static void forward_console(Server *s, const char *identifier, const char *message, struct ucred *ucred) {
957         struct iovec iovec[4];
958         char header_pid[16];
959         int n = 0, fd;
960         char *ident_buf = NULL;
961
962         assert(s);
963         assert(message);
964
965         /* First: identifier and PID */
966         if (ucred) {
967                 if (!identifier) {
968                         get_process_comm(ucred->pid, &ident_buf);
969                         identifier = ident_buf;
970                 }
971
972                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
973                 char_array_0(header_pid);
974
975                 if (identifier)
976                         IOVEC_SET_STRING(iovec[n++], identifier);
977
978                 IOVEC_SET_STRING(iovec[n++], header_pid);
979         } else if (identifier) {
980                 IOVEC_SET_STRING(iovec[n++], identifier);
981                 IOVEC_SET_STRING(iovec[n++], ": ");
982         }
983
984         /* Third: message */
985         IOVEC_SET_STRING(iovec[n++], message);
986         IOVEC_SET_STRING(iovec[n++], "\n");
987
988         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
989         if (fd < 0) {
990                 log_debug("Failed to open /dev/console for logging: %s", strerror(errno));
991                 goto finish;
992         }
993
994         if (writev(fd, iovec, n) < 0)
995                 log_debug("Failed to write to /dev/console for logging: %s", strerror(errno));
996
997         close_nointr_nofail(fd);
998
999 finish:
1000         free(ident_buf);
1001 }
1002
1003 static void read_identifier(const char **buf, char **identifier, char **pid) {
1004         const char *p;
1005         char *t;
1006         size_t l, e;
1007
1008         assert(buf);
1009         assert(identifier);
1010         assert(pid);
1011
1012         p = *buf;
1013
1014         p += strspn(p, WHITESPACE);
1015         l = strcspn(p, WHITESPACE);
1016
1017         if (l <= 0 ||
1018             p[l-1] != ':')
1019                 return;
1020
1021         e = l;
1022         l--;
1023
1024         if (p[l-1] == ']') {
1025                 size_t k = l-1;
1026
1027                 for (;;) {
1028
1029                         if (p[k] == '[') {
1030                                 t = strndup(p+k+1, l-k-2);
1031                                 if (t)
1032                                         *pid = t;
1033
1034                                 l = k;
1035                                 break;
1036                         }
1037
1038                         if (k == 0)
1039                                 break;
1040
1041                         k--;
1042                 }
1043         }
1044
1045         t = strndup(p, l);
1046         if (t)
1047                 *identifier = t;
1048
1049         *buf = p + e;
1050         *buf += strspn(*buf, WHITESPACE);
1051 }
1052
1053 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv, const char *label, size_t label_len) {
1054         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
1055         struct iovec iovec[N_IOVEC_META_FIELDS + 6];
1056         unsigned n = 0;
1057         int priority = LOG_USER | LOG_INFO;
1058         char *identifier = NULL, *pid = NULL;
1059
1060         assert(s);
1061         assert(buf);
1062
1063         if (s->forward_to_syslog)
1064                 forward_syslog_raw(s, buf, ucred, tv);
1065
1066         parse_syslog_priority((char**) &buf, &priority);
1067         skip_syslog_date((char**) &buf);
1068         read_identifier(&buf, &identifier, &pid);
1069
1070         if (s->forward_to_kmsg)
1071                 forward_kmsg(s, priority, identifier, buf, ucred);
1072
1073         if (s->forward_to_console)
1074                 forward_console(s, identifier, buf, ucred);
1075
1076         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
1077
1078         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1079                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1080
1081         if (priority & LOG_FACMASK)
1082                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1083                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1084
1085         if (identifier) {
1086                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1087                 if (syslog_identifier)
1088                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1089         }
1090
1091         if (pid) {
1092                 syslog_pid = strappend("SYSLOG_PID=", pid);
1093                 if (syslog_pid)
1094                         IOVEC_SET_STRING(iovec[n++], syslog_pid);
1095         }
1096
1097         message = strappend("MESSAGE=", buf);
1098         if (message)
1099                 IOVEC_SET_STRING(iovec[n++], message);
1100
1101         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, label, label_len, priority);
1102
1103         free(message);
1104         free(identifier);
1105         free(pid);
1106         free(syslog_priority);
1107         free(syslog_facility);
1108         free(syslog_identifier);
1109 }
1110
/* Checks whether the l bytes at p form a field name that clients are
 * allowed to set.
 *
 * This roughly follows the POSIX syntax recommendations for
 * environment variable names, with a few additional restrictions:
 *
 * http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
 *
 * Returns true only if the name is between 1 and 64 characters long,
 * starts with neither an underscore (those names are protected) nor
 * a digit, and consists solely of A-Z, 0-9 and '_'. */
static bool valid_user_field(const char *p, size_t l) {
        size_t i;

        /* Neither empty nor overly long names */
        if (l == 0 || l > 64)
                return false;

        /* Protected (leading underscore) or starting with a digit */
        if (p[0] == '_' || (p[0] >= '0' && p[0] <= '9'))
                return false;

        for (i = 0; i < l; i++) {
                char c = p[i];
                bool is_upper = c >= 'A' && c <= 'Z';
                bool is_digit = c >= '0' && c <= '9';

                if (!is_upper && !is_digit && c != '_')
                        return false;
        }

        return true;
}
1145
1146 static void process_native_message(
1147                 Server *s,
1148                 const void *buffer, size_t buffer_size,
1149                 struct ucred *ucred,
1150                 struct timeval *tv,
1151                 const char *label, size_t label_len) {
1152
1153         struct iovec *iovec = NULL;
1154         unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1155         const char *p;
1156         size_t remaining;
1157         int priority = LOG_INFO;
1158         char *identifier = NULL, *message = NULL;
1159
1160         assert(s);
1161         assert(buffer || buffer_size == 0);
1162
1163         p = buffer;
1164         remaining = buffer_size;
1165
1166         while (remaining > 0) {
1167                 const char *e, *q;
1168
1169                 e = memchr(p, '\n', remaining);
1170
1171                 if (!e) {
1172                         /* Trailing noise, let's ignore it, and flush what we collected */
1173                         log_debug("Received message with trailing noise, ignoring.");
1174                         break;
1175                 }
1176
1177                 if (e == p) {
1178                         /* Entry separator */
1179                         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, priority);
1180                         n = 0;
1181                         priority = LOG_INFO;
1182
1183                         p++;
1184                         remaining--;
1185                         continue;
1186                 }
1187
1188                 if (*p == '.' || *p == '#') {
1189                         /* Ignore control commands for now, and
1190                          * comments too. */
1191                         remaining -= (e - p) + 1;
1192                         p = e + 1;
1193                         continue;
1194                 }
1195
1196                 /* A property follows */
1197
1198                 if (n+N_IOVEC_META_FIELDS >= m) {
1199                         struct iovec *c;
1200                         unsigned u;
1201
1202                         u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1203                         c = realloc(iovec, u * sizeof(struct iovec));
1204                         if (!c) {
1205                                 log_error("Out of memory");
1206                                 break;
1207                         }
1208
1209                         iovec = c;
1210                         m = u;
1211                 }
1212
1213                 q = memchr(p, '=', e - p);
1214                 if (q) {
1215                         if (valid_user_field(p, q - p)) {
1216                                 size_t l;
1217
1218                                 l = e - p;
1219
1220                                 /* If the field name starts with an
1221                                  * underscore, skip the variable,
1222                                  * since that indidates a trusted
1223                                  * field */
1224                                 iovec[n].iov_base = (char*) p;
1225                                 iovec[n].iov_len = l;
1226                                 n++;
1227
1228                                 /* We need to determine the priority
1229                                  * of this entry for the rate limiting
1230                                  * logic */
1231                                 if (l == 10 &&
1232                                     memcmp(p, "PRIORITY=", 9) == 0 &&
1233                                     p[9] >= '0' && p[9] <= '9')
1234                                         priority = (priority & LOG_FACMASK) | (p[9] - '0');
1235
1236                                 else if (l == 17 &&
1237                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1238                                          p[16] >= '0' && p[16] <= '9')
1239                                         priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1240
1241                                 else if (l == 18 &&
1242                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1243                                          p[16] >= '0' && p[16] <= '9' &&
1244                                          p[17] >= '0' && p[17] <= '9')
1245                                         priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1246
1247                                 else if (l >= 19 &&
1248                                          memcmp(p, "SYSLOG_IDENTIFIER=", 18) == 0) {
1249                                         char *t;
1250
1251                                         t = strndup(p + 18, l - 18);
1252                                         if (t) {
1253                                                 free(identifier);
1254                                                 identifier = t;
1255                                         }
1256                                 } else if (l >= 8 &&
1257                                            memcmp(p, "MESSAGE=", 8) == 0) {
1258                                         char *t;
1259
1260                                         t = strndup(p + 8, l - 8);
1261                                         if (t) {
1262                                                 free(message);
1263                                                 message = t;
1264                                         }
1265                                 }
1266                         }
1267
1268                         remaining -= (e - p) + 1;
1269                         p = e + 1;
1270                         continue;
1271                 } else {
1272                         le64_t l_le;
1273                         uint64_t l;
1274                         char *k;
1275
1276                         if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1277                                 log_debug("Failed to parse message, ignoring.");
1278                                 break;
1279                         }
1280
1281                         memcpy(&l_le, e + 1, sizeof(uint64_t));
1282                         l = le64toh(l_le);
1283
1284                         if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1285                             e[1+sizeof(uint64_t)+l] != '\n') {
1286                                 log_debug("Failed to parse message, ignoring.");
1287                                 break;
1288                         }
1289
1290                         k = malloc((e - p) + 1 + l);
1291                         if (!k) {
1292                                 log_error("Out of memory");
1293                                 break;
1294                         }
1295
1296                         memcpy(k, p, e - p);
1297                         k[e - p] = '=';
1298                         memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1299
1300                         if (valid_user_field(p, e - p)) {
1301                                 iovec[n].iov_base = k;
1302                                 iovec[n].iov_len = (e - p) + 1 + l;
1303                                 n++;
1304                         } else
1305                                 free(k);
1306
1307                         remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1308                         p = e + 1 + sizeof(uint64_t) + l + 1;
1309                 }
1310         }
1311
1312         if (n <= 0)
1313                 goto finish;
1314
1315         tn = n++;
1316         IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1317
1318         if (message) {
1319                 if (s->forward_to_syslog)
1320                         forward_syslog(s, priority, identifier, message, ucred, tv);
1321
1322                 if (s->forward_to_kmsg)
1323                         forward_kmsg(s, priority, identifier, message, ucred);
1324
1325                 if (s->forward_to_console)
1326                         forward_console(s, identifier, message, ucred);
1327         }
1328
1329         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, priority);
1330
1331 finish:
1332         for (j = 0; j < n; j++)  {
1333                 if (j == tn)
1334                         continue;
1335
1336                 if (iovec[j].iov_base < buffer ||
1337                     (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1338                         free(iovec[j].iov_base);
1339         }
1340
1341         free(iovec);
1342         free(identifier);
1343         free(message);
1344 }
1345
1346 static void process_native_file(
1347                 Server *s,
1348                 int fd,
1349                 struct ucred *ucred,
1350                 struct timeval *tv,
1351                 const char *label, size_t label_len) {
1352
1353         struct stat st;
1354         void *p;
1355         ssize_t n;
1356
1357         assert(s);
1358         assert(fd >= 0);
1359
1360         /* Data is in the passed file, since it didn't fit in a
1361          * datagram. We can't map the file here, since clients might
1362          * then truncate it and trigger a SIGBUS for us. So let's
1363          * stupidly read it */
1364
1365         if (fstat(fd, &st) < 0) {
1366                 log_error("Failed to stat passed file, ignoring: %m");
1367                 return;
1368         }
1369
1370         if (!S_ISREG(st.st_mode)) {
1371                 log_error("File passed is not regular. Ignoring.");
1372                 return;
1373         }
1374
1375         if (st.st_size <= 0)
1376                 return;
1377
1378         if (st.st_size > ENTRY_SIZE_MAX) {
1379                 log_error("File passed too large. Ignoring.");
1380                 return;
1381         }
1382
1383         p = malloc(st.st_size);
1384         if (!p) {
1385                 log_error("Out of memory");
1386                 return;
1387         }
1388
1389         n = pread(fd, p, st.st_size, 0);
1390         if (n < 0)
1391                 log_error("Failed to read file, ignoring: %s", strerror(-n));
1392         else if (n > 0)
1393                 process_native_message(s, p, n, ucred, tv, label, label_len);
1394
1395         free(p);
1396 }
1397
1398 static int stdout_stream_log(StdoutStream *s, const char *p) {
1399         struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1400         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1401         unsigned n = 0;
1402         int priority;
1403         char *label = NULL;
1404         size_t label_len = 0;
1405
1406         assert(s);
1407         assert(p);
1408
1409         if (isempty(p))
1410                 return 0;
1411
1412         priority = s->priority;
1413
1414         if (s->level_prefix)
1415                 parse_syslog_priority((char**) &p, &priority);
1416
1417         if (s->forward_to_syslog || s->server->forward_to_syslog)
1418                 forward_syslog(s->server, fixup_priority(priority), s->identifier, p, &s->ucred, NULL);
1419
1420         if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1421                 forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1422
1423         if (s->forward_to_console || s->server->forward_to_console)
1424                 forward_console(s->server, s->identifier, p, &s->ucred);
1425
1426         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1427
1428         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1429                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1430
1431         if (priority & LOG_FACMASK)
1432                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1433                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1434
1435         if (s->identifier) {
1436                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1437                 if (syslog_identifier)
1438                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1439         }
1440
1441         message = strappend("MESSAGE=", p);
1442         if (message)
1443                 IOVEC_SET_STRING(iovec[n++], message);
1444
1445 #ifdef HAVE_SELINUX
1446         if (s->security_context) {
1447                 label = (char*) s->security_context;
1448                 label_len = strlen((char*) s->security_context);
1449         }
1450 #endif
1451
1452         dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, label, label_len, priority);
1453
1454         free(message);
1455         free(syslog_priority);
1456         free(syslog_facility);
1457         free(syslog_identifier);
1458
1459         return 0;
1460 }
1461
1462 static int stdout_stream_line(StdoutStream *s, char *p) {
1463         int r;
1464
1465         assert(s);
1466         assert(p);
1467
1468         p = strstrip(p);
1469
1470         switch (s->state) {
1471
1472         case STDOUT_STREAM_IDENTIFIER:
1473                 if (isempty(p))
1474                         s->identifier = NULL;
1475                 else  {
1476                         s->identifier = strdup(p);
1477                         if (!s->identifier) {
1478                                 log_error("Out of memory");
1479                                 return -ENOMEM;
1480                         }
1481                 }
1482
1483                 s->state = STDOUT_STREAM_PRIORITY;
1484                 return 0;
1485
1486         case STDOUT_STREAM_PRIORITY:
1487                 r = safe_atoi(p, &s->priority);
1488                 if (r < 0 || s->priority <= 0 || s->priority >= 999) {
1489                         log_warning("Failed to parse log priority line.");
1490                         return -EINVAL;
1491                 }
1492
1493                 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1494                 return 0;
1495
1496         case STDOUT_STREAM_LEVEL_PREFIX:
1497                 r = parse_boolean(p);
1498                 if (r < 0) {
1499                         log_warning("Failed to parse level prefix line.");
1500                         return -EINVAL;
1501                 }
1502
1503                 s->level_prefix = !!r;
1504                 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1505                 return 0;
1506
1507         case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1508                 r = parse_boolean(p);
1509                 if (r < 0) {
1510                         log_warning("Failed to parse forward to syslog line.");
1511                         return -EINVAL;
1512                 }
1513
1514                 s->forward_to_syslog = !!r;
1515                 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1516                 return 0;
1517
1518         case STDOUT_STREAM_FORWARD_TO_KMSG:
1519                 r = parse_boolean(p);
1520                 if (r < 0) {
1521                         log_warning("Failed to parse copy to kmsg line.");
1522                         return -EINVAL;
1523                 }
1524
1525                 s->forward_to_kmsg = !!r;
1526                 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1527                 return 0;
1528
1529         case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1530                 r = parse_boolean(p);
1531                 if (r < 0) {
1532                         log_warning("Failed to parse copy to console line.");
1533                         return -EINVAL;
1534                 }
1535
1536                 s->forward_to_console = !!r;
1537                 s->state = STDOUT_STREAM_RUNNING;
1538                 return 0;
1539
1540         case STDOUT_STREAM_RUNNING:
1541                 return stdout_stream_log(s, p);
1542         }
1543
1544         assert_not_reached("Unknown stream state");
1545 }
1546
1547 static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
1548         char *p;
1549         size_t remaining;
1550         int r;
1551
1552         assert(s);
1553
1554         p = s->buffer;
1555         remaining = s->length;
1556         for (;;) {
1557                 char *end;
1558                 size_t skip;
1559
1560                 end = memchr(p, '\n', remaining);
1561                 if (end)
1562                         skip = end - p + 1;
1563                 else if (remaining >= sizeof(s->buffer) - 1) {
1564                         end = p + sizeof(s->buffer) - 1;
1565                         skip = remaining;
1566                 } else
1567                         break;
1568
1569                 *end = 0;
1570
1571                 r = stdout_stream_line(s, p);
1572                 if (r < 0)
1573                         return r;
1574
1575                 remaining -= skip;
1576                 p += skip;
1577         }
1578
1579         if (force_flush && remaining > 0) {
1580                 p[remaining] = 0;
1581                 r = stdout_stream_line(s, p);
1582                 if (r < 0)
1583                         return r;
1584
1585                 p += remaining;
1586                 remaining = 0;
1587         }
1588
1589         if (p > s->buffer) {
1590                 memmove(s->buffer, p, remaining);
1591                 s->length = remaining;
1592         }
1593
1594         return 0;
1595 }
1596
1597 static int stdout_stream_process(StdoutStream *s) {
1598         ssize_t l;
1599         int r;
1600
1601         assert(s);
1602
1603         l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1604         if (l < 0) {
1605
1606                 if (errno == EAGAIN)
1607                         return 0;
1608
1609                 log_warning("Failed to read from stream: %m");
1610                 return -errno;
1611         }
1612
1613         if (l == 0) {
1614                 r = stdout_stream_scan(s, true);
1615                 if (r < 0)
1616                         return r;
1617
1618                 return 0;
1619         }
1620
1621         s->length += l;
1622         r = stdout_stream_scan(s, false);
1623         if (r < 0)
1624                 return r;
1625
1626         return 1;
1627
1628 }
1629
1630 static void stdout_stream_free(StdoutStream *s) {
1631         assert(s);
1632
1633         if (s->server) {
1634                 assert(s->server->n_stdout_streams > 0);
1635                 s->server->n_stdout_streams --;
1636                 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
1637         }
1638
1639         if (s->fd >= 0) {
1640                 if (s->server)
1641                         epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1642
1643                 close_nointr_nofail(s->fd);
1644         }
1645
1646 #ifdef HAVE_SELINUX
1647         if (s->security_context)
1648                 freecon(s->security_context);
1649 #endif
1650
1651         free(s->identifier);
1652         free(s);
1653 }
1654
1655 static int stdout_stream_new(Server *s) {
1656         StdoutStream *stream;
1657         int fd, r;
1658         socklen_t len;
1659         struct epoll_event ev;
1660
1661         assert(s);
1662
1663         fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1664         if (fd < 0) {
1665                 if (errno == EAGAIN)
1666                         return 0;
1667
1668                 log_error("Failed to accept stdout connection: %m");
1669                 return -errno;
1670         }
1671
1672         if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1673                 log_warning("Too many stdout streams, refusing connection.");
1674                 close_nointr_nofail(fd);
1675                 return 0;
1676         }
1677
1678         stream = new0(StdoutStream, 1);
1679         if (!stream) {
1680                 log_error("Out of memory.");
1681                 close_nointr_nofail(fd);
1682                 return -ENOMEM;
1683         }
1684
1685         stream->fd = fd;
1686
1687         len = sizeof(stream->ucred);
1688         if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1689                 log_error("Failed to determine peer credentials: %m");
1690                 r = -errno;
1691                 goto fail;
1692         }
1693
1694 #ifdef HAVE_SELINUX
1695         if (getpeercon(fd, &stream->security_context) < 0 && errno != ENOPROTOOPT)
1696                 log_error("Failed to determine peer security context: %m");
1697 #endif
1698
1699         if (shutdown(fd, SHUT_WR) < 0) {
1700                 log_error("Failed to shutdown writing side of socket: %m");
1701                 r = -errno;
1702                 goto fail;
1703         }
1704
1705         zero(ev);
1706         ev.data.ptr = stream;
1707         ev.events = EPOLLIN;
1708         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1709                 log_error("Failed to add stream to event loop: %m");
1710                 r = -errno;
1711                 goto fail;
1712         }
1713
1714         stream->server = s;
1715         LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1716         s->n_stdout_streams ++;
1717
1718         return 0;
1719
1720 fail:
1721         stdout_stream_free(stream);
1722         return r;
1723 }
1724
1725 static int parse_kernel_timestamp(char **_p, usec_t *t) {
1726         usec_t r;
1727         int k, i;
1728         char *p;
1729
1730         assert(_p);
1731         assert(*_p);
1732         assert(t);
1733
1734         p = *_p;
1735
1736         if (strlen(p) < 14 || p[0] != '[' || p[13] != ']' || p[6] != '.')
1737                 return 0;
1738
1739         r = 0;
1740
1741         for (i = 1; i <= 5; i++) {
1742                 r *= 10;
1743
1744                 if (p[i] == ' ')
1745                         continue;
1746
1747                 k = undecchar(p[i]);
1748                 if (k < 0)
1749                         return 0;
1750
1751                 r += k;
1752         }
1753
1754         for (i = 7; i <= 12; i++) {
1755                 r *= 10;
1756
1757                 k = undecchar(p[i]);
1758                 if (k < 0)
1759                         return 0;
1760
1761                 r += k;
1762         }
1763
1764         *t = r;
1765         *_p += 14;
1766         *_p += strspn(*_p, WHITESPACE);
1767
1768         return 1;
1769 }
1770
/* Returns true if pid is the textual form of our own process ID.
 *
 * Returns false if pid is NULL or cannot be parsed by parse_pid().
 * NULL is accepted here since callers pass on what read_identifier()
 * found, and that leaves the PID unset for lines that carry no
 * "[PID]" part. */
static bool is_us(const char *pid) {
        pid_t t;

        if (!pid)
                return false;

        if (parse_pid(pid, &t) < 0)
                return false;

        return t == getpid();
}
1781
1782 static void proc_kmsg_line(Server *s, const char *p) {
1783         struct iovec iovec[N_IOVEC_META_FIELDS + 7];
1784         char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
1785         int priority = LOG_KERN | LOG_INFO;
1786         unsigned n = 0;
1787         usec_t usec;
1788         char *identifier = NULL, *pid = NULL;
1789
1790         assert(s);
1791         assert(p);
1792
1793         if (isempty(p))
1794                 return;
1795
1796         parse_syslog_priority((char **) &p, &priority);
1797
1798         if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
1799                 return;
1800
1801         if (parse_kernel_timestamp((char **) &p, &usec) > 0) {
1802                 if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
1803                              (unsigned long long) usec) >= 0)
1804                         IOVEC_SET_STRING(iovec[n++], source_time);
1805         }
1806
1807         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
1808
1809         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1810                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1811
1812         if ((priority & LOG_FACMASK) == LOG_KERN) {
1813
1814                 if (s->forward_to_syslog)
1815                         forward_syslog(s, priority, "kernel", p, NULL, NULL);
1816
1817                 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
1818         } else {
1819                 read_identifier(&p, &identifier, &pid);
1820
1821                 /* Avoid any messages we generated ourselves via
1822                  * log_info() and friends. */
1823                 if (is_us(pid))
1824                         goto finish;
1825
1826                 if (s->forward_to_syslog)
1827                         forward_syslog(s, priority, identifier, p, NULL, NULL);
1828
1829                 if (identifier) {
1830                         syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1831                         if (syslog_identifier)
1832                                 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1833                 }
1834
1835                 if (pid) {
1836                         syslog_pid = strappend("SYSLOG_PID=", pid);
1837                         if (syslog_pid)
1838                                 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1839                 }
1840
1841                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1842                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1843         }
1844
1845         message = strappend("MESSAGE=", p);
1846         if (message)
1847                 IOVEC_SET_STRING(iovec[n++], message);
1848
1849         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, priority);
1850
1851 finish:
1852         free(message);
1853         free(syslog_priority);
1854         free(syslog_identifier);
1855         free(syslog_pid);
1856         free(syslog_facility);
1857         free(source_time);
1858         free(identifier);
1859         free(pid);
1860 }
1861
1862 static void proc_kmsg_scan(Server *s) {
1863         char *p;
1864         size_t remaining;
1865
1866         assert(s);
1867
1868         p = s->proc_kmsg_buffer;
1869         remaining = s->proc_kmsg_length;
1870         for (;;) {
1871                 char *end;
1872                 size_t skip;
1873
1874                 end = memchr(p, '\n', remaining);
1875                 if (end)
1876                         skip = end - p + 1;
1877                 else if (remaining >= sizeof(s->proc_kmsg_buffer) - 1) {
1878                         end = p + sizeof(s->proc_kmsg_buffer) - 1;
1879                         skip = remaining;
1880                 } else
1881                         break;
1882
1883                 *end = 0;
1884
1885                 proc_kmsg_line(s, p);
1886
1887                 remaining -= skip;
1888                 p += skip;
1889         }
1890
1891         if (p > s->proc_kmsg_buffer) {
1892                 memmove(s->proc_kmsg_buffer, p, remaining);
1893                 s->proc_kmsg_length = remaining;
1894         }
1895 }
1896
1897 static int system_journal_open(Server *s) {
1898         int r;
1899         char *fn;
1900         sd_id128_t machine;
1901         char ids[33];
1902
1903         r = sd_id128_get_machine(&machine);
1904         if (r < 0)
1905                 return r;
1906
1907         sd_id128_to_string(machine, ids);
1908
1909         if (!s->system_journal) {
1910
1911                 /* First try to create the machine path, but not the prefix */
1912                 fn = strappend("/var/log/journal/", ids);
1913                 if (!fn)
1914                         return -ENOMEM;
1915                 (void) mkdir(fn, 0755);
1916                 free(fn);
1917
1918                 /* The create the system journal file */
1919                 fn = join("/var/log/journal/", ids, "/system.journal", NULL);
1920                 if (!fn)
1921                         return -ENOMEM;
1922
1923                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, NULL, &s->system_journal);
1924                 free(fn);
1925
1926                 if (r >= 0) {
1927                         journal_default_metrics(&s->system_metrics, s->system_journal->fd);
1928
1929                         s->system_journal->metrics = s->system_metrics;
1930                         s->system_journal->compress = s->compress;
1931
1932                         server_fix_perms(s, s->system_journal, 0);
1933                 } else if (r < 0) {
1934
1935                         if (r != -ENOENT && r != -EROFS)
1936                                 log_warning("Failed to open system journal: %s", strerror(-r));
1937
1938                         r = 0;
1939                 }
1940         }
1941
1942         if (!s->runtime_journal) {
1943
1944                 fn = join("/run/log/journal/", ids, "/system.journal", NULL);
1945                 if (!fn)
1946                         return -ENOMEM;
1947
1948                 if (s->system_journal) {
1949
1950                         /* Try to open the runtime journal, but only
1951                          * if it already exists, so that we can flush
1952                          * it into the system journal */
1953
1954                         r = journal_file_open(fn, O_RDWR, 0640, NULL, &s->runtime_journal);
1955                         free(fn);
1956
1957                         if (r < 0) {
1958                                 if (r != -ENOENT)
1959                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
1960
1961                                 r = 0;
1962                         }
1963
1964                 } else {
1965
1966                         /* OK, we really need the runtime journal, so create
1967                          * it if necessary. */
1968
1969                         (void) mkdir_parents(fn, 0755);
1970                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, NULL, &s->runtime_journal);
1971                         free(fn);
1972
1973                         if (r < 0) {
1974                                 log_error("Failed to open runtime journal: %s", strerror(-r));
1975                                 return r;
1976                         }
1977                 }
1978
1979                 if (s->runtime_journal) {
1980                         journal_default_metrics(&s->runtime_metrics, s->runtime_journal->fd);
1981
1982                         s->runtime_journal->metrics = s->runtime_metrics;
1983                         s->runtime_journal->compress = s->compress;
1984
1985                         server_fix_perms(s, s->runtime_journal, 0);
1986                 }
1987         }
1988
1989         return r;
1990 }
1991
1992 static int server_flush_to_var(Server *s) {
1993         char path[] = "/run/log/journal/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
1994         Object *o = NULL;
1995         int r;
1996         sd_id128_t machine;
1997         sd_journal *j;
1998         usec_t ts;
1999
2000         assert(s);
2001
2002         if (!s->runtime_journal)
2003                 return 0;
2004
2005         ts = now(CLOCK_MONOTONIC);
2006         if (s->var_available_timestamp + RECHECK_VAR_AVAILABLE_USEC > ts)
2007                 return 0;
2008
2009         s->var_available_timestamp = ts;
2010
2011         system_journal_open(s);
2012
2013         if (!s->system_journal)
2014                 return 0;
2015
2016         log_info("Flushing to /var...");
2017
2018         r = sd_id128_get_machine(&machine);
2019         if (r < 0) {
2020                 log_error("Failed to get machine id: %s", strerror(-r));
2021                 return r;
2022         }
2023
2024         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
2025         if (r < 0) {
2026                 log_error("Failed to read runtime journal: %s", strerror(-r));
2027                 return r;
2028         }
2029
2030         SD_JOURNAL_FOREACH(j) {
2031                 JournalFile *f;
2032
2033                 f = j->current_file;
2034                 assert(f && f->current_offset > 0);
2035
2036                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2037                 if (r < 0) {
2038                         log_error("Can't read entry: %s", strerror(-r));
2039                         goto finish;
2040                 }
2041
2042                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2043                 if (r == -E2BIG) {
2044                         log_info("Allocation limit reached.");
2045
2046                         journal_file_post_change(s->system_journal);
2047                         server_rotate(s);
2048                         server_vacuum(s);
2049
2050                         r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2051                 }
2052
2053                 if (r < 0) {
2054                         log_error("Can't write entry: %s", strerror(-r));
2055                         goto finish;
2056                 }
2057         }
2058
2059 finish:
2060         journal_file_post_change(s->system_journal);
2061
2062         journal_file_close(s->runtime_journal);
2063         s->runtime_journal = NULL;
2064
2065         if (r >= 0) {
2066                 sd_id128_to_string(machine, path + 17);
2067                 rm_rf(path, false, true, false);
2068         }
2069
2070         return r;
2071 }
2072
2073 static int server_read_proc_kmsg(Server *s) {
2074         ssize_t l;
2075         assert(s);
2076         assert(s->proc_kmsg_fd >= 0);
2077
2078         l = read(s->proc_kmsg_fd, s->proc_kmsg_buffer + s->proc_kmsg_length, sizeof(s->proc_kmsg_buffer) - 1 - s->proc_kmsg_length);
2079         if (l < 0) {
2080
2081                 if (errno == EAGAIN || errno == EINTR)
2082                         return 0;
2083
2084                 log_error("Failed to read from kernel: %m");
2085                 return -errno;
2086         }
2087
2088         s->proc_kmsg_length += l;
2089
2090         proc_kmsg_scan(s);
2091         return 1;
2092 }
2093
2094 static int server_flush_proc_kmsg(Server *s) {
2095         int r;
2096
2097         assert(s);
2098
2099         if (s->proc_kmsg_fd < 0)
2100                 return 0;
2101
2102         log_info("Flushing /proc/kmsg...");
2103
2104         for (;;) {
2105                 r = server_read_proc_kmsg(s);
2106                 if (r < 0)
2107                         return r;
2108
2109                 if (r == 0)
2110                         break;
2111         }
2112
2113         return 0;
2114 }
2115
2116 static int process_event(Server *s, struct epoll_event *ev) {
2117         assert(s);
2118
2119         if (ev->data.fd == s->signal_fd) {
2120                 struct signalfd_siginfo sfsi;
2121                 ssize_t n;
2122
2123                 if (ev->events != EPOLLIN) {
2124                         log_info("Got invalid event from epoll.");
2125                         return -EIO;
2126                 }
2127
2128                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
2129                 if (n != sizeof(sfsi)) {
2130
2131                         if (n >= 0)
2132                                 return -EIO;
2133
2134                         if (errno == EINTR || errno == EAGAIN)
2135                                 return 1;
2136
2137                         return -errno;
2138                 }
2139
2140                 if (sfsi.ssi_signo == SIGUSR1) {
2141                         server_flush_to_var(s);
2142                         return 0;
2143                 }
2144
2145                 log_debug("Received SIG%s", signal_to_string(sfsi.ssi_signo));
2146                 return 0;
2147
2148         } else if (ev->data.fd == s->proc_kmsg_fd) {
2149                 int r;
2150
2151                 if (ev->events != EPOLLIN) {
2152                         log_info("Got invalid event from epoll.");
2153                         return -EIO;
2154                 }
2155
2156                 r = server_read_proc_kmsg(s);
2157                 if (r < 0)
2158                         return r;
2159
2160                 return 1;
2161
2162         } else if (ev->data.fd == s->native_fd ||
2163                    ev->data.fd == s->syslog_fd) {
2164
2165                 if (ev->events != EPOLLIN) {
2166                         log_info("Got invalid event from epoll.");
2167                         return -EIO;
2168                 }
2169
2170                 for (;;) {
2171                         struct msghdr msghdr;
2172                         struct iovec iovec;
2173                         struct ucred *ucred = NULL;
2174                         struct timeval *tv = NULL;
2175                         struct cmsghdr *cmsg;
2176                         char *label = NULL;
2177                         size_t label_len = 0;
2178                         union {
2179                                 struct cmsghdr cmsghdr;
2180
2181                                 /* We use NAME_MAX space for the
2182                                  * SELinux label here. The kernel
2183                                  * currently enforces no limit, but
2184                                  * according to suggestions from the
2185                                  * SELinux people this will change and
2186                                  * it will probably be identical to
2187                                  * NAME_MAX. For now we use that, but
2188                                  * this should be updated one day when
2189                                  * the final limit is known.*/
2190                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
2191                                             CMSG_SPACE(sizeof(struct timeval)) +
2192                                             CMSG_SPACE(sizeof(int)) + /* fd */
2193                                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
2194                         } control;
2195                         ssize_t n;
2196                         int v;
2197                         int *fds = NULL;
2198                         unsigned n_fds = 0;
2199
2200                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
2201                                 log_error("SIOCINQ failed: %m");
2202                                 return -errno;
2203                         }
2204
2205                         if (s->buffer_size < (size_t) v) {
2206                                 void *b;
2207                                 size_t l;
2208
2209                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
2210                                 b = realloc(s->buffer, l+1);
2211
2212                                 if (!b) {
2213                                         log_error("Couldn't increase buffer.");
2214                                         return -ENOMEM;
2215                                 }
2216
2217                                 s->buffer_size = l;
2218                                 s->buffer = b;
2219                         }
2220
2221                         zero(iovec);
2222                         iovec.iov_base = s->buffer;
2223                         iovec.iov_len = s->buffer_size;
2224
2225                         zero(control);
2226                         zero(msghdr);
2227                         msghdr.msg_iov = &iovec;
2228                         msghdr.msg_iovlen = 1;
2229                         msghdr.msg_control = &control;
2230                         msghdr.msg_controllen = sizeof(control);
2231
2232                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
2233                         if (n < 0) {
2234
2235                                 if (errno == EINTR || errno == EAGAIN)
2236                                         return 1;
2237
2238                                 log_error("recvmsg() failed: %m");
2239                                 return -errno;
2240                         }
2241
2242                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
2243
2244                                 if (cmsg->cmsg_level == SOL_SOCKET &&
2245                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
2246                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
2247                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
2248                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2249                                          cmsg->cmsg_type == SCM_SECURITY) {
2250                                         label = (char*) CMSG_DATA(cmsg);
2251                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
2252                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
2253                                          cmsg->cmsg_type == SO_TIMESTAMP &&
2254                                          cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
2255                                         tv = (struct timeval*) CMSG_DATA(cmsg);
2256                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2257                                          cmsg->cmsg_type == SCM_RIGHTS) {
2258                                         fds = (int*) CMSG_DATA(cmsg);
2259                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
2260                                 }
2261                         }
2262
2263                         if (ev->data.fd == s->syslog_fd) {
2264                                 char *e;
2265
2266                                 if (n > 0 && n_fds == 0) {
2267                                         e = memchr(s->buffer, '\n', n);
2268                                         if (e)
2269                                                 *e = 0;
2270                                         else
2271                                                 s->buffer[n] = 0;
2272
2273                                         process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
2274                                 } else if (n_fds > 0)
2275                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
2276
2277                         } else {
2278                                 if (n > 0 && n_fds == 0)
2279                                         process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
2280                                 else if (n == 0 && n_fds == 1)
2281                                         process_native_file(s, fds[0], ucred, tv, label, label_len);
2282                                 else if (n_fds > 0)
2283                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
2284                         }
2285
2286                         close_many(fds, n_fds);
2287                 }
2288
2289                 return 1;
2290
2291         } else if (ev->data.fd == s->stdout_fd) {
2292
2293                 if (ev->events != EPOLLIN) {
2294                         log_info("Got invalid event from epoll.");
2295                         return -EIO;
2296                 }
2297
2298                 stdout_stream_new(s);
2299                 return 1;
2300
2301         } else {
2302                 StdoutStream *stream;
2303
2304                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
2305                         log_info("Got invalid event from epoll.");
2306                         return -EIO;
2307                 }
2308
2309                 /* If it is none of the well-known fds, it must be an
2310                  * stdout stream fd. Note that this is a bit ugly here
2311                  * (since we rely that none of the well-known fds
2312                  * could be interpreted as pointer), but nonetheless
2313                  * safe, since the well-known fds would never get an
2314                  * fd > 4096, i.e. beyond the first memory page */
2315
2316                 stream = ev->data.ptr;
2317
2318                 if (stdout_stream_process(stream) <= 0)
2319                         stdout_stream_free(stream);
2320
2321                 return 1;
2322         }
2323
2324         log_error("Unknown event.");
2325         return 0;
2326 }
2327
2328 static int open_syslog_socket(Server *s) {
2329         union sockaddr_union sa;
2330         int one, r;
2331         struct epoll_event ev;
2332
2333         assert(s);
2334
2335         if (s->syslog_fd < 0) {
2336
2337                 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2338                 if (s->syslog_fd < 0) {
2339                         log_error("socket() failed: %m");
2340                         return -errno;
2341                 }
2342
2343                 zero(sa);
2344                 sa.un.sun_family = AF_UNIX;
2345                 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
2346
2347                 unlink(sa.un.sun_path);
2348
2349                 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2350                 if (r < 0) {
2351                         log_error("bind() failed: %m");
2352                         return -errno;
2353                 }
2354
2355                 chmod(sa.un.sun_path, 0666);
2356         } else
2357                 fd_nonblock(s->syslog_fd, 1);
2358
2359         one = 1;
2360         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2361         if (r < 0) {
2362                 log_error("SO_PASSCRED failed: %m");
2363                 return -errno;
2364         }
2365
2366 #ifdef HAVE_SELINUX
2367         one = 1;
2368         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2369         if (r < 0)
2370                 log_warning("SO_PASSSEC failed: %m");
2371 #endif
2372
2373         one = 1;
2374         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2375         if (r < 0) {
2376                 log_error("SO_TIMESTAMP failed: %m");
2377                 return -errno;
2378         }
2379
2380         zero(ev);
2381         ev.events = EPOLLIN;
2382         ev.data.fd = s->syslog_fd;
2383         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2384                 log_error("Failed to add syslog server fd to epoll object: %m");
2385                 return -errno;
2386         }
2387
2388         return 0;
2389 }
2390
2391 static int open_native_socket(Server*s) {
2392         union sockaddr_union sa;
2393         int one, r;
2394         struct epoll_event ev;
2395
2396         assert(s);
2397
2398         if (s->native_fd < 0) {
2399
2400                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2401                 if (s->native_fd < 0) {
2402                         log_error("socket() failed: %m");
2403                         return -errno;
2404                 }
2405
2406                 zero(sa);
2407                 sa.un.sun_family = AF_UNIX;
2408                 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
2409
2410                 unlink(sa.un.sun_path);
2411
2412                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2413                 if (r < 0) {
2414                         log_error("bind() failed: %m");
2415                         return -errno;
2416                 }
2417
2418                 chmod(sa.un.sun_path, 0666);
2419         } else
2420                 fd_nonblock(s->native_fd, 1);
2421
2422         one = 1;
2423         r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2424         if (r < 0) {
2425                 log_error("SO_PASSCRED failed: %m");
2426                 return -errno;
2427         }
2428
2429 #ifdef HAVE_SELINUX
2430         one = 1;
2431         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2432         if (r < 0)
2433                 log_warning("SO_PASSSEC failed: %m");
2434 #endif
2435
2436         one = 1;
2437         r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2438         if (r < 0) {
2439                 log_error("SO_TIMESTAMP failed: %m");
2440                 return -errno;
2441         }
2442
2443         zero(ev);
2444         ev.events = EPOLLIN;
2445         ev.data.fd = s->native_fd;
2446         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2447                 log_error("Failed to add native server fd to epoll object: %m");
2448                 return -errno;
2449         }
2450
2451         return 0;
2452 }
2453
2454 static int open_stdout_socket(Server *s) {
2455         union sockaddr_union sa;
2456         int r;
2457         struct epoll_event ev;
2458
2459         assert(s);
2460
2461         if (s->stdout_fd < 0) {
2462
2463                 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2464                 if (s->stdout_fd < 0) {
2465                         log_error("socket() failed: %m");
2466                         return -errno;
2467                 }
2468
2469                 zero(sa);
2470                 sa.un.sun_family = AF_UNIX;
2471                 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
2472
2473                 unlink(sa.un.sun_path);
2474
2475                 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2476                 if (r < 0) {
2477                         log_error("bind() failed: %m");
2478                         return -errno;
2479                 }
2480
2481                 chmod(sa.un.sun_path, 0666);
2482
2483                 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2484                         log_error("liste() failed: %m");
2485                         return -errno;
2486                 }
2487         } else
2488                 fd_nonblock(s->stdout_fd, 1);
2489
2490         zero(ev);
2491         ev.events = EPOLLIN;
2492         ev.data.fd = s->stdout_fd;
2493         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2494                 log_error("Failed to add stdout server fd to epoll object: %m");
2495                 return -errno;
2496         }
2497
2498         return 0;
2499 }
2500
2501 static int open_proc_kmsg(Server *s) {
2502         struct epoll_event ev;
2503
2504         assert(s);
2505
2506         if (!s->import_proc_kmsg)
2507                 return 0;
2508
2509         s->proc_kmsg_fd = open("/proc/kmsg", O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2510         if (s->proc_kmsg_fd < 0) {
2511                 log_warning("Failed to open /proc/kmsg, ignoring: %m");
2512                 return 0;
2513         }
2514
2515         zero(ev);
2516         ev.events = EPOLLIN;
2517         ev.data.fd = s->proc_kmsg_fd;
2518         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->proc_kmsg_fd, &ev) < 0) {
2519                 log_error("Failed to add /proc/kmsg fd to epoll object: %m");
2520                 return -errno;
2521         }
2522
2523         return 0;
2524 }
2525
2526 static int open_signalfd(Server *s) {
2527         sigset_t mask;
2528         struct epoll_event ev;
2529
2530         assert(s);
2531
2532         assert_se(sigemptyset(&mask) == 0);
2533         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, -1);
2534         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2535
2536         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2537         if (s->signal_fd < 0) {
2538                 log_error("signalfd(): %m");
2539                 return -errno;
2540         }
2541
2542         zero(ev);
2543         ev.events = EPOLLIN;
2544         ev.data.fd = s->signal_fd;
2545
2546         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2547                 log_error("epoll_ctl(): %m");
2548                 return -errno;
2549         }
2550
2551         return 0;
2552 }
2553
2554 static int server_parse_proc_cmdline(Server *s) {
2555         char *line, *w, *state;
2556         int r;
2557         size_t l;
2558
2559         if (detect_container(NULL) > 0)
2560                 return 0;
2561
2562         r = read_one_line_file("/proc/cmdline", &line);
2563         if (r < 0) {
2564                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2565                 return 0;
2566         }
2567
2568         FOREACH_WORD_QUOTED(w, l, line, state) {
2569                 char *word;
2570
2571                 word = strndup(w, l);
2572                 if (!word) {
2573                         r = -ENOMEM;
2574                         goto finish;
2575                 }
2576
2577                 if (startswith(word, "systemd_journald.forward_to_syslog=")) {
2578                         r = parse_boolean(word + 35);
2579                         if (r < 0)
2580                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2581                         else
2582                                 s->forward_to_syslog = r;
2583                 } else if (startswith(word, "systemd_journald.forward_to_kmsg=")) {
2584                         r = parse_boolean(word + 33);
2585                         if (r < 0)
2586                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2587                         else
2588                                 s->forward_to_kmsg = r;
2589                 } else if (startswith(word, "systemd_journald.forward_to_console=")) {
2590                         r = parse_boolean(word + 36);
2591                         if (r < 0)
2592                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2593                         else
2594                                 s->forward_to_console = r;
2595                 }
2596
2597                 free(word);
2598         }
2599
2600         r = 0;
2601
2602 finish:
2603         free(line);
2604         return r;
2605 }
2606
2607 static int server_parse_config_file(Server *s) {
2608         FILE *f;
2609         const char *fn;
2610         int r;
2611
2612         assert(s);
2613
2614         fn = "/etc/systemd/journald.conf";
2615         f = fopen(fn, "re");
2616         if (!f) {
2617                 if (errno == ENOENT)
2618                         return 0;
2619
2620                 log_warning("Failed to open configuration file %s: %m", fn);
2621                 return -errno;
2622         }
2623
2624         r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2625         if (r < 0)
2626                 log_warning("Failed to parse configuration file: %s", strerror(-r));
2627
2628         fclose(f);
2629
2630         return r;
2631 }
2632
2633 static int server_init(Server *s) {
2634         int n, r, fd;
2635
2636         assert(s);
2637
2638         zero(*s);
2639         s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->proc_kmsg_fd = -1;
2640         s->compress = true;
2641
2642         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
2643         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
2644
2645         s->forward_to_syslog = true;
2646         s->import_proc_kmsg = true;
2647
2648         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
2649         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
2650
2651         server_parse_config_file(s);
2652         server_parse_proc_cmdline(s);
2653
2654         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
2655         if (!s->user_journals) {
2656                 log_error("Out of memory.");
2657                 return -ENOMEM;
2658         }
2659
2660         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2661         if (s->epoll_fd < 0) {
2662                 log_error("Failed to create epoll object: %m");
2663                 return -errno;
2664         }
2665
2666         n = sd_listen_fds(true);
2667         if (n < 0) {
2668                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
2669                 return n;
2670         }
2671
2672         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2673
2674                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
2675
2676                         if (s->native_fd >= 0) {
2677                                 log_error("Too many native sockets passed.");
2678                                 return -EINVAL;
2679                         }
2680
2681                         s->native_fd = fd;
2682
2683                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
2684
2685                         if (s->stdout_fd >= 0) {
2686                                 log_error("Too many stdout sockets passed.");
2687                                 return -EINVAL;
2688                         }
2689
2690                         s->stdout_fd = fd;
2691
2692                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
2693
2694                         if (s->syslog_fd >= 0) {
2695                                 log_error("Too many /dev/log sockets passed.");
2696                                 return -EINVAL;
2697                         }
2698
2699                         s->syslog_fd = fd;
2700
2701                 } else {
2702                         log_error("Unknown socket passed.");
2703                         return -EINVAL;
2704                 }
2705         }
2706
2707         r = open_syslog_socket(s);
2708         if (r < 0)
2709                 return r;
2710
2711         r = open_native_socket(s);
2712         if (r < 0)
2713                 return r;
2714
2715         r = open_stdout_socket(s);
2716         if (r < 0)
2717                 return r;
2718
2719         r = open_proc_kmsg(s);
2720         if (r < 0)
2721                 return r;
2722
2723         r = open_signalfd(s);
2724         if (r < 0)
2725                 return r;
2726
2727         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2728         if (!s->rate_limit)
2729                 return -ENOMEM;
2730
2731         r = system_journal_open(s);
2732         if (r < 0)
2733                 return r;
2734
2735         return 0;
2736 }
2737
2738 static void server_done(Server *s) {
2739         JournalFile *f;
2740         assert(s);
2741
2742         while (s->stdout_streams)
2743                 stdout_stream_free(s->stdout_streams);
2744
2745         if (s->system_journal)
2746                 journal_file_close(s->system_journal);
2747
2748         if (s->runtime_journal)
2749                 journal_file_close(s->runtime_journal);
2750
2751         while ((f = hashmap_steal_first(s->user_journals)))
2752                 journal_file_close(f);
2753
2754         hashmap_free(s->user_journals);
2755
2756         if (s->epoll_fd >= 0)
2757                 close_nointr_nofail(s->epoll_fd);
2758
2759         if (s->signal_fd >= 0)
2760                 close_nointr_nofail(s->signal_fd);
2761
2762         if (s->syslog_fd >= 0)
2763                 close_nointr_nofail(s->syslog_fd);
2764
2765         if (s->native_fd >= 0)
2766                 close_nointr_nofail(s->native_fd);
2767
2768         if (s->stdout_fd >= 0)
2769                 close_nointr_nofail(s->stdout_fd);
2770
2771         if (s->proc_kmsg_fd >= 0)
2772                 close_nointr_nofail(s->proc_kmsg_fd);
2773
2774         if (s->rate_limit)
2775                 journal_rate_limit_free(s->rate_limit);
2776
2777         free(s->buffer);
2778 }
2779
2780 int main(int argc, char *argv[]) {
2781         Server server;
2782         int r;
2783
2784         /* if (getppid() != 1) { */
2785         /*         log_error("This program should be invoked by init only."); */
2786         /*         return EXIT_FAILURE; */
2787         /* } */
2788
2789         if (argc > 1) {
2790                 log_error("This program does not take arguments.");
2791                 return EXIT_FAILURE;
2792         }
2793
2794         log_set_target(LOG_TARGET_SAFE);
2795         log_set_facility(LOG_SYSLOG);
2796         log_parse_environment();
2797         log_open();
2798
2799         umask(0022);
2800
2801         r = server_init(&server);
2802         if (r < 0)
2803                 goto finish;
2804
2805         server_vacuum(&server);
2806         server_flush_to_var(&server);
2807         server_flush_proc_kmsg(&server);
2808
2809         log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
2810         driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
2811
2812         sd_notify(false,
2813                   "READY=1\n"
2814                   "STATUS=Processing requests...");
2815
2816         for (;;) {
2817                 struct epoll_event event;
2818
2819                 r = epoll_wait(server.epoll_fd, &event, 1, -1);
2820                 if (r < 0) {
2821
2822                         if (errno == EINTR)
2823                                 continue;
2824
2825                         log_error("epoll_wait() failed: %m");
2826                         r = -errno;
2827                         goto finish;
2828                 } else if (r == 0)
2829                         break;
2830
2831                 r = process_event(&server, &event);
2832                 if (r < 0)
2833                         goto finish;
2834                 else if (r == 0)
2835                         break;
2836         }
2837
2838         log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
2839         driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
2840
2841 finish:
2842         sd_notify(false,
2843                   "STATUS=Shutting down...");
2844
2845         server_done(&server);
2846
2847         return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
2848 }