chiark / gitweb /
journal: don't complain if SELinux userspace is available but the kernel lacks it
[elogind.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
32
33 #include <systemd/sd-journal.h>
34 #include <systemd/sd-login.h>
35 #include <systemd/sd-messages.h>
36 #include <systemd/sd-daemon.h>
37
38 #include "mkdir.h"
39 #include "hashmap.h"
40 #include "journal-file.h"
41 #include "socket-util.h"
42 #include "cgroup-util.h"
43 #include "list.h"
44 #include "journal-rate-limit.h"
45 #include "journal-internal.h"
46 #include "conf-parser.h"
47 #include "journald.h"
48 #include "virt.h"
49 #include "missing.h"
50
51 #ifdef HAVE_ACL
52 #include <sys/acl.h>
53 #include <acl/libacl.h>
54 #include "acl-util.h"
55 #endif
56
57 #ifdef HAVE_SELINUX
58 #include <selinux/selinux.h>
59 #endif
60
/* Upper bound on the number of per-user journal files kept open at
 * once; find_journal() closes other open files before opening a new
 * one when this limit is reached. */
#define USER_JOURNALS_MAX 1024
/* NOTE(review): presumably the cap on concurrently connected stdout
 * streams; the user of this constant is outside this part of the
 * file -- confirm. */
#define STDOUT_STREAMS_MAX 4096

/* NOTE(review): presumably the default rate limiting parameters handed
 * to the journal rate limiter; not referenced in the visible part of
 * the file -- confirm. */
#define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 200

/* How long the result of available_space() is served from the cache
 * before the journal directory is scanned again. */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)

/* NOTE(review): looks like the matching recheck interval for the
 * availability of /var; not referenced in the visible part of the
 * file -- confirm. */
#define RECHECK_VAR_AVAILABLE_USEC (30*USEC_PER_SEC)

/* Number of iovec slots callers must leave free for the metadata
 * fields dispatch_message_real() appends to every entry. */
#define N_IOVEC_META_FIELDS 17

/* NOTE(review): presumably the maximum accepted size of a single
 * entry in bytes (32 MiB); not referenced in the visible part of the
 * file -- confirm. */
#define ENTRY_SIZE_MAX (1024*1024*32)
74
/* State of a StdoutStream. NOTE(review): the names suggest that every
 * state before STDOUT_STREAM_RUNNING corresponds to one header field
 * still expected from the client, in this order; the parser is outside
 * this part of the file -- confirm. */
typedef enum StdoutStreamState {
        STDOUT_STREAM_IDENTIFIER,
        STDOUT_STREAM_PRIORITY,
        STDOUT_STREAM_LEVEL_PREFIX,
        STDOUT_STREAM_FORWARD_TO_SYSLOG,
        STDOUT_STREAM_FORWARD_TO_KMSG,
        STDOUT_STREAM_FORWARD_TO_CONSOLE,
        STDOUT_STREAM_RUNNING
} StdoutStreamState;
84
/* Per-connection bookkeeping for one stdout stream. */
struct StdoutStream {
        Server *server;                 /* server this stream is attached to */
        StdoutStreamState state;        /* current state, see StdoutStreamState */

        int fd;                         /* file descriptor of the stream */

        struct ucred ucred;             /* credentials associated with the stream */
#ifdef HAVE_SELINUX
        security_context_t security_context; /* only present when built with SELinux support */
#endif

        /* Settings of the stream; the field names mirror the states of
         * StdoutStreamState. */
        char *identifier;
        int priority;
        bool level_prefix:1;
        bool forward_to_syslog:1;
        bool forward_to_kmsg:1;
        bool forward_to_console:1;

        /* Buffer of LINE_MAX+1 bytes plus a length counter.
         * NOTE(review): presumably 'length' is the number of buffered
         * bytes not yet processed -- confirm against the reader. */
        char buffer[LINE_MAX+1];
        size_t length;

        /* Linkage for the list of stdout streams */
        LIST_FIELDS(StdoutStream, stdout_stream);
};
108
109 static int server_flush_to_var(Server *s);
110
111 static uint64_t available_space(Server *s) {
112         char ids[33], *p;
113         const char *f;
114         sd_id128_t machine;
115         struct statvfs ss;
116         uint64_t sum = 0, avail = 0, ss_avail = 0;
117         int r;
118         DIR *d;
119         usec_t ts;
120         JournalMetrics *m;
121
122         ts = now(CLOCK_MONOTONIC);
123
124         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
125                 return s->cached_available_space;
126
127         r = sd_id128_get_machine(&machine);
128         if (r < 0)
129                 return 0;
130
131         if (s->system_journal) {
132                 f = "/var/log/journal/";
133                 m = &s->system_metrics;
134         } else {
135                 f = "/run/log/journal/";
136                 m = &s->runtime_metrics;
137         }
138
139         assert(m);
140
141         p = strappend(f, sd_id128_to_string(machine, ids));
142         if (!p)
143                 return 0;
144
145         d = opendir(p);
146         free(p);
147
148         if (!d)
149                 return 0;
150
151         if (fstatvfs(dirfd(d), &ss) < 0)
152                 goto finish;
153
154         for (;;) {
155                 struct stat st;
156                 struct dirent buf, *de;
157
158                 r = readdir_r(d, &buf, &de);
159                 if (r != 0)
160                         break;
161
162                 if (!de)
163                         break;
164
165                 if (!endswith(de->d_name, ".journal") &&
166                     !endswith(de->d_name, ".journal~"))
167                         continue;
168
169                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
170                         continue;
171
172                 if (!S_ISREG(st.st_mode))
173                         continue;
174
175                 sum += (uint64_t) st.st_blocks * 512UL;
176         }
177
178         avail = sum >= m->max_use ? 0 : m->max_use - sum;
179
180         ss_avail = ss.f_bsize * ss.f_bavail;
181
182         ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
183
184         if (ss_avail < avail)
185                 avail = ss_avail;
186
187         s->cached_available_space = avail;
188         s->cached_available_space_timestamp = ts;
189
190 finish:
191         closedir(d);
192
193         return avail;
194 }
195
196 static void server_read_file_gid(Server *s) {
197         const char *adm = "adm";
198         int r;
199
200         assert(s);
201
202         if (s->file_gid_valid)
203                 return;
204
205         r = get_group_creds(&adm, &s->file_gid);
206         if (r < 0)
207                 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
208
209         /* if we couldn't read the gid, then it will be 0, but that's
210          * fine and we shouldn't try to resolve the group again, so
211          * let's just pretend it worked right-away. */
212         s->file_gid_valid = true;
213 }
214
215 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
216         int r;
217 #ifdef HAVE_ACL
218         acl_t acl;
219         acl_entry_t entry;
220         acl_permset_t permset;
221 #endif
222
223         assert(f);
224
225         server_read_file_gid(s);
226
227         r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
228         if (r < 0)
229                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
230
231 #ifdef HAVE_ACL
232         if (uid <= 0)
233                 return;
234
235         acl = acl_get_fd(f->fd);
236         if (!acl) {
237                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
238                 return;
239         }
240
241         r = acl_find_uid(acl, uid, &entry);
242         if (r <= 0) {
243
244                 if (acl_create_entry(&acl, &entry) < 0 ||
245                     acl_set_tag_type(entry, ACL_USER) < 0 ||
246                     acl_set_qualifier(entry, &uid) < 0) {
247                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
248                         goto finish;
249                 }
250         }
251
252         if (acl_get_permset(entry, &permset) < 0 ||
253             acl_add_perm(permset, ACL_READ) < 0 ||
254             acl_calc_mask(&acl) < 0) {
255                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
256                 goto finish;
257         }
258
259         if (acl_set_fd(f->fd, acl) < 0)
260                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
261
262 finish:
263         acl_free(acl);
264 #endif
265 }
266
267 static JournalFile* find_journal(Server *s, uid_t uid) {
268         char *p;
269         int r;
270         JournalFile *f;
271         char ids[33];
272         sd_id128_t machine;
273
274         assert(s);
275
276         /* We split up user logs only on /var, not on /run. If the
277          * runtime file is open, we write to it exclusively, in order
278          * to guarantee proper order as soon as we flush /run to
279          * /var and close the runtime file. */
280
281         if (s->runtime_journal)
282                 return s->runtime_journal;
283
284         if (uid <= 0)
285                 return s->system_journal;
286
287         r = sd_id128_get_machine(&machine);
288         if (r < 0)
289                 return s->system_journal;
290
291         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
292         if (f)
293                 return f;
294
295         if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0)
296                 return s->system_journal;
297
298         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
299                 /* Too many open? Then let's close one */
300                 f = hashmap_steal_first(s->user_journals);
301                 assert(f);
302                 journal_file_close(f);
303         }
304
305         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->system_journal, &f);
306         free(p);
307
308         if (r < 0)
309                 return s->system_journal;
310
311         server_fix_perms(s, f, uid);
312
313         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
314         if (r < 0) {
315                 journal_file_close(f);
316                 return s->system_journal;
317         }
318
319         return f;
320 }
321
322 static void server_rotate(Server *s) {
323         JournalFile *f;
324         void *k;
325         Iterator i;
326         int r;
327
328         log_info("Rotating...");
329
330         if (s->runtime_journal) {
331                 r = journal_file_rotate(&s->runtime_journal);
332                 if (r < 0)
333                         if (s->runtime_journal)
334                                 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
335                         else
336                                 log_error("Failed to create new runtime journal: %s", strerror(-r));
337                 else
338                         server_fix_perms(s, s->runtime_journal, 0);
339         }
340
341         if (s->system_journal) {
342                 r = journal_file_rotate(&s->system_journal);
343                 if (r < 0)
344                         if (s->system_journal)
345                                 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
346                         else
347                                 log_error("Failed to create new system journal: %s", strerror(-r));
348
349                 else
350                         server_fix_perms(s, s->system_journal, 0);
351         }
352
353         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
354                 r = journal_file_rotate(&f);
355                 if (r < 0)
356                         if (f->path)
357                                 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
358                         else
359                                 log_error("Failed to create user journal: %s", strerror(-r));
360                 else {
361                         hashmap_replace(s->user_journals, k, f);
362                         server_fix_perms(s, s->system_journal, PTR_TO_UINT32(k));
363                 }
364         }
365 }
366
367 static void server_vacuum(Server *s) {
368         char *p;
369         char ids[33];
370         sd_id128_t machine;
371         int r;
372
373         log_info("Vacuuming...");
374
375         r = sd_id128_get_machine(&machine);
376         if (r < 0) {
377                 log_error("Failed to get machine ID: %s", strerror(-r));
378                 return;
379         }
380
381         sd_id128_to_string(machine, ids);
382
383         if (s->system_journal) {
384                 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
385                         log_error("Out of memory.");
386                         return;
387                 }
388
389                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
390                 if (r < 0 && r != -ENOENT)
391                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
392                 free(p);
393         }
394
395
396         if (s->runtime_journal) {
397                 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
398                         log_error("Out of memory.");
399                         return;
400                 }
401
402                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
403                 if (r < 0 && r != -ENOENT)
404                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
405                 free(p);
406         }
407
408         s->cached_available_space_timestamp = 0;
409 }
410
411 static char *shortened_cgroup_path(pid_t pid) {
412         int r;
413         char *process_path, *init_path, *path;
414
415         assert(pid > 0);
416
417         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
418         if (r < 0)
419                 return NULL;
420
421         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
422         if (r < 0) {
423                 free(process_path);
424                 return NULL;
425         }
426
427         if (endswith(init_path, "/system"))
428                 init_path[strlen(init_path) - 7] = 0;
429         else if (streq(init_path, "/"))
430                 init_path[0] = 0;
431
432         if (startswith(process_path, init_path)) {
433                 char *p;
434
435                 p = strdup(process_path + strlen(init_path));
436                 if (!p) {
437                         free(process_path);
438                         free(init_path);
439                         return NULL;
440                 }
441                 path = p;
442         } else {
443                 path = process_path;
444                 process_path = NULL;
445         }
446
447         free(process_path);
448         free(init_path);
449
450         return path;
451 }
452
453 static void dispatch_message_real(
454                 Server *s,
455                 struct iovec *iovec, unsigned n, unsigned m,
456                 struct ucred *ucred,
457                 struct timeval *tv,
458                 const char *label, size_t label_len) {
459
460         char *pid = NULL, *uid = NULL, *gid = NULL,
461                 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
462                 *comm = NULL, *cmdline = NULL, *hostname = NULL,
463                 *audit_session = NULL, *audit_loginuid = NULL,
464                 *exe = NULL, *cgroup = NULL, *session = NULL,
465                 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
466
467         char idbuf[33];
468         sd_id128_t id;
469         int r;
470         char *t;
471         uid_t loginuid = 0, realuid = 0;
472         JournalFile *f;
473         bool vacuumed = false;
474
475         assert(s);
476         assert(iovec);
477         assert(n > 0);
478         assert(n + N_IOVEC_META_FIELDS <= m);
479
480         if (ucred) {
481                 uint32_t audit;
482                 uid_t owner;
483
484                 realuid = ucred->uid;
485
486                 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
487                         IOVEC_SET_STRING(iovec[n++], pid);
488
489                 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
490                         IOVEC_SET_STRING(iovec[n++], uid);
491
492                 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
493                         IOVEC_SET_STRING(iovec[n++], gid);
494
495                 r = get_process_comm(ucred->pid, &t);
496                 if (r >= 0) {
497                         comm = strappend("_COMM=", t);
498                         free(t);
499
500                         if (comm)
501                                 IOVEC_SET_STRING(iovec[n++], comm);
502                 }
503
504                 r = get_process_exe(ucred->pid, &t);
505                 if (r >= 0) {
506                         exe = strappend("_EXE=", t);
507                         free(t);
508
509                         if (exe)
510                                 IOVEC_SET_STRING(iovec[n++], exe);
511                 }
512
513                 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
514                 if (r >= 0) {
515                         cmdline = strappend("_CMDLINE=", t);
516                         free(t);
517
518                         if (cmdline)
519                                 IOVEC_SET_STRING(iovec[n++], cmdline);
520                 }
521
522                 r = audit_session_from_pid(ucred->pid, &audit);
523                 if (r >= 0)
524                         if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
525                                 IOVEC_SET_STRING(iovec[n++], audit_session);
526
527                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
528                 if (r >= 0)
529                         if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
530                                 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
531
532                 t = shortened_cgroup_path(ucred->pid);
533                 if (t) {
534                         cgroup = strappend("_SYSTEMD_CGROUP=", t);
535                         free(t);
536
537                         if (cgroup)
538                                 IOVEC_SET_STRING(iovec[n++], cgroup);
539                 }
540
541                 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
542                         session = strappend("_SYSTEMD_SESSION=", t);
543                         free(t);
544
545                         if (session)
546                                 IOVEC_SET_STRING(iovec[n++], session);
547                 }
548
549                 if (sd_pid_get_unit(ucred->pid, &t) >= 0) {
550                         unit = strappend("_SYSTEMD_UNIT=", t);
551                         free(t);
552
553                         if (unit)
554                                 IOVEC_SET_STRING(iovec[n++], unit);
555                 }
556
557                 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
558                         if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
559                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
560
561 #ifdef HAVE_SELINUX
562                 if (label) {
563                         selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
564                         if (selinux_context) {
565                                 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
566                                 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
567                                 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
568                                 IOVEC_SET_STRING(iovec[n++], selinux_context);
569                         }
570                 } else {
571                         security_context_t con;
572
573                         if (getpidcon(ucred->pid, &con) >= 0) {
574                                 selinux_context = strappend("_SELINUX_CONTEXT=", con);
575                                 if (selinux_context)
576                                         IOVEC_SET_STRING(iovec[n++], selinux_context);
577
578                                 freecon(con);
579                         }
580                 }
581 #endif
582         }
583
584         if (tv) {
585                 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
586                              (unsigned long long) timeval_load(tv)) >= 0)
587                         IOVEC_SET_STRING(iovec[n++], source_time);
588         }
589
590         /* Note that strictly speaking storing the boot id here is
591          * redundant since the entry includes this in-line
592          * anyway. However, we need this indexed, too. */
593         r = sd_id128_get_boot(&id);
594         if (r >= 0)
595                 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
596                         IOVEC_SET_STRING(iovec[n++], boot_id);
597
598         r = sd_id128_get_machine(&id);
599         if (r >= 0)
600                 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
601                         IOVEC_SET_STRING(iovec[n++], machine_id);
602
603         t = gethostname_malloc();
604         if (t) {
605                 hostname = strappend("_HOSTNAME=", t);
606                 free(t);
607                 if (hostname)
608                         IOVEC_SET_STRING(iovec[n++], hostname);
609         }
610
611         assert(n <= m);
612
613         server_flush_to_var(s);
614
615 retry:
616         f = find_journal(s, realuid == 0 ? 0 : loginuid);
617         if (!f)
618                 log_warning("Dropping message, as we can't find a place to store the data.");
619         else {
620                 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
621
622                 if ((r == -E2BIG || /* hit limit */
623                      r == -EFBIG || /* hit fs limit */
624                      r == -EDQUOT || /* quota hit */
625                      r == -ENOSPC || /* disk full */
626                      r == -EBADMSG || /* corrupted */
627                      r == -ENODATA || /* truncated */
628                      r == -EHOSTDOWN || /* other machine */
629                      r == -EPROTONOSUPPORT) && /* unsupported feature */
630                     !vacuumed) {
631
632                         if (r == -E2BIG)
633                                 log_info("Allocation limit reached, rotating.");
634                         else
635                                 log_warning("Journal file corrupted, rotating.");
636
637                         server_rotate(s);
638                         server_vacuum(s);
639                         vacuumed = true;
640
641                         log_info("Retrying write.");
642                         goto retry;
643                 }
644
645                 if (r < 0)
646                         log_error("Failed to write entry, ignoring: %s", strerror(-r));
647         }
648
649         free(pid);
650         free(uid);
651         free(gid);
652         free(comm);
653         free(exe);
654         free(cmdline);
655         free(source_time);
656         free(boot_id);
657         free(machine_id);
658         free(hostname);
659         free(audit_session);
660         free(audit_loginuid);
661         free(cgroup);
662         free(session);
663         free(owner_uid);
664         free(unit);
665         free(selinux_context);
666 }
667
668 static void driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
669         char mid[11 + 32 + 1];
670         char buffer[16 + LINE_MAX + 1];
671         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
672         int n = 0;
673         va_list ap;
674         struct ucred ucred;
675
676         assert(s);
677         assert(format);
678
679         IOVEC_SET_STRING(iovec[n++], "PRIORITY=5");
680         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
681
682         memcpy(buffer, "MESSAGE=", 8);
683         va_start(ap, format);
684         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
685         va_end(ap);
686         char_array_0(buffer);
687         IOVEC_SET_STRING(iovec[n++], buffer);
688
689         snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
690         char_array_0(mid);
691         IOVEC_SET_STRING(iovec[n++], mid);
692
693         zero(ucred);
694         ucred.pid = getpid();
695         ucred.uid = getuid();
696         ucred.gid = getgid();
697
698         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0);
699 }
700
701 static void dispatch_message(Server *s,
702                              struct iovec *iovec, unsigned n, unsigned m,
703                              struct ucred *ucred,
704                              struct timeval *tv,
705                              const char *label, size_t label_len,
706                              int priority) {
707         int rl;
708         char *path = NULL, *c;
709
710         assert(s);
711         assert(iovec || n == 0);
712
713         if (n == 0)
714                 return;
715
716         if (!ucred)
717                 goto finish;
718
719         path = shortened_cgroup_path(ucred->pid);
720         if (!path)
721                 goto finish;
722
723         /* example: /user/lennart/3/foobar
724          *          /system/dbus.service/foobar
725          *
726          * So let's cut of everything past the third /, since that is
727          * wher user directories start */
728
729         c = strchr(path, '/');
730         if (c) {
731                 c = strchr(c+1, '/');
732                 if (c) {
733                         c = strchr(c+1, '/');
734                         if (c)
735                                 *c = 0;
736                 }
737         }
738
739         rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
740
741         if (rl == 0) {
742                 free(path);
743                 return;
744         }
745
746         /* Write a suppression message if we suppressed something */
747         if (rl > 1)
748                 driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
749
750         free(path);
751
752 finish:
753         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len);
754 }
755
756 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
757         struct msghdr msghdr;
758         struct cmsghdr *cmsg;
759         union {
760                 struct cmsghdr cmsghdr;
761                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
762         } control;
763         union sockaddr_union sa;
764
765         assert(s);
766         assert(iovec);
767         assert(n_iovec > 0);
768
769         zero(msghdr);
770         msghdr.msg_iov = (struct iovec*) iovec;
771         msghdr.msg_iovlen = n_iovec;
772
773         zero(sa);
774         sa.un.sun_family = AF_UNIX;
775         strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
776         msghdr.msg_name = &sa;
777         msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
778
779         if (ucred) {
780                 zero(control);
781                 msghdr.msg_control = &control;
782                 msghdr.msg_controllen = sizeof(control);
783
784                 cmsg = CMSG_FIRSTHDR(&msghdr);
785                 cmsg->cmsg_level = SOL_SOCKET;
786                 cmsg->cmsg_type = SCM_CREDENTIALS;
787                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
788                 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
789                 msghdr.msg_controllen = cmsg->cmsg_len;
790         }
791
792         /* Forward the syslog message we received via /dev/log to
793          * /run/systemd/syslog. Unfortunately we currently can't set
794          * the SO_TIMESTAMP auxiliary data, and hence we don't. */
795
796         if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
797                 return;
798
799         /* The socket is full? I guess the syslog implementation is
800          * too slow, and we shouldn't wait for that... */
801         if (errno == EAGAIN)
802                 return;
803
804         if (ucred && errno == ESRCH) {
805                 struct ucred u;
806
807                 /* Hmm, presumably the sender process vanished
808                  * by now, so let's fix it as good as we
809                  * can, and retry */
810
811                 u = *ucred;
812                 u.pid = getpid();
813                 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
814
815                 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
816                         return;
817
818                 if (errno == EAGAIN)
819                         return;
820         }
821
822         log_debug("Failed to forward syslog message: %m");
823 }
824
825 static void forward_syslog_raw(Server *s, const char *buffer, struct ucred *ucred, struct timeval *tv) {
826         struct iovec iovec;
827
828         assert(s);
829         assert(buffer);
830
831         IOVEC_SET_STRING(iovec, buffer);
832         forward_syslog_iovec(s, &iovec, 1, ucred, tv);
833 }
834
835 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
836         struct iovec iovec[5];
837         char header_priority[6], header_time[64], header_pid[16];
838         int n = 0;
839         time_t t;
840         struct tm *tm;
841         char *ident_buf = NULL;
842
843         assert(s);
844         assert(priority >= 0);
845         assert(priority <= 999);
846         assert(message);
847
848         /* First: priority field */
849         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
850         char_array_0(header_priority);
851         IOVEC_SET_STRING(iovec[n++], header_priority);
852
853         /* Second: timestamp */
854         t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
855         tm = localtime(&t);
856         if (!tm)
857                 return;
858         if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
859                 return;
860         IOVEC_SET_STRING(iovec[n++], header_time);
861
862         /* Third: identifier and PID */
863         if (ucred) {
864                 if (!identifier) {
865                         get_process_comm(ucred->pid, &ident_buf);
866                         identifier = ident_buf;
867                 }
868
869                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
870                 char_array_0(header_pid);
871
872                 if (identifier)
873                         IOVEC_SET_STRING(iovec[n++], identifier);
874
875                 IOVEC_SET_STRING(iovec[n++], header_pid);
876         } else if (identifier) {
877                 IOVEC_SET_STRING(iovec[n++], identifier);
878                 IOVEC_SET_STRING(iovec[n++], ": ");
879         }
880
881         /* Fourth: message */
882         IOVEC_SET_STRING(iovec[n++], message);
883
884         forward_syslog_iovec(s, iovec, n, ucred, tv);
885
886         free(ident_buf);
887 }
888
/* Returns priority unchanged if it carries a non-zero facility.
 * Otherwise the facility is replaced by LOG_USER and only the level
 * bits of priority are kept. */
static int fixup_priority(int priority) {
        int facility = priority & LOG_FACMASK;

        if (facility != 0)
                return priority;

        return LOG_USER | (priority & LOG_PRIMASK);
}
896
897 static void forward_kmsg(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
898         struct iovec iovec[5];
899         char header_priority[6], header_pid[16];
900         int n = 0;
901         char *ident_buf = NULL;
902         int fd;
903
904         assert(s);
905         assert(priority >= 0);
906         assert(priority <= 999);
907         assert(message);
908
909         /* Never allow messages with kernel facility to be written to
910          * kmsg, regardless where the data comes from. */
911         priority = fixup_priority(priority);
912
913         /* First: priority field */
914         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
915         char_array_0(header_priority);
916         IOVEC_SET_STRING(iovec[n++], header_priority);
917
918         /* Second: identifier and PID */
919         if (ucred) {
920                 if (!identifier) {
921                         get_process_comm(ucred->pid, &ident_buf);
922                         identifier = ident_buf;
923                 }
924
925                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
926                 char_array_0(header_pid);
927
928                 if (identifier)
929                         IOVEC_SET_STRING(iovec[n++], identifier);
930
931                 IOVEC_SET_STRING(iovec[n++], header_pid);
932         } else if (identifier) {
933                 IOVEC_SET_STRING(iovec[n++], identifier);
934                 IOVEC_SET_STRING(iovec[n++], ": ");
935         }
936
937         /* Fourth: message */
938         IOVEC_SET_STRING(iovec[n++], message);
939         IOVEC_SET_STRING(iovec[n++], "\n");
940
941         fd = open("/dev/kmsg", O_WRONLY|O_NOCTTY|O_CLOEXEC);
942         if (fd < 0) {
943                 log_debug("Failed to open /dev/kmsg for logging: %s", strerror(errno));
944                 goto finish;
945         }
946
947         if (writev(fd, iovec, n) < 0)
948                 log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
949
950         close_nointr_nofail(fd);
951
952 finish:
953         free(ident_buf);
954 }
955
956 static void forward_console(Server *s, const char *identifier, const char *message, struct ucred *ucred) {
957         struct iovec iovec[4];
958         char header_pid[16];
959         int n = 0, fd;
960         char *ident_buf = NULL;
961
962         assert(s);
963         assert(message);
964
965         /* First: identifier and PID */
966         if (ucred) {
967                 if (!identifier) {
968                         get_process_comm(ucred->pid, &ident_buf);
969                         identifier = ident_buf;
970                 }
971
972                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
973                 char_array_0(header_pid);
974
975                 if (identifier)
976                         IOVEC_SET_STRING(iovec[n++], identifier);
977
978                 IOVEC_SET_STRING(iovec[n++], header_pid);
979         } else if (identifier) {
980                 IOVEC_SET_STRING(iovec[n++], identifier);
981                 IOVEC_SET_STRING(iovec[n++], ": ");
982         }
983
984         /* Third: message */
985         IOVEC_SET_STRING(iovec[n++], message);
986         IOVEC_SET_STRING(iovec[n++], "\n");
987
988         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
989         if (fd < 0) {
990                 log_debug("Failed to open /dev/console for logging: %s", strerror(errno));
991                 goto finish;
992         }
993
994         if (writev(fd, iovec, n) < 0)
995                 log_debug("Failed to write to /dev/console for logging: %s", strerror(errno));
996
997         close_nointr_nofail(fd);
998
999 finish:
1000         free(ident_buf);
1001 }
1002
1003 static void read_identifier(const char **buf, char **identifier, char **pid) {
1004         const char *p;
1005         char *t;
1006         size_t l, e;
1007
1008         assert(buf);
1009         assert(identifier);
1010         assert(pid);
1011
1012         p = *buf;
1013
1014         p += strspn(p, WHITESPACE);
1015         l = strcspn(p, WHITESPACE);
1016
1017         if (l <= 0 ||
1018             p[l-1] != ':')
1019                 return;
1020
1021         e = l;
1022         l--;
1023
1024         if (p[l-1] == ']') {
1025                 size_t k = l-1;
1026
1027                 for (;;) {
1028
1029                         if (p[k] == '[') {
1030                                 t = strndup(p+k+1, l-k-2);
1031                                 if (t)
1032                                         *pid = t;
1033
1034                                 l = k;
1035                                 break;
1036                         }
1037
1038                         if (k == 0)
1039                                 break;
1040
1041                         k--;
1042                 }
1043         }
1044
1045         t = strndup(p, l);
1046         if (t)
1047                 *identifier = t;
1048
1049         *buf = p + e;
1050         *buf += strspn(*buf, WHITESPACE);
1051 }
1052
1053 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv, const char *label, size_t label_len) {
1054         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
1055         struct iovec iovec[N_IOVEC_META_FIELDS + 6];
1056         unsigned n = 0;
1057         int priority = LOG_USER | LOG_INFO;
1058         char *identifier = NULL, *pid = NULL;
1059
1060         assert(s);
1061         assert(buf);
1062
1063         if (s->forward_to_syslog)
1064                 forward_syslog_raw(s, buf, ucred, tv);
1065
1066         parse_syslog_priority((char**) &buf, &priority);
1067         skip_syslog_date((char**) &buf);
1068         read_identifier(&buf, &identifier, &pid);
1069
1070         if (s->forward_to_kmsg)
1071                 forward_kmsg(s, priority, identifier, buf, ucred);
1072
1073         if (s->forward_to_console)
1074                 forward_console(s, identifier, buf, ucred);
1075
1076         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
1077
1078         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1079                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1080
1081         if (priority & LOG_FACMASK)
1082                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1083                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1084
1085         if (identifier) {
1086                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1087                 if (syslog_identifier)
1088                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1089         }
1090
1091         if (pid) {
1092                 syslog_pid = strappend("SYSLOG_PID=", pid);
1093                 if (syslog_pid)
1094                         IOVEC_SET_STRING(iovec[n++], syslog_pid);
1095         }
1096
1097         message = strappend("MESSAGE=", buf);
1098         if (message)
1099                 IOVEC_SET_STRING(iovec[n++], message);
1100
1101         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, label, label_len, priority);
1102
1103         free(message);
1104         free(identifier);
1105         free(pid);
1106         free(syslog_priority);
1107         free(syslog_facility);
1108         free(syslog_identifier);
1109 }
1110
/* Checks whether the l bytes at p form a field name that clients are
 * allowed to set.
 *
 * The rules roughly follow the POSIX recommendations for environment
 * variable names, with a few extra restrictions:
 *
 *   http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
 *
 *   - between 1 and 64 characters long
 *   - must not start with '_' (such names are protected)
 *   - must not start with a digit
 *   - consists of 'A'-'Z', '0'-'9' and '_' only */
static bool valid_user_field(const char *p, size_t l) {
        size_t i;

        /* Reject empty and overlong names */
        if (l == 0 || l > 64)
                return false;

        /* Reject a protected prefix or a leading digit */
        if (p[0] == '_' || (p[0] >= '0' && p[0] <= '9'))
                return false;

        /* Every character has to come from the allowed set */
        for (i = 0; i < l; i++) {
                const char c = p[i];

                if (c >= 'A' && c <= 'Z')
                        continue;

                if (c >= '0' && c <= '9')
                        continue;

                if (c == '_')
                        continue;

                return false;
        }

        return true;
}
1145
1146 static void process_native_message(
1147                 Server *s,
1148                 const void *buffer, size_t buffer_size,
1149                 struct ucred *ucred,
1150                 struct timeval *tv,
1151                 const char *label, size_t label_len) {
1152
1153         struct iovec *iovec = NULL;
1154         unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1155         const char *p;
1156         size_t remaining;
1157         int priority = LOG_INFO;
1158         char *identifier = NULL, *message = NULL;
1159
1160         assert(s);
1161         assert(buffer || buffer_size == 0);
1162
1163         p = buffer;
1164         remaining = buffer_size;
1165
1166         while (remaining > 0) {
1167                 const char *e, *q;
1168
1169                 e = memchr(p, '\n', remaining);
1170
1171                 if (!e) {
1172                         /* Trailing noise, let's ignore it, and flush what we collected */
1173                         log_debug("Received message with trailing noise, ignoring.");
1174                         break;
1175                 }
1176
1177                 if (e == p) {
1178                         /* Entry separator */
1179                         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, priority);
1180                         n = 0;
1181                         priority = LOG_INFO;
1182
1183                         p++;
1184                         remaining--;
1185                         continue;
1186                 }
1187
1188                 if (*p == '.' || *p == '#') {
1189                         /* Ignore control commands for now, and
1190                          * comments too. */
1191                         remaining -= (e - p) + 1;
1192                         p = e + 1;
1193                         continue;
1194                 }
1195
1196                 /* A property follows */
1197
1198                 if (n+N_IOVEC_META_FIELDS >= m) {
1199                         struct iovec *c;
1200                         unsigned u;
1201
1202                         u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1203                         c = realloc(iovec, u * sizeof(struct iovec));
1204                         if (!c) {
1205                                 log_error("Out of memory");
1206                                 break;
1207                         }
1208
1209                         iovec = c;
1210                         m = u;
1211                 }
1212
1213                 q = memchr(p, '=', e - p);
1214                 if (q) {
1215                         if (valid_user_field(p, q - p)) {
1216                                 size_t l;
1217
1218                                 l = e - p;
1219
1220                                 /* If the field name starts with an
1221                                  * underscore, skip the variable,
1222                                  * since that indidates a trusted
1223                                  * field */
1224                                 iovec[n].iov_base = (char*) p;
1225                                 iovec[n].iov_len = l;
1226                                 n++;
1227
1228                                 /* We need to determine the priority
1229                                  * of this entry for the rate limiting
1230                                  * logic */
1231                                 if (l == 10 &&
1232                                     memcmp(p, "PRIORITY=", 9) == 0 &&
1233                                     p[9] >= '0' && p[9] <= '9')
1234                                         priority = (priority & LOG_FACMASK) | (p[9] - '0');
1235
1236                                 else if (l == 17 &&
1237                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1238                                          p[16] >= '0' && p[16] <= '9')
1239                                         priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1240
1241                                 else if (l == 18 &&
1242                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1243                                          p[16] >= '0' && p[16] <= '9' &&
1244                                          p[17] >= '0' && p[17] <= '9')
1245                                         priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1246
1247                                 else if (l >= 19 &&
1248                                          memcmp(p, "SYSLOG_IDENTIFIER=", 18) == 0) {
1249                                         char *t;
1250
1251                                         t = strndup(p + 18, l - 18);
1252                                         if (t) {
1253                                                 free(identifier);
1254                                                 identifier = t;
1255                                         }
1256                                 } else if (l >= 8 &&
1257                                            memcmp(p, "MESSAGE=", 8) == 0) {
1258                                         char *t;
1259
1260                                         t = strndup(p + 8, l - 8);
1261                                         if (t) {
1262                                                 free(message);
1263                                                 message = t;
1264                                         }
1265                                 }
1266                         }
1267
1268                         remaining -= (e - p) + 1;
1269                         p = e + 1;
1270                         continue;
1271                 } else {
1272                         le64_t l_le;
1273                         uint64_t l;
1274                         char *k;
1275
1276                         if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1277                                 log_debug("Failed to parse message, ignoring.");
1278                                 break;
1279                         }
1280
1281                         memcpy(&l_le, e + 1, sizeof(uint64_t));
1282                         l = le64toh(l_le);
1283
1284                         if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1285                             e[1+sizeof(uint64_t)+l] != '\n') {
1286                                 log_debug("Failed to parse message, ignoring.");
1287                                 break;
1288                         }
1289
1290                         k = malloc((e - p) + 1 + l);
1291                         if (!k) {
1292                                 log_error("Out of memory");
1293                                 break;
1294                         }
1295
1296                         memcpy(k, p, e - p);
1297                         k[e - p] = '=';
1298                         memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1299
1300                         if (valid_user_field(p, e - p)) {
1301                                 iovec[n].iov_base = k;
1302                                 iovec[n].iov_len = (e - p) + 1 + l;
1303                                 n++;
1304                         } else
1305                                 free(k);
1306
1307                         remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1308                         p = e + 1 + sizeof(uint64_t) + l + 1;
1309                 }
1310         }
1311
1312         if (n <= 0)
1313                 goto finish;
1314
1315         tn = n++;
1316         IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1317
1318         if (message) {
1319                 if (s->forward_to_syslog)
1320                         forward_syslog(s, priority, identifier, message, ucred, tv);
1321
1322                 if (s->forward_to_kmsg)
1323                         forward_kmsg(s, priority, identifier, message, ucred);
1324
1325                 if (s->forward_to_console)
1326                         forward_console(s, identifier, message, ucred);
1327         }
1328
1329         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, priority);
1330
1331 finish:
1332         for (j = 0; j < n; j++)  {
1333                 if (j == tn)
1334                         continue;
1335
1336                 if (iovec[j].iov_base < buffer ||
1337                     (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1338                         free(iovec[j].iov_base);
1339         }
1340
1341         free(iovec);
1342         free(identifier);
1343         free(message);
1344 }
1345
1346 static void process_native_file(
1347                 Server *s,
1348                 int fd,
1349                 struct ucred *ucred,
1350                 struct timeval *tv,
1351                 const char *label, size_t label_len) {
1352
1353         struct stat st;
1354         void *p;
1355         ssize_t n;
1356
1357         assert(s);
1358         assert(fd >= 0);
1359
1360         /* Data is in the passed file, since it didn't fit in a
1361          * datagram. We can't map the file here, since clients might
1362          * then truncate it and trigger a SIGBUS for us. So let's
1363          * stupidly read it */
1364
1365         if (fstat(fd, &st) < 0) {
1366                 log_error("Failed to stat passed file, ignoring: %m");
1367                 return;
1368         }
1369
1370         if (!S_ISREG(st.st_mode)) {
1371                 log_error("File passed is not regular. Ignoring.");
1372                 return;
1373         }
1374
1375         if (st.st_size <= 0)
1376                 return;
1377
1378         if (st.st_size > ENTRY_SIZE_MAX) {
1379                 log_error("File passed too large. Ignoring.");
1380                 return;
1381         }
1382
1383         p = malloc(st.st_size);
1384         if (!p) {
1385                 log_error("Out of memory");
1386                 return;
1387         }
1388
1389         n = pread(fd, p, st.st_size, 0);
1390         if (n < 0)
1391                 log_error("Failed to read file, ignoring: %s", strerror(-n));
1392         else if (n > 0)
1393                 process_native_message(s, p, n, ucred, tv, label, label_len);
1394
1395         free(p);
1396 }
1397
1398 static int stdout_stream_log(StdoutStream *s, const char *p) {
1399         struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1400         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1401         unsigned n = 0;
1402         int priority;
1403         char *label = NULL;
1404         size_t label_len = 0;
1405
1406         assert(s);
1407         assert(p);
1408
1409         if (isempty(p))
1410                 return 0;
1411
1412         priority = s->priority;
1413
1414         if (s->level_prefix)
1415                 parse_syslog_priority((char**) &p, &priority);
1416
1417         if (s->forward_to_syslog || s->server->forward_to_syslog)
1418                 forward_syslog(s->server, fixup_priority(priority), s->identifier, p, &s->ucred, NULL);
1419
1420         if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1421                 forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1422
1423         if (s->forward_to_console || s->server->forward_to_console)
1424                 forward_console(s->server, s->identifier, p, &s->ucred);
1425
1426         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1427
1428         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1429                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1430
1431         if (priority & LOG_FACMASK)
1432                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1433                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1434
1435         if (s->identifier) {
1436                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1437                 if (syslog_identifier)
1438                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1439         }
1440
1441         message = strappend("MESSAGE=", p);
1442         if (message)
1443                 IOVEC_SET_STRING(iovec[n++], message);
1444
1445 #ifdef HAVE_SELINUX
1446         if (s->security_context) {
1447                 label = (char*) s->security_context;
1448                 label_len = strlen((char*) s->security_context);
1449         }
1450 #endif
1451
1452         dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, label, label_len, priority);
1453
1454         free(message);
1455         free(syslog_priority);
1456         free(syslog_facility);
1457         free(syslog_identifier);
1458
1459         return 0;
1460 }
1461
1462 static int stdout_stream_line(StdoutStream *s, char *p) {
1463         int r;
1464
1465         assert(s);
1466         assert(p);
1467
1468         p = strstrip(p);
1469
1470         switch (s->state) {
1471
1472         case STDOUT_STREAM_IDENTIFIER:
1473                 if (isempty(p))
1474                         s->identifier = NULL;
1475                 else  {
1476                         s->identifier = strdup(p);
1477                         if (!s->identifier) {
1478                                 log_error("Out of memory");
1479                                 return -ENOMEM;
1480                         }
1481                 }
1482
1483                 s->state = STDOUT_STREAM_PRIORITY;
1484                 return 0;
1485
1486         case STDOUT_STREAM_PRIORITY:
1487                 r = safe_atoi(p, &s->priority);
1488                 if (r < 0 || s->priority <= 0 || s->priority >= 999) {
1489                         log_warning("Failed to parse log priority line.");
1490                         return -EINVAL;
1491                 }
1492
1493                 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1494                 return 0;
1495
1496         case STDOUT_STREAM_LEVEL_PREFIX:
1497                 r = parse_boolean(p);
1498                 if (r < 0) {
1499                         log_warning("Failed to parse level prefix line.");
1500                         return -EINVAL;
1501                 }
1502
1503                 s->level_prefix = !!r;
1504                 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1505                 return 0;
1506
1507         case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1508                 r = parse_boolean(p);
1509                 if (r < 0) {
1510                         log_warning("Failed to parse forward to syslog line.");
1511                         return -EINVAL;
1512                 }
1513
1514                 s->forward_to_syslog = !!r;
1515                 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1516                 return 0;
1517
1518         case STDOUT_STREAM_FORWARD_TO_KMSG:
1519                 r = parse_boolean(p);
1520                 if (r < 0) {
1521                         log_warning("Failed to parse copy to kmsg line.");
1522                         return -EINVAL;
1523                 }
1524
1525                 s->forward_to_kmsg = !!r;
1526                 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1527                 return 0;
1528
1529         case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1530                 r = parse_boolean(p);
1531                 if (r < 0) {
1532                         log_warning("Failed to parse copy to console line.");
1533                         return -EINVAL;
1534                 }
1535
1536                 s->forward_to_console = !!r;
1537                 s->state = STDOUT_STREAM_RUNNING;
1538                 return 0;
1539
1540         case STDOUT_STREAM_RUNNING:
1541                 return stdout_stream_log(s, p);
1542         }
1543
1544         assert_not_reached("Unknown stream state");
1545 }
1546
1547 static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
1548         char *p;
1549         size_t remaining;
1550         int r;
1551
1552         assert(s);
1553
1554         p = s->buffer;
1555         remaining = s->length;
1556         for (;;) {
1557                 char *end;
1558                 size_t skip;
1559
1560                 end = memchr(p, '\n', remaining);
1561                 if (end)
1562                         skip = end - p + 1;
1563                 else if (remaining >= sizeof(s->buffer) - 1) {
1564                         end = p + sizeof(s->buffer) - 1;
1565                         skip = remaining;
1566                 } else
1567                         break;
1568
1569                 *end = 0;
1570
1571                 r = stdout_stream_line(s, p);
1572                 if (r < 0)
1573                         return r;
1574
1575                 remaining -= skip;
1576                 p += skip;
1577         }
1578
1579         if (force_flush && remaining > 0) {
1580                 p[remaining] = 0;
1581                 r = stdout_stream_line(s, p);
1582                 if (r < 0)
1583                         return r;
1584
1585                 p += remaining;
1586                 remaining = 0;
1587         }
1588
1589         if (p > s->buffer) {
1590                 memmove(s->buffer, p, remaining);
1591                 s->length = remaining;
1592         }
1593
1594         return 0;
1595 }
1596
1597 static int stdout_stream_process(StdoutStream *s) {
1598         ssize_t l;
1599         int r;
1600
1601         assert(s);
1602
1603         l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1604         if (l < 0) {
1605
1606                 if (errno == EAGAIN)
1607                         return 0;
1608
1609                 log_warning("Failed to read from stream: %m");
1610                 return -errno;
1611         }
1612
1613         if (l == 0) {
1614                 r = stdout_stream_scan(s, true);
1615                 if (r < 0)
1616                         return r;
1617
1618                 return 0;
1619         }
1620
1621         s->length += l;
1622         r = stdout_stream_scan(s, false);
1623         if (r < 0)
1624                 return r;
1625
1626         return 1;
1627
1628 }
1629
1630 static void stdout_stream_free(StdoutStream *s) {
1631         assert(s);
1632
1633         if (s->server) {
1634                 assert(s->server->n_stdout_streams > 0);
1635                 s->server->n_stdout_streams --;
1636                 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
1637         }
1638
1639         if (s->fd >= 0) {
1640                 if (s->server)
1641                         epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1642
1643                 close_nointr_nofail(s->fd);
1644         }
1645
1646 #ifdef HAVE_SELINUX
1647         if (s->security_context)
1648                 freecon(s->security_context);
1649 #endif
1650
1651         free(s->identifier);
1652         free(s);
1653 }
1654
1655 static int stdout_stream_new(Server *s) {
1656         StdoutStream *stream;
1657         int fd, r;
1658         socklen_t len;
1659         struct epoll_event ev;
1660
1661         assert(s);
1662
1663         fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1664         if (fd < 0) {
1665                 if (errno == EAGAIN)
1666                         return 0;
1667
1668                 log_error("Failed to accept stdout connection: %m");
1669                 return -errno;
1670         }
1671
1672         if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1673                 log_warning("Too many stdout streams, refusing connection.");
1674                 close_nointr_nofail(fd);
1675                 return 0;
1676         }
1677
1678         stream = new0(StdoutStream, 1);
1679         if (!stream) {
1680                 log_error("Out of memory.");
1681                 close_nointr_nofail(fd);
1682                 return -ENOMEM;
1683         }
1684
1685         stream->fd = fd;
1686
1687         len = sizeof(stream->ucred);
1688         if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1689                 log_error("Failed to determine peer credentials: %m");
1690                 r = -errno;
1691                 goto fail;
1692         }
1693
1694 #ifdef HAVE_SELINUX
1695         if (getpeercon(fd, &stream->security_context) < 0 && errno != ENOPROTOOPT)
1696                 log_error("Failed to determine peer security context: %m");
1697 #endif
1698
1699         if (shutdown(fd, SHUT_WR) < 0) {
1700                 log_error("Failed to shutdown writing side of socket: %m");
1701                 r = -errno;
1702                 goto fail;
1703         }
1704
1705         zero(ev);
1706         ev.data.ptr = stream;
1707         ev.events = EPOLLIN;
1708         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1709                 log_error("Failed to add stream to event loop: %m");
1710                 r = -errno;
1711                 goto fail;
1712         }
1713
1714         stream->server = s;
1715         LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1716         s->n_stdout_streams ++;
1717
1718         return 0;
1719
1720 fail:
1721         stdout_stream_free(stream);
1722         return r;
1723 }
1724
1725 static int parse_kernel_timestamp(char **_p, usec_t *t) {
1726         usec_t r;
1727         int k, i;
1728         char *p;
1729
1730         assert(_p);
1731         assert(*_p);
1732         assert(t);
1733
1734         p = *_p;
1735
1736         if (strlen(p) < 14 || p[0] != '[' || p[13] != ']' || p[6] != '.')
1737                 return 0;
1738
1739         r = 0;
1740
1741         for (i = 1; i <= 5; i++) {
1742                 r *= 10;
1743
1744                 if (p[i] == ' ')
1745                         continue;
1746
1747                 k = undecchar(p[i]);
1748                 if (k < 0)
1749                         return 0;
1750
1751                 r += k;
1752         }
1753
1754         for (i = 7; i <= 12; i++) {
1755                 r *= 10;
1756
1757                 k = undecchar(p[i]);
1758                 if (k < 0)
1759                         return 0;
1760
1761                 r += k;
1762         }
1763
1764         *t = r;
1765         *_p += 14;
1766         *_p += strspn(*_p, WHITESPACE);
1767
1768         return 1;
1769 }
1770
1771 static void proc_kmsg_line(Server *s, const char *p) {
1772         struct iovec iovec[N_IOVEC_META_FIELDS + 7];
1773         char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
1774         int priority = LOG_KERN | LOG_INFO;
1775         unsigned n = 0;
1776         usec_t usec;
1777         char *identifier = NULL, *pid = NULL;
1778
1779         assert(s);
1780         assert(p);
1781
1782         if (isempty(p))
1783                 return;
1784
1785         parse_syslog_priority((char **) &p, &priority);
1786
1787         if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
1788                 return;
1789
1790         if (parse_kernel_timestamp((char **) &p, &usec) > 0) {
1791                 if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
1792                              (unsigned long long) usec) >= 0)
1793                         IOVEC_SET_STRING(iovec[n++], source_time);
1794         }
1795
1796         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
1797
1798         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1799                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1800
1801         if ((priority & LOG_FACMASK) == LOG_KERN) {
1802
1803                 if (s->forward_to_syslog)
1804                         forward_syslog(s, priority, "kernel", p, NULL, NULL);
1805
1806                 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
1807         } else {
1808                 read_identifier(&p, &identifier, &pid);
1809
1810                 if (s->forward_to_syslog)
1811                         forward_syslog(s, priority, identifier, p, NULL, NULL);
1812
1813                 if (identifier) {
1814                         syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1815                         if (syslog_identifier)
1816                                 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1817                 }
1818
1819                 if (pid) {
1820                         syslog_pid = strappend("SYSLOG_PID=", pid);
1821                         if (syslog_pid)
1822                                 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1823                 }
1824
1825                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1826                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1827         }
1828
1829         message = strappend("MESSAGE=", p);
1830         if (message)
1831                 IOVEC_SET_STRING(iovec[n++], message);
1832
1833         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, priority);
1834
1835         free(message);
1836         free(syslog_priority);
1837         free(syslog_identifier);
1838         free(syslog_pid);
1839         free(syslog_facility);
1840         free(source_time);
1841         free(identifier);
1842         free(pid);
1843 }
1844
1845 static void proc_kmsg_scan(Server *s) {
1846         char *p;
1847         size_t remaining;
1848
1849         assert(s);
1850
1851         p = s->proc_kmsg_buffer;
1852         remaining = s->proc_kmsg_length;
1853         for (;;) {
1854                 char *end;
1855                 size_t skip;
1856
1857                 end = memchr(p, '\n', remaining);
1858                 if (end)
1859                         skip = end - p + 1;
1860                 else if (remaining >= sizeof(s->proc_kmsg_buffer) - 1) {
1861                         end = p + sizeof(s->proc_kmsg_buffer) - 1;
1862                         skip = remaining;
1863                 } else
1864                         break;
1865
1866                 *end = 0;
1867
1868                 proc_kmsg_line(s, p);
1869
1870                 remaining -= skip;
1871                 p += skip;
1872         }
1873
1874         if (p > s->proc_kmsg_buffer) {
1875                 memmove(s->proc_kmsg_buffer, p, remaining);
1876                 s->proc_kmsg_length = remaining;
1877         }
1878 }
1879
1880 static int system_journal_open(Server *s) {
1881         int r;
1882         char *fn;
1883         sd_id128_t machine;
1884         char ids[33];
1885
1886         r = sd_id128_get_machine(&machine);
1887         if (r < 0)
1888                 return r;
1889
1890         sd_id128_to_string(machine, ids);
1891
1892         if (!s->system_journal) {
1893
1894                 /* First try to create the machine path, but not the prefix */
1895                 fn = strappend("/var/log/journal/", ids);
1896                 if (!fn)
1897                         return -ENOMEM;
1898                 (void) mkdir(fn, 0755);
1899                 free(fn);
1900
1901                 /* The create the system journal file */
1902                 fn = join("/var/log/journal/", ids, "/system.journal", NULL);
1903                 if (!fn)
1904                         return -ENOMEM;
1905
1906                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, NULL, &s->system_journal);
1907                 free(fn);
1908
1909                 if (r >= 0) {
1910                         journal_default_metrics(&s->system_metrics, s->system_journal->fd);
1911
1912                         s->system_journal->metrics = s->system_metrics;
1913                         s->system_journal->compress = s->compress;
1914
1915                         server_fix_perms(s, s->system_journal, 0);
1916                 } else if (r < 0) {
1917
1918                         if (r != -ENOENT && r != -EROFS)
1919                                 log_warning("Failed to open system journal: %s", strerror(-r));
1920
1921                         r = 0;
1922                 }
1923         }
1924
1925         if (!s->runtime_journal) {
1926
1927                 fn = join("/run/log/journal/", ids, "/system.journal", NULL);
1928                 if (!fn)
1929                         return -ENOMEM;
1930
1931                 if (s->system_journal) {
1932
1933                         /* Try to open the runtime journal, but only
1934                          * if it already exists, so that we can flush
1935                          * it into the system journal */
1936
1937                         r = journal_file_open(fn, O_RDWR, 0640, NULL, &s->runtime_journal);
1938                         free(fn);
1939
1940                         if (r < 0) {
1941                                 if (r != -ENOENT)
1942                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
1943
1944                                 r = 0;
1945                         }
1946
1947                 } else {
1948
1949                         /* OK, we really need the runtime journal, so create
1950                          * it if necessary. */
1951
1952                         (void) mkdir_parents(fn, 0755);
1953                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, NULL, &s->runtime_journal);
1954                         free(fn);
1955
1956                         if (r < 0) {
1957                                 log_error("Failed to open runtime journal: %s", strerror(-r));
1958                                 return r;
1959                         }
1960                 }
1961
1962                 if (s->runtime_journal) {
1963                         journal_default_metrics(&s->runtime_metrics, s->runtime_journal->fd);
1964
1965                         s->runtime_journal->metrics = s->runtime_metrics;
1966                         s->runtime_journal->compress = s->compress;
1967
1968                         server_fix_perms(s, s->runtime_journal, 0);
1969                 }
1970         }
1971
1972         return r;
1973 }
1974
1975 static int server_flush_to_var(Server *s) {
1976         char path[] = "/run/log/journal/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
1977         Object *o = NULL;
1978         int r;
1979         sd_id128_t machine;
1980         sd_journal *j;
1981         usec_t ts;
1982
1983         assert(s);
1984
1985         if (!s->runtime_journal)
1986                 return 0;
1987
1988         ts = now(CLOCK_MONOTONIC);
1989         if (s->var_available_timestamp + RECHECK_VAR_AVAILABLE_USEC > ts)
1990                 return 0;
1991
1992         s->var_available_timestamp = ts;
1993
1994         system_journal_open(s);
1995
1996         if (!s->system_journal)
1997                 return 0;
1998
1999         log_info("Flushing to /var...");
2000
2001         r = sd_id128_get_machine(&machine);
2002         if (r < 0) {
2003                 log_error("Failed to get machine id: %s", strerror(-r));
2004                 return r;
2005         }
2006
2007         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
2008         if (r < 0) {
2009                 log_error("Failed to read runtime journal: %s", strerror(-r));
2010                 return r;
2011         }
2012
2013         SD_JOURNAL_FOREACH(j) {
2014                 JournalFile *f;
2015
2016                 f = j->current_file;
2017                 assert(f && f->current_offset > 0);
2018
2019                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2020                 if (r < 0) {
2021                         log_error("Can't read entry: %s", strerror(-r));
2022                         goto finish;
2023                 }
2024
2025                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2026                 if (r == -E2BIG) {
2027                         log_info("Allocation limit reached.");
2028
2029                         journal_file_post_change(s->system_journal);
2030                         server_rotate(s);
2031                         server_vacuum(s);
2032
2033                         r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2034                 }
2035
2036                 if (r < 0) {
2037                         log_error("Can't write entry: %s", strerror(-r));
2038                         goto finish;
2039                 }
2040         }
2041
2042 finish:
2043         journal_file_post_change(s->system_journal);
2044
2045         journal_file_close(s->runtime_journal);
2046         s->runtime_journal = NULL;
2047
2048         if (r >= 0) {
2049                 sd_id128_to_string(machine, path + 17);
2050                 rm_rf(path, false, true, false);
2051         }
2052
2053         return r;
2054 }
2055
2056 static int server_read_proc_kmsg(Server *s) {
2057         ssize_t l;
2058         assert(s);
2059         assert(s->proc_kmsg_fd >= 0);
2060
2061         l = read(s->proc_kmsg_fd, s->proc_kmsg_buffer + s->proc_kmsg_length, sizeof(s->proc_kmsg_buffer) - 1 - s->proc_kmsg_length);
2062         if (l < 0) {
2063
2064                 if (errno == EAGAIN || errno == EINTR)
2065                         return 0;
2066
2067                 log_error("Failed to read from kernel: %m");
2068                 return -errno;
2069         }
2070
2071         s->proc_kmsg_length += l;
2072
2073         proc_kmsg_scan(s);
2074         return 1;
2075 }
2076
2077 static int server_flush_proc_kmsg(Server *s) {
2078         int r;
2079
2080         assert(s);
2081
2082         if (s->proc_kmsg_fd < 0)
2083                 return 0;
2084
2085         log_info("Flushing /proc/kmsg...");
2086
2087         for (;;) {
2088                 r = server_read_proc_kmsg(s);
2089                 if (r < 0)
2090                         return r;
2091
2092                 if (r == 0)
2093                         break;
2094         }
2095
2096         return 0;
2097 }
2098
2099 static int process_event(Server *s, struct epoll_event *ev) {
2100         assert(s);
2101
2102         if (ev->data.fd == s->signal_fd) {
2103                 struct signalfd_siginfo sfsi;
2104                 ssize_t n;
2105
2106                 if (ev->events != EPOLLIN) {
2107                         log_info("Got invalid event from epoll.");
2108                         return -EIO;
2109                 }
2110
2111                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
2112                 if (n != sizeof(sfsi)) {
2113
2114                         if (n >= 0)
2115                                 return -EIO;
2116
2117                         if (errno == EINTR || errno == EAGAIN)
2118                                 return 1;
2119
2120                         return -errno;
2121                 }
2122
2123                 if (sfsi.ssi_signo == SIGUSR1) {
2124                         server_flush_to_var(s);
2125                         return 0;
2126                 }
2127
2128                 log_debug("Received SIG%s", signal_to_string(sfsi.ssi_signo));
2129                 return 0;
2130
2131         } else if (ev->data.fd == s->proc_kmsg_fd) {
2132                 int r;
2133
2134                 if (ev->events != EPOLLIN) {
2135                         log_info("Got invalid event from epoll.");
2136                         return -EIO;
2137                 }
2138
2139                 r = server_read_proc_kmsg(s);
2140                 if (r < 0)
2141                         return r;
2142
2143                 return 1;
2144
2145         } else if (ev->data.fd == s->native_fd ||
2146                    ev->data.fd == s->syslog_fd) {
2147
2148                 if (ev->events != EPOLLIN) {
2149                         log_info("Got invalid event from epoll.");
2150                         return -EIO;
2151                 }
2152
2153                 for (;;) {
2154                         struct msghdr msghdr;
2155                         struct iovec iovec;
2156                         struct ucred *ucred = NULL;
2157                         struct timeval *tv = NULL;
2158                         struct cmsghdr *cmsg;
2159                         char *label = NULL;
2160                         size_t label_len = 0;
2161                         union {
2162                                 struct cmsghdr cmsghdr;
2163
2164                                 /* We use NAME_MAX space for the
2165                                  * SELinux label here. The kernel
2166                                  * currently enforces no limit, but
2167                                  * according to suggestions from the
2168                                  * SELinux people this will change and
2169                                  * it will probably be identical to
2170                                  * NAME_MAX. For now we use that, but
2171                                  * this should be updated one day when
2172                                  * the final limit is known.*/
2173                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
2174                                             CMSG_SPACE(sizeof(struct timeval)) +
2175                                             CMSG_SPACE(sizeof(int)) + /* fd */
2176                                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
2177                         } control;
2178                         ssize_t n;
2179                         int v;
2180                         int *fds = NULL;
2181                         unsigned n_fds = 0;
2182
2183                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
2184                                 log_error("SIOCINQ failed: %m");
2185                                 return -errno;
2186                         }
2187
2188                         if (s->buffer_size < (size_t) v) {
2189                                 void *b;
2190                                 size_t l;
2191
2192                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
2193                                 b = realloc(s->buffer, l+1);
2194
2195                                 if (!b) {
2196                                         log_error("Couldn't increase buffer.");
2197                                         return -ENOMEM;
2198                                 }
2199
2200                                 s->buffer_size = l;
2201                                 s->buffer = b;
2202                         }
2203
2204                         zero(iovec);
2205                         iovec.iov_base = s->buffer;
2206                         iovec.iov_len = s->buffer_size;
2207
2208                         zero(control);
2209                         zero(msghdr);
2210                         msghdr.msg_iov = &iovec;
2211                         msghdr.msg_iovlen = 1;
2212                         msghdr.msg_control = &control;
2213                         msghdr.msg_controllen = sizeof(control);
2214
2215                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
2216                         if (n < 0) {
2217
2218                                 if (errno == EINTR || errno == EAGAIN)
2219                                         return 1;
2220
2221                                 log_error("recvmsg() failed: %m");
2222                                 return -errno;
2223                         }
2224
2225                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
2226
2227                                 if (cmsg->cmsg_level == SOL_SOCKET &&
2228                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
2229                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
2230                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
2231                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2232                                          cmsg->cmsg_type == SCM_SECURITY) {
2233                                         label = (char*) CMSG_DATA(cmsg);
2234                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
2235                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
2236                                          cmsg->cmsg_type == SO_TIMESTAMP &&
2237                                          cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
2238                                         tv = (struct timeval*) CMSG_DATA(cmsg);
2239                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2240                                          cmsg->cmsg_type == SCM_RIGHTS) {
2241                                         fds = (int*) CMSG_DATA(cmsg);
2242                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
2243                                 }
2244                         }
2245
2246                         if (ev->data.fd == s->syslog_fd) {
2247                                 char *e;
2248
2249                                 if (n > 0 && n_fds == 0) {
2250                                         e = memchr(s->buffer, '\n', n);
2251                                         if (e)
2252                                                 *e = 0;
2253                                         else
2254                                                 s->buffer[n] = 0;
2255
2256                                         process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
2257                                 } else if (n_fds > 0)
2258                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
2259
2260                         } else {
2261                                 if (n > 0 && n_fds == 0)
2262                                         process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
2263                                 else if (n == 0 && n_fds == 1)
2264                                         process_native_file(s, fds[0], ucred, tv, label, label_len);
2265                                 else if (n_fds > 0)
2266                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
2267                         }
2268
2269                         close_many(fds, n_fds);
2270                 }
2271
2272                 return 1;
2273
2274         } else if (ev->data.fd == s->stdout_fd) {
2275
2276                 if (ev->events != EPOLLIN) {
2277                         log_info("Got invalid event from epoll.");
2278                         return -EIO;
2279                 }
2280
2281                 stdout_stream_new(s);
2282                 return 1;
2283
2284         } else {
2285                 StdoutStream *stream;
2286
2287                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
2288                         log_info("Got invalid event from epoll.");
2289                         return -EIO;
2290                 }
2291
2292                 /* If it is none of the well-known fds, it must be an
2293                  * stdout stream fd. Note that this is a bit ugly here
2294                  * (since we rely that none of the well-known fds
2295                  * could be interpreted as pointer), but nonetheless
2296                  * safe, since the well-known fds would never get an
2297                  * fd > 4096, i.e. beyond the first memory page */
2298
2299                 stream = ev->data.ptr;
2300
2301                 if (stdout_stream_process(stream) <= 0)
2302                         stdout_stream_free(stream);
2303
2304                 return 1;
2305         }
2306
2307         log_error("Unknown event.");
2308         return 0;
2309 }
2310
2311 static int open_syslog_socket(Server *s) {
2312         union sockaddr_union sa;
2313         int one, r;
2314         struct epoll_event ev;
2315
2316         assert(s);
2317
2318         if (s->syslog_fd < 0) {
2319
2320                 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2321                 if (s->syslog_fd < 0) {
2322                         log_error("socket() failed: %m");
2323                         return -errno;
2324                 }
2325
2326                 zero(sa);
2327                 sa.un.sun_family = AF_UNIX;
2328                 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
2329
2330                 unlink(sa.un.sun_path);
2331
2332                 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2333                 if (r < 0) {
2334                         log_error("bind() failed: %m");
2335                         return -errno;
2336                 }
2337
2338                 chmod(sa.un.sun_path, 0666);
2339         } else
2340                 fd_nonblock(s->syslog_fd, 1);
2341
2342         one = 1;
2343         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2344         if (r < 0) {
2345                 log_error("SO_PASSCRED failed: %m");
2346                 return -errno;
2347         }
2348
2349 #ifdef HAVE_SELINUX
2350         one = 1;
2351         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2352         if (r < 0)
2353                 log_warning("SO_PASSSEC failed: %m");
2354 #endif
2355
2356         one = 1;
2357         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2358         if (r < 0) {
2359                 log_error("SO_TIMESTAMP failed: %m");
2360                 return -errno;
2361         }
2362
2363         zero(ev);
2364         ev.events = EPOLLIN;
2365         ev.data.fd = s->syslog_fd;
2366         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2367                 log_error("Failed to add syslog server fd to epoll object: %m");
2368                 return -errno;
2369         }
2370
2371         return 0;
2372 }
2373
2374 static int open_native_socket(Server*s) {
2375         union sockaddr_union sa;
2376         int one, r;
2377         struct epoll_event ev;
2378
2379         assert(s);
2380
2381         if (s->native_fd < 0) {
2382
2383                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2384                 if (s->native_fd < 0) {
2385                         log_error("socket() failed: %m");
2386                         return -errno;
2387                 }
2388
2389                 zero(sa);
2390                 sa.un.sun_family = AF_UNIX;
2391                 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
2392
2393                 unlink(sa.un.sun_path);
2394
2395                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2396                 if (r < 0) {
2397                         log_error("bind() failed: %m");
2398                         return -errno;
2399                 }
2400
2401                 chmod(sa.un.sun_path, 0666);
2402         } else
2403                 fd_nonblock(s->native_fd, 1);
2404
2405         one = 1;
2406         r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2407         if (r < 0) {
2408                 log_error("SO_PASSCRED failed: %m");
2409                 return -errno;
2410         }
2411
2412 #ifdef HAVE_SELINUX
2413         one = 1;
2414         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2415         if (r < 0)
2416                 log_warning("SO_PASSSEC failed: %m");
2417 #endif
2418
2419         one = 1;
2420         r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2421         if (r < 0) {
2422                 log_error("SO_TIMESTAMP failed: %m");
2423                 return -errno;
2424         }
2425
2426         zero(ev);
2427         ev.events = EPOLLIN;
2428         ev.data.fd = s->native_fd;
2429         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2430                 log_error("Failed to add native server fd to epoll object: %m");
2431                 return -errno;
2432         }
2433
2434         return 0;
2435 }
2436
2437 static int open_stdout_socket(Server *s) {
2438         union sockaddr_union sa;
2439         int r;
2440         struct epoll_event ev;
2441
2442         assert(s);
2443
2444         if (s->stdout_fd < 0) {
2445
2446                 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2447                 if (s->stdout_fd < 0) {
2448                         log_error("socket() failed: %m");
2449                         return -errno;
2450                 }
2451
2452                 zero(sa);
2453                 sa.un.sun_family = AF_UNIX;
2454                 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
2455
2456                 unlink(sa.un.sun_path);
2457
2458                 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2459                 if (r < 0) {
2460                         log_error("bind() failed: %m");
2461                         return -errno;
2462                 }
2463
2464                 chmod(sa.un.sun_path, 0666);
2465
2466                 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2467                         log_error("liste() failed: %m");
2468                         return -errno;
2469                 }
2470         } else
2471                 fd_nonblock(s->stdout_fd, 1);
2472
2473         zero(ev);
2474         ev.events = EPOLLIN;
2475         ev.data.fd = s->stdout_fd;
2476         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2477                 log_error("Failed to add stdout server fd to epoll object: %m");
2478                 return -errno;
2479         }
2480
2481         return 0;
2482 }
2483
2484 static int open_proc_kmsg(Server *s) {
2485         struct epoll_event ev;
2486
2487         assert(s);
2488
2489         if (!s->import_proc_kmsg)
2490                 return 0;
2491
2492         s->proc_kmsg_fd = open("/proc/kmsg", O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2493         if (s->proc_kmsg_fd < 0) {
2494                 log_warning("Failed to open /proc/kmsg, ignoring: %m");
2495                 return 0;
2496         }
2497
2498         zero(ev);
2499         ev.events = EPOLLIN;
2500         ev.data.fd = s->proc_kmsg_fd;
2501         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->proc_kmsg_fd, &ev) < 0) {
2502                 log_error("Failed to add /proc/kmsg fd to epoll object: %m");
2503                 return -errno;
2504         }
2505
2506         return 0;
2507 }
2508
2509 static int open_signalfd(Server *s) {
2510         sigset_t mask;
2511         struct epoll_event ev;
2512
2513         assert(s);
2514
2515         assert_se(sigemptyset(&mask) == 0);
2516         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, -1);
2517         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2518
2519         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2520         if (s->signal_fd < 0) {
2521                 log_error("signalfd(): %m");
2522                 return -errno;
2523         }
2524
2525         zero(ev);
2526         ev.events = EPOLLIN;
2527         ev.data.fd = s->signal_fd;
2528
2529         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2530                 log_error("epoll_ctl(): %m");
2531                 return -errno;
2532         }
2533
2534         return 0;
2535 }
2536
2537 static int server_parse_proc_cmdline(Server *s) {
2538         char *line, *w, *state;
2539         int r;
2540         size_t l;
2541
2542         if (detect_container(NULL) > 0)
2543                 return 0;
2544
2545         r = read_one_line_file("/proc/cmdline", &line);
2546         if (r < 0) {
2547                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2548                 return 0;
2549         }
2550
2551         FOREACH_WORD_QUOTED(w, l, line, state) {
2552                 char *word;
2553
2554                 word = strndup(w, l);
2555                 if (!word) {
2556                         r = -ENOMEM;
2557                         goto finish;
2558                 }
2559
2560                 if (startswith(word, "systemd_journald.forward_to_syslog=")) {
2561                         r = parse_boolean(word + 35);
2562                         if (r < 0)
2563                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2564                         else
2565                                 s->forward_to_syslog = r;
2566                 } else if (startswith(word, "systemd_journald.forward_to_kmsg=")) {
2567                         r = parse_boolean(word + 33);
2568                         if (r < 0)
2569                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2570                         else
2571                                 s->forward_to_kmsg = r;
2572                 } else if (startswith(word, "systemd_journald.forward_to_console=")) {
2573                         r = parse_boolean(word + 36);
2574                         if (r < 0)
2575                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2576                         else
2577                                 s->forward_to_console = r;
2578                 }
2579
2580                 free(word);
2581         }
2582
2583         r = 0;
2584
2585 finish:
2586         free(line);
2587         return r;
2588 }
2589
2590 static int server_parse_config_file(Server *s) {
2591         FILE *f;
2592         const char *fn;
2593         int r;
2594
2595         assert(s);
2596
2597         fn = "/etc/systemd/journald.conf";
2598         f = fopen(fn, "re");
2599         if (!f) {
2600                 if (errno == ENOENT)
2601                         return 0;
2602
2603                 log_warning("Failed to open configuration file %s: %m", fn);
2604                 return -errno;
2605         }
2606
2607         r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2608         if (r < 0)
2609                 log_warning("Failed to parse configuration file: %s", strerror(-r));
2610
2611         fclose(f);
2612
2613         return r;
2614 }
2615
2616 static int server_init(Server *s) {
2617         int n, r, fd;
2618
2619         assert(s);
2620
2621         zero(*s);
2622         s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->proc_kmsg_fd = -1;
2623         s->compress = true;
2624
2625         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
2626         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
2627
2628         s->forward_to_syslog = true;
2629         s->import_proc_kmsg = true;
2630
2631         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
2632         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
2633
2634         server_parse_config_file(s);
2635         server_parse_proc_cmdline(s);
2636
2637         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
2638         if (!s->user_journals) {
2639                 log_error("Out of memory.");
2640                 return -ENOMEM;
2641         }
2642
2643         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2644         if (s->epoll_fd < 0) {
2645                 log_error("Failed to create epoll object: %m");
2646                 return -errno;
2647         }
2648
2649         n = sd_listen_fds(true);
2650         if (n < 0) {
2651                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
2652                 return n;
2653         }
2654
2655         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2656
2657                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
2658
2659                         if (s->native_fd >= 0) {
2660                                 log_error("Too many native sockets passed.");
2661                                 return -EINVAL;
2662                         }
2663
2664                         s->native_fd = fd;
2665
2666                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
2667
2668                         if (s->stdout_fd >= 0) {
2669                                 log_error("Too many stdout sockets passed.");
2670                                 return -EINVAL;
2671                         }
2672
2673                         s->stdout_fd = fd;
2674
2675                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
2676
2677                         if (s->syslog_fd >= 0) {
2678                                 log_error("Too many /dev/log sockets passed.");
2679                                 return -EINVAL;
2680                         }
2681
2682                         s->syslog_fd = fd;
2683
2684                 } else {
2685                         log_error("Unknown socket passed.");
2686                         return -EINVAL;
2687                 }
2688         }
2689
2690         r = open_syslog_socket(s);
2691         if (r < 0)
2692                 return r;
2693
2694         r = open_native_socket(s);
2695         if (r < 0)
2696                 return r;
2697
2698         r = open_stdout_socket(s);
2699         if (r < 0)
2700                 return r;
2701
2702         r = open_proc_kmsg(s);
2703         if (r < 0)
2704                 return r;
2705
2706         r = open_signalfd(s);
2707         if (r < 0)
2708                 return r;
2709
2710         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2711         if (!s->rate_limit)
2712                 return -ENOMEM;
2713
2714         r = system_journal_open(s);
2715         if (r < 0)
2716                 return r;
2717
2718         return 0;
2719 }
2720
2721 static void server_done(Server *s) {
2722         JournalFile *f;
2723         assert(s);
2724
2725         while (s->stdout_streams)
2726                 stdout_stream_free(s->stdout_streams);
2727
2728         if (s->system_journal)
2729                 journal_file_close(s->system_journal);
2730
2731         if (s->runtime_journal)
2732                 journal_file_close(s->runtime_journal);
2733
2734         while ((f = hashmap_steal_first(s->user_journals)))
2735                 journal_file_close(f);
2736
2737         hashmap_free(s->user_journals);
2738
2739         if (s->epoll_fd >= 0)
2740                 close_nointr_nofail(s->epoll_fd);
2741
2742         if (s->signal_fd >= 0)
2743                 close_nointr_nofail(s->signal_fd);
2744
2745         if (s->syslog_fd >= 0)
2746                 close_nointr_nofail(s->syslog_fd);
2747
2748         if (s->native_fd >= 0)
2749                 close_nointr_nofail(s->native_fd);
2750
2751         if (s->stdout_fd >= 0)
2752                 close_nointr_nofail(s->stdout_fd);
2753
2754         if (s->proc_kmsg_fd >= 0)
2755                 close_nointr_nofail(s->proc_kmsg_fd);
2756
2757         if (s->rate_limit)
2758                 journal_rate_limit_free(s->rate_limit);
2759
2760         free(s->buffer);
2761 }
2762
2763 int main(int argc, char *argv[]) {
2764         Server server;
2765         int r;
2766
2767         /* if (getppid() != 1) { */
2768         /*         log_error("This program should be invoked by init only."); */
2769         /*         return EXIT_FAILURE; */
2770         /* } */
2771
2772         if (argc > 1) {
2773                 log_error("This program does not take arguments.");
2774                 return EXIT_FAILURE;
2775         }
2776
2777         log_set_target(LOG_TARGET_SAFE);
2778         log_set_facility(LOG_SYSLOG);
2779         log_parse_environment();
2780         log_open();
2781
2782         umask(0022);
2783
2784         r = server_init(&server);
2785         if (r < 0)
2786                 goto finish;
2787
2788         server_vacuum(&server);
2789         server_flush_to_var(&server);
2790         server_flush_proc_kmsg(&server);
2791
2792         log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
2793         driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
2794
2795         sd_notify(false,
2796                   "READY=1\n"
2797                   "STATUS=Processing requests...");
2798
2799         for (;;) {
2800                 struct epoll_event event;
2801
2802                 r = epoll_wait(server.epoll_fd, &event, 1, -1);
2803                 if (r < 0) {
2804
2805                         if (errno == EINTR)
2806                                 continue;
2807
2808                         log_error("epoll_wait() failed: %m");
2809                         r = -errno;
2810                         goto finish;
2811                 } else if (r == 0)
2812                         break;
2813
2814                 r = process_event(&server, &event);
2815                 if (r < 0)
2816                         goto finish;
2817                 else if (r == 0)
2818                         break;
2819         }
2820
2821         log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
2822         driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
2823
2824 finish:
2825         sd_notify(false,
2826                   "STATUS=Shutting down...");
2827
2828         server_done(&server);
2829
2830         return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
2831 }