chiark / gitweb /
machine-id: initialize from $container_uuid if not set otherwise
[elogind.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 2 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
32 #include <sys/user.h>
33
34 #include <systemd/sd-journal.h>
35 #include <systemd/sd-login.h>
36 #include <systemd/sd-messages.h>
37 #include <systemd/sd-daemon.h>
38
39 #include "hashmap.h"
40 #include "journal-file.h"
41 #include "socket-util.h"
42 #include "cgroup-util.h"
43 #include "list.h"
44 #include "journal-rate-limit.h"
45 #include "journal-internal.h"
46 #include "conf-parser.h"
47 #include "journald.h"
48 #include "virt.h"
49 #include "missing.h"
50
51 #ifdef HAVE_ACL
52 #include <sys/acl.h>
53 #include <acl/libacl.h>
54 #include "acl-util.h"
55 #endif
56
57 #ifdef HAVE_SELINUX
58 #include <selinux/selinux.h>
59 #endif
60
/* Upper bound on per-user journal files kept open at the same time;
 * find_journal() closes one before opening another once it is hit. */
#define USER_JOURNALS_MAX 1024

/* NOTE(review): presumably the cap on simultaneously connected stdout
 * streams; the user of this constant is outside this part of the file. */
#define STDOUT_STREAMS_MAX 4096

/* NOTE(review): presumably the defaults handed to the rate limiter
 * (interval in usec, burst in messages) -- confirm at the init site. */
#define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 200

/* How long the value computed by available_space() is served from the
 * cache before the journal directory is scanned again. */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)

/* NOTE(review): looks like the retry interval for checking whether
 * /var has become usable -- confirm against server_flush_to_var(). */
#define RECHECK_VAR_AVAILABLE_USEC (30*USEC_PER_SEC)

/* Number of free iovec slots a caller must reserve for the trusted
 * metadata fields that dispatch_message_real() appends. */
#define N_IOVEC_META_FIELDS 17

/* NOTE(review): presumably the largest accepted entry size in bytes
 * (32 MiB); its user is outside this part of the file. */
#define ENTRY_SIZE_MAX (1024*1024*32)
74
/* State of a stdout stream connection. NOTE(review): the names suggest
 * these are the header lines a client sends, in order, before payload
 * data starts flowing in STDOUT_STREAM_RUNNING -- confirm against the
 * stream line handler, which is not part of this section. */
typedef enum StdoutStreamState {
        STDOUT_STREAM_IDENTIFIER,
        STDOUT_STREAM_PRIORITY,
        STDOUT_STREAM_LEVEL_PREFIX,
        STDOUT_STREAM_FORWARD_TO_SYSLOG,
        STDOUT_STREAM_FORWARD_TO_KMSG,
        STDOUT_STREAM_FORWARD_TO_CONSOLE,
        STDOUT_STREAM_RUNNING
} StdoutStreamState;
84
85 struct StdoutStream {
86         Server *server;
87         StdoutStreamState state;
88
89         int fd;
90
91         struct ucred ucred;
92 #ifdef HAVE_SELINUX
93         security_context_t security_context;
94 #endif
95
96         char *identifier;
97         int priority;
98         bool level_prefix:1;
99         bool forward_to_syslog:1;
100         bool forward_to_kmsg:1;
101         bool forward_to_console:1;
102
103         char buffer[LINE_MAX+1];
104         size_t length;
105
106         LIST_FIELDS(StdoutStream, stdout_stream);
107 };
108
109 static int server_flush_to_var(Server *s);
110
111 static uint64_t available_space(Server *s) {
112         char ids[33], *p;
113         const char *f;
114         sd_id128_t machine;
115         struct statvfs ss;
116         uint64_t sum = 0, avail = 0, ss_avail = 0;
117         int r;
118         DIR *d;
119         usec_t ts;
120         JournalMetrics *m;
121
122         ts = now(CLOCK_MONOTONIC);
123
124         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
125                 return s->cached_available_space;
126
127         r = sd_id128_get_machine(&machine);
128         if (r < 0)
129                 return 0;
130
131         if (s->system_journal) {
132                 f = "/var/log/journal/";
133                 m = &s->system_metrics;
134         } else {
135                 f = "/run/log/journal/";
136                 m = &s->runtime_metrics;
137         }
138
139         assert(m);
140
141         p = strappend(f, sd_id128_to_string(machine, ids));
142         if (!p)
143                 return 0;
144
145         d = opendir(p);
146         free(p);
147
148         if (!d)
149                 return 0;
150
151         if (fstatvfs(dirfd(d), &ss) < 0)
152                 goto finish;
153
154         for (;;) {
155                 struct stat st;
156                 struct dirent buf, *de;
157                 int k;
158
159                 k = readdir_r(d, &buf, &de);
160                 if (k != 0) {
161                         r = -k;
162                         goto finish;
163                 }
164
165                 if (!de)
166                         break;
167
168                 if (!dirent_is_file_with_suffix(de, ".journal"))
169                         continue;
170
171                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
172                         continue;
173
174                 sum += (uint64_t) st.st_blocks * (uint64_t) st.st_blksize;
175         }
176
177         avail = sum >= m->max_use ? 0 : m->max_use - sum;
178
179         ss_avail = ss.f_bsize * ss.f_bavail;
180
181         ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
182
183         if (ss_avail < avail)
184                 avail = ss_avail;
185
186         s->cached_available_space = avail;
187         s->cached_available_space_timestamp = ts;
188
189 finish:
190         closedir(d);
191
192         return avail;
193 }
194
195 static void server_read_file_gid(Server *s) {
196         const char *adm = "adm";
197         int r;
198
199         assert(s);
200
201         if (s->file_gid_valid)
202                 return;
203
204         r = get_group_creds(&adm, &s->file_gid);
205         if (r < 0)
206                 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
207
208         /* if we couldn't read the gid, then it will be 0, but that's
209          * fine and we shouldn't try to resolve the group again, so
210          * let's just pretend it worked right-away. */
211         s->file_gid_valid = true;
212 }
213
214 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
215         int r;
216 #ifdef HAVE_ACL
217         acl_t acl;
218         acl_entry_t entry;
219         acl_permset_t permset;
220 #endif
221
222         assert(f);
223
224         server_read_file_gid(s);
225
226         r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
227         if (r < 0)
228                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
229
230 #ifdef HAVE_ACL
231         if (uid <= 0)
232                 return;
233
234         acl = acl_get_fd(f->fd);
235         if (!acl) {
236                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
237                 return;
238         }
239
240         r = acl_find_uid(acl, uid, &entry);
241         if (r <= 0) {
242
243                 if (acl_create_entry(&acl, &entry) < 0 ||
244                     acl_set_tag_type(entry, ACL_USER) < 0 ||
245                     acl_set_qualifier(entry, &uid) < 0) {
246                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
247                         goto finish;
248                 }
249         }
250
251         if (acl_get_permset(entry, &permset) < 0 ||
252             acl_add_perm(permset, ACL_READ) < 0 ||
253             acl_calc_mask(&acl) < 0) {
254                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
255                 goto finish;
256         }
257
258         if (acl_set_fd(f->fd, acl) < 0)
259                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
260
261 finish:
262         acl_free(acl);
263 #endif
264 }
265
266 static JournalFile* find_journal(Server *s, uid_t uid) {
267         char *p;
268         int r;
269         JournalFile *f;
270         char ids[33];
271         sd_id128_t machine;
272
273         assert(s);
274
275         /* We split up user logs only on /var, not on /run. If the
276          * runtime file is open, we write to it exclusively, in order
277          * to guarantee proper order as soon as we flush /run to
278          * /var and close the runtime file. */
279
280         if (s->runtime_journal)
281                 return s->runtime_journal;
282
283         if (uid <= 0)
284                 return s->system_journal;
285
286         r = sd_id128_get_machine(&machine);
287         if (r < 0)
288                 return s->system_journal;
289
290         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
291         if (f)
292                 return f;
293
294         if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0)
295                 return s->system_journal;
296
297         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
298                 /* Too many open? Then let's close one */
299                 f = hashmap_steal_first(s->user_journals);
300                 assert(f);
301                 journal_file_close(f);
302         }
303
304         r = journal_file_open(p, O_RDWR|O_CREAT, 0640, s->system_journal, &f);
305         free(p);
306
307         if (r < 0)
308                 return s->system_journal;
309
310         server_fix_perms(s, f, uid);
311         f->metrics = s->system_metrics;
312         f->compress = s->compress;
313
314         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
315         if (r < 0) {
316                 journal_file_close(f);
317                 return s->system_journal;
318         }
319
320         return f;
321 }
322
323 static void server_rotate(Server *s) {
324         JournalFile *f;
325         void *k;
326         Iterator i;
327         int r;
328
329         log_info("Rotating...");
330
331         if (s->runtime_journal) {
332                 r = journal_file_rotate(&s->runtime_journal);
333                 if (r < 0)
334                         log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
335         }
336
337         if (s->system_journal) {
338                 r = journal_file_rotate(&s->system_journal);
339                 if (r < 0)
340                         log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
341         }
342
343         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
344                 r = journal_file_rotate(&f);
345                 if (r < 0)
346                         log_error("Failed to rotate %s: %s", f->path, strerror(-r));
347                 else
348                         hashmap_replace(s->user_journals, k, f);
349         }
350 }
351
352 static void server_vacuum(Server *s) {
353         char *p;
354         char ids[33];
355         sd_id128_t machine;
356         int r;
357
358         log_info("Vacuuming...");
359
360         r = sd_id128_get_machine(&machine);
361         if (r < 0) {
362                 log_error("Failed to get machine ID: %s", strerror(-r));
363                 return;
364         }
365
366         sd_id128_to_string(machine, ids);
367
368         if (s->system_journal) {
369                 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
370                         log_error("Out of memory.");
371                         return;
372                 }
373
374                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
375                 if (r < 0 && r != -ENOENT)
376                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
377                 free(p);
378         }
379
380
381         if (s->runtime_journal) {
382                 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
383                         log_error("Out of memory.");
384                         return;
385                 }
386
387                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
388                 if (r < 0 && r != -ENOENT)
389                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
390                 free(p);
391         }
392
393         s->cached_available_space_timestamp = 0;
394 }
395
396 static char *shortened_cgroup_path(pid_t pid) {
397         int r;
398         char *process_path, *init_path, *path;
399
400         assert(pid > 0);
401
402         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
403         if (r < 0)
404                 return NULL;
405
406         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
407         if (r < 0) {
408                 free(process_path);
409                 return NULL;
410         }
411
412         if (endswith(init_path, "/system"))
413                 init_path[strlen(init_path) - 7] = 0;
414         else if (streq(init_path, "/"))
415                 init_path[0] = 0;
416
417         if (startswith(process_path, init_path)) {
418                 char *p;
419
420                 p = strdup(process_path + strlen(init_path));
421                 if (!p) {
422                         free(process_path);
423                         free(init_path);
424                         return NULL;
425                 }
426                 path = p;
427         } else {
428                 path = process_path;
429                 process_path = NULL;
430         }
431
432         free(process_path);
433         free(init_path);
434
435         return path;
436 }
437
438 static void dispatch_message_real(
439                 Server *s,
440                 struct iovec *iovec, unsigned n, unsigned m,
441                 struct ucred *ucred,
442                 struct timeval *tv,
443                 const char *label, size_t label_len) {
444
445         char *pid = NULL, *uid = NULL, *gid = NULL,
446                 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
447                 *comm = NULL, *cmdline = NULL, *hostname = NULL,
448                 *audit_session = NULL, *audit_loginuid = NULL,
449                 *exe = NULL, *cgroup = NULL, *session = NULL,
450                 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
451
452         char idbuf[33];
453         sd_id128_t id;
454         int r;
455         char *t;
456         uid_t loginuid = 0, realuid = 0;
457         JournalFile *f;
458         bool vacuumed = false;
459
460         assert(s);
461         assert(iovec);
462         assert(n > 0);
463         assert(n + N_IOVEC_META_FIELDS <= m);
464
465         if (ucred) {
466                 uint32_t audit;
467                 uid_t owner;
468
469                 realuid = ucred->uid;
470
471                 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
472                         IOVEC_SET_STRING(iovec[n++], pid);
473
474                 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
475                         IOVEC_SET_STRING(iovec[n++], uid);
476
477                 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
478                         IOVEC_SET_STRING(iovec[n++], gid);
479
480                 r = get_process_comm(ucred->pid, &t);
481                 if (r >= 0) {
482                         comm = strappend("_COMM=", t);
483                         free(t);
484
485                         if (comm)
486                                 IOVEC_SET_STRING(iovec[n++], comm);
487                 }
488
489                 r = get_process_exe(ucred->pid, &t);
490                 if (r >= 0) {
491                         exe = strappend("_EXE=", t);
492                         free(t);
493
494                         if (exe)
495                                 IOVEC_SET_STRING(iovec[n++], exe);
496                 }
497
498                 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
499                 if (r >= 0) {
500                         cmdline = strappend("_CMDLINE=", t);
501                         free(t);
502
503                         if (cmdline)
504                                 IOVEC_SET_STRING(iovec[n++], cmdline);
505                 }
506
507                 r = audit_session_from_pid(ucred->pid, &audit);
508                 if (r >= 0)
509                         if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
510                                 IOVEC_SET_STRING(iovec[n++], audit_session);
511
512                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
513                 if (r >= 0)
514                         if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
515                                 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
516
517                 t = shortened_cgroup_path(ucred->pid);
518                 if (t) {
519                         cgroup = strappend("_SYSTEMD_CGROUP=", t);
520                         free(t);
521
522                         if (cgroup)
523                                 IOVEC_SET_STRING(iovec[n++], cgroup);
524                 }
525
526                 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
527                         session = strappend("_SYSTEMD_SESSION=", t);
528                         free(t);
529
530                         if (session)
531                                 IOVEC_SET_STRING(iovec[n++], session);
532                 }
533
534                 if (sd_pid_get_unit(ucred->pid, &t) >= 0) {
535                         unit = strappend("_SYSTEMD_UNIT=", t);
536                         free(t);
537
538                         if (unit)
539                                 IOVEC_SET_STRING(iovec[n++], unit);
540                 }
541
542                 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
543                         if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
544                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
545
546 #ifdef HAVE_SELINUX
547                 if (label) {
548                         selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
549                         if (selinux_context) {
550                                 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
551                                 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
552                                 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
553                                 IOVEC_SET_STRING(iovec[n++], selinux_context);
554                         }
555                 } else {
556                         security_context_t con;
557
558                         if (getpidcon(ucred->pid, &con) >= 0) {
559                                 selinux_context = strappend("_SELINUX_CONTEXT=", con);
560                                 if (selinux_context)
561                                         IOVEC_SET_STRING(iovec[n++], selinux_context);
562
563                                 freecon(con);
564                         }
565                 }
566 #endif
567         }
568
569         if (tv) {
570                 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
571                              (unsigned long long) timeval_load(tv)) >= 0)
572                         IOVEC_SET_STRING(iovec[n++], source_time);
573         }
574
575         /* Note that strictly speaking storing the boot id here is
576          * redundant since the entry includes this in-line
577          * anyway. However, we need this indexed, too. */
578         r = sd_id128_get_boot(&id);
579         if (r >= 0)
580                 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
581                         IOVEC_SET_STRING(iovec[n++], boot_id);
582
583         r = sd_id128_get_machine(&id);
584         if (r >= 0)
585                 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
586                         IOVEC_SET_STRING(iovec[n++], machine_id);
587
588         t = gethostname_malloc();
589         if (t) {
590                 hostname = strappend("_HOSTNAME=", t);
591                 free(t);
592                 if (hostname)
593                         IOVEC_SET_STRING(iovec[n++], hostname);
594         }
595
596         assert(n <= m);
597
598         server_flush_to_var(s);
599
600 retry:
601         f = find_journal(s, realuid == 0 ? 0 : loginuid);
602         if (!f)
603                 log_warning("Dropping message, as we can't find a place to store the data.");
604         else {
605                 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
606
607                 if (r == -E2BIG && !vacuumed) {
608                         log_info("Allocation limit reached.");
609
610                         server_rotate(s);
611                         server_vacuum(s);
612                         vacuumed = true;
613
614                         log_info("Retrying write.");
615                         goto retry;
616                 }
617
618                 if (r < 0)
619                         log_error("Failed to write entry, ignoring: %s", strerror(-r));
620         }
621
622         free(pid);
623         free(uid);
624         free(gid);
625         free(comm);
626         free(exe);
627         free(cmdline);
628         free(source_time);
629         free(boot_id);
630         free(machine_id);
631         free(hostname);
632         free(audit_session);
633         free(audit_loginuid);
634         free(cgroup);
635         free(session);
636         free(owner_uid);
637         free(unit);
638         free(selinux_context);
639 }
640
641 static void driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
642         char mid[11 + 32 + 1];
643         char buffer[16 + LINE_MAX + 1];
644         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
645         int n = 0;
646         va_list ap;
647         struct ucred ucred;
648
649         assert(s);
650         assert(format);
651
652         IOVEC_SET_STRING(iovec[n++], "PRIORITY=5");
653         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
654
655         memcpy(buffer, "MESSAGE=", 8);
656         va_start(ap, format);
657         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
658         va_end(ap);
659         char_array_0(buffer);
660         IOVEC_SET_STRING(iovec[n++], buffer);
661
662         snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
663         char_array_0(mid);
664         IOVEC_SET_STRING(iovec[n++], mid);
665
666         zero(ucred);
667         ucred.pid = getpid();
668         ucred.uid = getuid();
669         ucred.gid = getgid();
670
671         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0);
672 }
673
674 static void dispatch_message(Server *s,
675                              struct iovec *iovec, unsigned n, unsigned m,
676                              struct ucred *ucred,
677                              struct timeval *tv,
678                              const char *label, size_t label_len,
679                              int priority) {
680         int rl;
681         char *path = NULL, *c;
682
683         assert(s);
684         assert(iovec || n == 0);
685
686         if (n == 0)
687                 return;
688
689         if (!ucred)
690                 goto finish;
691
692         path = shortened_cgroup_path(ucred->pid);
693         if (!path)
694                 goto finish;
695
696         /* example: /user/lennart/3/foobar
697          *          /system/dbus.service/foobar
698          *
699          * So let's cut of everything past the third /, since that is
700          * wher user directories start */
701
702         c = strchr(path, '/');
703         if (c) {
704                 c = strchr(c+1, '/');
705                 if (c) {
706                         c = strchr(c+1, '/');
707                         if (c)
708                                 *c = 0;
709                 }
710         }
711
712         rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
713
714         if (rl == 0) {
715                 free(path);
716                 return;
717         }
718
719         /* Write a suppression message if we suppressed something */
720         if (rl > 1)
721                 driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
722
723         free(path);
724
725 finish:
726         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len);
727 }
728
729 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
730         struct msghdr msghdr;
731         struct cmsghdr *cmsg;
732         union {
733                 struct cmsghdr cmsghdr;
734                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
735         } control;
736         union sockaddr_union sa;
737
738         assert(s);
739         assert(iovec);
740         assert(n_iovec > 0);
741
742         zero(msghdr);
743         msghdr.msg_iov = (struct iovec*) iovec;
744         msghdr.msg_iovlen = n_iovec;
745
746         zero(sa);
747         sa.un.sun_family = AF_UNIX;
748         strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
749         msghdr.msg_name = &sa;
750         msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
751
752         if (ucred) {
753                 zero(control);
754                 msghdr.msg_control = &control;
755                 msghdr.msg_controllen = sizeof(control);
756
757                 cmsg = CMSG_FIRSTHDR(&msghdr);
758                 cmsg->cmsg_level = SOL_SOCKET;
759                 cmsg->cmsg_type = SCM_CREDENTIALS;
760                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
761                 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
762                 msghdr.msg_controllen = cmsg->cmsg_len;
763         }
764
765         /* Forward the syslog message we received via /dev/log to
766          * /run/systemd/syslog. Unfortunately we currently can't set
767          * the SO_TIMESTAMP auxiliary data, and hence we don't. */
768
769         if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
770                 return;
771
772         /* The socket is full? I guess the syslog implementation is
773          * too slow, and we shouldn't wait for that... */
774         if (errno == EAGAIN)
775                 return;
776
777         if (ucred && errno == ESRCH) {
778                 struct ucred u;
779
780                 /* Hmm, presumably the sender process vanished
781                  * by now, so let's fix it as good as we
782                  * can, and retry */
783
784                 u = *ucred;
785                 u.pid = getpid();
786                 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
787
788                 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
789                         return;
790
791                 if (errno == EAGAIN)
792                         return;
793         }
794
795         log_debug("Failed to forward syslog message: %m");
796 }
797
798 static void forward_syslog_raw(Server *s, const char *buffer, struct ucred *ucred, struct timeval *tv) {
799         struct iovec iovec;
800
801         assert(s);
802         assert(buffer);
803
804         IOVEC_SET_STRING(iovec, buffer);
805         forward_syslog_iovec(s, &iovec, 1, ucred, tv);
806 }
807
808 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
809         struct iovec iovec[5];
810         char header_priority[6], header_time[64], header_pid[16];
811         int n = 0;
812         time_t t;
813         struct tm *tm;
814         char *ident_buf = NULL;
815
816         assert(s);
817         assert(priority >= 0);
818         assert(priority <= 999);
819         assert(message);
820
821         /* First: priority field */
822         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
823         char_array_0(header_priority);
824         IOVEC_SET_STRING(iovec[n++], header_priority);
825
826         /* Second: timestamp */
827         t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
828         tm = localtime(&t);
829         if (!tm)
830                 return;
831         if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
832                 return;
833         IOVEC_SET_STRING(iovec[n++], header_time);
834
835         /* Third: identifier and PID */
836         if (ucred) {
837                 if (!identifier) {
838                         get_process_comm(ucred->pid, &ident_buf);
839                         identifier = ident_buf;
840                 }
841
842                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
843                 char_array_0(header_pid);
844
845                 if (identifier)
846                         IOVEC_SET_STRING(iovec[n++], identifier);
847
848                 IOVEC_SET_STRING(iovec[n++], header_pid);
849         } else if (identifier) {
850                 IOVEC_SET_STRING(iovec[n++], identifier);
851                 IOVEC_SET_STRING(iovec[n++], ": ");
852         }
853
854         /* Fourth: message */
855         IOVEC_SET_STRING(iovec[n++], message);
856
857         forward_syslog_iovec(s, iovec, n, ucred, tv);
858
859         free(ident_buf);
860 }
861
/* Replaces facility 0 (the kernel facility) with LOG_USER and keeps
 * the level. Priorities that carry any other facility are returned
 * unchanged. */
static int fixup_priority(int priority) {
        int facility = priority & LOG_FACMASK;

        return facility != 0 ? priority : ((priority & LOG_PRIMASK) | LOG_USER);
}
869
870 static void forward_kmsg(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
871         struct iovec iovec[5];
872         char header_priority[6], header_pid[16];
873         int n = 0;
874         char *ident_buf = NULL;
875         int fd;
876
877         assert(s);
878         assert(priority >= 0);
879         assert(priority <= 999);
880         assert(message);
881
882         /* Never allow messages with kernel facility to be written to
883          * kmsg, regardless where the data comes from. */
884         priority = fixup_priority(priority);
885
886         /* First: priority field */
887         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
888         char_array_0(header_priority);
889         IOVEC_SET_STRING(iovec[n++], header_priority);
890
891         /* Second: identifier and PID */
892         if (ucred) {
893                 if (!identifier) {
894                         get_process_comm(ucred->pid, &ident_buf);
895                         identifier = ident_buf;
896                 }
897
898                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
899                 char_array_0(header_pid);
900
901                 if (identifier)
902                         IOVEC_SET_STRING(iovec[n++], identifier);
903
904                 IOVEC_SET_STRING(iovec[n++], header_pid);
905         } else if (identifier) {
906                 IOVEC_SET_STRING(iovec[n++], identifier);
907                 IOVEC_SET_STRING(iovec[n++], ": ");
908         }
909
910         /* Fourth: message */
911         IOVEC_SET_STRING(iovec[n++], message);
912         IOVEC_SET_STRING(iovec[n++], "\n");
913
914         fd = open("/dev/kmsg", O_WRONLY|O_NOCTTY|O_CLOEXEC);
915         if (fd < 0) {
916                 log_debug("Failed to open /dev/kmsg for logging: %s", strerror(errno));
917                 goto finish;
918         }
919
920         if (writev(fd, iovec, n) < 0)
921                 log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
922
923         close_nointr_nofail(fd);
924
925 finish:
926         free(ident_buf);
927 }
928
929 static void forward_console(Server *s, const char *identifier, const char *message, struct ucred *ucred) {
930         struct iovec iovec[4];
931         char header_pid[16];
932         int n = 0, fd;
933         char *ident_buf = NULL;
934
935         assert(s);
936         assert(message);
937
938         /* First: identifier and PID */
939         if (ucred) {
940                 if (!identifier) {
941                         get_process_comm(ucred->pid, &ident_buf);
942                         identifier = ident_buf;
943                 }
944
945                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
946                 char_array_0(header_pid);
947
948                 if (identifier)
949                         IOVEC_SET_STRING(iovec[n++], identifier);
950
951                 IOVEC_SET_STRING(iovec[n++], header_pid);
952         } else if (identifier) {
953                 IOVEC_SET_STRING(iovec[n++], identifier);
954                 IOVEC_SET_STRING(iovec[n++], ": ");
955         }
956
957         /* Third: message */
958         IOVEC_SET_STRING(iovec[n++], message);
959         IOVEC_SET_STRING(iovec[n++], "\n");
960
961         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
962         if (fd < 0) {
963                 log_debug("Failed to open /dev/console for logging: %s", strerror(errno));
964                 goto finish;
965         }
966
967         if (writev(fd, iovec, n) < 0)
968                 log_debug("Failed to write to /dev/console for logging: %s", strerror(errno));
969
970         close_nointr_nofail(fd);
971
972 finish:
973         free(ident_buf);
974 }
975
976 static void read_identifier(const char **buf, char **identifier, char **pid) {
977         const char *p;
978         char *t;
979         size_t l, e;
980
981         assert(buf);
982         assert(identifier);
983         assert(pid);
984
985         p = *buf;
986
987         p += strspn(p, WHITESPACE);
988         l = strcspn(p, WHITESPACE);
989
990         if (l <= 0 ||
991             p[l-1] != ':')
992                 return;
993
994         e = l;
995         l--;
996
997         if (p[l-1] == ']') {
998                 size_t k = l-1;
999
1000                 for (;;) {
1001
1002                         if (p[k] == '[') {
1003                                 t = strndup(p+k+1, l-k-2);
1004                                 if (t)
1005                                         *pid = t;
1006
1007                                 l = k;
1008                                 break;
1009                         }
1010
1011                         if (k == 0)
1012                                 break;
1013
1014                         k--;
1015                 }
1016         }
1017
1018         t = strndup(p, l);
1019         if (t)
1020                 *identifier = t;
1021
1022         *buf = p + e;
1023         *buf += strspn(*buf, WHITESPACE);
1024 }
1025
1026 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv, const char *label, size_t label_len) {
1027         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
1028         struct iovec iovec[N_IOVEC_META_FIELDS + 6];
1029         unsigned n = 0;
1030         int priority = LOG_USER | LOG_INFO;
1031         char *identifier = NULL, *pid = NULL;
1032
1033         assert(s);
1034         assert(buf);
1035
1036         if (s->forward_to_syslog)
1037                 forward_syslog_raw(s, buf, ucred, tv);
1038
1039         parse_syslog_priority((char**) &buf, &priority);
1040         skip_syslog_date((char**) &buf);
1041         read_identifier(&buf, &identifier, &pid);
1042
1043         if (s->forward_to_kmsg)
1044                 forward_kmsg(s, priority, identifier, buf, ucred);
1045
1046         if (s->forward_to_console)
1047                 forward_console(s, identifier, buf, ucred);
1048
1049         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
1050
1051         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1052                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1053
1054         if (priority & LOG_FACMASK)
1055                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1056                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1057
1058         if (identifier) {
1059                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1060                 if (syslog_identifier)
1061                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1062         }
1063
1064         if (pid) {
1065                 syslog_pid = strappend("SYSLOG_PID=", pid);
1066                 if (syslog_pid)
1067                         IOVEC_SET_STRING(iovec[n++], syslog_pid);
1068         }
1069
1070         message = strappend("MESSAGE=", buf);
1071         if (message)
1072                 IOVEC_SET_STRING(iovec[n++], message);
1073
1074         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, label, label_len, priority);
1075
1076         free(message);
1077         free(identifier);
1078         free(pid);
1079         free(syslog_priority);
1080         free(syslog_facility);
1081         free(syslog_identifier);
1082 }
1083
/* Checks whether the l bytes at p form a field name that clients are
 * allowed to set.
 *
 * This roughly follows the POSIX syntax recommendations for
 * environment variable names, with a few extra restrictions:
 *
 * http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
 *
 * A name is accepted only if it is 1..64 bytes long, does not begin
 * with '_' (such names are protected) or with a digit, and consists
 * exclusively of 'A'-'Z', '0'-'9' and '_'. */
static bool valid_user_field(const char *p, size_t l) {
        size_t i;

        /* Neither empty names nor names longer than 64 chars */
        if (l == 0 || l > 64)
                return false;

        /* A leading underscore marks a protected field, and a leading
         * digit is not permitted either */
        if (p[0] == '_' || (p[0] >= '0' && p[0] <= '9'))
                return false;

        /* Everything must be from the set A-Z, 0-9, '_' */
        for (i = 0; i < l; i++) {
                char c = p[i];
                bool is_upper = c >= 'A' && c <= 'Z';
                bool is_digit = c >= '0' && c <= '9';

                if (!is_upper && !is_digit && c != '_')
                        return false;
        }

        return true;
}
1118
1119 static void process_native_message(
1120                 Server *s,
1121                 const void *buffer, size_t buffer_size,
1122                 struct ucred *ucred,
1123                 struct timeval *tv,
1124                 const char *label, size_t label_len) {
1125
1126         struct iovec *iovec = NULL;
1127         unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1128         const char *p;
1129         size_t remaining;
1130         int priority = LOG_INFO;
1131         char *identifier = NULL, *message = NULL;
1132
1133         assert(s);
1134         assert(buffer || n == 0);
1135
1136         p = buffer;
1137         remaining = buffer_size;
1138
1139         while (remaining > 0) {
1140                 const char *e, *q;
1141
1142                 e = memchr(p, '\n', remaining);
1143
1144                 if (!e) {
1145                         /* Trailing noise, let's ignore it, and flush what we collected */
1146                         log_debug("Received message with trailing noise, ignoring.");
1147                         break;
1148                 }
1149
1150                 if (e == p) {
1151                         /* Entry separator */
1152                         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, priority);
1153                         n = 0;
1154                         priority = LOG_INFO;
1155
1156                         p++;
1157                         remaining--;
1158                         continue;
1159                 }
1160
1161                 if (*p == '.' || *p == '#') {
1162                         /* Ignore control commands for now, and
1163                          * comments too. */
1164                         remaining -= (e - p) + 1;
1165                         p = e + 1;
1166                         continue;
1167                 }
1168
1169                 /* A property follows */
1170
1171                 if (n+N_IOVEC_META_FIELDS >= m) {
1172                         struct iovec *c;
1173                         unsigned u;
1174
1175                         u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1176                         c = realloc(iovec, u * sizeof(struct iovec));
1177                         if (!c) {
1178                                 log_error("Out of memory");
1179                                 break;
1180                         }
1181
1182                         iovec = c;
1183                         m = u;
1184                 }
1185
1186                 q = memchr(p, '=', e - p);
1187                 if (q) {
1188                         if (valid_user_field(p, q - p)) {
1189                                 size_t l;
1190
1191                                 l = e - p;
1192
1193                                 /* If the field name starts with an
1194                                  * underscore, skip the variable,
1195                                  * since that indidates a trusted
1196                                  * field */
1197                                 iovec[n].iov_base = (char*) p;
1198                                 iovec[n].iov_len = l;
1199                                 n++;
1200
1201                                 /* We need to determine the priority
1202                                  * of this entry for the rate limiting
1203                                  * logic */
1204                                 if (l == 10 &&
1205                                     memcmp(p, "PRIORITY=", 9) == 0 &&
1206                                     p[9] >= '0' && p[9] <= '9')
1207                                         priority = (priority & LOG_FACMASK) | (p[9] - '0');
1208
1209                                 else if (l == 17 &&
1210                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1211                                          p[16] >= '0' && p[16] <= '9')
1212                                         priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1213
1214                                 else if (l == 18 &&
1215                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1216                                          p[16] >= '0' && p[16] <= '9' &&
1217                                          p[17] >= '0' && p[17] <= '9')
1218                                         priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1219
1220                                 else if (l >= 12 &&
1221                                          memcmp(p, "SYSLOG_IDENTIFIER=", 11) == 0) {
1222                                         char *t;
1223
1224                                         t = strndup(p + 11, l - 11);
1225                                         if (t) {
1226                                                 free(identifier);
1227                                                 identifier = t;
1228                                         }
1229                                 } else if (l >= 8 &&
1230                                            memcmp(p, "MESSAGE=", 8) == 0) {
1231                                         char *t;
1232
1233                                         t = strndup(p + 8, l - 8);
1234                                         if (t) {
1235                                                 free(message);
1236                                                 message = t;
1237                                         }
1238                                 }
1239                         }
1240
1241                         remaining -= (e - p) + 1;
1242                         p = e + 1;
1243                         continue;
1244                 } else {
1245                         uint64_t l;
1246                         char *k;
1247
1248                         if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1249                                 log_debug("Failed to parse message, ignoring.");
1250                                 break;
1251                         }
1252
1253                         memcpy(&l, e + 1, sizeof(uint64_t));
1254                         l = le64toh(l);
1255
1256                         if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1257                             e[1+sizeof(uint64_t)+l] != '\n') {
1258                                 log_debug("Failed to parse message, ignoring.");
1259                                 break;
1260                         }
1261
1262                         k = malloc((e - p) + 1 + l);
1263                         if (!k) {
1264                                 log_error("Out of memory");
1265                                 break;
1266                         }
1267
1268                         memcpy(k, p, e - p);
1269                         k[e - p] = '=';
1270                         memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1271
1272                         if (valid_user_field(p, e - p)) {
1273                                 iovec[n].iov_base = k;
1274                                 iovec[n].iov_len = (e - p) + 1 + l;
1275                                 n++;
1276                         } else
1277                                 free(k);
1278
1279                         remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1280                         p = e + 1 + sizeof(uint64_t) + l + 1;
1281                 }
1282         }
1283
1284         if (n <= 0)
1285                 goto finish;
1286
1287         tn = n++;
1288         IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1289
1290         if (message) {
1291                 if (s->forward_to_syslog)
1292                         forward_syslog(s, priority, identifier, message, ucred, tv);
1293
1294                 if (s->forward_to_kmsg)
1295                         forward_kmsg(s, priority, identifier, message, ucred);
1296
1297                 if (s->forward_to_console)
1298                         forward_console(s, identifier, message, ucred);
1299         }
1300
1301         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, priority);
1302
1303 finish:
1304         for (j = 0; j < n; j++)  {
1305                 if (j == tn)
1306                         continue;
1307
1308                 if (iovec[j].iov_base < buffer ||
1309                     (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1310                         free(iovec[j].iov_base);
1311         }
1312
1313         free(identifier);
1314         free(message);
1315 }
1316
1317 static void process_native_file(
1318                 Server *s,
1319                 int fd,
1320                 struct ucred *ucred,
1321                 struct timeval *tv,
1322                 const char *label, size_t label_len) {
1323
1324         struct stat st;
1325         void *p;
1326         ssize_t n;
1327
1328         assert(s);
1329         assert(fd >= 0);
1330
1331         /* Data is in the passed file, since it didn't fit in a
1332          * datagram. We can't map the file here, since clients might
1333          * then truncate it and trigger a SIGBUS for us. So let's
1334          * stupidly read it */
1335
1336         if (fstat(fd, &st) < 0) {
1337                 log_error("Failed to stat passed file, ignoring: %m");
1338                 return;
1339         }
1340
1341         if (!S_ISREG(st.st_mode)) {
1342                 log_error("File passed is not regular. Ignoring.");
1343                 return;
1344         }
1345
1346         if (st.st_size <= 0)
1347                 return;
1348
1349         if (st.st_size > ENTRY_SIZE_MAX) {
1350                 log_error("File passed too large. Ignoring.");
1351                 return;
1352         }
1353
1354         p = malloc(st.st_size);
1355         if (!p) {
1356                 log_error("Out of memory");
1357                 return;
1358         }
1359
1360         n = pread(fd, p, st.st_size, 0);
1361         if (n < 0)
1362                 log_error("Failed to read file, ignoring: %s", strerror(-n));
1363         else if (n > 0)
1364                 process_native_message(s, p, n, ucred, tv, label, label_len);
1365
1366         free(p);
1367 }
1368
1369 static int stdout_stream_log(StdoutStream *s, const char *p) {
1370         struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1371         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1372         unsigned n = 0;
1373         int priority;
1374         char *label = NULL;
1375         size_t label_len = 0;
1376
1377         assert(s);
1378         assert(p);
1379
1380         if (isempty(p))
1381                 return 0;
1382
1383         priority = s->priority;
1384
1385         if (s->level_prefix)
1386                 parse_syslog_priority((char**) &p, &priority);
1387
1388         if (s->forward_to_syslog || s->server->forward_to_syslog)
1389                 forward_syslog(s->server, fixup_priority(priority), s->identifier, p, &s->ucred, NULL);
1390
1391         if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1392                 forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1393
1394         if (s->forward_to_console || s->server->forward_to_console)
1395                 forward_console(s->server, s->identifier, p, &s->ucred);
1396
1397         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1398
1399         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1400                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1401
1402         if (priority & LOG_FACMASK)
1403                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1404                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1405
1406         if (s->identifier) {
1407                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1408                 if (syslog_identifier)
1409                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1410         }
1411
1412         message = strappend("MESSAGE=", p);
1413         if (message)
1414                 IOVEC_SET_STRING(iovec[n++], message);
1415
1416 #ifdef HAVE_SELINUX
1417         if (s->security_context) {
1418                 label = (char*) s->security_context;
1419                 label_len = strlen((char*) s->security_context);
1420         }
1421 #endif
1422
1423         dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, label, label_len, priority);
1424
1425         free(message);
1426         free(syslog_priority);
1427         free(syslog_facility);
1428         free(syslog_identifier);
1429
1430         return 0;
1431 }
1432
1433 static int stdout_stream_line(StdoutStream *s, char *p) {
1434         int r;
1435
1436         assert(s);
1437         assert(p);
1438
1439         p = strstrip(p);
1440
1441         switch (s->state) {
1442
1443         case STDOUT_STREAM_IDENTIFIER:
1444                 if (isempty(p))
1445                         s->identifier = NULL;
1446                 else  {
1447                         s->identifier = strdup(p);
1448                         if (!s->identifier) {
1449                                 log_error("Out of memory");
1450                                 return -ENOMEM;
1451                         }
1452                 }
1453
1454                 s->state = STDOUT_STREAM_PRIORITY;
1455                 return 0;
1456
1457         case STDOUT_STREAM_PRIORITY:
1458                 r = safe_atoi(p, &s->priority);
1459                 if (r < 0 || s->priority <= 0 || s->priority >= 999) {
1460                         log_warning("Failed to parse log priority line.");
1461                         return -EINVAL;
1462                 }
1463
1464                 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1465                 return 0;
1466
1467         case STDOUT_STREAM_LEVEL_PREFIX:
1468                 r = parse_boolean(p);
1469                 if (r < 0) {
1470                         log_warning("Failed to parse level prefix line.");
1471                         return -EINVAL;
1472                 }
1473
1474                 s->level_prefix = !!r;
1475                 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1476                 return 0;
1477
1478         case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1479                 r = parse_boolean(p);
1480                 if (r < 0) {
1481                         log_warning("Failed to parse forward to syslog line.");
1482                         return -EINVAL;
1483                 }
1484
1485                 s->forward_to_syslog = !!r;
1486                 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1487                 return 0;
1488
1489         case STDOUT_STREAM_FORWARD_TO_KMSG:
1490                 r = parse_boolean(p);
1491                 if (r < 0) {
1492                         log_warning("Failed to parse copy to kmsg line.");
1493                         return -EINVAL;
1494                 }
1495
1496                 s->forward_to_kmsg = !!r;
1497                 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1498                 return 0;
1499
1500         case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1501                 r = parse_boolean(p);
1502                 if (r < 0) {
1503                         log_warning("Failed to parse copy to console line.");
1504                         return -EINVAL;
1505                 }
1506
1507                 s->forward_to_console = !!r;
1508                 s->state = STDOUT_STREAM_RUNNING;
1509                 return 0;
1510
1511         case STDOUT_STREAM_RUNNING:
1512                 return stdout_stream_log(s, p);
1513         }
1514
1515         assert_not_reached("Unknown stream state");
1516 }
1517
1518 static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
1519         char *p;
1520         size_t remaining;
1521         int r;
1522
1523         assert(s);
1524
1525         p = s->buffer;
1526         remaining = s->length;
1527         for (;;) {
1528                 char *end;
1529                 size_t skip;
1530
1531                 end = memchr(p, '\n', remaining);
1532                 if (end)
1533                         skip = end - p + 1;
1534                 else if (remaining >= sizeof(s->buffer) - 1) {
1535                         end = p + sizeof(s->buffer) - 1;
1536                         skip = remaining;
1537                 } else
1538                         break;
1539
1540                 *end = 0;
1541
1542                 r = stdout_stream_line(s, p);
1543                 if (r < 0)
1544                         return r;
1545
1546                 remaining -= skip;
1547                 p += skip;
1548         }
1549
1550         if (force_flush && remaining > 0) {
1551                 p[remaining] = 0;
1552                 r = stdout_stream_line(s, p);
1553                 if (r < 0)
1554                         return r;
1555
1556                 p += remaining;
1557                 remaining = 0;
1558         }
1559
1560         if (p > s->buffer) {
1561                 memmove(s->buffer, p, remaining);
1562                 s->length = remaining;
1563         }
1564
1565         return 0;
1566 }
1567
1568 static int stdout_stream_process(StdoutStream *s) {
1569         ssize_t l;
1570         int r;
1571
1572         assert(s);
1573
1574         l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1575         if (l < 0) {
1576
1577                 if (errno == EAGAIN)
1578                         return 0;
1579
1580                 log_warning("Failed to read from stream: %m");
1581                 return -errno;
1582         }
1583
1584         if (l == 0) {
1585                 r = stdout_stream_scan(s, true);
1586                 if (r < 0)
1587                         return r;
1588
1589                 return 0;
1590         }
1591
1592         s->length += l;
1593         r = stdout_stream_scan(s, false);
1594         if (r < 0)
1595                 return r;
1596
1597         return 1;
1598
1599 }
1600
1601 static void stdout_stream_free(StdoutStream *s) {
1602         assert(s);
1603
1604         if (s->server) {
1605                 assert(s->server->n_stdout_streams > 0);
1606                 s->server->n_stdout_streams --;
1607                 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
1608         }
1609
1610         if (s->fd >= 0) {
1611                 if (s->server)
1612                         epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1613
1614                 close_nointr_nofail(s->fd);
1615         }
1616
1617 #ifdef HAVE_SELINUX
1618         if (s->security_context)
1619                 freecon(s->security_context);
1620 #endif
1621
1622         free(s->identifier);
1623         free(s);
1624 }
1625
1626 static int stdout_stream_new(Server *s) {
1627         StdoutStream *stream;
1628         int fd, r;
1629         socklen_t len;
1630         struct epoll_event ev;
1631
1632         assert(s);
1633
1634         fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1635         if (fd < 0) {
1636                 if (errno == EAGAIN)
1637                         return 0;
1638
1639                 log_error("Failed to accept stdout connection: %m");
1640                 return -errno;
1641         }
1642
1643         if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1644                 log_warning("Too many stdout streams, refusing connection.");
1645                 close_nointr_nofail(fd);
1646                 return 0;
1647         }
1648
1649         stream = new0(StdoutStream, 1);
1650         if (!stream) {
1651                 log_error("Out of memory.");
1652                 close_nointr_nofail(fd);
1653                 return -ENOMEM;
1654         }
1655
1656         stream->fd = fd;
1657
1658         len = sizeof(stream->ucred);
1659         if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1660                 log_error("Failed to determine peer credentials: %m");
1661                 r = -errno;
1662                 goto fail;
1663         }
1664
1665 #ifdef HAVE_SELINUX
1666         if (getpeercon(fd, &stream->security_context) < 0)
1667                 log_error("Failed to determine peer security context.");
1668 #endif
1669
1670         if (shutdown(fd, SHUT_WR) < 0) {
1671                 log_error("Failed to shutdown writing side of socket: %m");
1672                 r = -errno;
1673                 goto fail;
1674         }
1675
1676         zero(ev);
1677         ev.data.ptr = stream;
1678         ev.events = EPOLLIN;
1679         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1680                 log_error("Failed to add stream to event loop: %m");
1681                 r = -errno;
1682                 goto fail;
1683         }
1684
1685         stream->server = s;
1686         LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1687         s->n_stdout_streams ++;
1688
1689         return 0;
1690
1691 fail:
1692         stdout_stream_free(stream);
1693         return r;
1694 }
1695
1696 static int parse_kernel_timestamp(char **_p, usec_t *t) {
1697         usec_t r;
1698         int k, i;
1699         char *p;
1700
1701         assert(_p);
1702         assert(*_p);
1703         assert(t);
1704
1705         p = *_p;
1706
1707         if (strlen(p) < 14 || p[0] != '[' || p[13] != ']' || p[6] != '.')
1708                 return 0;
1709
1710         r = 0;
1711
1712         for (i = 1; i <= 5; i++) {
1713                 r *= 10;
1714
1715                 if (p[i] == ' ')
1716                         continue;
1717
1718                 k = undecchar(p[i]);
1719                 if (k < 0)
1720                         return 0;
1721
1722                 r += k;
1723         }
1724
1725         for (i = 7; i <= 12; i++) {
1726                 r *= 10;
1727
1728                 k = undecchar(p[i]);
1729                 if (k < 0)
1730                         return 0;
1731
1732                 r += k;
1733         }
1734
1735         *t = r;
1736         *_p += 14;
1737         *_p += strspn(*_p, WHITESPACE);
1738
1739         return 1;
1740 }
1741
1742 static void proc_kmsg_line(Server *s, const char *p) {
1743         struct iovec iovec[N_IOVEC_META_FIELDS + 7];
1744         char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
1745         int priority = LOG_KERN | LOG_INFO;
1746         unsigned n = 0;
1747         usec_t usec;
1748         char *identifier = NULL, *pid = NULL;
1749
1750         assert(s);
1751         assert(p);
1752
1753         if (isempty(p))
1754                 return;
1755
1756         parse_syslog_priority((char **) &p, &priority);
1757
1758         if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
1759                 return;
1760
1761         if (parse_kernel_timestamp((char **) &p, &usec) > 0) {
1762                 if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
1763                              (unsigned long long) usec) >= 0)
1764                         IOVEC_SET_STRING(iovec[n++], source_time);
1765         }
1766
1767         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
1768
1769         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1770                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1771
1772         if ((priority & LOG_FACMASK) == LOG_KERN) {
1773
1774                 if (s->forward_to_syslog)
1775                         forward_syslog(s, priority, "kernel", p, NULL, NULL);
1776
1777                 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
1778         } else {
1779                 read_identifier(&p, &identifier, &pid);
1780
1781                 if (s->forward_to_syslog)
1782                         forward_syslog(s, priority, identifier, p, NULL, NULL);
1783
1784                 if (identifier) {
1785                         syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1786                         if (syslog_identifier)
1787                                 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1788                 }
1789
1790                 if (pid) {
1791                         syslog_pid = strappend("SYSLOG_PID=", pid);
1792                         if (syslog_pid)
1793                                 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1794                 }
1795
1796                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1797                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1798         }
1799
1800         message = strappend("MESSAGE=", p);
1801         if (message)
1802                 IOVEC_SET_STRING(iovec[n++], message);
1803
1804         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, priority);
1805
1806         free(message);
1807         free(syslog_priority);
1808         free(syslog_identifier);
1809         free(syslog_pid);
1810         free(syslog_facility);
1811         free(source_time);
1812         free(identifier);
1813         free(pid);
1814 }
1815
1816 static void proc_kmsg_scan(Server *s) {
1817         char *p;
1818         size_t remaining;
1819
1820         assert(s);
1821
1822         p = s->proc_kmsg_buffer;
1823         remaining = s->proc_kmsg_length;
1824         for (;;) {
1825                 char *end;
1826                 size_t skip;
1827
1828                 end = memchr(p, '\n', remaining);
1829                 if (end)
1830                         skip = end - p + 1;
1831                 else if (remaining >= sizeof(s->proc_kmsg_buffer) - 1) {
1832                         end = p + sizeof(s->proc_kmsg_buffer) - 1;
1833                         skip = remaining;
1834                 } else
1835                         break;
1836
1837                 *end = 0;
1838
1839                 proc_kmsg_line(s, p);
1840
1841                 remaining -= skip;
1842                 p += skip;
1843         }
1844
1845         if (p > s->proc_kmsg_buffer) {
1846                 memmove(s->proc_kmsg_buffer, p, remaining);
1847                 s->proc_kmsg_length = remaining;
1848         }
1849 }
1850
1851 static int system_journal_open(Server *s) {
1852         int r;
1853         char *fn;
1854         sd_id128_t machine;
1855         char ids[33];
1856
1857         r = sd_id128_get_machine(&machine);
1858         if (r < 0)
1859                 return r;
1860
1861         sd_id128_to_string(machine, ids);
1862
1863         if (!s->system_journal) {
1864
1865                 /* First try to create the machine path, but not the prefix */
1866                 fn = strappend("/var/log/journal/", ids);
1867                 if (!fn)
1868                         return -ENOMEM;
1869                 (void) mkdir(fn, 0755);
1870                 free(fn);
1871
1872                 /* The create the system journal file */
1873                 fn = join("/var/log/journal/", ids, "/system.journal", NULL);
1874                 if (!fn)
1875                         return -ENOMEM;
1876
1877                 r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, NULL, &s->system_journal);
1878                 free(fn);
1879
1880                 if (r >= 0) {
1881                         journal_default_metrics(&s->system_metrics, s->system_journal->fd);
1882
1883                         s->system_journal->metrics = s->system_metrics;
1884                         s->system_journal->compress = s->compress;
1885
1886                         server_fix_perms(s, s->system_journal, 0);
1887                 } else if (r < 0) {
1888
1889                         if (r != -ENOENT && r != -EROFS)
1890                                 log_warning("Failed to open system journal: %s", strerror(-r));
1891
1892                         r = 0;
1893                 }
1894         }
1895
1896         if (!s->runtime_journal) {
1897
1898                 fn = join("/run/log/journal/", ids, "/system.journal", NULL);
1899                 if (!fn)
1900                         return -ENOMEM;
1901
1902                 if (s->system_journal) {
1903
1904                         /* Try to open the runtime journal, but only
1905                          * if it already exists, so that we can flush
1906                          * it into the system journal */
1907
1908                         r = journal_file_open(fn, O_RDWR, 0640, NULL, &s->runtime_journal);
1909                         free(fn);
1910
1911                         if (r < 0) {
1912                                 if (r != -ENOENT)
1913                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
1914
1915                                 r = 0;
1916                         }
1917
1918                 } else {
1919
1920                         /* OK, we really need the runtime journal, so create
1921                          * it if necessary. */
1922
1923                         (void) mkdir_parents(fn, 0755);
1924                         r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, NULL, &s->runtime_journal);
1925                         free(fn);
1926
1927                         if (r < 0) {
1928                                 log_error("Failed to open runtime journal: %s", strerror(-r));
1929                                 return r;
1930                         }
1931                 }
1932
1933                 if (s->runtime_journal) {
1934                         journal_default_metrics(&s->runtime_metrics, s->runtime_journal->fd);
1935
1936                         s->runtime_journal->metrics = s->runtime_metrics;
1937                         s->runtime_journal->compress = s->compress;
1938
1939                         server_fix_perms(s, s->runtime_journal, 0);
1940                 }
1941         }
1942
1943         return r;
1944 }
1945
1946 static int server_flush_to_var(Server *s) {
1947         char path[] = "/run/log/journal/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
1948         Object *o = NULL;
1949         int r;
1950         sd_id128_t machine;
1951         sd_journal *j;
1952         usec_t ts;
1953
1954         assert(s);
1955
1956         if (!s->runtime_journal)
1957                 return 0;
1958
1959         ts = now(CLOCK_MONOTONIC);
1960         if (s->var_available_timestamp + RECHECK_VAR_AVAILABLE_USEC > ts)
1961                 return 0;
1962
1963         s->var_available_timestamp = ts;
1964
1965         system_journal_open(s);
1966
1967         if (!s->system_journal)
1968                 return 0;
1969
1970         log_info("Flushing to /var...");
1971
1972         r = sd_id128_get_machine(&machine);
1973         if (r < 0) {
1974                 log_error("Failed to get machine id: %s", strerror(-r));
1975                 return r;
1976         }
1977
1978         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1979         if (r < 0) {
1980                 log_error("Failed to read runtime journal: %s", strerror(-r));
1981                 return r;
1982         }
1983
1984         SD_JOURNAL_FOREACH(j) {
1985                 JournalFile *f;
1986
1987                 f = j->current_file;
1988                 assert(f && f->current_offset > 0);
1989
1990                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1991                 if (r < 0) {
1992                         log_error("Can't read entry: %s", strerror(-r));
1993                         goto finish;
1994                 }
1995
1996                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1997                 if (r == -E2BIG) {
1998                         log_info("Allocation limit reached.");
1999
2000                         journal_file_post_change(s->system_journal);
2001                         server_rotate(s);
2002                         server_vacuum(s);
2003
2004                         r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2005                 }
2006
2007                 if (r < 0) {
2008                         log_error("Can't write entry: %s", strerror(-r));
2009                         goto finish;
2010                 }
2011         }
2012
2013 finish:
2014         journal_file_post_change(s->system_journal);
2015
2016         journal_file_close(s->runtime_journal);
2017         s->runtime_journal = NULL;
2018
2019         if (r >= 0) {
2020                 sd_id128_to_string(machine, path + 17);
2021                 rm_rf(path, false, true, false);
2022         }
2023
2024         return r;
2025 }
2026
2027 static int server_read_proc_kmsg(Server *s) {
2028         ssize_t l;
2029         assert(s);
2030         assert(s->proc_kmsg_fd >= 0);
2031
2032         l = read(s->proc_kmsg_fd, s->proc_kmsg_buffer + s->proc_kmsg_length, sizeof(s->proc_kmsg_buffer) - 1 - s->proc_kmsg_length);
2033         if (l < 0) {
2034
2035                 if (errno == EAGAIN || errno == EINTR)
2036                         return 0;
2037
2038                 log_error("Failed to read from kernel: %m");
2039                 return -errno;
2040         }
2041
2042         s->proc_kmsg_length += l;
2043
2044         proc_kmsg_scan(s);
2045         return 1;
2046 }
2047
2048 static int server_flush_proc_kmsg(Server *s) {
2049         int r;
2050
2051         assert(s);
2052
2053         if (s->proc_kmsg_fd < 0)
2054                 return 0;
2055
2056         log_info("Flushing /proc/kmsg...");
2057
2058         for (;;) {
2059                 r = server_read_proc_kmsg(s);
2060                 if (r < 0)
2061                         return r;
2062
2063                 if (r == 0)
2064                         break;
2065         }
2066
2067         return 0;
2068 }
2069
/* Handles a single event returned by epoll. The source of the event is
 * identified by comparing ev->data.fd with the server's well-known file
 * descriptors; anything else is treated as a stdout stream whose
 * StdoutStream object is stored in ev->data.ptr.
 *
 * Returns a negative errno-style value on failure, 0 after a signal has
 * been read from the signal fd, and 1 in all other handled cases.
 * NOTE(review): the caller is outside this view; presumably a return
 * value of 0 makes the main loop terminate -- confirm. */
static int process_event(Server *s, struct epoll_event *ev) {
        assert(s);

        if (ev->data.fd == s->signal_fd) {
                struct signalfd_siginfo sfsi;
                ssize_t n;

                if (ev->events != EPOLLIN) {
                        log_info("Got invalid event from epoll.");
                        return -EIO;
                }

                n = read(s->signal_fd, &sfsi, sizeof(sfsi));
                if (n != sizeof(sfsi)) {

                        /* A short read is treated as an I/O error */
                        if (n >= 0)
                                return -EIO;

                        if (errno == EINTR || errno == EAGAIN)
                                return 1;

                        return -errno;
                }

                /* SIGUSR1 requests a flush of the runtime journal to /var */
                if (sfsi.ssi_signo == SIGUSR1) {
                        server_flush_to_var(s);
                        return 0;
                }

                log_debug("Received SIG%s", signal_to_string(sfsi.ssi_signo));
                return 0;

        } else if (ev->data.fd == s->proc_kmsg_fd) {
                int r;

                if (ev->events != EPOLLIN) {
                        log_info("Got invalid event from epoll.");
                        return -EIO;
                }

                r = server_read_proc_kmsg(s);
                if (r < 0)
                        return r;

                return 1;

        } else if (ev->data.fd == s->native_fd ||
                   ev->data.fd == s->syslog_fd) {

                if (ev->events != EPOLLIN) {
                        log_info("Got invalid event from epoll.");
                        return -EIO;
                }

                /* Receive datagrams until recvmsg() fails; EINTR/EAGAIN
                 * ends the loop with a return value of 1 */
                for (;;) {
                        struct msghdr msghdr;
                        struct iovec iovec;
                        struct ucred *ucred = NULL;
                        struct timeval *tv = NULL;
                        struct cmsghdr *cmsg;
                        char *label = NULL;
                        size_t label_len = 0;
                        /* Control buffer with room for credentials, a
                         * timestamp, one file descriptor and a security
                         * label of up to PAGE_SIZE bytes */
                        union {
                                struct cmsghdr cmsghdr;
                                uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
                                            CMSG_SPACE(sizeof(struct timeval)) +
                                            CMSG_SPACE(sizeof(int)) +
                                            CMSG_SPACE(PAGE_SIZE)]; /* selinux label */
                        } control;
                        ssize_t n;
                        int v;
                        int *fds = NULL;
                        unsigned n_fds = 0;

                        /* Query the number of pending bytes, to size the
                         * receive buffer accordingly */
                        if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
                                log_error("SIOCINQ failed: %m");
                                return -errno;
                        }

                        if (s->buffer_size < (size_t) v) {
                                void *b;
                                size_t l;

                                /* One byte more than buffer_size is allocated,
                                 * so that s->buffer[n] below stays in bounds */
                                l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
                                b = realloc(s->buffer, l+1);

                                if (!b) {
                                        log_error("Couldn't increase buffer.");
                                        return -ENOMEM;
                                }

                                s->buffer_size = l;
                                s->buffer = b;
                        }

                        zero(iovec);
                        iovec.iov_base = s->buffer;
                        iovec.iov_len = s->buffer_size;

                        zero(control);
                        zero(msghdr);
                        msghdr.msg_iov = &iovec;
                        msghdr.msg_iovlen = 1;
                        msghdr.msg_control = &control;
                        msghdr.msg_controllen = sizeof(control);

                        n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
                        if (n < 0) {

                                if (errno == EINTR || errno == EAGAIN)
                                        return 1;

                                log_error("recvmsg() failed: %m");
                                return -errno;
                        }

                        /* Pick up the ancillary data; the pointers set here
                         * refer into the control buffer above */
                        for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {

                                if (cmsg->cmsg_level == SOL_SOCKET &&
                                    cmsg->cmsg_type == SCM_CREDENTIALS &&
                                    cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
                                        ucred = (struct ucred*) CMSG_DATA(cmsg);
                                else if (cmsg->cmsg_level == SOL_SOCKET &&
                                         cmsg->cmsg_type == SCM_SECURITY) {
                                        label = (char*) CMSG_DATA(cmsg);
                                        label_len = cmsg->cmsg_len - CMSG_LEN(0);
                                } else if (cmsg->cmsg_level == SOL_SOCKET &&
                                         cmsg->cmsg_type == SO_TIMESTAMP &&
                                         cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
                                        tv = (struct timeval*) CMSG_DATA(cmsg);
                                else if (cmsg->cmsg_level == SOL_SOCKET &&
                                         cmsg->cmsg_type == SCM_RIGHTS) {
                                        fds = (int*) CMSG_DATA(cmsg);
                                        n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
                                }
                        }

                        if (ev->data.fd == s->syslog_fd) {
                                char *e;

                                if (n > 0 && n_fds == 0) {
                                        /* Only the first line of the datagram
                                         * is used, as a NUL-terminated string */
                                        e = memchr(s->buffer, '\n', n);
                                        if (e)
                                                *e = 0;
                                        else
                                                s->buffer[n] = 0;

                                        process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
                                } else if (n_fds > 0)
                                        log_warning("Got file descriptors via syslog socket. Ignoring.");

                        } else {
                                /* Native protocol: either a non-empty payload
                                 * without fds, or an empty payload with
                                 * exactly one fd */
                                if (n > 0 && n_fds == 0)
                                        process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
                                else if (n == 0 && n_fds == 1)
                                        process_native_file(s, fds[0], ucred, tv, label, label_len);
                                else if (n_fds > 0)
                                        log_warning("Got too many file descriptors via native socket. Ignoring.");
                        }

                        /* All received file descriptors are closed again
                         * here, whether they were used or not */
                        close_many(fds, n_fds);
                }

                return 1;

        } else if (ev->data.fd == s->stdout_fd) {

                if (ev->events != EPOLLIN) {
                        log_info("Got invalid event from epoll.");
                        return -EIO;
                }

                /* Activity on the listening stdout socket: set up a new
                 * stream object */
                stdout_stream_new(s);
                return 1;

        } else {
                StdoutStream *stream;

                /* Only EPOLLIN and EPOLLHUP are acceptable here */
                if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
                        log_info("Got invalid event from epoll.");
                        return -EIO;
                }

                /* If it is none of the well-known fds, it must be an
                 * stdout stream fd. Note that this is a bit ugly here
                 * (since we rely that none of the well-known fds
                 * could be interpreted as pointer), but nonetheless
                 * safe, since the well-known fds would never get an
                 * fd > 4096, i.e. beyond the first memory page */

                stream = ev->data.ptr;

                /* A result <= 0 means the stream is released */
                if (stdout_stream_process(stream) <= 0)
                        stdout_stream_free(stream);

                return 1;
        }

        /* Not reached: every branch above returns */
        log_error("Unknown event.");
        return 0;
}
2271
2272 static int open_syslog_socket(Server *s) {
2273         union sockaddr_union sa;
2274         int one, r;
2275         struct epoll_event ev;
2276
2277         assert(s);
2278
2279         if (s->syslog_fd < 0) {
2280
2281                 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2282                 if (s->syslog_fd < 0) {
2283                         log_error("socket() failed: %m");
2284                         return -errno;
2285                 }
2286
2287                 zero(sa);
2288                 sa.un.sun_family = AF_UNIX;
2289                 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
2290
2291                 unlink(sa.un.sun_path);
2292
2293                 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2294                 if (r < 0) {
2295                         log_error("bind() failed: %m");
2296                         return -errno;
2297                 }
2298
2299                 chmod(sa.un.sun_path, 0666);
2300         } else
2301                 fd_nonblock(s->syslog_fd, 1);
2302
2303         one = 1;
2304         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2305         if (r < 0) {
2306                 log_error("SO_PASSCRED failed: %m");
2307                 return -errno;
2308         }
2309
2310         one = 1;
2311         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2312         if (r < 0)
2313                 log_warning("SO_PASSSEC failed: %m");
2314
2315         one = 1;
2316         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2317         if (r < 0) {
2318                 log_error("SO_TIMESTAMP failed: %m");
2319                 return -errno;
2320         }
2321
2322         zero(ev);
2323         ev.events = EPOLLIN;
2324         ev.data.fd = s->syslog_fd;
2325         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2326                 log_error("Failed to add syslog server fd to epoll object: %m");
2327                 return -errno;
2328         }
2329
2330         return 0;
2331 }
2332
2333 static int open_native_socket(Server*s) {
2334         union sockaddr_union sa;
2335         int one, r;
2336         struct epoll_event ev;
2337
2338         assert(s);
2339
2340         if (s->native_fd < 0) {
2341
2342                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2343                 if (s->native_fd < 0) {
2344                         log_error("socket() failed: %m");
2345                         return -errno;
2346                 }
2347
2348                 zero(sa);
2349                 sa.un.sun_family = AF_UNIX;
2350                 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
2351
2352                 unlink(sa.un.sun_path);
2353
2354                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2355                 if (r < 0) {
2356                         log_error("bind() failed: %m");
2357                         return -errno;
2358                 }
2359
2360                 chmod(sa.un.sun_path, 0666);
2361         } else
2362                 fd_nonblock(s->native_fd, 1);
2363
2364         one = 1;
2365         r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2366         if (r < 0) {
2367                 log_error("SO_PASSCRED failed: %m");
2368                 return -errno;
2369         }
2370
2371         one = 1;
2372         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2373         if (r < 0)
2374                 log_warning("SO_PASSSEC failed: %m");
2375
2376         one = 1;
2377         r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2378         if (r < 0) {
2379                 log_error("SO_TIMESTAMP failed: %m");
2380                 return -errno;
2381         }
2382
2383         zero(ev);
2384         ev.events = EPOLLIN;
2385         ev.data.fd = s->native_fd;
2386         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2387                 log_error("Failed to add native server fd to epoll object: %m");
2388                 return -errno;
2389         }
2390
2391         return 0;
2392 }
2393
2394 static int open_stdout_socket(Server *s) {
2395         union sockaddr_union sa;
2396         int r;
2397         struct epoll_event ev;
2398
2399         assert(s);
2400
2401         if (s->stdout_fd < 0) {
2402
2403                 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2404                 if (s->stdout_fd < 0) {
2405                         log_error("socket() failed: %m");
2406                         return -errno;
2407                 }
2408
2409                 zero(sa);
2410                 sa.un.sun_family = AF_UNIX;
2411                 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
2412
2413                 unlink(sa.un.sun_path);
2414
2415                 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2416                 if (r < 0) {
2417                         log_error("bind() failed: %m");
2418                         return -errno;
2419                 }
2420
2421                 chmod(sa.un.sun_path, 0666);
2422
2423                 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2424                         log_error("liste() failed: %m");
2425                         return -errno;
2426                 }
2427         } else
2428                 fd_nonblock(s->stdout_fd, 1);
2429
2430         zero(ev);
2431         ev.events = EPOLLIN;
2432         ev.data.fd = s->stdout_fd;
2433         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2434                 log_error("Failed to add stdout server fd to epoll object: %m");
2435                 return -errno;
2436         }
2437
2438         return 0;
2439 }
2440
2441 static int open_proc_kmsg(Server *s) {
2442         struct epoll_event ev;
2443
2444         assert(s);
2445
2446         if (!s->import_proc_kmsg)
2447                 return 0;
2448
2449
2450         s->proc_kmsg_fd = open("/proc/kmsg", O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2451         if (s->proc_kmsg_fd < 0) {
2452                 log_warning("Failed to open /proc/kmsg, ignoring: %m");
2453                 return 0;
2454         }
2455
2456         zero(ev);
2457         ev.events = EPOLLIN;
2458         ev.data.fd = s->proc_kmsg_fd;
2459         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->proc_kmsg_fd, &ev) < 0) {
2460                 log_error("Failed to add /proc/kmsg fd to epoll object: %m");
2461                 return -errno;
2462         }
2463
2464         return 0;
2465 }
2466
2467 static int open_signalfd(Server *s) {
2468         sigset_t mask;
2469         struct epoll_event ev;
2470
2471         assert(s);
2472
2473         assert_se(sigemptyset(&mask) == 0);
2474         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, -1);
2475         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2476
2477         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2478         if (s->signal_fd < 0) {
2479                 log_error("signalfd(): %m");
2480                 return -errno;
2481         }
2482
2483         zero(ev);
2484         ev.events = EPOLLIN;
2485         ev.data.fd = s->signal_fd;
2486
2487         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2488                 log_error("epoll_ctl(): %m");
2489                 return -errno;
2490         }
2491
2492         return 0;
2493 }
2494
2495 static int server_parse_proc_cmdline(Server *s) {
2496         char *line, *w, *state;
2497         int r;
2498         size_t l;
2499
2500         if (detect_container(NULL) > 0)
2501                 return 0;
2502
2503         r = read_one_line_file("/proc/cmdline", &line);
2504         if (r < 0) {
2505                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2506                 return 0;
2507         }
2508
2509         FOREACH_WORD_QUOTED(w, l, line, state) {
2510                 char *word;
2511
2512                 word = strndup(w, l);
2513                 if (!word) {
2514                         r = -ENOMEM;
2515                         goto finish;
2516                 }
2517
2518                 if (startswith(word, "systemd_journald.forward_to_syslog=")) {
2519                         r = parse_boolean(word + 35);
2520                         if (r < 0)
2521                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2522                         else
2523                                 s->forward_to_syslog = r;
2524                 } else if (startswith(word, "systemd_journald.forward_to_kmsg=")) {
2525                         r = parse_boolean(word + 33);
2526                         if (r < 0)
2527                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2528                         else
2529                                 s->forward_to_kmsg = r;
2530                 } else if (startswith(word, "systemd_journald.forward_to_console=")) {
2531                         r = parse_boolean(word + 36);
2532                         if (r < 0)
2533                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2534                         else
2535                                 s->forward_to_console = r;
2536                 }
2537
2538                 free(word);
2539         }
2540
2541         r = 0;
2542
2543 finish:
2544         free(line);
2545         return r;
2546 }
2547
2548 static int server_parse_config_file(Server *s) {
2549         FILE *f;
2550         const char *fn;
2551         int r;
2552
2553         assert(s);
2554
2555         fn = "/etc/systemd/systemd-journald.conf";
2556         f = fopen(fn, "re");
2557         if (!f) {
2558                 if (errno == ENOENT)
2559                         return 0;
2560
2561                 log_warning("Failed to open configuration file %s: %m", fn);
2562                 return -errno;
2563         }
2564
2565         r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2566         if (r < 0)
2567                 log_warning("Failed to parse configuration file: %s", strerror(-r));
2568
2569         fclose(f);
2570
2571         return r;
2572 }
2573
/* Initializes the server object: resets it, applies built-in defaults,
 * the configuration file and the kernel command line, takes over any
 * sockets passed in via socket activation, opens the remaining sockets,
 * /proc/kmsg, the journal files and the signal fd, and finally sets up
 * the rate limiter.
 *
 * Returns 0 on success, a negative errno-style value on failure. On
 * failure nothing allocated so far is released here.
 * NOTE(review): presumably the caller invokes server_done() in either
 * case -- confirm against main(). */
static int server_init(Server *s) {
        int n, r, fd;

        assert(s);

        /* Start from a clean slate; -1 marks a file descriptor as unset */
        zero(*s);
        s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->proc_kmsg_fd = -1;
        s->compress = true;

        s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
        s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;

        s->forward_to_syslog = true;
        s->import_proc_kmsg = true;

        /* Fill the metrics with all-ones bytes.
         * NOTE(review): presumably this marks every field as "unset" so
         * that journal_default_metrics() picks the defaults -- confirm. */
        memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
        memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));

        /* Both return values are ignored; the kernel command line is
         * parsed last and hence may override the configuration file */
        server_parse_config_file(s);
        server_parse_proc_cmdline(s);

        s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
        if (!s->user_journals) {
                log_error("Out of memory.");
                return -ENOMEM;
        }

        s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (s->epoll_fd < 0) {
                log_error("Failed to create epoll object: %m");
                return -errno;
        }

        n = sd_listen_fds(true);
        if (n < 0) {
                log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
                return n;
        }

        /* Sort the passed sockets into their roles; at most one of each
         * kind is accepted, and anything unrecognized is an error */
        for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {

                if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {

                        if (s->native_fd >= 0) {
                                log_error("Too many native sockets passed.");
                                return -EINVAL;
                        }

                        s->native_fd = fd;

                } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {

                        if (s->stdout_fd >= 0) {
                                log_error("Too many stdout sockets passed.");
                                return -EINVAL;
                        }

                        s->stdout_fd = fd;

                } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {

                        if (s->syslog_fd >= 0) {
                                log_error("Too many /dev/log sockets passed.");
                                return -EINVAL;
                        }

                        s->syslog_fd = fd;

                } else {
                        log_error("Unknown socket passed.");
                        return -EINVAL;
                }
        }

        /* The open_*_socket() calls create the sockets that were not
         * passed in and register all of them with epoll */
        r = open_syslog_socket(s);
        if (r < 0)
                return r;

        r = open_native_socket(s);
        if (r < 0)
                return r;

        r = open_stdout_socket(s);
        if (r < 0)
                return r;

        r = open_proc_kmsg(s);
        if (r < 0)
                return r;

        r = system_journal_open(s);
        if (r < 0)
                return r;

        r = open_signalfd(s);
        if (r < 0)
                return r;

        s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
        if (!s->rate_limit)
                return -ENOMEM;

        return 0;
}
2678
2679 static void server_done(Server *s) {
2680         JournalFile *f;
2681         assert(s);
2682
2683         while (s->stdout_streams)
2684                 stdout_stream_free(s->stdout_streams);
2685
2686         if (s->system_journal)
2687                 journal_file_close(s->system_journal);
2688
2689         if (s->runtime_journal)
2690                 journal_file_close(s->runtime_journal);
2691
2692         while ((f = hashmap_steal_first(s->user_journals)))
2693                 journal_file_close(f);
2694
2695         hashmap_free(s->user_journals);
2696
2697         if (s->epoll_fd >= 0)
2698                 close_nointr_nofail(s->epoll_fd);
2699
2700         if (s->signal_fd >= 0)
2701                 close_nointr_nofail(s->signal_fd);
2702
2703         if (s->syslog_fd >= 0)
2704                 close_nointr_nofail(s->syslog_fd);
2705
2706         if (s->native_fd >= 0)
2707                 close_nointr_nofail(s->native_fd);
2708
2709         if (s->stdout_fd >= 0)
2710                 close_nointr_nofail(s->stdout_fd);
2711
2712         if (s->proc_kmsg_fd >= 0)
2713                 close_nointr_nofail(s->proc_kmsg_fd);
2714
2715         if (s->rate_limit)
2716                 journal_rate_limit_free(s->rate_limit);
2717
2718         free(s->buffer);
2719 }
2720
2721 int main(int argc, char *argv[]) {
2722         Server server;
2723         int r;
2724
2725         /* if (getppid() != 1) { */
2726         /*         log_error("This program should be invoked by init only."); */
2727         /*         return EXIT_FAILURE; */
2728         /* } */
2729
2730         if (argc > 1) {
2731                 log_error("This program does not take arguments.");
2732                 return EXIT_FAILURE;
2733         }
2734
2735         log_set_target(LOG_TARGET_CONSOLE);
2736         log_parse_environment();
2737         log_open();
2738
2739         umask(0022);
2740
2741         r = server_init(&server);
2742         if (r < 0)
2743                 goto finish;
2744
2745         server_vacuum(&server);
2746         server_flush_to_var(&server);
2747         server_flush_proc_kmsg(&server);
2748
2749         log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
2750         driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
2751
2752         sd_notify(false,
2753                   "READY=1\n"
2754                   "STATUS=Processing requests...");
2755
2756         for (;;) {
2757                 struct epoll_event event;
2758
2759                 r = epoll_wait(server.epoll_fd, &event, 1, -1);
2760                 if (r < 0) {
2761
2762                         if (errno == EINTR)
2763                                 continue;
2764
2765                         log_error("epoll_wait() failed: %m");
2766                         r = -errno;
2767                         goto finish;
2768                 } else if (r == 0)
2769                         break;
2770
2771                 r = process_event(&server, &event);
2772                 if (r < 0)
2773                         goto finish;
2774                 else if (r == 0)
2775                         break;
2776         }
2777
2778         log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
2779         driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
2780
2781 finish:
2782         sd_notify(false,
2783                   "STATUS=Shutting down...");
2784
2785         server_done(&server);
2786
2787         return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
2788 }