chiark / gitweb /
journald: split /dev/kmsg related stuff into its own .c file
[elogind.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
32 #include <sys/mman.h>
33
34 #include <libudev.h>
35 #include <systemd/sd-journal.h>
36 #include <systemd/sd-messages.h>
37 #include <systemd/sd-daemon.h>
38
39 #ifdef HAVE_LOGIND
40 #include <systemd/sd-login.h>
41 #endif
42
43 #include "mkdir.h"
44 #include "hashmap.h"
45 #include "journal-file.h"
46 #include "socket-util.h"
47 #include "cgroup-util.h"
48 #include "list.h"
49 #include "journal-rate-limit.h"
50 #include "journal-internal.h"
51 #include "journal-vacuum.h"
52 #include "journal-authenticate.h"
53 #include "conf-parser.h"
54 #include "journald.h"
55 #include "journald-kmsg.h"
56 #include "virt.h"
57 #include "missing.h"
58
59 #ifdef HAVE_ACL
60 #include <sys/acl.h>
61 #include <acl/libacl.h>
62 #include "acl-util.h"
63 #endif
64
65 #ifdef HAVE_SELINUX
66 #include <selinux/selinux.h>
67 #endif
68
/* Caps on concurrently tracked resources. USER_JOURNALS_MAX bounds the
 * number of per-user journal files kept open at the same time (when the
 * limit is reached an open one is closed to make room).
 * NOTE(review): STDOUT_STREAMS_MAX presumably bounds the number of
 * stdout stream connections; its use is not visible here -- confirm. */
#define USER_JOURNALS_MAX 1024
#define STDOUT_STREAMS_MAX 4096

/* NOTE(review): presumably the default rate limit is
 * DEFAULT_RATE_LIMIT_BURST messages per DEFAULT_RATE_LIMIT_INTERVAL;
 * the consumers are not visible here -- confirm. */
#define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 200

/* How long a computed "available space" value stays cached before it is
 * recomputed from the file system. */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)

/* 32 MiB. NOTE(review): presumably the largest entry accepted from a
 * client; the check itself is not visible here -- confirm. */
#define ENTRY_SIZE_MAX (1024*1024*32)
78
/* State of a StdoutStream connection.
 * NOTE(review): the names suggest that each state before
 * STDOUT_STREAM_RUNNING corresponds to one header field still expected
 * from the client, in this order, and that STDOUT_STREAM_RUNNING means
 * payload lines are being processed -- the state machine is not visible
 * here, confirm against the stream line handler. */
typedef enum StdoutStreamState {
        STDOUT_STREAM_IDENTIFIER,
        STDOUT_STREAM_UNIT_ID,
        STDOUT_STREAM_PRIORITY,
        STDOUT_STREAM_LEVEL_PREFIX,
        STDOUT_STREAM_FORWARD_TO_SYSLOG,
        STDOUT_STREAM_FORWARD_TO_KMSG,
        STDOUT_STREAM_FORWARD_TO_CONSOLE,
        STDOUT_STREAM_RUNNING
} StdoutStreamState;
89
/* Per-connection bookkeeping for one stdout stream client. */
struct StdoutStream {
        Server *server;                 /* back pointer to the owning server */
        StdoutStreamState state;        /* current position in the stream state machine */

        int fd;                         /* file descriptor of the connection */

        struct ucred ucred;             /* credentials (pid/uid/gid) recorded for the peer */
#ifdef HAVE_SELINUX
        security_context_t security_context; /* NOTE(review): presumably the peer's SELinux label -- confirm */
#endif

        /* NOTE(review): the following settings presumably mirror the
         * header fields named by the StdoutStreamState values -- confirm. */
        char *identifier;
        char *unit_id;
        int priority;
        bool level_prefix:1;
        bool forward_to_syslog:1;
        bool forward_to_kmsg:1;
        bool forward_to_console:1;

        /* Input buffer with room for LINE_MAX bytes plus one extra byte,
         * and the number of bytes currently held in it.
         * NOTE(review): "length" semantics inferred from the name -- confirm. */
        char buffer[LINE_MAX+1];
        size_t length;

        /* Linkage for the list of stdout streams */
        LIST_FIELDS(StdoutStream, stdout_stream);
};
114
/* Textual names of the Storage enum values, indexed by enum value. */
static const char* const storage_table[] = {
        [STORAGE_AUTO] = "auto",
        [STORAGE_VOLATILE] = "volatile",
        [STORAGE_PERSISTENT] = "persistent",
        [STORAGE_NONE] = "none"
};

/* Generate the string <-> Storage lookup helpers from storage_table, and
 * the config_parse_storage() configuration parser built on top of them. */
DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
124
125 static uint64_t available_space(Server *s) {
126         char ids[33], *p;
127         const char *f;
128         sd_id128_t machine;
129         struct statvfs ss;
130         uint64_t sum = 0, avail = 0, ss_avail = 0;
131         int r;
132         DIR *d;
133         usec_t ts;
134         JournalMetrics *m;
135
136         ts = now(CLOCK_MONOTONIC);
137
138         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
139                 return s->cached_available_space;
140
141         r = sd_id128_get_machine(&machine);
142         if (r < 0)
143                 return 0;
144
145         if (s->system_journal) {
146                 f = "/var/log/journal/";
147                 m = &s->system_metrics;
148         } else {
149                 f = "/run/log/journal/";
150                 m = &s->runtime_metrics;
151         }
152
153         assert(m);
154
155         p = strappend(f, sd_id128_to_string(machine, ids));
156         if (!p)
157                 return 0;
158
159         d = opendir(p);
160         free(p);
161
162         if (!d)
163                 return 0;
164
165         if (fstatvfs(dirfd(d), &ss) < 0)
166                 goto finish;
167
168         for (;;) {
169                 struct stat st;
170                 struct dirent buf, *de;
171
172                 r = readdir_r(d, &buf, &de);
173                 if (r != 0)
174                         break;
175
176                 if (!de)
177                         break;
178
179                 if (!endswith(de->d_name, ".journal") &&
180                     !endswith(de->d_name, ".journal~"))
181                         continue;
182
183                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
184                         continue;
185
186                 if (!S_ISREG(st.st_mode))
187                         continue;
188
189                 sum += (uint64_t) st.st_blocks * 512UL;
190         }
191
192         avail = sum >= m->max_use ? 0 : m->max_use - sum;
193
194         ss_avail = ss.f_bsize * ss.f_bavail;
195
196         ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
197
198         if (ss_avail < avail)
199                 avail = ss_avail;
200
201         s->cached_available_space = avail;
202         s->cached_available_space_timestamp = ts;
203
204 finish:
205         closedir(d);
206
207         return avail;
208 }
209
210 static void server_read_file_gid(Server *s) {
211         const char *adm = "adm";
212         int r;
213
214         assert(s);
215
216         if (s->file_gid_valid)
217                 return;
218
219         r = get_group_creds(&adm, &s->file_gid);
220         if (r < 0)
221                 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
222
223         /* if we couldn't read the gid, then it will be 0, but that's
224          * fine and we shouldn't try to resolve the group again, so
225          * let's just pretend it worked right-away. */
226         s->file_gid_valid = true;
227 }
228
229 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
230         int r;
231 #ifdef HAVE_ACL
232         acl_t acl;
233         acl_entry_t entry;
234         acl_permset_t permset;
235 #endif
236
237         assert(f);
238
239         server_read_file_gid(s);
240
241         r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
242         if (r < 0)
243                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
244
245 #ifdef HAVE_ACL
246         if (uid <= 0)
247                 return;
248
249         acl = acl_get_fd(f->fd);
250         if (!acl) {
251                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
252                 return;
253         }
254
255         r = acl_find_uid(acl, uid, &entry);
256         if (r <= 0) {
257
258                 if (acl_create_entry(&acl, &entry) < 0 ||
259                     acl_set_tag_type(entry, ACL_USER) < 0 ||
260                     acl_set_qualifier(entry, &uid) < 0) {
261                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
262                         goto finish;
263                 }
264         }
265
266         if (acl_get_permset(entry, &permset) < 0 ||
267             acl_add_perm(permset, ACL_READ) < 0 ||
268             acl_calc_mask(&acl) < 0) {
269                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
270                 goto finish;
271         }
272
273         if (acl_set_fd(f->fd, acl) < 0)
274                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
275
276 finish:
277         acl_free(acl);
278 #endif
279 }
280
281 static JournalFile* find_journal(Server *s, uid_t uid) {
282         char *p;
283         int r;
284         JournalFile *f;
285         sd_id128_t machine;
286
287         assert(s);
288
289         /* We split up user logs only on /var, not on /run. If the
290          * runtime file is open, we write to it exclusively, in order
291          * to guarantee proper order as soon as we flush /run to
292          * /var and close the runtime file. */
293
294         if (s->runtime_journal)
295                 return s->runtime_journal;
296
297         if (uid <= 0)
298                 return s->system_journal;
299
300         r = sd_id128_get_machine(&machine);
301         if (r < 0)
302                 return s->system_journal;
303
304         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
305         if (f)
306                 return f;
307
308         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
309                      SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
310                 return s->system_journal;
311
312         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
313                 /* Too many open? Then let's close one */
314                 f = hashmap_steal_first(s->user_journals);
315                 assert(f);
316                 journal_file_close(f);
317         }
318
319         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
320         free(p);
321
322         if (r < 0)
323                 return s->system_journal;
324
325         server_fix_perms(s, f, uid);
326
327         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
328         if (r < 0) {
329                 journal_file_close(f);
330                 return s->system_journal;
331         }
332
333         return f;
334 }
335
336 static void server_rotate(Server *s) {
337         JournalFile *f;
338         void *k;
339         Iterator i;
340         int r;
341
342         log_info("Rotating...");
343
344         if (s->runtime_journal) {
345                 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
346                 if (r < 0)
347                         if (s->runtime_journal)
348                                 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
349                         else
350                                 log_error("Failed to create new runtime journal: %s", strerror(-r));
351                 else
352                         server_fix_perms(s, s->runtime_journal, 0);
353         }
354
355         if (s->system_journal) {
356                 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
357                 if (r < 0)
358                         if (s->system_journal)
359                                 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
360                         else
361                                 log_error("Failed to create new system journal: %s", strerror(-r));
362
363                 else
364                         server_fix_perms(s, s->system_journal, 0);
365         }
366
367         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
368                 r = journal_file_rotate(&f, s->compress, s->seal);
369                 if (r < 0)
370                         if (f->path)
371                                 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
372                         else
373                                 log_error("Failed to create user journal: %s", strerror(-r));
374                 else {
375                         hashmap_replace(s->user_journals, k, f);
376                         server_fix_perms(s, s->system_journal, PTR_TO_UINT32(k));
377                 }
378         }
379 }
380
381 static void server_vacuum(Server *s) {
382         char *p;
383         char ids[33];
384         sd_id128_t machine;
385         int r;
386
387         log_info("Vacuuming...");
388
389         r = sd_id128_get_machine(&machine);
390         if (r < 0) {
391                 log_error("Failed to get machine ID: %s", strerror(-r));
392                 return;
393         }
394
395         sd_id128_to_string(machine, ids);
396
397         if (s->system_journal) {
398                 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
399                         log_oom();
400                         return;
401                 }
402
403                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
404                 if (r < 0 && r != -ENOENT)
405                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
406                 free(p);
407         }
408
409         if (s->runtime_journal) {
410                 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
411                         log_oom();
412                         return;
413                 }
414
415                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
416                 if (r < 0 && r != -ENOENT)
417                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
418                 free(p);
419         }
420
421         s->cached_available_space_timestamp = 0;
422 }
423
424 static char *shortened_cgroup_path(pid_t pid) {
425         int r;
426         char *process_path, *init_path, *path;
427
428         assert(pid > 0);
429
430         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
431         if (r < 0)
432                 return NULL;
433
434         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
435         if (r < 0) {
436                 free(process_path);
437                 return NULL;
438         }
439
440         if (endswith(init_path, "/system"))
441                 init_path[strlen(init_path) - 7] = 0;
442         else if (streq(init_path, "/"))
443                 init_path[0] = 0;
444
445         if (startswith(process_path, init_path)) {
446                 char *p;
447
448                 p = strdup(process_path + strlen(init_path));
449                 if (!p) {
450                         free(process_path);
451                         free(init_path);
452                         return NULL;
453                 }
454                 path = p;
455         } else {
456                 path = process_path;
457                 process_path = NULL;
458         }
459
460         free(process_path);
461         free(init_path);
462
463         return path;
464 }
465
466 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
467         JournalFile *f;
468         bool vacuumed = false;
469         int r;
470
471         assert(s);
472         assert(iovec);
473         assert(n > 0);
474
475         f = find_journal(s, uid);
476         if (!f)
477                 return;
478
479         if (journal_file_rotate_suggested(f)) {
480                 log_info("Journal header limits reached or header out-of-date, rotating.");
481                 server_rotate(s);
482                 server_vacuum(s);
483                 vacuumed = true;
484
485                 f = find_journal(s, uid);
486                 if (!f)
487                         return;
488         }
489
490         for (;;) {
491                 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
492                 if (r >= 0)
493                         return;
494
495                 if (vacuumed ||
496                     (r != -E2BIG && /* hit limit */
497                      r != -EFBIG && /* hit fs limit */
498                      r != -EDQUOT && /* quota hit */
499                      r != -ENOSPC && /* disk full */
500                      r != -EBADMSG && /* corrupted */
501                      r != -ENODATA && /* truncated */
502                      r != -EHOSTDOWN && /* other machine */
503                      r != -EPROTONOSUPPORT && /* unsupported feature */
504                      r != -EBUSY && /* unclean shutdown */
505                      r != -ESHUTDOWN /* already archived */)) {
506                         log_error("Failed to write entry, ignoring: %s", strerror(-r));
507                         return;
508                 }
509
510                 if (r == -E2BIG || r == -EFBIG || r == EDQUOT || r == ENOSPC)
511                         log_info("Allocation limit reached, rotating.");
512                 else if (r == -EHOSTDOWN)
513                         log_info("Journal file from other machine, rotating.");
514                 else if (r == -EBUSY)
515                         log_info("Unlcean shutdown, rotating.");
516                 else
517                         log_warning("Journal file corrupted, rotating.");
518
519                 server_rotate(s);
520                 server_vacuum(s);
521                 vacuumed = true;
522
523                 f = find_journal(s, uid);
524                 if (!f)
525                         return;
526
527                 log_info("Retrying write.");
528         }
529 }
530
531 static void dispatch_message_real(
532                 Server *s,
533                 struct iovec *iovec, unsigned n, unsigned m,
534                 struct ucred *ucred,
535                 struct timeval *tv,
536                 const char *label, size_t label_len,
537                 const char *unit_id) {
538
539         char *pid = NULL, *uid = NULL, *gid = NULL,
540                 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
541                 *comm = NULL, *cmdline = NULL, *hostname = NULL,
542                 *audit_session = NULL, *audit_loginuid = NULL,
543                 *exe = NULL, *cgroup = NULL, *session = NULL,
544                 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
545
546         char idbuf[33];
547         sd_id128_t id;
548         int r;
549         char *t;
550         uid_t loginuid = 0, realuid = 0;
551
552         assert(s);
553         assert(iovec);
554         assert(n > 0);
555         assert(n + N_IOVEC_META_FIELDS <= m);
556
557         if (ucred) {
558                 uint32_t audit;
559 #ifdef HAVE_LOGIND
560                 uid_t owner;
561 #endif
562
563                 realuid = ucred->uid;
564
565                 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
566                         IOVEC_SET_STRING(iovec[n++], pid);
567
568                 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
569                         IOVEC_SET_STRING(iovec[n++], uid);
570
571                 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
572                         IOVEC_SET_STRING(iovec[n++], gid);
573
574                 r = get_process_comm(ucred->pid, &t);
575                 if (r >= 0) {
576                         comm = strappend("_COMM=", t);
577                         free(t);
578
579                         if (comm)
580                                 IOVEC_SET_STRING(iovec[n++], comm);
581                 }
582
583                 r = get_process_exe(ucred->pid, &t);
584                 if (r >= 0) {
585                         exe = strappend("_EXE=", t);
586                         free(t);
587
588                         if (exe)
589                                 IOVEC_SET_STRING(iovec[n++], exe);
590                 }
591
592                 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
593                 if (r >= 0) {
594                         cmdline = strappend("_CMDLINE=", t);
595                         free(t);
596
597                         if (cmdline)
598                                 IOVEC_SET_STRING(iovec[n++], cmdline);
599                 }
600
601                 r = audit_session_from_pid(ucred->pid, &audit);
602                 if (r >= 0)
603                         if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
604                                 IOVEC_SET_STRING(iovec[n++], audit_session);
605
606                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
607                 if (r >= 0)
608                         if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
609                                 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
610
611                 t = shortened_cgroup_path(ucred->pid);
612                 if (t) {
613                         cgroup = strappend("_SYSTEMD_CGROUP=", t);
614                         free(t);
615
616                         if (cgroup)
617                                 IOVEC_SET_STRING(iovec[n++], cgroup);
618                 }
619
620 #ifdef HAVE_LOGIND
621                 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
622                         session = strappend("_SYSTEMD_SESSION=", t);
623                         free(t);
624
625                         if (session)
626                                 IOVEC_SET_STRING(iovec[n++], session);
627                 }
628
629                 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
630                         if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
631                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
632 #endif
633
634                 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
635                         unit = strappend("_SYSTEMD_UNIT=", t);
636                         free(t);
637                 } else if (unit_id)
638                         unit = strappend("_SYSTEMD_UNIT=", unit_id);
639
640                 if (unit)
641                         IOVEC_SET_STRING(iovec[n++], unit);
642
643 #ifdef HAVE_SELINUX
644                 if (label) {
645                         selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
646                         if (selinux_context) {
647                                 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
648                                 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
649                                 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
650                                 IOVEC_SET_STRING(iovec[n++], selinux_context);
651                         }
652                 } else {
653                         security_context_t con;
654
655                         if (getpidcon(ucred->pid, &con) >= 0) {
656                                 selinux_context = strappend("_SELINUX_CONTEXT=", con);
657                                 if (selinux_context)
658                                         IOVEC_SET_STRING(iovec[n++], selinux_context);
659
660                                 freecon(con);
661                         }
662                 }
663 #endif
664         }
665
666         if (tv) {
667                 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
668                              (unsigned long long) timeval_load(tv)) >= 0)
669                         IOVEC_SET_STRING(iovec[n++], source_time);
670         }
671
672         /* Note that strictly speaking storing the boot id here is
673          * redundant since the entry includes this in-line
674          * anyway. However, we need this indexed, too. */
675         r = sd_id128_get_boot(&id);
676         if (r >= 0)
677                 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
678                         IOVEC_SET_STRING(iovec[n++], boot_id);
679
680         r = sd_id128_get_machine(&id);
681         if (r >= 0)
682                 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
683                         IOVEC_SET_STRING(iovec[n++], machine_id);
684
685         t = gethostname_malloc();
686         if (t) {
687                 hostname = strappend("_HOSTNAME=", t);
688                 free(t);
689                 if (hostname)
690                         IOVEC_SET_STRING(iovec[n++], hostname);
691         }
692
693         assert(n <= m);
694
695         write_to_journal(s, realuid == 0 ? 0 : loginuid, iovec, n);
696
697         free(pid);
698         free(uid);
699         free(gid);
700         free(comm);
701         free(exe);
702         free(cmdline);
703         free(source_time);
704         free(boot_id);
705         free(machine_id);
706         free(hostname);
707         free(audit_session);
708         free(audit_loginuid);
709         free(cgroup);
710         free(session);
711         free(owner_uid);
712         free(unit);
713         free(selinux_context);
714 }
715
716 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
717         char mid[11 + 32 + 1];
718         char buffer[16 + LINE_MAX + 1];
719         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
720         int n = 0;
721         va_list ap;
722         struct ucred ucred;
723
724         assert(s);
725         assert(format);
726
727         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
728         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
729
730         memcpy(buffer, "MESSAGE=", 8);
731         va_start(ap, format);
732         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
733         va_end(ap);
734         char_array_0(buffer);
735         IOVEC_SET_STRING(iovec[n++], buffer);
736
737         snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
738         char_array_0(mid);
739         IOVEC_SET_STRING(iovec[n++], mid);
740
741         zero(ucred);
742         ucred.pid = getpid();
743         ucred.uid = getuid();
744         ucred.gid = getgid();
745
746         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
747 }
748
749 void server_dispatch_message(
750                 Server *s,
751                 struct iovec *iovec, unsigned n, unsigned m,
752                 struct ucred *ucred,
753                 struct timeval *tv,
754                 const char *label, size_t label_len,
755                 const char *unit_id,
756                 int priority) {
757
758         int rl;
759         char *path = NULL, *c;
760
761         assert(s);
762         assert(iovec || n == 0);
763
764         if (n == 0)
765                 return;
766
767         if (LOG_PRI(priority) > s->max_level_store)
768                 return;
769
770         if (!ucred)
771                 goto finish;
772
773         path = shortened_cgroup_path(ucred->pid);
774         if (!path)
775                 goto finish;
776
777         /* example: /user/lennart/3/foobar
778          *          /system/dbus.service/foobar
779          *
780          * So let's cut of everything past the third /, since that is
781          * wher user directories start */
782
783         c = strchr(path, '/');
784         if (c) {
785                 c = strchr(c+1, '/');
786                 if (c) {
787                         c = strchr(c+1, '/');
788                         if (c)
789                                 *c = 0;
790                 }
791         }
792
793         rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
794
795         if (rl == 0) {
796                 free(path);
797                 return;
798         }
799
800         /* Write a suppression message if we suppressed something */
801         if (rl > 1)
802                 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
803
804         free(path);
805
806 finish:
807         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
808 }
809
810 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
811         struct msghdr msghdr;
812         struct cmsghdr *cmsg;
813         union {
814                 struct cmsghdr cmsghdr;
815                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
816         } control;
817         union sockaddr_union sa;
818
819         assert(s);
820         assert(iovec);
821         assert(n_iovec > 0);
822
823         zero(msghdr);
824         msghdr.msg_iov = (struct iovec*) iovec;
825         msghdr.msg_iovlen = n_iovec;
826
827         zero(sa);
828         sa.un.sun_family = AF_UNIX;
829         strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
830         msghdr.msg_name = &sa;
831         msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
832
833         if (ucred) {
834                 zero(control);
835                 msghdr.msg_control = &control;
836                 msghdr.msg_controllen = sizeof(control);
837
838                 cmsg = CMSG_FIRSTHDR(&msghdr);
839                 cmsg->cmsg_level = SOL_SOCKET;
840                 cmsg->cmsg_type = SCM_CREDENTIALS;
841                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
842                 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
843                 msghdr.msg_controllen = cmsg->cmsg_len;
844         }
845
846         /* Forward the syslog message we received via /dev/log to
847          * /run/systemd/syslog. Unfortunately we currently can't set
848          * the SO_TIMESTAMP auxiliary data, and hence we don't. */
849
850         if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
851                 return;
852
853         /* The socket is full? I guess the syslog implementation is
854          * too slow, and we shouldn't wait for that... */
855         if (errno == EAGAIN)
856                 return;
857
858         if (ucred && errno == ESRCH) {
859                 struct ucred u;
860
861                 /* Hmm, presumably the sender process vanished
862                  * by now, so let's fix it as good as we
863                  * can, and retry */
864
865                 u = *ucred;
866                 u.pid = getpid();
867                 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
868
869                 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
870                         return;
871
872                 if (errno == EAGAIN)
873                         return;
874         }
875
876         if (errno != ENOENT)
877                 log_debug("Failed to forward syslog message: %m");
878 }
879
880 static void forward_syslog_raw(Server *s, int priority, const char *buffer, struct ucred *ucred, struct timeval *tv) {
881         struct iovec iovec;
882
883         assert(s);
884         assert(buffer);
885
886         if (LOG_PRI(priority) > s->max_level_syslog)
887                 return;
888
889         IOVEC_SET_STRING(iovec, buffer);
890         forward_syslog_iovec(s, &iovec, 1, ucred, tv);
891 }
892
893 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
894         struct iovec iovec[5];
895         char header_priority[6], header_time[64], header_pid[16];
896         int n = 0;
897         time_t t;
898         struct tm *tm;
899         char *ident_buf = NULL;
900
901         assert(s);
902         assert(priority >= 0);
903         assert(priority <= 999);
904         assert(message);
905
906         if (LOG_PRI(priority) > s->max_level_syslog)
907                 return;
908
909         /* First: priority field */
910         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
911         char_array_0(header_priority);
912         IOVEC_SET_STRING(iovec[n++], header_priority);
913
914         /* Second: timestamp */
915         t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
916         tm = localtime(&t);
917         if (!tm)
918                 return;
919         if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
920                 return;
921         IOVEC_SET_STRING(iovec[n++], header_time);
922
923         /* Third: identifier and PID */
924         if (ucred) {
925                 if (!identifier) {
926                         get_process_comm(ucred->pid, &ident_buf);
927                         identifier = ident_buf;
928                 }
929
930                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
931                 char_array_0(header_pid);
932
933                 if (identifier)
934                         IOVEC_SET_STRING(iovec[n++], identifier);
935
936                 IOVEC_SET_STRING(iovec[n++], header_pid);
937         } else if (identifier) {
938                 IOVEC_SET_STRING(iovec[n++], identifier);
939                 IOVEC_SET_STRING(iovec[n++], ": ");
940         }
941
942         /* Fourth: message */
943         IOVEC_SET_STRING(iovec[n++], message);
944
945         forward_syslog_iovec(s, iovec, n, ucred, tv);
946
947         free(ident_buf);
948 }
949
950 static void forward_console(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
951         struct iovec iovec[4];
952         char header_pid[16];
953         int n = 0, fd;
954         char *ident_buf = NULL;
955         const char *tty;
956
957         assert(s);
958         assert(message);
959
960         if (LOG_PRI(priority) > s->max_level_console)
961                 return;
962
963         /* First: identifier and PID */
964         if (ucred) {
965                 if (!identifier) {
966                         get_process_comm(ucred->pid, &ident_buf);
967                         identifier = ident_buf;
968                 }
969
970                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
971                 char_array_0(header_pid);
972
973                 if (identifier)
974                         IOVEC_SET_STRING(iovec[n++], identifier);
975
976                 IOVEC_SET_STRING(iovec[n++], header_pid);
977         } else if (identifier) {
978                 IOVEC_SET_STRING(iovec[n++], identifier);
979                 IOVEC_SET_STRING(iovec[n++], ": ");
980         }
981
982         /* Third: message */
983         IOVEC_SET_STRING(iovec[n++], message);
984         IOVEC_SET_STRING(iovec[n++], "\n");
985
986         tty = s->tty_path ? s->tty_path : "/dev/console";
987
988         fd = open_terminal(tty, O_WRONLY|O_NOCTTY|O_CLOEXEC);
989         if (fd < 0) {
990                 log_debug("Failed to open %s for logging: %s", tty, strerror(errno));
991                 goto finish;
992         }
993
994         if (writev(fd, iovec, n) < 0)
995                 log_debug("Failed to write to %s for logging: %s", tty, strerror(errno));
996
997         close_nointr_nofail(fd);
998
999 finish:
1000         free(ident_buf);
1001 }
1002
/* Returns priority unchanged if its facility bits are non-zero. Otherwise
 * returns the level bits of priority combined with the LOG_USER facility. */
int syslog_fixup_facility(int priority) {
        return (priority & LOG_FACMASK) != 0
                ? priority
                : ((priority & LOG_PRIMASK) | LOG_USER);
}
1010
1011 void syslog_read_identifier(const char **buf, char **identifier, char **pid) {
1012         const char *p;
1013         char *t;
1014         size_t l, e;
1015
1016         assert(buf);
1017         assert(identifier);
1018         assert(pid);
1019
1020         p = *buf;
1021
1022         p += strspn(p, WHITESPACE);
1023         l = strcspn(p, WHITESPACE);
1024
1025         if (l <= 0 ||
1026             p[l-1] != ':')
1027                 return;
1028
1029         e = l;
1030         l--;
1031
1032         if (p[l-1] == ']') {
1033                 size_t k = l-1;
1034
1035                 for (;;) {
1036
1037                         if (p[k] == '[') {
1038                                 t = strndup(p+k+1, l-k-2);
1039                                 if (t)
1040                                         *pid = t;
1041
1042                                 l = k;
1043                                 break;
1044                         }
1045
1046                         if (k == 0)
1047                                 break;
1048
1049                         k--;
1050                 }
1051         }
1052
1053         t = strndup(p, l);
1054         if (t)
1055                 *identifier = t;
1056
1057         *buf = p + e;
1058         *buf += strspn(*buf, WHITESPACE);
1059 }
1060
1061 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv, const char *label, size_t label_len) {
1062         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
1063         struct iovec iovec[N_IOVEC_META_FIELDS + 6];
1064         unsigned n = 0;
1065         int priority = LOG_USER | LOG_INFO;
1066         char *identifier = NULL, *pid = NULL;
1067         const char *orig;
1068
1069         assert(s);
1070         assert(buf);
1071
1072         orig = buf;
1073         syslog_parse_priority((char**) &buf, &priority);
1074
1075         if (s->forward_to_syslog)
1076                 forward_syslog_raw(s, priority, orig, ucred, tv);
1077
1078         syslog_skip_date((char**) &buf);
1079         syslog_read_identifier(&buf, &identifier, &pid);
1080
1081         if (s->forward_to_kmsg)
1082                 server_forward_kmsg(s, priority, identifier, buf, ucred);
1083
1084         if (s->forward_to_console)
1085                 forward_console(s, priority, identifier, buf, ucred);
1086
1087         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
1088
1089         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1090                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1091
1092         if (priority & LOG_FACMASK)
1093                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1094                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1095
1096         if (identifier) {
1097                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1098                 if (syslog_identifier)
1099                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1100         }
1101
1102         if (pid) {
1103                 syslog_pid = strappend("SYSLOG_PID=", pid);
1104                 if (syslog_pid)
1105                         IOVEC_SET_STRING(iovec[n++], syslog_pid);
1106         }
1107
1108         message = strappend("MESSAGE=", buf);
1109         if (message)
1110                 IOVEC_SET_STRING(iovec[n++], message);
1111
1112         server_dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, label, label_len, NULL, priority);
1113
1114         free(message);
1115         free(identifier);
1116         free(pid);
1117         free(syslog_priority);
1118         free(syslog_facility);
1119         free(syslog_identifier);
1120         free(syslog_pid);
1121 }
1122
/* Checks whether the l bytes at p form a field name clients may set.
 *
 * This roughly follows the POSIX syntax recommendations for environment
 * variable names, with a few additional restrictions:
 *
 * http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
 *
 * A name is accepted if it is 1 to 64 bytes long, consists only of A-Z,
 * 0-9 and '_', and starts with neither a digit nor an underscore (names
 * with a leading underscore are protected). */
static bool valid_user_field(const char *p, size_t l) {
        size_t i;

        /* Neither empty nor overly long names */
        if (l == 0 || l > 64)
                return false;

        /* The first character may be neither '_' nor a digit */
        if (p[0] == '_' || (p[0] >= '0' && p[0] <= '9'))
                return false;

        /* All characters must be in the set A-Z, 0-9, '_' */
        for (i = 0; i < l; i++) {
                char c = p[i];
                bool is_upper = c >= 'A' && c <= 'Z';
                bool is_digit = c >= '0' && c <= '9';

                if (!is_upper && !is_digit && c != '_')
                        return false;
        }

        return true;
}
1157
1158 static void process_native_message(
1159                 Server *s,
1160                 const void *buffer, size_t buffer_size,
1161                 struct ucred *ucred,
1162                 struct timeval *tv,
1163                 const char *label, size_t label_len) {
1164
1165         struct iovec *iovec = NULL;
1166         unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1167         const char *p;
1168         size_t remaining;
1169         int priority = LOG_INFO;
1170         char *identifier = NULL, *message = NULL;
1171
1172         assert(s);
1173         assert(buffer || buffer_size == 0);
1174
1175         p = buffer;
1176         remaining = buffer_size;
1177
1178         while (remaining > 0) {
1179                 const char *e, *q;
1180
1181                 e = memchr(p, '\n', remaining);
1182
1183                 if (!e) {
1184                         /* Trailing noise, let's ignore it, and flush what we collected */
1185                         log_debug("Received message with trailing noise, ignoring.");
1186                         break;
1187                 }
1188
1189                 if (e == p) {
1190                         /* Entry separator */
1191                         server_dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
1192                         n = 0;
1193                         priority = LOG_INFO;
1194
1195                         p++;
1196                         remaining--;
1197                         continue;
1198                 }
1199
1200                 if (*p == '.' || *p == '#') {
1201                         /* Ignore control commands for now, and
1202                          * comments too. */
1203                         remaining -= (e - p) + 1;
1204                         p = e + 1;
1205                         continue;
1206                 }
1207
1208                 /* A property follows */
1209
1210                 if (n+N_IOVEC_META_FIELDS >= m) {
1211                         struct iovec *c;
1212                         unsigned u;
1213
1214                         u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1215                         c = realloc(iovec, u * sizeof(struct iovec));
1216                         if (!c) {
1217                                 log_oom();
1218                                 break;
1219                         }
1220
1221                         iovec = c;
1222                         m = u;
1223                 }
1224
1225                 q = memchr(p, '=', e - p);
1226                 if (q) {
1227                         if (valid_user_field(p, q - p)) {
1228                                 size_t l;
1229
1230                                 l = e - p;
1231
1232                                 /* If the field name starts with an
1233                                  * underscore, skip the variable,
1234                                  * since that indidates a trusted
1235                                  * field */
1236                                 iovec[n].iov_base = (char*) p;
1237                                 iovec[n].iov_len = l;
1238                                 n++;
1239
1240                                 /* We need to determine the priority
1241                                  * of this entry for the rate limiting
1242                                  * logic */
1243                                 if (l == 10 &&
1244                                     memcmp(p, "PRIORITY=", 9) == 0 &&
1245                                     p[9] >= '0' && p[9] <= '9')
1246                                         priority = (priority & LOG_FACMASK) | (p[9] - '0');
1247
1248                                 else if (l == 17 &&
1249                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1250                                          p[16] >= '0' && p[16] <= '9')
1251                                         priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1252
1253                                 else if (l == 18 &&
1254                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1255                                          p[16] >= '0' && p[16] <= '9' &&
1256                                          p[17] >= '0' && p[17] <= '9')
1257                                         priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1258
1259                                 else if (l >= 19 &&
1260                                          memcmp(p, "SYSLOG_IDENTIFIER=", 18) == 0) {
1261                                         char *t;
1262
1263                                         t = strndup(p + 18, l - 18);
1264                                         if (t) {
1265                                                 free(identifier);
1266                                                 identifier = t;
1267                                         }
1268                                 } else if (l >= 8 &&
1269                                            memcmp(p, "MESSAGE=", 8) == 0) {
1270                                         char *t;
1271
1272                                         t = strndup(p + 8, l - 8);
1273                                         if (t) {
1274                                                 free(message);
1275                                                 message = t;
1276                                         }
1277                                 }
1278                         }
1279
1280                         remaining -= (e - p) + 1;
1281                         p = e + 1;
1282                         continue;
1283                 } else {
1284                         le64_t l_le;
1285                         uint64_t l;
1286                         char *k;
1287
1288                         if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1289                                 log_debug("Failed to parse message, ignoring.");
1290                                 break;
1291                         }
1292
1293                         memcpy(&l_le, e + 1, sizeof(uint64_t));
1294                         l = le64toh(l_le);
1295
1296                         if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1297                             e[1+sizeof(uint64_t)+l] != '\n') {
1298                                 log_debug("Failed to parse message, ignoring.");
1299                                 break;
1300                         }
1301
1302                         k = malloc((e - p) + 1 + l);
1303                         if (!k) {
1304                                 log_oom();
1305                                 break;
1306                         }
1307
1308                         memcpy(k, p, e - p);
1309                         k[e - p] = '=';
1310                         memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1311
1312                         if (valid_user_field(p, e - p)) {
1313                                 iovec[n].iov_base = k;
1314                                 iovec[n].iov_len = (e - p) + 1 + l;
1315                                 n++;
1316                         } else
1317                                 free(k);
1318
1319                         remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1320                         p = e + 1 + sizeof(uint64_t) + l + 1;
1321                 }
1322         }
1323
1324         if (n <= 0)
1325                 goto finish;
1326
1327         tn = n++;
1328         IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1329
1330         if (message) {
1331                 if (s->forward_to_syslog)
1332                         forward_syslog(s, priority, identifier, message, ucred, tv);
1333
1334                 if (s->forward_to_kmsg)
1335                         server_forward_kmsg(s, priority, identifier, message, ucred);
1336
1337                 if (s->forward_to_console)
1338                         forward_console(s, priority, identifier, message, ucred);
1339         }
1340
1341         server_dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
1342
1343 finish:
1344         for (j = 0; j < n; j++)  {
1345                 if (j == tn)
1346                         continue;
1347
1348                 if (iovec[j].iov_base < buffer ||
1349                     (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1350                         free(iovec[j].iov_base);
1351         }
1352
1353         free(iovec);
1354         free(identifier);
1355         free(message);
1356 }
1357
1358 static void process_native_file(
1359                 Server *s,
1360                 int fd,
1361                 struct ucred *ucred,
1362                 struct timeval *tv,
1363                 const char *label, size_t label_len) {
1364
1365         struct stat st;
1366         void *p;
1367         ssize_t n;
1368
1369         assert(s);
1370         assert(fd >= 0);
1371
1372         /* Data is in the passed file, since it didn't fit in a
1373          * datagram. We can't map the file here, since clients might
1374          * then truncate it and trigger a SIGBUS for us. So let's
1375          * stupidly read it */
1376
1377         if (fstat(fd, &st) < 0) {
1378                 log_error("Failed to stat passed file, ignoring: %m");
1379                 return;
1380         }
1381
1382         if (!S_ISREG(st.st_mode)) {
1383                 log_error("File passed is not regular. Ignoring.");
1384                 return;
1385         }
1386
1387         if (st.st_size <= 0)
1388                 return;
1389
1390         if (st.st_size > ENTRY_SIZE_MAX) {
1391                 log_error("File passed too large. Ignoring.");
1392                 return;
1393         }
1394
1395         p = malloc(st.st_size);
1396         if (!p) {
1397                 log_oom();
1398                 return;
1399         }
1400
1401         n = pread(fd, p, st.st_size, 0);
1402         if (n < 0)
1403                 log_error("Failed to read file, ignoring: %s", strerror(-n));
1404         else if (n > 0)
1405                 process_native_message(s, p, n, ucred, tv, label, label_len);
1406
1407         free(p);
1408 }
1409
1410 static int stdout_stream_log(StdoutStream *s, const char *p) {
1411         struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1412         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1413         unsigned n = 0;
1414         int priority;
1415         char *label = NULL;
1416         size_t label_len = 0;
1417
1418         assert(s);
1419         assert(p);
1420
1421         if (isempty(p))
1422                 return 0;
1423
1424         priority = s->priority;
1425
1426         if (s->level_prefix)
1427                 syslog_parse_priority((char**) &p, &priority);
1428
1429         if (s->forward_to_syslog || s->server->forward_to_syslog)
1430                 forward_syslog(s->server, syslog_fixup_facility(priority), s->identifier, p, &s->ucred, NULL);
1431
1432         if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1433                 server_forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1434
1435         if (s->forward_to_console || s->server->forward_to_console)
1436                 forward_console(s->server, priority, s->identifier, p, &s->ucred);
1437
1438         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1439
1440         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1441                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1442
1443         if (priority & LOG_FACMASK)
1444                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1445                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1446
1447         if (s->identifier) {
1448                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1449                 if (syslog_identifier)
1450                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1451         }
1452
1453         message = strappend("MESSAGE=", p);
1454         if (message)
1455                 IOVEC_SET_STRING(iovec[n++], message);
1456
1457 #ifdef HAVE_SELINUX
1458         if (s->security_context) {
1459                 label = (char*) s->security_context;
1460                 label_len = strlen((char*) s->security_context);
1461         }
1462 #endif
1463
1464         server_dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, label, label_len, s->unit_id, priority);
1465
1466         free(message);
1467         free(syslog_priority);
1468         free(syslog_facility);
1469         free(syslog_identifier);
1470
1471         return 0;
1472 }
1473
1474 static int stdout_stream_line(StdoutStream *s, char *p) {
1475         int r;
1476
1477         assert(s);
1478         assert(p);
1479
1480         p = strstrip(p);
1481
1482         switch (s->state) {
1483
1484         case STDOUT_STREAM_IDENTIFIER:
1485                 if (isempty(p))
1486                         s->identifier = NULL;
1487                 else  {
1488                         s->identifier = strdup(p);
1489                         if (!s->identifier)
1490                                 return log_oom();
1491                 }
1492
1493                 s->state = STDOUT_STREAM_UNIT_ID;
1494                 return 0;
1495
1496         case STDOUT_STREAM_UNIT_ID:
1497                 if (s->ucred.uid == 0) {
1498                         if (isempty(p))
1499                                 s->unit_id = NULL;
1500                         else  {
1501                                 s->unit_id = strdup(p);
1502                                 if (!s->unit_id)
1503                                         return log_oom();
1504                         }
1505                 }
1506
1507                 s->state = STDOUT_STREAM_PRIORITY;
1508                 return 0;
1509
1510         case STDOUT_STREAM_PRIORITY:
1511                 r = safe_atoi(p, &s->priority);
1512                 if (r < 0 || s->priority <= 0 || s->priority >= 999) {
1513                         log_warning("Failed to parse log priority line.");
1514                         return -EINVAL;
1515                 }
1516
1517                 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1518                 return 0;
1519
1520         case STDOUT_STREAM_LEVEL_PREFIX:
1521                 r = parse_boolean(p);
1522                 if (r < 0) {
1523                         log_warning("Failed to parse level prefix line.");
1524                         return -EINVAL;
1525                 }
1526
1527                 s->level_prefix = !!r;
1528                 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1529                 return 0;
1530
1531         case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1532                 r = parse_boolean(p);
1533                 if (r < 0) {
1534                         log_warning("Failed to parse forward to syslog line.");
1535                         return -EINVAL;
1536                 }
1537
1538                 s->forward_to_syslog = !!r;
1539                 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1540                 return 0;
1541
1542         case STDOUT_STREAM_FORWARD_TO_KMSG:
1543                 r = parse_boolean(p);
1544                 if (r < 0) {
1545                         log_warning("Failed to parse copy to kmsg line.");
1546                         return -EINVAL;
1547                 }
1548
1549                 s->forward_to_kmsg = !!r;
1550                 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1551                 return 0;
1552
1553         case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1554                 r = parse_boolean(p);
1555                 if (r < 0) {
1556                         log_warning("Failed to parse copy to console line.");
1557                         return -EINVAL;
1558                 }
1559
1560                 s->forward_to_console = !!r;
1561                 s->state = STDOUT_STREAM_RUNNING;
1562                 return 0;
1563
1564         case STDOUT_STREAM_RUNNING:
1565                 return stdout_stream_log(s, p);
1566         }
1567
1568         assert_not_reached("Unknown stream state");
1569 }
1570
1571 static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
1572         char *p;
1573         size_t remaining;
1574         int r;
1575
1576         assert(s);
1577
1578         p = s->buffer;
1579         remaining = s->length;
1580         for (;;) {
1581                 char *end;
1582                 size_t skip;
1583
1584                 end = memchr(p, '\n', remaining);
1585                 if (end)
1586                         skip = end - p + 1;
1587                 else if (remaining >= sizeof(s->buffer) - 1) {
1588                         end = p + sizeof(s->buffer) - 1;
1589                         skip = remaining;
1590                 } else
1591                         break;
1592
1593                 *end = 0;
1594
1595                 r = stdout_stream_line(s, p);
1596                 if (r < 0)
1597                         return r;
1598
1599                 remaining -= skip;
1600                 p += skip;
1601         }
1602
1603         if (force_flush && remaining > 0) {
1604                 p[remaining] = 0;
1605                 r = stdout_stream_line(s, p);
1606                 if (r < 0)
1607                         return r;
1608
1609                 p += remaining;
1610                 remaining = 0;
1611         }
1612
1613         if (p > s->buffer) {
1614                 memmove(s->buffer, p, remaining);
1615                 s->length = remaining;
1616         }
1617
1618         return 0;
1619 }
1620
1621 static int stdout_stream_process(StdoutStream *s) {
1622         ssize_t l;
1623         int r;
1624
1625         assert(s);
1626
1627         l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1628         if (l < 0) {
1629
1630                 if (errno == EAGAIN)
1631                         return 0;
1632
1633                 log_warning("Failed to read from stream: %m");
1634                 return -errno;
1635         }
1636
1637         if (l == 0) {
1638                 r = stdout_stream_scan(s, true);
1639                 if (r < 0)
1640                         return r;
1641
1642                 return 0;
1643         }
1644
1645         s->length += l;
1646         r = stdout_stream_scan(s, false);
1647         if (r < 0)
1648                 return r;
1649
1650         return 1;
1651
1652 }
1653
1654 static void stdout_stream_free(StdoutStream *s) {
1655         assert(s);
1656
1657         if (s->server) {
1658                 assert(s->server->n_stdout_streams > 0);
1659                 s->server->n_stdout_streams --;
1660                 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
1661         }
1662
1663         if (s->fd >= 0) {
1664                 if (s->server)
1665                         epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1666
1667                 close_nointr_nofail(s->fd);
1668         }
1669
1670 #ifdef HAVE_SELINUX
1671         if (s->security_context)
1672                 freecon(s->security_context);
1673 #endif
1674
1675         free(s->identifier);
1676         free(s);
1677 }
1678
1679 static int stdout_stream_new(Server *s) {
1680         StdoutStream *stream;
1681         int fd, r;
1682         socklen_t len;
1683         struct epoll_event ev;
1684
1685         assert(s);
1686
1687         fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1688         if (fd < 0) {
1689                 if (errno == EAGAIN)
1690                         return 0;
1691
1692                 log_error("Failed to accept stdout connection: %m");
1693                 return -errno;
1694         }
1695
1696         if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1697                 log_warning("Too many stdout streams, refusing connection.");
1698                 close_nointr_nofail(fd);
1699                 return 0;
1700         }
1701
1702         stream = new0(StdoutStream, 1);
1703         if (!stream) {
1704                 close_nointr_nofail(fd);
1705                 return log_oom();
1706         }
1707
1708         stream->fd = fd;
1709
1710         len = sizeof(stream->ucred);
1711         if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1712                 log_error("Failed to determine peer credentials: %m");
1713                 r = -errno;
1714                 goto fail;
1715         }
1716
1717 #ifdef HAVE_SELINUX
1718         if (getpeercon(fd, &stream->security_context) < 0 && errno != ENOPROTOOPT)
1719                 log_error("Failed to determine peer security context: %m");
1720 #endif
1721
1722         if (shutdown(fd, SHUT_WR) < 0) {
1723                 log_error("Failed to shutdown writing side of socket: %m");
1724                 r = -errno;
1725                 goto fail;
1726         }
1727
1728         zero(ev);
1729         ev.data.ptr = stream;
1730         ev.events = EPOLLIN;
1731         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1732                 log_error("Failed to add stream to event loop: %m");
1733                 r = -errno;
1734                 goto fail;
1735         }
1736
1737         stream->server = s;
1738         LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1739         s->n_stdout_streams ++;
1740
1741         return 0;
1742
1743 fail:
1744         stdout_stream_free(stream);
1745         return r;
1746 }
1747
1748 static int system_journal_open(Server *s) {
1749         int r;
1750         char *fn;
1751         sd_id128_t machine;
1752         char ids[33];
1753
1754         r = sd_id128_get_machine(&machine);
1755         if (r < 0)
1756                 return r;
1757
1758         sd_id128_to_string(machine, ids);
1759
1760         if (!s->system_journal &&
1761             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
1762             access("/run/systemd/journal/flushed", F_OK) >= 0) {
1763
1764                 /* If in auto mode: first try to create the machine
1765                  * path, but not the prefix.
1766                  *
1767                  * If in persistent mode: create /var/log/journal and
1768                  * the machine path */
1769
1770                 if (s->storage == STORAGE_PERSISTENT)
1771                         (void) mkdir("/var/log/journal/", 0755);
1772
1773                 fn = strappend("/var/log/journal/", ids);
1774                 if (!fn)
1775                         return -ENOMEM;
1776
1777                 (void) mkdir(fn, 0755);
1778                 free(fn);
1779
1780                 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
1781                 if (!fn)
1782                         return -ENOMEM;
1783
1784                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
1785                 free(fn);
1786
1787                 if (r >= 0)
1788                         server_fix_perms(s, s->system_journal, 0);
1789                 else if (r < 0) {
1790
1791                         if (r != -ENOENT && r != -EROFS)
1792                                 log_warning("Failed to open system journal: %s", strerror(-r));
1793
1794                         r = 0;
1795                 }
1796         }
1797
1798         if (!s->runtime_journal &&
1799             (s->storage != STORAGE_NONE)) {
1800
1801                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
1802                 if (!fn)
1803                         return -ENOMEM;
1804
1805                 if (s->system_journal) {
1806
1807                         /* Try to open the runtime journal, but only
1808                          * if it already exists, so that we can flush
1809                          * it into the system journal */
1810
1811                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1812                         free(fn);
1813
1814                         if (r < 0) {
1815                                 if (r != -ENOENT)
1816                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
1817
1818                                 r = 0;
1819                         }
1820
1821                 } else {
1822
1823                         /* OK, we really need the runtime journal, so create
1824                          * it if necessary. */
1825
1826                         (void) mkdir_parents(fn, 0755);
1827                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1828                         free(fn);
1829
1830                         if (r < 0) {
1831                                 log_error("Failed to open runtime journal: %s", strerror(-r));
1832                                 return r;
1833                         }
1834                 }
1835
1836                 if (s->runtime_journal)
1837                         server_fix_perms(s, s->runtime_journal, 0);
1838         }
1839
1840         return r;
1841 }
1842
1843 static int server_flush_to_var(Server *s) {
1844         Object *o = NULL;
1845         int r;
1846         sd_id128_t machine;
1847         sd_journal *j;
1848
1849         assert(s);
1850
1851         if (s->storage != STORAGE_AUTO &&
1852             s->storage != STORAGE_PERSISTENT)
1853                 return 0;
1854
1855         if (!s->runtime_journal)
1856                 return 0;
1857
1858         system_journal_open(s);
1859
1860         if (!s->system_journal)
1861                 return 0;
1862
1863         log_info("Flushing to /var...");
1864
1865         r = sd_id128_get_machine(&machine);
1866         if (r < 0) {
1867                 log_error("Failed to get machine id: %s", strerror(-r));
1868                 return r;
1869         }
1870
1871         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1872         if (r < 0) {
1873                 log_error("Failed to read runtime journal: %s", strerror(-r));
1874                 return r;
1875         }
1876
1877         SD_JOURNAL_FOREACH(j) {
1878                 JournalFile *f;
1879
1880                 f = j->current_file;
1881                 assert(f && f->current_offset > 0);
1882
1883                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1884                 if (r < 0) {
1885                         log_error("Can't read entry: %s", strerror(-r));
1886                         goto finish;
1887                 }
1888
1889                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1890                 if (r == -E2BIG) {
1891                         log_info("Allocation limit reached.");
1892
1893                         journal_file_post_change(s->system_journal);
1894                         server_rotate(s);
1895                         server_vacuum(s);
1896
1897                         r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1898                 }
1899
1900                 if (r < 0) {
1901                         log_error("Can't write entry: %s", strerror(-r));
1902                         goto finish;
1903                 }
1904         }
1905
1906 finish:
1907         journal_file_post_change(s->system_journal);
1908
1909         journal_file_close(s->runtime_journal);
1910         s->runtime_journal = NULL;
1911
1912         if (r >= 0)
1913                 rm_rf("/run/log/journal", false, true, false);
1914
1915         return r;
1916 }
1917
1918 static int process_event(Server *s, struct epoll_event *ev) {
1919         assert(s);
1920         assert(ev);
1921
1922         if (ev->data.fd == s->signal_fd) {
1923                 struct signalfd_siginfo sfsi;
1924                 ssize_t n;
1925
1926                 if (ev->events != EPOLLIN) {
1927                         log_info("Got invalid event from epoll.");
1928                         return -EIO;
1929                 }
1930
1931                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1932                 if (n != sizeof(sfsi)) {
1933
1934                         if (n >= 0)
1935                                 return -EIO;
1936
1937                         if (errno == EINTR || errno == EAGAIN)
1938                                 return 1;
1939
1940                         return -errno;
1941                 }
1942
1943                 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1944
1945                 if (sfsi.ssi_signo == SIGUSR1) {
1946                         touch("/run/systemd/journal/flushed");
1947                         server_flush_to_var(s);
1948                         return 1;
1949                 }
1950
1951                 if (sfsi.ssi_signo == SIGUSR2) {
1952                         server_rotate(s);
1953                         server_vacuum(s);
1954                         return 1;
1955                 }
1956
1957                 return 0;
1958
1959         } else if (ev->data.fd == s->dev_kmsg_fd) {
1960                 int r;
1961
1962                 if (ev->events != EPOLLIN) {
1963                         log_info("Got invalid event from epoll.");
1964                         return -EIO;
1965                 }
1966
1967                 r = server_read_dev_kmsg(s);
1968                 if (r < 0)
1969                         return r;
1970
1971                 return 1;
1972
1973         } else if (ev->data.fd == s->native_fd ||
1974                    ev->data.fd == s->syslog_fd) {
1975
1976                 if (ev->events != EPOLLIN) {
1977                         log_info("Got invalid event from epoll.");
1978                         return -EIO;
1979                 }
1980
1981                 for (;;) {
1982                         struct msghdr msghdr;
1983                         struct iovec iovec;
1984                         struct ucred *ucred = NULL;
1985                         struct timeval *tv = NULL;
1986                         struct cmsghdr *cmsg;
1987                         char *label = NULL;
1988                         size_t label_len = 0;
1989                         union {
1990                                 struct cmsghdr cmsghdr;
1991
1992                                 /* We use NAME_MAX space for the
1993                                  * SELinux label here. The kernel
1994                                  * currently enforces no limit, but
1995                                  * according to suggestions from the
1996                                  * SELinux people this will change and
1997                                  * it will probably be identical to
1998                                  * NAME_MAX. For now we use that, but
1999                                  * this should be updated one day when
2000                                  * the final limit is known.*/
2001                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
2002                                             CMSG_SPACE(sizeof(struct timeval)) +
2003                                             CMSG_SPACE(sizeof(int)) + /* fd */
2004                                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
2005                         } control;
2006                         ssize_t n;
2007                         int v;
2008                         int *fds = NULL;
2009                         unsigned n_fds = 0;
2010
2011                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
2012                                 log_error("SIOCINQ failed: %m");
2013                                 return -errno;
2014                         }
2015
2016                         if (s->buffer_size < (size_t) v) {
2017                                 void *b;
2018                                 size_t l;
2019
2020                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
2021                                 b = realloc(s->buffer, l+1);
2022
2023                                 if (!b) {
2024                                         log_error("Couldn't increase buffer.");
2025                                         return -ENOMEM;
2026                                 }
2027
2028                                 s->buffer_size = l;
2029                                 s->buffer = b;
2030                         }
2031
2032                         zero(iovec);
2033                         iovec.iov_base = s->buffer;
2034                         iovec.iov_len = s->buffer_size;
2035
2036                         zero(control);
2037                         zero(msghdr);
2038                         msghdr.msg_iov = &iovec;
2039                         msghdr.msg_iovlen = 1;
2040                         msghdr.msg_control = &control;
2041                         msghdr.msg_controllen = sizeof(control);
2042
2043                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
2044                         if (n < 0) {
2045
2046                                 if (errno == EINTR || errno == EAGAIN)
2047                                         return 1;
2048
2049                                 log_error("recvmsg() failed: %m");
2050                                 return -errno;
2051                         }
2052
2053                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
2054
2055                                 if (cmsg->cmsg_level == SOL_SOCKET &&
2056                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
2057                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
2058                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
2059                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2060                                          cmsg->cmsg_type == SCM_SECURITY) {
2061                                         label = (char*) CMSG_DATA(cmsg);
2062                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
2063                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
2064                                          cmsg->cmsg_type == SO_TIMESTAMP &&
2065                                          cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
2066                                         tv = (struct timeval*) CMSG_DATA(cmsg);
2067                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2068                                          cmsg->cmsg_type == SCM_RIGHTS) {
2069                                         fds = (int*) CMSG_DATA(cmsg);
2070                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
2071                                 }
2072                         }
2073
2074                         if (ev->data.fd == s->syslog_fd) {
2075                                 char *e;
2076
2077                                 if (n > 0 && n_fds == 0) {
2078                                         e = memchr(s->buffer, '\n', n);
2079                                         if (e)
2080                                                 *e = 0;
2081                                         else
2082                                                 s->buffer[n] = 0;
2083
2084                                         process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
2085                                 } else if (n_fds > 0)
2086                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
2087
2088                         } else {
2089                                 if (n > 0 && n_fds == 0)
2090                                         process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
2091                                 else if (n == 0 && n_fds == 1)
2092                                         process_native_file(s, fds[0], ucred, tv, label, label_len);
2093                                 else if (n_fds > 0)
2094                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
2095                         }
2096
2097                         close_many(fds, n_fds);
2098                 }
2099
2100                 return 1;
2101
2102         } else if (ev->data.fd == s->stdout_fd) {
2103
2104                 if (ev->events != EPOLLIN) {
2105                         log_info("Got invalid event from epoll.");
2106                         return -EIO;
2107                 }
2108
2109                 stdout_stream_new(s);
2110                 return 1;
2111
2112         } else {
2113                 StdoutStream *stream;
2114
2115                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
2116                         log_info("Got invalid event from epoll.");
2117                         return -EIO;
2118                 }
2119
2120                 /* If it is none of the well-known fds, it must be an
2121                  * stdout stream fd. Note that this is a bit ugly here
2122                  * (since we rely that none of the well-known fds
2123                  * could be interpreted as pointer), but nonetheless
2124                  * safe, since the well-known fds would never get an
2125                  * fd > 4096, i.e. beyond the first memory page */
2126
2127                 stream = ev->data.ptr;
2128
2129                 if (stdout_stream_process(stream) <= 0)
2130                         stdout_stream_free(stream);
2131
2132                 return 1;
2133         }
2134
2135         log_error("Unknown event.");
2136         return 0;
2137 }
2138
2139 static int open_syslog_socket(Server *s) {
2140         union sockaddr_union sa;
2141         int one, r;
2142         struct epoll_event ev;
2143
2144         assert(s);
2145
2146         if (s->syslog_fd < 0) {
2147
2148                 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2149                 if (s->syslog_fd < 0) {
2150                         log_error("socket() failed: %m");
2151                         return -errno;
2152                 }
2153
2154                 zero(sa);
2155                 sa.un.sun_family = AF_UNIX;
2156                 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
2157
2158                 unlink(sa.un.sun_path);
2159
2160                 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2161                 if (r < 0) {
2162                         log_error("bind() failed: %m");
2163                         return -errno;
2164                 }
2165
2166                 chmod(sa.un.sun_path, 0666);
2167         } else
2168                 fd_nonblock(s->syslog_fd, 1);
2169
2170         one = 1;
2171         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2172         if (r < 0) {
2173                 log_error("SO_PASSCRED failed: %m");
2174                 return -errno;
2175         }
2176
2177 #ifdef HAVE_SELINUX
2178         one = 1;
2179         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2180         if (r < 0)
2181                 log_warning("SO_PASSSEC failed: %m");
2182 #endif
2183
2184         one = 1;
2185         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2186         if (r < 0) {
2187                 log_error("SO_TIMESTAMP failed: %m");
2188                 return -errno;
2189         }
2190
2191         zero(ev);
2192         ev.events = EPOLLIN;
2193         ev.data.fd = s->syslog_fd;
2194         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2195                 log_error("Failed to add syslog server fd to epoll object: %m");
2196                 return -errno;
2197         }
2198
2199         return 0;
2200 }
2201
2202 static int open_native_socket(Server*s) {
2203         union sockaddr_union sa;
2204         int one, r;
2205         struct epoll_event ev;
2206
2207         assert(s);
2208
2209         if (s->native_fd < 0) {
2210
2211                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2212                 if (s->native_fd < 0) {
2213                         log_error("socket() failed: %m");
2214                         return -errno;
2215                 }
2216
2217                 zero(sa);
2218                 sa.un.sun_family = AF_UNIX;
2219                 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
2220
2221                 unlink(sa.un.sun_path);
2222
2223                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2224                 if (r < 0) {
2225                         log_error("bind() failed: %m");
2226                         return -errno;
2227                 }
2228
2229                 chmod(sa.un.sun_path, 0666);
2230         } else
2231                 fd_nonblock(s->native_fd, 1);
2232
2233         one = 1;
2234         r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2235         if (r < 0) {
2236                 log_error("SO_PASSCRED failed: %m");
2237                 return -errno;
2238         }
2239
2240 #ifdef HAVE_SELINUX
2241         one = 1;
2242         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2243         if (r < 0)
2244                 log_warning("SO_PASSSEC failed: %m");
2245 #endif
2246
2247         one = 1;
2248         r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2249         if (r < 0) {
2250                 log_error("SO_TIMESTAMP failed: %m");
2251                 return -errno;
2252         }
2253
2254         zero(ev);
2255         ev.events = EPOLLIN;
2256         ev.data.fd = s->native_fd;
2257         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2258                 log_error("Failed to add native server fd to epoll object: %m");
2259                 return -errno;
2260         }
2261
2262         return 0;
2263 }
2264
2265 static int open_stdout_socket(Server *s) {
2266         union sockaddr_union sa;
2267         int r;
2268         struct epoll_event ev;
2269
2270         assert(s);
2271
2272         if (s->stdout_fd < 0) {
2273
2274                 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2275                 if (s->stdout_fd < 0) {
2276                         log_error("socket() failed: %m");
2277                         return -errno;
2278                 }
2279
2280                 zero(sa);
2281                 sa.un.sun_family = AF_UNIX;
2282                 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
2283
2284                 unlink(sa.un.sun_path);
2285
2286                 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2287                 if (r < 0) {
2288                         log_error("bind() failed: %m");
2289                         return -errno;
2290                 }
2291
2292                 chmod(sa.un.sun_path, 0666);
2293
2294                 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2295                         log_error("liste() failed: %m");
2296                         return -errno;
2297                 }
2298         } else
2299                 fd_nonblock(s->stdout_fd, 1);
2300
2301         zero(ev);
2302         ev.events = EPOLLIN;
2303         ev.data.fd = s->stdout_fd;
2304         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2305                 log_error("Failed to add stdout server fd to epoll object: %m");
2306                 return -errno;
2307         }
2308
2309         return 0;
2310 }
2311
2312 static int open_signalfd(Server *s) {
2313         sigset_t mask;
2314         struct epoll_event ev;
2315
2316         assert(s);
2317
2318         assert_se(sigemptyset(&mask) == 0);
2319         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
2320         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2321
2322         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2323         if (s->signal_fd < 0) {
2324                 log_error("signalfd(): %m");
2325                 return -errno;
2326         }
2327
2328         zero(ev);
2329         ev.events = EPOLLIN;
2330         ev.data.fd = s->signal_fd;
2331
2332         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2333                 log_error("epoll_ctl(): %m");
2334                 return -errno;
2335         }
2336
2337         return 0;
2338 }
2339
2340 static int server_parse_proc_cmdline(Server *s) {
2341         char *line, *w, *state;
2342         int r;
2343         size_t l;
2344
2345         if (detect_container(NULL) > 0)
2346                 return 0;
2347
2348         r = read_one_line_file("/proc/cmdline", &line);
2349         if (r < 0) {
2350                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2351                 return 0;
2352         }
2353
2354         FOREACH_WORD_QUOTED(w, l, line, state) {
2355                 char *word;
2356
2357                 word = strndup(w, l);
2358                 if (!word) {
2359                         r = -ENOMEM;
2360                         goto finish;
2361                 }
2362
2363                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
2364                         r = parse_boolean(word + 35);
2365                         if (r < 0)
2366                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2367                         else
2368                                 s->forward_to_syslog = r;
2369                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
2370                         r = parse_boolean(word + 33);
2371                         if (r < 0)
2372                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2373                         else
2374                                 s->forward_to_kmsg = r;
2375                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
2376                         r = parse_boolean(word + 36);
2377                         if (r < 0)
2378                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2379                         else
2380                                 s->forward_to_console = r;
2381                 } else if (startswith(word, "systemd.journald"))
2382                         log_warning("Invalid systemd.journald parameter. Ignoring.");
2383
2384                 free(word);
2385         }
2386
2387         r = 0;
2388
2389 finish:
2390         free(line);
2391         return r;
2392 }
2393
2394 static int server_parse_config_file(Server *s) {
2395         FILE *f;
2396         const char *fn;
2397         int r;
2398
2399         assert(s);
2400
2401         fn = "/etc/systemd/journald.conf";
2402         f = fopen(fn, "re");
2403         if (!f) {
2404                 if (errno == ENOENT)
2405                         return 0;
2406
2407                 log_warning("Failed to open configuration file %s: %m", fn);
2408                 return -errno;
2409         }
2410
2411         r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2412         if (r < 0)
2413                 log_warning("Failed to parse configuration file: %s", strerror(-r));
2414
2415         fclose(f);
2416
2417         return r;
2418 }
2419
2420 static int server_init(Server *s) {
2421         int n, r, fd;
2422
2423         assert(s);
2424
2425         zero(*s);
2426         s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
2427         s->compress = true;
2428         s->seal = true;
2429
2430         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
2431         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
2432
2433         s->forward_to_syslog = true;
2434
2435         s->max_level_store = LOG_DEBUG;
2436         s->max_level_syslog = LOG_DEBUG;
2437         s->max_level_kmsg = LOG_NOTICE;
2438         s->max_level_console = LOG_INFO;
2439
2440         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
2441         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
2442
2443         server_parse_config_file(s);
2444         server_parse_proc_cmdline(s);
2445
2446         mkdir_p("/run/systemd/journal", 0755);
2447
2448         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
2449         if (!s->user_journals)
2450                 return log_oom();
2451
2452         s->mmap = mmap_cache_new();
2453         if (!s->mmap)
2454                 return log_oom();
2455
2456         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2457         if (s->epoll_fd < 0) {
2458                 log_error("Failed to create epoll object: %m");
2459                 return -errno;
2460         }
2461
2462         n = sd_listen_fds(true);
2463         if (n < 0) {
2464                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
2465                 return n;
2466         }
2467
2468         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2469
2470                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
2471
2472                         if (s->native_fd >= 0) {
2473                                 log_error("Too many native sockets passed.");
2474                                 return -EINVAL;
2475                         }
2476
2477                         s->native_fd = fd;
2478
2479                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
2480
2481                         if (s->stdout_fd >= 0) {
2482                                 log_error("Too many stdout sockets passed.");
2483                                 return -EINVAL;
2484                         }
2485
2486                         s->stdout_fd = fd;
2487
2488                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
2489
2490                         if (s->syslog_fd >= 0) {
2491                                 log_error("Too many /dev/log sockets passed.");
2492                                 return -EINVAL;
2493                         }
2494
2495                         s->syslog_fd = fd;
2496
2497                 } else {
2498                         log_error("Unknown socket passed.");
2499                         return -EINVAL;
2500                 }
2501         }
2502
2503         r = open_syslog_socket(s);
2504         if (r < 0)
2505                 return r;
2506
2507         r = open_native_socket(s);
2508         if (r < 0)
2509                 return r;
2510
2511         r = open_stdout_socket(s);
2512         if (r < 0)
2513                 return r;
2514
2515         r = server_open_dev_kmsg(s);
2516         if (r < 0)
2517                 return r;
2518
2519         r = server_open_kernel_seqnum(s);
2520         if (r < 0)
2521                 return r;
2522
2523         r = open_signalfd(s);
2524         if (r < 0)
2525                 return r;
2526
2527         s->udev = udev_new();
2528         if (!s->udev)
2529                 return -ENOMEM;
2530
2531         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2532         if (!s->rate_limit)
2533                 return -ENOMEM;
2534
2535         r = system_journal_open(s);
2536         if (r < 0)
2537                 return r;
2538
2539         return 0;
2540 }
2541
2542 static void maybe_append_tags(Server *s) {
2543 #ifdef HAVE_GCRYPT
2544         JournalFile *f;
2545         Iterator i;
2546         usec_t n;
2547
2548         n = now(CLOCK_REALTIME);
2549
2550         if (s->system_journal)
2551                 journal_file_maybe_append_tag(s->system_journal, n);
2552
2553         HASHMAP_FOREACH(f, s->user_journals, i)
2554                 journal_file_maybe_append_tag(f, n);
2555 #endif
2556 }
2557
2558 static void server_done(Server *s) {
2559         JournalFile *f;
2560         assert(s);
2561
2562         while (s->stdout_streams)
2563                 stdout_stream_free(s->stdout_streams);
2564
2565         if (s->system_journal)
2566                 journal_file_close(s->system_journal);
2567
2568         if (s->runtime_journal)
2569                 journal_file_close(s->runtime_journal);
2570
2571         while ((f = hashmap_steal_first(s->user_journals)))
2572                 journal_file_close(f);
2573
2574         hashmap_free(s->user_journals);
2575
2576         if (s->epoll_fd >= 0)
2577                 close_nointr_nofail(s->epoll_fd);
2578
2579         if (s->signal_fd >= 0)
2580                 close_nointr_nofail(s->signal_fd);
2581
2582         if (s->syslog_fd >= 0)
2583                 close_nointr_nofail(s->syslog_fd);
2584
2585         if (s->native_fd >= 0)
2586                 close_nointr_nofail(s->native_fd);
2587
2588         if (s->stdout_fd >= 0)
2589                 close_nointr_nofail(s->stdout_fd);
2590
2591         if (s->dev_kmsg_fd >= 0)
2592                 close_nointr_nofail(s->dev_kmsg_fd);
2593
2594         if (s->rate_limit)
2595                 journal_rate_limit_free(s->rate_limit);
2596
2597         if (s->kernel_seqnum)
2598                 munmap(s->kernel_seqnum, sizeof(uint64_t));
2599
2600         free(s->buffer);
2601         free(s->tty_path);
2602
2603         if (s->mmap)
2604                 mmap_cache_unref(s->mmap);
2605
2606         if (s->udev)
2607                 udev_unref(s->udev);
2608 }
2609
2610 int main(int argc, char *argv[]) {
2611         Server server;
2612         int r;
2613
2614         /* if (getppid() != 1) { */
2615         /*         log_error("This program should be invoked by init only."); */
2616         /*         return EXIT_FAILURE; */
2617         /* } */
2618
2619         if (argc > 1) {
2620                 log_error("This program does not take arguments.");
2621                 return EXIT_FAILURE;
2622         }
2623
2624         log_set_target(LOG_TARGET_SAFE);
2625         log_set_facility(LOG_SYSLOG);
2626         log_set_max_level(LOG_DEBUG);
2627         log_parse_environment();
2628         log_open();
2629
2630         umask(0022);
2631
2632         r = server_init(&server);
2633         if (r < 0)
2634                 goto finish;
2635
2636         server_vacuum(&server);
2637         server_flush_to_var(&server);
2638         server_flush_dev_kmsg(&server);
2639
2640         log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
2641         server_driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
2642
2643         sd_notify(false,
2644                   "READY=1\n"
2645                   "STATUS=Processing requests...");
2646
2647         for (;;) {
2648                 struct epoll_event event;
2649                 int t;
2650
2651 #ifdef HAVE_GCRYPT
2652                 usec_t u;
2653
2654                 if (server.system_journal &&
2655                     journal_file_next_evolve_usec(server.system_journal, &u)) {
2656                         usec_t n;
2657
2658                         n = now(CLOCK_REALTIME);
2659
2660                         if (n >= u)
2661                                 t = 0;
2662                         else
2663                                 t = (int) ((u - n + USEC_PER_MSEC - 1) / USEC_PER_MSEC);
2664                 } else
2665 #endif
2666                         t = -1;
2667
2668                 r = epoll_wait(server.epoll_fd, &event, 1, t);
2669                 if (r < 0) {
2670
2671                         if (errno == EINTR)
2672                                 continue;
2673
2674                         log_error("epoll_wait() failed: %m");
2675                         r = -errno;
2676                         goto finish;
2677                 }
2678
2679                 if (r > 0) {
2680                         r = process_event(&server, &event);
2681                         if (r < 0)
2682                                 goto finish;
2683                         else if (r == 0)
2684                                 break;
2685                 }
2686
2687                 maybe_append_tags(&server);
2688         }
2689
2690         log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
2691         server_driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
2692
2693 finish:
2694         sd_notify(false,
2695                   "STATUS=Shutting down...");
2696
2697         server_done(&server);
2698
2699         return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
2700 }