chiark / gitweb /
2429dd3e278380e229b447665eed6a51587b9689
[elogind.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
32 #include <sys/mman.h>
33
34 #include <libudev.h>
35 #include <systemd/sd-journal.h>
36 #include <systemd/sd-messages.h>
37 #include <systemd/sd-daemon.h>
38
39 #ifdef HAVE_LOGIND
40 #include <systemd/sd-login.h>
41 #endif
42
43 #include "mkdir.h"
44 #include "hashmap.h"
45 #include "journal-file.h"
46 #include "socket-util.h"
47 #include "cgroup-util.h"
48 #include "list.h"
49 #include "journal-rate-limit.h"
50 #include "journal-internal.h"
51 #include "journal-vacuum.h"
52 #include "journal-authenticate.h"
53 #include "conf-parser.h"
54 #include "journald.h"
55 #include "virt.h"
56 #include "missing.h"
57
58 #ifdef HAVE_ACL
59 #include <sys/acl.h>
60 #include <acl/libacl.h>
61 #include "acl-util.h"
62 #endif
63
64 #ifdef HAVE_SELINUX
65 #include <selinux/selinux.h>
66 #endif
67
/* Upper bound on the number of per-user journal files kept open at the
 * same time; find_journal() closes entries until it is below this limit. */
#define USER_JOURNALS_MAX 1024
/* NOTE(review): presumably the maximum number of concurrently connected
 * stdout streams; the user is not visible in this part of the file. */
#define STDOUT_STREAMS_MAX 4096

/* Defaults for the rate limiter (interval is in microseconds). */
#define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 200

/* How long a result computed by available_space() stays cached. */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)

/* Number of iovec slots that must be reserved for the trusted metadata
 * fields that dispatch_message_real() appends to each entry. */
#define N_IOVEC_META_FIELDS 17
#define N_IOVEC_KERNEL_FIELDS 64
#define N_IOVEC_UDEV_FIELDS 32

/* 32 MiB. NOTE(review): presumably the largest accepted entry size;
 * confirm against the code that uses it. */
#define ENTRY_SIZE_MAX (1024*1024*32)
81
/* State of a StdoutStream. Apart from STDOUT_STREAM_RUNNING, every state
 * is named after one of the StdoutStream settings (identifier, unit_id,
 * priority, level_prefix, forward_to_*).
 * NOTE(review): presumably these are header lines read in this order
 * before the stream switches to RUNNING; the parser is not visible here. */
typedef enum StdoutStreamState {
        STDOUT_STREAM_IDENTIFIER,
        STDOUT_STREAM_UNIT_ID,
        STDOUT_STREAM_PRIORITY,
        STDOUT_STREAM_LEVEL_PREFIX,
        STDOUT_STREAM_FORWARD_TO_SYSLOG,
        STDOUT_STREAM_FORWARD_TO_KMSG,
        STDOUT_STREAM_FORWARD_TO_CONSOLE,
        STDOUT_STREAM_RUNNING
} StdoutStreamState;
92
/* Per-connection state of one stdout stream client. */
struct StdoutStream {
        Server *server;                 /* owning server */
        StdoutStreamState state;        /* current StdoutStreamState */

        int fd;                         /* file descriptor of the stream */

        struct ucred ucred;             /* credentials associated with the stream */
#ifdef HAVE_SELINUX
        security_context_t security_context;
#endif

        /* Per-stream settings; each has a matching StdoutStreamState value. */
        char *identifier;
        char *unit_id;
        int priority;
        bool level_prefix:1;
        bool forward_to_syslog:1;
        bool forward_to_kmsg:1;
        bool forward_to_console:1;

        /* Input buffer with room for LINE_MAX bytes plus one more byte.
         * NOTE(review): 'length' is presumably the number of bytes currently
         * held in 'buffer'; confirm against the reader. */
        char buffer[LINE_MAX+1];
        size_t length;

        /* Linkage for the list of stdout streams. */
        LIST_FIELDS(StdoutStream, stdout_stream);
};
117
/* Textual names of the Storage enum values, indexed by enum value. */
static const char* const storage_table[] = {
        [STORAGE_AUTO] = "auto",
        [STORAGE_VOLATILE] = "volatile",
        [STORAGE_PERSISTENT] = "persistent",
        [STORAGE_NONE] = "none"
};

/* Generate the string <-> Storage lookup helpers from storage_table, and
 * the configuration parser callback config_parse_storage built on them. */
DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
127
128 static uint64_t available_space(Server *s) {
129         char ids[33], *p;
130         const char *f;
131         sd_id128_t machine;
132         struct statvfs ss;
133         uint64_t sum = 0, avail = 0, ss_avail = 0;
134         int r;
135         DIR *d;
136         usec_t ts;
137         JournalMetrics *m;
138
139         ts = now(CLOCK_MONOTONIC);
140
141         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
142                 return s->cached_available_space;
143
144         r = sd_id128_get_machine(&machine);
145         if (r < 0)
146                 return 0;
147
148         if (s->system_journal) {
149                 f = "/var/log/journal/";
150                 m = &s->system_metrics;
151         } else {
152                 f = "/run/log/journal/";
153                 m = &s->runtime_metrics;
154         }
155
156         assert(m);
157
158         p = strappend(f, sd_id128_to_string(machine, ids));
159         if (!p)
160                 return 0;
161
162         d = opendir(p);
163         free(p);
164
165         if (!d)
166                 return 0;
167
168         if (fstatvfs(dirfd(d), &ss) < 0)
169                 goto finish;
170
171         for (;;) {
172                 struct stat st;
173                 struct dirent buf, *de;
174
175                 r = readdir_r(d, &buf, &de);
176                 if (r != 0)
177                         break;
178
179                 if (!de)
180                         break;
181
182                 if (!endswith(de->d_name, ".journal") &&
183                     !endswith(de->d_name, ".journal~"))
184                         continue;
185
186                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
187                         continue;
188
189                 if (!S_ISREG(st.st_mode))
190                         continue;
191
192                 sum += (uint64_t) st.st_blocks * 512UL;
193         }
194
195         avail = sum >= m->max_use ? 0 : m->max_use - sum;
196
197         ss_avail = ss.f_bsize * ss.f_bavail;
198
199         ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
200
201         if (ss_avail < avail)
202                 avail = ss_avail;
203
204         s->cached_available_space = avail;
205         s->cached_available_space_timestamp = ts;
206
207 finish:
208         closedir(d);
209
210         return avail;
211 }
212
213 static void server_read_file_gid(Server *s) {
214         const char *adm = "adm";
215         int r;
216
217         assert(s);
218
219         if (s->file_gid_valid)
220                 return;
221
222         r = get_group_creds(&adm, &s->file_gid);
223         if (r < 0)
224                 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
225
226         /* if we couldn't read the gid, then it will be 0, but that's
227          * fine and we shouldn't try to resolve the group again, so
228          * let's just pretend it worked right-away. */
229         s->file_gid_valid = true;
230 }
231
232 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
233         int r;
234 #ifdef HAVE_ACL
235         acl_t acl;
236         acl_entry_t entry;
237         acl_permset_t permset;
238 #endif
239
240         assert(f);
241
242         server_read_file_gid(s);
243
244         r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
245         if (r < 0)
246                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
247
248 #ifdef HAVE_ACL
249         if (uid <= 0)
250                 return;
251
252         acl = acl_get_fd(f->fd);
253         if (!acl) {
254                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
255                 return;
256         }
257
258         r = acl_find_uid(acl, uid, &entry);
259         if (r <= 0) {
260
261                 if (acl_create_entry(&acl, &entry) < 0 ||
262                     acl_set_tag_type(entry, ACL_USER) < 0 ||
263                     acl_set_qualifier(entry, &uid) < 0) {
264                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
265                         goto finish;
266                 }
267         }
268
269         if (acl_get_permset(entry, &permset) < 0 ||
270             acl_add_perm(permset, ACL_READ) < 0 ||
271             acl_calc_mask(&acl) < 0) {
272                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
273                 goto finish;
274         }
275
276         if (acl_set_fd(f->fd, acl) < 0)
277                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
278
279 finish:
280         acl_free(acl);
281 #endif
282 }
283
284 static JournalFile* find_journal(Server *s, uid_t uid) {
285         char *p;
286         int r;
287         JournalFile *f;
288         sd_id128_t machine;
289
290         assert(s);
291
292         /* We split up user logs only on /var, not on /run. If the
293          * runtime file is open, we write to it exclusively, in order
294          * to guarantee proper order as soon as we flush /run to
295          * /var and close the runtime file. */
296
297         if (s->runtime_journal)
298                 return s->runtime_journal;
299
300         if (uid <= 0)
301                 return s->system_journal;
302
303         r = sd_id128_get_machine(&machine);
304         if (r < 0)
305                 return s->system_journal;
306
307         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
308         if (f)
309                 return f;
310
311         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
312                      SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
313                 return s->system_journal;
314
315         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
316                 /* Too many open? Then let's close one */
317                 f = hashmap_steal_first(s->user_journals);
318                 assert(f);
319                 journal_file_close(f);
320         }
321
322         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
323         free(p);
324
325         if (r < 0)
326                 return s->system_journal;
327
328         server_fix_perms(s, f, uid);
329
330         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
331         if (r < 0) {
332                 journal_file_close(f);
333                 return s->system_journal;
334         }
335
336         return f;
337 }
338
339 static void server_rotate(Server *s) {
340         JournalFile *f;
341         void *k;
342         Iterator i;
343         int r;
344
345         log_info("Rotating...");
346
347         if (s->runtime_journal) {
348                 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
349                 if (r < 0)
350                         if (s->runtime_journal)
351                                 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
352                         else
353                                 log_error("Failed to create new runtime journal: %s", strerror(-r));
354                 else
355                         server_fix_perms(s, s->runtime_journal, 0);
356         }
357
358         if (s->system_journal) {
359                 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
360                 if (r < 0)
361                         if (s->system_journal)
362                                 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
363                         else
364                                 log_error("Failed to create new system journal: %s", strerror(-r));
365
366                 else
367                         server_fix_perms(s, s->system_journal, 0);
368         }
369
370         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
371                 r = journal_file_rotate(&f, s->compress, s->seal);
372                 if (r < 0)
373                         if (f->path)
374                                 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
375                         else
376                                 log_error("Failed to create user journal: %s", strerror(-r));
377                 else {
378                         hashmap_replace(s->user_journals, k, f);
379                         server_fix_perms(s, s->system_journal, PTR_TO_UINT32(k));
380                 }
381         }
382 }
383
384 static void server_vacuum(Server *s) {
385         char *p;
386         char ids[33];
387         sd_id128_t machine;
388         int r;
389
390         log_info("Vacuuming...");
391
392         r = sd_id128_get_machine(&machine);
393         if (r < 0) {
394                 log_error("Failed to get machine ID: %s", strerror(-r));
395                 return;
396         }
397
398         sd_id128_to_string(machine, ids);
399
400         if (s->system_journal) {
401                 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
402                         log_oom();
403                         return;
404                 }
405
406                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
407                 if (r < 0 && r != -ENOENT)
408                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
409                 free(p);
410         }
411
412         if (s->runtime_journal) {
413                 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
414                         log_oom();
415                         return;
416                 }
417
418                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
419                 if (r < 0 && r != -ENOENT)
420                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
421                 free(p);
422         }
423
424         s->cached_available_space_timestamp = 0;
425 }
426
427 static char *shortened_cgroup_path(pid_t pid) {
428         int r;
429         char *process_path, *init_path, *path;
430
431         assert(pid > 0);
432
433         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
434         if (r < 0)
435                 return NULL;
436
437         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
438         if (r < 0) {
439                 free(process_path);
440                 return NULL;
441         }
442
443         if (endswith(init_path, "/system"))
444                 init_path[strlen(init_path) - 7] = 0;
445         else if (streq(init_path, "/"))
446                 init_path[0] = 0;
447
448         if (startswith(process_path, init_path)) {
449                 char *p;
450
451                 p = strdup(process_path + strlen(init_path));
452                 if (!p) {
453                         free(process_path);
454                         free(init_path);
455                         return NULL;
456                 }
457                 path = p;
458         } else {
459                 path = process_path;
460                 process_path = NULL;
461         }
462
463         free(process_path);
464         free(init_path);
465
466         return path;
467 }
468
469 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
470         JournalFile *f;
471         bool vacuumed = false;
472         int r;
473
474         assert(s);
475         assert(iovec);
476         assert(n > 0);
477
478         f = find_journal(s, uid);
479         if (!f)
480                 return;
481
482         if (journal_file_rotate_suggested(f)) {
483                 log_info("Journal header limits reached or header out-of-date, rotating.");
484                 server_rotate(s);
485                 server_vacuum(s);
486                 vacuumed = true;
487
488                 f = find_journal(s, uid);
489                 if (!f)
490                         return;
491         }
492
493         for (;;) {
494                 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
495                 if (r >= 0)
496                         return;
497
498                 if (vacuumed ||
499                     (r != -E2BIG && /* hit limit */
500                      r != -EFBIG && /* hit fs limit */
501                      r != -EDQUOT && /* quota hit */
502                      r != -ENOSPC && /* disk full */
503                      r != -EBADMSG && /* corrupted */
504                      r != -ENODATA && /* truncated */
505                      r != -EHOSTDOWN && /* other machine */
506                      r != -EPROTONOSUPPORT && /* unsupported feature */
507                      r != -EBUSY && /* unclean shutdown */
508                      r != -ESHUTDOWN /* already archived */)) {
509                         log_error("Failed to write entry, ignoring: %s", strerror(-r));
510                         return;
511                 }
512
513                 if (r == -E2BIG || r == -EFBIG || r == EDQUOT || r == ENOSPC)
514                         log_info("Allocation limit reached, rotating.");
515                 else if (r == -EHOSTDOWN)
516                         log_info("Journal file from other machine, rotating.");
517                 else if (r == -EBUSY)
518                         log_info("Unlcean shutdown, rotating.");
519                 else
520                         log_warning("Journal file corrupted, rotating.");
521
522                 server_rotate(s);
523                 server_vacuum(s);
524                 vacuumed = true;
525
526                 f = find_journal(s, uid);
527                 if (!f)
528                         return;
529
530                 log_info("Retrying write.");
531         }
532 }
533
534 static void dispatch_message_real(
535                 Server *s,
536                 struct iovec *iovec, unsigned n, unsigned m,
537                 struct ucred *ucred,
538                 struct timeval *tv,
539                 const char *label, size_t label_len,
540                 const char *unit_id) {
541
542         char *pid = NULL, *uid = NULL, *gid = NULL,
543                 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
544                 *comm = NULL, *cmdline = NULL, *hostname = NULL,
545                 *audit_session = NULL, *audit_loginuid = NULL,
546                 *exe = NULL, *cgroup = NULL, *session = NULL,
547                 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
548
549         char idbuf[33];
550         sd_id128_t id;
551         int r;
552         char *t;
553         uid_t loginuid = 0, realuid = 0;
554
555         assert(s);
556         assert(iovec);
557         assert(n > 0);
558         assert(n + N_IOVEC_META_FIELDS <= m);
559
560         if (ucred) {
561                 uint32_t audit;
562 #ifdef HAVE_LOGIND
563                 uid_t owner;
564 #endif
565
566                 realuid = ucred->uid;
567
568                 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
569                         IOVEC_SET_STRING(iovec[n++], pid);
570
571                 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
572                         IOVEC_SET_STRING(iovec[n++], uid);
573
574                 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
575                         IOVEC_SET_STRING(iovec[n++], gid);
576
577                 r = get_process_comm(ucred->pid, &t);
578                 if (r >= 0) {
579                         comm = strappend("_COMM=", t);
580                         free(t);
581
582                         if (comm)
583                                 IOVEC_SET_STRING(iovec[n++], comm);
584                 }
585
586                 r = get_process_exe(ucred->pid, &t);
587                 if (r >= 0) {
588                         exe = strappend("_EXE=", t);
589                         free(t);
590
591                         if (exe)
592                                 IOVEC_SET_STRING(iovec[n++], exe);
593                 }
594
595                 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
596                 if (r >= 0) {
597                         cmdline = strappend("_CMDLINE=", t);
598                         free(t);
599
600                         if (cmdline)
601                                 IOVEC_SET_STRING(iovec[n++], cmdline);
602                 }
603
604                 r = audit_session_from_pid(ucred->pid, &audit);
605                 if (r >= 0)
606                         if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
607                                 IOVEC_SET_STRING(iovec[n++], audit_session);
608
609                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
610                 if (r >= 0)
611                         if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
612                                 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
613
614                 t = shortened_cgroup_path(ucred->pid);
615                 if (t) {
616                         cgroup = strappend("_SYSTEMD_CGROUP=", t);
617                         free(t);
618
619                         if (cgroup)
620                                 IOVEC_SET_STRING(iovec[n++], cgroup);
621                 }
622
623 #ifdef HAVE_LOGIND
624                 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
625                         session = strappend("_SYSTEMD_SESSION=", t);
626                         free(t);
627
628                         if (session)
629                                 IOVEC_SET_STRING(iovec[n++], session);
630                 }
631
632                 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
633                         if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
634                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
635 #endif
636
637                 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
638                         unit = strappend("_SYSTEMD_UNIT=", t);
639                         free(t);
640                 } else if (unit_id)
641                         unit = strappend("_SYSTEMD_UNIT=", unit_id);
642
643                 if (unit)
644                         IOVEC_SET_STRING(iovec[n++], unit);
645
646 #ifdef HAVE_SELINUX
647                 if (label) {
648                         selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
649                         if (selinux_context) {
650                                 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
651                                 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
652                                 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
653                                 IOVEC_SET_STRING(iovec[n++], selinux_context);
654                         }
655                 } else {
656                         security_context_t con;
657
658                         if (getpidcon(ucred->pid, &con) >= 0) {
659                                 selinux_context = strappend("_SELINUX_CONTEXT=", con);
660                                 if (selinux_context)
661                                         IOVEC_SET_STRING(iovec[n++], selinux_context);
662
663                                 freecon(con);
664                         }
665                 }
666 #endif
667         }
668
669         if (tv) {
670                 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
671                              (unsigned long long) timeval_load(tv)) >= 0)
672                         IOVEC_SET_STRING(iovec[n++], source_time);
673         }
674
675         /* Note that strictly speaking storing the boot id here is
676          * redundant since the entry includes this in-line
677          * anyway. However, we need this indexed, too. */
678         r = sd_id128_get_boot(&id);
679         if (r >= 0)
680                 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
681                         IOVEC_SET_STRING(iovec[n++], boot_id);
682
683         r = sd_id128_get_machine(&id);
684         if (r >= 0)
685                 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
686                         IOVEC_SET_STRING(iovec[n++], machine_id);
687
688         t = gethostname_malloc();
689         if (t) {
690                 hostname = strappend("_HOSTNAME=", t);
691                 free(t);
692                 if (hostname)
693                         IOVEC_SET_STRING(iovec[n++], hostname);
694         }
695
696         assert(n <= m);
697
698         write_to_journal(s, realuid == 0 ? 0 : loginuid, iovec, n);
699
700         free(pid);
701         free(uid);
702         free(gid);
703         free(comm);
704         free(exe);
705         free(cmdline);
706         free(source_time);
707         free(boot_id);
708         free(machine_id);
709         free(hostname);
710         free(audit_session);
711         free(audit_loginuid);
712         free(cgroup);
713         free(session);
714         free(owner_uid);
715         free(unit);
716         free(selinux_context);
717 }
718
719 static void driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
720         char mid[11 + 32 + 1];
721         char buffer[16 + LINE_MAX + 1];
722         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
723         int n = 0;
724         va_list ap;
725         struct ucred ucred;
726
727         assert(s);
728         assert(format);
729
730         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
731         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
732
733         memcpy(buffer, "MESSAGE=", 8);
734         va_start(ap, format);
735         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
736         va_end(ap);
737         char_array_0(buffer);
738         IOVEC_SET_STRING(iovec[n++], buffer);
739
740         snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
741         char_array_0(mid);
742         IOVEC_SET_STRING(iovec[n++], mid);
743
744         zero(ucred);
745         ucred.pid = getpid();
746         ucred.uid = getuid();
747         ucred.gid = getgid();
748
749         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
750 }
751
752 static void dispatch_message(Server *s,
753                              struct iovec *iovec, unsigned n, unsigned m,
754                              struct ucred *ucred,
755                              struct timeval *tv,
756                              const char *label, size_t label_len,
757                              const char *unit_id,
758                              int priority) {
759         int rl;
760         char *path = NULL, *c;
761
762         assert(s);
763         assert(iovec || n == 0);
764
765         if (n == 0)
766                 return;
767
768         if (LOG_PRI(priority) > s->max_level_store)
769                 return;
770
771         if (!ucred)
772                 goto finish;
773
774         path = shortened_cgroup_path(ucred->pid);
775         if (!path)
776                 goto finish;
777
778         /* example: /user/lennart/3/foobar
779          *          /system/dbus.service/foobar
780          *
781          * So let's cut of everything past the third /, since that is
782          * wher user directories start */
783
784         c = strchr(path, '/');
785         if (c) {
786                 c = strchr(c+1, '/');
787                 if (c) {
788                         c = strchr(c+1, '/');
789                         if (c)
790                                 *c = 0;
791                 }
792         }
793
794         rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
795
796         if (rl == 0) {
797                 free(path);
798                 return;
799         }
800
801         /* Write a suppression message if we suppressed something */
802         if (rl > 1)
803                 driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
804
805         free(path);
806
807 finish:
808         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
809 }
810
811 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
812         struct msghdr msghdr;
813         struct cmsghdr *cmsg;
814         union {
815                 struct cmsghdr cmsghdr;
816                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
817         } control;
818         union sockaddr_union sa;
819
820         assert(s);
821         assert(iovec);
822         assert(n_iovec > 0);
823
824         zero(msghdr);
825         msghdr.msg_iov = (struct iovec*) iovec;
826         msghdr.msg_iovlen = n_iovec;
827
828         zero(sa);
829         sa.un.sun_family = AF_UNIX;
830         strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
831         msghdr.msg_name = &sa;
832         msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
833
834         if (ucred) {
835                 zero(control);
836                 msghdr.msg_control = &control;
837                 msghdr.msg_controllen = sizeof(control);
838
839                 cmsg = CMSG_FIRSTHDR(&msghdr);
840                 cmsg->cmsg_level = SOL_SOCKET;
841                 cmsg->cmsg_type = SCM_CREDENTIALS;
842                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
843                 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
844                 msghdr.msg_controllen = cmsg->cmsg_len;
845         }
846
847         /* Forward the syslog message we received via /dev/log to
848          * /run/systemd/syslog. Unfortunately we currently can't set
849          * the SO_TIMESTAMP auxiliary data, and hence we don't. */
850
851         if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
852                 return;
853
854         /* The socket is full? I guess the syslog implementation is
855          * too slow, and we shouldn't wait for that... */
856         if (errno == EAGAIN)
857                 return;
858
859         if (ucred && errno == ESRCH) {
860                 struct ucred u;
861
862                 /* Hmm, presumably the sender process vanished
863                  * by now, so let's fix it as good as we
864                  * can, and retry */
865
866                 u = *ucred;
867                 u.pid = getpid();
868                 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
869
870                 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
871                         return;
872
873                 if (errno == EAGAIN)
874                         return;
875         }
876
877         if (errno != ENOENT)
878                 log_debug("Failed to forward syslog message: %m");
879 }
880
881 static void forward_syslog_raw(Server *s, int priority, const char *buffer, struct ucred *ucred, struct timeval *tv) {
882         struct iovec iovec;
883
884         assert(s);
885         assert(buffer);
886
887         if (LOG_PRI(priority) > s->max_level_syslog)
888                 return;
889
890         IOVEC_SET_STRING(iovec, buffer);
891         forward_syslog_iovec(s, &iovec, 1, ucred, tv);
892 }
893
894 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
895         struct iovec iovec[5];
896         char header_priority[6], header_time[64], header_pid[16];
897         int n = 0;
898         time_t t;
899         struct tm *tm;
900         char *ident_buf = NULL;
901
902         assert(s);
903         assert(priority >= 0);
904         assert(priority <= 999);
905         assert(message);
906
907         if (LOG_PRI(priority) > s->max_level_syslog)
908                 return;
909
910         /* First: priority field */
911         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
912         char_array_0(header_priority);
913         IOVEC_SET_STRING(iovec[n++], header_priority);
914
915         /* Second: timestamp */
916         t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
917         tm = localtime(&t);
918         if (!tm)
919                 return;
920         if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
921                 return;
922         IOVEC_SET_STRING(iovec[n++], header_time);
923
924         /* Third: identifier and PID */
925         if (ucred) {
926                 if (!identifier) {
927                         get_process_comm(ucred->pid, &ident_buf);
928                         identifier = ident_buf;
929                 }
930
931                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
932                 char_array_0(header_pid);
933
934                 if (identifier)
935                         IOVEC_SET_STRING(iovec[n++], identifier);
936
937                 IOVEC_SET_STRING(iovec[n++], header_pid);
938         } else if (identifier) {
939                 IOVEC_SET_STRING(iovec[n++], identifier);
940                 IOVEC_SET_STRING(iovec[n++], ": ");
941         }
942
943         /* Fourth: message */
944         IOVEC_SET_STRING(iovec[n++], message);
945
946         forward_syslog_iovec(s, iovec, n, ucred, tv);
947
948         free(ident_buf);
949 }
950
/* Returns the priority unchanged if it carries a non-zero facility.
 * Otherwise (facility bits are all zero, i.e. the kernel facility)
 * the level is kept and the facility is replaced by LOG_USER. */
static int fixup_priority(int priority) {
        int facility = priority & LOG_FACMASK;

        if (facility != 0)
                return priority;

        return LOG_USER | (priority & LOG_PRIMASK);
}
958
959 static void forward_kmsg(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
960         struct iovec iovec[5];
961         char header_priority[6], header_pid[16];
962         int n = 0;
963         char *ident_buf = NULL;
964
965         assert(s);
966         assert(priority >= 0);
967         assert(priority <= 999);
968         assert(message);
969
970         if (_unlikely_(LOG_PRI(priority) > s->max_level_kmsg))
971                 return;
972
973         if (_unlikely_(s->dev_kmsg_fd < 0))
974                 return;
975
976         /* Never allow messages with kernel facility to be written to
977          * kmsg, regardless where the data comes from. */
978         priority = fixup_priority(priority);
979
980         /* First: priority field */
981         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
982         char_array_0(header_priority);
983         IOVEC_SET_STRING(iovec[n++], header_priority);
984
985         /* Second: identifier and PID */
986         if (ucred) {
987                 if (!identifier) {
988                         get_process_comm(ucred->pid, &ident_buf);
989                         identifier = ident_buf;
990                 }
991
992                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
993                 char_array_0(header_pid);
994
995                 if (identifier)
996                         IOVEC_SET_STRING(iovec[n++], identifier);
997
998                 IOVEC_SET_STRING(iovec[n++], header_pid);
999         } else if (identifier) {
1000                 IOVEC_SET_STRING(iovec[n++], identifier);
1001                 IOVEC_SET_STRING(iovec[n++], ": ");
1002         }
1003
1004         /* Fourth: message */
1005         IOVEC_SET_STRING(iovec[n++], message);
1006         IOVEC_SET_STRING(iovec[n++], "\n");
1007
1008         if (writev(s->dev_kmsg_fd, iovec, n) < 0)
1009                 log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
1010
1011         free(ident_buf);
1012 }
1013
1014 static void forward_console(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
1015         struct iovec iovec[4];
1016         char header_pid[16];
1017         int n = 0, fd;
1018         char *ident_buf = NULL;
1019         const char *tty;
1020
1021         assert(s);
1022         assert(message);
1023
1024         if (LOG_PRI(priority) > s->max_level_console)
1025                 return;
1026
1027         /* First: identifier and PID */
1028         if (ucred) {
1029                 if (!identifier) {
1030                         get_process_comm(ucred->pid, &ident_buf);
1031                         identifier = ident_buf;
1032                 }
1033
1034                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
1035                 char_array_0(header_pid);
1036
1037                 if (identifier)
1038                         IOVEC_SET_STRING(iovec[n++], identifier);
1039
1040                 IOVEC_SET_STRING(iovec[n++], header_pid);
1041         } else if (identifier) {
1042                 IOVEC_SET_STRING(iovec[n++], identifier);
1043                 IOVEC_SET_STRING(iovec[n++], ": ");
1044         }
1045
1046         /* Third: message */
1047         IOVEC_SET_STRING(iovec[n++], message);
1048         IOVEC_SET_STRING(iovec[n++], "\n");
1049
1050         tty = s->tty_path ? s->tty_path : "/dev/console";
1051
1052         fd = open_terminal(tty, O_WRONLY|O_NOCTTY|O_CLOEXEC);
1053         if (fd < 0) {
1054                 log_debug("Failed to open %s for logging: %s", tty, strerror(errno));
1055                 goto finish;
1056         }
1057
1058         if (writev(fd, iovec, n) < 0)
1059                 log_debug("Failed to write to %s for logging: %s", tty, strerror(errno));
1060
1061         close_nointr_nofail(fd);
1062
1063 finish:
1064         free(ident_buf);
1065 }
1066
1067 static void read_identifier(const char **buf, char **identifier, char **pid) {
1068         const char *p;
1069         char *t;
1070         size_t l, e;
1071
1072         assert(buf);
1073         assert(identifier);
1074         assert(pid);
1075
1076         p = *buf;
1077
1078         p += strspn(p, WHITESPACE);
1079         l = strcspn(p, WHITESPACE);
1080
1081         if (l <= 0 ||
1082             p[l-1] != ':')
1083                 return;
1084
1085         e = l;
1086         l--;
1087
1088         if (p[l-1] == ']') {
1089                 size_t k = l-1;
1090
1091                 for (;;) {
1092
1093                         if (p[k] == '[') {
1094                                 t = strndup(p+k+1, l-k-2);
1095                                 if (t)
1096                                         *pid = t;
1097
1098                                 l = k;
1099                                 break;
1100                         }
1101
1102                         if (k == 0)
1103                                 break;
1104
1105                         k--;
1106                 }
1107         }
1108
1109         t = strndup(p, l);
1110         if (t)
1111                 *identifier = t;
1112
1113         *buf = p + e;
1114         *buf += strspn(*buf, WHITESPACE);
1115 }
1116
1117 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv, const char *label, size_t label_len) {
1118         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
1119         struct iovec iovec[N_IOVEC_META_FIELDS + 6];
1120         unsigned n = 0;
1121         int priority = LOG_USER | LOG_INFO;
1122         char *identifier = NULL, *pid = NULL;
1123         const char *orig;
1124
1125         assert(s);
1126         assert(buf);
1127
1128         orig = buf;
1129         parse_syslog_priority((char**) &buf, &priority);
1130
1131         if (s->forward_to_syslog)
1132                 forward_syslog_raw(s, priority, orig, ucred, tv);
1133
1134         skip_syslog_date((char**) &buf);
1135         read_identifier(&buf, &identifier, &pid);
1136
1137         if (s->forward_to_kmsg)
1138                 forward_kmsg(s, priority, identifier, buf, ucred);
1139
1140         if (s->forward_to_console)
1141                 forward_console(s, priority, identifier, buf, ucred);
1142
1143         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
1144
1145         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1146                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1147
1148         if (priority & LOG_FACMASK)
1149                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1150                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1151
1152         if (identifier) {
1153                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1154                 if (syslog_identifier)
1155                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1156         }
1157
1158         if (pid) {
1159                 syslog_pid = strappend("SYSLOG_PID=", pid);
1160                 if (syslog_pid)
1161                         IOVEC_SET_STRING(iovec[n++], syslog_pid);
1162         }
1163
1164         message = strappend("MESSAGE=", buf);
1165         if (message)
1166                 IOVEC_SET_STRING(iovec[n++], message);
1167
1168         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, label, label_len, NULL, priority);
1169
1170         free(message);
1171         free(identifier);
1172         free(pid);
1173         free(syslog_priority);
1174         free(syslog_facility);
1175         free(syslog_identifier);
1176         free(syslog_pid);
1177 }
1178
/* Checks whether the l bytes at p form a field name that clients are
 * allowed to set. This loosely follows the POSIX recommendations for
 * environment variable names, with a few extra restrictions:
 *
 * http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
 *
 * A name is accepted if it is 1 to 64 characters long, consists only
 * of A-Z, 0-9 and '_', and starts with neither a digit nor an
 * underscore (names with a leading underscore are protected). */
static bool valid_user_field(const char *p, size_t l) {
        size_t i;

        /* Neither empty nor overly long names */
        if (l == 0 || l > 64)
                return false;

        /* No protected names, no leading digit */
        if (p[0] == '_' || (p[0] >= '0' && p[0] <= '9'))
                return false;

        for (i = 0; i < l; i++) {
                char c = p[i];
                bool upper = c >= 'A' && c <= 'Z';
                bool digit = c >= '0' && c <= '9';

                if (!upper && !digit && c != '_')
                        return false;
        }

        return true;
}
1213
1214 static void process_native_message(
1215                 Server *s,
1216                 const void *buffer, size_t buffer_size,
1217                 struct ucred *ucred,
1218                 struct timeval *tv,
1219                 const char *label, size_t label_len) {
1220
1221         struct iovec *iovec = NULL;
1222         unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1223         const char *p;
1224         size_t remaining;
1225         int priority = LOG_INFO;
1226         char *identifier = NULL, *message = NULL;
1227
1228         assert(s);
1229         assert(buffer || buffer_size == 0);
1230
1231         p = buffer;
1232         remaining = buffer_size;
1233
1234         while (remaining > 0) {
1235                 const char *e, *q;
1236
1237                 e = memchr(p, '\n', remaining);
1238
1239                 if (!e) {
1240                         /* Trailing noise, let's ignore it, and flush what we collected */
1241                         log_debug("Received message with trailing noise, ignoring.");
1242                         break;
1243                 }
1244
1245                 if (e == p) {
1246                         /* Entry separator */
1247                         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
1248                         n = 0;
1249                         priority = LOG_INFO;
1250
1251                         p++;
1252                         remaining--;
1253                         continue;
1254                 }
1255
1256                 if (*p == '.' || *p == '#') {
1257                         /* Ignore control commands for now, and
1258                          * comments too. */
1259                         remaining -= (e - p) + 1;
1260                         p = e + 1;
1261                         continue;
1262                 }
1263
1264                 /* A property follows */
1265
1266                 if (n+N_IOVEC_META_FIELDS >= m) {
1267                         struct iovec *c;
1268                         unsigned u;
1269
1270                         u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1271                         c = realloc(iovec, u * sizeof(struct iovec));
1272                         if (!c) {
1273                                 log_oom();
1274                                 break;
1275                         }
1276
1277                         iovec = c;
1278                         m = u;
1279                 }
1280
1281                 q = memchr(p, '=', e - p);
1282                 if (q) {
1283                         if (valid_user_field(p, q - p)) {
1284                                 size_t l;
1285
1286                                 l = e - p;
1287
1288                                 /* If the field name starts with an
1289                                  * underscore, skip the variable,
1290                                  * since that indidates a trusted
1291                                  * field */
1292                                 iovec[n].iov_base = (char*) p;
1293                                 iovec[n].iov_len = l;
1294                                 n++;
1295
1296                                 /* We need to determine the priority
1297                                  * of this entry for the rate limiting
1298                                  * logic */
1299                                 if (l == 10 &&
1300                                     memcmp(p, "PRIORITY=", 9) == 0 &&
1301                                     p[9] >= '0' && p[9] <= '9')
1302                                         priority = (priority & LOG_FACMASK) | (p[9] - '0');
1303
1304                                 else if (l == 17 &&
1305                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1306                                          p[16] >= '0' && p[16] <= '9')
1307                                         priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1308
1309                                 else if (l == 18 &&
1310                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1311                                          p[16] >= '0' && p[16] <= '9' &&
1312                                          p[17] >= '0' && p[17] <= '9')
1313                                         priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1314
1315                                 else if (l >= 19 &&
1316                                          memcmp(p, "SYSLOG_IDENTIFIER=", 18) == 0) {
1317                                         char *t;
1318
1319                                         t = strndup(p + 18, l - 18);
1320                                         if (t) {
1321                                                 free(identifier);
1322                                                 identifier = t;
1323                                         }
1324                                 } else if (l >= 8 &&
1325                                            memcmp(p, "MESSAGE=", 8) == 0) {
1326                                         char *t;
1327
1328                                         t = strndup(p + 8, l - 8);
1329                                         if (t) {
1330                                                 free(message);
1331                                                 message = t;
1332                                         }
1333                                 }
1334                         }
1335
1336                         remaining -= (e - p) + 1;
1337                         p = e + 1;
1338                         continue;
1339                 } else {
1340                         le64_t l_le;
1341                         uint64_t l;
1342                         char *k;
1343
1344                         if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1345                                 log_debug("Failed to parse message, ignoring.");
1346                                 break;
1347                         }
1348
1349                         memcpy(&l_le, e + 1, sizeof(uint64_t));
1350                         l = le64toh(l_le);
1351
1352                         if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1353                             e[1+sizeof(uint64_t)+l] != '\n') {
1354                                 log_debug("Failed to parse message, ignoring.");
1355                                 break;
1356                         }
1357
1358                         k = malloc((e - p) + 1 + l);
1359                         if (!k) {
1360                                 log_oom();
1361                                 break;
1362                         }
1363
1364                         memcpy(k, p, e - p);
1365                         k[e - p] = '=';
1366                         memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1367
1368                         if (valid_user_field(p, e - p)) {
1369                                 iovec[n].iov_base = k;
1370                                 iovec[n].iov_len = (e - p) + 1 + l;
1371                                 n++;
1372                         } else
1373                                 free(k);
1374
1375                         remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1376                         p = e + 1 + sizeof(uint64_t) + l + 1;
1377                 }
1378         }
1379
1380         if (n <= 0)
1381                 goto finish;
1382
1383         tn = n++;
1384         IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1385
1386         if (message) {
1387                 if (s->forward_to_syslog)
1388                         forward_syslog(s, priority, identifier, message, ucred, tv);
1389
1390                 if (s->forward_to_kmsg)
1391                         forward_kmsg(s, priority, identifier, message, ucred);
1392
1393                 if (s->forward_to_console)
1394                         forward_console(s, priority, identifier, message, ucred);
1395         }
1396
1397         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
1398
1399 finish:
1400         for (j = 0; j < n; j++)  {
1401                 if (j == tn)
1402                         continue;
1403
1404                 if (iovec[j].iov_base < buffer ||
1405                     (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1406                         free(iovec[j].iov_base);
1407         }
1408
1409         free(iovec);
1410         free(identifier);
1411         free(message);
1412 }
1413
1414 static void process_native_file(
1415                 Server *s,
1416                 int fd,
1417                 struct ucred *ucred,
1418                 struct timeval *tv,
1419                 const char *label, size_t label_len) {
1420
1421         struct stat st;
1422         void *p;
1423         ssize_t n;
1424
1425         assert(s);
1426         assert(fd >= 0);
1427
1428         /* Data is in the passed file, since it didn't fit in a
1429          * datagram. We can't map the file here, since clients might
1430          * then truncate it and trigger a SIGBUS for us. So let's
1431          * stupidly read it */
1432
1433         if (fstat(fd, &st) < 0) {
1434                 log_error("Failed to stat passed file, ignoring: %m");
1435                 return;
1436         }
1437
1438         if (!S_ISREG(st.st_mode)) {
1439                 log_error("File passed is not regular. Ignoring.");
1440                 return;
1441         }
1442
1443         if (st.st_size <= 0)
1444                 return;
1445
1446         if (st.st_size > ENTRY_SIZE_MAX) {
1447                 log_error("File passed too large. Ignoring.");
1448                 return;
1449         }
1450
1451         p = malloc(st.st_size);
1452         if (!p) {
1453                 log_oom();
1454                 return;
1455         }
1456
1457         n = pread(fd, p, st.st_size, 0);
1458         if (n < 0)
1459                 log_error("Failed to read file, ignoring: %s", strerror(-n));
1460         else if (n > 0)
1461                 process_native_message(s, p, n, ucred, tv, label, label_len);
1462
1463         free(p);
1464 }
1465
1466 static int stdout_stream_log(StdoutStream *s, const char *p) {
1467         struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1468         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1469         unsigned n = 0;
1470         int priority;
1471         char *label = NULL;
1472         size_t label_len = 0;
1473
1474         assert(s);
1475         assert(p);
1476
1477         if (isempty(p))
1478                 return 0;
1479
1480         priority = s->priority;
1481
1482         if (s->level_prefix)
1483                 parse_syslog_priority((char**) &p, &priority);
1484
1485         if (s->forward_to_syslog || s->server->forward_to_syslog)
1486                 forward_syslog(s->server, fixup_priority(priority), s->identifier, p, &s->ucred, NULL);
1487
1488         if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1489                 forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1490
1491         if (s->forward_to_console || s->server->forward_to_console)
1492                 forward_console(s->server, priority, s->identifier, p, &s->ucred);
1493
1494         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1495
1496         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1497                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1498
1499         if (priority & LOG_FACMASK)
1500                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1501                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1502
1503         if (s->identifier) {
1504                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1505                 if (syslog_identifier)
1506                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1507         }
1508
1509         message = strappend("MESSAGE=", p);
1510         if (message)
1511                 IOVEC_SET_STRING(iovec[n++], message);
1512
1513 #ifdef HAVE_SELINUX
1514         if (s->security_context) {
1515                 label = (char*) s->security_context;
1516                 label_len = strlen((char*) s->security_context);
1517         }
1518 #endif
1519
1520         dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, label, label_len, s->unit_id, priority);
1521
1522         free(message);
1523         free(syslog_priority);
1524         free(syslog_facility);
1525         free(syslog_identifier);
1526
1527         return 0;
1528 }
1529
1530 static int stdout_stream_line(StdoutStream *s, char *p) {
1531         int r;
1532
1533         assert(s);
1534         assert(p);
1535
1536         p = strstrip(p);
1537
1538         switch (s->state) {
1539
1540         case STDOUT_STREAM_IDENTIFIER:
1541                 if (isempty(p))
1542                         s->identifier = NULL;
1543                 else  {
1544                         s->identifier = strdup(p);
1545                         if (!s->identifier)
1546                                 return log_oom();
1547                 }
1548
1549                 s->state = STDOUT_STREAM_UNIT_ID;
1550                 return 0;
1551
1552         case STDOUT_STREAM_UNIT_ID:
1553                 if (s->ucred.uid == 0) {
1554                         if (isempty(p))
1555                                 s->unit_id = NULL;
1556                         else  {
1557                                 s->unit_id = strdup(p);
1558                                 if (!s->unit_id)
1559                                         return log_oom();
1560                         }
1561                 }
1562
1563                 s->state = STDOUT_STREAM_PRIORITY;
1564                 return 0;
1565
1566         case STDOUT_STREAM_PRIORITY:
1567                 r = safe_atoi(p, &s->priority);
1568                 if (r < 0 || s->priority <= 0 || s->priority >= 999) {
1569                         log_warning("Failed to parse log priority line.");
1570                         return -EINVAL;
1571                 }
1572
1573                 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1574                 return 0;
1575
1576         case STDOUT_STREAM_LEVEL_PREFIX:
1577                 r = parse_boolean(p);
1578                 if (r < 0) {
1579                         log_warning("Failed to parse level prefix line.");
1580                         return -EINVAL;
1581                 }
1582
1583                 s->level_prefix = !!r;
1584                 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1585                 return 0;
1586
1587         case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1588                 r = parse_boolean(p);
1589                 if (r < 0) {
1590                         log_warning("Failed to parse forward to syslog line.");
1591                         return -EINVAL;
1592                 }
1593
1594                 s->forward_to_syslog = !!r;
1595                 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1596                 return 0;
1597
1598         case STDOUT_STREAM_FORWARD_TO_KMSG:
1599                 r = parse_boolean(p);
1600                 if (r < 0) {
1601                         log_warning("Failed to parse copy to kmsg line.");
1602                         return -EINVAL;
1603                 }
1604
1605                 s->forward_to_kmsg = !!r;
1606                 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1607                 return 0;
1608
1609         case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1610                 r = parse_boolean(p);
1611                 if (r < 0) {
1612                         log_warning("Failed to parse copy to console line.");
1613                         return -EINVAL;
1614                 }
1615
1616                 s->forward_to_console = !!r;
1617                 s->state = STDOUT_STREAM_RUNNING;
1618                 return 0;
1619
1620         case STDOUT_STREAM_RUNNING:
1621                 return stdout_stream_log(s, p);
1622         }
1623
1624         assert_not_reached("Unknown stream state");
1625 }
1626
1627 static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
1628         char *p;
1629         size_t remaining;
1630         int r;
1631
1632         assert(s);
1633
1634         p = s->buffer;
1635         remaining = s->length;
1636         for (;;) {
1637                 char *end;
1638                 size_t skip;
1639
1640                 end = memchr(p, '\n', remaining);
1641                 if (end)
1642                         skip = end - p + 1;
1643                 else if (remaining >= sizeof(s->buffer) - 1) {
1644                         end = p + sizeof(s->buffer) - 1;
1645                         skip = remaining;
1646                 } else
1647                         break;
1648
1649                 *end = 0;
1650
1651                 r = stdout_stream_line(s, p);
1652                 if (r < 0)
1653                         return r;
1654
1655                 remaining -= skip;
1656                 p += skip;
1657         }
1658
1659         if (force_flush && remaining > 0) {
1660                 p[remaining] = 0;
1661                 r = stdout_stream_line(s, p);
1662                 if (r < 0)
1663                         return r;
1664
1665                 p += remaining;
1666                 remaining = 0;
1667         }
1668
1669         if (p > s->buffer) {
1670                 memmove(s->buffer, p, remaining);
1671                 s->length = remaining;
1672         }
1673
1674         return 0;
1675 }
1676
1677 static int stdout_stream_process(StdoutStream *s) {
1678         ssize_t l;
1679         int r;
1680
1681         assert(s);
1682
1683         l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1684         if (l < 0) {
1685
1686                 if (errno == EAGAIN)
1687                         return 0;
1688
1689                 log_warning("Failed to read from stream: %m");
1690                 return -errno;
1691         }
1692
1693         if (l == 0) {
1694                 r = stdout_stream_scan(s, true);
1695                 if (r < 0)
1696                         return r;
1697
1698                 return 0;
1699         }
1700
1701         s->length += l;
1702         r = stdout_stream_scan(s, false);
1703         if (r < 0)
1704                 return r;
1705
1706         return 1;
1707
1708 }
1709
1710 static void stdout_stream_free(StdoutStream *s) {
1711         assert(s);
1712
1713         if (s->server) {
1714                 assert(s->server->n_stdout_streams > 0);
1715                 s->server->n_stdout_streams --;
1716                 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
1717         }
1718
1719         if (s->fd >= 0) {
1720                 if (s->server)
1721                         epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1722
1723                 close_nointr_nofail(s->fd);
1724         }
1725
1726 #ifdef HAVE_SELINUX
1727         if (s->security_context)
1728                 freecon(s->security_context);
1729 #endif
1730
1731         free(s->identifier);
1732         free(s);
1733 }
1734
1735 static int stdout_stream_new(Server *s) {
1736         StdoutStream *stream;
1737         int fd, r;
1738         socklen_t len;
1739         struct epoll_event ev;
1740
1741         assert(s);
1742
1743         fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1744         if (fd < 0) {
1745                 if (errno == EAGAIN)
1746                         return 0;
1747
1748                 log_error("Failed to accept stdout connection: %m");
1749                 return -errno;
1750         }
1751
1752         if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1753                 log_warning("Too many stdout streams, refusing connection.");
1754                 close_nointr_nofail(fd);
1755                 return 0;
1756         }
1757
1758         stream = new0(StdoutStream, 1);
1759         if (!stream) {
1760                 close_nointr_nofail(fd);
1761                 return log_oom();
1762         }
1763
1764         stream->fd = fd;
1765
1766         len = sizeof(stream->ucred);
1767         if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1768                 log_error("Failed to determine peer credentials: %m");
1769                 r = -errno;
1770                 goto fail;
1771         }
1772
1773 #ifdef HAVE_SELINUX
1774         if (getpeercon(fd, &stream->security_context) < 0 && errno != ENOPROTOOPT)
1775                 log_error("Failed to determine peer security context: %m");
1776 #endif
1777
1778         if (shutdown(fd, SHUT_WR) < 0) {
1779                 log_error("Failed to shutdown writing side of socket: %m");
1780                 r = -errno;
1781                 goto fail;
1782         }
1783
1784         zero(ev);
1785         ev.data.ptr = stream;
1786         ev.events = EPOLLIN;
1787         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1788                 log_error("Failed to add stream to event loop: %m");
1789                 r = -errno;
1790                 goto fail;
1791         }
1792
1793         stream->server = s;
1794         LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1795         s->n_stdout_streams ++;
1796
1797         return 0;
1798
1799 fail:
1800         stdout_stream_free(stream);
1801         return r;
1802 }
1803
/* Returns true if the string pid parses as a PID and that PID is the one
 * of the calling process; false if it does not parse or does not match. */
static bool is_us(const char *pid) {
        pid_t parsed;

        assert(pid);

        return parse_pid(pid, &parsed) >= 0 && parsed == getpid();
}
1814
1815 static void dev_kmsg_record(Server *s, char *p, size_t l) {
1816         struct iovec iovec[N_IOVEC_META_FIELDS + 7 + N_IOVEC_KERNEL_FIELDS + 2 + N_IOVEC_UDEV_FIELDS];
1817         char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
1818         int priority, r;
1819         unsigned n = 0, z = 0, j;
1820         usec_t usec;
1821         char *identifier = NULL, *pid = NULL, *e, *f, *k;
1822         uint64_t serial;
1823         size_t pl;
1824         char *kernel_device = NULL;
1825
1826         assert(s);
1827         assert(p);
1828
1829         if (l <= 0)
1830                 return;
1831
1832         e = memchr(p, ',', l);
1833         if (!e)
1834                 return;
1835         *e = 0;
1836
1837         r = safe_atoi(p, &priority);
1838         if (r < 0 || priority < 0 || priority > 999)
1839                 return;
1840
1841         if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
1842                 return;
1843
1844         l -= (e - p) + 1;
1845         p = e + 1;
1846         e = memchr(p, ',', l);
1847         if (!e)
1848                 return;
1849         *e = 0;
1850
1851         r = safe_atou64(p, &serial);
1852         if (r < 0)
1853                 return;
1854
1855         if (s->kernel_seqnum) {
1856                 /* We already read this one? */
1857                 if (serial < *s->kernel_seqnum)
1858                         return;
1859
1860                 /* Did we lose any? */
1861                 if (serial > *s->kernel_seqnum)
1862                         driver_message(s, SD_MESSAGE_JOURNAL_MISSED, "Missed %llu kernel messages", (unsigned long long) serial - *s->kernel_seqnum - 1);
1863
1864                 /* Make sure we never read this one again. Note that
1865                  * we always store the next message serial we expect
1866                  * here, simply because this makes handling the first
1867                  * message with serial 0 easy. */
1868                 *s->kernel_seqnum = serial + 1;
1869         }
1870
1871         l -= (e - p) + 1;
1872         p = e + 1;
1873         f = memchr(p, ';', l);
1874         if (!f)
1875                 return;
1876         /* Kernel 3.6 has the flags field, kernel 3.5 lacks that */
1877         e = memchr(p, ',', l);
1878         if (!e || f < e)
1879                 e = f;
1880         *e = 0;
1881
1882         r = parse_usec(p, &usec);
1883         if (r < 0)
1884                 return;
1885
1886         l -= (f - p) + 1;
1887         p = f + 1;
1888         e = memchr(p, '\n', l);
1889         if (!e)
1890                 return;
1891         *e = 0;
1892
1893         pl = e - p;
1894         l -= (e - p) + 1;
1895         k = e + 1;
1896
1897         for (j = 0; l > 0 && j < N_IOVEC_KERNEL_FIELDS; j++) {
1898                 char *m;
1899                 /* Meta data fields attached */
1900
1901                 if (*k != ' ')
1902                         break;
1903
1904                 k ++, l --;
1905
1906                 e = memchr(k, '\n', l);
1907                 if (!e)
1908                         return;
1909
1910                 *e = 0;
1911
1912                 m = cunescape_length_with_prefix(k, e - k, "_KERNEL_");
1913                 if (!m)
1914                         break;
1915
1916                 if (startswith(m, "_KERNEL_DEVICE="))
1917                         kernel_device = m + 15;
1918
1919                 IOVEC_SET_STRING(iovec[n++], m);
1920                 z++;
1921
1922                 l -= (e - k) + 1;
1923                 k = e + 1;
1924         }
1925
1926         if (kernel_device) {
1927                 struct udev_device *ud;
1928
1929                 ud = udev_device_new_from_device_id(s->udev, kernel_device);
1930                 if (ud) {
1931                         const char *g;
1932                         struct udev_list_entry *ll;
1933                         char *b;
1934
1935                         g = udev_device_get_devnode(ud);
1936                         if (g) {
1937                                 b = strappend("_UDEV_DEVNODE=", g);
1938                                 if (b) {
1939                                         IOVEC_SET_STRING(iovec[n++], b);
1940                                         z++;
1941                                 }
1942                         }
1943
1944                         g = udev_device_get_sysname(ud);
1945                         if (g) {
1946                                 b = strappend("_UDEV_SYSNAME=", g);
1947                                 if (b) {
1948                                         IOVEC_SET_STRING(iovec[n++], b);
1949                                         z++;
1950                                 }
1951                         }
1952
1953                         j = 0;
1954                         ll = udev_device_get_devlinks_list_entry(ud);
1955                         udev_list_entry_foreach(ll, ll) {
1956
1957                                 if (j > N_IOVEC_UDEV_FIELDS)
1958                                         break;
1959
1960                                 g = udev_list_entry_get_name(ll);
1961                                 b = strappend("_UDEV_DEVLINK=", g);
1962                                 if (g) {
1963                                         IOVEC_SET_STRING(iovec[n++], b);
1964                                         z++;
1965                                 }
1966
1967                                 j++;
1968                         }
1969
1970                         udev_device_unref(ud);
1971                 }
1972         }
1973
1974         if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
1975                      (unsigned long long) usec) >= 0)
1976                 IOVEC_SET_STRING(iovec[n++], source_time);
1977
1978         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
1979
1980         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1981                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1982
1983         if ((priority & LOG_FACMASK) == LOG_KERN)
1984                 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
1985         else {
1986                 read_identifier((const char**) &p, &identifier, &pid);
1987
1988                 /* Avoid any messages we generated ourselves via
1989                  * log_info() and friends. */
1990                 if (pid && is_us(pid))
1991                         goto finish;
1992
1993                 if (identifier) {
1994                         syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1995                         if (syslog_identifier)
1996                                 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1997                 }
1998
1999                 if (pid) {
2000                         syslog_pid = strappend("SYSLOG_PID=", pid);
2001                         if (syslog_pid)
2002                                 IOVEC_SET_STRING(iovec[n++], syslog_pid);
2003                 }
2004
2005                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
2006                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
2007         }
2008
2009         message = cunescape_length_with_prefix(p, pl, "MESSAGE=");
2010         if (message)
2011                 IOVEC_SET_STRING(iovec[n++], message);
2012
2013         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, NULL, priority);
2014
2015 finish:
2016         for (j = 0; j < z; j++)
2017                 free(iovec[j].iov_base);
2018
2019         free(message);
2020         free(syslog_priority);
2021         free(syslog_identifier);
2022         free(syslog_pid);
2023         free(syslog_facility);
2024         free(source_time);
2025         free(identifier);
2026         free(pid);
2027 }
2028
2029 static int system_journal_open(Server *s) {
2030         int r;
2031         char *fn;
2032         sd_id128_t machine;
2033         char ids[33];
2034
2035         r = sd_id128_get_machine(&machine);
2036         if (r < 0)
2037                 return r;
2038
2039         sd_id128_to_string(machine, ids);
2040
2041         if (!s->system_journal &&
2042             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
2043             access("/run/systemd/journal/flushed", F_OK) >= 0) {
2044
2045                 /* If in auto mode: first try to create the machine
2046                  * path, but not the prefix.
2047                  *
2048                  * If in persistent mode: create /var/log/journal and
2049                  * the machine path */
2050
2051                 if (s->storage == STORAGE_PERSISTENT)
2052                         (void) mkdir("/var/log/journal/", 0755);
2053
2054                 fn = strappend("/var/log/journal/", ids);
2055                 if (!fn)
2056                         return -ENOMEM;
2057
2058                 (void) mkdir(fn, 0755);
2059                 free(fn);
2060
2061                 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
2062                 if (!fn)
2063                         return -ENOMEM;
2064
2065                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
2066                 free(fn);
2067
2068                 if (r >= 0)
2069                         server_fix_perms(s, s->system_journal, 0);
2070                 else if (r < 0) {
2071
2072                         if (r != -ENOENT && r != -EROFS)
2073                                 log_warning("Failed to open system journal: %s", strerror(-r));
2074
2075                         r = 0;
2076                 }
2077         }
2078
2079         if (!s->runtime_journal &&
2080             (s->storage != STORAGE_NONE)) {
2081
2082                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
2083                 if (!fn)
2084                         return -ENOMEM;
2085
2086                 if (s->system_journal) {
2087
2088                         /* Try to open the runtime journal, but only
2089                          * if it already exists, so that we can flush
2090                          * it into the system journal */
2091
2092                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
2093                         free(fn);
2094
2095                         if (r < 0) {
2096                                 if (r != -ENOENT)
2097                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
2098
2099                                 r = 0;
2100                         }
2101
2102                 } else {
2103
2104                         /* OK, we really need the runtime journal, so create
2105                          * it if necessary. */
2106
2107                         (void) mkdir_parents(fn, 0755);
2108                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
2109                         free(fn);
2110
2111                         if (r < 0) {
2112                                 log_error("Failed to open runtime journal: %s", strerror(-r));
2113                                 return r;
2114                         }
2115                 }
2116
2117                 if (s->runtime_journal)
2118                         server_fix_perms(s, s->runtime_journal, 0);
2119         }
2120
2121         return r;
2122 }
2123
2124 static int server_flush_to_var(Server *s) {
2125         Object *o = NULL;
2126         int r;
2127         sd_id128_t machine;
2128         sd_journal *j;
2129
2130         assert(s);
2131
2132         if (s->storage != STORAGE_AUTO &&
2133             s->storage != STORAGE_PERSISTENT)
2134                 return 0;
2135
2136         if (!s->runtime_journal)
2137                 return 0;
2138
2139         system_journal_open(s);
2140
2141         if (!s->system_journal)
2142                 return 0;
2143
2144         log_info("Flushing to /var...");
2145
2146         r = sd_id128_get_machine(&machine);
2147         if (r < 0) {
2148                 log_error("Failed to get machine id: %s", strerror(-r));
2149                 return r;
2150         }
2151
2152         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
2153         if (r < 0) {
2154                 log_error("Failed to read runtime journal: %s", strerror(-r));
2155                 return r;
2156         }
2157
2158         SD_JOURNAL_FOREACH(j) {
2159                 JournalFile *f;
2160
2161                 f = j->current_file;
2162                 assert(f && f->current_offset > 0);
2163
2164                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2165                 if (r < 0) {
2166                         log_error("Can't read entry: %s", strerror(-r));
2167                         goto finish;
2168                 }
2169
2170                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2171                 if (r == -E2BIG) {
2172                         log_info("Allocation limit reached.");
2173
2174                         journal_file_post_change(s->system_journal);
2175                         server_rotate(s);
2176                         server_vacuum(s);
2177
2178                         r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2179                 }
2180
2181                 if (r < 0) {
2182                         log_error("Can't write entry: %s", strerror(-r));
2183                         goto finish;
2184                 }
2185         }
2186
2187 finish:
2188         journal_file_post_change(s->system_journal);
2189
2190         journal_file_close(s->runtime_journal);
2191         s->runtime_journal = NULL;
2192
2193         if (r >= 0)
2194                 rm_rf("/run/log/journal", false, true, false);
2195
2196         return r;
2197 }
2198
2199 static int server_read_dev_kmsg(Server *s) {
2200         char buffer[8192+1]; /* the kernel-side limit per record is 8K currently */
2201         ssize_t l;
2202
2203         assert(s);
2204         assert(s->dev_kmsg_fd >= 0);
2205
2206         l = read(s->dev_kmsg_fd, buffer, sizeof(buffer) - 1);
2207         if (l == 0)
2208                 return 0;
2209         if (l < 0) {
2210                 /* Old kernels who don't allow reading from /dev/kmsg
2211                  * return EINVAL when we try. So handle this cleanly,
2212                  * but don' try to ever read from it again. */
2213                 if (errno == EINVAL) {
2214                         epoll_ctl(s->epoll_fd, EPOLL_CTL_DEL, s->dev_kmsg_fd, NULL);
2215                         return 0;
2216                 }
2217
2218                 if (errno == EAGAIN || errno == EINTR || errno == EPIPE)
2219                         return 0;
2220
2221                 log_error("Failed to read from kernel: %m");
2222                 return -errno;
2223         }
2224
2225         dev_kmsg_record(s, buffer, l);
2226         return 1;
2227 }
2228
2229 static int server_flush_dev_kmsg(Server *s) {
2230         int r;
2231
2232         assert(s);
2233
2234         if (s->dev_kmsg_fd < 0)
2235                 return 0;
2236
2237         if (!s->dev_kmsg_readable)
2238                 return 0;
2239
2240         log_info("Flushing /dev/kmsg...");
2241
2242         for (;;) {
2243                 r = server_read_dev_kmsg(s);
2244                 if (r < 0)
2245                         return r;
2246
2247                 if (r == 0)
2248                         break;
2249         }
2250
2251         return 0;
2252 }
2253
2254 static int process_event(Server *s, struct epoll_event *ev) {
2255         assert(s);
2256         assert(ev);
2257
2258         if (ev->data.fd == s->signal_fd) {
2259                 struct signalfd_siginfo sfsi;
2260                 ssize_t n;
2261
2262                 if (ev->events != EPOLLIN) {
2263                         log_info("Got invalid event from epoll.");
2264                         return -EIO;
2265                 }
2266
2267                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
2268                 if (n != sizeof(sfsi)) {
2269
2270                         if (n >= 0)
2271                                 return -EIO;
2272
2273                         if (errno == EINTR || errno == EAGAIN)
2274                                 return 1;
2275
2276                         return -errno;
2277                 }
2278
2279                 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
2280
2281                 if (sfsi.ssi_signo == SIGUSR1) {
2282                         touch("/run/systemd/journal/flushed");
2283                         server_flush_to_var(s);
2284                         return 1;
2285                 }
2286
2287                 if (sfsi.ssi_signo == SIGUSR2) {
2288                         server_rotate(s);
2289                         server_vacuum(s);
2290                         return 1;
2291                 }
2292
2293                 return 0;
2294
2295         } else if (ev->data.fd == s->dev_kmsg_fd) {
2296                 int r;
2297
2298                 if (ev->events != EPOLLIN) {
2299                         log_info("Got invalid event from epoll.");
2300                         return -EIO;
2301                 }
2302
2303                 r = server_read_dev_kmsg(s);
2304                 if (r < 0)
2305                         return r;
2306
2307                 return 1;
2308
2309         } else if (ev->data.fd == s->native_fd ||
2310                    ev->data.fd == s->syslog_fd) {
2311
2312                 if (ev->events != EPOLLIN) {
2313                         log_info("Got invalid event from epoll.");
2314                         return -EIO;
2315                 }
2316
2317                 for (;;) {
2318                         struct msghdr msghdr;
2319                         struct iovec iovec;
2320                         struct ucred *ucred = NULL;
2321                         struct timeval *tv = NULL;
2322                         struct cmsghdr *cmsg;
2323                         char *label = NULL;
2324                         size_t label_len = 0;
2325                         union {
2326                                 struct cmsghdr cmsghdr;
2327
2328                                 /* We use NAME_MAX space for the
2329                                  * SELinux label here. The kernel
2330                                  * currently enforces no limit, but
2331                                  * according to suggestions from the
2332                                  * SELinux people this will change and
2333                                  * it will probably be identical to
2334                                  * NAME_MAX. For now we use that, but
2335                                  * this should be updated one day when
2336                                  * the final limit is known.*/
2337                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
2338                                             CMSG_SPACE(sizeof(struct timeval)) +
2339                                             CMSG_SPACE(sizeof(int)) + /* fd */
2340                                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
2341                         } control;
2342                         ssize_t n;
2343                         int v;
2344                         int *fds = NULL;
2345                         unsigned n_fds = 0;
2346
2347                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
2348                                 log_error("SIOCINQ failed: %m");
2349                                 return -errno;
2350                         }
2351
2352                         if (s->buffer_size < (size_t) v) {
2353                                 void *b;
2354                                 size_t l;
2355
2356                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
2357                                 b = realloc(s->buffer, l+1);
2358
2359                                 if (!b) {
2360                                         log_error("Couldn't increase buffer.");
2361                                         return -ENOMEM;
2362                                 }
2363
2364                                 s->buffer_size = l;
2365                                 s->buffer = b;
2366                         }
2367
2368                         zero(iovec);
2369                         iovec.iov_base = s->buffer;
2370                         iovec.iov_len = s->buffer_size;
2371
2372                         zero(control);
2373                         zero(msghdr);
2374                         msghdr.msg_iov = &iovec;
2375                         msghdr.msg_iovlen = 1;
2376                         msghdr.msg_control = &control;
2377                         msghdr.msg_controllen = sizeof(control);
2378
2379                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
2380                         if (n < 0) {
2381
2382                                 if (errno == EINTR || errno == EAGAIN)
2383                                         return 1;
2384
2385                                 log_error("recvmsg() failed: %m");
2386                                 return -errno;
2387                         }
2388
2389                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
2390
2391                                 if (cmsg->cmsg_level == SOL_SOCKET &&
2392                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
2393                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
2394                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
2395                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2396                                          cmsg->cmsg_type == SCM_SECURITY) {
2397                                         label = (char*) CMSG_DATA(cmsg);
2398                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
2399                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
2400                                          cmsg->cmsg_type == SO_TIMESTAMP &&
2401                                          cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
2402                                         tv = (struct timeval*) CMSG_DATA(cmsg);
2403                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2404                                          cmsg->cmsg_type == SCM_RIGHTS) {
2405                                         fds = (int*) CMSG_DATA(cmsg);
2406                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
2407                                 }
2408                         }
2409
2410                         if (ev->data.fd == s->syslog_fd) {
2411                                 char *e;
2412
2413                                 if (n > 0 && n_fds == 0) {
2414                                         e = memchr(s->buffer, '\n', n);
2415                                         if (e)
2416                                                 *e = 0;
2417                                         else
2418                                                 s->buffer[n] = 0;
2419
2420                                         process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
2421                                 } else if (n_fds > 0)
2422                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
2423
2424                         } else {
2425                                 if (n > 0 && n_fds == 0)
2426                                         process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
2427                                 else if (n == 0 && n_fds == 1)
2428                                         process_native_file(s, fds[0], ucred, tv, label, label_len);
2429                                 else if (n_fds > 0)
2430                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
2431                         }
2432
2433                         close_many(fds, n_fds);
2434                 }
2435
2436                 return 1;
2437
2438         } else if (ev->data.fd == s->stdout_fd) {
2439
2440                 if (ev->events != EPOLLIN) {
2441                         log_info("Got invalid event from epoll.");
2442                         return -EIO;
2443                 }
2444
2445                 stdout_stream_new(s);
2446                 return 1;
2447
2448         } else {
2449                 StdoutStream *stream;
2450
2451                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
2452                         log_info("Got invalid event from epoll.");
2453                         return -EIO;
2454                 }
2455
2456                 /* If it is none of the well-known fds, it must be an
2457                  * stdout stream fd. Note that this is a bit ugly here
2458                  * (since we rely that none of the well-known fds
2459                  * could be interpreted as pointer), but nonetheless
2460                  * safe, since the well-known fds would never get an
2461                  * fd > 4096, i.e. beyond the first memory page */
2462
2463                 stream = ev->data.ptr;
2464
2465                 if (stdout_stream_process(stream) <= 0)
2466                         stdout_stream_free(stream);
2467
2468                 return 1;
2469         }
2470
2471         log_error("Unknown event.");
2472         return 0;
2473 }
2474
2475 static int open_syslog_socket(Server *s) {
2476         union sockaddr_union sa;
2477         int one, r;
2478         struct epoll_event ev;
2479
2480         assert(s);
2481
2482         if (s->syslog_fd < 0) {
2483
2484                 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2485                 if (s->syslog_fd < 0) {
2486                         log_error("socket() failed: %m");
2487                         return -errno;
2488                 }
2489
2490                 zero(sa);
2491                 sa.un.sun_family = AF_UNIX;
2492                 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
2493
2494                 unlink(sa.un.sun_path);
2495
2496                 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2497                 if (r < 0) {
2498                         log_error("bind() failed: %m");
2499                         return -errno;
2500                 }
2501
2502                 chmod(sa.un.sun_path, 0666);
2503         } else
2504                 fd_nonblock(s->syslog_fd, 1);
2505
2506         one = 1;
2507         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2508         if (r < 0) {
2509                 log_error("SO_PASSCRED failed: %m");
2510                 return -errno;
2511         }
2512
2513 #ifdef HAVE_SELINUX
2514         one = 1;
2515         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2516         if (r < 0)
2517                 log_warning("SO_PASSSEC failed: %m");
2518 #endif
2519
2520         one = 1;
2521         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2522         if (r < 0) {
2523                 log_error("SO_TIMESTAMP failed: %m");
2524                 return -errno;
2525         }
2526
2527         zero(ev);
2528         ev.events = EPOLLIN;
2529         ev.data.fd = s->syslog_fd;
2530         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2531                 log_error("Failed to add syslog server fd to epoll object: %m");
2532                 return -errno;
2533         }
2534
2535         return 0;
2536 }
2537
2538 static int open_native_socket(Server*s) {
2539         union sockaddr_union sa;
2540         int one, r;
2541         struct epoll_event ev;
2542
2543         assert(s);
2544
2545         if (s->native_fd < 0) {
2546
2547                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2548                 if (s->native_fd < 0) {
2549                         log_error("socket() failed: %m");
2550                         return -errno;
2551                 }
2552
2553                 zero(sa);
2554                 sa.un.sun_family = AF_UNIX;
2555                 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
2556
2557                 unlink(sa.un.sun_path);
2558
2559                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2560                 if (r < 0) {
2561                         log_error("bind() failed: %m");
2562                         return -errno;
2563                 }
2564
2565                 chmod(sa.un.sun_path, 0666);
2566         } else
2567                 fd_nonblock(s->native_fd, 1);
2568
2569         one = 1;
2570         r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2571         if (r < 0) {
2572                 log_error("SO_PASSCRED failed: %m");
2573                 return -errno;
2574         }
2575
2576 #ifdef HAVE_SELINUX
2577         one = 1;
2578         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2579         if (r < 0)
2580                 log_warning("SO_PASSSEC failed: %m");
2581 #endif
2582
2583         one = 1;
2584         r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2585         if (r < 0) {
2586                 log_error("SO_TIMESTAMP failed: %m");
2587                 return -errno;
2588         }
2589
2590         zero(ev);
2591         ev.events = EPOLLIN;
2592         ev.data.fd = s->native_fd;
2593         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2594                 log_error("Failed to add native server fd to epoll object: %m");
2595                 return -errno;
2596         }
2597
2598         return 0;
2599 }
2600
2601 static int open_stdout_socket(Server *s) {
2602         union sockaddr_union sa;
2603         int r;
2604         struct epoll_event ev;
2605
2606         assert(s);
2607
2608         if (s->stdout_fd < 0) {
2609
2610                 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2611                 if (s->stdout_fd < 0) {
2612                         log_error("socket() failed: %m");
2613                         return -errno;
2614                 }
2615
2616                 zero(sa);
2617                 sa.un.sun_family = AF_UNIX;
2618                 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
2619
2620                 unlink(sa.un.sun_path);
2621
2622                 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2623                 if (r < 0) {
2624                         log_error("bind() failed: %m");
2625                         return -errno;
2626                 }
2627
2628                 chmod(sa.un.sun_path, 0666);
2629
2630                 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2631                         log_error("liste() failed: %m");
2632                         return -errno;
2633                 }
2634         } else
2635                 fd_nonblock(s->stdout_fd, 1);
2636
2637         zero(ev);
2638         ev.events = EPOLLIN;
2639         ev.data.fd = s->stdout_fd;
2640         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2641                 log_error("Failed to add stdout server fd to epoll object: %m");
2642                 return -errno;
2643         }
2644
2645         return 0;
2646 }
2647
2648 static int open_dev_kmsg(Server *s) {
2649         struct epoll_event ev;
2650
2651         assert(s);
2652
2653         s->dev_kmsg_fd = open("/dev/kmsg", O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2654         if (s->dev_kmsg_fd < 0) {
2655                 log_warning("Failed to open /dev/kmsg, ignoring: %m");
2656                 return 0;
2657         }
2658
2659         zero(ev);
2660         ev.events = EPOLLIN;
2661         ev.data.fd = s->dev_kmsg_fd;
2662         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->dev_kmsg_fd, &ev) < 0) {
2663
2664                 /* This will fail with EPERM on older kernels where
2665                  * /dev/kmsg is not readable. */
2666                 if (errno == EPERM)
2667                         return 0;
2668
2669                 log_error("Failed to add /dev/kmsg fd to epoll object: %m");
2670                 return -errno;
2671         }
2672
2673         s->dev_kmsg_readable = true;
2674
2675         return 0;
2676 }
2677
2678 static int open_kernel_seqnum(Server *s) {
2679         int fd;
2680         uint64_t *p;
2681
2682         assert(s);
2683
2684         /* We store the seqnum we last read in an mmaped file. That
2685          * way we can just use it like a variable, but it is
2686          * persistant and automatically flushed at reboot. */
2687
2688         fd = open("/run/systemd/journal/kernel-seqnum", O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0644);
2689         if (fd < 0) {
2690                 log_error("Failed to open /run/systemd/journal/kernel-seqnum, ignoring: %m");
2691                 return 0;
2692         }
2693
2694         if (posix_fallocate(fd, 0, sizeof(uint64_t)) < 0) {
2695                 log_error("Failed to allocate sequential number file, ignoring: %m");
2696                 close_nointr_nofail(fd);
2697                 return 0;
2698         }
2699
2700         p = mmap(NULL, sizeof(uint64_t), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2701         if (p == MAP_FAILED) {
2702                 log_error("Failed to map sequential number file, ignoring: %m");
2703                 close_nointr_nofail(fd);
2704                 return 0;
2705         }
2706
2707         close_nointr_nofail(fd);
2708         s->kernel_seqnum = p;
2709
2710         return 0;
2711 }
2712
2713 static int open_signalfd(Server *s) {
2714         sigset_t mask;
2715         struct epoll_event ev;
2716
2717         assert(s);
2718
2719         assert_se(sigemptyset(&mask) == 0);
2720         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
2721         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2722
2723         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2724         if (s->signal_fd < 0) {
2725                 log_error("signalfd(): %m");
2726                 return -errno;
2727         }
2728
2729         zero(ev);
2730         ev.events = EPOLLIN;
2731         ev.data.fd = s->signal_fd;
2732
2733         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2734                 log_error("epoll_ctl(): %m");
2735                 return -errno;
2736         }
2737
2738         return 0;
2739 }
2740
2741 static int server_parse_proc_cmdline(Server *s) {
2742         char *line, *w, *state;
2743         int r;
2744         size_t l;
2745
2746         if (detect_container(NULL) > 0)
2747                 return 0;
2748
2749         r = read_one_line_file("/proc/cmdline", &line);
2750         if (r < 0) {
2751                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2752                 return 0;
2753         }
2754
2755         FOREACH_WORD_QUOTED(w, l, line, state) {
2756                 char *word;
2757
2758                 word = strndup(w, l);
2759                 if (!word) {
2760                         r = -ENOMEM;
2761                         goto finish;
2762                 }
2763
2764                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
2765                         r = parse_boolean(word + 35);
2766                         if (r < 0)
2767                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2768                         else
2769                                 s->forward_to_syslog = r;
2770                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
2771                         r = parse_boolean(word + 33);
2772                         if (r < 0)
2773                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2774                         else
2775                                 s->forward_to_kmsg = r;
2776                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
2777                         r = parse_boolean(word + 36);
2778                         if (r < 0)
2779                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2780                         else
2781                                 s->forward_to_console = r;
2782                 } else if (startswith(word, "systemd.journald"))
2783                         log_warning("Invalid systemd.journald parameter. Ignoring.");
2784
2785                 free(word);
2786         }
2787
2788         r = 0;
2789
2790 finish:
2791         free(line);
2792         return r;
2793 }
2794
2795 static int server_parse_config_file(Server *s) {
2796         FILE *f;
2797         const char *fn;
2798         int r;
2799
2800         assert(s);
2801
2802         fn = "/etc/systemd/journald.conf";
2803         f = fopen(fn, "re");
2804         if (!f) {
2805                 if (errno == ENOENT)
2806                         return 0;
2807
2808                 log_warning("Failed to open configuration file %s: %m", fn);
2809                 return -errno;
2810         }
2811
2812         r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2813         if (r < 0)
2814                 log_warning("Failed to parse configuration file: %s", strerror(-r));
2815
2816         fclose(f);
2817
2818         return r;
2819 }
2820
2821 static int server_init(Server *s) {
2822         int n, r, fd;
2823
2824         assert(s);
2825
2826         zero(*s);
2827         s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
2828         s->compress = true;
2829         s->seal = true;
2830
2831         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
2832         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
2833
2834         s->forward_to_syslog = true;
2835
2836         s->max_level_store = LOG_DEBUG;
2837         s->max_level_syslog = LOG_DEBUG;
2838         s->max_level_kmsg = LOG_NOTICE;
2839         s->max_level_console = LOG_INFO;
2840
2841         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
2842         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
2843
2844         server_parse_config_file(s);
2845         server_parse_proc_cmdline(s);
2846
2847         mkdir_p("/run/systemd/journal", 0755);
2848
2849         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
2850         if (!s->user_journals)
2851                 return log_oom();
2852
2853         s->mmap = mmap_cache_new();
2854         if (!s->mmap)
2855                 return log_oom();
2856
2857         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2858         if (s->epoll_fd < 0) {
2859                 log_error("Failed to create epoll object: %m");
2860                 return -errno;
2861         }
2862
2863         n = sd_listen_fds(true);
2864         if (n < 0) {
2865                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
2866                 return n;
2867         }
2868
2869         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2870
2871                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
2872
2873                         if (s->native_fd >= 0) {
2874                                 log_error("Too many native sockets passed.");
2875                                 return -EINVAL;
2876                         }
2877
2878                         s->native_fd = fd;
2879
2880                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
2881
2882                         if (s->stdout_fd >= 0) {
2883                                 log_error("Too many stdout sockets passed.");
2884                                 return -EINVAL;
2885                         }
2886
2887                         s->stdout_fd = fd;
2888
2889                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
2890
2891                         if (s->syslog_fd >= 0) {
2892                                 log_error("Too many /dev/log sockets passed.");
2893                                 return -EINVAL;
2894                         }
2895
2896                         s->syslog_fd = fd;
2897
2898                 } else {
2899                         log_error("Unknown socket passed.");
2900                         return -EINVAL;
2901                 }
2902         }
2903
2904         r = open_syslog_socket(s);
2905         if (r < 0)
2906                 return r;
2907
2908         r = open_native_socket(s);
2909         if (r < 0)
2910                 return r;
2911
2912         r = open_stdout_socket(s);
2913         if (r < 0)
2914                 return r;
2915
2916         r = open_dev_kmsg(s);
2917         if (r < 0)
2918                 return r;
2919
2920         r = open_kernel_seqnum(s);
2921         if (r < 0)
2922                 return r;
2923
2924         r = open_signalfd(s);
2925         if (r < 0)
2926                 return r;
2927
2928         s->udev = udev_new();
2929         if (!s->udev)
2930                 return -ENOMEM;
2931
2932         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2933         if (!s->rate_limit)
2934                 return -ENOMEM;
2935
2936         r = system_journal_open(s);
2937         if (r < 0)
2938                 return r;
2939
2940         return 0;
2941 }
2942
2943 static void server_done(Server *s) {
2944         JournalFile *f;
2945         assert(s);
2946
2947         while (s->stdout_streams)
2948                 stdout_stream_free(s->stdout_streams);
2949
2950         if (s->system_journal)
2951                 journal_file_close(s->system_journal);
2952
2953         if (s->runtime_journal)
2954                 journal_file_close(s->runtime_journal);
2955
2956         while ((f = hashmap_steal_first(s->user_journals)))
2957                 journal_file_close(f);
2958
2959         hashmap_free(s->user_journals);
2960
2961         if (s->epoll_fd >= 0)
2962                 close_nointr_nofail(s->epoll_fd);
2963
2964         if (s->signal_fd >= 0)
2965                 close_nointr_nofail(s->signal_fd);
2966
2967         if (s->syslog_fd >= 0)
2968                 close_nointr_nofail(s->syslog_fd);
2969
2970         if (s->native_fd >= 0)
2971                 close_nointr_nofail(s->native_fd);
2972
2973         if (s->stdout_fd >= 0)
2974                 close_nointr_nofail(s->stdout_fd);
2975
2976         if (s->dev_kmsg_fd >= 0)
2977                 close_nointr_nofail(s->dev_kmsg_fd);
2978
2979         if (s->rate_limit)
2980                 journal_rate_limit_free(s->rate_limit);
2981
2982         if (s->kernel_seqnum)
2983                 munmap(s->kernel_seqnum, sizeof(uint64_t));
2984
2985         free(s->buffer);
2986         free(s->tty_path);
2987
2988         if (s->mmap)
2989                 mmap_cache_unref(s->mmap);
2990
2991         if (s->udev)
2992                 udev_unref(s->udev);
2993 }
2994
2995 int main(int argc, char *argv[]) {
2996         Server server;
2997         int r;
2998
2999         /* if (getppid() != 1) { */
3000         /*         log_error("This program should be invoked by init only."); */
3001         /*         return EXIT_FAILURE; */
3002         /* } */
3003
3004         if (argc > 1) {
3005                 log_error("This program does not take arguments.");
3006                 return EXIT_FAILURE;
3007         }
3008
3009         log_set_target(LOG_TARGET_SAFE);
3010         log_set_facility(LOG_SYSLOG);
3011         log_set_max_level(LOG_DEBUG);
3012         log_parse_environment();
3013         log_open();
3014
3015         umask(0022);
3016
3017         r = server_init(&server);
3018         if (r < 0)
3019                 goto finish;
3020
3021         server_vacuum(&server);
3022         server_flush_to_var(&server);
3023         server_flush_dev_kmsg(&server);
3024
3025         log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
3026         driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
3027
3028         sd_notify(false,
3029                   "READY=1\n"
3030                   "STATUS=Processing requests...");
3031
3032         for (;;) {
3033                 struct epoll_event event;
3034                 int t;
3035
3036 #ifdef HAVE_GCRYPT
3037                 usec_t u;
3038
3039                 if (server.system_journal &&
3040                     journal_file_next_evolve_usec(server.system_journal, &u)) {
3041                         usec_t n;
3042
3043                         n = now(CLOCK_MONOTONIC);
3044
3045                         if (n >= u)
3046                                 t = 0;
3047                         else
3048                                 t = (int) ((u - n + USEC_PER_MSEC - 1) / USEC_PER_MSEC);
3049                 } else
3050 #endif
3051                         t = -1;
3052
3053                 r = epoll_wait(server.epoll_fd, &event, 1, t);
3054                 if (r < 0) {
3055
3056                         if (errno == EINTR)
3057                                 continue;
3058
3059                         log_error("epoll_wait() failed: %m");
3060                         r = -errno;
3061                         goto finish;
3062                 }
3063
3064                 if (r > 0) {
3065                         r = process_event(&server, &event);
3066                         if (r < 0)
3067                                 goto finish;
3068                         else if (r == 0)
3069                                 break;
3070                 }
3071
3072 #ifdef HAVE_GCRYPT
3073                 if (server.system_journal)
3074                         journal_file_maybe_append_tag(server.system_journal, 0);
3075 #endif
3076         }
3077
3078         log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
3079         driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
3080
3081 finish:
3082         sd_notify(false,
3083                   "STATUS=Shutting down...");
3084
3085         server_done(&server);
3086
3087         return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
3088 }