chiark / gitweb /
8c41d9bab1e6b057b88c4cfa451c76275733139c
[elogind.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
32 #include <sys/mman.h>
33
34 #include <systemd/sd-journal.h>
35 #include <systemd/sd-messages.h>
36 #include <systemd/sd-daemon.h>
37
38 #ifdef HAVE_LOGIND
39 #include <systemd/sd-login.h>
40 #endif
41
42 #include "mkdir.h"
43 #include "hashmap.h"
44 #include "journal-file.h"
45 #include "socket-util.h"
46 #include "cgroup-util.h"
47 #include "list.h"
48 #include "journal-rate-limit.h"
49 #include "journal-internal.h"
50 #include "conf-parser.h"
51 #include "journald.h"
52 #include "virt.h"
53 #include "missing.h"
54
55 #ifdef HAVE_ACL
56 #include <sys/acl.h>
57 #include <acl/libacl.h>
58 #include "acl-util.h"
59 #endif
60
61 #ifdef HAVE_SELINUX
62 #include <selinux/selinux.h>
63 #endif
64
/* Upper bound on the number of per-user journal files kept open at the
 * same time; find_journal() closes already-open ones until the count
 * drops below this before opening another. */
#define USER_JOURNALS_MAX 1024
/* NOTE(review): presumably the cap on concurrent stdout stream
 * connections; the code using it is outside this view -- confirm. */
#define STDOUT_STREAMS_MAX 4096

/* NOTE(review): presumably the default rate limiting parameters (interval
 * in microseconds, and burst count); their users are outside this view --
 * confirm. */
#define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 200

/* How long a value computed by available_space() stays cached before it
 * is computed again. */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)

/* Number of iovec slots callers must leave free for the metadata fields
 * that dispatch_message_real() appends to an entry. */
#define N_IOVEC_META_FIELDS 17
/* NOTE(review): presumably the iovec slots reserved for fields of kernel
 * messages; the code using it is outside this view -- confirm. */
#define N_IOVEC_KERNEL_FIELDS 64

/* NOTE(review): presumably the maximum accepted size of a single entry in
 * bytes (32 MiB); the code using it is outside this view -- confirm. */
#define ENTRY_SIZE_MAX (1024*1024*32)
77
/* State of a StdoutStream (see the 'state' member of struct StdoutStream).
 *
 * NOTE(review): the names of all states except STDOUT_STREAM_RUNNING match
 * the per-stream settings stored in struct StdoutStream, which suggests
 * that each of them stands for one setting that is still to be received
 * before the stream starts carrying log data -- confirm against the stream
 * handling code, which is outside this view. */
typedef enum StdoutStreamState {
        STDOUT_STREAM_IDENTIFIER,
        STDOUT_STREAM_UNIT_ID,
        STDOUT_STREAM_PRIORITY,
        STDOUT_STREAM_LEVEL_PREFIX,
        STDOUT_STREAM_FORWARD_TO_SYSLOG,
        STDOUT_STREAM_FORWARD_TO_KMSG,
        STDOUT_STREAM_FORWARD_TO_CONSOLE,
        STDOUT_STREAM_RUNNING
} StdoutStreamState;
88
/* Per-connection bookkeeping for one stdout stream.
 *
 * NOTE(review): the code that fills in and consumes these members is
 * outside this view; the member comments below are derived from the
 * declarations only and should be confirmed against that code. */
struct StdoutStream {
        Server *server;                 /* the server this stream is attached to */
        StdoutStreamState state;        /* current state, see StdoutStreamState */

        int fd;                         /* file descriptor of the stream */

        struct ucred ucred;             /* credentials (pid/uid/gid) associated with the stream */
#ifdef HAVE_SELINUX
        security_context_t security_context; /* SELinux context, only when built with SELinux support */
#endif

        /* Per-stream settings; presumably supplied by the client -- confirm */
        char *identifier;
        char *unit_id;
        int priority;
        bool level_prefix:1;
        bool forward_to_syslog:1;
        bool forward_to_kmsg:1;
        bool forward_to_console:1;

        /* Line buffer with room for LINE_MAX bytes plus one extra byte;
         * 'length' is presumably the number of bytes currently held -- confirm */
        char buffer[LINE_MAX+1];
        size_t length;

        /* Linkage for the list of StdoutStream objects named stdout_stream */
        LIST_FIELDS(StdoutStream, stdout_stream);
};
113
/* Textual names of the Storage enum values, indexed by enum value. */
static const char* const storage_table[] = {
        [STORAGE_AUTO] = "auto",
        [STORAGE_VOLATILE] = "volatile",
        [STORAGE_PERSISTENT] = "persistent",
        [STORAGE_NONE] = "none"
};

/* Generates the string lookup helpers for Storage on top of storage_table.
 * NOTE(review): presumably storage_to_string()/storage_from_string();
 * the macro is defined elsewhere -- confirm. */
DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
/* Defines config_parse_storage, a configuration parser callback for Storage
 * values that uses the given text as its error message. */
DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
123
124 static uint64_t available_space(Server *s) {
125         char ids[33], *p;
126         const char *f;
127         sd_id128_t machine;
128         struct statvfs ss;
129         uint64_t sum = 0, avail = 0, ss_avail = 0;
130         int r;
131         DIR *d;
132         usec_t ts;
133         JournalMetrics *m;
134
135         ts = now(CLOCK_MONOTONIC);
136
137         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
138                 return s->cached_available_space;
139
140         r = sd_id128_get_machine(&machine);
141         if (r < 0)
142                 return 0;
143
144         if (s->system_journal) {
145                 f = "/var/log/journal/";
146                 m = &s->system_metrics;
147         } else {
148                 f = "/run/log/journal/";
149                 m = &s->runtime_metrics;
150         }
151
152         assert(m);
153
154         p = strappend(f, sd_id128_to_string(machine, ids));
155         if (!p)
156                 return 0;
157
158         d = opendir(p);
159         free(p);
160
161         if (!d)
162                 return 0;
163
164         if (fstatvfs(dirfd(d), &ss) < 0)
165                 goto finish;
166
167         for (;;) {
168                 struct stat st;
169                 struct dirent buf, *de;
170
171                 r = readdir_r(d, &buf, &de);
172                 if (r != 0)
173                         break;
174
175                 if (!de)
176                         break;
177
178                 if (!endswith(de->d_name, ".journal") &&
179                     !endswith(de->d_name, ".journal~"))
180                         continue;
181
182                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
183                         continue;
184
185                 if (!S_ISREG(st.st_mode))
186                         continue;
187
188                 sum += (uint64_t) st.st_blocks * 512UL;
189         }
190
191         avail = sum >= m->max_use ? 0 : m->max_use - sum;
192
193         ss_avail = ss.f_bsize * ss.f_bavail;
194
195         ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
196
197         if (ss_avail < avail)
198                 avail = ss_avail;
199
200         s->cached_available_space = avail;
201         s->cached_available_space_timestamp = ts;
202
203 finish:
204         closedir(d);
205
206         return avail;
207 }
208
209 static void server_read_file_gid(Server *s) {
210         const char *adm = "adm";
211         int r;
212
213         assert(s);
214
215         if (s->file_gid_valid)
216                 return;
217
218         r = get_group_creds(&adm, &s->file_gid);
219         if (r < 0)
220                 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
221
222         /* if we couldn't read the gid, then it will be 0, but that's
223          * fine and we shouldn't try to resolve the group again, so
224          * let's just pretend it worked right-away. */
225         s->file_gid_valid = true;
226 }
227
228 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
229         int r;
230 #ifdef HAVE_ACL
231         acl_t acl;
232         acl_entry_t entry;
233         acl_permset_t permset;
234 #endif
235
236         assert(f);
237
238         server_read_file_gid(s);
239
240         r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
241         if (r < 0)
242                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
243
244 #ifdef HAVE_ACL
245         if (uid <= 0)
246                 return;
247
248         acl = acl_get_fd(f->fd);
249         if (!acl) {
250                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
251                 return;
252         }
253
254         r = acl_find_uid(acl, uid, &entry);
255         if (r <= 0) {
256
257                 if (acl_create_entry(&acl, &entry) < 0 ||
258                     acl_set_tag_type(entry, ACL_USER) < 0 ||
259                     acl_set_qualifier(entry, &uid) < 0) {
260                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
261                         goto finish;
262                 }
263         }
264
265         if (acl_get_permset(entry, &permset) < 0 ||
266             acl_add_perm(permset, ACL_READ) < 0 ||
267             acl_calc_mask(&acl) < 0) {
268                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
269                 goto finish;
270         }
271
272         if (acl_set_fd(f->fd, acl) < 0)
273                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
274
275 finish:
276         acl_free(acl);
277 #endif
278 }
279
280 static JournalFile* find_journal(Server *s, uid_t uid) {
281         char *p;
282         int r;
283         JournalFile *f;
284         sd_id128_t machine;
285
286         assert(s);
287
288         /* We split up user logs only on /var, not on /run. If the
289          * runtime file is open, we write to it exclusively, in order
290          * to guarantee proper order as soon as we flush /run to
291          * /var and close the runtime file. */
292
293         if (s->runtime_journal)
294                 return s->runtime_journal;
295
296         if (uid <= 0)
297                 return s->system_journal;
298
299         r = sd_id128_get_machine(&machine);
300         if (r < 0)
301                 return s->system_journal;
302
303         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
304         if (f)
305                 return f;
306
307         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
308                      SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
309                 return s->system_journal;
310
311         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
312                 /* Too many open? Then let's close one */
313                 f = hashmap_steal_first(s->user_journals);
314                 assert(f);
315                 journal_file_close(f);
316         }
317
318         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, false, &s->system_metrics, s->system_journal, &f);
319         free(p);
320
321         if (r < 0)
322                 return s->system_journal;
323
324         server_fix_perms(s, f, uid);
325
326         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
327         if (r < 0) {
328                 journal_file_close(f);
329                 return s->system_journal;
330         }
331
332         return f;
333 }
334
335 static void server_rotate(Server *s) {
336         JournalFile *f;
337         void *k;
338         Iterator i;
339         int r;
340
341         log_info("Rotating...");
342
343         if (s->runtime_journal) {
344                 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
345                 if (r < 0)
346                         if (s->runtime_journal)
347                                 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
348                         else
349                                 log_error("Failed to create new runtime journal: %s", strerror(-r));
350                 else
351                         server_fix_perms(s, s->runtime_journal, 0);
352         }
353
354         if (s->system_journal) {
355                 r = journal_file_rotate(&s->system_journal, s->compress, true);
356                 if (r < 0)
357                         if (s->system_journal)
358                                 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
359                         else
360                                 log_error("Failed to create new system journal: %s", strerror(-r));
361
362                 else
363                         server_fix_perms(s, s->system_journal, 0);
364         }
365
366         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
367                 r = journal_file_rotate(&f, s->compress, false);
368                 if (r < 0)
369                         if (f->path)
370                                 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
371                         else
372                                 log_error("Failed to create user journal: %s", strerror(-r));
373                 else {
374                         hashmap_replace(s->user_journals, k, f);
375                         server_fix_perms(s, s->system_journal, PTR_TO_UINT32(k));
376                 }
377         }
378 }
379
380 static void server_vacuum(Server *s) {
381         char *p;
382         char ids[33];
383         sd_id128_t machine;
384         int r;
385
386         log_info("Vacuuming...");
387
388         r = sd_id128_get_machine(&machine);
389         if (r < 0) {
390                 log_error("Failed to get machine ID: %s", strerror(-r));
391                 return;
392         }
393
394         sd_id128_to_string(machine, ids);
395
396         if (s->system_journal) {
397                 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
398                         log_oom();
399                         return;
400                 }
401
402                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
403                 if (r < 0 && r != -ENOENT)
404                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
405                 free(p);
406         }
407
408         if (s->runtime_journal) {
409                 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
410                         log_oom();
411                         return;
412                 }
413
414                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
415                 if (r < 0 && r != -ENOENT)
416                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
417                 free(p);
418         }
419
420         s->cached_available_space_timestamp = 0;
421 }
422
423 static char *shortened_cgroup_path(pid_t pid) {
424         int r;
425         char *process_path, *init_path, *path;
426
427         assert(pid > 0);
428
429         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
430         if (r < 0)
431                 return NULL;
432
433         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
434         if (r < 0) {
435                 free(process_path);
436                 return NULL;
437         }
438
439         if (endswith(init_path, "/system"))
440                 init_path[strlen(init_path) - 7] = 0;
441         else if (streq(init_path, "/"))
442                 init_path[0] = 0;
443
444         if (startswith(process_path, init_path)) {
445                 char *p;
446
447                 p = strdup(process_path + strlen(init_path));
448                 if (!p) {
449                         free(process_path);
450                         free(init_path);
451                         return NULL;
452                 }
453                 path = p;
454         } else {
455                 path = process_path;
456                 process_path = NULL;
457         }
458
459         free(process_path);
460         free(init_path);
461
462         return path;
463 }
464
465 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
466         JournalFile *f;
467         bool vacuumed = false;
468         int r;
469
470         assert(s);
471         assert(iovec);
472         assert(n > 0);
473
474         f = find_journal(s, uid);
475         if (!f)
476                 return;
477
478         if (journal_file_rotate_suggested(f)) {
479                 log_info("Journal header limits reached or header out-of-date, rotating.");
480                 server_rotate(s);
481                 server_vacuum(s);
482                 vacuumed = true;
483
484                 f = find_journal(s, uid);
485                 if (!f)
486                         return;
487         }
488
489         for (;;) {
490                 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
491                 if (r >= 0)
492                         return;
493
494                 if (vacuumed ||
495                     (r != -E2BIG && /* hit limit */
496                      r != -EFBIG && /* hit fs limit */
497                      r != -EDQUOT && /* quota hit */
498                      r != -ENOSPC && /* disk full */
499                      r != -EBADMSG && /* corrupted */
500                      r != -ENODATA && /* truncated */
501                      r != -EHOSTDOWN && /* other machine */
502                      r != -EPROTONOSUPPORT && /* unsupported feature */
503                      r != -EBUSY && /* unclean shutdown */
504                      r != -ESHUTDOWN /* already archived */)) {
505                         log_error("Failed to write entry, ignoring: %s", strerror(-r));
506                         return;
507                 }
508
509                 if (r == -E2BIG || r == -EFBIG || r == EDQUOT || r == ENOSPC)
510                         log_info("Allocation limit reached, rotating.");
511                 else if (r == -EHOSTDOWN)
512                         log_info("Journal file from other machine, rotating.");
513                 else if (r == -EBUSY)
514                         log_info("Unlcean shutdown, rotating.");
515                 else
516                         log_warning("Journal file corrupted, rotating.");
517
518                 server_rotate(s);
519                 server_vacuum(s);
520                 vacuumed = true;
521
522                 f = find_journal(s, uid);
523                 if (!f)
524                         return;
525
526                 log_info("Retrying write.");
527         }
528 }
529
530 static void dispatch_message_real(
531                 Server *s,
532                 struct iovec *iovec, unsigned n, unsigned m,
533                 struct ucred *ucred,
534                 struct timeval *tv,
535                 const char *label, size_t label_len,
536                 const char *unit_id) {
537
538         char *pid = NULL, *uid = NULL, *gid = NULL,
539                 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
540                 *comm = NULL, *cmdline = NULL, *hostname = NULL,
541                 *audit_session = NULL, *audit_loginuid = NULL,
542                 *exe = NULL, *cgroup = NULL, *session = NULL,
543                 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
544
545         char idbuf[33];
546         sd_id128_t id;
547         int r;
548         char *t;
549         uid_t loginuid = 0, realuid = 0;
550
551         assert(s);
552         assert(iovec);
553         assert(n > 0);
554         assert(n + N_IOVEC_META_FIELDS <= m);
555
556         if (ucred) {
557                 uint32_t audit;
558 #ifdef HAVE_LOGIND
559                 uid_t owner;
560 #endif
561
562                 realuid = ucred->uid;
563
564                 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
565                         IOVEC_SET_STRING(iovec[n++], pid);
566
567                 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
568                         IOVEC_SET_STRING(iovec[n++], uid);
569
570                 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
571                         IOVEC_SET_STRING(iovec[n++], gid);
572
573                 r = get_process_comm(ucred->pid, &t);
574                 if (r >= 0) {
575                         comm = strappend("_COMM=", t);
576                         free(t);
577
578                         if (comm)
579                                 IOVEC_SET_STRING(iovec[n++], comm);
580                 }
581
582                 r = get_process_exe(ucred->pid, &t);
583                 if (r >= 0) {
584                         exe = strappend("_EXE=", t);
585                         free(t);
586
587                         if (exe)
588                                 IOVEC_SET_STRING(iovec[n++], exe);
589                 }
590
591                 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
592                 if (r >= 0) {
593                         cmdline = strappend("_CMDLINE=", t);
594                         free(t);
595
596                         if (cmdline)
597                                 IOVEC_SET_STRING(iovec[n++], cmdline);
598                 }
599
600                 r = audit_session_from_pid(ucred->pid, &audit);
601                 if (r >= 0)
602                         if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
603                                 IOVEC_SET_STRING(iovec[n++], audit_session);
604
605                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
606                 if (r >= 0)
607                         if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
608                                 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
609
610                 t = shortened_cgroup_path(ucred->pid);
611                 if (t) {
612                         cgroup = strappend("_SYSTEMD_CGROUP=", t);
613                         free(t);
614
615                         if (cgroup)
616                                 IOVEC_SET_STRING(iovec[n++], cgroup);
617                 }
618
619 #ifdef HAVE_LOGIND
620                 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
621                         session = strappend("_SYSTEMD_SESSION=", t);
622                         free(t);
623
624                         if (session)
625                                 IOVEC_SET_STRING(iovec[n++], session);
626                 }
627
628                 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
629                         if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
630                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
631 #endif
632
633                 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
634                         unit = strappend("_SYSTEMD_UNIT=", t);
635                         free(t);
636                 } else if (unit_id)
637                         unit = strappend("_SYSTEMD_UNIT=", unit_id);
638
639                 if (unit)
640                         IOVEC_SET_STRING(iovec[n++], unit);
641
642 #ifdef HAVE_SELINUX
643                 if (label) {
644                         selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
645                         if (selinux_context) {
646                                 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
647                                 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
648                                 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
649                                 IOVEC_SET_STRING(iovec[n++], selinux_context);
650                         }
651                 } else {
652                         security_context_t con;
653
654                         if (getpidcon(ucred->pid, &con) >= 0) {
655                                 selinux_context = strappend("_SELINUX_CONTEXT=", con);
656                                 if (selinux_context)
657                                         IOVEC_SET_STRING(iovec[n++], selinux_context);
658
659                                 freecon(con);
660                         }
661                 }
662 #endif
663         }
664
665         if (tv) {
666                 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
667                              (unsigned long long) timeval_load(tv)) >= 0)
668                         IOVEC_SET_STRING(iovec[n++], source_time);
669         }
670
671         /* Note that strictly speaking storing the boot id here is
672          * redundant since the entry includes this in-line
673          * anyway. However, we need this indexed, too. */
674         r = sd_id128_get_boot(&id);
675         if (r >= 0)
676                 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
677                         IOVEC_SET_STRING(iovec[n++], boot_id);
678
679         r = sd_id128_get_machine(&id);
680         if (r >= 0)
681                 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
682                         IOVEC_SET_STRING(iovec[n++], machine_id);
683
684         t = gethostname_malloc();
685         if (t) {
686                 hostname = strappend("_HOSTNAME=", t);
687                 free(t);
688                 if (hostname)
689                         IOVEC_SET_STRING(iovec[n++], hostname);
690         }
691
692         assert(n <= m);
693
694         write_to_journal(s, realuid == 0 ? 0 : loginuid, iovec, n);
695
696         free(pid);
697         free(uid);
698         free(gid);
699         free(comm);
700         free(exe);
701         free(cmdline);
702         free(source_time);
703         free(boot_id);
704         free(machine_id);
705         free(hostname);
706         free(audit_session);
707         free(audit_loginuid);
708         free(cgroup);
709         free(session);
710         free(owner_uid);
711         free(unit);
712         free(selinux_context);
713 }
714
715 static void driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
716         char mid[11 + 32 + 1];
717         char buffer[16 + LINE_MAX + 1];
718         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
719         int n = 0;
720         va_list ap;
721         struct ucred ucred;
722
723         assert(s);
724         assert(format);
725
726         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
727         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
728
729         memcpy(buffer, "MESSAGE=", 8);
730         va_start(ap, format);
731         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
732         va_end(ap);
733         char_array_0(buffer);
734         IOVEC_SET_STRING(iovec[n++], buffer);
735
736         snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
737         char_array_0(mid);
738         IOVEC_SET_STRING(iovec[n++], mid);
739
740         zero(ucred);
741         ucred.pid = getpid();
742         ucred.uid = getuid();
743         ucred.gid = getgid();
744
745         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
746 }
747
748 static void dispatch_message(Server *s,
749                              struct iovec *iovec, unsigned n, unsigned m,
750                              struct ucred *ucred,
751                              struct timeval *tv,
752                              const char *label, size_t label_len,
753                              const char *unit_id,
754                              int priority) {
755         int rl;
756         char *path = NULL, *c;
757
758         assert(s);
759         assert(iovec || n == 0);
760
761         if (n == 0)
762                 return;
763
764         if (LOG_PRI(priority) > s->max_level_store)
765                 return;
766
767         if (!ucred)
768                 goto finish;
769
770         path = shortened_cgroup_path(ucred->pid);
771         if (!path)
772                 goto finish;
773
774         /* example: /user/lennart/3/foobar
775          *          /system/dbus.service/foobar
776          *
777          * So let's cut of everything past the third /, since that is
778          * wher user directories start */
779
780         c = strchr(path, '/');
781         if (c) {
782                 c = strchr(c+1, '/');
783                 if (c) {
784                         c = strchr(c+1, '/');
785                         if (c)
786                                 *c = 0;
787                 }
788         }
789
790         rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
791
792         if (rl == 0) {
793                 free(path);
794                 return;
795         }
796
797         /* Write a suppression message if we suppressed something */
798         if (rl > 1)
799                 driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
800
801         free(path);
802
803 finish:
804         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
805 }
806
807 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
808         struct msghdr msghdr;
809         struct cmsghdr *cmsg;
810         union {
811                 struct cmsghdr cmsghdr;
812                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
813         } control;
814         union sockaddr_union sa;
815
816         assert(s);
817         assert(iovec);
818         assert(n_iovec > 0);
819
820         zero(msghdr);
821         msghdr.msg_iov = (struct iovec*) iovec;
822         msghdr.msg_iovlen = n_iovec;
823
824         zero(sa);
825         sa.un.sun_family = AF_UNIX;
826         strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
827         msghdr.msg_name = &sa;
828         msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
829
830         if (ucred) {
831                 zero(control);
832                 msghdr.msg_control = &control;
833                 msghdr.msg_controllen = sizeof(control);
834
835                 cmsg = CMSG_FIRSTHDR(&msghdr);
836                 cmsg->cmsg_level = SOL_SOCKET;
837                 cmsg->cmsg_type = SCM_CREDENTIALS;
838                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
839                 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
840                 msghdr.msg_controllen = cmsg->cmsg_len;
841         }
842
843         /* Forward the syslog message we received via /dev/log to
844          * /run/systemd/syslog. Unfortunately we currently can't set
845          * the SO_TIMESTAMP auxiliary data, and hence we don't. */
846
847         if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
848                 return;
849
850         /* The socket is full? I guess the syslog implementation is
851          * too slow, and we shouldn't wait for that... */
852         if (errno == EAGAIN)
853                 return;
854
855         if (ucred && errno == ESRCH) {
856                 struct ucred u;
857
858                 /* Hmm, presumably the sender process vanished
859                  * by now, so let's fix it as good as we
860                  * can, and retry */
861
862                 u = *ucred;
863                 u.pid = getpid();
864                 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
865
866                 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
867                         return;
868
869                 if (errno == EAGAIN)
870                         return;
871         }
872
873         if (errno != ENOENT)
874                 log_debug("Failed to forward syslog message: %m");
875 }
876
877 static void forward_syslog_raw(Server *s, int priority, const char *buffer, struct ucred *ucred, struct timeval *tv) {
878         struct iovec iovec;
879
880         assert(s);
881         assert(buffer);
882
883         if (LOG_PRI(priority) > s->max_level_syslog)
884                 return;
885
886         IOVEC_SET_STRING(iovec, buffer);
887         forward_syslog_iovec(s, &iovec, 1, ucred, tv);
888 }
889
890 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
891         struct iovec iovec[5];
892         char header_priority[6], header_time[64], header_pid[16];
893         int n = 0;
894         time_t t;
895         struct tm *tm;
896         char *ident_buf = NULL;
897
898         assert(s);
899         assert(priority >= 0);
900         assert(priority <= 999);
901         assert(message);
902
903         if (LOG_PRI(priority) > s->max_level_syslog)
904                 return;
905
906         /* First: priority field */
907         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
908         char_array_0(header_priority);
909         IOVEC_SET_STRING(iovec[n++], header_priority);
910
911         /* Second: timestamp */
912         t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
913         tm = localtime(&t);
914         if (!tm)
915                 return;
916         if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
917                 return;
918         IOVEC_SET_STRING(iovec[n++], header_time);
919
920         /* Third: identifier and PID */
921         if (ucred) {
922                 if (!identifier) {
923                         get_process_comm(ucred->pid, &ident_buf);
924                         identifier = ident_buf;
925                 }
926
927                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
928                 char_array_0(header_pid);
929
930                 if (identifier)
931                         IOVEC_SET_STRING(iovec[n++], identifier);
932
933                 IOVEC_SET_STRING(iovec[n++], header_pid);
934         } else if (identifier) {
935                 IOVEC_SET_STRING(iovec[n++], identifier);
936                 IOVEC_SET_STRING(iovec[n++], ": ");
937         }
938
939         /* Fourth: message */
940         IOVEC_SET_STRING(iovec[n++], message);
941
942         forward_syslog_iovec(s, iovec, n, ucred, tv);
943
944         free(ident_buf);
945 }
946
/* Returns the priority unchanged if it carries a non-zero facility;
 * otherwise keeps only the level bits and applies the LOG_USER
 * facility. */
static int fixup_priority(int priority) {
        return (priority & LOG_FACMASK) != 0
                ? priority
                : ((priority & LOG_PRIMASK) | LOG_USER);
}
954
955 static void forward_kmsg(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
956         struct iovec iovec[5];
957         char header_priority[6], header_pid[16];
958         int n = 0;
959         char *ident_buf = NULL;
960
961         assert(s);
962         assert(priority >= 0);
963         assert(priority <= 999);
964         assert(message);
965
966         if (_unlikely_(LOG_PRI(priority) > s->max_level_kmsg))
967                 return;
968
969         if (_unlikely_(s->dev_kmsg_fd < 0))
970                 return;
971
972         /* Never allow messages with kernel facility to be written to
973          * kmsg, regardless where the data comes from. */
974         priority = fixup_priority(priority);
975
976         /* First: priority field */
977         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
978         char_array_0(header_priority);
979         IOVEC_SET_STRING(iovec[n++], header_priority);
980
981         /* Second: identifier and PID */
982         if (ucred) {
983                 if (!identifier) {
984                         get_process_comm(ucred->pid, &ident_buf);
985                         identifier = ident_buf;
986                 }
987
988                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
989                 char_array_0(header_pid);
990
991                 if (identifier)
992                         IOVEC_SET_STRING(iovec[n++], identifier);
993
994                 IOVEC_SET_STRING(iovec[n++], header_pid);
995         } else if (identifier) {
996                 IOVEC_SET_STRING(iovec[n++], identifier);
997                 IOVEC_SET_STRING(iovec[n++], ": ");
998         }
999
1000         /* Fourth: message */
1001         IOVEC_SET_STRING(iovec[n++], message);
1002         IOVEC_SET_STRING(iovec[n++], "\n");
1003
1004         if (writev(s->dev_kmsg_fd, iovec, n) < 0)
1005                 log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
1006
1007         free(ident_buf);
1008 }
1009
1010 static void forward_console(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
1011         struct iovec iovec[4];
1012         char header_pid[16];
1013         int n = 0, fd;
1014         char *ident_buf = NULL;
1015         const char *tty;
1016
1017         assert(s);
1018         assert(message);
1019
1020         if (LOG_PRI(priority) > s->max_level_console)
1021                 return;
1022
1023         /* First: identifier and PID */
1024         if (ucred) {
1025                 if (!identifier) {
1026                         get_process_comm(ucred->pid, &ident_buf);
1027                         identifier = ident_buf;
1028                 }
1029
1030                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
1031                 char_array_0(header_pid);
1032
1033                 if (identifier)
1034                         IOVEC_SET_STRING(iovec[n++], identifier);
1035
1036                 IOVEC_SET_STRING(iovec[n++], header_pid);
1037         } else if (identifier) {
1038                 IOVEC_SET_STRING(iovec[n++], identifier);
1039                 IOVEC_SET_STRING(iovec[n++], ": ");
1040         }
1041
1042         /* Third: message */
1043         IOVEC_SET_STRING(iovec[n++], message);
1044         IOVEC_SET_STRING(iovec[n++], "\n");
1045
1046         tty = s->tty_path ? s->tty_path : "/dev/console";
1047
1048         fd = open_terminal(tty, O_WRONLY|O_NOCTTY|O_CLOEXEC);
1049         if (fd < 0) {
1050                 log_debug("Failed to open %s for logging: %s", tty, strerror(errno));
1051                 goto finish;
1052         }
1053
1054         if (writev(fd, iovec, n) < 0)
1055                 log_debug("Failed to write to %s for logging: %s", tty, strerror(errno));
1056
1057         close_nointr_nofail(fd);
1058
1059 finish:
1060         free(ident_buf);
1061 }
1062
1063 static void read_identifier(const char **buf, char **identifier, char **pid) {
1064         const char *p;
1065         char *t;
1066         size_t l, e;
1067
1068         assert(buf);
1069         assert(identifier);
1070         assert(pid);
1071
1072         p = *buf;
1073
1074         p += strspn(p, WHITESPACE);
1075         l = strcspn(p, WHITESPACE);
1076
1077         if (l <= 0 ||
1078             p[l-1] != ':')
1079                 return;
1080
1081         e = l;
1082         l--;
1083
1084         if (p[l-1] == ']') {
1085                 size_t k = l-1;
1086
1087                 for (;;) {
1088
1089                         if (p[k] == '[') {
1090                                 t = strndup(p+k+1, l-k-2);
1091                                 if (t)
1092                                         *pid = t;
1093
1094                                 l = k;
1095                                 break;
1096                         }
1097
1098                         if (k == 0)
1099                                 break;
1100
1101                         k--;
1102                 }
1103         }
1104
1105         t = strndup(p, l);
1106         if (t)
1107                 *identifier = t;
1108
1109         *buf = p + e;
1110         *buf += strspn(*buf, WHITESPACE);
1111 }
1112
1113 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv, const char *label, size_t label_len) {
1114         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
1115         struct iovec iovec[N_IOVEC_META_FIELDS + 6];
1116         unsigned n = 0;
1117         int priority = LOG_USER | LOG_INFO;
1118         char *identifier = NULL, *pid = NULL;
1119         const char *orig;
1120
1121         assert(s);
1122         assert(buf);
1123
1124         orig = buf;
1125         parse_syslog_priority((char**) &buf, &priority);
1126
1127         if (s->forward_to_syslog)
1128                 forward_syslog_raw(s, priority, orig, ucred, tv);
1129
1130         skip_syslog_date((char**) &buf);
1131         read_identifier(&buf, &identifier, &pid);
1132
1133         if (s->forward_to_kmsg)
1134                 forward_kmsg(s, priority, identifier, buf, ucred);
1135
1136         if (s->forward_to_console)
1137                 forward_console(s, priority, identifier, buf, ucred);
1138
1139         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
1140
1141         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1142                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1143
1144         if (priority & LOG_FACMASK)
1145                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1146                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1147
1148         if (identifier) {
1149                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1150                 if (syslog_identifier)
1151                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1152         }
1153
1154         if (pid) {
1155                 syslog_pid = strappend("SYSLOG_PID=", pid);
1156                 if (syslog_pid)
1157                         IOVEC_SET_STRING(iovec[n++], syslog_pid);
1158         }
1159
1160         message = strappend("MESSAGE=", buf);
1161         if (message)
1162                 IOVEC_SET_STRING(iovec[n++], message);
1163
1164         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, label, label_len, NULL, priority);
1165
1166         free(message);
1167         free(identifier);
1168         free(pid);
1169         free(syslog_priority);
1170         free(syslog_facility);
1171         free(syslog_identifier);
1172         free(syslog_pid);
1173 }
1174
/* Checks whether the l bytes at p form a field name that clients may
 * set themselves. This roughly follows the POSIX syntax recommendations
 * for environment variable names, with a few extra restrictions:
 *
 * http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
 *
 * A valid name is 1..64 characters long, consists only of A-Z, 0-9 and
 * '_', and starts with neither a digit nor an underscore (names with a
 * leading underscore are protected). */
static bool valid_user_field(const char *p, size_t l) {
        size_t i;

        /* Length limits: not empty, at most 64 characters */
        if (l == 0 || l > 64)
                return false;

        /* Restrictions on the first character */
        if (p[0] == '_')
                return false;

        if (p[0] >= '0' && p[0] <= '9')
                return false;

        /* Allowed alphabet for every character */
        for (i = 0; i < l; i++) {
                char c = p[i];
                bool upper = c >= 'A' && c <= 'Z';
                bool digit = c >= '0' && c <= '9';

                if (!upper && !digit && c != '_')
                        return false;
        }

        return true;
}
1209
1210 static void process_native_message(
1211                 Server *s,
1212                 const void *buffer, size_t buffer_size,
1213                 struct ucred *ucred,
1214                 struct timeval *tv,
1215                 const char *label, size_t label_len) {
1216
1217         struct iovec *iovec = NULL;
1218         unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1219         const char *p;
1220         size_t remaining;
1221         int priority = LOG_INFO;
1222         char *identifier = NULL, *message = NULL;
1223
1224         assert(s);
1225         assert(buffer || buffer_size == 0);
1226
1227         p = buffer;
1228         remaining = buffer_size;
1229
1230         while (remaining > 0) {
1231                 const char *e, *q;
1232
1233                 e = memchr(p, '\n', remaining);
1234
1235                 if (!e) {
1236                         /* Trailing noise, let's ignore it, and flush what we collected */
1237                         log_debug("Received message with trailing noise, ignoring.");
1238                         break;
1239                 }
1240
1241                 if (e == p) {
1242                         /* Entry separator */
1243                         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
1244                         n = 0;
1245                         priority = LOG_INFO;
1246
1247                         p++;
1248                         remaining--;
1249                         continue;
1250                 }
1251
1252                 if (*p == '.' || *p == '#') {
1253                         /* Ignore control commands for now, and
1254                          * comments too. */
1255                         remaining -= (e - p) + 1;
1256                         p = e + 1;
1257                         continue;
1258                 }
1259
1260                 /* A property follows */
1261
1262                 if (n+N_IOVEC_META_FIELDS >= m) {
1263                         struct iovec *c;
1264                         unsigned u;
1265
1266                         u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1267                         c = realloc(iovec, u * sizeof(struct iovec));
1268                         if (!c) {
1269                                 log_oom();
1270                                 break;
1271                         }
1272
1273                         iovec = c;
1274                         m = u;
1275                 }
1276
1277                 q = memchr(p, '=', e - p);
1278                 if (q) {
1279                         if (valid_user_field(p, q - p)) {
1280                                 size_t l;
1281
1282                                 l = e - p;
1283
1284                                 /* If the field name starts with an
1285                                  * underscore, skip the variable,
1286                                  * since that indidates a trusted
1287                                  * field */
1288                                 iovec[n].iov_base = (char*) p;
1289                                 iovec[n].iov_len = l;
1290                                 n++;
1291
1292                                 /* We need to determine the priority
1293                                  * of this entry for the rate limiting
1294                                  * logic */
1295                                 if (l == 10 &&
1296                                     memcmp(p, "PRIORITY=", 9) == 0 &&
1297                                     p[9] >= '0' && p[9] <= '9')
1298                                         priority = (priority & LOG_FACMASK) | (p[9] - '0');
1299
1300                                 else if (l == 17 &&
1301                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1302                                          p[16] >= '0' && p[16] <= '9')
1303                                         priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1304
1305                                 else if (l == 18 &&
1306                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1307                                          p[16] >= '0' && p[16] <= '9' &&
1308                                          p[17] >= '0' && p[17] <= '9')
1309                                         priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1310
1311                                 else if (l >= 19 &&
1312                                          memcmp(p, "SYSLOG_IDENTIFIER=", 18) == 0) {
1313                                         char *t;
1314
1315                                         t = strndup(p + 18, l - 18);
1316                                         if (t) {
1317                                                 free(identifier);
1318                                                 identifier = t;
1319                                         }
1320                                 } else if (l >= 8 &&
1321                                            memcmp(p, "MESSAGE=", 8) == 0) {
1322                                         char *t;
1323
1324                                         t = strndup(p + 8, l - 8);
1325                                         if (t) {
1326                                                 free(message);
1327                                                 message = t;
1328                                         }
1329                                 }
1330                         }
1331
1332                         remaining -= (e - p) + 1;
1333                         p = e + 1;
1334                         continue;
1335                 } else {
1336                         le64_t l_le;
1337                         uint64_t l;
1338                         char *k;
1339
1340                         if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1341                                 log_debug("Failed to parse message, ignoring.");
1342                                 break;
1343                         }
1344
1345                         memcpy(&l_le, e + 1, sizeof(uint64_t));
1346                         l = le64toh(l_le);
1347
1348                         if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1349                             e[1+sizeof(uint64_t)+l] != '\n') {
1350                                 log_debug("Failed to parse message, ignoring.");
1351                                 break;
1352                         }
1353
1354                         k = malloc((e - p) + 1 + l);
1355                         if (!k) {
1356                                 log_oom();
1357                                 break;
1358                         }
1359
1360                         memcpy(k, p, e - p);
1361                         k[e - p] = '=';
1362                         memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1363
1364                         if (valid_user_field(p, e - p)) {
1365                                 iovec[n].iov_base = k;
1366                                 iovec[n].iov_len = (e - p) + 1 + l;
1367                                 n++;
1368                         } else
1369                                 free(k);
1370
1371                         remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1372                         p = e + 1 + sizeof(uint64_t) + l + 1;
1373                 }
1374         }
1375
1376         if (n <= 0)
1377                 goto finish;
1378
1379         tn = n++;
1380         IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1381
1382         if (message) {
1383                 if (s->forward_to_syslog)
1384                         forward_syslog(s, priority, identifier, message, ucred, tv);
1385
1386                 if (s->forward_to_kmsg)
1387                         forward_kmsg(s, priority, identifier, message, ucred);
1388
1389                 if (s->forward_to_console)
1390                         forward_console(s, priority, identifier, message, ucred);
1391         }
1392
1393         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
1394
1395 finish:
1396         for (j = 0; j < n; j++)  {
1397                 if (j == tn)
1398                         continue;
1399
1400                 if (iovec[j].iov_base < buffer ||
1401                     (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1402                         free(iovec[j].iov_base);
1403         }
1404
1405         free(iovec);
1406         free(identifier);
1407         free(message);
1408 }
1409
1410 static void process_native_file(
1411                 Server *s,
1412                 int fd,
1413                 struct ucred *ucred,
1414                 struct timeval *tv,
1415                 const char *label, size_t label_len) {
1416
1417         struct stat st;
1418         void *p;
1419         ssize_t n;
1420
1421         assert(s);
1422         assert(fd >= 0);
1423
1424         /* Data is in the passed file, since it didn't fit in a
1425          * datagram. We can't map the file here, since clients might
1426          * then truncate it and trigger a SIGBUS for us. So let's
1427          * stupidly read it */
1428
1429         if (fstat(fd, &st) < 0) {
1430                 log_error("Failed to stat passed file, ignoring: %m");
1431                 return;
1432         }
1433
1434         if (!S_ISREG(st.st_mode)) {
1435                 log_error("File passed is not regular. Ignoring.");
1436                 return;
1437         }
1438
1439         if (st.st_size <= 0)
1440                 return;
1441
1442         if (st.st_size > ENTRY_SIZE_MAX) {
1443                 log_error("File passed too large. Ignoring.");
1444                 return;
1445         }
1446
1447         p = malloc(st.st_size);
1448         if (!p) {
1449                 log_oom();
1450                 return;
1451         }
1452
1453         n = pread(fd, p, st.st_size, 0);
1454         if (n < 0)
1455                 log_error("Failed to read file, ignoring: %s", strerror(-n));
1456         else if (n > 0)
1457                 process_native_message(s, p, n, ucred, tv, label, label_len);
1458
1459         free(p);
1460 }
1461
1462 static int stdout_stream_log(StdoutStream *s, const char *p) {
1463         struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1464         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1465         unsigned n = 0;
1466         int priority;
1467         char *label = NULL;
1468         size_t label_len = 0;
1469
1470         assert(s);
1471         assert(p);
1472
1473         if (isempty(p))
1474                 return 0;
1475
1476         priority = s->priority;
1477
1478         if (s->level_prefix)
1479                 parse_syslog_priority((char**) &p, &priority);
1480
1481         if (s->forward_to_syslog || s->server->forward_to_syslog)
1482                 forward_syslog(s->server, fixup_priority(priority), s->identifier, p, &s->ucred, NULL);
1483
1484         if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1485                 forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1486
1487         if (s->forward_to_console || s->server->forward_to_console)
1488                 forward_console(s->server, priority, s->identifier, p, &s->ucred);
1489
1490         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1491
1492         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1493                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1494
1495         if (priority & LOG_FACMASK)
1496                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1497                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1498
1499         if (s->identifier) {
1500                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1501                 if (syslog_identifier)
1502                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1503         }
1504
1505         message = strappend("MESSAGE=", p);
1506         if (message)
1507                 IOVEC_SET_STRING(iovec[n++], message);
1508
1509 #ifdef HAVE_SELINUX
1510         if (s->security_context) {
1511                 label = (char*) s->security_context;
1512                 label_len = strlen((char*) s->security_context);
1513         }
1514 #endif
1515
1516         dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, label, label_len, s->unit_id, priority);
1517
1518         free(message);
1519         free(syslog_priority);
1520         free(syslog_facility);
1521         free(syslog_identifier);
1522
1523         return 0;
1524 }
1525
1526 static int stdout_stream_line(StdoutStream *s, char *p) {
1527         int r;
1528
1529         assert(s);
1530         assert(p);
1531
1532         p = strstrip(p);
1533
1534         switch (s->state) {
1535
1536         case STDOUT_STREAM_IDENTIFIER:
1537                 if (isempty(p))
1538                         s->identifier = NULL;
1539                 else  {
1540                         s->identifier = strdup(p);
1541                         if (!s->identifier)
1542                                 return log_oom();
1543                 }
1544
1545                 s->state = STDOUT_STREAM_UNIT_ID;
1546                 return 0;
1547
1548         case STDOUT_STREAM_UNIT_ID:
1549                 if (s->ucred.uid == 0) {
1550                         if (isempty(p))
1551                                 s->unit_id = NULL;
1552                         else  {
1553                                 s->unit_id = strdup(p);
1554                                 if (!s->unit_id)
1555                                         return log_oom();
1556                         }
1557                 }
1558
1559                 s->state = STDOUT_STREAM_PRIORITY;
1560                 return 0;
1561
1562         case STDOUT_STREAM_PRIORITY:
1563                 r = safe_atoi(p, &s->priority);
1564                 if (r < 0 || s->priority <= 0 || s->priority >= 999) {
1565                         log_warning("Failed to parse log priority line.");
1566                         return -EINVAL;
1567                 }
1568
1569                 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1570                 return 0;
1571
1572         case STDOUT_STREAM_LEVEL_PREFIX:
1573                 r = parse_boolean(p);
1574                 if (r < 0) {
1575                         log_warning("Failed to parse level prefix line.");
1576                         return -EINVAL;
1577                 }
1578
1579                 s->level_prefix = !!r;
1580                 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1581                 return 0;
1582
1583         case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1584                 r = parse_boolean(p);
1585                 if (r < 0) {
1586                         log_warning("Failed to parse forward to syslog line.");
1587                         return -EINVAL;
1588                 }
1589
1590                 s->forward_to_syslog = !!r;
1591                 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1592                 return 0;
1593
1594         case STDOUT_STREAM_FORWARD_TO_KMSG:
1595                 r = parse_boolean(p);
1596                 if (r < 0) {
1597                         log_warning("Failed to parse copy to kmsg line.");
1598                         return -EINVAL;
1599                 }
1600
1601                 s->forward_to_kmsg = !!r;
1602                 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1603                 return 0;
1604
1605         case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1606                 r = parse_boolean(p);
1607                 if (r < 0) {
1608                         log_warning("Failed to parse copy to console line.");
1609                         return -EINVAL;
1610                 }
1611
1612                 s->forward_to_console = !!r;
1613                 s->state = STDOUT_STREAM_RUNNING;
1614                 return 0;
1615
1616         case STDOUT_STREAM_RUNNING:
1617                 return stdout_stream_log(s, p);
1618         }
1619
1620         assert_not_reached("Unknown stream state");
1621 }
1622
1623 static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
1624         char *p;
1625         size_t remaining;
1626         int r;
1627
1628         assert(s);
1629
1630         p = s->buffer;
1631         remaining = s->length;
1632         for (;;) {
1633                 char *end;
1634                 size_t skip;
1635
1636                 end = memchr(p, '\n', remaining);
1637                 if (end)
1638                         skip = end - p + 1;
1639                 else if (remaining >= sizeof(s->buffer) - 1) {
1640                         end = p + sizeof(s->buffer) - 1;
1641                         skip = remaining;
1642                 } else
1643                         break;
1644
1645                 *end = 0;
1646
1647                 r = stdout_stream_line(s, p);
1648                 if (r < 0)
1649                         return r;
1650
1651                 remaining -= skip;
1652                 p += skip;
1653         }
1654
1655         if (force_flush && remaining > 0) {
1656                 p[remaining] = 0;
1657                 r = stdout_stream_line(s, p);
1658                 if (r < 0)
1659                         return r;
1660
1661                 p += remaining;
1662                 remaining = 0;
1663         }
1664
1665         if (p > s->buffer) {
1666                 memmove(s->buffer, p, remaining);
1667                 s->length = remaining;
1668         }
1669
1670         return 0;
1671 }
1672
1673 static int stdout_stream_process(StdoutStream *s) {
1674         ssize_t l;
1675         int r;
1676
1677         assert(s);
1678
1679         l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1680         if (l < 0) {
1681
1682                 if (errno == EAGAIN)
1683                         return 0;
1684
1685                 log_warning("Failed to read from stream: %m");
1686                 return -errno;
1687         }
1688
1689         if (l == 0) {
1690                 r = stdout_stream_scan(s, true);
1691                 if (r < 0)
1692                         return r;
1693
1694                 return 0;
1695         }
1696
1697         s->length += l;
1698         r = stdout_stream_scan(s, false);
1699         if (r < 0)
1700                 return r;
1701
1702         return 1;
1703
1704 }
1705
1706 static void stdout_stream_free(StdoutStream *s) {
1707         assert(s);
1708
1709         if (s->server) {
1710                 assert(s->server->n_stdout_streams > 0);
1711                 s->server->n_stdout_streams --;
1712                 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
1713         }
1714
1715         if (s->fd >= 0) {
1716                 if (s->server)
1717                         epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1718
1719                 close_nointr_nofail(s->fd);
1720         }
1721
1722 #ifdef HAVE_SELINUX
1723         if (s->security_context)
1724                 freecon(s->security_context);
1725 #endif
1726
1727         free(s->identifier);
1728         free(s);
1729 }
1730
1731 static int stdout_stream_new(Server *s) {
1732         StdoutStream *stream;
1733         int fd, r;
1734         socklen_t len;
1735         struct epoll_event ev;
1736
1737         assert(s);
1738
1739         fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1740         if (fd < 0) {
1741                 if (errno == EAGAIN)
1742                         return 0;
1743
1744                 log_error("Failed to accept stdout connection: %m");
1745                 return -errno;
1746         }
1747
1748         if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1749                 log_warning("Too many stdout streams, refusing connection.");
1750                 close_nointr_nofail(fd);
1751                 return 0;
1752         }
1753
1754         stream = new0(StdoutStream, 1);
1755         if (!stream) {
1756                 close_nointr_nofail(fd);
1757                 return log_oom();
1758         }
1759
1760         stream->fd = fd;
1761
1762         len = sizeof(stream->ucred);
1763         if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1764                 log_error("Failed to determine peer credentials: %m");
1765                 r = -errno;
1766                 goto fail;
1767         }
1768
1769 #ifdef HAVE_SELINUX
1770         if (getpeercon(fd, &stream->security_context) < 0 && errno != ENOPROTOOPT)
1771                 log_error("Failed to determine peer security context: %m");
1772 #endif
1773
1774         if (shutdown(fd, SHUT_WR) < 0) {
1775                 log_error("Failed to shutdown writing side of socket: %m");
1776                 r = -errno;
1777                 goto fail;
1778         }
1779
1780         zero(ev);
1781         ev.data.ptr = stream;
1782         ev.events = EPOLLIN;
1783         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1784                 log_error("Failed to add stream to event loop: %m");
1785                 r = -errno;
1786                 goto fail;
1787         }
1788
1789         stream->server = s;
1790         LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1791         s->n_stdout_streams ++;
1792
1793         return 0;
1794
1795 fail:
1796         stdout_stream_free(stream);
1797         return r;
1798 }
1799
/* Returns true if the given string parses as a PID and that PID is
 * the one of the calling process, false otherwise. */
static bool is_us(const char *pid) {
        pid_t parsed;

        assert(pid);

        return parse_pid(pid, &parsed) >= 0 && parsed == getpid();
}
1810
1811 static void dev_kmsg_record(Server *s, char *p, size_t l) {
1812         struct iovec iovec[N_IOVEC_META_FIELDS + 7 + N_IOVEC_KERNEL_FIELDS];
1813         char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
1814         int priority, r;
1815         unsigned n = 0, z = 0, j;
1816         usec_t usec;
1817         char *identifier = NULL, *pid = NULL, *e, *f, *k;
1818         uint64_t serial;
1819         size_t pl;
1820
1821         assert(s);
1822         assert(p);
1823
1824         if (l <= 0)
1825                 return;
1826
1827         e = memchr(p, ',', l);
1828         if (!e)
1829                 return;
1830         *e = 0;
1831
1832         r = safe_atoi(p, &priority);
1833         if (r < 0 || priority < 0 || priority > 999)
1834                 return;
1835
1836         if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
1837                 return;
1838
1839         l -= (e - p) + 1;
1840         p = e + 1;
1841         e = memchr(p, ',', l);
1842         if (!e)
1843                 return;
1844         *e = 0;
1845
1846         r = safe_atou64(p, &serial);
1847         if (r < 0)
1848                 return;
1849
1850         if (s->kernel_seqnum) {
1851                 /* We already read this one? */
1852                 if (serial < *s->kernel_seqnum)
1853                         return;
1854
1855                 /* Did we lose any? */
1856                 if (serial > *s->kernel_seqnum)
1857                         driver_message(s, SD_MESSAGE_JOURNAL_MISSED, "Missed %llu kernel messages", (unsigned long long) serial - *s->kernel_seqnum - 1);
1858
1859                 /* Make sure we never read this one again. Note that
1860                  * we always store the next message serial we expect
1861                  * here, simply because this makes handling the first
1862                  * message with serial 0 easy. */
1863                 *s->kernel_seqnum = serial + 1;
1864         }
1865
1866         l -= (e - p) + 1;
1867         p = e + 1;
1868         f = memchr(p, ';', l);
1869         if (!f)
1870                 return;
1871         /* Kernel 3.6 has the flags field, kernel 3.5 lacks that */
1872         e = memchr(p, ',', l);
1873         if (!e || f < e)
1874                 e = f;
1875         *e = 0;
1876
1877         r = parse_usec(p, &usec);
1878         if (r < 0)
1879                 return;
1880
1881         l -= (f - p) + 1;
1882         p = f + 1;
1883         e = memchr(p, '\n', l);
1884         if (!e)
1885                 return;
1886         *e = 0;
1887
1888         pl = e - p;
1889         l -= (e - p) + 1;
1890         k = e + 1;
1891
1892         for (j = 0; l > 0 && j < N_IOVEC_KERNEL_FIELDS; j++) {
1893                 char *m;
1894                 /* Meta data fields attached */
1895
1896                 if (*k != ' ')
1897                         break;
1898
1899                 k ++, l --;
1900
1901                 e = memchr(k, '\n', l);
1902                 if (!e)
1903                         return;
1904
1905                 *e = 0;
1906
1907                 m = cunescape_length_with_prefix(k, e - k, "_KERNEL_");
1908                 if (!m)
1909                         break;
1910
1911                 IOVEC_SET_STRING(iovec[n++], m);
1912                 z++;
1913
1914                 l -= (e - k) + 1;
1915                 k = e + 1;
1916         }
1917
1918         if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
1919                      (unsigned long long) usec) >= 0)
1920                 IOVEC_SET_STRING(iovec[n++], source_time);
1921
1922         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
1923
1924         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1925                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1926
1927         if ((priority & LOG_FACMASK) == LOG_KERN)
1928                 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
1929         else {
1930                 read_identifier((const char**) &p, &identifier, &pid);
1931
1932                 /* Avoid any messages we generated ourselves via
1933                  * log_info() and friends. */
1934                 if (pid && is_us(pid))
1935                         goto finish;
1936
1937                 if (identifier) {
1938                         syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1939                         if (syslog_identifier)
1940                                 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1941                 }
1942
1943                 if (pid) {
1944                         syslog_pid = strappend("SYSLOG_PID=", pid);
1945                         if (syslog_pid)
1946                                 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1947                 }
1948
1949                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1950                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1951         }
1952
1953         message = cunescape_length_with_prefix(p, pl, "MESSAGE=");
1954         if (message)
1955                 IOVEC_SET_STRING(iovec[n++], message);
1956
1957         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, NULL, priority);
1958
1959 finish:
1960         for (j = 0; j < z; j++)
1961                 free(iovec[j].iov_base);
1962
1963         free(message);
1964         free(syslog_priority);
1965         free(syslog_identifier);
1966         free(syslog_pid);
1967         free(syslog_facility);
1968         free(source_time);
1969         free(identifier);
1970         free(pid);
1971 }
1972
1973 static int system_journal_open(Server *s) {
1974         int r;
1975         char *fn;
1976         sd_id128_t machine;
1977         char ids[33];
1978
1979         r = sd_id128_get_machine(&machine);
1980         if (r < 0)
1981                 return r;
1982
1983         sd_id128_to_string(machine, ids);
1984
1985         if (!s->system_journal &&
1986             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
1987             access("/run/systemd/journal/flushed", F_OK) >= 0) {
1988
1989                 /* If in auto mode: first try to create the machine
1990                  * path, but not the prefix.
1991                  *
1992                  * If in persistent mode: create /var/log/journal and
1993                  * the machine path */
1994
1995                 if (s->storage == STORAGE_PERSISTENT)
1996                         (void) mkdir("/var/log/journal/", 0755);
1997
1998                 fn = strappend("/var/log/journal/", ids);
1999                 if (!fn)
2000                         return -ENOMEM;
2001
2002                 (void) mkdir(fn, 0755);
2003                 free(fn);
2004
2005                 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
2006                 if (!fn)
2007                         return -ENOMEM;
2008
2009                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, true, &s->system_metrics, NULL, &s->system_journal);
2010                 free(fn);
2011
2012                 if (r >= 0)
2013                         server_fix_perms(s, s->system_journal, 0);
2014                 else if (r < 0) {
2015
2016                         if (r != -ENOENT && r != -EROFS)
2017                                 log_warning("Failed to open system journal: %s", strerror(-r));
2018
2019                         r = 0;
2020                 }
2021         }
2022
2023         if (!s->runtime_journal &&
2024             (s->storage != STORAGE_NONE)) {
2025
2026                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
2027                 if (!fn)
2028                         return -ENOMEM;
2029
2030                 if (s->system_journal) {
2031
2032                         /* Try to open the runtime journal, but only
2033                          * if it already exists, so that we can flush
2034                          * it into the system journal */
2035
2036                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, NULL, &s->runtime_journal);
2037                         free(fn);
2038
2039                         if (r < 0) {
2040                                 if (r != -ENOENT)
2041                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
2042
2043                                 r = 0;
2044                         }
2045
2046                 } else {
2047
2048                         /* OK, we really need the runtime journal, so create
2049                          * it if necessary. */
2050
2051                         (void) mkdir_parents(fn, 0755);
2052                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, NULL, &s->runtime_journal);
2053                         free(fn);
2054
2055                         if (r < 0) {
2056                                 log_error("Failed to open runtime journal: %s", strerror(-r));
2057                                 return r;
2058                         }
2059                 }
2060
2061                 if (s->runtime_journal)
2062                         server_fix_perms(s, s->runtime_journal, 0);
2063         }
2064
2065         return r;
2066 }
2067
2068 static int server_flush_to_var(Server *s) {
2069         Object *o = NULL;
2070         int r;
2071         sd_id128_t machine;
2072         sd_journal *j;
2073
2074         assert(s);
2075
2076         if (s->storage != STORAGE_AUTO &&
2077             s->storage != STORAGE_PERSISTENT)
2078                 return 0;
2079
2080         if (!s->runtime_journal)
2081                 return 0;
2082
2083         system_journal_open(s);
2084
2085         if (!s->system_journal)
2086                 return 0;
2087
2088         log_info("Flushing to /var...");
2089
2090         r = sd_id128_get_machine(&machine);
2091         if (r < 0) {
2092                 log_error("Failed to get machine id: %s", strerror(-r));
2093                 return r;
2094         }
2095
2096         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
2097         if (r < 0) {
2098                 log_error("Failed to read runtime journal: %s", strerror(-r));
2099                 return r;
2100         }
2101
2102         SD_JOURNAL_FOREACH(j) {
2103                 JournalFile *f;
2104
2105                 f = j->current_file;
2106                 assert(f && f->current_offset > 0);
2107
2108                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2109                 if (r < 0) {
2110                         log_error("Can't read entry: %s", strerror(-r));
2111                         goto finish;
2112                 }
2113
2114                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2115                 if (r == -E2BIG) {
2116                         log_info("Allocation limit reached.");
2117
2118                         journal_file_post_change(s->system_journal);
2119                         server_rotate(s);
2120                         server_vacuum(s);
2121
2122                         r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2123                 }
2124
2125                 if (r < 0) {
2126                         log_error("Can't write entry: %s", strerror(-r));
2127                         goto finish;
2128                 }
2129         }
2130
2131 finish:
2132         journal_file_post_change(s->system_journal);
2133
2134         journal_file_close(s->runtime_journal);
2135         s->runtime_journal = NULL;
2136
2137         if (r >= 0)
2138                 rm_rf("/run/log/journal", false, true, false);
2139
2140         return r;
2141 }
2142
2143 static int server_read_dev_kmsg(Server *s) {
2144         char buffer[8192+1]; /* the kernel-side limit per record is 8K currently */
2145         ssize_t l;
2146
2147         assert(s);
2148         assert(s->dev_kmsg_fd >= 0);
2149
2150         l = read(s->dev_kmsg_fd, buffer, sizeof(buffer) - 1);
2151         if (l == 0)
2152                 return 0;
2153         if (l < 0) {
2154                 /* Old kernels who don't allow reading from /dev/kmsg
2155                  * return EINVAL when we try. So handle this cleanly,
2156                  * but don' try to ever read from it again. */
2157                 if (errno == EINVAL) {
2158                         epoll_ctl(s->epoll_fd, EPOLL_CTL_DEL, s->dev_kmsg_fd, NULL);
2159                         return 0;
2160                 }
2161
2162                 if (errno == EAGAIN || errno == EINTR)
2163                         return 0;
2164
2165                 log_error("Failed to read from kernel: %m");
2166                 return -errno;
2167         }
2168
2169         dev_kmsg_record(s, buffer, l);
2170         return 1;
2171 }
2172
2173 static int server_flush_dev_kmsg(Server *s) {
2174         int r;
2175
2176         assert(s);
2177
2178         if (s->dev_kmsg_fd < 0)
2179                 return 0;
2180
2181         if (!s->dev_kmsg_readable)
2182                 return 0;
2183
2184         log_info("Flushing /dev/kmsg...");
2185
2186         for (;;) {
2187                 r = server_read_dev_kmsg(s);
2188                 if (r < 0)
2189                         return r;
2190
2191                 if (r == 0)
2192                         break;
2193         }
2194
2195         return 0;
2196 }
2197
2198 static int process_event(Server *s, struct epoll_event *ev) {
2199         assert(s);
2200         assert(ev);
2201
2202         if (ev->data.fd == s->signal_fd) {
2203                 struct signalfd_siginfo sfsi;
2204                 ssize_t n;
2205
2206                 if (ev->events != EPOLLIN) {
2207                         log_info("Got invalid event from epoll.");
2208                         return -EIO;
2209                 }
2210
2211                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
2212                 if (n != sizeof(sfsi)) {
2213
2214                         if (n >= 0)
2215                                 return -EIO;
2216
2217                         if (errno == EINTR || errno == EAGAIN)
2218                                 return 1;
2219
2220                         return -errno;
2221                 }
2222
2223                 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
2224
2225                 if (sfsi.ssi_signo == SIGUSR1) {
2226                         touch("/run/systemd/journal/flushed");
2227                         server_flush_to_var(s);
2228                         return 1;
2229                 }
2230
2231                 if (sfsi.ssi_signo == SIGUSR2) {
2232                         server_rotate(s);
2233                         server_vacuum(s);
2234                         return 1;
2235                 }
2236
2237                 return 0;
2238
2239         } else if (ev->data.fd == s->dev_kmsg_fd) {
2240                 int r;
2241
2242                 if (ev->events != EPOLLIN) {
2243                         log_info("Got invalid event from epoll.");
2244                         return -EIO;
2245                 }
2246
2247                 r = server_read_dev_kmsg(s);
2248                 if (r < 0)
2249                         return r;
2250
2251                 return 1;
2252
2253         } else if (ev->data.fd == s->native_fd ||
2254                    ev->data.fd == s->syslog_fd) {
2255
2256                 if (ev->events != EPOLLIN) {
2257                         log_info("Got invalid event from epoll.");
2258                         return -EIO;
2259                 }
2260
2261                 for (;;) {
2262                         struct msghdr msghdr;
2263                         struct iovec iovec;
2264                         struct ucred *ucred = NULL;
2265                         struct timeval *tv = NULL;
2266                         struct cmsghdr *cmsg;
2267                         char *label = NULL;
2268                         size_t label_len = 0;
2269                         union {
2270                                 struct cmsghdr cmsghdr;
2271
2272                                 /* We use NAME_MAX space for the
2273                                  * SELinux label here. The kernel
2274                                  * currently enforces no limit, but
2275                                  * according to suggestions from the
2276                                  * SELinux people this will change and
2277                                  * it will probably be identical to
2278                                  * NAME_MAX. For now we use that, but
2279                                  * this should be updated one day when
2280                                  * the final limit is known.*/
2281                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
2282                                             CMSG_SPACE(sizeof(struct timeval)) +
2283                                             CMSG_SPACE(sizeof(int)) + /* fd */
2284                                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
2285                         } control;
2286                         ssize_t n;
2287                         int v;
2288                         int *fds = NULL;
2289                         unsigned n_fds = 0;
2290
2291                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
2292                                 log_error("SIOCINQ failed: %m");
2293                                 return -errno;
2294                         }
2295
2296                         if (s->buffer_size < (size_t) v) {
2297                                 void *b;
2298                                 size_t l;
2299
2300                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
2301                                 b = realloc(s->buffer, l+1);
2302
2303                                 if (!b) {
2304                                         log_error("Couldn't increase buffer.");
2305                                         return -ENOMEM;
2306                                 }
2307
2308                                 s->buffer_size = l;
2309                                 s->buffer = b;
2310                         }
2311
2312                         zero(iovec);
2313                         iovec.iov_base = s->buffer;
2314                         iovec.iov_len = s->buffer_size;
2315
2316                         zero(control);
2317                         zero(msghdr);
2318                         msghdr.msg_iov = &iovec;
2319                         msghdr.msg_iovlen = 1;
2320                         msghdr.msg_control = &control;
2321                         msghdr.msg_controllen = sizeof(control);
2322
2323                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
2324                         if (n < 0) {
2325
2326                                 if (errno == EINTR || errno == EAGAIN)
2327                                         return 1;
2328
2329                                 log_error("recvmsg() failed: %m");
2330                                 return -errno;
2331                         }
2332
2333                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
2334
2335                                 if (cmsg->cmsg_level == SOL_SOCKET &&
2336                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
2337                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
2338                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
2339                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2340                                          cmsg->cmsg_type == SCM_SECURITY) {
2341                                         label = (char*) CMSG_DATA(cmsg);
2342                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
2343                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
2344                                          cmsg->cmsg_type == SO_TIMESTAMP &&
2345                                          cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
2346                                         tv = (struct timeval*) CMSG_DATA(cmsg);
2347                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2348                                          cmsg->cmsg_type == SCM_RIGHTS) {
2349                                         fds = (int*) CMSG_DATA(cmsg);
2350                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
2351                                 }
2352                         }
2353
2354                         if (ev->data.fd == s->syslog_fd) {
2355                                 char *e;
2356
2357                                 if (n > 0 && n_fds == 0) {
2358                                         e = memchr(s->buffer, '\n', n);
2359                                         if (e)
2360                                                 *e = 0;
2361                                         else
2362                                                 s->buffer[n] = 0;
2363
2364                                         process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
2365                                 } else if (n_fds > 0)
2366                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
2367
2368                         } else {
2369                                 if (n > 0 && n_fds == 0)
2370                                         process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
2371                                 else if (n == 0 && n_fds == 1)
2372                                         process_native_file(s, fds[0], ucred, tv, label, label_len);
2373                                 else if (n_fds > 0)
2374                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
2375                         }
2376
2377                         close_many(fds, n_fds);
2378                 }
2379
2380                 return 1;
2381
2382         } else if (ev->data.fd == s->stdout_fd) {
2383
2384                 if (ev->events != EPOLLIN) {
2385                         log_info("Got invalid event from epoll.");
2386                         return -EIO;
2387                 }
2388
2389                 stdout_stream_new(s);
2390                 return 1;
2391
2392         } else {
2393                 StdoutStream *stream;
2394
2395                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
2396                         log_info("Got invalid event from epoll.");
2397                         return -EIO;
2398                 }
2399
2400                 /* If it is none of the well-known fds, it must be an
2401                  * stdout stream fd. Note that this is a bit ugly here
2402                  * (since we rely that none of the well-known fds
2403                  * could be interpreted as pointer), but nonetheless
2404                  * safe, since the well-known fds would never get an
2405                  * fd > 4096, i.e. beyond the first memory page */
2406
2407                 stream = ev->data.ptr;
2408
2409                 if (stdout_stream_process(stream) <= 0)
2410                         stdout_stream_free(stream);
2411
2412                 return 1;
2413         }
2414
2415         log_error("Unknown event.");
2416         return 0;
2417 }
2418
2419 static int open_syslog_socket(Server *s) {
2420         union sockaddr_union sa;
2421         int one, r;
2422         struct epoll_event ev;
2423
2424         assert(s);
2425
2426         if (s->syslog_fd < 0) {
2427
2428                 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2429                 if (s->syslog_fd < 0) {
2430                         log_error("socket() failed: %m");
2431                         return -errno;
2432                 }
2433
2434                 zero(sa);
2435                 sa.un.sun_family = AF_UNIX;
2436                 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
2437
2438                 unlink(sa.un.sun_path);
2439
2440                 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2441                 if (r < 0) {
2442                         log_error("bind() failed: %m");
2443                         return -errno;
2444                 }
2445
2446                 chmod(sa.un.sun_path, 0666);
2447         } else
2448                 fd_nonblock(s->syslog_fd, 1);
2449
2450         one = 1;
2451         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2452         if (r < 0) {
2453                 log_error("SO_PASSCRED failed: %m");
2454                 return -errno;
2455         }
2456
2457 #ifdef HAVE_SELINUX
2458         one = 1;
2459         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2460         if (r < 0)
2461                 log_warning("SO_PASSSEC failed: %m");
2462 #endif
2463
2464         one = 1;
2465         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2466         if (r < 0) {
2467                 log_error("SO_TIMESTAMP failed: %m");
2468                 return -errno;
2469         }
2470
2471         zero(ev);
2472         ev.events = EPOLLIN;
2473         ev.data.fd = s->syslog_fd;
2474         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2475                 log_error("Failed to add syslog server fd to epoll object: %m");
2476                 return -errno;
2477         }
2478
2479         return 0;
2480 }
2481
2482 static int open_native_socket(Server*s) {
2483         union sockaddr_union sa;
2484         int one, r;
2485         struct epoll_event ev;
2486
2487         assert(s);
2488
2489         if (s->native_fd < 0) {
2490
2491                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2492                 if (s->native_fd < 0) {
2493                         log_error("socket() failed: %m");
2494                         return -errno;
2495                 }
2496
2497                 zero(sa);
2498                 sa.un.sun_family = AF_UNIX;
2499                 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
2500
2501                 unlink(sa.un.sun_path);
2502
2503                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2504                 if (r < 0) {
2505                         log_error("bind() failed: %m");
2506                         return -errno;
2507                 }
2508
2509                 chmod(sa.un.sun_path, 0666);
2510         } else
2511                 fd_nonblock(s->native_fd, 1);
2512
2513         one = 1;
2514         r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2515         if (r < 0) {
2516                 log_error("SO_PASSCRED failed: %m");
2517                 return -errno;
2518         }
2519
2520 #ifdef HAVE_SELINUX
2521         one = 1;
2522         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2523         if (r < 0)
2524                 log_warning("SO_PASSSEC failed: %m");
2525 #endif
2526
2527         one = 1;
2528         r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2529         if (r < 0) {
2530                 log_error("SO_TIMESTAMP failed: %m");
2531                 return -errno;
2532         }
2533
2534         zero(ev);
2535         ev.events = EPOLLIN;
2536         ev.data.fd = s->native_fd;
2537         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2538                 log_error("Failed to add native server fd to epoll object: %m");
2539                 return -errno;
2540         }
2541
2542         return 0;
2543 }
2544
2545 static int open_stdout_socket(Server *s) {
2546         union sockaddr_union sa;
2547         int r;
2548         struct epoll_event ev;
2549
2550         assert(s);
2551
2552         if (s->stdout_fd < 0) {
2553
2554                 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2555                 if (s->stdout_fd < 0) {
2556                         log_error("socket() failed: %m");
2557                         return -errno;
2558                 }
2559
2560                 zero(sa);
2561                 sa.un.sun_family = AF_UNIX;
2562                 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
2563
2564                 unlink(sa.un.sun_path);
2565
2566                 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2567                 if (r < 0) {
2568                         log_error("bind() failed: %m");
2569                         return -errno;
2570                 }
2571
2572                 chmod(sa.un.sun_path, 0666);
2573
2574                 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2575                         log_error("liste() failed: %m");
2576                         return -errno;
2577                 }
2578         } else
2579                 fd_nonblock(s->stdout_fd, 1);
2580
2581         zero(ev);
2582         ev.events = EPOLLIN;
2583         ev.data.fd = s->stdout_fd;
2584         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2585                 log_error("Failed to add stdout server fd to epoll object: %m");
2586                 return -errno;
2587         }
2588
2589         return 0;
2590 }
2591
2592 static int open_dev_kmsg(Server *s) {
2593         struct epoll_event ev;
2594
2595         assert(s);
2596
2597         s->dev_kmsg_fd = open("/dev/kmsg", O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2598         if (s->dev_kmsg_fd < 0) {
2599                 log_warning("Failed to open /dev/kmsg, ignoring: %m");
2600                 return 0;
2601         }
2602
2603         zero(ev);
2604         ev.events = EPOLLIN;
2605         ev.data.fd = s->dev_kmsg_fd;
2606         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->dev_kmsg_fd, &ev) < 0) {
2607
2608                 /* This will fail with EPERM on older kernels where
2609                  * /dev/kmsg is not readable. */
2610                 if (errno == EPERM)
2611                         return 0;
2612
2613                 log_error("Failed to add /dev/kmsg fd to epoll object: %m");
2614                 return -errno;
2615         }
2616
2617         s->dev_kmsg_readable = true;
2618
2619         return 0;
2620 }
2621
2622 static int open_kernel_seqnum(Server *s) {
2623         int fd;
2624         uint64_t *p;
2625
2626         assert(s);
2627
2628         /* We store the seqnum we last read in an mmaped file. That
2629          * way we can just use it like a variable, but it is
2630          * persistant and automatically flushed at reboot. */
2631
2632         fd = open("/run/systemd/journal/kernel-seqnum", O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0644);
2633         if (fd < 0) {
2634                 log_error("Failed to open /run/systemd/journal/kernel-seqnum, ignoring: %m");
2635                 return 0;
2636         }
2637
2638         if (posix_fallocate(fd, 0, sizeof(uint64_t)) < 0) {
2639                 log_error("Failed to allocate sequential number file, ignoring: %m");
2640                 close_nointr_nofail(fd);
2641                 return 0;
2642         }
2643
2644         p = mmap(NULL, sizeof(uint64_t), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2645         if (p == MAP_FAILED) {
2646                 log_error("Failed to map sequential number file, ignoring: %m");
2647                 close_nointr_nofail(fd);
2648                 return 0;
2649         }
2650
2651         close_nointr_nofail(fd);
2652         s->kernel_seqnum = p;
2653
2654         return 0;
2655 }
2656
2657 static int open_signalfd(Server *s) {
2658         sigset_t mask;
2659         struct epoll_event ev;
2660
2661         assert(s);
2662
2663         assert_se(sigemptyset(&mask) == 0);
2664         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
2665         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2666
2667         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2668         if (s->signal_fd < 0) {
2669                 log_error("signalfd(): %m");
2670                 return -errno;
2671         }
2672
2673         zero(ev);
2674         ev.events = EPOLLIN;
2675         ev.data.fd = s->signal_fd;
2676
2677         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2678                 log_error("epoll_ctl(): %m");
2679                 return -errno;
2680         }
2681
2682         return 0;
2683 }
2684
2685 static int server_parse_proc_cmdline(Server *s) {
2686         char *line, *w, *state;
2687         int r;
2688         size_t l;
2689
2690         if (detect_container(NULL) > 0)
2691                 return 0;
2692
2693         r = read_one_line_file("/proc/cmdline", &line);
2694         if (r < 0) {
2695                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2696                 return 0;
2697         }
2698
2699         FOREACH_WORD_QUOTED(w, l, line, state) {
2700                 char *word;
2701
2702                 word = strndup(w, l);
2703                 if (!word) {
2704                         r = -ENOMEM;
2705                         goto finish;
2706                 }
2707
2708                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
2709                         r = parse_boolean(word + 35);
2710                         if (r < 0)
2711                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2712                         else
2713                                 s->forward_to_syslog = r;
2714                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
2715                         r = parse_boolean(word + 33);
2716                         if (r < 0)
2717                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2718                         else
2719                                 s->forward_to_kmsg = r;
2720                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
2721                         r = parse_boolean(word + 36);
2722                         if (r < 0)
2723                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2724                         else
2725                                 s->forward_to_console = r;
2726                 } else if (startswith(word, "systemd.journald"))
2727                         log_warning("Invalid systemd.journald parameter. Ignoring.");
2728
2729                 free(word);
2730         }
2731
2732         r = 0;
2733
2734 finish:
2735         free(line);
2736         return r;
2737 }
2738
2739 static int server_parse_config_file(Server *s) {
2740         FILE *f;
2741         const char *fn;
2742         int r;
2743
2744         assert(s);
2745
2746         fn = "/etc/systemd/journald.conf";
2747         f = fopen(fn, "re");
2748         if (!f) {
2749                 if (errno == ENOENT)
2750                         return 0;
2751
2752                 log_warning("Failed to open configuration file %s: %m", fn);
2753                 return -errno;
2754         }
2755
2756         r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2757         if (r < 0)
2758                 log_warning("Failed to parse configuration file: %s", strerror(-r));
2759
2760         fclose(f);
2761
2762         return r;
2763 }
2764
2765 static int server_init(Server *s) {
2766         int n, r, fd;
2767
2768         assert(s);
2769
2770         zero(*s);
2771         s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
2772         s->compress = true;
2773
2774         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
2775         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
2776
2777         s->forward_to_syslog = true;
2778
2779         s->max_level_store = LOG_DEBUG;
2780         s->max_level_syslog = LOG_DEBUG;
2781         s->max_level_kmsg = LOG_NOTICE;
2782         s->max_level_console = LOG_INFO;
2783
2784         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
2785         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
2786
2787         server_parse_config_file(s);
2788         server_parse_proc_cmdline(s);
2789
2790         mkdir_p("/run/systemd/journal", 0755);
2791
2792         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
2793         if (!s->user_journals)
2794                 return log_oom();
2795
2796         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2797         if (s->epoll_fd < 0) {
2798                 log_error("Failed to create epoll object: %m");
2799                 return -errno;
2800         }
2801
2802         n = sd_listen_fds(true);
2803         if (n < 0) {
2804                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
2805                 return n;
2806         }
2807
2808         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2809
2810                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
2811
2812                         if (s->native_fd >= 0) {
2813                                 log_error("Too many native sockets passed.");
2814                                 return -EINVAL;
2815                         }
2816
2817                         s->native_fd = fd;
2818
2819                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
2820
2821                         if (s->stdout_fd >= 0) {
2822                                 log_error("Too many stdout sockets passed.");
2823                                 return -EINVAL;
2824                         }
2825
2826                         s->stdout_fd = fd;
2827
2828                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
2829
2830                         if (s->syslog_fd >= 0) {
2831                                 log_error("Too many /dev/log sockets passed.");
2832                                 return -EINVAL;
2833                         }
2834
2835                         s->syslog_fd = fd;
2836
2837                 } else {
2838                         log_error("Unknown socket passed.");
2839                         return -EINVAL;
2840                 }
2841         }
2842
2843         r = open_syslog_socket(s);
2844         if (r < 0)
2845                 return r;
2846
2847         r = open_native_socket(s);
2848         if (r < 0)
2849                 return r;
2850
2851         r = open_stdout_socket(s);
2852         if (r < 0)
2853                 return r;
2854
2855         r = open_dev_kmsg(s);
2856         if (r < 0)
2857                 return r;
2858
2859         r = open_kernel_seqnum(s);
2860         if (r < 0)
2861                 return r;
2862
2863         r = open_signalfd(s);
2864         if (r < 0)
2865                 return r;
2866
2867         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2868         if (!s->rate_limit)
2869                 return -ENOMEM;
2870
2871         r = system_journal_open(s);
2872         if (r < 0)
2873                 return r;
2874
2875         return 0;
2876 }
2877
2878 static void server_done(Server *s) {
2879         JournalFile *f;
2880         assert(s);
2881
2882         while (s->stdout_streams)
2883                 stdout_stream_free(s->stdout_streams);
2884
2885         if (s->system_journal)
2886                 journal_file_close(s->system_journal);
2887
2888         if (s->runtime_journal)
2889                 journal_file_close(s->runtime_journal);
2890
2891         while ((f = hashmap_steal_first(s->user_journals)))
2892                 journal_file_close(f);
2893
2894         hashmap_free(s->user_journals);
2895
2896         if (s->epoll_fd >= 0)
2897                 close_nointr_nofail(s->epoll_fd);
2898
2899         if (s->signal_fd >= 0)
2900                 close_nointr_nofail(s->signal_fd);
2901
2902         if (s->syslog_fd >= 0)
2903                 close_nointr_nofail(s->syslog_fd);
2904
2905         if (s->native_fd >= 0)
2906                 close_nointr_nofail(s->native_fd);
2907
2908         if (s->stdout_fd >= 0)
2909                 close_nointr_nofail(s->stdout_fd);
2910
2911         if (s->dev_kmsg_fd >= 0)
2912                 close_nointr_nofail(s->dev_kmsg_fd);
2913
2914         if (s->rate_limit)
2915                 journal_rate_limit_free(s->rate_limit);
2916
2917         if (s->kernel_seqnum)
2918                 munmap(s->kernel_seqnum, sizeof(uint64_t));
2919
2920         free(s->buffer);
2921         free(s->tty_path);
2922 }
2923
2924 int main(int argc, char *argv[]) {
2925         Server server;
2926         int r;
2927
2928         /* if (getppid() != 1) { */
2929         /*         log_error("This program should be invoked by init only."); */
2930         /*         return EXIT_FAILURE; */
2931         /* } */
2932
2933         if (argc > 1) {
2934                 log_error("This program does not take arguments.");
2935                 return EXIT_FAILURE;
2936         }
2937
2938         log_set_target(LOG_TARGET_SAFE);
2939         log_set_facility(LOG_SYSLOG);
2940         log_parse_environment();
2941         log_open();
2942
2943         umask(0022);
2944
2945         r = server_init(&server);
2946         if (r < 0)
2947                 goto finish;
2948
2949         server_vacuum(&server);
2950         server_flush_to_var(&server);
2951         server_flush_dev_kmsg(&server);
2952
2953         log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
2954         driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
2955
2956         sd_notify(false,
2957                   "READY=1\n"
2958                   "STATUS=Processing requests...");
2959
2960         for (;;) {
2961                 struct epoll_event event;
2962
2963                 r = epoll_wait(server.epoll_fd, &event, 1, -1);
2964                 if (r < 0) {
2965
2966                         if (errno == EINTR)
2967                                 continue;
2968
2969                         log_error("epoll_wait() failed: %m");
2970                         r = -errno;
2971                         goto finish;
2972                 } else if (r == 0)
2973                         break;
2974
2975                 r = process_event(&server, &event);
2976                 if (r < 0)
2977                         goto finish;
2978                 else if (r == 0)
2979                         break;
2980         }
2981
2982         log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
2983         driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
2984
2985 finish:
2986         sd_notify(false,
2987                   "STATUS=Shutting down...");
2988
2989         server_done(&server);
2990
2991         return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
2992 }