chiark / gitweb /
journald: never read the same kernel msg twice, and generate message when we lose one
[elogind.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
32 #include <sys/mman.h>
33
34 #include <systemd/sd-journal.h>
35 #include <systemd/sd-messages.h>
36 #include <systemd/sd-daemon.h>
37
38 #ifdef HAVE_LOGIND
39 #include <systemd/sd-login.h>
40 #endif
41
42 #include "mkdir.h"
43 #include "hashmap.h"
44 #include "journal-file.h"
45 #include "socket-util.h"
46 #include "cgroup-util.h"
47 #include "list.h"
48 #include "journal-rate-limit.h"
49 #include "journal-internal.h"
50 #include "conf-parser.h"
51 #include "journald.h"
52 #include "virt.h"
53 #include "missing.h"
54
55 #ifdef HAVE_ACL
56 #include <sys/acl.h>
57 #include <acl/libacl.h>
58 #include "acl-util.h"
59 #endif
60
61 #ifdef HAVE_SELINUX
62 #include <selinux/selinux.h>
63 #endif
64
/* Upper bound on simultaneously open per-user journal files: once this
 * many are open, find_journal() closes one before opening another. */
#define USER_JOURNALS_MAX 1024

/* NOTE(review): presumably the cap on concurrently connected stdout
 * streams; its use is not visible in this part of the file. */
#define STDOUT_STREAMS_MAX 4096

/* Default rate limiting parameters (interval in usec, burst in messages).
 * NOTE(review): where these defaults get applied is not visible here. */
#define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 200

/* available_space() reuses its cached result for this long (usec). */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)

/* Number of iovec slots callers must reserve beyond their own fields so
 * that dispatch_message_real() can append its metadata fields. */
#define N_IOVEC_META_FIELDS 17

/* NOTE(review): presumably the iovec budget for kernel message fields;
 * not used in the part of the file visible here. */
#define N_IOVEC_KERNEL_FIELDS 64

/* 32 MiB. NOTE(review): presumably the largest accepted entry size;
 * the check itself is not visible here. */
#define ENTRY_SIZE_MAX (32*1024*1024)
77
/* Parser state of a stdout stream connection.
 *
 * NOTE(review): the state machine itself is outside this chunk; judging
 * by the names, every state before STDOUT_STREAM_RUNNING corresponds to
 * one header field the client still has to send, in this order. */
typedef enum StdoutStreamState {
        STDOUT_STREAM_IDENTIFIER,         /* 0 */
        STDOUT_STREAM_UNIT_ID,            /* 1 */
        STDOUT_STREAM_PRIORITY,           /* 2 */
        STDOUT_STREAM_LEVEL_PREFIX,       /* 3 */
        STDOUT_STREAM_FORWARD_TO_SYSLOG,  /* 4 */
        STDOUT_STREAM_FORWARD_TO_KMSG,    /* 5 */
        STDOUT_STREAM_FORWARD_TO_CONSOLE, /* 6 */
        STDOUT_STREAM_RUNNING             /* 7 */
} StdoutStreamState;
88
89 struct StdoutStream {
90         Server *server;
91         StdoutStreamState state;
92
93         int fd;
94
95         struct ucred ucred;
96 #ifdef HAVE_SELINUX
97         security_context_t security_context;
98 #endif
99
100         char *identifier;
101         char *unit_id;
102         int priority;
103         bool level_prefix:1;
104         bool forward_to_syslog:1;
105         bool forward_to_kmsg:1;
106         bool forward_to_console:1;
107
108         char buffer[LINE_MAX+1];
109         size_t length;
110
111         LIST_FIELDS(StdoutStream, stdout_stream);
112 };
113
114 static const char* const storage_table[] = {
115         [STORAGE_AUTO] = "auto",
116         [STORAGE_VOLATILE] = "volatile",
117         [STORAGE_PERSISTENT] = "persistent",
118         [STORAGE_NONE] = "none"
119 };
120
121 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
122 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
123
124 static uint64_t available_space(Server *s) {
125         char ids[33], *p;
126         const char *f;
127         sd_id128_t machine;
128         struct statvfs ss;
129         uint64_t sum = 0, avail = 0, ss_avail = 0;
130         int r;
131         DIR *d;
132         usec_t ts;
133         JournalMetrics *m;
134
135         ts = now(CLOCK_MONOTONIC);
136
137         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
138                 return s->cached_available_space;
139
140         r = sd_id128_get_machine(&machine);
141         if (r < 0)
142                 return 0;
143
144         if (s->system_journal) {
145                 f = "/var/log/journal/";
146                 m = &s->system_metrics;
147         } else {
148                 f = "/run/log/journal/";
149                 m = &s->runtime_metrics;
150         }
151
152         assert(m);
153
154         p = strappend(f, sd_id128_to_string(machine, ids));
155         if (!p)
156                 return 0;
157
158         d = opendir(p);
159         free(p);
160
161         if (!d)
162                 return 0;
163
164         if (fstatvfs(dirfd(d), &ss) < 0)
165                 goto finish;
166
167         for (;;) {
168                 struct stat st;
169                 struct dirent buf, *de;
170
171                 r = readdir_r(d, &buf, &de);
172                 if (r != 0)
173                         break;
174
175                 if (!de)
176                         break;
177
178                 if (!endswith(de->d_name, ".journal") &&
179                     !endswith(de->d_name, ".journal~"))
180                         continue;
181
182                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
183                         continue;
184
185                 if (!S_ISREG(st.st_mode))
186                         continue;
187
188                 sum += (uint64_t) st.st_blocks * 512UL;
189         }
190
191         avail = sum >= m->max_use ? 0 : m->max_use - sum;
192
193         ss_avail = ss.f_bsize * ss.f_bavail;
194
195         ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
196
197         if (ss_avail < avail)
198                 avail = ss_avail;
199
200         s->cached_available_space = avail;
201         s->cached_available_space_timestamp = ts;
202
203 finish:
204         closedir(d);
205
206         return avail;
207 }
208
209 static void server_read_file_gid(Server *s) {
210         const char *adm = "adm";
211         int r;
212
213         assert(s);
214
215         if (s->file_gid_valid)
216                 return;
217
218         r = get_group_creds(&adm, &s->file_gid);
219         if (r < 0)
220                 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
221
222         /* if we couldn't read the gid, then it will be 0, but that's
223          * fine and we shouldn't try to resolve the group again, so
224          * let's just pretend it worked right-away. */
225         s->file_gid_valid = true;
226 }
227
228 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
229         int r;
230 #ifdef HAVE_ACL
231         acl_t acl;
232         acl_entry_t entry;
233         acl_permset_t permset;
234 #endif
235
236         assert(f);
237
238         server_read_file_gid(s);
239
240         r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
241         if (r < 0)
242                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
243
244 #ifdef HAVE_ACL
245         if (uid <= 0)
246                 return;
247
248         acl = acl_get_fd(f->fd);
249         if (!acl) {
250                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
251                 return;
252         }
253
254         r = acl_find_uid(acl, uid, &entry);
255         if (r <= 0) {
256
257                 if (acl_create_entry(&acl, &entry) < 0 ||
258                     acl_set_tag_type(entry, ACL_USER) < 0 ||
259                     acl_set_qualifier(entry, &uid) < 0) {
260                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
261                         goto finish;
262                 }
263         }
264
265         if (acl_get_permset(entry, &permset) < 0 ||
266             acl_add_perm(permset, ACL_READ) < 0 ||
267             acl_calc_mask(&acl) < 0) {
268                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
269                 goto finish;
270         }
271
272         if (acl_set_fd(f->fd, acl) < 0)
273                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
274
275 finish:
276         acl_free(acl);
277 #endif
278 }
279
280 static JournalFile* find_journal(Server *s, uid_t uid) {
281         char *p;
282         int r;
283         JournalFile *f;
284         char ids[33];
285         sd_id128_t machine;
286
287         assert(s);
288
289         /* We split up user logs only on /var, not on /run. If the
290          * runtime file is open, we write to it exclusively, in order
291          * to guarantee proper order as soon as we flush /run to
292          * /var and close the runtime file. */
293
294         if (s->runtime_journal)
295                 return s->runtime_journal;
296
297         if (uid <= 0)
298                 return s->system_journal;
299
300         r = sd_id128_get_machine(&machine);
301         if (r < 0)
302                 return s->system_journal;
303
304         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
305         if (f)
306                 return f;
307
308         if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0)
309                 return s->system_journal;
310
311         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
312                 /* Too many open? Then let's close one */
313                 f = hashmap_steal_first(s->user_journals);
314                 assert(f);
315                 journal_file_close(f);
316         }
317
318         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, &s->system_metrics, s->system_journal, &f);
319         free(p);
320
321         if (r < 0)
322                 return s->system_journal;
323
324         server_fix_perms(s, f, uid);
325
326         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
327         if (r < 0) {
328                 journal_file_close(f);
329                 return s->system_journal;
330         }
331
332         return f;
333 }
334
335 static void server_rotate(Server *s) {
336         JournalFile *f;
337         void *k;
338         Iterator i;
339         int r;
340
341         log_info("Rotating...");
342
343         if (s->runtime_journal) {
344                 r = journal_file_rotate(&s->runtime_journal);
345                 if (r < 0)
346                         if (s->runtime_journal)
347                                 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
348                         else
349                                 log_error("Failed to create new runtime journal: %s", strerror(-r));
350                 else
351                         server_fix_perms(s, s->runtime_journal, 0);
352         }
353
354         if (s->system_journal) {
355                 r = journal_file_rotate(&s->system_journal);
356                 if (r < 0)
357                         if (s->system_journal)
358                                 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
359                         else
360                                 log_error("Failed to create new system journal: %s", strerror(-r));
361
362                 else
363                         server_fix_perms(s, s->system_journal, 0);
364         }
365
366         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
367                 r = journal_file_rotate(&f);
368                 if (r < 0)
369                         if (f->path)
370                                 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
371                         else
372                                 log_error("Failed to create user journal: %s", strerror(-r));
373                 else {
374                         hashmap_replace(s->user_journals, k, f);
375                         server_fix_perms(s, s->system_journal, PTR_TO_UINT32(k));
376                 }
377         }
378 }
379
380 static void server_vacuum(Server *s) {
381         char *p;
382         char ids[33];
383         sd_id128_t machine;
384         int r;
385
386         log_info("Vacuuming...");
387
388         r = sd_id128_get_machine(&machine);
389         if (r < 0) {
390                 log_error("Failed to get machine ID: %s", strerror(-r));
391                 return;
392         }
393
394         sd_id128_to_string(machine, ids);
395
396         if (s->system_journal) {
397                 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
398                         log_oom();
399                         return;
400                 }
401
402                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
403                 if (r < 0 && r != -ENOENT)
404                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
405                 free(p);
406         }
407
408         if (s->runtime_journal) {
409                 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
410                         log_oom();
411                         return;
412                 }
413
414                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
415                 if (r < 0 && r != -ENOENT)
416                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
417                 free(p);
418         }
419
420         s->cached_available_space_timestamp = 0;
421 }
422
423 static char *shortened_cgroup_path(pid_t pid) {
424         int r;
425         char *process_path, *init_path, *path;
426
427         assert(pid > 0);
428
429         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
430         if (r < 0)
431                 return NULL;
432
433         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
434         if (r < 0) {
435                 free(process_path);
436                 return NULL;
437         }
438
439         if (endswith(init_path, "/system"))
440                 init_path[strlen(init_path) - 7] = 0;
441         else if (streq(init_path, "/"))
442                 init_path[0] = 0;
443
444         if (startswith(process_path, init_path)) {
445                 char *p;
446
447                 p = strdup(process_path + strlen(init_path));
448                 if (!p) {
449                         free(process_path);
450                         free(init_path);
451                         return NULL;
452                 }
453                 path = p;
454         } else {
455                 path = process_path;
456                 process_path = NULL;
457         }
458
459         free(process_path);
460         free(init_path);
461
462         return path;
463 }
464
465 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
466         JournalFile *f;
467         bool vacuumed = false;
468         int r;
469
470         assert(s);
471         assert(iovec);
472         assert(n > 0);
473
474         f = find_journal(s, uid);
475         if (!f)
476                 return;
477
478         if (journal_file_rotate_suggested(f)) {
479                 log_info("Journal header limits reached or header out-of-date, rotating.");
480                 server_rotate(s);
481                 server_vacuum(s);
482                 vacuumed = true;
483
484                 f = find_journal(s, uid);
485                 if (!f)
486                         return;
487         }
488
489         for (;;) {
490                 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
491                 if (r >= 0)
492                         return;
493
494                 if (vacuumed ||
495                     (r != -E2BIG && /* hit limit */
496                      r != -EFBIG && /* hit fs limit */
497                      r != -EDQUOT && /* quota hit */
498                      r != -ENOSPC && /* disk full */
499                      r != -EBADMSG && /* corrupted */
500                      r != -ENODATA && /* truncated */
501                      r != -EHOSTDOWN && /* other machine */
502                      r != -EPROTONOSUPPORT && /* unsupported feature */
503                      r != -EBUSY && /* unclean shutdown */
504                      r != -ESHUTDOWN /* already archived */)) {
505                         log_error("Failed to write entry, ignoring: %s", strerror(-r));
506                         return;
507                 }
508
509                 if (r == -E2BIG || r == -EFBIG || r == EDQUOT || r == ENOSPC)
510                         log_info("Allocation limit reached, rotating.");
511                 else if (r == -EHOSTDOWN)
512                         log_info("Journal file from other machine, rotating.");
513                 else if (r == -EBUSY)
514                         log_info("Unlcean shutdown, rotating.");
515                 else
516                         log_warning("Journal file corrupted, rotating.");
517
518                 server_rotate(s);
519                 server_vacuum(s);
520                 vacuumed = true;
521
522                 f = find_journal(s, uid);
523                 if (!f)
524                         return;
525
526                 log_info("Retrying write.");
527         }
528 }
529
530 static void dispatch_message_real(
531                 Server *s,
532                 struct iovec *iovec, unsigned n, unsigned m,
533                 struct ucred *ucred,
534                 struct timeval *tv,
535                 const char *label, size_t label_len,
536                 const char *unit_id) {
537
538         char *pid = NULL, *uid = NULL, *gid = NULL,
539                 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
540                 *comm = NULL, *cmdline = NULL, *hostname = NULL,
541                 *audit_session = NULL, *audit_loginuid = NULL,
542                 *exe = NULL, *cgroup = NULL, *session = NULL,
543                 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
544
545         char idbuf[33];
546         sd_id128_t id;
547         int r;
548         char *t;
549         uid_t loginuid = 0, realuid = 0;
550
551         assert(s);
552         assert(iovec);
553         assert(n > 0);
554         assert(n + N_IOVEC_META_FIELDS <= m);
555
556         if (ucred) {
557                 uint32_t audit;
558 #ifdef HAVE_LOGIND
559                 uid_t owner;
560 #endif
561
562                 realuid = ucred->uid;
563
564                 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
565                         IOVEC_SET_STRING(iovec[n++], pid);
566
567                 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
568                         IOVEC_SET_STRING(iovec[n++], uid);
569
570                 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
571                         IOVEC_SET_STRING(iovec[n++], gid);
572
573                 r = get_process_comm(ucred->pid, &t);
574                 if (r >= 0) {
575                         comm = strappend("_COMM=", t);
576                         free(t);
577
578                         if (comm)
579                                 IOVEC_SET_STRING(iovec[n++], comm);
580                 }
581
582                 r = get_process_exe(ucred->pid, &t);
583                 if (r >= 0) {
584                         exe = strappend("_EXE=", t);
585                         free(t);
586
587                         if (exe)
588                                 IOVEC_SET_STRING(iovec[n++], exe);
589                 }
590
591                 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
592                 if (r >= 0) {
593                         cmdline = strappend("_CMDLINE=", t);
594                         free(t);
595
596                         if (cmdline)
597                                 IOVEC_SET_STRING(iovec[n++], cmdline);
598                 }
599
600                 r = audit_session_from_pid(ucred->pid, &audit);
601                 if (r >= 0)
602                         if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
603                                 IOVEC_SET_STRING(iovec[n++], audit_session);
604
605                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
606                 if (r >= 0)
607                         if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
608                                 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
609
610                 t = shortened_cgroup_path(ucred->pid);
611                 if (t) {
612                         cgroup = strappend("_SYSTEMD_CGROUP=", t);
613                         free(t);
614
615                         if (cgroup)
616                                 IOVEC_SET_STRING(iovec[n++], cgroup);
617                 }
618
619 #ifdef HAVE_LOGIND
620                 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
621                         session = strappend("_SYSTEMD_SESSION=", t);
622                         free(t);
623
624                         if (session)
625                                 IOVEC_SET_STRING(iovec[n++], session);
626                 }
627
628                 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
629                         if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
630                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
631 #endif
632
633                 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
634                         unit = strappend("_SYSTEMD_UNIT=", t);
635                         free(t);
636                 } else if (unit_id)
637                         unit = strappend("_SYSTEMD_UNIT=", unit_id);
638
639                 if (unit)
640                         IOVEC_SET_STRING(iovec[n++], unit);
641
642 #ifdef HAVE_SELINUX
643                 if (label) {
644                         selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
645                         if (selinux_context) {
646                                 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
647                                 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
648                                 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
649                                 IOVEC_SET_STRING(iovec[n++], selinux_context);
650                         }
651                 } else {
652                         security_context_t con;
653
654                         if (getpidcon(ucred->pid, &con) >= 0) {
655                                 selinux_context = strappend("_SELINUX_CONTEXT=", con);
656                                 if (selinux_context)
657                                         IOVEC_SET_STRING(iovec[n++], selinux_context);
658
659                                 freecon(con);
660                         }
661                 }
662 #endif
663         }
664
665         if (tv) {
666                 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
667                              (unsigned long long) timeval_load(tv)) >= 0)
668                         IOVEC_SET_STRING(iovec[n++], source_time);
669         }
670
671         /* Note that strictly speaking storing the boot id here is
672          * redundant since the entry includes this in-line
673          * anyway. However, we need this indexed, too. */
674         r = sd_id128_get_boot(&id);
675         if (r >= 0)
676                 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
677                         IOVEC_SET_STRING(iovec[n++], boot_id);
678
679         r = sd_id128_get_machine(&id);
680         if (r >= 0)
681                 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
682                         IOVEC_SET_STRING(iovec[n++], machine_id);
683
684         t = gethostname_malloc();
685         if (t) {
686                 hostname = strappend("_HOSTNAME=", t);
687                 free(t);
688                 if (hostname)
689                         IOVEC_SET_STRING(iovec[n++], hostname);
690         }
691
692         assert(n <= m);
693
694         write_to_journal(s, realuid == 0 ? 0 : loginuid, iovec, n);
695
696         free(pid);
697         free(uid);
698         free(gid);
699         free(comm);
700         free(exe);
701         free(cmdline);
702         free(source_time);
703         free(boot_id);
704         free(machine_id);
705         free(hostname);
706         free(audit_session);
707         free(audit_loginuid);
708         free(cgroup);
709         free(session);
710         free(owner_uid);
711         free(unit);
712         free(selinux_context);
713 }
714
715 static void driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
716         char mid[11 + 32 + 1];
717         char buffer[16 + LINE_MAX + 1];
718         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
719         int n = 0;
720         va_list ap;
721         struct ucred ucred;
722
723         assert(s);
724         assert(format);
725
726         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
727         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
728
729         memcpy(buffer, "MESSAGE=", 8);
730         va_start(ap, format);
731         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
732         va_end(ap);
733         char_array_0(buffer);
734         IOVEC_SET_STRING(iovec[n++], buffer);
735
736         snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
737         char_array_0(mid);
738         IOVEC_SET_STRING(iovec[n++], mid);
739
740         zero(ucred);
741         ucred.pid = getpid();
742         ucred.uid = getuid();
743         ucred.gid = getgid();
744
745         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
746 }
747
748 static void dispatch_message(Server *s,
749                              struct iovec *iovec, unsigned n, unsigned m,
750                              struct ucred *ucred,
751                              struct timeval *tv,
752                              const char *label, size_t label_len,
753                              const char *unit_id,
754                              int priority) {
755         int rl;
756         char *path = NULL, *c;
757
758         assert(s);
759         assert(iovec || n == 0);
760
761         if (n == 0)
762                 return;
763
764         if (LOG_PRI(priority) > s->max_level_store)
765                 return;
766
767         if (!ucred)
768                 goto finish;
769
770         path = shortened_cgroup_path(ucred->pid);
771         if (!path)
772                 goto finish;
773
774         /* example: /user/lennart/3/foobar
775          *          /system/dbus.service/foobar
776          *
777          * So let's cut of everything past the third /, since that is
778          * wher user directories start */
779
780         c = strchr(path, '/');
781         if (c) {
782                 c = strchr(c+1, '/');
783                 if (c) {
784                         c = strchr(c+1, '/');
785                         if (c)
786                                 *c = 0;
787                 }
788         }
789
790         rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
791
792         if (rl == 0) {
793                 free(path);
794                 return;
795         }
796
797         /* Write a suppression message if we suppressed something */
798         if (rl > 1)
799                 driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
800
801         free(path);
802
803 finish:
804         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
805 }
806
807 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
808         struct msghdr msghdr;
809         struct cmsghdr *cmsg;
810         union {
811                 struct cmsghdr cmsghdr;
812                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
813         } control;
814         union sockaddr_union sa;
815
816         assert(s);
817         assert(iovec);
818         assert(n_iovec > 0);
819
820         zero(msghdr);
821         msghdr.msg_iov = (struct iovec*) iovec;
822         msghdr.msg_iovlen = n_iovec;
823
824         zero(sa);
825         sa.un.sun_family = AF_UNIX;
826         strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
827         msghdr.msg_name = &sa;
828         msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
829
830         if (ucred) {
831                 zero(control);
832                 msghdr.msg_control = &control;
833                 msghdr.msg_controllen = sizeof(control);
834
835                 cmsg = CMSG_FIRSTHDR(&msghdr);
836                 cmsg->cmsg_level = SOL_SOCKET;
837                 cmsg->cmsg_type = SCM_CREDENTIALS;
838                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
839                 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
840                 msghdr.msg_controllen = cmsg->cmsg_len;
841         }
842
843         /* Forward the syslog message we received via /dev/log to
844          * /run/systemd/syslog. Unfortunately we currently can't set
845          * the SO_TIMESTAMP auxiliary data, and hence we don't. */
846
847         if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
848                 return;
849
850         /* The socket is full? I guess the syslog implementation is
851          * too slow, and we shouldn't wait for that... */
852         if (errno == EAGAIN)
853                 return;
854
855         if (ucred && errno == ESRCH) {
856                 struct ucred u;
857
858                 /* Hmm, presumably the sender process vanished
859                  * by now, so let's fix it as good as we
860                  * can, and retry */
861
862                 u = *ucred;
863                 u.pid = getpid();
864                 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
865
866                 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
867                         return;
868
869                 if (errno == EAGAIN)
870                         return;
871         }
872
873         if (errno != ENOENT)
874                 log_debug("Failed to forward syslog message: %m");
875 }
876
877 static void forward_syslog_raw(Server *s, int priority, const char *buffer, struct ucred *ucred, struct timeval *tv) {
878         struct iovec iovec;
879
880         assert(s);
881         assert(buffer);
882
883         if (LOG_PRI(priority) > s->max_level_syslog)
884                 return;
885
886         IOVEC_SET_STRING(iovec, buffer);
887         forward_syslog_iovec(s, &iovec, 1, ucred, tv);
888 }
889
890 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
891         struct iovec iovec[5];
892         char header_priority[6], header_time[64], header_pid[16];
893         int n = 0;
894         time_t t;
895         struct tm *tm;
896         char *ident_buf = NULL;
897
898         assert(s);
899         assert(priority >= 0);
900         assert(priority <= 999);
901         assert(message);
902
903         if (LOG_PRI(priority) > s->max_level_syslog)
904                 return;
905
906         /* First: priority field */
907         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
908         char_array_0(header_priority);
909         IOVEC_SET_STRING(iovec[n++], header_priority);
910
911         /* Second: timestamp */
912         t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
913         tm = localtime(&t);
914         if (!tm)
915                 return;
916         if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
917                 return;
918         IOVEC_SET_STRING(iovec[n++], header_time);
919
920         /* Third: identifier and PID */
921         if (ucred) {
922                 if (!identifier) {
923                         get_process_comm(ucred->pid, &ident_buf);
924                         identifier = ident_buf;
925                 }
926
927                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
928                 char_array_0(header_pid);
929
930                 if (identifier)
931                         IOVEC_SET_STRING(iovec[n++], identifier);
932
933                 IOVEC_SET_STRING(iovec[n++], header_pid);
934         } else if (identifier) {
935                 IOVEC_SET_STRING(iovec[n++], identifier);
936                 IOVEC_SET_STRING(iovec[n++], ": ");
937         }
938
939         /* Fourth: message */
940         IOVEC_SET_STRING(iovec[n++], message);
941
942         forward_syslog_iovec(s, iovec, n, ucred, tv);
943
944         free(ident_buf);
945 }
946
/* Returns priority unchanged if it carries a non-zero facility;
 * otherwise keeps only the level bits and tags them with LOG_USER. */
static int fixup_priority(int priority) {

        if (priority & LOG_FACMASK)
                return priority;

        return LOG_USER | (priority & LOG_PRIMASK);
}
954
955 static void forward_kmsg(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
956         struct iovec iovec[5];
957         char header_priority[6], header_pid[16];
958         int n = 0;
959         char *ident_buf = NULL;
960
961         assert(s);
962         assert(priority >= 0);
963         assert(priority <= 999);
964         assert(message);
965
966         if (_unlikely_(LOG_PRI(priority) > s->max_level_kmsg))
967                 return;
968
969         if (_unlikely_(s->dev_kmsg_fd < 0))
970                 return;
971
972         /* Never allow messages with kernel facility to be written to
973          * kmsg, regardless where the data comes from. */
974         priority = fixup_priority(priority);
975
976         /* First: priority field */
977         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
978         char_array_0(header_priority);
979         IOVEC_SET_STRING(iovec[n++], header_priority);
980
981         /* Second: identifier and PID */
982         if (ucred) {
983                 if (!identifier) {
984                         get_process_comm(ucred->pid, &ident_buf);
985                         identifier = ident_buf;
986                 }
987
988                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
989                 char_array_0(header_pid);
990
991                 if (identifier)
992                         IOVEC_SET_STRING(iovec[n++], identifier);
993
994                 IOVEC_SET_STRING(iovec[n++], header_pid);
995         } else if (identifier) {
996                 IOVEC_SET_STRING(iovec[n++], identifier);
997                 IOVEC_SET_STRING(iovec[n++], ": ");
998         }
999
1000         /* Fourth: message */
1001         IOVEC_SET_STRING(iovec[n++], message);
1002         IOVEC_SET_STRING(iovec[n++], "\n");
1003
1004         if (writev(s->dev_kmsg_fd, iovec, n) < 0)
1005                 log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
1006
1007         free(ident_buf);
1008 }
1009
1010 static void forward_console(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
1011         struct iovec iovec[4];
1012         char header_pid[16];
1013         int n = 0, fd;
1014         char *ident_buf = NULL;
1015         const char *tty;
1016
1017         assert(s);
1018         assert(message);
1019
1020         if (LOG_PRI(priority) > s->max_level_console)
1021                 return;
1022
1023         /* First: identifier and PID */
1024         if (ucred) {
1025                 if (!identifier) {
1026                         get_process_comm(ucred->pid, &ident_buf);
1027                         identifier = ident_buf;
1028                 }
1029
1030                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
1031                 char_array_0(header_pid);
1032
1033                 if (identifier)
1034                         IOVEC_SET_STRING(iovec[n++], identifier);
1035
1036                 IOVEC_SET_STRING(iovec[n++], header_pid);
1037         } else if (identifier) {
1038                 IOVEC_SET_STRING(iovec[n++], identifier);
1039                 IOVEC_SET_STRING(iovec[n++], ": ");
1040         }
1041
1042         /* Third: message */
1043         IOVEC_SET_STRING(iovec[n++], message);
1044         IOVEC_SET_STRING(iovec[n++], "\n");
1045
1046         tty = s->tty_path ? s->tty_path : "/dev/console";
1047
1048         fd = open_terminal(tty, O_WRONLY|O_NOCTTY|O_CLOEXEC);
1049         if (fd < 0) {
1050                 log_debug("Failed to open %s for logging: %s", tty, strerror(errno));
1051                 goto finish;
1052         }
1053
1054         if (writev(fd, iovec, n) < 0)
1055                 log_debug("Failed to write to %s for logging: %s", tty, strerror(errno));
1056
1057         close_nointr_nofail(fd);
1058
1059 finish:
1060         free(ident_buf);
1061 }
1062
1063 static void read_identifier(const char **buf, char **identifier, char **pid) {
1064         const char *p;
1065         char *t;
1066         size_t l, e;
1067
1068         assert(buf);
1069         assert(identifier);
1070         assert(pid);
1071
1072         p = *buf;
1073
1074         p += strspn(p, WHITESPACE);
1075         l = strcspn(p, WHITESPACE);
1076
1077         if (l <= 0 ||
1078             p[l-1] != ':')
1079                 return;
1080
1081         e = l;
1082         l--;
1083
1084         if (p[l-1] == ']') {
1085                 size_t k = l-1;
1086
1087                 for (;;) {
1088
1089                         if (p[k] == '[') {
1090                                 t = strndup(p+k+1, l-k-2);
1091                                 if (t)
1092                                         *pid = t;
1093
1094                                 l = k;
1095                                 break;
1096                         }
1097
1098                         if (k == 0)
1099                                 break;
1100
1101                         k--;
1102                 }
1103         }
1104
1105         t = strndup(p, l);
1106         if (t)
1107                 *identifier = t;
1108
1109         *buf = p + e;
1110         *buf += strspn(*buf, WHITESPACE);
1111 }
1112
1113 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv, const char *label, size_t label_len) {
1114         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
1115         struct iovec iovec[N_IOVEC_META_FIELDS + 6];
1116         unsigned n = 0;
1117         int priority = LOG_USER | LOG_INFO;
1118         char *identifier = NULL, *pid = NULL;
1119         const char *orig;
1120
1121         assert(s);
1122         assert(buf);
1123
1124         orig = buf;
1125         parse_syslog_priority((char**) &buf, &priority);
1126
1127         if (s->forward_to_syslog)
1128                 forward_syslog_raw(s, priority, orig, ucred, tv);
1129
1130         skip_syslog_date((char**) &buf);
1131         read_identifier(&buf, &identifier, &pid);
1132
1133         if (s->forward_to_kmsg)
1134                 forward_kmsg(s, priority, identifier, buf, ucred);
1135
1136         if (s->forward_to_console)
1137                 forward_console(s, priority, identifier, buf, ucred);
1138
1139         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
1140
1141         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1142                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1143
1144         if (priority & LOG_FACMASK)
1145                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1146                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1147
1148         if (identifier) {
1149                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1150                 if (syslog_identifier)
1151                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1152         }
1153
1154         if (pid) {
1155                 syslog_pid = strappend("SYSLOG_PID=", pid);
1156                 if (syslog_pid)
1157                         IOVEC_SET_STRING(iovec[n++], syslog_pid);
1158         }
1159
1160         message = strappend("MESSAGE=", buf);
1161         if (message)
1162                 IOVEC_SET_STRING(iovec[n++], message);
1163
1164         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, label, label_len, NULL, priority);
1165
1166         free(message);
1167         free(identifier);
1168         free(pid);
1169         free(syslog_priority);
1170         free(syslog_facility);
1171         free(syslog_identifier);
1172         free(syslog_pid);
1173 }
1174
/* Checks whether the l bytes at p form a field name clients may set.
 *
 * This roughly follows the POSIX syntax recommendations for
 * environment variable names, with a few extra restrictions:
 *
 * http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
 *
 * Accepted are names of 1 to 64 characters out of A-Z, 0-9 and '_'
 * that begin with a letter. */
static bool valid_user_field(const char *p, size_t l) {
        size_t i;

        /* Neither empty nor overly long names */
        if (l == 0 || l > 64)
                return false;

        /* A leading underscore marks protected fields */
        if (p[0] == '_')
                return false;

        /* A leading digit is not allowed either */
        if (p[0] >= '0' && p[0] <= '9')
                return false;

        /* Everything has to be an uppercase letter, a digit or '_' */
        for (i = 0; i < l; i++) {
                char c = p[i];
                bool upper = c >= 'A' && c <= 'Z';
                bool digit = c >= '0' && c <= '9';

                if (!upper && !digit && c != '_')
                        return false;
        }

        return true;
}
1209
1210 static void process_native_message(
1211                 Server *s,
1212                 const void *buffer, size_t buffer_size,
1213                 struct ucred *ucred,
1214                 struct timeval *tv,
1215                 const char *label, size_t label_len) {
1216
1217         struct iovec *iovec = NULL;
1218         unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1219         const char *p;
1220         size_t remaining;
1221         int priority = LOG_INFO;
1222         char *identifier = NULL, *message = NULL;
1223
1224         assert(s);
1225         assert(buffer || buffer_size == 0);
1226
1227         p = buffer;
1228         remaining = buffer_size;
1229
1230         while (remaining > 0) {
1231                 const char *e, *q;
1232
1233                 e = memchr(p, '\n', remaining);
1234
1235                 if (!e) {
1236                         /* Trailing noise, let's ignore it, and flush what we collected */
1237                         log_debug("Received message with trailing noise, ignoring.");
1238                         break;
1239                 }
1240
1241                 if (e == p) {
1242                         /* Entry separator */
1243                         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
1244                         n = 0;
1245                         priority = LOG_INFO;
1246
1247                         p++;
1248                         remaining--;
1249                         continue;
1250                 }
1251
1252                 if (*p == '.' || *p == '#') {
1253                         /* Ignore control commands for now, and
1254                          * comments too. */
1255                         remaining -= (e - p) + 1;
1256                         p = e + 1;
1257                         continue;
1258                 }
1259
1260                 /* A property follows */
1261
1262                 if (n+N_IOVEC_META_FIELDS >= m) {
1263                         struct iovec *c;
1264                         unsigned u;
1265
1266                         u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1267                         c = realloc(iovec, u * sizeof(struct iovec));
1268                         if (!c) {
1269                                 log_oom();
1270                                 break;
1271                         }
1272
1273                         iovec = c;
1274                         m = u;
1275                 }
1276
1277                 q = memchr(p, '=', e - p);
1278                 if (q) {
1279                         if (valid_user_field(p, q - p)) {
1280                                 size_t l;
1281
1282                                 l = e - p;
1283
1284                                 /* If the field name starts with an
1285                                  * underscore, skip the variable,
1286                                  * since that indidates a trusted
1287                                  * field */
1288                                 iovec[n].iov_base = (char*) p;
1289                                 iovec[n].iov_len = l;
1290                                 n++;
1291
1292                                 /* We need to determine the priority
1293                                  * of this entry for the rate limiting
1294                                  * logic */
1295                                 if (l == 10 &&
1296                                     memcmp(p, "PRIORITY=", 9) == 0 &&
1297                                     p[9] >= '0' && p[9] <= '9')
1298                                         priority = (priority & LOG_FACMASK) | (p[9] - '0');
1299
1300                                 else if (l == 17 &&
1301                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1302                                          p[16] >= '0' && p[16] <= '9')
1303                                         priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1304
1305                                 else if (l == 18 &&
1306                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1307                                          p[16] >= '0' && p[16] <= '9' &&
1308                                          p[17] >= '0' && p[17] <= '9')
1309                                         priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1310
1311                                 else if (l >= 19 &&
1312                                          memcmp(p, "SYSLOG_IDENTIFIER=", 18) == 0) {
1313                                         char *t;
1314
1315                                         t = strndup(p + 18, l - 18);
1316                                         if (t) {
1317                                                 free(identifier);
1318                                                 identifier = t;
1319                                         }
1320                                 } else if (l >= 8 &&
1321                                            memcmp(p, "MESSAGE=", 8) == 0) {
1322                                         char *t;
1323
1324                                         t = strndup(p + 8, l - 8);
1325                                         if (t) {
1326                                                 free(message);
1327                                                 message = t;
1328                                         }
1329                                 }
1330                         }
1331
1332                         remaining -= (e - p) + 1;
1333                         p = e + 1;
1334                         continue;
1335                 } else {
1336                         le64_t l_le;
1337                         uint64_t l;
1338                         char *k;
1339
1340                         if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1341                                 log_debug("Failed to parse message, ignoring.");
1342                                 break;
1343                         }
1344
1345                         memcpy(&l_le, e + 1, sizeof(uint64_t));
1346                         l = le64toh(l_le);
1347
1348                         if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1349                             e[1+sizeof(uint64_t)+l] != '\n') {
1350                                 log_debug("Failed to parse message, ignoring.");
1351                                 break;
1352                         }
1353
1354                         k = malloc((e - p) + 1 + l);
1355                         if (!k) {
1356                                 log_oom();
1357                                 break;
1358                         }
1359
1360                         memcpy(k, p, e - p);
1361                         k[e - p] = '=';
1362                         memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1363
1364                         if (valid_user_field(p, e - p)) {
1365                                 iovec[n].iov_base = k;
1366                                 iovec[n].iov_len = (e - p) + 1 + l;
1367                                 n++;
1368                         } else
1369                                 free(k);
1370
1371                         remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1372                         p = e + 1 + sizeof(uint64_t) + l + 1;
1373                 }
1374         }
1375
1376         if (n <= 0)
1377                 goto finish;
1378
1379         tn = n++;
1380         IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1381
1382         if (message) {
1383                 if (s->forward_to_syslog)
1384                         forward_syslog(s, priority, identifier, message, ucred, tv);
1385
1386                 if (s->forward_to_kmsg)
1387                         forward_kmsg(s, priority, identifier, message, ucred);
1388
1389                 if (s->forward_to_console)
1390                         forward_console(s, priority, identifier, message, ucred);
1391         }
1392
1393         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
1394
1395 finish:
1396         for (j = 0; j < n; j++)  {
1397                 if (j == tn)
1398                         continue;
1399
1400                 if (iovec[j].iov_base < buffer ||
1401                     (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1402                         free(iovec[j].iov_base);
1403         }
1404
1405         free(iovec);
1406         free(identifier);
1407         free(message);
1408 }
1409
1410 static void process_native_file(
1411                 Server *s,
1412                 int fd,
1413                 struct ucred *ucred,
1414                 struct timeval *tv,
1415                 const char *label, size_t label_len) {
1416
1417         struct stat st;
1418         void *p;
1419         ssize_t n;
1420
1421         assert(s);
1422         assert(fd >= 0);
1423
1424         /* Data is in the passed file, since it didn't fit in a
1425          * datagram. We can't map the file here, since clients might
1426          * then truncate it and trigger a SIGBUS for us. So let's
1427          * stupidly read it */
1428
1429         if (fstat(fd, &st) < 0) {
1430                 log_error("Failed to stat passed file, ignoring: %m");
1431                 return;
1432         }
1433
1434         if (!S_ISREG(st.st_mode)) {
1435                 log_error("File passed is not regular. Ignoring.");
1436                 return;
1437         }
1438
1439         if (st.st_size <= 0)
1440                 return;
1441
1442         if (st.st_size > ENTRY_SIZE_MAX) {
1443                 log_error("File passed too large. Ignoring.");
1444                 return;
1445         }
1446
1447         p = malloc(st.st_size);
1448         if (!p) {
1449                 log_oom();
1450                 return;
1451         }
1452
1453         n = pread(fd, p, st.st_size, 0);
1454         if (n < 0)
1455                 log_error("Failed to read file, ignoring: %s", strerror(-n));
1456         else if (n > 0)
1457                 process_native_message(s, p, n, ucred, tv, label, label_len);
1458
1459         free(p);
1460 }
1461
1462 static int stdout_stream_log(StdoutStream *s, const char *p) {
1463         struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1464         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1465         unsigned n = 0;
1466         int priority;
1467         char *label = NULL;
1468         size_t label_len = 0;
1469
1470         assert(s);
1471         assert(p);
1472
1473         if (isempty(p))
1474                 return 0;
1475
1476         priority = s->priority;
1477
1478         if (s->level_prefix)
1479                 parse_syslog_priority((char**) &p, &priority);
1480
1481         if (s->forward_to_syslog || s->server->forward_to_syslog)
1482                 forward_syslog(s->server, fixup_priority(priority), s->identifier, p, &s->ucred, NULL);
1483
1484         if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1485                 forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1486
1487         if (s->forward_to_console || s->server->forward_to_console)
1488                 forward_console(s->server, priority, s->identifier, p, &s->ucred);
1489
1490         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1491
1492         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1493                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1494
1495         if (priority & LOG_FACMASK)
1496                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1497                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1498
1499         if (s->identifier) {
1500                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1501                 if (syslog_identifier)
1502                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1503         }
1504
1505         message = strappend("MESSAGE=", p);
1506         if (message)
1507                 IOVEC_SET_STRING(iovec[n++], message);
1508
1509 #ifdef HAVE_SELINUX
1510         if (s->security_context) {
1511                 label = (char*) s->security_context;
1512                 label_len = strlen((char*) s->security_context);
1513         }
1514 #endif
1515
1516         dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, label, label_len, s->unit_id, priority);
1517
1518         free(message);
1519         free(syslog_priority);
1520         free(syslog_facility);
1521         free(syslog_identifier);
1522
1523         return 0;
1524 }
1525
1526 static int stdout_stream_line(StdoutStream *s, char *p) {
1527         int r;
1528
1529         assert(s);
1530         assert(p);
1531
1532         p = strstrip(p);
1533
1534         switch (s->state) {
1535
1536         case STDOUT_STREAM_IDENTIFIER:
1537                 if (isempty(p))
1538                         s->identifier = NULL;
1539                 else  {
1540                         s->identifier = strdup(p);
1541                         if (!s->identifier)
1542                                 return log_oom();
1543                 }
1544
1545                 s->state = STDOUT_STREAM_UNIT_ID;
1546                 return 0;
1547
1548         case STDOUT_STREAM_UNIT_ID:
1549                 if (s->ucred.uid == 0) {
1550                         if (isempty(p))
1551                                 s->unit_id = NULL;
1552                         else  {
1553                                 s->unit_id = strdup(p);
1554                                 if (!s->unit_id)
1555                                         return log_oom();
1556                         }
1557                 }
1558
1559                 s->state = STDOUT_STREAM_PRIORITY;
1560                 return 0;
1561
1562         case STDOUT_STREAM_PRIORITY:
1563                 r = safe_atoi(p, &s->priority);
1564                 if (r < 0 || s->priority <= 0 || s->priority >= 999) {
1565                         log_warning("Failed to parse log priority line.");
1566                         return -EINVAL;
1567                 }
1568
1569                 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1570                 return 0;
1571
1572         case STDOUT_STREAM_LEVEL_PREFIX:
1573                 r = parse_boolean(p);
1574                 if (r < 0) {
1575                         log_warning("Failed to parse level prefix line.");
1576                         return -EINVAL;
1577                 }
1578
1579                 s->level_prefix = !!r;
1580                 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1581                 return 0;
1582
1583         case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1584                 r = parse_boolean(p);
1585                 if (r < 0) {
1586                         log_warning("Failed to parse forward to syslog line.");
1587                         return -EINVAL;
1588                 }
1589
1590                 s->forward_to_syslog = !!r;
1591                 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1592                 return 0;
1593
1594         case STDOUT_STREAM_FORWARD_TO_KMSG:
1595                 r = parse_boolean(p);
1596                 if (r < 0) {
1597                         log_warning("Failed to parse copy to kmsg line.");
1598                         return -EINVAL;
1599                 }
1600
1601                 s->forward_to_kmsg = !!r;
1602                 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1603                 return 0;
1604
1605         case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1606                 r = parse_boolean(p);
1607                 if (r < 0) {
1608                         log_warning("Failed to parse copy to console line.");
1609                         return -EINVAL;
1610                 }
1611
1612                 s->forward_to_console = !!r;
1613                 s->state = STDOUT_STREAM_RUNNING;
1614                 return 0;
1615
1616         case STDOUT_STREAM_RUNNING:
1617                 return stdout_stream_log(s, p);
1618         }
1619
1620         assert_not_reached("Unknown stream state");
1621 }
1622
1623 static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
1624         char *p;
1625         size_t remaining;
1626         int r;
1627
1628         assert(s);
1629
1630         p = s->buffer;
1631         remaining = s->length;
1632         for (;;) {
1633                 char *end;
1634                 size_t skip;
1635
1636                 end = memchr(p, '\n', remaining);
1637                 if (end)
1638                         skip = end - p + 1;
1639                 else if (remaining >= sizeof(s->buffer) - 1) {
1640                         end = p + sizeof(s->buffer) - 1;
1641                         skip = remaining;
1642                 } else
1643                         break;
1644
1645                 *end = 0;
1646
1647                 r = stdout_stream_line(s, p);
1648                 if (r < 0)
1649                         return r;
1650
1651                 remaining -= skip;
1652                 p += skip;
1653         }
1654
1655         if (force_flush && remaining > 0) {
1656                 p[remaining] = 0;
1657                 r = stdout_stream_line(s, p);
1658                 if (r < 0)
1659                         return r;
1660
1661                 p += remaining;
1662                 remaining = 0;
1663         }
1664
1665         if (p > s->buffer) {
1666                 memmove(s->buffer, p, remaining);
1667                 s->length = remaining;
1668         }
1669
1670         return 0;
1671 }
1672
1673 static int stdout_stream_process(StdoutStream *s) {
1674         ssize_t l;
1675         int r;
1676
1677         assert(s);
1678
1679         l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1680         if (l < 0) {
1681
1682                 if (errno == EAGAIN)
1683                         return 0;
1684
1685                 log_warning("Failed to read from stream: %m");
1686                 return -errno;
1687         }
1688
1689         if (l == 0) {
1690                 r = stdout_stream_scan(s, true);
1691                 if (r < 0)
1692                         return r;
1693
1694                 return 0;
1695         }
1696
1697         s->length += l;
1698         r = stdout_stream_scan(s, false);
1699         if (r < 0)
1700                 return r;
1701
1702         return 1;
1703
1704 }
1705
1706 static void stdout_stream_free(StdoutStream *s) {
1707         assert(s);
1708
1709         if (s->server) {
1710                 assert(s->server->n_stdout_streams > 0);
1711                 s->server->n_stdout_streams --;
1712                 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
1713         }
1714
1715         if (s->fd >= 0) {
1716                 if (s->server)
1717                         epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1718
1719                 close_nointr_nofail(s->fd);
1720         }
1721
1722 #ifdef HAVE_SELINUX
1723         if (s->security_context)
1724                 freecon(s->security_context);
1725 #endif
1726
1727         free(s->identifier);
1728         free(s);
1729 }
1730
1731 static int stdout_stream_new(Server *s) {
1732         StdoutStream *stream;
1733         int fd, r;
1734         socklen_t len;
1735         struct epoll_event ev;
1736
1737         assert(s);
1738
1739         fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1740         if (fd < 0) {
1741                 if (errno == EAGAIN)
1742                         return 0;
1743
1744                 log_error("Failed to accept stdout connection: %m");
1745                 return -errno;
1746         }
1747
1748         if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1749                 log_warning("Too many stdout streams, refusing connection.");
1750                 close_nointr_nofail(fd);
1751                 return 0;
1752         }
1753
1754         stream = new0(StdoutStream, 1);
1755         if (!stream) {
1756                 close_nointr_nofail(fd);
1757                 return log_oom();
1758         }
1759
1760         stream->fd = fd;
1761
1762         len = sizeof(stream->ucred);
1763         if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1764                 log_error("Failed to determine peer credentials: %m");
1765                 r = -errno;
1766                 goto fail;
1767         }
1768
1769 #ifdef HAVE_SELINUX
1770         if (getpeercon(fd, &stream->security_context) < 0 && errno != ENOPROTOOPT)
1771                 log_error("Failed to determine peer security context: %m");
1772 #endif
1773
1774         if (shutdown(fd, SHUT_WR) < 0) {
1775                 log_error("Failed to shutdown writing side of socket: %m");
1776                 r = -errno;
1777                 goto fail;
1778         }
1779
1780         zero(ev);
1781         ev.data.ptr = stream;
1782         ev.events = EPOLLIN;
1783         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1784                 log_error("Failed to add stream to event loop: %m");
1785                 r = -errno;
1786                 goto fail;
1787         }
1788
1789         stream->server = s;
1790         LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1791         s->n_stdout_streams ++;
1792
1793         return 0;
1794
1795 fail:
1796         stdout_stream_free(stream);
1797         return r;
1798 }
1799
/* Tells whether the textual process ID in 'pid' refers to this very
 * process. Strings that parse_pid() rejects yield false. */
static bool is_us(const char *pid) {
        pid_t parsed;

        assert(pid);

        return parse_pid(pid, &parsed) >= 0 && parsed == getpid();
}
1810
1811 static void dev_kmsg_record(Server *s, char *p, size_t l) {
1812         struct iovec iovec[N_IOVEC_META_FIELDS + 7 + N_IOVEC_KERNEL_FIELDS];
1813         char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
1814         int priority, r;
1815         unsigned n = 0, z = 0, j;
1816         usec_t usec;
1817         char *identifier = NULL, *pid = NULL, *e, *f, *k;
1818         uint64_t serial;
1819         size_t pl;
1820
1821         assert(s);
1822         assert(p);
1823
1824         if (l <= 0)
1825                 return;
1826
1827         e = memchr(p, ',', l);
1828         if (!e)
1829                 return;
1830         *e = 0;
1831
1832         r = safe_atoi(p, &priority);
1833         if (r < 0 || priority < 0 || priority > 999)
1834                 return;
1835
1836         if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
1837                 return;
1838
1839         l -= (e - p) + 1;
1840         p = e + 1;
1841         e = memchr(p, ',', l);
1842         if (!e)
1843                 return;
1844         *e = 0;
1845
1846         r = safe_atou64(p, &serial);
1847         if (r < 0)
1848                 return;
1849
1850         if (s->kernel_seqnum) {
1851                 /* We already read this one? */
1852                 if (serial < *s->kernel_seqnum)
1853                         return;
1854
1855                 /* Did we lose any? */
1856                 if (serial > *s->kernel_seqnum)
1857                         driver_message(s, SD_MESSAGE_JOURNAL_MISSED, "Missed %llu kernel messages", (unsigned long long) serial - *s->kernel_seqnum - 1);
1858
1859                 /* Make sure we never read this one again. Note that
1860                  * we always store the next message serial we expect
1861                  * here, simply because this makes handling the first
1862                  * message with serial 0 easy. */
1863                 *s->kernel_seqnum = serial + 1;
1864         }
1865
1866         l -= (e - p) + 1;
1867         p = e + 1;
1868         f = memchr(p, ';', l);
1869         if (!f)
1870                 return;
1871         /* Kernel 3.6 has the flags field, kernel 3.5 lacks that */
1872         e = memchr(p, ',', l);
1873         if (!e || f < e)
1874                 e = f;
1875         *e = 0;
1876
1877         r = parse_usec(p, &usec);
1878         if (r < 0)
1879                 return;
1880
1881         l -= (f - p) + 1;
1882         p = f + 1;
1883         e = memchr(p, '\n', l);
1884         if (!e)
1885                 return;
1886         *e = 0;
1887
1888         pl = e - p;
1889         l -= (e - p) + 1;
1890         k = e + 1;
1891
1892         for (j = 0; l > 0 && j < N_IOVEC_KERNEL_FIELDS; j++) {
1893                 char *m;
1894                 /* Meta data fields attached */
1895
1896                 if (*k != ' ')
1897                         break;
1898
1899                 k ++, l --;
1900
1901                 e = memchr(k, '\n', l);
1902                 if (!e)
1903                         return;
1904
1905                 *e = 0;
1906
1907                 m = cunescape_length_with_prefix(k, e - k, "_KERNEL_");
1908                 if (!m)
1909                         break;
1910
1911                 IOVEC_SET_STRING(iovec[n++], m);
1912                 z++;
1913
1914                 l -= (e - k) + 1;
1915                 k = e + 1;
1916         }
1917
1918         if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
1919                      (unsigned long long) usec) >= 0)
1920                 IOVEC_SET_STRING(iovec[n++], source_time);
1921
1922         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
1923
1924         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1925                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1926
1927         if ((priority & LOG_FACMASK) == LOG_KERN)
1928                 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
1929         else {
1930                 read_identifier((const char**) &p, &identifier, &pid);
1931
1932                 /* Avoid any messages we generated ourselves via
1933                  * log_info() and friends. */
1934                 if (pid && is_us(pid))
1935                         goto finish;
1936
1937                 if (identifier) {
1938                         syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1939                         if (syslog_identifier)
1940                                 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1941                 }
1942
1943                 if (pid) {
1944                         syslog_pid = strappend("SYSLOG_PID=", pid);
1945                         if (syslog_pid)
1946                                 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1947                 }
1948
1949                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1950                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1951         }
1952
1953         message = cunescape_length_with_prefix(p, pl, "MESSAGE=");
1954         if (message)
1955                 IOVEC_SET_STRING(iovec[n++], message);
1956
1957         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, NULL, priority);
1958
1959 finish:
1960         for (j = 0; j < z; j++)
1961                 free(iovec[j].iov_base);
1962
1963         free(message);
1964         free(syslog_priority);
1965         free(syslog_identifier);
1966         free(syslog_pid);
1967         free(syslog_facility);
1968         free(source_time);
1969         free(identifier);
1970         free(pid);
1971 }
1972
1973 static int system_journal_open(Server *s) {
1974         int r;
1975         char *fn;
1976         sd_id128_t machine;
1977         char ids[33];
1978
1979         r = sd_id128_get_machine(&machine);
1980         if (r < 0)
1981                 return r;
1982
1983         sd_id128_to_string(machine, ids);
1984
1985         if (!s->system_journal &&
1986             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
1987             access("/run/systemd/journal/flushed", F_OK) >= 0) {
1988
1989                 /* If in auto mode: first try to create the machine
1990                  * path, but not the prefix.
1991                  *
1992                  * If in persistent mode: create /var/log/journal and
1993                  * the machine path */
1994
1995                 if (s->storage == STORAGE_PERSISTENT)
1996                         (void) mkdir("/var/log/journal/", 0755);
1997
1998                 fn = strappend("/var/log/journal/", ids);
1999                 if (!fn)
2000                         return -ENOMEM;
2001
2002                 (void) mkdir(fn, 0755);
2003                 free(fn);
2004
2005                 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
2006                 if (!fn)
2007                         return -ENOMEM;
2008
2009                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, &s->system_metrics, NULL, &s->system_journal);
2010                 free(fn);
2011
2012                 if (r >= 0) {
2013                         s->system_journal->compress = s->compress;
2014
2015                         server_fix_perms(s, s->system_journal, 0);
2016                 } else if (r < 0) {
2017
2018                         if (r != -ENOENT && r != -EROFS)
2019                                 log_warning("Failed to open system journal: %s", strerror(-r));
2020
2021                         r = 0;
2022                 }
2023         }
2024
2025         if (!s->runtime_journal &&
2026             (s->storage != STORAGE_NONE)) {
2027
2028                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
2029                 if (!fn)
2030                         return -ENOMEM;
2031
2032                 if (s->system_journal) {
2033
2034                         /* Try to open the runtime journal, but only
2035                          * if it already exists, so that we can flush
2036                          * it into the system journal */
2037
2038                         r = journal_file_open(fn, O_RDWR, 0640, &s->runtime_metrics, NULL, &s->runtime_journal);
2039                         free(fn);
2040
2041                         if (r < 0) {
2042                                 if (r != -ENOENT)
2043                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
2044
2045                                 r = 0;
2046                         }
2047
2048                 } else {
2049
2050                         /* OK, we really need the runtime journal, so create
2051                          * it if necessary. */
2052
2053                         (void) mkdir_parents(fn, 0755);
2054                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, &s->runtime_metrics, NULL, &s->runtime_journal);
2055                         free(fn);
2056
2057                         if (r < 0) {
2058                                 log_error("Failed to open runtime journal: %s", strerror(-r));
2059                                 return r;
2060                         }
2061                 }
2062
2063                 if (s->runtime_journal) {
2064                         s->runtime_journal->compress = s->compress;
2065
2066                         server_fix_perms(s, s->runtime_journal, 0);
2067                 }
2068         }
2069
2070         return r;
2071 }
2072
2073 static int server_flush_to_var(Server *s) {
2074         Object *o = NULL;
2075         int r;
2076         sd_id128_t machine;
2077         sd_journal *j;
2078
2079         assert(s);
2080
2081         if (s->storage != STORAGE_AUTO &&
2082             s->storage != STORAGE_PERSISTENT)
2083                 return 0;
2084
2085         if (!s->runtime_journal)
2086                 return 0;
2087
2088         system_journal_open(s);
2089
2090         if (!s->system_journal)
2091                 return 0;
2092
2093         log_info("Flushing to /var...");
2094
2095         r = sd_id128_get_machine(&machine);
2096         if (r < 0) {
2097                 log_error("Failed to get machine id: %s", strerror(-r));
2098                 return r;
2099         }
2100
2101         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
2102         if (r < 0) {
2103                 log_error("Failed to read runtime journal: %s", strerror(-r));
2104                 return r;
2105         }
2106
2107         SD_JOURNAL_FOREACH(j) {
2108                 JournalFile *f;
2109
2110                 f = j->current_file;
2111                 assert(f && f->current_offset > 0);
2112
2113                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2114                 if (r < 0) {
2115                         log_error("Can't read entry: %s", strerror(-r));
2116                         goto finish;
2117                 }
2118
2119                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2120                 if (r == -E2BIG) {
2121                         log_info("Allocation limit reached.");
2122
2123                         journal_file_post_change(s->system_journal);
2124                         server_rotate(s);
2125                         server_vacuum(s);
2126
2127                         r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2128                 }
2129
2130                 if (r < 0) {
2131                         log_error("Can't write entry: %s", strerror(-r));
2132                         goto finish;
2133                 }
2134         }
2135
2136 finish:
2137         journal_file_post_change(s->system_journal);
2138
2139         journal_file_close(s->runtime_journal);
2140         s->runtime_journal = NULL;
2141
2142         if (r >= 0)
2143                 rm_rf("/run/log/journal", false, true, false);
2144
2145         return r;
2146 }
2147
2148 static int server_read_dev_kmsg(Server *s) {
2149         char buffer[8192+1]; /* the kernel-side limit per record is 8K currently */
2150         ssize_t l;
2151
2152         assert(s);
2153         assert(s->dev_kmsg_fd >= 0);
2154
2155         l = read(s->dev_kmsg_fd, buffer, sizeof(buffer) - 1);
2156         if (l == 0)
2157                 return 0;
2158         if (l < 0) {
2159                 /* Old kernels who don't allow reading from /dev/kmsg
2160                  * return EINVAL when we try. So handle this cleanly,
2161                  * but don' try to ever read from it again. */
2162                 if (errno == EINVAL) {
2163                         epoll_ctl(s->epoll_fd, EPOLL_CTL_DEL, s->dev_kmsg_fd, NULL);
2164                         return 0;
2165                 }
2166
2167                 if (errno == EAGAIN || errno == EINTR)
2168                         return 0;
2169
2170                 log_error("Failed to read from kernel: %m");
2171                 return -errno;
2172         }
2173
2174         dev_kmsg_record(s, buffer, l);
2175         return 1;
2176 }
2177
2178 static int server_flush_dev_kmsg(Server *s) {
2179         int r;
2180
2181         assert(s);
2182
2183         if (s->dev_kmsg_fd < 0)
2184                 return 0;
2185
2186         if (!s->dev_kmsg_readable)
2187                 return 0;
2188
2189         log_info("Flushing /dev/kmsg...");
2190
2191         for (;;) {
2192                 r = server_read_dev_kmsg(s);
2193                 if (r < 0)
2194                         return r;
2195
2196                 if (r == 0)
2197                         break;
2198         }
2199
2200         return 0;
2201 }
2202
2203 static int process_event(Server *s, struct epoll_event *ev) {
2204         assert(s);
2205         assert(ev);
2206
2207         if (ev->data.fd == s->signal_fd) {
2208                 struct signalfd_siginfo sfsi;
2209                 ssize_t n;
2210
2211                 if (ev->events != EPOLLIN) {
2212                         log_info("Got invalid event from epoll.");
2213                         return -EIO;
2214                 }
2215
2216                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
2217                 if (n != sizeof(sfsi)) {
2218
2219                         if (n >= 0)
2220                                 return -EIO;
2221
2222                         if (errno == EINTR || errno == EAGAIN)
2223                                 return 1;
2224
2225                         return -errno;
2226                 }
2227
2228                 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
2229
2230                 if (sfsi.ssi_signo == SIGUSR1) {
2231                         touch("/run/systemd/journal/flushed");
2232                         server_flush_to_var(s);
2233                         return 1;
2234                 }
2235
2236                 if (sfsi.ssi_signo == SIGUSR2) {
2237                         server_rotate(s);
2238                         server_vacuum(s);
2239                         return 1;
2240                 }
2241
2242                 return 0;
2243
2244         } else if (ev->data.fd == s->dev_kmsg_fd) {
2245                 int r;
2246
2247                 if (ev->events != EPOLLIN) {
2248                         log_info("Got invalid event from epoll.");
2249                         return -EIO;
2250                 }
2251
2252                 r = server_read_dev_kmsg(s);
2253                 if (r < 0)
2254                         return r;
2255
2256                 return 1;
2257
2258         } else if (ev->data.fd == s->native_fd ||
2259                    ev->data.fd == s->syslog_fd) {
2260
2261                 if (ev->events != EPOLLIN) {
2262                         log_info("Got invalid event from epoll.");
2263                         return -EIO;
2264                 }
2265
2266                 for (;;) {
2267                         struct msghdr msghdr;
2268                         struct iovec iovec;
2269                         struct ucred *ucred = NULL;
2270                         struct timeval *tv = NULL;
2271                         struct cmsghdr *cmsg;
2272                         char *label = NULL;
2273                         size_t label_len = 0;
2274                         union {
2275                                 struct cmsghdr cmsghdr;
2276
2277                                 /* We use NAME_MAX space for the
2278                                  * SELinux label here. The kernel
2279                                  * currently enforces no limit, but
2280                                  * according to suggestions from the
2281                                  * SELinux people this will change and
2282                                  * it will probably be identical to
2283                                  * NAME_MAX. For now we use that, but
2284                                  * this should be updated one day when
2285                                  * the final limit is known.*/
2286                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
2287                                             CMSG_SPACE(sizeof(struct timeval)) +
2288                                             CMSG_SPACE(sizeof(int)) + /* fd */
2289                                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
2290                         } control;
2291                         ssize_t n;
2292                         int v;
2293                         int *fds = NULL;
2294                         unsigned n_fds = 0;
2295
2296                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
2297                                 log_error("SIOCINQ failed: %m");
2298                                 return -errno;
2299                         }
2300
2301                         if (s->buffer_size < (size_t) v) {
2302                                 void *b;
2303                                 size_t l;
2304
2305                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
2306                                 b = realloc(s->buffer, l+1);
2307
2308                                 if (!b) {
2309                                         log_error("Couldn't increase buffer.");
2310                                         return -ENOMEM;
2311                                 }
2312
2313                                 s->buffer_size = l;
2314                                 s->buffer = b;
2315                         }
2316
2317                         zero(iovec);
2318                         iovec.iov_base = s->buffer;
2319                         iovec.iov_len = s->buffer_size;
2320
2321                         zero(control);
2322                         zero(msghdr);
2323                         msghdr.msg_iov = &iovec;
2324                         msghdr.msg_iovlen = 1;
2325                         msghdr.msg_control = &control;
2326                         msghdr.msg_controllen = sizeof(control);
2327
2328                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
2329                         if (n < 0) {
2330
2331                                 if (errno == EINTR || errno == EAGAIN)
2332                                         return 1;
2333
2334                                 log_error("recvmsg() failed: %m");
2335                                 return -errno;
2336                         }
2337
2338                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
2339
2340                                 if (cmsg->cmsg_level == SOL_SOCKET &&
2341                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
2342                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
2343                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
2344                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2345                                          cmsg->cmsg_type == SCM_SECURITY) {
2346                                         label = (char*) CMSG_DATA(cmsg);
2347                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
2348                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
2349                                          cmsg->cmsg_type == SO_TIMESTAMP &&
2350                                          cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
2351                                         tv = (struct timeval*) CMSG_DATA(cmsg);
2352                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2353                                          cmsg->cmsg_type == SCM_RIGHTS) {
2354                                         fds = (int*) CMSG_DATA(cmsg);
2355                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
2356                                 }
2357                         }
2358
2359                         if (ev->data.fd == s->syslog_fd) {
2360                                 char *e;
2361
2362                                 if (n > 0 && n_fds == 0) {
2363                                         e = memchr(s->buffer, '\n', n);
2364                                         if (e)
2365                                                 *e = 0;
2366                                         else
2367                                                 s->buffer[n] = 0;
2368
2369                                         process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
2370                                 } else if (n_fds > 0)
2371                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
2372
2373                         } else {
2374                                 if (n > 0 && n_fds == 0)
2375                                         process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
2376                                 else if (n == 0 && n_fds == 1)
2377                                         process_native_file(s, fds[0], ucred, tv, label, label_len);
2378                                 else if (n_fds > 0)
2379                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
2380                         }
2381
2382                         close_many(fds, n_fds);
2383                 }
2384
2385                 return 1;
2386
2387         } else if (ev->data.fd == s->stdout_fd) {
2388
2389                 if (ev->events != EPOLLIN) {
2390                         log_info("Got invalid event from epoll.");
2391                         return -EIO;
2392                 }
2393
2394                 stdout_stream_new(s);
2395                 return 1;
2396
2397         } else {
2398                 StdoutStream *stream;
2399
2400                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
2401                         log_info("Got invalid event from epoll.");
2402                         return -EIO;
2403                 }
2404
2405                 /* If it is none of the well-known fds, it must be an
2406                  * stdout stream fd. Note that this is a bit ugly here
2407                  * (since we rely that none of the well-known fds
2408                  * could be interpreted as pointer), but nonetheless
2409                  * safe, since the well-known fds would never get an
2410                  * fd > 4096, i.e. beyond the first memory page */
2411
2412                 stream = ev->data.ptr;
2413
2414                 if (stdout_stream_process(stream) <= 0)
2415                         stdout_stream_free(stream);
2416
2417                 return 1;
2418         }
2419
2420         log_error("Unknown event.");
2421         return 0;
2422 }
2423
2424 static int open_syslog_socket(Server *s) {
2425         union sockaddr_union sa;
2426         int one, r;
2427         struct epoll_event ev;
2428
2429         assert(s);
2430
2431         if (s->syslog_fd < 0) {
2432
2433                 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2434                 if (s->syslog_fd < 0) {
2435                         log_error("socket() failed: %m");
2436                         return -errno;
2437                 }
2438
2439                 zero(sa);
2440                 sa.un.sun_family = AF_UNIX;
2441                 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
2442
2443                 unlink(sa.un.sun_path);
2444
2445                 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2446                 if (r < 0) {
2447                         log_error("bind() failed: %m");
2448                         return -errno;
2449                 }
2450
2451                 chmod(sa.un.sun_path, 0666);
2452         } else
2453                 fd_nonblock(s->syslog_fd, 1);
2454
2455         one = 1;
2456         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2457         if (r < 0) {
2458                 log_error("SO_PASSCRED failed: %m");
2459                 return -errno;
2460         }
2461
2462 #ifdef HAVE_SELINUX
2463         one = 1;
2464         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2465         if (r < 0)
2466                 log_warning("SO_PASSSEC failed: %m");
2467 #endif
2468
2469         one = 1;
2470         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2471         if (r < 0) {
2472                 log_error("SO_TIMESTAMP failed: %m");
2473                 return -errno;
2474         }
2475
2476         zero(ev);
2477         ev.events = EPOLLIN;
2478         ev.data.fd = s->syslog_fd;
2479         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2480                 log_error("Failed to add syslog server fd to epoll object: %m");
2481                 return -errno;
2482         }
2483
2484         return 0;
2485 }
2486
2487 static int open_native_socket(Server*s) {
2488         union sockaddr_union sa;
2489         int one, r;
2490         struct epoll_event ev;
2491
2492         assert(s);
2493
2494         if (s->native_fd < 0) {
2495
2496                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2497                 if (s->native_fd < 0) {
2498                         log_error("socket() failed: %m");
2499                         return -errno;
2500                 }
2501
2502                 zero(sa);
2503                 sa.un.sun_family = AF_UNIX;
2504                 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
2505
2506                 unlink(sa.un.sun_path);
2507
2508                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2509                 if (r < 0) {
2510                         log_error("bind() failed: %m");
2511                         return -errno;
2512                 }
2513
2514                 chmod(sa.un.sun_path, 0666);
2515         } else
2516                 fd_nonblock(s->native_fd, 1);
2517
2518         one = 1;
2519         r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2520         if (r < 0) {
2521                 log_error("SO_PASSCRED failed: %m");
2522                 return -errno;
2523         }
2524
2525 #ifdef HAVE_SELINUX
2526         one = 1;
2527         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2528         if (r < 0)
2529                 log_warning("SO_PASSSEC failed: %m");
2530 #endif
2531
2532         one = 1;
2533         r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2534         if (r < 0) {
2535                 log_error("SO_TIMESTAMP failed: %m");
2536                 return -errno;
2537         }
2538
2539         zero(ev);
2540         ev.events = EPOLLIN;
2541         ev.data.fd = s->native_fd;
2542         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2543                 log_error("Failed to add native server fd to epoll object: %m");
2544                 return -errno;
2545         }
2546
2547         return 0;
2548 }
2549
2550 static int open_stdout_socket(Server *s) {
2551         union sockaddr_union sa;
2552         int r;
2553         struct epoll_event ev;
2554
2555         assert(s);
2556
2557         if (s->stdout_fd < 0) {
2558
2559                 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2560                 if (s->stdout_fd < 0) {
2561                         log_error("socket() failed: %m");
2562                         return -errno;
2563                 }
2564
2565                 zero(sa);
2566                 sa.un.sun_family = AF_UNIX;
2567                 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
2568
2569                 unlink(sa.un.sun_path);
2570
2571                 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2572                 if (r < 0) {
2573                         log_error("bind() failed: %m");
2574                         return -errno;
2575                 }
2576
2577                 chmod(sa.un.sun_path, 0666);
2578
2579                 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2580                         log_error("liste() failed: %m");
2581                         return -errno;
2582                 }
2583         } else
2584                 fd_nonblock(s->stdout_fd, 1);
2585
2586         zero(ev);
2587         ev.events = EPOLLIN;
2588         ev.data.fd = s->stdout_fd;
2589         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2590                 log_error("Failed to add stdout server fd to epoll object: %m");
2591                 return -errno;
2592         }
2593
2594         return 0;
2595 }
2596
2597 static int open_dev_kmsg(Server *s) {
2598         struct epoll_event ev;
2599
2600         assert(s);
2601
2602         s->dev_kmsg_fd = open("/dev/kmsg", O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2603         if (s->dev_kmsg_fd < 0) {
2604                 log_warning("Failed to open /dev/kmsg, ignoring: %m");
2605                 return 0;
2606         }
2607
2608         zero(ev);
2609         ev.events = EPOLLIN;
2610         ev.data.fd = s->dev_kmsg_fd;
2611         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->dev_kmsg_fd, &ev) < 0) {
2612
2613                 /* This will fail with EPERM on older kernels where
2614                  * /dev/kmsg is not readable. */
2615                 if (errno == EPERM)
2616                         return 0;
2617
2618                 log_error("Failed to add /dev/kmsg fd to epoll object: %m");
2619                 return -errno;
2620         }
2621
2622         s->dev_kmsg_readable = true;
2623
2624         return 0;
2625 }
2626
2627 static int open_kernel_seqnum(Server *s) {
2628         int fd;
2629         uint64_t *p;
2630
2631         assert(s);
2632
2633         /* We store the seqnum we last read in an mmaped file. That
2634          * way we can just use it like a variable, but it is
2635          * persistant and automatically flushed at reboot. */
2636
2637         fd = open("/run/systemd/journal/kernel-seqnum", O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0644);
2638         if (fd < 0) {
2639                 log_error("Failed to open /run/systemd/journal/kernel-seqnum, ignoring: %m");
2640                 return 0;
2641         }
2642
2643         if (posix_fallocate(fd, 0, sizeof(uint64_t)) < 0) {
2644                 log_error("Failed to allocate sequential number file, ignoring: %m");
2645                 close_nointr_nofail(fd);
2646                 return 0;
2647         }
2648
2649         p = mmap(NULL, sizeof(uint64_t), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2650         if (p == MAP_FAILED) {
2651                 log_error("Failed to map sequential number file, ignoring: %m");
2652                 close_nointr_nofail(fd);
2653                 return 0;
2654         }
2655
2656         close_nointr_nofail(fd);
2657         s->kernel_seqnum = p;
2658
2659         return 0;
2660 }
2661
2662 static int open_signalfd(Server *s) {
2663         sigset_t mask;
2664         struct epoll_event ev;
2665
2666         assert(s);
2667
2668         assert_se(sigemptyset(&mask) == 0);
2669         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
2670         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2671
2672         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2673         if (s->signal_fd < 0) {
2674                 log_error("signalfd(): %m");
2675                 return -errno;
2676         }
2677
2678         zero(ev);
2679         ev.events = EPOLLIN;
2680         ev.data.fd = s->signal_fd;
2681
2682         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2683                 log_error("epoll_ctl(): %m");
2684                 return -errno;
2685         }
2686
2687         return 0;
2688 }
2689
2690 static int server_parse_proc_cmdline(Server *s) {
2691         char *line, *w, *state;
2692         int r;
2693         size_t l;
2694
2695         if (detect_container(NULL) > 0)
2696                 return 0;
2697
2698         r = read_one_line_file("/proc/cmdline", &line);
2699         if (r < 0) {
2700                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2701                 return 0;
2702         }
2703
2704         FOREACH_WORD_QUOTED(w, l, line, state) {
2705                 char *word;
2706
2707                 word = strndup(w, l);
2708                 if (!word) {
2709                         r = -ENOMEM;
2710                         goto finish;
2711                 }
2712
2713                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
2714                         r = parse_boolean(word + 35);
2715                         if (r < 0)
2716                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2717                         else
2718                                 s->forward_to_syslog = r;
2719                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
2720                         r = parse_boolean(word + 33);
2721                         if (r < 0)
2722                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2723                         else
2724                                 s->forward_to_kmsg = r;
2725                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
2726                         r = parse_boolean(word + 36);
2727                         if (r < 0)
2728                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2729                         else
2730                                 s->forward_to_console = r;
2731                 } else if (startswith(word, "systemd.journald"))
2732                         log_warning("Invalid systemd.journald parameter. Ignoring.");
2733
2734                 free(word);
2735         }
2736
2737         r = 0;
2738
2739 finish:
2740         free(line);
2741         return r;
2742 }
2743
2744 static int server_parse_config_file(Server *s) {
2745         FILE *f;
2746         const char *fn;
2747         int r;
2748
2749         assert(s);
2750
2751         fn = "/etc/systemd/journald.conf";
2752         f = fopen(fn, "re");
2753         if (!f) {
2754                 if (errno == ENOENT)
2755                         return 0;
2756
2757                 log_warning("Failed to open configuration file %s: %m", fn);
2758                 return -errno;
2759         }
2760
2761         r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2762         if (r < 0)
2763                 log_warning("Failed to parse configuration file: %s", strerror(-r));
2764
2765         fclose(f);
2766
2767         return r;
2768 }
2769
2770 static int server_init(Server *s) {
2771         int n, r, fd;
2772
2773         assert(s);
2774
2775         zero(*s);
2776         s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
2777         s->compress = true;
2778
2779         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
2780         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
2781
2782         s->forward_to_syslog = true;
2783
2784         s->max_level_store = LOG_DEBUG;
2785         s->max_level_syslog = LOG_DEBUG;
2786         s->max_level_kmsg = LOG_NOTICE;
2787         s->max_level_console = LOG_INFO;
2788
2789         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
2790         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
2791
2792         server_parse_config_file(s);
2793         server_parse_proc_cmdline(s);
2794
2795         mkdir_p("/run/systemd/journal", 0755);
2796
2797         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
2798         if (!s->user_journals)
2799                 return log_oom();
2800
2801         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2802         if (s->epoll_fd < 0) {
2803                 log_error("Failed to create epoll object: %m");
2804                 return -errno;
2805         }
2806
2807         n = sd_listen_fds(true);
2808         if (n < 0) {
2809                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
2810                 return n;
2811         }
2812
2813         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2814
2815                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
2816
2817                         if (s->native_fd >= 0) {
2818                                 log_error("Too many native sockets passed.");
2819                                 return -EINVAL;
2820                         }
2821
2822                         s->native_fd = fd;
2823
2824                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
2825
2826                         if (s->stdout_fd >= 0) {
2827                                 log_error("Too many stdout sockets passed.");
2828                                 return -EINVAL;
2829                         }
2830
2831                         s->stdout_fd = fd;
2832
2833                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
2834
2835                         if (s->syslog_fd >= 0) {
2836                                 log_error("Too many /dev/log sockets passed.");
2837                                 return -EINVAL;
2838                         }
2839
2840                         s->syslog_fd = fd;
2841
2842                 } else {
2843                         log_error("Unknown socket passed.");
2844                         return -EINVAL;
2845                 }
2846         }
2847
2848         r = open_syslog_socket(s);
2849         if (r < 0)
2850                 return r;
2851
2852         r = open_native_socket(s);
2853         if (r < 0)
2854                 return r;
2855
2856         r = open_stdout_socket(s);
2857         if (r < 0)
2858                 return r;
2859
2860         r = open_dev_kmsg(s);
2861         if (r < 0)
2862                 return r;
2863
2864         r = open_kernel_seqnum(s);
2865         if (r < 0)
2866                 return r;
2867
2868         r = open_signalfd(s);
2869         if (r < 0)
2870                 return r;
2871
2872         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2873         if (!s->rate_limit)
2874                 return -ENOMEM;
2875
2876         r = system_journal_open(s);
2877         if (r < 0)
2878                 return r;
2879
2880         return 0;
2881 }
2882
2883 static void server_done(Server *s) {
2884         JournalFile *f;
2885         assert(s);
2886
2887         while (s->stdout_streams)
2888                 stdout_stream_free(s->stdout_streams);
2889
2890         if (s->system_journal)
2891                 journal_file_close(s->system_journal);
2892
2893         if (s->runtime_journal)
2894                 journal_file_close(s->runtime_journal);
2895
2896         while ((f = hashmap_steal_first(s->user_journals)))
2897                 journal_file_close(f);
2898
2899         hashmap_free(s->user_journals);
2900
2901         if (s->epoll_fd >= 0)
2902                 close_nointr_nofail(s->epoll_fd);
2903
2904         if (s->signal_fd >= 0)
2905                 close_nointr_nofail(s->signal_fd);
2906
2907         if (s->syslog_fd >= 0)
2908                 close_nointr_nofail(s->syslog_fd);
2909
2910         if (s->native_fd >= 0)
2911                 close_nointr_nofail(s->native_fd);
2912
2913         if (s->stdout_fd >= 0)
2914                 close_nointr_nofail(s->stdout_fd);
2915
2916         if (s->dev_kmsg_fd >= 0)
2917                 close_nointr_nofail(s->dev_kmsg_fd);
2918
2919         if (s->rate_limit)
2920                 journal_rate_limit_free(s->rate_limit);
2921
2922         if (s->kernel_seqnum)
2923                 munmap(s->kernel_seqnum, sizeof(uint64_t));
2924
2925         free(s->buffer);
2926         free(s->tty_path);
2927 }
2928
2929 int main(int argc, char *argv[]) {
2930         Server server;
2931         int r;
2932
2933         /* if (getppid() != 1) { */
2934         /*         log_error("This program should be invoked by init only."); */
2935         /*         return EXIT_FAILURE; */
2936         /* } */
2937
2938         if (argc > 1) {
2939                 log_error("This program does not take arguments.");
2940                 return EXIT_FAILURE;
2941         }
2942
2943         log_set_target(LOG_TARGET_SAFE);
2944         log_set_facility(LOG_SYSLOG);
2945         log_parse_environment();
2946         log_open();
2947
2948         umask(0022);
2949
2950         r = server_init(&server);
2951         if (r < 0)
2952                 goto finish;
2953
2954         server_vacuum(&server);
2955         server_flush_to_var(&server);
2956         server_flush_dev_kmsg(&server);
2957
2958         log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
2959         driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
2960
2961         sd_notify(false,
2962                   "READY=1\n"
2963                   "STATUS=Processing requests...");
2964
2965         for (;;) {
2966                 struct epoll_event event;
2967
2968                 r = epoll_wait(server.epoll_fd, &event, 1, -1);
2969                 if (r < 0) {
2970
2971                         if (errno == EINTR)
2972                                 continue;
2973
2974                         log_error("epoll_wait() failed: %m");
2975                         r = -errno;
2976                         goto finish;
2977                 } else if (r == 0)
2978                         break;
2979
2980                 r = process_event(&server, &event);
2981                 if (r < 0)
2982                         goto finish;
2983                 else if (r == 0)
2984                         break;
2985         }
2986
2987         log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
2988         driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
2989
2990 finish:
2991         sd_notify(false,
2992                   "STATUS=Shutting down...");
2993
2994         server_done(&server);
2995
2996         return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
2997 }