chiark / gitweb /
f2dd4050b61cf884d5e20f66e6c509bb7d15dd0e
[elogind.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
32 #include <sys/mman.h>
33
34 #include <systemd/sd-journal.h>
35 #include <systemd/sd-messages.h>
36 #include <systemd/sd-daemon.h>
37
38 #ifdef HAVE_LOGIND
39 #include <systemd/sd-login.h>
40 #endif
41
42 #include "mkdir.h"
43 #include "hashmap.h"
44 #include "journal-file.h"
45 #include "socket-util.h"
46 #include "cgroup-util.h"
47 #include "list.h"
48 #include "journal-rate-limit.h"
49 #include "journal-internal.h"
50 #include "journal-vacuum.h"
51 #include "journal-authenticate.h"
52 #include "conf-parser.h"
53 #include "journald.h"
54 #include "virt.h"
55 #include "missing.h"
56
57 #ifdef HAVE_ACL
58 #include <sys/acl.h>
59 #include <acl/libacl.h>
60 #include "acl-util.h"
61 #endif
62
63 #ifdef HAVE_SELINUX
64 #include <selinux/selinux.h>
65 #endif
66
/* Cap on the number of per-user journal files kept open at once; when
 * the cap is hit, find_journal() closes one before opening another. */
#define USER_JOURNALS_MAX 1024
/* NOTE(review): presumably the cap on concurrent stdout stream
 * connections -- its use is not visible in this part of the file. */
#define STDOUT_STREAMS_MAX 4096

/* Default rate limit parameters (interval length and burst size). */
#define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 200

/* For how long available_space() hands out its cached result before
 * recomputing it. */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)

/* Number of iovec slots dispatch_message_real() needs to have free for
 * the metadata fields it appends to every entry. */
#define N_IOVEC_META_FIELDS 17
/* NOTE(review): presumably the iovec budget for kernel message fields;
 * not used in this part of the file. */
#define N_IOVEC_KERNEL_FIELDS 64

/* 32 MiB. NOTE(review): presumably the upper bound for a single
 * incoming entry; confirm against the receive path. */
#define ENTRY_SIZE_MAX (32*1024*1024)
79
/* State of a StdoutStream. The enumerators mirror, in order, the
 * per-stream settings stored in struct StdoutStream (identifier, unit
 * id, priority, ...), followed by the RUNNING state.
 * NOTE(review): presumably each state names the header line expected
 * next from the client -- confirm against the stream line parser. */
typedef enum StdoutStreamState {
        STDOUT_STREAM_IDENTIFIER         = 0,
        STDOUT_STREAM_UNIT_ID            = 1,
        STDOUT_STREAM_PRIORITY           = 2,
        STDOUT_STREAM_LEVEL_PREFIX       = 3,
        STDOUT_STREAM_FORWARD_TO_SYSLOG  = 4,
        STDOUT_STREAM_FORWARD_TO_KMSG    = 5,
        STDOUT_STREAM_FORWARD_TO_CONSOLE = 6,
        STDOUT_STREAM_RUNNING            = 7
} StdoutStreamState;
90
91 struct StdoutStream {
92         Server *server;
93         StdoutStreamState state;
94
95         int fd;
96
97         struct ucred ucred;
98 #ifdef HAVE_SELINUX
99         security_context_t security_context;
100 #endif
101
102         char *identifier;
103         char *unit_id;
104         int priority;
105         bool level_prefix:1;
106         bool forward_to_syslog:1;
107         bool forward_to_kmsg:1;
108         bool forward_to_console:1;
109
110         char buffer[LINE_MAX+1];
111         size_t length;
112
113         LIST_FIELDS(StdoutStream, stdout_stream);
114 };
115
116 static const char* const storage_table[] = {
117         [STORAGE_AUTO] = "auto",
118         [STORAGE_VOLATILE] = "volatile",
119         [STORAGE_PERSISTENT] = "persistent",
120         [STORAGE_NONE] = "none"
121 };
122
123 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
124 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
125
126 static uint64_t available_space(Server *s) {
127         char ids[33], *p;
128         const char *f;
129         sd_id128_t machine;
130         struct statvfs ss;
131         uint64_t sum = 0, avail = 0, ss_avail = 0;
132         int r;
133         DIR *d;
134         usec_t ts;
135         JournalMetrics *m;
136
137         ts = now(CLOCK_MONOTONIC);
138
139         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
140                 return s->cached_available_space;
141
142         r = sd_id128_get_machine(&machine);
143         if (r < 0)
144                 return 0;
145
146         if (s->system_journal) {
147                 f = "/var/log/journal/";
148                 m = &s->system_metrics;
149         } else {
150                 f = "/run/log/journal/";
151                 m = &s->runtime_metrics;
152         }
153
154         assert(m);
155
156         p = strappend(f, sd_id128_to_string(machine, ids));
157         if (!p)
158                 return 0;
159
160         d = opendir(p);
161         free(p);
162
163         if (!d)
164                 return 0;
165
166         if (fstatvfs(dirfd(d), &ss) < 0)
167                 goto finish;
168
169         for (;;) {
170                 struct stat st;
171                 struct dirent buf, *de;
172
173                 r = readdir_r(d, &buf, &de);
174                 if (r != 0)
175                         break;
176
177                 if (!de)
178                         break;
179
180                 if (!endswith(de->d_name, ".journal") &&
181                     !endswith(de->d_name, ".journal~"))
182                         continue;
183
184                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
185                         continue;
186
187                 if (!S_ISREG(st.st_mode))
188                         continue;
189
190                 sum += (uint64_t) st.st_blocks * 512UL;
191         }
192
193         avail = sum >= m->max_use ? 0 : m->max_use - sum;
194
195         ss_avail = ss.f_bsize * ss.f_bavail;
196
197         ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
198
199         if (ss_avail < avail)
200                 avail = ss_avail;
201
202         s->cached_available_space = avail;
203         s->cached_available_space_timestamp = ts;
204
205 finish:
206         closedir(d);
207
208         return avail;
209 }
210
211 static void server_read_file_gid(Server *s) {
212         const char *adm = "adm";
213         int r;
214
215         assert(s);
216
217         if (s->file_gid_valid)
218                 return;
219
220         r = get_group_creds(&adm, &s->file_gid);
221         if (r < 0)
222                 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
223
224         /* if we couldn't read the gid, then it will be 0, but that's
225          * fine and we shouldn't try to resolve the group again, so
226          * let's just pretend it worked right-away. */
227         s->file_gid_valid = true;
228 }
229
230 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
231         int r;
232 #ifdef HAVE_ACL
233         acl_t acl;
234         acl_entry_t entry;
235         acl_permset_t permset;
236 #endif
237
238         assert(f);
239
240         server_read_file_gid(s);
241
242         r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
243         if (r < 0)
244                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
245
246 #ifdef HAVE_ACL
247         if (uid <= 0)
248                 return;
249
250         acl = acl_get_fd(f->fd);
251         if (!acl) {
252                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
253                 return;
254         }
255
256         r = acl_find_uid(acl, uid, &entry);
257         if (r <= 0) {
258
259                 if (acl_create_entry(&acl, &entry) < 0 ||
260                     acl_set_tag_type(entry, ACL_USER) < 0 ||
261                     acl_set_qualifier(entry, &uid) < 0) {
262                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
263                         goto finish;
264                 }
265         }
266
267         if (acl_get_permset(entry, &permset) < 0 ||
268             acl_add_perm(permset, ACL_READ) < 0 ||
269             acl_calc_mask(&acl) < 0) {
270                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
271                 goto finish;
272         }
273
274         if (acl_set_fd(f->fd, acl) < 0)
275                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
276
277 finish:
278         acl_free(acl);
279 #endif
280 }
281
282 static JournalFile* find_journal(Server *s, uid_t uid) {
283         char *p;
284         int r;
285         JournalFile *f;
286         sd_id128_t machine;
287
288         assert(s);
289
290         /* We split up user logs only on /var, not on /run. If the
291          * runtime file is open, we write to it exclusively, in order
292          * to guarantee proper order as soon as we flush /run to
293          * /var and close the runtime file. */
294
295         if (s->runtime_journal)
296                 return s->runtime_journal;
297
298         if (uid <= 0)
299                 return s->system_journal;
300
301         r = sd_id128_get_machine(&machine);
302         if (r < 0)
303                 return s->system_journal;
304
305         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
306         if (f)
307                 return f;
308
309         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
310                      SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
311                 return s->system_journal;
312
313         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
314                 /* Too many open? Then let's close one */
315                 f = hashmap_steal_first(s->user_journals);
316                 assert(f);
317                 journal_file_close(f);
318         }
319
320         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
321         free(p);
322
323         if (r < 0)
324                 return s->system_journal;
325
326         server_fix_perms(s, f, uid);
327
328         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
329         if (r < 0) {
330                 journal_file_close(f);
331                 return s->system_journal;
332         }
333
334         return f;
335 }
336
337 static void server_rotate(Server *s) {
338         JournalFile *f;
339         void *k;
340         Iterator i;
341         int r;
342
343         log_info("Rotating...");
344
345         if (s->runtime_journal) {
346                 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
347                 if (r < 0)
348                         if (s->runtime_journal)
349                                 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
350                         else
351                                 log_error("Failed to create new runtime journal: %s", strerror(-r));
352                 else
353                         server_fix_perms(s, s->runtime_journal, 0);
354         }
355
356         if (s->system_journal) {
357                 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
358                 if (r < 0)
359                         if (s->system_journal)
360                                 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
361                         else
362                                 log_error("Failed to create new system journal: %s", strerror(-r));
363
364                 else
365                         server_fix_perms(s, s->system_journal, 0);
366         }
367
368         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
369                 r = journal_file_rotate(&f, s->compress, s->seal);
370                 if (r < 0)
371                         if (f->path)
372                                 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
373                         else
374                                 log_error("Failed to create user journal: %s", strerror(-r));
375                 else {
376                         hashmap_replace(s->user_journals, k, f);
377                         server_fix_perms(s, s->system_journal, PTR_TO_UINT32(k));
378                 }
379         }
380 }
381
382 static void server_vacuum(Server *s) {
383         char *p;
384         char ids[33];
385         sd_id128_t machine;
386         int r;
387
388         log_info("Vacuuming...");
389
390         r = sd_id128_get_machine(&machine);
391         if (r < 0) {
392                 log_error("Failed to get machine ID: %s", strerror(-r));
393                 return;
394         }
395
396         sd_id128_to_string(machine, ids);
397
398         if (s->system_journal) {
399                 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
400                         log_oom();
401                         return;
402                 }
403
404                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
405                 if (r < 0 && r != -ENOENT)
406                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
407                 free(p);
408         }
409
410         if (s->runtime_journal) {
411                 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
412                         log_oom();
413                         return;
414                 }
415
416                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
417                 if (r < 0 && r != -ENOENT)
418                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
419                 free(p);
420         }
421
422         s->cached_available_space_timestamp = 0;
423 }
424
425 static char *shortened_cgroup_path(pid_t pid) {
426         int r;
427         char *process_path, *init_path, *path;
428
429         assert(pid > 0);
430
431         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
432         if (r < 0)
433                 return NULL;
434
435         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
436         if (r < 0) {
437                 free(process_path);
438                 return NULL;
439         }
440
441         if (endswith(init_path, "/system"))
442                 init_path[strlen(init_path) - 7] = 0;
443         else if (streq(init_path, "/"))
444                 init_path[0] = 0;
445
446         if (startswith(process_path, init_path)) {
447                 char *p;
448
449                 p = strdup(process_path + strlen(init_path));
450                 if (!p) {
451                         free(process_path);
452                         free(init_path);
453                         return NULL;
454                 }
455                 path = p;
456         } else {
457                 path = process_path;
458                 process_path = NULL;
459         }
460
461         free(process_path);
462         free(init_path);
463
464         return path;
465 }
466
467 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
468         JournalFile *f;
469         bool vacuumed = false;
470         int r;
471
472         assert(s);
473         assert(iovec);
474         assert(n > 0);
475
476         f = find_journal(s, uid);
477         if (!f)
478                 return;
479
480         if (journal_file_rotate_suggested(f)) {
481                 log_info("Journal header limits reached or header out-of-date, rotating.");
482                 server_rotate(s);
483                 server_vacuum(s);
484                 vacuumed = true;
485
486                 f = find_journal(s, uid);
487                 if (!f)
488                         return;
489         }
490
491         for (;;) {
492                 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
493                 if (r >= 0)
494                         return;
495
496                 if (vacuumed ||
497                     (r != -E2BIG && /* hit limit */
498                      r != -EFBIG && /* hit fs limit */
499                      r != -EDQUOT && /* quota hit */
500                      r != -ENOSPC && /* disk full */
501                      r != -EBADMSG && /* corrupted */
502                      r != -ENODATA && /* truncated */
503                      r != -EHOSTDOWN && /* other machine */
504                      r != -EPROTONOSUPPORT && /* unsupported feature */
505                      r != -EBUSY && /* unclean shutdown */
506                      r != -ESHUTDOWN /* already archived */)) {
507                         log_error("Failed to write entry, ignoring: %s", strerror(-r));
508                         return;
509                 }
510
511                 if (r == -E2BIG || r == -EFBIG || r == EDQUOT || r == ENOSPC)
512                         log_info("Allocation limit reached, rotating.");
513                 else if (r == -EHOSTDOWN)
514                         log_info("Journal file from other machine, rotating.");
515                 else if (r == -EBUSY)
516                         log_info("Unlcean shutdown, rotating.");
517                 else
518                         log_warning("Journal file corrupted, rotating.");
519
520                 server_rotate(s);
521                 server_vacuum(s);
522                 vacuumed = true;
523
524                 f = find_journal(s, uid);
525                 if (!f)
526                         return;
527
528                 log_info("Retrying write.");
529         }
530 }
531
532 static void dispatch_message_real(
533                 Server *s,
534                 struct iovec *iovec, unsigned n, unsigned m,
535                 struct ucred *ucred,
536                 struct timeval *tv,
537                 const char *label, size_t label_len,
538                 const char *unit_id) {
539
540         char *pid = NULL, *uid = NULL, *gid = NULL,
541                 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
542                 *comm = NULL, *cmdline = NULL, *hostname = NULL,
543                 *audit_session = NULL, *audit_loginuid = NULL,
544                 *exe = NULL, *cgroup = NULL, *session = NULL,
545                 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
546
547         char idbuf[33];
548         sd_id128_t id;
549         int r;
550         char *t;
551         uid_t loginuid = 0, realuid = 0;
552
553         assert(s);
554         assert(iovec);
555         assert(n > 0);
556         assert(n + N_IOVEC_META_FIELDS <= m);
557
558         if (ucred) {
559                 uint32_t audit;
560 #ifdef HAVE_LOGIND
561                 uid_t owner;
562 #endif
563
564                 realuid = ucred->uid;
565
566                 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
567                         IOVEC_SET_STRING(iovec[n++], pid);
568
569                 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
570                         IOVEC_SET_STRING(iovec[n++], uid);
571
572                 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
573                         IOVEC_SET_STRING(iovec[n++], gid);
574
575                 r = get_process_comm(ucred->pid, &t);
576                 if (r >= 0) {
577                         comm = strappend("_COMM=", t);
578                         free(t);
579
580                         if (comm)
581                                 IOVEC_SET_STRING(iovec[n++], comm);
582                 }
583
584                 r = get_process_exe(ucred->pid, &t);
585                 if (r >= 0) {
586                         exe = strappend("_EXE=", t);
587                         free(t);
588
589                         if (exe)
590                                 IOVEC_SET_STRING(iovec[n++], exe);
591                 }
592
593                 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
594                 if (r >= 0) {
595                         cmdline = strappend("_CMDLINE=", t);
596                         free(t);
597
598                         if (cmdline)
599                                 IOVEC_SET_STRING(iovec[n++], cmdline);
600                 }
601
602                 r = audit_session_from_pid(ucred->pid, &audit);
603                 if (r >= 0)
604                         if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
605                                 IOVEC_SET_STRING(iovec[n++], audit_session);
606
607                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
608                 if (r >= 0)
609                         if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
610                                 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
611
612                 t = shortened_cgroup_path(ucred->pid);
613                 if (t) {
614                         cgroup = strappend("_SYSTEMD_CGROUP=", t);
615                         free(t);
616
617                         if (cgroup)
618                                 IOVEC_SET_STRING(iovec[n++], cgroup);
619                 }
620
621 #ifdef HAVE_LOGIND
622                 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
623                         session = strappend("_SYSTEMD_SESSION=", t);
624                         free(t);
625
626                         if (session)
627                                 IOVEC_SET_STRING(iovec[n++], session);
628                 }
629
630                 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
631                         if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
632                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
633 #endif
634
635                 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
636                         unit = strappend("_SYSTEMD_UNIT=", t);
637                         free(t);
638                 } else if (unit_id)
639                         unit = strappend("_SYSTEMD_UNIT=", unit_id);
640
641                 if (unit)
642                         IOVEC_SET_STRING(iovec[n++], unit);
643
644 #ifdef HAVE_SELINUX
645                 if (label) {
646                         selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
647                         if (selinux_context) {
648                                 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
649                                 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
650                                 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
651                                 IOVEC_SET_STRING(iovec[n++], selinux_context);
652                         }
653                 } else {
654                         security_context_t con;
655
656                         if (getpidcon(ucred->pid, &con) >= 0) {
657                                 selinux_context = strappend("_SELINUX_CONTEXT=", con);
658                                 if (selinux_context)
659                                         IOVEC_SET_STRING(iovec[n++], selinux_context);
660
661                                 freecon(con);
662                         }
663                 }
664 #endif
665         }
666
667         if (tv) {
668                 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
669                              (unsigned long long) timeval_load(tv)) >= 0)
670                         IOVEC_SET_STRING(iovec[n++], source_time);
671         }
672
673         /* Note that strictly speaking storing the boot id here is
674          * redundant since the entry includes this in-line
675          * anyway. However, we need this indexed, too. */
676         r = sd_id128_get_boot(&id);
677         if (r >= 0)
678                 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
679                         IOVEC_SET_STRING(iovec[n++], boot_id);
680
681         r = sd_id128_get_machine(&id);
682         if (r >= 0)
683                 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
684                         IOVEC_SET_STRING(iovec[n++], machine_id);
685
686         t = gethostname_malloc();
687         if (t) {
688                 hostname = strappend("_HOSTNAME=", t);
689                 free(t);
690                 if (hostname)
691                         IOVEC_SET_STRING(iovec[n++], hostname);
692         }
693
694         assert(n <= m);
695
696         write_to_journal(s, realuid == 0 ? 0 : loginuid, iovec, n);
697
698         free(pid);
699         free(uid);
700         free(gid);
701         free(comm);
702         free(exe);
703         free(cmdline);
704         free(source_time);
705         free(boot_id);
706         free(machine_id);
707         free(hostname);
708         free(audit_session);
709         free(audit_loginuid);
710         free(cgroup);
711         free(session);
712         free(owner_uid);
713         free(unit);
714         free(selinux_context);
715 }
716
717 static void driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
718         char mid[11 + 32 + 1];
719         char buffer[16 + LINE_MAX + 1];
720         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
721         int n = 0;
722         va_list ap;
723         struct ucred ucred;
724
725         assert(s);
726         assert(format);
727
728         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
729         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
730
731         memcpy(buffer, "MESSAGE=", 8);
732         va_start(ap, format);
733         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
734         va_end(ap);
735         char_array_0(buffer);
736         IOVEC_SET_STRING(iovec[n++], buffer);
737
738         snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
739         char_array_0(mid);
740         IOVEC_SET_STRING(iovec[n++], mid);
741
742         zero(ucred);
743         ucred.pid = getpid();
744         ucred.uid = getuid();
745         ucred.gid = getgid();
746
747         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
748 }
749
750 static void dispatch_message(Server *s,
751                              struct iovec *iovec, unsigned n, unsigned m,
752                              struct ucred *ucred,
753                              struct timeval *tv,
754                              const char *label, size_t label_len,
755                              const char *unit_id,
756                              int priority) {
757         int rl;
758         char *path = NULL, *c;
759
760         assert(s);
761         assert(iovec || n == 0);
762
763         if (n == 0)
764                 return;
765
766         if (LOG_PRI(priority) > s->max_level_store)
767                 return;
768
769         if (!ucred)
770                 goto finish;
771
772         path = shortened_cgroup_path(ucred->pid);
773         if (!path)
774                 goto finish;
775
776         /* example: /user/lennart/3/foobar
777          *          /system/dbus.service/foobar
778          *
779          * So let's cut of everything past the third /, since that is
780          * wher user directories start */
781
782         c = strchr(path, '/');
783         if (c) {
784                 c = strchr(c+1, '/');
785                 if (c) {
786                         c = strchr(c+1, '/');
787                         if (c)
788                                 *c = 0;
789                 }
790         }
791
792         rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
793
794         if (rl == 0) {
795                 free(path);
796                 return;
797         }
798
799         /* Write a suppression message if we suppressed something */
800         if (rl > 1)
801                 driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
802
803         free(path);
804
805 finish:
806         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
807 }
808
809 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
810         struct msghdr msghdr;
811         struct cmsghdr *cmsg;
812         union {
813                 struct cmsghdr cmsghdr;
814                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
815         } control;
816         union sockaddr_union sa;
817
818         assert(s);
819         assert(iovec);
820         assert(n_iovec > 0);
821
822         zero(msghdr);
823         msghdr.msg_iov = (struct iovec*) iovec;
824         msghdr.msg_iovlen = n_iovec;
825
826         zero(sa);
827         sa.un.sun_family = AF_UNIX;
828         strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
829         msghdr.msg_name = &sa;
830         msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
831
832         if (ucred) {
833                 zero(control);
834                 msghdr.msg_control = &control;
835                 msghdr.msg_controllen = sizeof(control);
836
837                 cmsg = CMSG_FIRSTHDR(&msghdr);
838                 cmsg->cmsg_level = SOL_SOCKET;
839                 cmsg->cmsg_type = SCM_CREDENTIALS;
840                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
841                 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
842                 msghdr.msg_controllen = cmsg->cmsg_len;
843         }
844
845         /* Forward the syslog message we received via /dev/log to
846          * /run/systemd/syslog. Unfortunately we currently can't set
847          * the SO_TIMESTAMP auxiliary data, and hence we don't. */
848
849         if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
850                 return;
851
852         /* The socket is full? I guess the syslog implementation is
853          * too slow, and we shouldn't wait for that... */
854         if (errno == EAGAIN)
855                 return;
856
857         if (ucred && errno == ESRCH) {
858                 struct ucred u;
859
860                 /* Hmm, presumably the sender process vanished
861                  * by now, so let's fix it as good as we
862                  * can, and retry */
863
864                 u = *ucred;
865                 u.pid = getpid();
866                 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
867
868                 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
869                         return;
870
871                 if (errno == EAGAIN)
872                         return;
873         }
874
875         if (errno != ENOENT)
876                 log_debug("Failed to forward syslog message: %m");
877 }
878
879 static void forward_syslog_raw(Server *s, int priority, const char *buffer, struct ucred *ucred, struct timeval *tv) {
880         struct iovec iovec;
881
882         assert(s);
883         assert(buffer);
884
885         if (LOG_PRI(priority) > s->max_level_syslog)
886                 return;
887
888         IOVEC_SET_STRING(iovec, buffer);
889         forward_syslog_iovec(s, &iovec, 1, ucred, tv);
890 }
891
892 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
893         struct iovec iovec[5];
894         char header_priority[6], header_time[64], header_pid[16];
895         int n = 0;
896         time_t t;
897         struct tm *tm;
898         char *ident_buf = NULL;
899
900         assert(s);
901         assert(priority >= 0);
902         assert(priority <= 999);
903         assert(message);
904
905         if (LOG_PRI(priority) > s->max_level_syslog)
906                 return;
907
908         /* First: priority field */
909         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
910         char_array_0(header_priority);
911         IOVEC_SET_STRING(iovec[n++], header_priority);
912
913         /* Second: timestamp */
914         t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
915         tm = localtime(&t);
916         if (!tm)
917                 return;
918         if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
919                 return;
920         IOVEC_SET_STRING(iovec[n++], header_time);
921
922         /* Third: identifier and PID */
923         if (ucred) {
924                 if (!identifier) {
925                         get_process_comm(ucred->pid, &ident_buf);
926                         identifier = ident_buf;
927                 }
928
929                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
930                 char_array_0(header_pid);
931
932                 if (identifier)
933                         IOVEC_SET_STRING(iovec[n++], identifier);
934
935                 IOVEC_SET_STRING(iovec[n++], header_pid);
936         } else if (identifier) {
937                 IOVEC_SET_STRING(iovec[n++], identifier);
938                 IOVEC_SET_STRING(iovec[n++], ": ");
939         }
940
941         /* Fourth: message */
942         IOVEC_SET_STRING(iovec[n++], message);
943
944         forward_syslog_iovec(s, iovec, n, ucred, tv);
945
946         free(ident_buf);
947 }
948
/* Returns priority unchanged if it carries any facility bits; otherwise
 * returns just its level bits combined with the LOG_USER facility. */
static int fixup_priority(int priority) {
        return (priority & LOG_FACMASK) != 0
                ? priority
                : ((priority & LOG_PRIMASK) | LOG_USER);
}
956
957 static void forward_kmsg(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
958         struct iovec iovec[5];
959         char header_priority[6], header_pid[16];
960         int n = 0;
961         char *ident_buf = NULL;
962
963         assert(s);
964         assert(priority >= 0);
965         assert(priority <= 999);
966         assert(message);
967
968         if (_unlikely_(LOG_PRI(priority) > s->max_level_kmsg))
969                 return;
970
971         if (_unlikely_(s->dev_kmsg_fd < 0))
972                 return;
973
974         /* Never allow messages with kernel facility to be written to
975          * kmsg, regardless where the data comes from. */
976         priority = fixup_priority(priority);
977
978         /* First: priority field */
979         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
980         char_array_0(header_priority);
981         IOVEC_SET_STRING(iovec[n++], header_priority);
982
983         /* Second: identifier and PID */
984         if (ucred) {
985                 if (!identifier) {
986                         get_process_comm(ucred->pid, &ident_buf);
987                         identifier = ident_buf;
988                 }
989
990                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
991                 char_array_0(header_pid);
992
993                 if (identifier)
994                         IOVEC_SET_STRING(iovec[n++], identifier);
995
996                 IOVEC_SET_STRING(iovec[n++], header_pid);
997         } else if (identifier) {
998                 IOVEC_SET_STRING(iovec[n++], identifier);
999                 IOVEC_SET_STRING(iovec[n++], ": ");
1000         }
1001
1002         /* Fourth: message */
1003         IOVEC_SET_STRING(iovec[n++], message);
1004         IOVEC_SET_STRING(iovec[n++], "\n");
1005
1006         if (writev(s->dev_kmsg_fd, iovec, n) < 0)
1007                 log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
1008
1009         free(ident_buf);
1010 }
1011
1012 static void forward_console(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
1013         struct iovec iovec[4];
1014         char header_pid[16];
1015         int n = 0, fd;
1016         char *ident_buf = NULL;
1017         const char *tty;
1018
1019         assert(s);
1020         assert(message);
1021
1022         if (LOG_PRI(priority) > s->max_level_console)
1023                 return;
1024
1025         /* First: identifier and PID */
1026         if (ucred) {
1027                 if (!identifier) {
1028                         get_process_comm(ucred->pid, &ident_buf);
1029                         identifier = ident_buf;
1030                 }
1031
1032                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
1033                 char_array_0(header_pid);
1034
1035                 if (identifier)
1036                         IOVEC_SET_STRING(iovec[n++], identifier);
1037
1038                 IOVEC_SET_STRING(iovec[n++], header_pid);
1039         } else if (identifier) {
1040                 IOVEC_SET_STRING(iovec[n++], identifier);
1041                 IOVEC_SET_STRING(iovec[n++], ": ");
1042         }
1043
1044         /* Third: message */
1045         IOVEC_SET_STRING(iovec[n++], message);
1046         IOVEC_SET_STRING(iovec[n++], "\n");
1047
1048         tty = s->tty_path ? s->tty_path : "/dev/console";
1049
1050         fd = open_terminal(tty, O_WRONLY|O_NOCTTY|O_CLOEXEC);
1051         if (fd < 0) {
1052                 log_debug("Failed to open %s for logging: %s", tty, strerror(errno));
1053                 goto finish;
1054         }
1055
1056         if (writev(fd, iovec, n) < 0)
1057                 log_debug("Failed to write to %s for logging: %s", tty, strerror(errno));
1058
1059         close_nointr_nofail(fd);
1060
1061 finish:
1062         free(ident_buf);
1063 }
1064
1065 static void read_identifier(const char **buf, char **identifier, char **pid) {
1066         const char *p;
1067         char *t;
1068         size_t l, e;
1069
1070         assert(buf);
1071         assert(identifier);
1072         assert(pid);
1073
1074         p = *buf;
1075
1076         p += strspn(p, WHITESPACE);
1077         l = strcspn(p, WHITESPACE);
1078
1079         if (l <= 0 ||
1080             p[l-1] != ':')
1081                 return;
1082
1083         e = l;
1084         l--;
1085
1086         if (p[l-1] == ']') {
1087                 size_t k = l-1;
1088
1089                 for (;;) {
1090
1091                         if (p[k] == '[') {
1092                                 t = strndup(p+k+1, l-k-2);
1093                                 if (t)
1094                                         *pid = t;
1095
1096                                 l = k;
1097                                 break;
1098                         }
1099
1100                         if (k == 0)
1101                                 break;
1102
1103                         k--;
1104                 }
1105         }
1106
1107         t = strndup(p, l);
1108         if (t)
1109                 *identifier = t;
1110
1111         *buf = p + e;
1112         *buf += strspn(*buf, WHITESPACE);
1113 }
1114
1115 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv, const char *label, size_t label_len) {
1116         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
1117         struct iovec iovec[N_IOVEC_META_FIELDS + 6];
1118         unsigned n = 0;
1119         int priority = LOG_USER | LOG_INFO;
1120         char *identifier = NULL, *pid = NULL;
1121         const char *orig;
1122
1123         assert(s);
1124         assert(buf);
1125
1126         orig = buf;
1127         parse_syslog_priority((char**) &buf, &priority);
1128
1129         if (s->forward_to_syslog)
1130                 forward_syslog_raw(s, priority, orig, ucred, tv);
1131
1132         skip_syslog_date((char**) &buf);
1133         read_identifier(&buf, &identifier, &pid);
1134
1135         if (s->forward_to_kmsg)
1136                 forward_kmsg(s, priority, identifier, buf, ucred);
1137
1138         if (s->forward_to_console)
1139                 forward_console(s, priority, identifier, buf, ucred);
1140
1141         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
1142
1143         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1144                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1145
1146         if (priority & LOG_FACMASK)
1147                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1148                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1149
1150         if (identifier) {
1151                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1152                 if (syslog_identifier)
1153                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1154         }
1155
1156         if (pid) {
1157                 syslog_pid = strappend("SYSLOG_PID=", pid);
1158                 if (syslog_pid)
1159                         IOVEC_SET_STRING(iovec[n++], syslog_pid);
1160         }
1161
1162         message = strappend("MESSAGE=", buf);
1163         if (message)
1164                 IOVEC_SET_STRING(iovec[n++], message);
1165
1166         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, label, label_len, NULL, priority);
1167
1168         free(message);
1169         free(identifier);
1170         free(pid);
1171         free(syslog_priority);
1172         free(syslog_facility);
1173         free(syslog_identifier);
1174         free(syslog_pid);
1175 }
1176
/* Checks whether the l bytes at p form a field name that clients are
 * allowed to set.
 *
 * This roughly follows the POSIX recommendations for environment variable
 * names, with a few extra restrictions:
 *
 * http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
 *
 * A valid name is 1 to 64 characters long, consists only of A-Z, 0-9 and
 * '_', and starts with neither a digit nor an underscore (names with a
 * leading underscore are protected). */
static bool valid_user_field(const char *p, size_t l) {
        size_t i;

        /* Neither empty nor longer than 64 characters */
        if (l <= 0 || l > 64)
                return false;

        /* A leading underscore is reserved for protected fields, and a
         * leading digit is not allowed either */
        if (p[0] == '_' || (p[0] >= '0' && p[0] <= '9'))
                return false;

        /* Everything must be an upper-case letter, a digit or '_' */
        for (i = 0; i < l; i++) {
                char c = p[i];
                bool upper = c >= 'A' && c <= 'Z';
                bool digit = c >= '0' && c <= '9';

                if (!upper && !digit && c != '_')
                        return false;
        }

        return true;
}
1211
1212 static void process_native_message(
1213                 Server *s,
1214                 const void *buffer, size_t buffer_size,
1215                 struct ucred *ucred,
1216                 struct timeval *tv,
1217                 const char *label, size_t label_len) {
1218
1219         struct iovec *iovec = NULL;
1220         unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1221         const char *p;
1222         size_t remaining;
1223         int priority = LOG_INFO;
1224         char *identifier = NULL, *message = NULL;
1225
1226         assert(s);
1227         assert(buffer || buffer_size == 0);
1228
1229         p = buffer;
1230         remaining = buffer_size;
1231
1232         while (remaining > 0) {
1233                 const char *e, *q;
1234
1235                 e = memchr(p, '\n', remaining);
1236
1237                 if (!e) {
1238                         /* Trailing noise, let's ignore it, and flush what we collected */
1239                         log_debug("Received message with trailing noise, ignoring.");
1240                         break;
1241                 }
1242
1243                 if (e == p) {
1244                         /* Entry separator */
1245                         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
1246                         n = 0;
1247                         priority = LOG_INFO;
1248
1249                         p++;
1250                         remaining--;
1251                         continue;
1252                 }
1253
1254                 if (*p == '.' || *p == '#') {
1255                         /* Ignore control commands for now, and
1256                          * comments too. */
1257                         remaining -= (e - p) + 1;
1258                         p = e + 1;
1259                         continue;
1260                 }
1261
1262                 /* A property follows */
1263
1264                 if (n+N_IOVEC_META_FIELDS >= m) {
1265                         struct iovec *c;
1266                         unsigned u;
1267
1268                         u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1269                         c = realloc(iovec, u * sizeof(struct iovec));
1270                         if (!c) {
1271                                 log_oom();
1272                                 break;
1273                         }
1274
1275                         iovec = c;
1276                         m = u;
1277                 }
1278
1279                 q = memchr(p, '=', e - p);
1280                 if (q) {
1281                         if (valid_user_field(p, q - p)) {
1282                                 size_t l;
1283
1284                                 l = e - p;
1285
1286                                 /* If the field name starts with an
1287                                  * underscore, skip the variable,
1288                                  * since that indidates a trusted
1289                                  * field */
1290                                 iovec[n].iov_base = (char*) p;
1291                                 iovec[n].iov_len = l;
1292                                 n++;
1293
1294                                 /* We need to determine the priority
1295                                  * of this entry for the rate limiting
1296                                  * logic */
1297                                 if (l == 10 &&
1298                                     memcmp(p, "PRIORITY=", 9) == 0 &&
1299                                     p[9] >= '0' && p[9] <= '9')
1300                                         priority = (priority & LOG_FACMASK) | (p[9] - '0');
1301
1302                                 else if (l == 17 &&
1303                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1304                                          p[16] >= '0' && p[16] <= '9')
1305                                         priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1306
1307                                 else if (l == 18 &&
1308                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1309                                          p[16] >= '0' && p[16] <= '9' &&
1310                                          p[17] >= '0' && p[17] <= '9')
1311                                         priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1312
1313                                 else if (l >= 19 &&
1314                                          memcmp(p, "SYSLOG_IDENTIFIER=", 18) == 0) {
1315                                         char *t;
1316
1317                                         t = strndup(p + 18, l - 18);
1318                                         if (t) {
1319                                                 free(identifier);
1320                                                 identifier = t;
1321                                         }
1322                                 } else if (l >= 8 &&
1323                                            memcmp(p, "MESSAGE=", 8) == 0) {
1324                                         char *t;
1325
1326                                         t = strndup(p + 8, l - 8);
1327                                         if (t) {
1328                                                 free(message);
1329                                                 message = t;
1330                                         }
1331                                 }
1332                         }
1333
1334                         remaining -= (e - p) + 1;
1335                         p = e + 1;
1336                         continue;
1337                 } else {
1338                         le64_t l_le;
1339                         uint64_t l;
1340                         char *k;
1341
1342                         if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1343                                 log_debug("Failed to parse message, ignoring.");
1344                                 break;
1345                         }
1346
1347                         memcpy(&l_le, e + 1, sizeof(uint64_t));
1348                         l = le64toh(l_le);
1349
1350                         if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1351                             e[1+sizeof(uint64_t)+l] != '\n') {
1352                                 log_debug("Failed to parse message, ignoring.");
1353                                 break;
1354                         }
1355
1356                         k = malloc((e - p) + 1 + l);
1357                         if (!k) {
1358                                 log_oom();
1359                                 break;
1360                         }
1361
1362                         memcpy(k, p, e - p);
1363                         k[e - p] = '=';
1364                         memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1365
1366                         if (valid_user_field(p, e - p)) {
1367                                 iovec[n].iov_base = k;
1368                                 iovec[n].iov_len = (e - p) + 1 + l;
1369                                 n++;
1370                         } else
1371                                 free(k);
1372
1373                         remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1374                         p = e + 1 + sizeof(uint64_t) + l + 1;
1375                 }
1376         }
1377
1378         if (n <= 0)
1379                 goto finish;
1380
1381         tn = n++;
1382         IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1383
1384         if (message) {
1385                 if (s->forward_to_syslog)
1386                         forward_syslog(s, priority, identifier, message, ucred, tv);
1387
1388                 if (s->forward_to_kmsg)
1389                         forward_kmsg(s, priority, identifier, message, ucred);
1390
1391                 if (s->forward_to_console)
1392                         forward_console(s, priority, identifier, message, ucred);
1393         }
1394
1395         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
1396
1397 finish:
1398         for (j = 0; j < n; j++)  {
1399                 if (j == tn)
1400                         continue;
1401
1402                 if (iovec[j].iov_base < buffer ||
1403                     (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1404                         free(iovec[j].iov_base);
1405         }
1406
1407         free(iovec);
1408         free(identifier);
1409         free(message);
1410 }
1411
1412 static void process_native_file(
1413                 Server *s,
1414                 int fd,
1415                 struct ucred *ucred,
1416                 struct timeval *tv,
1417                 const char *label, size_t label_len) {
1418
1419         struct stat st;
1420         void *p;
1421         ssize_t n;
1422
1423         assert(s);
1424         assert(fd >= 0);
1425
1426         /* Data is in the passed file, since it didn't fit in a
1427          * datagram. We can't map the file here, since clients might
1428          * then truncate it and trigger a SIGBUS for us. So let's
1429          * stupidly read it */
1430
1431         if (fstat(fd, &st) < 0) {
1432                 log_error("Failed to stat passed file, ignoring: %m");
1433                 return;
1434         }
1435
1436         if (!S_ISREG(st.st_mode)) {
1437                 log_error("File passed is not regular. Ignoring.");
1438                 return;
1439         }
1440
1441         if (st.st_size <= 0)
1442                 return;
1443
1444         if (st.st_size > ENTRY_SIZE_MAX) {
1445                 log_error("File passed too large. Ignoring.");
1446                 return;
1447         }
1448
1449         p = malloc(st.st_size);
1450         if (!p) {
1451                 log_oom();
1452                 return;
1453         }
1454
1455         n = pread(fd, p, st.st_size, 0);
1456         if (n < 0)
1457                 log_error("Failed to read file, ignoring: %s", strerror(-n));
1458         else if (n > 0)
1459                 process_native_message(s, p, n, ucred, tv, label, label_len);
1460
1461         free(p);
1462 }
1463
1464 static int stdout_stream_log(StdoutStream *s, const char *p) {
1465         struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1466         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1467         unsigned n = 0;
1468         int priority;
1469         char *label = NULL;
1470         size_t label_len = 0;
1471
1472         assert(s);
1473         assert(p);
1474
1475         if (isempty(p))
1476                 return 0;
1477
1478         priority = s->priority;
1479
1480         if (s->level_prefix)
1481                 parse_syslog_priority((char**) &p, &priority);
1482
1483         if (s->forward_to_syslog || s->server->forward_to_syslog)
1484                 forward_syslog(s->server, fixup_priority(priority), s->identifier, p, &s->ucred, NULL);
1485
1486         if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1487                 forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1488
1489         if (s->forward_to_console || s->server->forward_to_console)
1490                 forward_console(s->server, priority, s->identifier, p, &s->ucred);
1491
1492         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1493
1494         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1495                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1496
1497         if (priority & LOG_FACMASK)
1498                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1499                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1500
1501         if (s->identifier) {
1502                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1503                 if (syslog_identifier)
1504                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1505         }
1506
1507         message = strappend("MESSAGE=", p);
1508         if (message)
1509                 IOVEC_SET_STRING(iovec[n++], message);
1510
1511 #ifdef HAVE_SELINUX
1512         if (s->security_context) {
1513                 label = (char*) s->security_context;
1514                 label_len = strlen((char*) s->security_context);
1515         }
1516 #endif
1517
1518         dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, label, label_len, s->unit_id, priority);
1519
1520         free(message);
1521         free(syslog_priority);
1522         free(syslog_facility);
1523         free(syslog_identifier);
1524
1525         return 0;
1526 }
1527
1528 static int stdout_stream_line(StdoutStream *s, char *p) {
1529         int r;
1530
1531         assert(s);
1532         assert(p);
1533
1534         p = strstrip(p);
1535
1536         switch (s->state) {
1537
1538         case STDOUT_STREAM_IDENTIFIER:
1539                 if (isempty(p))
1540                         s->identifier = NULL;
1541                 else  {
1542                         s->identifier = strdup(p);
1543                         if (!s->identifier)
1544                                 return log_oom();
1545                 }
1546
1547                 s->state = STDOUT_STREAM_UNIT_ID;
1548                 return 0;
1549
1550         case STDOUT_STREAM_UNIT_ID:
1551                 if (s->ucred.uid == 0) {
1552                         if (isempty(p))
1553                                 s->unit_id = NULL;
1554                         else  {
1555                                 s->unit_id = strdup(p);
1556                                 if (!s->unit_id)
1557                                         return log_oom();
1558                         }
1559                 }
1560
1561                 s->state = STDOUT_STREAM_PRIORITY;
1562                 return 0;
1563
1564         case STDOUT_STREAM_PRIORITY:
1565                 r = safe_atoi(p, &s->priority);
1566                 if (r < 0 || s->priority <= 0 || s->priority >= 999) {
1567                         log_warning("Failed to parse log priority line.");
1568                         return -EINVAL;
1569                 }
1570
1571                 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1572                 return 0;
1573
1574         case STDOUT_STREAM_LEVEL_PREFIX:
1575                 r = parse_boolean(p);
1576                 if (r < 0) {
1577                         log_warning("Failed to parse level prefix line.");
1578                         return -EINVAL;
1579                 }
1580
1581                 s->level_prefix = !!r;
1582                 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1583                 return 0;
1584
1585         case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1586                 r = parse_boolean(p);
1587                 if (r < 0) {
1588                         log_warning("Failed to parse forward to syslog line.");
1589                         return -EINVAL;
1590                 }
1591
1592                 s->forward_to_syslog = !!r;
1593                 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1594                 return 0;
1595
1596         case STDOUT_STREAM_FORWARD_TO_KMSG:
1597                 r = parse_boolean(p);
1598                 if (r < 0) {
1599                         log_warning("Failed to parse copy to kmsg line.");
1600                         return -EINVAL;
1601                 }
1602
1603                 s->forward_to_kmsg = !!r;
1604                 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1605                 return 0;
1606
1607         case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1608                 r = parse_boolean(p);
1609                 if (r < 0) {
1610                         log_warning("Failed to parse copy to console line.");
1611                         return -EINVAL;
1612                 }
1613
1614                 s->forward_to_console = !!r;
1615                 s->state = STDOUT_STREAM_RUNNING;
1616                 return 0;
1617
1618         case STDOUT_STREAM_RUNNING:
1619                 return stdout_stream_log(s, p);
1620         }
1621
1622         assert_not_reached("Unknown stream state");
1623 }
1624
/* Splits the data collected in s->buffer into lines and passes each of
 * them to stdout_stream_line().
 *
 * A line ends at '\n'. If no newline is found but the pending data fills
 * the buffer (all but its last byte), the pending data is processed as one
 * line. With force_flush set, data left over without a trailing newline is
 * processed as a final line, too.
 *
 * Consumed data is removed from the buffer and s->length is updated to the
 * number of bytes left over. The buffer is modified in place: line ends
 * are overwritten with NUL bytes.
 *
 * Returns 0 on success, or the negative value stdout_stream_line()
 * returned. In the latter case the buffer bookkeeping is not updated. */
static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
        char *p;
        size_t remaining;
        int r;

        assert(s);

        p = s->buffer;
        remaining = s->length;
        for (;;) {
                char *end;
                size_t skip;

                end = memchr(p, '\n', remaining);
                if (end)
                        /* Consume the line including its newline */
                        skip = end - p + 1;
                else if (remaining >= sizeof(s->buffer) - 1) {
                        /* No newline, but the pending data takes up
                         * all usable space: process all of it as one
                         * line, terminated in the buffer's last byte */
                        end = p + sizeof(s->buffer) - 1;
                        skip = remaining;
                } else
                        /* Incomplete line, wait for more data */
                        break;

                /* Terminate the line in place */
                *end = 0;

                r = stdout_stream_line(s, p);
                if (r < 0)
                        return r;

                remaining -= skip;
                p += skip;
        }

        if (force_flush && remaining > 0) {
                /* Process the unterminated rest as a line of its own */
                p[remaining] = 0;
                r = stdout_stream_line(s, p);
                if (r < 0)
                        return r;

                p += remaining;
                remaining = 0;
        }

        /* If anything was consumed, move the leftover to the front */
        if (p > s->buffer) {
                memmove(s->buffer, p, remaining);
                s->length = remaining;
        }

        return 0;
}
1674
1675 static int stdout_stream_process(StdoutStream *s) {
1676         ssize_t l;
1677         int r;
1678
1679         assert(s);
1680
1681         l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1682         if (l < 0) {
1683
1684                 if (errno == EAGAIN)
1685                         return 0;
1686
1687                 log_warning("Failed to read from stream: %m");
1688                 return -errno;
1689         }
1690
1691         if (l == 0) {
1692                 r = stdout_stream_scan(s, true);
1693                 if (r < 0)
1694                         return r;
1695
1696                 return 0;
1697         }
1698
1699         s->length += l;
1700         r = stdout_stream_scan(s, false);
1701         if (r < 0)
1702                 return r;
1703
1704         return 1;
1705
1706 }
1707
1708 static void stdout_stream_free(StdoutStream *s) {
1709         assert(s);
1710
1711         if (s->server) {
1712                 assert(s->server->n_stdout_streams > 0);
1713                 s->server->n_stdout_streams --;
1714                 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
1715         }
1716
1717         if (s->fd >= 0) {
1718                 if (s->server)
1719                         epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1720
1721                 close_nointr_nofail(s->fd);
1722         }
1723
1724 #ifdef HAVE_SELINUX
1725         if (s->security_context)
1726                 freecon(s->security_context);
1727 #endif
1728
1729         free(s->identifier);
1730         free(s);
1731 }
1732
1733 static int stdout_stream_new(Server *s) {
1734         StdoutStream *stream;
1735         int fd, r;
1736         socklen_t len;
1737         struct epoll_event ev;
1738
1739         assert(s);
1740
1741         fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1742         if (fd < 0) {
1743                 if (errno == EAGAIN)
1744                         return 0;
1745
1746                 log_error("Failed to accept stdout connection: %m");
1747                 return -errno;
1748         }
1749
1750         if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1751                 log_warning("Too many stdout streams, refusing connection.");
1752                 close_nointr_nofail(fd);
1753                 return 0;
1754         }
1755
1756         stream = new0(StdoutStream, 1);
1757         if (!stream) {
1758                 close_nointr_nofail(fd);
1759                 return log_oom();
1760         }
1761
1762         stream->fd = fd;
1763
1764         len = sizeof(stream->ucred);
1765         if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1766                 log_error("Failed to determine peer credentials: %m");
1767                 r = -errno;
1768                 goto fail;
1769         }
1770
1771 #ifdef HAVE_SELINUX
1772         if (getpeercon(fd, &stream->security_context) < 0 && errno != ENOPROTOOPT)
1773                 log_error("Failed to determine peer security context: %m");
1774 #endif
1775
1776         if (shutdown(fd, SHUT_WR) < 0) {
1777                 log_error("Failed to shutdown writing side of socket: %m");
1778                 r = -errno;
1779                 goto fail;
1780         }
1781
1782         zero(ev);
1783         ev.data.ptr = stream;
1784         ev.events = EPOLLIN;
1785         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1786                 log_error("Failed to add stream to event loop: %m");
1787                 r = -errno;
1788                 goto fail;
1789         }
1790
1791         stream->server = s;
1792         LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1793         s->n_stdout_streams ++;
1794
1795         return 0;
1796
1797 fail:
1798         stdout_stream_free(stream);
1799         return r;
1800 }
1801
/* Returns true if the string pid parses as a PID and that PID equals
 * our own process ID; returns false if it cannot be parsed. */
static bool is_us(const char *pid) {
        pid_t parsed;

        assert(pid);

        return parse_pid(pid, &parsed) >= 0 && parsed == getpid();
}
1812
1813 static void dev_kmsg_record(Server *s, char *p, size_t l) {
1814         struct iovec iovec[N_IOVEC_META_FIELDS + 7 + N_IOVEC_KERNEL_FIELDS];
1815         char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
1816         int priority, r;
1817         unsigned n = 0, z = 0, j;
1818         usec_t usec;
1819         char *identifier = NULL, *pid = NULL, *e, *f, *k;
1820         uint64_t serial;
1821         size_t pl;
1822
1823         assert(s);
1824         assert(p);
1825
1826         if (l <= 0)
1827                 return;
1828
1829         e = memchr(p, ',', l);
1830         if (!e)
1831                 return;
1832         *e = 0;
1833
1834         r = safe_atoi(p, &priority);
1835         if (r < 0 || priority < 0 || priority > 999)
1836                 return;
1837
1838         if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
1839                 return;
1840
1841         l -= (e - p) + 1;
1842         p = e + 1;
1843         e = memchr(p, ',', l);
1844         if (!e)
1845                 return;
1846         *e = 0;
1847
1848         r = safe_atou64(p, &serial);
1849         if (r < 0)
1850                 return;
1851
1852         if (s->kernel_seqnum) {
1853                 /* We already read this one? */
1854                 if (serial < *s->kernel_seqnum)
1855                         return;
1856
1857                 /* Did we lose any? */
1858                 if (serial > *s->kernel_seqnum)
1859                         driver_message(s, SD_MESSAGE_JOURNAL_MISSED, "Missed %llu kernel messages", (unsigned long long) serial - *s->kernel_seqnum - 1);
1860
1861                 /* Make sure we never read this one again. Note that
1862                  * we always store the next message serial we expect
1863                  * here, simply because this makes handling the first
1864                  * message with serial 0 easy. */
1865                 *s->kernel_seqnum = serial + 1;
1866         }
1867
1868         l -= (e - p) + 1;
1869         p = e + 1;
1870         f = memchr(p, ';', l);
1871         if (!f)
1872                 return;
1873         /* Kernel 3.6 has the flags field, kernel 3.5 lacks that */
1874         e = memchr(p, ',', l);
1875         if (!e || f < e)
1876                 e = f;
1877         *e = 0;
1878
1879         r = parse_usec(p, &usec);
1880         if (r < 0)
1881                 return;
1882
1883         l -= (f - p) + 1;
1884         p = f + 1;
1885         e = memchr(p, '\n', l);
1886         if (!e)
1887                 return;
1888         *e = 0;
1889
1890         pl = e - p;
1891         l -= (e - p) + 1;
1892         k = e + 1;
1893
1894         for (j = 0; l > 0 && j < N_IOVEC_KERNEL_FIELDS; j++) {
1895                 char *m;
1896                 /* Meta data fields attached */
1897
1898                 if (*k != ' ')
1899                         break;
1900
1901                 k ++, l --;
1902
1903                 e = memchr(k, '\n', l);
1904                 if (!e)
1905                         return;
1906
1907                 *e = 0;
1908
1909                 m = cunescape_length_with_prefix(k, e - k, "_KERNEL_");
1910                 if (!m)
1911                         break;
1912
1913                 IOVEC_SET_STRING(iovec[n++], m);
1914                 z++;
1915
1916                 l -= (e - k) + 1;
1917                 k = e + 1;
1918         }
1919
1920         if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
1921                      (unsigned long long) usec) >= 0)
1922                 IOVEC_SET_STRING(iovec[n++], source_time);
1923
1924         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
1925
1926         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1927                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1928
1929         if ((priority & LOG_FACMASK) == LOG_KERN)
1930                 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
1931         else {
1932                 read_identifier((const char**) &p, &identifier, &pid);
1933
1934                 /* Avoid any messages we generated ourselves via
1935                  * log_info() and friends. */
1936                 if (pid && is_us(pid))
1937                         goto finish;
1938
1939                 if (identifier) {
1940                         syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1941                         if (syslog_identifier)
1942                                 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1943                 }
1944
1945                 if (pid) {
1946                         syslog_pid = strappend("SYSLOG_PID=", pid);
1947                         if (syslog_pid)
1948                                 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1949                 }
1950
1951                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1952                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1953         }
1954
1955         message = cunescape_length_with_prefix(p, pl, "MESSAGE=");
1956         if (message)
1957                 IOVEC_SET_STRING(iovec[n++], message);
1958
1959         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, NULL, priority);
1960
1961 finish:
1962         for (j = 0; j < z; j++)
1963                 free(iovec[j].iov_base);
1964
1965         free(message);
1966         free(syslog_priority);
1967         free(syslog_identifier);
1968         free(syslog_pid);
1969         free(syslog_facility);
1970         free(source_time);
1971         free(identifier);
1972         free(pid);
1973 }
1974
1975 static int system_journal_open(Server *s) {
1976         int r;
1977         char *fn;
1978         sd_id128_t machine;
1979         char ids[33];
1980
1981         r = sd_id128_get_machine(&machine);
1982         if (r < 0)
1983                 return r;
1984
1985         sd_id128_to_string(machine, ids);
1986
1987         if (!s->system_journal &&
1988             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
1989             access("/run/systemd/journal/flushed", F_OK) >= 0) {
1990
1991                 /* If in auto mode: first try to create the machine
1992                  * path, but not the prefix.
1993                  *
1994                  * If in persistent mode: create /var/log/journal and
1995                  * the machine path */
1996
1997                 if (s->storage == STORAGE_PERSISTENT)
1998                         (void) mkdir("/var/log/journal/", 0755);
1999
2000                 fn = strappend("/var/log/journal/", ids);
2001                 if (!fn)
2002                         return -ENOMEM;
2003
2004                 (void) mkdir(fn, 0755);
2005                 free(fn);
2006
2007                 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
2008                 if (!fn)
2009                         return -ENOMEM;
2010
2011                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
2012                 free(fn);
2013
2014                 if (r >= 0)
2015                         server_fix_perms(s, s->system_journal, 0);
2016                 else if (r < 0) {
2017
2018                         if (r != -ENOENT && r != -EROFS)
2019                                 log_warning("Failed to open system journal: %s", strerror(-r));
2020
2021                         r = 0;
2022                 }
2023         }
2024
2025         if (!s->runtime_journal &&
2026             (s->storage != STORAGE_NONE)) {
2027
2028                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
2029                 if (!fn)
2030                         return -ENOMEM;
2031
2032                 if (s->system_journal) {
2033
2034                         /* Try to open the runtime journal, but only
2035                          * if it already exists, so that we can flush
2036                          * it into the system journal */
2037
2038                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
2039                         free(fn);
2040
2041                         if (r < 0) {
2042                                 if (r != -ENOENT)
2043                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
2044
2045                                 r = 0;
2046                         }
2047
2048                 } else {
2049
2050                         /* OK, we really need the runtime journal, so create
2051                          * it if necessary. */
2052
2053                         (void) mkdir_parents(fn, 0755);
2054                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
2055                         free(fn);
2056
2057                         if (r < 0) {
2058                                 log_error("Failed to open runtime journal: %s", strerror(-r));
2059                                 return r;
2060                         }
2061                 }
2062
2063                 if (s->runtime_journal)
2064                         server_fix_perms(s, s->runtime_journal, 0);
2065         }
2066
2067         return r;
2068 }
2069
2070 static int server_flush_to_var(Server *s) {
2071         Object *o = NULL;
2072         int r;
2073         sd_id128_t machine;
2074         sd_journal *j;
2075
2076         assert(s);
2077
2078         if (s->storage != STORAGE_AUTO &&
2079             s->storage != STORAGE_PERSISTENT)
2080                 return 0;
2081
2082         if (!s->runtime_journal)
2083                 return 0;
2084
2085         system_journal_open(s);
2086
2087         if (!s->system_journal)
2088                 return 0;
2089
2090         log_info("Flushing to /var...");
2091
2092         r = sd_id128_get_machine(&machine);
2093         if (r < 0) {
2094                 log_error("Failed to get machine id: %s", strerror(-r));
2095                 return r;
2096         }
2097
2098         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
2099         if (r < 0) {
2100                 log_error("Failed to read runtime journal: %s", strerror(-r));
2101                 return r;
2102         }
2103
2104         SD_JOURNAL_FOREACH(j) {
2105                 JournalFile *f;
2106
2107                 f = j->current_file;
2108                 assert(f && f->current_offset > 0);
2109
2110                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2111                 if (r < 0) {
2112                         log_error("Can't read entry: %s", strerror(-r));
2113                         goto finish;
2114                 }
2115
2116                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2117                 if (r == -E2BIG) {
2118                         log_info("Allocation limit reached.");
2119
2120                         journal_file_post_change(s->system_journal);
2121                         server_rotate(s);
2122                         server_vacuum(s);
2123
2124                         r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2125                 }
2126
2127                 if (r < 0) {
2128                         log_error("Can't write entry: %s", strerror(-r));
2129                         goto finish;
2130                 }
2131         }
2132
2133 finish:
2134         journal_file_post_change(s->system_journal);
2135
2136         journal_file_close(s->runtime_journal);
2137         s->runtime_journal = NULL;
2138
2139         if (r >= 0)
2140                 rm_rf("/run/log/journal", false, true, false);
2141
2142         return r;
2143 }
2144
2145 static int server_read_dev_kmsg(Server *s) {
2146         char buffer[8192+1]; /* the kernel-side limit per record is 8K currently */
2147         ssize_t l;
2148
2149         assert(s);
2150         assert(s->dev_kmsg_fd >= 0);
2151
2152         l = read(s->dev_kmsg_fd, buffer, sizeof(buffer) - 1);
2153         if (l == 0)
2154                 return 0;
2155         if (l < 0) {
2156                 /* Old kernels who don't allow reading from /dev/kmsg
2157                  * return EINVAL when we try. So handle this cleanly,
2158                  * but don' try to ever read from it again. */
2159                 if (errno == EINVAL) {
2160                         epoll_ctl(s->epoll_fd, EPOLL_CTL_DEL, s->dev_kmsg_fd, NULL);
2161                         return 0;
2162                 }
2163
2164                 if (errno == EAGAIN || errno == EINTR)
2165                         return 0;
2166
2167                 log_error("Failed to read from kernel: %m");
2168                 return -errno;
2169         }
2170
2171         dev_kmsg_record(s, buffer, l);
2172         return 1;
2173 }
2174
2175 static int server_flush_dev_kmsg(Server *s) {
2176         int r;
2177
2178         assert(s);
2179
2180         if (s->dev_kmsg_fd < 0)
2181                 return 0;
2182
2183         if (!s->dev_kmsg_readable)
2184                 return 0;
2185
2186         log_info("Flushing /dev/kmsg...");
2187
2188         for (;;) {
2189                 r = server_read_dev_kmsg(s);
2190                 if (r < 0)
2191                         return r;
2192
2193                 if (r == 0)
2194                         break;
2195         }
2196
2197         return 0;
2198 }
2199
2200 static int process_event(Server *s, struct epoll_event *ev) {
2201         assert(s);
2202         assert(ev);
2203
2204         if (ev->data.fd == s->signal_fd) {
2205                 struct signalfd_siginfo sfsi;
2206                 ssize_t n;
2207
2208                 if (ev->events != EPOLLIN) {
2209                         log_info("Got invalid event from epoll.");
2210                         return -EIO;
2211                 }
2212
2213                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
2214                 if (n != sizeof(sfsi)) {
2215
2216                         if (n >= 0)
2217                                 return -EIO;
2218
2219                         if (errno == EINTR || errno == EAGAIN)
2220                                 return 1;
2221
2222                         return -errno;
2223                 }
2224
2225                 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
2226
2227                 if (sfsi.ssi_signo == SIGUSR1) {
2228                         touch("/run/systemd/journal/flushed");
2229                         server_flush_to_var(s);
2230                         return 1;
2231                 }
2232
2233                 if (sfsi.ssi_signo == SIGUSR2) {
2234                         server_rotate(s);
2235                         server_vacuum(s);
2236                         return 1;
2237                 }
2238
2239                 return 0;
2240
2241         } else if (ev->data.fd == s->dev_kmsg_fd) {
2242                 int r;
2243
2244                 if (ev->events != EPOLLIN) {
2245                         log_info("Got invalid event from epoll.");
2246                         return -EIO;
2247                 }
2248
2249                 r = server_read_dev_kmsg(s);
2250                 if (r < 0)
2251                         return r;
2252
2253                 return 1;
2254
2255         } else if (ev->data.fd == s->native_fd ||
2256                    ev->data.fd == s->syslog_fd) {
2257
2258                 if (ev->events != EPOLLIN) {
2259                         log_info("Got invalid event from epoll.");
2260                         return -EIO;
2261                 }
2262
2263                 for (;;) {
2264                         struct msghdr msghdr;
2265                         struct iovec iovec;
2266                         struct ucred *ucred = NULL;
2267                         struct timeval *tv = NULL;
2268                         struct cmsghdr *cmsg;
2269                         char *label = NULL;
2270                         size_t label_len = 0;
2271                         union {
2272                                 struct cmsghdr cmsghdr;
2273
2274                                 /* We use NAME_MAX space for the
2275                                  * SELinux label here. The kernel
2276                                  * currently enforces no limit, but
2277                                  * according to suggestions from the
2278                                  * SELinux people this will change and
2279                                  * it will probably be identical to
2280                                  * NAME_MAX. For now we use that, but
2281                                  * this should be updated one day when
2282                                  * the final limit is known.*/
2283                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
2284                                             CMSG_SPACE(sizeof(struct timeval)) +
2285                                             CMSG_SPACE(sizeof(int)) + /* fd */
2286                                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
2287                         } control;
2288                         ssize_t n;
2289                         int v;
2290                         int *fds = NULL;
2291                         unsigned n_fds = 0;
2292
2293                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
2294                                 log_error("SIOCINQ failed: %m");
2295                                 return -errno;
2296                         }
2297
2298                         if (s->buffer_size < (size_t) v) {
2299                                 void *b;
2300                                 size_t l;
2301
2302                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
2303                                 b = realloc(s->buffer, l+1);
2304
2305                                 if (!b) {
2306                                         log_error("Couldn't increase buffer.");
2307                                         return -ENOMEM;
2308                                 }
2309
2310                                 s->buffer_size = l;
2311                                 s->buffer = b;
2312                         }
2313
2314                         zero(iovec);
2315                         iovec.iov_base = s->buffer;
2316                         iovec.iov_len = s->buffer_size;
2317
2318                         zero(control);
2319                         zero(msghdr);
2320                         msghdr.msg_iov = &iovec;
2321                         msghdr.msg_iovlen = 1;
2322                         msghdr.msg_control = &control;
2323                         msghdr.msg_controllen = sizeof(control);
2324
2325                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
2326                         if (n < 0) {
2327
2328                                 if (errno == EINTR || errno == EAGAIN)
2329                                         return 1;
2330
2331                                 log_error("recvmsg() failed: %m");
2332                                 return -errno;
2333                         }
2334
2335                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
2336
2337                                 if (cmsg->cmsg_level == SOL_SOCKET &&
2338                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
2339                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
2340                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
2341                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2342                                          cmsg->cmsg_type == SCM_SECURITY) {
2343                                         label = (char*) CMSG_DATA(cmsg);
2344                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
2345                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
2346                                          cmsg->cmsg_type == SO_TIMESTAMP &&
2347                                          cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
2348                                         tv = (struct timeval*) CMSG_DATA(cmsg);
2349                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2350                                          cmsg->cmsg_type == SCM_RIGHTS) {
2351                                         fds = (int*) CMSG_DATA(cmsg);
2352                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
2353                                 }
2354                         }
2355
2356                         if (ev->data.fd == s->syslog_fd) {
2357                                 char *e;
2358
2359                                 if (n > 0 && n_fds == 0) {
2360                                         e = memchr(s->buffer, '\n', n);
2361                                         if (e)
2362                                                 *e = 0;
2363                                         else
2364                                                 s->buffer[n] = 0;
2365
2366                                         process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
2367                                 } else if (n_fds > 0)
2368                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
2369
2370                         } else {
2371                                 if (n > 0 && n_fds == 0)
2372                                         process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
2373                                 else if (n == 0 && n_fds == 1)
2374                                         process_native_file(s, fds[0], ucred, tv, label, label_len);
2375                                 else if (n_fds > 0)
2376                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
2377                         }
2378
2379                         close_many(fds, n_fds);
2380                 }
2381
2382                 return 1;
2383
2384         } else if (ev->data.fd == s->stdout_fd) {
2385
2386                 if (ev->events != EPOLLIN) {
2387                         log_info("Got invalid event from epoll.");
2388                         return -EIO;
2389                 }
2390
2391                 stdout_stream_new(s);
2392                 return 1;
2393
2394         } else {
2395                 StdoutStream *stream;
2396
2397                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
2398                         log_info("Got invalid event from epoll.");
2399                         return -EIO;
2400                 }
2401
2402                 /* If it is none of the well-known fds, it must be an
2403                  * stdout stream fd. Note that this is a bit ugly here
2404                  * (since we rely that none of the well-known fds
2405                  * could be interpreted as pointer), but nonetheless
2406                  * safe, since the well-known fds would never get an
2407                  * fd > 4096, i.e. beyond the first memory page */
2408
2409                 stream = ev->data.ptr;
2410
2411                 if (stdout_stream_process(stream) <= 0)
2412                         stdout_stream_free(stream);
2413
2414                 return 1;
2415         }
2416
2417         log_error("Unknown event.");
2418         return 0;
2419 }
2420
2421 static int open_syslog_socket(Server *s) {
2422         union sockaddr_union sa;
2423         int one, r;
2424         struct epoll_event ev;
2425
2426         assert(s);
2427
2428         if (s->syslog_fd < 0) {
2429
2430                 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2431                 if (s->syslog_fd < 0) {
2432                         log_error("socket() failed: %m");
2433                         return -errno;
2434                 }
2435
2436                 zero(sa);
2437                 sa.un.sun_family = AF_UNIX;
2438                 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
2439
2440                 unlink(sa.un.sun_path);
2441
2442                 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2443                 if (r < 0) {
2444                         log_error("bind() failed: %m");
2445                         return -errno;
2446                 }
2447
2448                 chmod(sa.un.sun_path, 0666);
2449         } else
2450                 fd_nonblock(s->syslog_fd, 1);
2451
2452         one = 1;
2453         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2454         if (r < 0) {
2455                 log_error("SO_PASSCRED failed: %m");
2456                 return -errno;
2457         }
2458
2459 #ifdef HAVE_SELINUX
2460         one = 1;
2461         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2462         if (r < 0)
2463                 log_warning("SO_PASSSEC failed: %m");
2464 #endif
2465
2466         one = 1;
2467         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2468         if (r < 0) {
2469                 log_error("SO_TIMESTAMP failed: %m");
2470                 return -errno;
2471         }
2472
2473         zero(ev);
2474         ev.events = EPOLLIN;
2475         ev.data.fd = s->syslog_fd;
2476         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2477                 log_error("Failed to add syslog server fd to epoll object: %m");
2478                 return -errno;
2479         }
2480
2481         return 0;
2482 }
2483
2484 static int open_native_socket(Server*s) {
2485         union sockaddr_union sa;
2486         int one, r;
2487         struct epoll_event ev;
2488
2489         assert(s);
2490
2491         if (s->native_fd < 0) {
2492
2493                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2494                 if (s->native_fd < 0) {
2495                         log_error("socket() failed: %m");
2496                         return -errno;
2497                 }
2498
2499                 zero(sa);
2500                 sa.un.sun_family = AF_UNIX;
2501                 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
2502
2503                 unlink(sa.un.sun_path);
2504
2505                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2506                 if (r < 0) {
2507                         log_error("bind() failed: %m");
2508                         return -errno;
2509                 }
2510
2511                 chmod(sa.un.sun_path, 0666);
2512         } else
2513                 fd_nonblock(s->native_fd, 1);
2514
2515         one = 1;
2516         r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2517         if (r < 0) {
2518                 log_error("SO_PASSCRED failed: %m");
2519                 return -errno;
2520         }
2521
2522 #ifdef HAVE_SELINUX
2523         one = 1;
2524         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2525         if (r < 0)
2526                 log_warning("SO_PASSSEC failed: %m");
2527 #endif
2528
2529         one = 1;
2530         r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2531         if (r < 0) {
2532                 log_error("SO_TIMESTAMP failed: %m");
2533                 return -errno;
2534         }
2535
2536         zero(ev);
2537         ev.events = EPOLLIN;
2538         ev.data.fd = s->native_fd;
2539         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2540                 log_error("Failed to add native server fd to epoll object: %m");
2541                 return -errno;
2542         }
2543
2544         return 0;
2545 }
2546
2547 static int open_stdout_socket(Server *s) {
2548         union sockaddr_union sa;
2549         int r;
2550         struct epoll_event ev;
2551
2552         assert(s);
2553
2554         if (s->stdout_fd < 0) {
2555
2556                 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2557                 if (s->stdout_fd < 0) {
2558                         log_error("socket() failed: %m");
2559                         return -errno;
2560                 }
2561
2562                 zero(sa);
2563                 sa.un.sun_family = AF_UNIX;
2564                 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
2565
2566                 unlink(sa.un.sun_path);
2567
2568                 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2569                 if (r < 0) {
2570                         log_error("bind() failed: %m");
2571                         return -errno;
2572                 }
2573
2574                 chmod(sa.un.sun_path, 0666);
2575
2576                 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2577                         log_error("liste() failed: %m");
2578                         return -errno;
2579                 }
2580         } else
2581                 fd_nonblock(s->stdout_fd, 1);
2582
2583         zero(ev);
2584         ev.events = EPOLLIN;
2585         ev.data.fd = s->stdout_fd;
2586         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2587                 log_error("Failed to add stdout server fd to epoll object: %m");
2588                 return -errno;
2589         }
2590
2591         return 0;
2592 }
2593
2594 static int open_dev_kmsg(Server *s) {
2595         struct epoll_event ev;
2596
2597         assert(s);
2598
2599         s->dev_kmsg_fd = open("/dev/kmsg", O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2600         if (s->dev_kmsg_fd < 0) {
2601                 log_warning("Failed to open /dev/kmsg, ignoring: %m");
2602                 return 0;
2603         }
2604
2605         zero(ev);
2606         ev.events = EPOLLIN;
2607         ev.data.fd = s->dev_kmsg_fd;
2608         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->dev_kmsg_fd, &ev) < 0) {
2609
2610                 /* This will fail with EPERM on older kernels where
2611                  * /dev/kmsg is not readable. */
2612                 if (errno == EPERM)
2613                         return 0;
2614
2615                 log_error("Failed to add /dev/kmsg fd to epoll object: %m");
2616                 return -errno;
2617         }
2618
2619         s->dev_kmsg_readable = true;
2620
2621         return 0;
2622 }
2623
2624 static int open_kernel_seqnum(Server *s) {
2625         int fd;
2626         uint64_t *p;
2627
2628         assert(s);
2629
2630         /* We store the seqnum we last read in an mmaped file. That
2631          * way we can just use it like a variable, but it is
2632          * persistant and automatically flushed at reboot. */
2633
2634         fd = open("/run/systemd/journal/kernel-seqnum", O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0644);
2635         if (fd < 0) {
2636                 log_error("Failed to open /run/systemd/journal/kernel-seqnum, ignoring: %m");
2637                 return 0;
2638         }
2639
2640         if (posix_fallocate(fd, 0, sizeof(uint64_t)) < 0) {
2641                 log_error("Failed to allocate sequential number file, ignoring: %m");
2642                 close_nointr_nofail(fd);
2643                 return 0;
2644         }
2645
2646         p = mmap(NULL, sizeof(uint64_t), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2647         if (p == MAP_FAILED) {
2648                 log_error("Failed to map sequential number file, ignoring: %m");
2649                 close_nointr_nofail(fd);
2650                 return 0;
2651         }
2652
2653         close_nointr_nofail(fd);
2654         s->kernel_seqnum = p;
2655
2656         return 0;
2657 }
2658
2659 static int open_signalfd(Server *s) {
2660         sigset_t mask;
2661         struct epoll_event ev;
2662
2663         assert(s);
2664
2665         assert_se(sigemptyset(&mask) == 0);
2666         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
2667         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2668
2669         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2670         if (s->signal_fd < 0) {
2671                 log_error("signalfd(): %m");
2672                 return -errno;
2673         }
2674
2675         zero(ev);
2676         ev.events = EPOLLIN;
2677         ev.data.fd = s->signal_fd;
2678
2679         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2680                 log_error("epoll_ctl(): %m");
2681                 return -errno;
2682         }
2683
2684         return 0;
2685 }
2686
2687 static int server_parse_proc_cmdline(Server *s) {
2688         char *line, *w, *state;
2689         int r;
2690         size_t l;
2691
2692         if (detect_container(NULL) > 0)
2693                 return 0;
2694
2695         r = read_one_line_file("/proc/cmdline", &line);
2696         if (r < 0) {
2697                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2698                 return 0;
2699         }
2700
2701         FOREACH_WORD_QUOTED(w, l, line, state) {
2702                 char *word;
2703
2704                 word = strndup(w, l);
2705                 if (!word) {
2706                         r = -ENOMEM;
2707                         goto finish;
2708                 }
2709
2710                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
2711                         r = parse_boolean(word + 35);
2712                         if (r < 0)
2713                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2714                         else
2715                                 s->forward_to_syslog = r;
2716                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
2717                         r = parse_boolean(word + 33);
2718                         if (r < 0)
2719                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2720                         else
2721                                 s->forward_to_kmsg = r;
2722                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
2723                         r = parse_boolean(word + 36);
2724                         if (r < 0)
2725                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2726                         else
2727                                 s->forward_to_console = r;
2728                 } else if (startswith(word, "systemd.journald"))
2729                         log_warning("Invalid systemd.journald parameter. Ignoring.");
2730
2731                 free(word);
2732         }
2733
2734         r = 0;
2735
2736 finish:
2737         free(line);
2738         return r;
2739 }
2740
2741 static int server_parse_config_file(Server *s) {
2742         FILE *f;
2743         const char *fn;
2744         int r;
2745
2746         assert(s);
2747
2748         fn = "/etc/systemd/journald.conf";
2749         f = fopen(fn, "re");
2750         if (!f) {
2751                 if (errno == ENOENT)
2752                         return 0;
2753
2754                 log_warning("Failed to open configuration file %s: %m", fn);
2755                 return -errno;
2756         }
2757
2758         r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2759         if (r < 0)
2760                 log_warning("Failed to parse configuration file: %s", strerror(-r));
2761
2762         fclose(f);
2763
2764         return r;
2765 }
2766
2767 static int server_init(Server *s) {
2768         int n, r, fd;
2769
2770         assert(s);
2771
2772         zero(*s);
2773         s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
2774         s->compress = true;
2775         s->seal = true;
2776
2777         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
2778         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
2779
2780         s->forward_to_syslog = true;
2781
2782         s->max_level_store = LOG_DEBUG;
2783         s->max_level_syslog = LOG_DEBUG;
2784         s->max_level_kmsg = LOG_NOTICE;
2785         s->max_level_console = LOG_INFO;
2786
2787         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
2788         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
2789
2790         server_parse_config_file(s);
2791         server_parse_proc_cmdline(s);
2792
2793         mkdir_p("/run/systemd/journal", 0755);
2794
2795         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
2796         if (!s->user_journals)
2797                 return log_oom();
2798
2799         s->mmap = mmap_cache_new();
2800         if (!s->mmap)
2801                 return log_oom();
2802
2803         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2804         if (s->epoll_fd < 0) {
2805                 log_error("Failed to create epoll object: %m");
2806                 return -errno;
2807         }
2808
2809         n = sd_listen_fds(true);
2810         if (n < 0) {
2811                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
2812                 return n;
2813         }
2814
2815         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2816
2817                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
2818
2819                         if (s->native_fd >= 0) {
2820                                 log_error("Too many native sockets passed.");
2821                                 return -EINVAL;
2822                         }
2823
2824                         s->native_fd = fd;
2825
2826                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
2827
2828                         if (s->stdout_fd >= 0) {
2829                                 log_error("Too many stdout sockets passed.");
2830                                 return -EINVAL;
2831                         }
2832
2833                         s->stdout_fd = fd;
2834
2835                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
2836
2837                         if (s->syslog_fd >= 0) {
2838                                 log_error("Too many /dev/log sockets passed.");
2839                                 return -EINVAL;
2840                         }
2841
2842                         s->syslog_fd = fd;
2843
2844                 } else {
2845                         log_error("Unknown socket passed.");
2846                         return -EINVAL;
2847                 }
2848         }
2849
2850         r = open_syslog_socket(s);
2851         if (r < 0)
2852                 return r;
2853
2854         r = open_native_socket(s);
2855         if (r < 0)
2856                 return r;
2857
2858         r = open_stdout_socket(s);
2859         if (r < 0)
2860                 return r;
2861
2862         r = open_dev_kmsg(s);
2863         if (r < 0)
2864                 return r;
2865
2866         r = open_kernel_seqnum(s);
2867         if (r < 0)
2868                 return r;
2869
2870         r = open_signalfd(s);
2871         if (r < 0)
2872                 return r;
2873
2874         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2875         if (!s->rate_limit)
2876                 return -ENOMEM;
2877
2878         r = system_journal_open(s);
2879         if (r < 0)
2880                 return r;
2881
2882         return 0;
2883 }
2884
2885 static void server_done(Server *s) {
2886         JournalFile *f;
2887         assert(s);
2888
2889         while (s->stdout_streams)
2890                 stdout_stream_free(s->stdout_streams);
2891
2892         if (s->system_journal)
2893                 journal_file_close(s->system_journal);
2894
2895         if (s->runtime_journal)
2896                 journal_file_close(s->runtime_journal);
2897
2898         while ((f = hashmap_steal_first(s->user_journals)))
2899                 journal_file_close(f);
2900
2901         hashmap_free(s->user_journals);
2902
2903         if (s->epoll_fd >= 0)
2904                 close_nointr_nofail(s->epoll_fd);
2905
2906         if (s->signal_fd >= 0)
2907                 close_nointr_nofail(s->signal_fd);
2908
2909         if (s->syslog_fd >= 0)
2910                 close_nointr_nofail(s->syslog_fd);
2911
2912         if (s->native_fd >= 0)
2913                 close_nointr_nofail(s->native_fd);
2914
2915         if (s->stdout_fd >= 0)
2916                 close_nointr_nofail(s->stdout_fd);
2917
2918         if (s->dev_kmsg_fd >= 0)
2919                 close_nointr_nofail(s->dev_kmsg_fd);
2920
2921         if (s->rate_limit)
2922                 journal_rate_limit_free(s->rate_limit);
2923
2924         if (s->kernel_seqnum)
2925                 munmap(s->kernel_seqnum, sizeof(uint64_t));
2926
2927         free(s->buffer);
2928         free(s->tty_path);
2929
2930         if (s->mmap)
2931                 mmap_cache_unref(s->mmap);
2932 }
2933
2934 int main(int argc, char *argv[]) {
2935         Server server;
2936         int r;
2937
2938         /* if (getppid() != 1) { */
2939         /*         log_error("This program should be invoked by init only."); */
2940         /*         return EXIT_FAILURE; */
2941         /* } */
2942
2943         if (argc > 1) {
2944                 log_error("This program does not take arguments.");
2945                 return EXIT_FAILURE;
2946         }
2947
2948         log_set_target(LOG_TARGET_SAFE);
2949         log_set_facility(LOG_SYSLOG);
2950         log_set_max_level(LOG_DEBUG);
2951         log_parse_environment();
2952         log_open();
2953
2954         umask(0022);
2955
2956         r = server_init(&server);
2957         if (r < 0)
2958                 goto finish;
2959
2960         server_vacuum(&server);
2961         server_flush_to_var(&server);
2962         server_flush_dev_kmsg(&server);
2963
2964         log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
2965         driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
2966
2967         sd_notify(false,
2968                   "READY=1\n"
2969                   "STATUS=Processing requests...");
2970
2971         for (;;) {
2972                 struct epoll_event event;
2973                 int t;
2974
2975 #ifdef HAVE_GCRYPT
2976                 usec_t u;
2977
2978                 if (server.system_journal &&
2979                     journal_file_next_evolve_usec(server.system_journal, &u)) {
2980                         usec_t n;
2981
2982                         n = now(CLOCK_MONOTONIC);
2983
2984                         if (n >= u)
2985                                 t = 0;
2986                         else
2987                                 t = (int) ((u - n + USEC_PER_MSEC - 1) / USEC_PER_MSEC);
2988                 } else
2989 #endif
2990                         t = -1;
2991
2992                 r = epoll_wait(server.epoll_fd, &event, 1, t);
2993                 if (r < 0) {
2994
2995                         if (errno == EINTR)
2996                                 continue;
2997
2998                         log_error("epoll_wait() failed: %m");
2999                         r = -errno;
3000                         goto finish;
3001                 }
3002
3003                 if (r > 0) {
3004                         r = process_event(&server, &event);
3005                         if (r < 0)
3006                                 goto finish;
3007                         else if (r == 0)
3008                                 break;
3009                 }
3010
3011 #ifdef HAVE_GCRYPT
3012                 if (server.system_journal)
3013                         journal_file_maybe_append_tag(server.system_journal, 0);
3014 #endif
3015         }
3016
3017         log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
3018         driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
3019
3020 finish:
3021         sd_notify(false,
3022                   "STATUS=Shutting down...");
3023
3024         server_done(&server);
3025
3026         return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
3027 }