chiark / gitweb /
journal: support changing the console tty to forward to
[elogind.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
32
33 #include <systemd/sd-journal.h>
34 #include <systemd/sd-messages.h>
35 #include <systemd/sd-daemon.h>
36
37 #ifdef HAVE_LOGIND
38 #include <systemd/sd-login.h>
39 #endif
40
41 #include "mkdir.h"
42 #include "hashmap.h"
43 #include "journal-file.h"
44 #include "socket-util.h"
45 #include "cgroup-util.h"
46 #include "list.h"
47 #include "journal-rate-limit.h"
48 #include "journal-internal.h"
49 #include "conf-parser.h"
50 #include "journald.h"
51 #include "virt.h"
52 #include "missing.h"
53
54 #ifdef HAVE_ACL
55 #include <sys/acl.h>
56 #include <acl/libacl.h>
57 #include "acl-util.h"
58 #endif
59
60 #ifdef HAVE_SELINUX
61 #include <selinux/selinux.h>
62 #endif
63
64 #define USER_JOURNALS_MAX 1024 /* find_journal() closes open per-user journals until fewer than this many remain */
65 #define STDOUT_STREAMS_MAX 4096 /* presumably the cap on concurrent stdout stream connections; user is outside this chunk -- confirm */
66
67 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC) /* default rate limit window, in usec; consumer is outside this chunk */
68 #define DEFAULT_RATE_LIMIT_BURST 200 /* default rate limit burst; consumer is outside this chunk */
69
70 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC) /* how long available_space() reuses its cached result */
71
72 #define RECHECK_VAR_AVAILABLE_USEC (30*USEC_PER_SEC) /* presumably the retry interval for probing /var; user is outside this chunk -- confirm */
73
74 #define N_IOVEC_META_FIELDS 17 /* iovec slots dispatch_message_real() requires free for the metadata fields it appends */
75
76 #define ENTRY_SIZE_MAX (1024*1024*32) /* presumably the largest accepted entry, in bytes (32 MiB); user is outside this chunk -- confirm */
77
78 typedef enum StdoutStreamState { /* state stored in StdoutStream.state; the code driving it is outside this chunk */
79         STDOUT_STREAM_IDENTIFIER, /* NOTE(review): all but the last state share a name with a StdoutStream field -- presumably "waiting for that header value"; confirm */
80         STDOUT_STREAM_PRIORITY,
81         STDOUT_STREAM_LEVEL_PREFIX,
82         STDOUT_STREAM_FORWARD_TO_SYSLOG,
83         STDOUT_STREAM_FORWARD_TO_KMSG,
84         STDOUT_STREAM_FORWARD_TO_CONSOLE,
85         STDOUT_STREAM_RUNNING /* presumably: header complete, payload lines follow -- confirm */
86 } StdoutStreamState;
87
88 struct StdoutStream { /* per-connection record for one stdout stream; the code that fills it in is outside this chunk */
89         Server *server; /* back pointer to the owning server */
90         StdoutStreamState state; /* current StdoutStreamState of this stream */
91
92         int fd; /* presumably the connection's file descriptor -- confirm */
93
94         struct ucred ucred; /* presumably the peer's credentials -- confirm */
95 #ifdef HAVE_SELINUX
96         security_context_t security_context; /* only present when built with SELinux support */
97 #endif
98
99         char *identifier; /* the fields below share names with the StdoutStreamState values */
100         int priority;
101         bool level_prefix:1;
102         bool forward_to_syslog:1;
103         bool forward_to_kmsg:1;
104         bool forward_to_console:1;
105
106         char buffer[LINE_MAX+1]; /* LINE_MAX bytes plus one spare byte */
107         size_t length; /* presumably the number of bytes currently held in buffer -- confirm */
108
109         LIST_FIELDS(StdoutStream, stdout_stream); /* list linkage generated by the LIST_FIELDS macro */
110 };
111
112 static int server_flush_to_var(Server *s);
113
114 static uint64_t available_space(Server *s) {
115         char ids[33], *p;
116         const char *f;
117         sd_id128_t machine;
118         struct statvfs ss;
119         uint64_t sum = 0, avail = 0, ss_avail = 0;
120         int r;
121         DIR *d;
122         usec_t ts;
123         JournalMetrics *m;
124
125         ts = now(CLOCK_MONOTONIC);
126
127         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
128                 return s->cached_available_space;
129
130         r = sd_id128_get_machine(&machine);
131         if (r < 0)
132                 return 0;
133
134         if (s->system_journal) {
135                 f = "/var/log/journal/";
136                 m = &s->system_metrics;
137         } else {
138                 f = "/run/log/journal/";
139                 m = &s->runtime_metrics;
140         }
141
142         assert(m);
143
144         p = strappend(f, sd_id128_to_string(machine, ids));
145         if (!p)
146                 return 0;
147
148         d = opendir(p);
149         free(p);
150
151         if (!d)
152                 return 0;
153
154         if (fstatvfs(dirfd(d), &ss) < 0)
155                 goto finish;
156
157         for (;;) {
158                 struct stat st;
159                 struct dirent buf, *de;
160
161                 r = readdir_r(d, &buf, &de);
162                 if (r != 0)
163                         break;
164
165                 if (!de)
166                         break;
167
168                 if (!endswith(de->d_name, ".journal") &&
169                     !endswith(de->d_name, ".journal~"))
170                         continue;
171
172                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
173                         continue;
174
175                 if (!S_ISREG(st.st_mode))
176                         continue;
177
178                 sum += (uint64_t) st.st_blocks * 512UL;
179         }
180
181         avail = sum >= m->max_use ? 0 : m->max_use - sum;
182
183         ss_avail = ss.f_bsize * ss.f_bavail;
184
185         ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
186
187         if (ss_avail < avail)
188                 avail = ss_avail;
189
190         s->cached_available_space = avail;
191         s->cached_available_space_timestamp = ts;
192
193 finish:
194         closedir(d);
195
196         return avail;
197 }
198
199 static void server_read_file_gid(Server *s) {
200         const char *adm = "adm";
201         int r;
202
203         assert(s);
204
205         if (s->file_gid_valid)
206                 return;
207
208         r = get_group_creds(&adm, &s->file_gid);
209         if (r < 0)
210                 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
211
212         /* if we couldn't read the gid, then it will be 0, but that's
213          * fine and we shouldn't try to resolve the group again, so
214          * let's just pretend it worked right-away. */
215         s->file_gid_valid = true;
216 }
217
218 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
219         int r;
220 #ifdef HAVE_ACL
221         acl_t acl;
222         acl_entry_t entry;
223         acl_permset_t permset;
224 #endif
225
226         assert(f);
227
228         server_read_file_gid(s);
229
230         r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
231         if (r < 0)
232                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
233
234 #ifdef HAVE_ACL
235         if (uid <= 0)
236                 return;
237
238         acl = acl_get_fd(f->fd);
239         if (!acl) {
240                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
241                 return;
242         }
243
244         r = acl_find_uid(acl, uid, &entry);
245         if (r <= 0) {
246
247                 if (acl_create_entry(&acl, &entry) < 0 ||
248                     acl_set_tag_type(entry, ACL_USER) < 0 ||
249                     acl_set_qualifier(entry, &uid) < 0) {
250                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
251                         goto finish;
252                 }
253         }
254
255         if (acl_get_permset(entry, &permset) < 0 ||
256             acl_add_perm(permset, ACL_READ) < 0 ||
257             acl_calc_mask(&acl) < 0) {
258                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
259                 goto finish;
260         }
261
262         if (acl_set_fd(f->fd, acl) < 0)
263                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
264
265 finish:
266         acl_free(acl);
267 #endif
268 }
269
270 static JournalFile* find_journal(Server *s, uid_t uid) {
271         char *p;
272         int r;
273         JournalFile *f;
274         char ids[33];
275         sd_id128_t machine;
276
277         assert(s);
278
279         /* We split up user logs only on /var, not on /run. If the
280          * runtime file is open, we write to it exclusively, in order
281          * to guarantee proper order as soon as we flush /run to
282          * /var and close the runtime file. */
283
284         if (s->runtime_journal)
285                 return s->runtime_journal;
286
287         if (uid <= 0)
288                 return s->system_journal;
289
290         r = sd_id128_get_machine(&machine);
291         if (r < 0)
292                 return s->system_journal;
293
294         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
295         if (f)
296                 return f;
297
298         if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0)
299                 return s->system_journal;
300
301         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
302                 /* Too many open? Then let's close one */
303                 f = hashmap_steal_first(s->user_journals);
304                 assert(f);
305                 journal_file_close(f);
306         }
307
308         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->system_journal, &f);
309         free(p);
310
311         if (r < 0)
312                 return s->system_journal;
313
314         server_fix_perms(s, f, uid);
315
316         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
317         if (r < 0) {
318                 journal_file_close(f);
319                 return s->system_journal;
320         }
321
322         return f;
323 }
324
325 static void server_rotate(Server *s) {
326         JournalFile *f;
327         void *k;
328         Iterator i;
329         int r;
330
331         log_info("Rotating...");
332
333         if (s->runtime_journal) {
334                 r = journal_file_rotate(&s->runtime_journal);
335                 if (r < 0)
336                         if (s->runtime_journal)
337                                 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
338                         else
339                                 log_error("Failed to create new runtime journal: %s", strerror(-r));
340                 else
341                         server_fix_perms(s, s->runtime_journal, 0);
342         }
343
344         if (s->system_journal) {
345                 r = journal_file_rotate(&s->system_journal);
346                 if (r < 0)
347                         if (s->system_journal)
348                                 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
349                         else
350                                 log_error("Failed to create new system journal: %s", strerror(-r));
351
352                 else
353                         server_fix_perms(s, s->system_journal, 0);
354         }
355
356         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
357                 r = journal_file_rotate(&f);
358                 if (r < 0)
359                         if (f->path)
360                                 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
361                         else
362                                 log_error("Failed to create user journal: %s", strerror(-r));
363                 else {
364                         hashmap_replace(s->user_journals, k, f);
365                         server_fix_perms(s, s->system_journal, PTR_TO_UINT32(k));
366                 }
367         }
368 }
369
370 static void server_vacuum(Server *s) {
371         char *p;
372         char ids[33];
373         sd_id128_t machine;
374         int r;
375
376         log_info("Vacuuming...");
377
378         r = sd_id128_get_machine(&machine);
379         if (r < 0) {
380                 log_error("Failed to get machine ID: %s", strerror(-r));
381                 return;
382         }
383
384         sd_id128_to_string(machine, ids);
385
386         if (s->system_journal) {
387                 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
388                         log_error("Out of memory.");
389                         return;
390                 }
391
392                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
393                 if (r < 0 && r != -ENOENT)
394                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
395                 free(p);
396         }
397
398
399         if (s->runtime_journal) {
400                 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
401                         log_error("Out of memory.");
402                         return;
403                 }
404
405                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
406                 if (r < 0 && r != -ENOENT)
407                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
408                 free(p);
409         }
410
411         s->cached_available_space_timestamp = 0;
412 }
413
414 static char *shortened_cgroup_path(pid_t pid) {
415         int r;
416         char *process_path, *init_path, *path;
417
418         assert(pid > 0);
419
420         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
421         if (r < 0)
422                 return NULL;
423
424         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
425         if (r < 0) {
426                 free(process_path);
427                 return NULL;
428         }
429
430         if (endswith(init_path, "/system"))
431                 init_path[strlen(init_path) - 7] = 0;
432         else if (streq(init_path, "/"))
433                 init_path[0] = 0;
434
435         if (startswith(process_path, init_path)) {
436                 char *p;
437
438                 p = strdup(process_path + strlen(init_path));
439                 if (!p) {
440                         free(process_path);
441                         free(init_path);
442                         return NULL;
443                 }
444                 path = p;
445         } else {
446                 path = process_path;
447                 process_path = NULL;
448         }
449
450         free(process_path);
451         free(init_path);
452
453         return path;
454 }
455
456 static void dispatch_message_real(
457                 Server *s,
458                 struct iovec *iovec, unsigned n, unsigned m,
459                 struct ucred *ucred,
460                 struct timeval *tv,
461                 const char *label, size_t label_len) {
462
463         char *pid = NULL, *uid = NULL, *gid = NULL,
464                 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
465                 *comm = NULL, *cmdline = NULL, *hostname = NULL,
466                 *audit_session = NULL, *audit_loginuid = NULL,
467                 *exe = NULL, *cgroup = NULL, *session = NULL,
468                 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
469
470         char idbuf[33];
471         sd_id128_t id;
472         int r;
473         char *t;
474         uid_t loginuid = 0, realuid = 0;
475         JournalFile *f;
476         bool vacuumed = false;
477
478         assert(s);
479         assert(iovec);
480         assert(n > 0);
481         assert(n + N_IOVEC_META_FIELDS <= m);
482
483         if (ucred) {
484                 uint32_t audit;
485 #ifdef HAVE_LOGIND
486                 uid_t owner;
487 #endif
488
489                 realuid = ucred->uid;
490
491                 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
492                         IOVEC_SET_STRING(iovec[n++], pid);
493
494                 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
495                         IOVEC_SET_STRING(iovec[n++], uid);
496
497                 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
498                         IOVEC_SET_STRING(iovec[n++], gid);
499
500                 r = get_process_comm(ucred->pid, &t);
501                 if (r >= 0) {
502                         comm = strappend("_COMM=", t);
503                         free(t);
504
505                         if (comm)
506                                 IOVEC_SET_STRING(iovec[n++], comm);
507                 }
508
509                 r = get_process_exe(ucred->pid, &t);
510                 if (r >= 0) {
511                         exe = strappend("_EXE=", t);
512                         free(t);
513
514                         if (exe)
515                                 IOVEC_SET_STRING(iovec[n++], exe);
516                 }
517
518                 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
519                 if (r >= 0) {
520                         cmdline = strappend("_CMDLINE=", t);
521                         free(t);
522
523                         if (cmdline)
524                                 IOVEC_SET_STRING(iovec[n++], cmdline);
525                 }
526
527                 r = audit_session_from_pid(ucred->pid, &audit);
528                 if (r >= 0)
529                         if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
530                                 IOVEC_SET_STRING(iovec[n++], audit_session);
531
532                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
533                 if (r >= 0)
534                         if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
535                                 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
536
537                 t = shortened_cgroup_path(ucred->pid);
538                 if (t) {
539                         cgroup = strappend("_SYSTEMD_CGROUP=", t);
540                         free(t);
541
542                         if (cgroup)
543                                 IOVEC_SET_STRING(iovec[n++], cgroup);
544                 }
545
546 #ifdef HAVE_LOGIND
547                 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
548                         session = strappend("_SYSTEMD_SESSION=", t);
549                         free(t);
550
551                         if (session)
552                                 IOVEC_SET_STRING(iovec[n++], session);
553                 }
554
555                 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
556                         if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
557                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
558 #endif
559
560                 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
561                         unit = strappend("_SYSTEMD_UNIT=", t);
562                         free(t);
563
564                         if (unit)
565                                 IOVEC_SET_STRING(iovec[n++], unit);
566                 }
567
568 #ifdef HAVE_SELINUX
569                 if (label) {
570                         selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
571                         if (selinux_context) {
572                                 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
573                                 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
574                                 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
575                                 IOVEC_SET_STRING(iovec[n++], selinux_context);
576                         }
577                 } else {
578                         security_context_t con;
579
580                         if (getpidcon(ucred->pid, &con) >= 0) {
581                                 selinux_context = strappend("_SELINUX_CONTEXT=", con);
582                                 if (selinux_context)
583                                         IOVEC_SET_STRING(iovec[n++], selinux_context);
584
585                                 freecon(con);
586                         }
587                 }
588 #endif
589         }
590
591         if (tv) {
592                 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
593                              (unsigned long long) timeval_load(tv)) >= 0)
594                         IOVEC_SET_STRING(iovec[n++], source_time);
595         }
596
597         /* Note that strictly speaking storing the boot id here is
598          * redundant since the entry includes this in-line
599          * anyway. However, we need this indexed, too. */
600         r = sd_id128_get_boot(&id);
601         if (r >= 0)
602                 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
603                         IOVEC_SET_STRING(iovec[n++], boot_id);
604
605         r = sd_id128_get_machine(&id);
606         if (r >= 0)
607                 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
608                         IOVEC_SET_STRING(iovec[n++], machine_id);
609
610         t = gethostname_malloc();
611         if (t) {
612                 hostname = strappend("_HOSTNAME=", t);
613                 free(t);
614                 if (hostname)
615                         IOVEC_SET_STRING(iovec[n++], hostname);
616         }
617
618         assert(n <= m);
619
620         server_flush_to_var(s);
621
622 retry:
623         f = find_journal(s, realuid == 0 ? 0 : loginuid);
624         if (!f)
625                 log_warning("Dropping message, as we can't find a place to store the data.");
626         else {
627                 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
628
629                 if ((r == -E2BIG || /* hit limit */
630                      r == -EFBIG || /* hit fs limit */
631                      r == -EDQUOT || /* quota hit */
632                      r == -ENOSPC || /* disk full */
633                      r == -EBADMSG || /* corrupted */
634                      r == -ENODATA || /* truncated */
635                      r == -EHOSTDOWN || /* other machine */
636                      r == -EPROTONOSUPPORT) && /* unsupported feature */
637                     !vacuumed) {
638
639                         if (r == -E2BIG)
640                                 log_info("Allocation limit reached, rotating.");
641                         else
642                                 log_warning("Journal file corrupted, rotating.");
643
644                         server_rotate(s);
645                         server_vacuum(s);
646                         vacuumed = true;
647
648                         log_info("Retrying write.");
649                         goto retry;
650                 }
651
652                 if (r < 0)
653                         log_error("Failed to write entry, ignoring: %s", strerror(-r));
654         }
655
656         free(pid);
657         free(uid);
658         free(gid);
659         free(comm);
660         free(exe);
661         free(cmdline);
662         free(source_time);
663         free(boot_id);
664         free(machine_id);
665         free(hostname);
666         free(audit_session);
667         free(audit_loginuid);
668         free(cgroup);
669         free(session);
670         free(owner_uid);
671         free(unit);
672         free(selinux_context);
673 }
674
675 static void driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
676         char mid[11 + 32 + 1];
677         char buffer[16 + LINE_MAX + 1];
678         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
679         int n = 0;
680         va_list ap;
681         struct ucred ucred;
682
683         assert(s);
684         assert(format);
685
686         IOVEC_SET_STRING(iovec[n++], "PRIORITY=5");
687         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
688
689         memcpy(buffer, "MESSAGE=", 8);
690         va_start(ap, format);
691         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
692         va_end(ap);
693         char_array_0(buffer);
694         IOVEC_SET_STRING(iovec[n++], buffer);
695
696         snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
697         char_array_0(mid);
698         IOVEC_SET_STRING(iovec[n++], mid);
699
700         zero(ucred);
701         ucred.pid = getpid();
702         ucred.uid = getuid();
703         ucred.gid = getgid();
704
705         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0);
706 }
707
708 static void dispatch_message(Server *s,
709                              struct iovec *iovec, unsigned n, unsigned m,
710                              struct ucred *ucred,
711                              struct timeval *tv,
712                              const char *label, size_t label_len,
713                              int priority) {
714         int rl;
715         char *path = NULL, *c;
716
717         assert(s);
718         assert(iovec || n == 0);
719
720         if (n == 0)
721                 return;
722
723         if (!ucred)
724                 goto finish;
725
726         path = shortened_cgroup_path(ucred->pid);
727         if (!path)
728                 goto finish;
729
730         /* example: /user/lennart/3/foobar
731          *          /system/dbus.service/foobar
732          *
733          * So let's cut of everything past the third /, since that is
734          * wher user directories start */
735
736         c = strchr(path, '/');
737         if (c) {
738                 c = strchr(c+1, '/');
739                 if (c) {
740                         c = strchr(c+1, '/');
741                         if (c)
742                                 *c = 0;
743                 }
744         }
745
746         rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
747
748         if (rl == 0) {
749                 free(path);
750                 return;
751         }
752
753         /* Write a suppression message if we suppressed something */
754         if (rl > 1)
755                 driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
756
757         free(path);
758
759 finish:
760         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len);
761 }
762
763 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
764         struct msghdr msghdr;
765         struct cmsghdr *cmsg;
766         union {
767                 struct cmsghdr cmsghdr;
768                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
769         } control;
770         union sockaddr_union sa;
771
772         assert(s);
773         assert(iovec);
774         assert(n_iovec > 0);
775
776         zero(msghdr);
777         msghdr.msg_iov = (struct iovec*) iovec;
778         msghdr.msg_iovlen = n_iovec;
779
780         zero(sa);
781         sa.un.sun_family = AF_UNIX;
782         strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
783         msghdr.msg_name = &sa;
784         msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
785
786         if (ucred) {
787                 zero(control);
788                 msghdr.msg_control = &control;
789                 msghdr.msg_controllen = sizeof(control);
790
791                 cmsg = CMSG_FIRSTHDR(&msghdr);
792                 cmsg->cmsg_level = SOL_SOCKET;
793                 cmsg->cmsg_type = SCM_CREDENTIALS;
794                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
795                 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
796                 msghdr.msg_controllen = cmsg->cmsg_len;
797         }
798
799         /* Forward the syslog message we received via /dev/log to
800          * /run/systemd/syslog. Unfortunately we currently can't set
801          * the SO_TIMESTAMP auxiliary data, and hence we don't. */
802
803         if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
804                 return;
805
806         /* The socket is full? I guess the syslog implementation is
807          * too slow, and we shouldn't wait for that... */
808         if (errno == EAGAIN)
809                 return;
810
811         if (ucred && errno == ESRCH) {
812                 struct ucred u;
813
814                 /* Hmm, presumably the sender process vanished
815                  * by now, so let's fix it as good as we
816                  * can, and retry */
817
818                 u = *ucred;
819                 u.pid = getpid();
820                 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
821
822                 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
823                         return;
824
825                 if (errno == EAGAIN)
826                         return;
827         }
828
829         log_debug("Failed to forward syslog message: %m");
830 }
831
832 static void forward_syslog_raw(Server *s, const char *buffer, struct ucred *ucred, struct timeval *tv) {
833         struct iovec iovec;
834
835         assert(s);
836         assert(buffer);
837
838         IOVEC_SET_STRING(iovec, buffer);
839         forward_syslog_iovec(s, &iovec, 1, ucred, tv);
840 }
841
842 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
843         struct iovec iovec[5];
844         char header_priority[6], header_time[64], header_pid[16];
845         int n = 0;
846         time_t t;
847         struct tm *tm;
848         char *ident_buf = NULL;
849
850         assert(s);
851         assert(priority >= 0);
852         assert(priority <= 999);
853         assert(message);
854
855         /* First: priority field */
856         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
857         char_array_0(header_priority);
858         IOVEC_SET_STRING(iovec[n++], header_priority);
859
860         /* Second: timestamp */
861         t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
862         tm = localtime(&t);
863         if (!tm)
864                 return;
865         if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
866                 return;
867         IOVEC_SET_STRING(iovec[n++], header_time);
868
869         /* Third: identifier and PID */
870         if (ucred) {
871                 if (!identifier) {
872                         get_process_comm(ucred->pid, &ident_buf);
873                         identifier = ident_buf;
874                 }
875
876                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
877                 char_array_0(header_pid);
878
879                 if (identifier)
880                         IOVEC_SET_STRING(iovec[n++], identifier);
881
882                 IOVEC_SET_STRING(iovec[n++], header_pid);
883         } else if (identifier) {
884                 IOVEC_SET_STRING(iovec[n++], identifier);
885                 IOVEC_SET_STRING(iovec[n++], ": ");
886         }
887
888         /* Fourth: message */
889         IOVEC_SET_STRING(iovec[n++], message);
890
891         forward_syslog_iovec(s, iovec, n, ucred, tv);
892
893         free(ident_buf);
894 }
895
/* Returns priority unchanged if it carries a non-zero syslog facility.
 * Otherwise returns just its level bits combined with the LOG_USER
 * facility. */
static int fixup_priority(int priority) {
        return (priority & LOG_FACMASK) != 0
                ? priority
                : ((priority & LOG_PRIMASK) | LOG_USER);
}
903
904 static void forward_kmsg(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
905         struct iovec iovec[5];
906         char header_priority[6], header_pid[16];
907         int n = 0;
908         char *ident_buf = NULL;
909         int fd;
910
911         assert(s);
912         assert(priority >= 0);
913         assert(priority <= 999);
914         assert(message);
915
916         /* Never allow messages with kernel facility to be written to
917          * kmsg, regardless where the data comes from. */
918         priority = fixup_priority(priority);
919
920         /* First: priority field */
921         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
922         char_array_0(header_priority);
923         IOVEC_SET_STRING(iovec[n++], header_priority);
924
925         /* Second: identifier and PID */
926         if (ucred) {
927                 if (!identifier) {
928                         get_process_comm(ucred->pid, &ident_buf);
929                         identifier = ident_buf;
930                 }
931
932                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
933                 char_array_0(header_pid);
934
935                 if (identifier)
936                         IOVEC_SET_STRING(iovec[n++], identifier);
937
938                 IOVEC_SET_STRING(iovec[n++], header_pid);
939         } else if (identifier) {
940                 IOVEC_SET_STRING(iovec[n++], identifier);
941                 IOVEC_SET_STRING(iovec[n++], ": ");
942         }
943
944         /* Fourth: message */
945         IOVEC_SET_STRING(iovec[n++], message);
946         IOVEC_SET_STRING(iovec[n++], "\n");
947
948         fd = open("/dev/kmsg", O_WRONLY|O_NOCTTY|O_CLOEXEC);
949         if (fd < 0) {
950                 log_debug("Failed to open /dev/kmsg for logging: %s", strerror(errno));
951                 goto finish;
952         }
953
954         if (writev(fd, iovec, n) < 0)
955                 log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
956
957         close_nointr_nofail(fd);
958
959 finish:
960         free(ident_buf);
961 }
962
963 static void forward_console(Server *s, const char *identifier, const char *message, struct ucred *ucred) {
964         struct iovec iovec[4];
965         char header_pid[16];
966         int n = 0, fd;
967         char *ident_buf = NULL;
968         const char *tty;
969
970         assert(s);
971         assert(message);
972
973         /* First: identifier and PID */
974         if (ucred) {
975                 if (!identifier) {
976                         get_process_comm(ucred->pid, &ident_buf);
977                         identifier = ident_buf;
978                 }
979
980                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
981                 char_array_0(header_pid);
982
983                 if (identifier)
984                         IOVEC_SET_STRING(iovec[n++], identifier);
985
986                 IOVEC_SET_STRING(iovec[n++], header_pid);
987         } else if (identifier) {
988                 IOVEC_SET_STRING(iovec[n++], identifier);
989                 IOVEC_SET_STRING(iovec[n++], ": ");
990         }
991
992         /* Third: message */
993         IOVEC_SET_STRING(iovec[n++], message);
994         IOVEC_SET_STRING(iovec[n++], "\n");
995
996         tty = s->tty_path ? s->tty_path : "/dev/console";
997
998         fd = open_terminal(tty, O_WRONLY|O_NOCTTY|O_CLOEXEC);
999         if (fd < 0) {
1000                 log_debug("Failed to open %s for logging: %s", tty, strerror(errno));
1001                 goto finish;
1002         }
1003
1004         if (writev(fd, iovec, n) < 0)
1005                 log_debug("Failed to write to %s for logging: %s", tty, strerror(errno));
1006
1007         close_nointr_nofail(fd);
1008
1009 finish:
1010         free(ident_buf);
1011 }
1012
1013 static void read_identifier(const char **buf, char **identifier, char **pid) {
1014         const char *p;
1015         char *t;
1016         size_t l, e;
1017
1018         assert(buf);
1019         assert(identifier);
1020         assert(pid);
1021
1022         p = *buf;
1023
1024         p += strspn(p, WHITESPACE);
1025         l = strcspn(p, WHITESPACE);
1026
1027         if (l <= 0 ||
1028             p[l-1] != ':')
1029                 return;
1030
1031         e = l;
1032         l--;
1033
1034         if (p[l-1] == ']') {
1035                 size_t k = l-1;
1036
1037                 for (;;) {
1038
1039                         if (p[k] == '[') {
1040                                 t = strndup(p+k+1, l-k-2);
1041                                 if (t)
1042                                         *pid = t;
1043
1044                                 l = k;
1045                                 break;
1046                         }
1047
1048                         if (k == 0)
1049                                 break;
1050
1051                         k--;
1052                 }
1053         }
1054
1055         t = strndup(p, l);
1056         if (t)
1057                 *identifier = t;
1058
1059         *buf = p + e;
1060         *buf += strspn(*buf, WHITESPACE);
1061 }
1062
1063 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv, const char *label, size_t label_len) {
1064         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
1065         struct iovec iovec[N_IOVEC_META_FIELDS + 6];
1066         unsigned n = 0;
1067         int priority = LOG_USER | LOG_INFO;
1068         char *identifier = NULL, *pid = NULL;
1069
1070         assert(s);
1071         assert(buf);
1072
1073         if (s->forward_to_syslog)
1074                 forward_syslog_raw(s, buf, ucred, tv);
1075
1076         parse_syslog_priority((char**) &buf, &priority);
1077         skip_syslog_date((char**) &buf);
1078         read_identifier(&buf, &identifier, &pid);
1079
1080         if (s->forward_to_kmsg)
1081                 forward_kmsg(s, priority, identifier, buf, ucred);
1082
1083         if (s->forward_to_console)
1084                 forward_console(s, identifier, buf, ucred);
1085
1086         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
1087
1088         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1089                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1090
1091         if (priority & LOG_FACMASK)
1092                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1093                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1094
1095         if (identifier) {
1096                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1097                 if (syslog_identifier)
1098                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1099         }
1100
1101         if (pid) {
1102                 syslog_pid = strappend("SYSLOG_PID=", pid);
1103                 if (syslog_pid)
1104                         IOVEC_SET_STRING(iovec[n++], syslog_pid);
1105         }
1106
1107         message = strappend("MESSAGE=", buf);
1108         if (message)
1109                 IOVEC_SET_STRING(iovec[n++], message);
1110
1111         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, label, label_len, priority);
1112
1113         free(message);
1114         free(identifier);
1115         free(pid);
1116         free(syslog_priority);
1117         free(syslog_facility);
1118         free(syslog_identifier);
1119 }
1120
/* Check whether the l bytes at p form a field name that clients may set.
 *
 * This roughly follows the POSIX syntax recommendations for environment
 * variable names, with a few extra restrictions:
 *
 *   http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
 *
 * A valid name is 1..64 characters long, consists only of A-Z, 0-9 and
 * '_', and starts with neither a digit nor an underscore (names with a
 * leading underscore are protected). */
static bool valid_user_field(const char *p, size_t l) {
        size_t i;

        /* Neither empty nor overly long names */
        if (l == 0 || l > 64)
                return false;

        /* No leading underscore (protected), no leading digit */
        if (p[0] == '_' || (p[0] >= '0' && p[0] <= '9'))
                return false;

        for (i = 0; i < l; i++) {
                char c = p[i];
                bool upper = c >= 'A' && c <= 'Z';
                bool digit = c >= '0' && c <= '9';

                if (!upper && !digit && c != '_')
                        return false;
        }

        return true;
}
1155
1156 static void process_native_message(
1157                 Server *s,
1158                 const void *buffer, size_t buffer_size,
1159                 struct ucred *ucred,
1160                 struct timeval *tv,
1161                 const char *label, size_t label_len) {
1162
1163         struct iovec *iovec = NULL;
1164         unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1165         const char *p;
1166         size_t remaining;
1167         int priority = LOG_INFO;
1168         char *identifier = NULL, *message = NULL;
1169
1170         assert(s);
1171         assert(buffer || buffer_size == 0);
1172
1173         p = buffer;
1174         remaining = buffer_size;
1175
1176         while (remaining > 0) {
1177                 const char *e, *q;
1178
1179                 e = memchr(p, '\n', remaining);
1180
1181                 if (!e) {
1182                         /* Trailing noise, let's ignore it, and flush what we collected */
1183                         log_debug("Received message with trailing noise, ignoring.");
1184                         break;
1185                 }
1186
1187                 if (e == p) {
1188                         /* Entry separator */
1189                         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, priority);
1190                         n = 0;
1191                         priority = LOG_INFO;
1192
1193                         p++;
1194                         remaining--;
1195                         continue;
1196                 }
1197
1198                 if (*p == '.' || *p == '#') {
1199                         /* Ignore control commands for now, and
1200                          * comments too. */
1201                         remaining -= (e - p) + 1;
1202                         p = e + 1;
1203                         continue;
1204                 }
1205
1206                 /* A property follows */
1207
1208                 if (n+N_IOVEC_META_FIELDS >= m) {
1209                         struct iovec *c;
1210                         unsigned u;
1211
1212                         u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1213                         c = realloc(iovec, u * sizeof(struct iovec));
1214                         if (!c) {
1215                                 log_error("Out of memory");
1216                                 break;
1217                         }
1218
1219                         iovec = c;
1220                         m = u;
1221                 }
1222
1223                 q = memchr(p, '=', e - p);
1224                 if (q) {
1225                         if (valid_user_field(p, q - p)) {
1226                                 size_t l;
1227
1228                                 l = e - p;
1229
1230                                 /* If the field name starts with an
1231                                  * underscore, skip the variable,
1232                                  * since that indidates a trusted
1233                                  * field */
1234                                 iovec[n].iov_base = (char*) p;
1235                                 iovec[n].iov_len = l;
1236                                 n++;
1237
1238                                 /* We need to determine the priority
1239                                  * of this entry for the rate limiting
1240                                  * logic */
1241                                 if (l == 10 &&
1242                                     memcmp(p, "PRIORITY=", 9) == 0 &&
1243                                     p[9] >= '0' && p[9] <= '9')
1244                                         priority = (priority & LOG_FACMASK) | (p[9] - '0');
1245
1246                                 else if (l == 17 &&
1247                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1248                                          p[16] >= '0' && p[16] <= '9')
1249                                         priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1250
1251                                 else if (l == 18 &&
1252                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1253                                          p[16] >= '0' && p[16] <= '9' &&
1254                                          p[17] >= '0' && p[17] <= '9')
1255                                         priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1256
1257                                 else if (l >= 19 &&
1258                                          memcmp(p, "SYSLOG_IDENTIFIER=", 18) == 0) {
1259                                         char *t;
1260
1261                                         t = strndup(p + 18, l - 18);
1262                                         if (t) {
1263                                                 free(identifier);
1264                                                 identifier = t;
1265                                         }
1266                                 } else if (l >= 8 &&
1267                                            memcmp(p, "MESSAGE=", 8) == 0) {
1268                                         char *t;
1269
1270                                         t = strndup(p + 8, l - 8);
1271                                         if (t) {
1272                                                 free(message);
1273                                                 message = t;
1274                                         }
1275                                 }
1276                         }
1277
1278                         remaining -= (e - p) + 1;
1279                         p = e + 1;
1280                         continue;
1281                 } else {
1282                         le64_t l_le;
1283                         uint64_t l;
1284                         char *k;
1285
1286                         if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1287                                 log_debug("Failed to parse message, ignoring.");
1288                                 break;
1289                         }
1290
1291                         memcpy(&l_le, e + 1, sizeof(uint64_t));
1292                         l = le64toh(l_le);
1293
1294                         if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1295                             e[1+sizeof(uint64_t)+l] != '\n') {
1296                                 log_debug("Failed to parse message, ignoring.");
1297                                 break;
1298                         }
1299
1300                         k = malloc((e - p) + 1 + l);
1301                         if (!k) {
1302                                 log_error("Out of memory");
1303                                 break;
1304                         }
1305
1306                         memcpy(k, p, e - p);
1307                         k[e - p] = '=';
1308                         memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1309
1310                         if (valid_user_field(p, e - p)) {
1311                                 iovec[n].iov_base = k;
1312                                 iovec[n].iov_len = (e - p) + 1 + l;
1313                                 n++;
1314                         } else
1315                                 free(k);
1316
1317                         remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1318                         p = e + 1 + sizeof(uint64_t) + l + 1;
1319                 }
1320         }
1321
1322         if (n <= 0)
1323                 goto finish;
1324
1325         tn = n++;
1326         IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1327
1328         if (message) {
1329                 if (s->forward_to_syslog)
1330                         forward_syslog(s, priority, identifier, message, ucred, tv);
1331
1332                 if (s->forward_to_kmsg)
1333                         forward_kmsg(s, priority, identifier, message, ucred);
1334
1335                 if (s->forward_to_console)
1336                         forward_console(s, identifier, message, ucred);
1337         }
1338
1339         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, priority);
1340
1341 finish:
1342         for (j = 0; j < n; j++)  {
1343                 if (j == tn)
1344                         continue;
1345
1346                 if (iovec[j].iov_base < buffer ||
1347                     (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1348                         free(iovec[j].iov_base);
1349         }
1350
1351         free(iovec);
1352         free(identifier);
1353         free(message);
1354 }
1355
1356 static void process_native_file(
1357                 Server *s,
1358                 int fd,
1359                 struct ucred *ucred,
1360                 struct timeval *tv,
1361                 const char *label, size_t label_len) {
1362
1363         struct stat st;
1364         void *p;
1365         ssize_t n;
1366
1367         assert(s);
1368         assert(fd >= 0);
1369
1370         /* Data is in the passed file, since it didn't fit in a
1371          * datagram. We can't map the file here, since clients might
1372          * then truncate it and trigger a SIGBUS for us. So let's
1373          * stupidly read it */
1374
1375         if (fstat(fd, &st) < 0) {
1376                 log_error("Failed to stat passed file, ignoring: %m");
1377                 return;
1378         }
1379
1380         if (!S_ISREG(st.st_mode)) {
1381                 log_error("File passed is not regular. Ignoring.");
1382                 return;
1383         }
1384
1385         if (st.st_size <= 0)
1386                 return;
1387
1388         if (st.st_size > ENTRY_SIZE_MAX) {
1389                 log_error("File passed too large. Ignoring.");
1390                 return;
1391         }
1392
1393         p = malloc(st.st_size);
1394         if (!p) {
1395                 log_error("Out of memory");
1396                 return;
1397         }
1398
1399         n = pread(fd, p, st.st_size, 0);
1400         if (n < 0)
1401                 log_error("Failed to read file, ignoring: %s", strerror(-n));
1402         else if (n > 0)
1403                 process_native_message(s, p, n, ucred, tv, label, label_len);
1404
1405         free(p);
1406 }
1407
1408 static int stdout_stream_log(StdoutStream *s, const char *p) {
1409         struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1410         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1411         unsigned n = 0;
1412         int priority;
1413         char *label = NULL;
1414         size_t label_len = 0;
1415
1416         assert(s);
1417         assert(p);
1418
1419         if (isempty(p))
1420                 return 0;
1421
1422         priority = s->priority;
1423
1424         if (s->level_prefix)
1425                 parse_syslog_priority((char**) &p, &priority);
1426
1427         if (s->forward_to_syslog || s->server->forward_to_syslog)
1428                 forward_syslog(s->server, fixup_priority(priority), s->identifier, p, &s->ucred, NULL);
1429
1430         if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1431                 forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1432
1433         if (s->forward_to_console || s->server->forward_to_console)
1434                 forward_console(s->server, s->identifier, p, &s->ucred);
1435
1436         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1437
1438         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1439                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1440
1441         if (priority & LOG_FACMASK)
1442                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1443                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1444
1445         if (s->identifier) {
1446                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1447                 if (syslog_identifier)
1448                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1449         }
1450
1451         message = strappend("MESSAGE=", p);
1452         if (message)
1453                 IOVEC_SET_STRING(iovec[n++], message);
1454
1455 #ifdef HAVE_SELINUX
1456         if (s->security_context) {
1457                 label = (char*) s->security_context;
1458                 label_len = strlen((char*) s->security_context);
1459         }
1460 #endif
1461
1462         dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, label, label_len, priority);
1463
1464         free(message);
1465         free(syslog_priority);
1466         free(syslog_facility);
1467         free(syslog_identifier);
1468
1469         return 0;
1470 }
1471
1472 static int stdout_stream_line(StdoutStream *s, char *p) {
1473         int r;
1474
1475         assert(s);
1476         assert(p);
1477
1478         p = strstrip(p);
1479
1480         switch (s->state) {
1481
1482         case STDOUT_STREAM_IDENTIFIER:
1483                 if (isempty(p))
1484                         s->identifier = NULL;
1485                 else  {
1486                         s->identifier = strdup(p);
1487                         if (!s->identifier) {
1488                                 log_error("Out of memory");
1489                                 return -ENOMEM;
1490                         }
1491                 }
1492
1493                 s->state = STDOUT_STREAM_PRIORITY;
1494                 return 0;
1495
1496         case STDOUT_STREAM_PRIORITY:
1497                 r = safe_atoi(p, &s->priority);
1498                 if (r < 0 || s->priority <= 0 || s->priority >= 999) {
1499                         log_warning("Failed to parse log priority line.");
1500                         return -EINVAL;
1501                 }
1502
1503                 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1504                 return 0;
1505
1506         case STDOUT_STREAM_LEVEL_PREFIX:
1507                 r = parse_boolean(p);
1508                 if (r < 0) {
1509                         log_warning("Failed to parse level prefix line.");
1510                         return -EINVAL;
1511                 }
1512
1513                 s->level_prefix = !!r;
1514                 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1515                 return 0;
1516
1517         case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1518                 r = parse_boolean(p);
1519                 if (r < 0) {
1520                         log_warning("Failed to parse forward to syslog line.");
1521                         return -EINVAL;
1522                 }
1523
1524                 s->forward_to_syslog = !!r;
1525                 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1526                 return 0;
1527
1528         case STDOUT_STREAM_FORWARD_TO_KMSG:
1529                 r = parse_boolean(p);
1530                 if (r < 0) {
1531                         log_warning("Failed to parse copy to kmsg line.");
1532                         return -EINVAL;
1533                 }
1534
1535                 s->forward_to_kmsg = !!r;
1536                 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1537                 return 0;
1538
1539         case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1540                 r = parse_boolean(p);
1541                 if (r < 0) {
1542                         log_warning("Failed to parse copy to console line.");
1543                         return -EINVAL;
1544                 }
1545
1546                 s->forward_to_console = !!r;
1547                 s->state = STDOUT_STREAM_RUNNING;
1548                 return 0;
1549
1550         case STDOUT_STREAM_RUNNING:
1551                 return stdout_stream_log(s, p);
1552         }
1553
1554         assert_not_reached("Unknown stream state");
1555 }
1556
1557 static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
1558         char *p;
1559         size_t remaining;
1560         int r;
1561
1562         assert(s);
1563
1564         p = s->buffer;
1565         remaining = s->length;
1566         for (;;) {
1567                 char *end;
1568                 size_t skip;
1569
1570                 end = memchr(p, '\n', remaining);
1571                 if (end)
1572                         skip = end - p + 1;
1573                 else if (remaining >= sizeof(s->buffer) - 1) {
1574                         end = p + sizeof(s->buffer) - 1;
1575                         skip = remaining;
1576                 } else
1577                         break;
1578
1579                 *end = 0;
1580
1581                 r = stdout_stream_line(s, p);
1582                 if (r < 0)
1583                         return r;
1584
1585                 remaining -= skip;
1586                 p += skip;
1587         }
1588
1589         if (force_flush && remaining > 0) {
1590                 p[remaining] = 0;
1591                 r = stdout_stream_line(s, p);
1592                 if (r < 0)
1593                         return r;
1594
1595                 p += remaining;
1596                 remaining = 0;
1597         }
1598
1599         if (p > s->buffer) {
1600                 memmove(s->buffer, p, remaining);
1601                 s->length = remaining;
1602         }
1603
1604         return 0;
1605 }
1606
1607 static int stdout_stream_process(StdoutStream *s) {
1608         ssize_t l;
1609         int r;
1610
1611         assert(s);
1612
1613         l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1614         if (l < 0) {
1615
1616                 if (errno == EAGAIN)
1617                         return 0;
1618
1619                 log_warning("Failed to read from stream: %m");
1620                 return -errno;
1621         }
1622
1623         if (l == 0) {
1624                 r = stdout_stream_scan(s, true);
1625                 if (r < 0)
1626                         return r;
1627
1628                 return 0;
1629         }
1630
1631         s->length += l;
1632         r = stdout_stream_scan(s, false);
1633         if (r < 0)
1634                 return r;
1635
1636         return 1;
1637
1638 }
1639
1640 static void stdout_stream_free(StdoutStream *s) {
1641         assert(s);
1642
1643         if (s->server) {
1644                 assert(s->server->n_stdout_streams > 0);
1645                 s->server->n_stdout_streams --;
1646                 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
1647         }
1648
1649         if (s->fd >= 0) {
1650                 if (s->server)
1651                         epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1652
1653                 close_nointr_nofail(s->fd);
1654         }
1655
1656 #ifdef HAVE_SELINUX
1657         if (s->security_context)
1658                 freecon(s->security_context);
1659 #endif
1660
1661         free(s->identifier);
1662         free(s);
1663 }
1664
1665 static int stdout_stream_new(Server *s) {
1666         StdoutStream *stream;
1667         int fd, r;
1668         socklen_t len;
1669         struct epoll_event ev;
1670
1671         assert(s);
1672
1673         fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1674         if (fd < 0) {
1675                 if (errno == EAGAIN)
1676                         return 0;
1677
1678                 log_error("Failed to accept stdout connection: %m");
1679                 return -errno;
1680         }
1681
1682         if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1683                 log_warning("Too many stdout streams, refusing connection.");
1684                 close_nointr_nofail(fd);
1685                 return 0;
1686         }
1687
1688         stream = new0(StdoutStream, 1);
1689         if (!stream) {
1690                 log_error("Out of memory.");
1691                 close_nointr_nofail(fd);
1692                 return -ENOMEM;
1693         }
1694
1695         stream->fd = fd;
1696
1697         len = sizeof(stream->ucred);
1698         if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1699                 log_error("Failed to determine peer credentials: %m");
1700                 r = -errno;
1701                 goto fail;
1702         }
1703
1704 #ifdef HAVE_SELINUX
1705         if (getpeercon(fd, &stream->security_context) < 0 && errno != ENOPROTOOPT)
1706                 log_error("Failed to determine peer security context: %m");
1707 #endif
1708
1709         if (shutdown(fd, SHUT_WR) < 0) {
1710                 log_error("Failed to shutdown writing side of socket: %m");
1711                 r = -errno;
1712                 goto fail;
1713         }
1714
1715         zero(ev);
1716         ev.data.ptr = stream;
1717         ev.events = EPOLLIN;
1718         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1719                 log_error("Failed to add stream to event loop: %m");
1720                 r = -errno;
1721                 goto fail;
1722         }
1723
1724         stream->server = s;
1725         LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1726         s->n_stdout_streams ++;
1727
1728         return 0;
1729
1730 fail:
1731         stdout_stream_free(stream);
1732         return r;
1733 }
1734
1735 static int parse_kernel_timestamp(char **_p, usec_t *t) {
1736         usec_t r;
1737         int k, i;
1738         char *p;
1739
1740         assert(_p);
1741         assert(*_p);
1742         assert(t);
1743
1744         p = *_p;
1745
1746         if (strlen(p) < 14 || p[0] != '[' || p[13] != ']' || p[6] != '.')
1747                 return 0;
1748
1749         r = 0;
1750
1751         for (i = 1; i <= 5; i++) {
1752                 r *= 10;
1753
1754                 if (p[i] == ' ')
1755                         continue;
1756
1757                 k = undecchar(p[i]);
1758                 if (k < 0)
1759                         return 0;
1760
1761                 r += k;
1762         }
1763
1764         for (i = 7; i <= 12; i++) {
1765                 r *= 10;
1766
1767                 k = undecchar(p[i]);
1768                 if (k < 0)
1769                         return 0;
1770
1771                 r += k;
1772         }
1773
1774         *t = r;
1775         *_p += 14;
1776         *_p += strspn(*_p, WHITESPACE);
1777
1778         return 1;
1779 }
1780
/* Returns true if the string pid can be parsed with parse_pid() and names
 * the process we are running in, false otherwise. */
static bool is_us(const char *pid) {
        pid_t parsed;

        assert(pid);

        return parse_pid(pid, &parsed) >= 0 && parsed == getpid();
}
1791
1792 static void proc_kmsg_line(Server *s, const char *p) {
1793         struct iovec iovec[N_IOVEC_META_FIELDS + 7];
1794         char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
1795         int priority = LOG_KERN | LOG_INFO;
1796         unsigned n = 0;
1797         usec_t usec;
1798         char *identifier = NULL, *pid = NULL;
1799
1800         assert(s);
1801         assert(p);
1802
1803         if (isempty(p))
1804                 return;
1805
1806         parse_syslog_priority((char **) &p, &priority);
1807
1808         if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
1809                 return;
1810
1811         if (parse_kernel_timestamp((char **) &p, &usec) > 0) {
1812                 if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
1813                              (unsigned long long) usec) >= 0)
1814                         IOVEC_SET_STRING(iovec[n++], source_time);
1815         }
1816
1817         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
1818
1819         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1820                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1821
1822         if ((priority & LOG_FACMASK) == LOG_KERN) {
1823
1824                 if (s->forward_to_syslog)
1825                         forward_syslog(s, priority, "kernel", p, NULL, NULL);
1826
1827                 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
1828         } else {
1829                 read_identifier(&p, &identifier, &pid);
1830
1831                 /* Avoid any messages we generated ourselves via
1832                  * log_info() and friends. */
1833                 if (pid && is_us(pid))
1834                         goto finish;
1835
1836                 if (s->forward_to_syslog)
1837                         forward_syslog(s, priority, identifier, p, NULL, NULL);
1838
1839                 if (identifier) {
1840                         syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1841                         if (syslog_identifier)
1842                                 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1843                 }
1844
1845                 if (pid) {
1846                         syslog_pid = strappend("SYSLOG_PID=", pid);
1847                         if (syslog_pid)
1848                                 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1849                 }
1850
1851                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1852                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1853         }
1854
1855         message = strappend("MESSAGE=", p);
1856         if (message)
1857                 IOVEC_SET_STRING(iovec[n++], message);
1858
1859         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, priority);
1860
1861 finish:
1862         free(message);
1863         free(syslog_priority);
1864         free(syslog_identifier);
1865         free(syslog_pid);
1866         free(syslog_facility);
1867         free(source_time);
1868         free(identifier);
1869         free(pid);
1870 }
1871
1872 static void proc_kmsg_scan(Server *s) {
1873         char *p;
1874         size_t remaining;
1875
1876         assert(s);
1877
1878         p = s->proc_kmsg_buffer;
1879         remaining = s->proc_kmsg_length;
1880         for (;;) {
1881                 char *end;
1882                 size_t skip;
1883
1884                 end = memchr(p, '\n', remaining);
1885                 if (end)
1886                         skip = end - p + 1;
1887                 else if (remaining >= sizeof(s->proc_kmsg_buffer) - 1) {
1888                         end = p + sizeof(s->proc_kmsg_buffer) - 1;
1889                         skip = remaining;
1890                 } else
1891                         break;
1892
1893                 *end = 0;
1894
1895                 proc_kmsg_line(s, p);
1896
1897                 remaining -= skip;
1898                 p += skip;
1899         }
1900
1901         if (p > s->proc_kmsg_buffer) {
1902                 memmove(s->proc_kmsg_buffer, p, remaining);
1903                 s->proc_kmsg_length = remaining;
1904         }
1905 }
1906
1907 static int system_journal_open(Server *s) {
1908         int r;
1909         char *fn;
1910         sd_id128_t machine;
1911         char ids[33];
1912
1913         r = sd_id128_get_machine(&machine);
1914         if (r < 0)
1915                 return r;
1916
1917         sd_id128_to_string(machine, ids);
1918
1919         if (!s->system_journal) {
1920
1921                 /* First try to create the machine path, but not the prefix */
1922                 fn = strappend("/var/log/journal/", ids);
1923                 if (!fn)
1924                         return -ENOMEM;
1925                 (void) mkdir(fn, 0755);
1926                 free(fn);
1927
1928                 /* The create the system journal file */
1929                 fn = join("/var/log/journal/", ids, "/system.journal", NULL);
1930                 if (!fn)
1931                         return -ENOMEM;
1932
1933                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, NULL, &s->system_journal);
1934                 free(fn);
1935
1936                 if (r >= 0) {
1937                         journal_default_metrics(&s->system_metrics, s->system_journal->fd);
1938
1939                         s->system_journal->metrics = s->system_metrics;
1940                         s->system_journal->compress = s->compress;
1941
1942                         server_fix_perms(s, s->system_journal, 0);
1943                 } else if (r < 0) {
1944
1945                         if (r != -ENOENT && r != -EROFS)
1946                                 log_warning("Failed to open system journal: %s", strerror(-r));
1947
1948                         r = 0;
1949                 }
1950         }
1951
1952         if (!s->runtime_journal) {
1953
1954                 fn = join("/run/log/journal/", ids, "/system.journal", NULL);
1955                 if (!fn)
1956                         return -ENOMEM;
1957
1958                 if (s->system_journal) {
1959
1960                         /* Try to open the runtime journal, but only
1961                          * if it already exists, so that we can flush
1962                          * it into the system journal */
1963
1964                         r = journal_file_open(fn, O_RDWR, 0640, NULL, &s->runtime_journal);
1965                         free(fn);
1966
1967                         if (r < 0) {
1968                                 if (r != -ENOENT)
1969                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
1970
1971                                 r = 0;
1972                         }
1973
1974                 } else {
1975
1976                         /* OK, we really need the runtime journal, so create
1977                          * it if necessary. */
1978
1979                         (void) mkdir_parents_label(fn, 0755);
1980                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, NULL, &s->runtime_journal);
1981                         free(fn);
1982
1983                         if (r < 0) {
1984                                 log_error("Failed to open runtime journal: %s", strerror(-r));
1985                                 return r;
1986                         }
1987                 }
1988
1989                 if (s->runtime_journal) {
1990                         journal_default_metrics(&s->runtime_metrics, s->runtime_journal->fd);
1991
1992                         s->runtime_journal->metrics = s->runtime_metrics;
1993                         s->runtime_journal->compress = s->compress;
1994
1995                         server_fix_perms(s, s->runtime_journal, 0);
1996                 }
1997         }
1998
1999         return r;
2000 }
2001
2002 static int server_flush_to_var(Server *s) {
2003         char path[] = "/run/log/journal/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
2004         Object *o = NULL;
2005         int r;
2006         sd_id128_t machine;
2007         sd_journal *j;
2008         usec_t ts;
2009
2010         assert(s);
2011
2012         if (!s->runtime_journal)
2013                 return 0;
2014
2015         ts = now(CLOCK_MONOTONIC);
2016         if (s->var_available_timestamp + RECHECK_VAR_AVAILABLE_USEC > ts)
2017                 return 0;
2018
2019         s->var_available_timestamp = ts;
2020
2021         system_journal_open(s);
2022
2023         if (!s->system_journal)
2024                 return 0;
2025
2026         log_info("Flushing to /var...");
2027
2028         r = sd_id128_get_machine(&machine);
2029         if (r < 0) {
2030                 log_error("Failed to get machine id: %s", strerror(-r));
2031                 return r;
2032         }
2033
2034         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
2035         if (r < 0) {
2036                 log_error("Failed to read runtime journal: %s", strerror(-r));
2037                 return r;
2038         }
2039
2040         SD_JOURNAL_FOREACH(j) {
2041                 JournalFile *f;
2042
2043                 f = j->current_file;
2044                 assert(f && f->current_offset > 0);
2045
2046                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2047                 if (r < 0) {
2048                         log_error("Can't read entry: %s", strerror(-r));
2049                         goto finish;
2050                 }
2051
2052                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2053                 if (r == -E2BIG) {
2054                         log_info("Allocation limit reached.");
2055
2056                         journal_file_post_change(s->system_journal);
2057                         server_rotate(s);
2058                         server_vacuum(s);
2059
2060                         r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2061                 }
2062
2063                 if (r < 0) {
2064                         log_error("Can't write entry: %s", strerror(-r));
2065                         goto finish;
2066                 }
2067         }
2068
2069 finish:
2070         journal_file_post_change(s->system_journal);
2071
2072         journal_file_close(s->runtime_journal);
2073         s->runtime_journal = NULL;
2074
2075         if (r >= 0) {
2076                 sd_id128_to_string(machine, path + 17);
2077                 rm_rf(path, false, true, false);
2078         }
2079
2080         return r;
2081 }
2082
2083 static int server_read_proc_kmsg(Server *s) {
2084         ssize_t l;
2085         assert(s);
2086         assert(s->proc_kmsg_fd >= 0);
2087
2088         l = read(s->proc_kmsg_fd, s->proc_kmsg_buffer + s->proc_kmsg_length, sizeof(s->proc_kmsg_buffer) - 1 - s->proc_kmsg_length);
2089         if (l < 0) {
2090
2091                 if (errno == EAGAIN || errno == EINTR)
2092                         return 0;
2093
2094                 log_error("Failed to read from kernel: %m");
2095                 return -errno;
2096         }
2097
2098         s->proc_kmsg_length += l;
2099
2100         proc_kmsg_scan(s);
2101         return 1;
2102 }
2103
2104 static int server_flush_proc_kmsg(Server *s) {
2105         int r;
2106
2107         assert(s);
2108
2109         if (s->proc_kmsg_fd < 0)
2110                 return 0;
2111
2112         log_info("Flushing /proc/kmsg...");
2113
2114         for (;;) {
2115                 r = server_read_proc_kmsg(s);
2116                 if (r < 0)
2117                         return r;
2118
2119                 if (r == 0)
2120                         break;
2121         }
2122
2123         return 0;
2124 }
2125
2126 static int process_event(Server *s, struct epoll_event *ev) {
2127         assert(s);
2128
2129         if (ev->data.fd == s->signal_fd) {
2130                 struct signalfd_siginfo sfsi;
2131                 ssize_t n;
2132
2133                 if (ev->events != EPOLLIN) {
2134                         log_info("Got invalid event from epoll.");
2135                         return -EIO;
2136                 }
2137
2138                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
2139                 if (n != sizeof(sfsi)) {
2140
2141                         if (n >= 0)
2142                                 return -EIO;
2143
2144                         if (errno == EINTR || errno == EAGAIN)
2145                                 return 1;
2146
2147                         return -errno;
2148                 }
2149
2150                 if (sfsi.ssi_signo == SIGUSR1) {
2151                         server_flush_to_var(s);
2152                         return 0;
2153                 }
2154
2155                 log_debug("Received SIG%s", signal_to_string(sfsi.ssi_signo));
2156                 return 0;
2157
2158         } else if (ev->data.fd == s->proc_kmsg_fd) {
2159                 int r;
2160
2161                 if (ev->events != EPOLLIN) {
2162                         log_info("Got invalid event from epoll.");
2163                         return -EIO;
2164                 }
2165
2166                 r = server_read_proc_kmsg(s);
2167                 if (r < 0)
2168                         return r;
2169
2170                 return 1;
2171
2172         } else if (ev->data.fd == s->native_fd ||
2173                    ev->data.fd == s->syslog_fd) {
2174
2175                 if (ev->events != EPOLLIN) {
2176                         log_info("Got invalid event from epoll.");
2177                         return -EIO;
2178                 }
2179
2180                 for (;;) {
2181                         struct msghdr msghdr;
2182                         struct iovec iovec;
2183                         struct ucred *ucred = NULL;
2184                         struct timeval *tv = NULL;
2185                         struct cmsghdr *cmsg;
2186                         char *label = NULL;
2187                         size_t label_len = 0;
2188                         union {
2189                                 struct cmsghdr cmsghdr;
2190
2191                                 /* We use NAME_MAX space for the
2192                                  * SELinux label here. The kernel
2193                                  * currently enforces no limit, but
2194                                  * according to suggestions from the
2195                                  * SELinux people this will change and
2196                                  * it will probably be identical to
2197                                  * NAME_MAX. For now we use that, but
2198                                  * this should be updated one day when
2199                                  * the final limit is known.*/
2200                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
2201                                             CMSG_SPACE(sizeof(struct timeval)) +
2202                                             CMSG_SPACE(sizeof(int)) + /* fd */
2203                                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
2204                         } control;
2205                         ssize_t n;
2206                         int v;
2207                         int *fds = NULL;
2208                         unsigned n_fds = 0;
2209
2210                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
2211                                 log_error("SIOCINQ failed: %m");
2212                                 return -errno;
2213                         }
2214
2215                         if (s->buffer_size < (size_t) v) {
2216                                 void *b;
2217                                 size_t l;
2218
2219                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
2220                                 b = realloc(s->buffer, l+1);
2221
2222                                 if (!b) {
2223                                         log_error("Couldn't increase buffer.");
2224                                         return -ENOMEM;
2225                                 }
2226
2227                                 s->buffer_size = l;
2228                                 s->buffer = b;
2229                         }
2230
2231                         zero(iovec);
2232                         iovec.iov_base = s->buffer;
2233                         iovec.iov_len = s->buffer_size;
2234
2235                         zero(control);
2236                         zero(msghdr);
2237                         msghdr.msg_iov = &iovec;
2238                         msghdr.msg_iovlen = 1;
2239                         msghdr.msg_control = &control;
2240                         msghdr.msg_controllen = sizeof(control);
2241
2242                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
2243                         if (n < 0) {
2244
2245                                 if (errno == EINTR || errno == EAGAIN)
2246                                         return 1;
2247
2248                                 log_error("recvmsg() failed: %m");
2249                                 return -errno;
2250                         }
2251
2252                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
2253
2254                                 if (cmsg->cmsg_level == SOL_SOCKET &&
2255                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
2256                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
2257                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
2258                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2259                                          cmsg->cmsg_type == SCM_SECURITY) {
2260                                         label = (char*) CMSG_DATA(cmsg);
2261                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
2262                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
2263                                          cmsg->cmsg_type == SO_TIMESTAMP &&
2264                                          cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
2265                                         tv = (struct timeval*) CMSG_DATA(cmsg);
2266                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2267                                          cmsg->cmsg_type == SCM_RIGHTS) {
2268                                         fds = (int*) CMSG_DATA(cmsg);
2269                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
2270                                 }
2271                         }
2272
2273                         if (ev->data.fd == s->syslog_fd) {
2274                                 char *e;
2275
2276                                 if (n > 0 && n_fds == 0) {
2277                                         e = memchr(s->buffer, '\n', n);
2278                                         if (e)
2279                                                 *e = 0;
2280                                         else
2281                                                 s->buffer[n] = 0;
2282
2283                                         process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
2284                                 } else if (n_fds > 0)
2285                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
2286
2287                         } else {
2288                                 if (n > 0 && n_fds == 0)
2289                                         process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
2290                                 else if (n == 0 && n_fds == 1)
2291                                         process_native_file(s, fds[0], ucred, tv, label, label_len);
2292                                 else if (n_fds > 0)
2293                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
2294                         }
2295
2296                         close_many(fds, n_fds);
2297                 }
2298
2299                 return 1;
2300
2301         } else if (ev->data.fd == s->stdout_fd) {
2302
2303                 if (ev->events != EPOLLIN) {
2304                         log_info("Got invalid event from epoll.");
2305                         return -EIO;
2306                 }
2307
2308                 stdout_stream_new(s);
2309                 return 1;
2310
2311         } else {
2312                 StdoutStream *stream;
2313
2314                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
2315                         log_info("Got invalid event from epoll.");
2316                         return -EIO;
2317                 }
2318
2319                 /* If it is none of the well-known fds, it must be an
2320                  * stdout stream fd. Note that this is a bit ugly here
2321                  * (since we rely that none of the well-known fds
2322                  * could be interpreted as pointer), but nonetheless
2323                  * safe, since the well-known fds would never get an
2324                  * fd > 4096, i.e. beyond the first memory page */
2325
2326                 stream = ev->data.ptr;
2327
2328                 if (stdout_stream_process(stream) <= 0)
2329                         stdout_stream_free(stream);
2330
2331                 return 1;
2332         }
2333
2334         log_error("Unknown event.");
2335         return 0;
2336 }
2337
2338 static int open_syslog_socket(Server *s) {
2339         union sockaddr_union sa;
2340         int one, r;
2341         struct epoll_event ev;
2342
2343         assert(s);
2344
2345         if (s->syslog_fd < 0) {
2346
2347                 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2348                 if (s->syslog_fd < 0) {
2349                         log_error("socket() failed: %m");
2350                         return -errno;
2351                 }
2352
2353                 zero(sa);
2354                 sa.un.sun_family = AF_UNIX;
2355                 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
2356
2357                 unlink(sa.un.sun_path);
2358
2359                 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2360                 if (r < 0) {
2361                         log_error("bind() failed: %m");
2362                         return -errno;
2363                 }
2364
2365                 chmod(sa.un.sun_path, 0666);
2366         } else
2367                 fd_nonblock(s->syslog_fd, 1);
2368
2369         one = 1;
2370         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2371         if (r < 0) {
2372                 log_error("SO_PASSCRED failed: %m");
2373                 return -errno;
2374         }
2375
2376 #ifdef HAVE_SELINUX
2377         one = 1;
2378         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2379         if (r < 0)
2380                 log_warning("SO_PASSSEC failed: %m");
2381 #endif
2382
2383         one = 1;
2384         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2385         if (r < 0) {
2386                 log_error("SO_TIMESTAMP failed: %m");
2387                 return -errno;
2388         }
2389
2390         zero(ev);
2391         ev.events = EPOLLIN;
2392         ev.data.fd = s->syslog_fd;
2393         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2394                 log_error("Failed to add syslog server fd to epoll object: %m");
2395                 return -errno;
2396         }
2397
2398         return 0;
2399 }
2400
2401 static int open_native_socket(Server*s) {
2402         union sockaddr_union sa;
2403         int one, r;
2404         struct epoll_event ev;
2405
2406         assert(s);
2407
2408         if (s->native_fd < 0) {
2409
2410                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2411                 if (s->native_fd < 0) {
2412                         log_error("socket() failed: %m");
2413                         return -errno;
2414                 }
2415
2416                 zero(sa);
2417                 sa.un.sun_family = AF_UNIX;
2418                 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
2419
2420                 unlink(sa.un.sun_path);
2421
2422                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2423                 if (r < 0) {
2424                         log_error("bind() failed: %m");
2425                         return -errno;
2426                 }
2427
2428                 chmod(sa.un.sun_path, 0666);
2429         } else
2430                 fd_nonblock(s->native_fd, 1);
2431
2432         one = 1;
2433         r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2434         if (r < 0) {
2435                 log_error("SO_PASSCRED failed: %m");
2436                 return -errno;
2437         }
2438
2439 #ifdef HAVE_SELINUX
2440         one = 1;
2441         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2442         if (r < 0)
2443                 log_warning("SO_PASSSEC failed: %m");
2444 #endif
2445
2446         one = 1;
2447         r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2448         if (r < 0) {
2449                 log_error("SO_TIMESTAMP failed: %m");
2450                 return -errno;
2451         }
2452
2453         zero(ev);
2454         ev.events = EPOLLIN;
2455         ev.data.fd = s->native_fd;
2456         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2457                 log_error("Failed to add native server fd to epoll object: %m");
2458                 return -errno;
2459         }
2460
2461         return 0;
2462 }
2463
2464 static int open_stdout_socket(Server *s) {
2465         union sockaddr_union sa;
2466         int r;
2467         struct epoll_event ev;
2468
2469         assert(s);
2470
2471         if (s->stdout_fd < 0) {
2472
2473                 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2474                 if (s->stdout_fd < 0) {
2475                         log_error("socket() failed: %m");
2476                         return -errno;
2477                 }
2478
2479                 zero(sa);
2480                 sa.un.sun_family = AF_UNIX;
2481                 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
2482
2483                 unlink(sa.un.sun_path);
2484
2485                 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2486                 if (r < 0) {
2487                         log_error("bind() failed: %m");
2488                         return -errno;
2489                 }
2490
2491                 chmod(sa.un.sun_path, 0666);
2492
2493                 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2494                         log_error("liste() failed: %m");
2495                         return -errno;
2496                 }
2497         } else
2498                 fd_nonblock(s->stdout_fd, 1);
2499
2500         zero(ev);
2501         ev.events = EPOLLIN;
2502         ev.data.fd = s->stdout_fd;
2503         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2504                 log_error("Failed to add stdout server fd to epoll object: %m");
2505                 return -errno;
2506         }
2507
2508         return 0;
2509 }
2510
2511 static int open_proc_kmsg(Server *s) {
2512         struct epoll_event ev;
2513
2514         assert(s);
2515
2516         if (!s->import_proc_kmsg)
2517                 return 0;
2518
2519         s->proc_kmsg_fd = open("/proc/kmsg", O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2520         if (s->proc_kmsg_fd < 0) {
2521                 log_warning("Failed to open /proc/kmsg, ignoring: %m");
2522                 return 0;
2523         }
2524
2525         zero(ev);
2526         ev.events = EPOLLIN;
2527         ev.data.fd = s->proc_kmsg_fd;
2528         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->proc_kmsg_fd, &ev) < 0) {
2529                 log_error("Failed to add /proc/kmsg fd to epoll object: %m");
2530                 return -errno;
2531         }
2532
2533         return 0;
2534 }
2535
2536 static int open_signalfd(Server *s) {
2537         sigset_t mask;
2538         struct epoll_event ev;
2539
2540         assert(s);
2541
2542         assert_se(sigemptyset(&mask) == 0);
2543         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, -1);
2544         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2545
2546         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2547         if (s->signal_fd < 0) {
2548                 log_error("signalfd(): %m");
2549                 return -errno;
2550         }
2551
2552         zero(ev);
2553         ev.events = EPOLLIN;
2554         ev.data.fd = s->signal_fd;
2555
2556         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2557                 log_error("epoll_ctl(): %m");
2558                 return -errno;
2559         }
2560
2561         return 0;
2562 }
2563
2564 static int server_parse_proc_cmdline(Server *s) {
2565         char *line, *w, *state;
2566         int r;
2567         size_t l;
2568
2569         if (detect_container(NULL) > 0)
2570                 return 0;
2571
2572         r = read_one_line_file("/proc/cmdline", &line);
2573         if (r < 0) {
2574                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2575                 return 0;
2576         }
2577
2578         FOREACH_WORD_QUOTED(w, l, line, state) {
2579                 char *word;
2580
2581                 word = strndup(w, l);
2582                 if (!word) {
2583                         r = -ENOMEM;
2584                         goto finish;
2585                 }
2586
2587                 if (startswith(word, "systemd_journald.forward_to_syslog=")) {
2588                         r = parse_boolean(word + 35);
2589                         if (r < 0)
2590                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2591                         else
2592                                 s->forward_to_syslog = r;
2593                 } else if (startswith(word, "systemd_journald.forward_to_kmsg=")) {
2594                         r = parse_boolean(word + 33);
2595                         if (r < 0)
2596                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2597                         else
2598                                 s->forward_to_kmsg = r;
2599                 } else if (startswith(word, "systemd_journald.forward_to_console=")) {
2600                         r = parse_boolean(word + 36);
2601                         if (r < 0)
2602                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2603                         else
2604                                 s->forward_to_console = r;
2605                 }
2606
2607                 free(word);
2608         }
2609
2610         r = 0;
2611
2612 finish:
2613         free(line);
2614         return r;
2615 }
2616
2617 static int server_parse_config_file(Server *s) {
2618         FILE *f;
2619         const char *fn;
2620         int r;
2621
2622         assert(s);
2623
2624         fn = "/etc/systemd/journald.conf";
2625         f = fopen(fn, "re");
2626         if (!f) {
2627                 if (errno == ENOENT)
2628                         return 0;
2629
2630                 log_warning("Failed to open configuration file %s: %m", fn);
2631                 return -errno;
2632         }
2633
2634         r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2635         if (r < 0)
2636                 log_warning("Failed to parse configuration file: %s", strerror(-r));
2637
2638         fclose(f);
2639
2640         return r;
2641 }
2642
2643 static int server_init(Server *s) {
2644         int n, r, fd;
2645
2646         assert(s);
2647
2648         zero(*s);
2649         s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->proc_kmsg_fd = -1;
2650         s->compress = true;
2651
2652         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
2653         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
2654
2655         s->forward_to_syslog = true;
2656         s->import_proc_kmsg = true;
2657
2658         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
2659         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
2660
2661         server_parse_config_file(s);
2662         server_parse_proc_cmdline(s);
2663
2664         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
2665         if (!s->user_journals) {
2666                 log_error("Out of memory.");
2667                 return -ENOMEM;
2668         }
2669
2670         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2671         if (s->epoll_fd < 0) {
2672                 log_error("Failed to create epoll object: %m");
2673                 return -errno;
2674         }
2675
2676         n = sd_listen_fds(true);
2677         if (n < 0) {
2678                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
2679                 return n;
2680         }
2681
2682         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2683
2684                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
2685
2686                         if (s->native_fd >= 0) {
2687                                 log_error("Too many native sockets passed.");
2688                                 return -EINVAL;
2689                         }
2690
2691                         s->native_fd = fd;
2692
2693                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
2694
2695                         if (s->stdout_fd >= 0) {
2696                                 log_error("Too many stdout sockets passed.");
2697                                 return -EINVAL;
2698                         }
2699
2700                         s->stdout_fd = fd;
2701
2702                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
2703
2704                         if (s->syslog_fd >= 0) {
2705                                 log_error("Too many /dev/log sockets passed.");
2706                                 return -EINVAL;
2707                         }
2708
2709                         s->syslog_fd = fd;
2710
2711                 } else {
2712                         log_error("Unknown socket passed.");
2713                         return -EINVAL;
2714                 }
2715         }
2716
2717         r = open_syslog_socket(s);
2718         if (r < 0)
2719                 return r;
2720
2721         r = open_native_socket(s);
2722         if (r < 0)
2723                 return r;
2724
2725         r = open_stdout_socket(s);
2726         if (r < 0)
2727                 return r;
2728
2729         r = open_proc_kmsg(s);
2730         if (r < 0)
2731                 return r;
2732
2733         r = open_signalfd(s);
2734         if (r < 0)
2735                 return r;
2736
2737         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2738         if (!s->rate_limit)
2739                 return -ENOMEM;
2740
2741         r = system_journal_open(s);
2742         if (r < 0)
2743                 return r;
2744
2745         return 0;
2746 }
2747
2748 static void server_done(Server *s) {
2749         JournalFile *f;
2750         assert(s);
2751
2752         while (s->stdout_streams)
2753                 stdout_stream_free(s->stdout_streams);
2754
2755         if (s->system_journal)
2756                 journal_file_close(s->system_journal);
2757
2758         if (s->runtime_journal)
2759                 journal_file_close(s->runtime_journal);
2760
2761         while ((f = hashmap_steal_first(s->user_journals)))
2762                 journal_file_close(f);
2763
2764         hashmap_free(s->user_journals);
2765
2766         if (s->epoll_fd >= 0)
2767                 close_nointr_nofail(s->epoll_fd);
2768
2769         if (s->signal_fd >= 0)
2770                 close_nointr_nofail(s->signal_fd);
2771
2772         if (s->syslog_fd >= 0)
2773                 close_nointr_nofail(s->syslog_fd);
2774
2775         if (s->native_fd >= 0)
2776                 close_nointr_nofail(s->native_fd);
2777
2778         if (s->stdout_fd >= 0)
2779                 close_nointr_nofail(s->stdout_fd);
2780
2781         if (s->proc_kmsg_fd >= 0)
2782                 close_nointr_nofail(s->proc_kmsg_fd);
2783
2784         if (s->rate_limit)
2785                 journal_rate_limit_free(s->rate_limit);
2786
2787         free(s->buffer);
2788         free(s->tty_path);
2789 }
2790
2791 int main(int argc, char *argv[]) {
2792         Server server;
2793         int r;
2794
2795         /* if (getppid() != 1) { */
2796         /*         log_error("This program should be invoked by init only."); */
2797         /*         return EXIT_FAILURE; */
2798         /* } */
2799
2800         if (argc > 1) {
2801                 log_error("This program does not take arguments.");
2802                 return EXIT_FAILURE;
2803         }
2804
2805         log_set_target(LOG_TARGET_SAFE);
2806         log_set_facility(LOG_SYSLOG);
2807         log_parse_environment();
2808         log_open();
2809
2810         umask(0022);
2811
2812         r = server_init(&server);
2813         if (r < 0)
2814                 goto finish;
2815
2816         server_vacuum(&server);
2817         server_flush_to_var(&server);
2818         server_flush_proc_kmsg(&server);
2819
2820         log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
2821         driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
2822
2823         sd_notify(false,
2824                   "READY=1\n"
2825                   "STATUS=Processing requests...");
2826
2827         for (;;) {
2828                 struct epoll_event event;
2829
2830                 r = epoll_wait(server.epoll_fd, &event, 1, -1);
2831                 if (r < 0) {
2832
2833                         if (errno == EINTR)
2834                                 continue;
2835
2836                         log_error("epoll_wait() failed: %m");
2837                         r = -errno;
2838                         goto finish;
2839                 } else if (r == 0)
2840                         break;
2841
2842                 r = process_event(&server, &event);
2843                 if (r < 0)
2844                         goto finish;
2845                 else if (r == 0)
2846                         break;
2847         }
2848
2849         log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
2850         driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
2851
2852 finish:
2853         sd_notify(false,
2854                   "STATUS=Shutting down...");
2855
2856         server_done(&server);
2857
2858         return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
2859 }