chiark / gitweb /
journald: properly handle if we have no PID in a kmsg line
[elogind.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
32
33 #include <systemd/sd-journal.h>
34 #include <systemd/sd-messages.h>
35 #include <systemd/sd-daemon.h>
36
37 #ifdef HAVE_LOGIND
38 #include <systemd/sd-login.h>
39 #endif
40
41 #include "mkdir.h"
42 #include "hashmap.h"
43 #include "journal-file.h"
44 #include "socket-util.h"
45 #include "cgroup-util.h"
46 #include "list.h"
47 #include "journal-rate-limit.h"
48 #include "journal-internal.h"
49 #include "conf-parser.h"
50 #include "journald.h"
51 #include "virt.h"
52 #include "missing.h"
53
54 #ifdef HAVE_ACL
55 #include <sys/acl.h>
56 #include <acl/libacl.h>
57 #include "acl-util.h"
58 #endif
59
60 #ifdef HAVE_SELINUX
61 #include <selinux/selinux.h>
62 #endif
63
/* Upper bound on the number of per-user journal files kept open at once;
 * find_journal() closes one before opening another beyond this. */
#define USER_JOURNALS_MAX 1024
/* NOTE(review): presumably the cap on concurrently connected stdout
 * streams -- the user of this constant is not visible here. */
#define STDOUT_STREAMS_MAX 4096

/* Default rate limiting parameters. */
#define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 200

/* How long a value computed by available_space() stays cached. */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)

#define RECHECK_VAR_AVAILABLE_USEC (30*USEC_PER_SEC)

/* Number of iovec slots dispatch_message_real() may append on top of the
 * caller's fields; callers must size their arrays accordingly. */
#define N_IOVEC_META_FIELDS 17

/* 32 MiB */
#define ENTRY_SIZE_MAX (1024*1024*32)
77
/* States of a stdout stream connection. NOTE(review): the names mirror the
 * fields of struct StdoutStream, so each state presumably denotes the header
 * item expected next, with RUNNING meaning payload -- confirm against the
 * stream parser, which is not visible here. */
typedef enum StdoutStreamState {
        STDOUT_STREAM_IDENTIFIER,
        STDOUT_STREAM_PRIORITY,
        STDOUT_STREAM_LEVEL_PREFIX,
        STDOUT_STREAM_FORWARD_TO_SYSLOG,
        STDOUT_STREAM_FORWARD_TO_KMSG,
        STDOUT_STREAM_FORWARD_TO_CONSOLE,
        STDOUT_STREAM_RUNNING
} StdoutStreamState;
87
88 struct StdoutStream {
89         Server *server;
90         StdoutStreamState state;
91
92         int fd;
93
94         struct ucred ucred;
95 #ifdef HAVE_SELINUX
96         security_context_t security_context;
97 #endif
98
99         char *identifier;
100         int priority;
101         bool level_prefix:1;
102         bool forward_to_syslog:1;
103         bool forward_to_kmsg:1;
104         bool forward_to_console:1;
105
106         char buffer[LINE_MAX+1];
107         size_t length;
108
109         LIST_FIELDS(StdoutStream, stdout_stream);
110 };
111
112 static int server_flush_to_var(Server *s);
113
114 static uint64_t available_space(Server *s) {
115         char ids[33], *p;
116         const char *f;
117         sd_id128_t machine;
118         struct statvfs ss;
119         uint64_t sum = 0, avail = 0, ss_avail = 0;
120         int r;
121         DIR *d;
122         usec_t ts;
123         JournalMetrics *m;
124
125         ts = now(CLOCK_MONOTONIC);
126
127         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
128                 return s->cached_available_space;
129
130         r = sd_id128_get_machine(&machine);
131         if (r < 0)
132                 return 0;
133
134         if (s->system_journal) {
135                 f = "/var/log/journal/";
136                 m = &s->system_metrics;
137         } else {
138                 f = "/run/log/journal/";
139                 m = &s->runtime_metrics;
140         }
141
142         assert(m);
143
144         p = strappend(f, sd_id128_to_string(machine, ids));
145         if (!p)
146                 return 0;
147
148         d = opendir(p);
149         free(p);
150
151         if (!d)
152                 return 0;
153
154         if (fstatvfs(dirfd(d), &ss) < 0)
155                 goto finish;
156
157         for (;;) {
158                 struct stat st;
159                 struct dirent buf, *de;
160
161                 r = readdir_r(d, &buf, &de);
162                 if (r != 0)
163                         break;
164
165                 if (!de)
166                         break;
167
168                 if (!endswith(de->d_name, ".journal") &&
169                     !endswith(de->d_name, ".journal~"))
170                         continue;
171
172                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
173                         continue;
174
175                 if (!S_ISREG(st.st_mode))
176                         continue;
177
178                 sum += (uint64_t) st.st_blocks * 512UL;
179         }
180
181         avail = sum >= m->max_use ? 0 : m->max_use - sum;
182
183         ss_avail = ss.f_bsize * ss.f_bavail;
184
185         ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
186
187         if (ss_avail < avail)
188                 avail = ss_avail;
189
190         s->cached_available_space = avail;
191         s->cached_available_space_timestamp = ts;
192
193 finish:
194         closedir(d);
195
196         return avail;
197 }
198
199 static void server_read_file_gid(Server *s) {
200         const char *adm = "adm";
201         int r;
202
203         assert(s);
204
205         if (s->file_gid_valid)
206                 return;
207
208         r = get_group_creds(&adm, &s->file_gid);
209         if (r < 0)
210                 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
211
212         /* if we couldn't read the gid, then it will be 0, but that's
213          * fine and we shouldn't try to resolve the group again, so
214          * let's just pretend it worked right-away. */
215         s->file_gid_valid = true;
216 }
217
218 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
219         int r;
220 #ifdef HAVE_ACL
221         acl_t acl;
222         acl_entry_t entry;
223         acl_permset_t permset;
224 #endif
225
226         assert(f);
227
228         server_read_file_gid(s);
229
230         r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
231         if (r < 0)
232                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
233
234 #ifdef HAVE_ACL
235         if (uid <= 0)
236                 return;
237
238         acl = acl_get_fd(f->fd);
239         if (!acl) {
240                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
241                 return;
242         }
243
244         r = acl_find_uid(acl, uid, &entry);
245         if (r <= 0) {
246
247                 if (acl_create_entry(&acl, &entry) < 0 ||
248                     acl_set_tag_type(entry, ACL_USER) < 0 ||
249                     acl_set_qualifier(entry, &uid) < 0) {
250                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
251                         goto finish;
252                 }
253         }
254
255         if (acl_get_permset(entry, &permset) < 0 ||
256             acl_add_perm(permset, ACL_READ) < 0 ||
257             acl_calc_mask(&acl) < 0) {
258                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
259                 goto finish;
260         }
261
262         if (acl_set_fd(f->fd, acl) < 0)
263                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
264
265 finish:
266         acl_free(acl);
267 #endif
268 }
269
270 static JournalFile* find_journal(Server *s, uid_t uid) {
271         char *p;
272         int r;
273         JournalFile *f;
274         char ids[33];
275         sd_id128_t machine;
276
277         assert(s);
278
279         /* We split up user logs only on /var, not on /run. If the
280          * runtime file is open, we write to it exclusively, in order
281          * to guarantee proper order as soon as we flush /run to
282          * /var and close the runtime file. */
283
284         if (s->runtime_journal)
285                 return s->runtime_journal;
286
287         if (uid <= 0)
288                 return s->system_journal;
289
290         r = sd_id128_get_machine(&machine);
291         if (r < 0)
292                 return s->system_journal;
293
294         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
295         if (f)
296                 return f;
297
298         if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0)
299                 return s->system_journal;
300
301         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
302                 /* Too many open? Then let's close one */
303                 f = hashmap_steal_first(s->user_journals);
304                 assert(f);
305                 journal_file_close(f);
306         }
307
308         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->system_journal, &f);
309         free(p);
310
311         if (r < 0)
312                 return s->system_journal;
313
314         server_fix_perms(s, f, uid);
315
316         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
317         if (r < 0) {
318                 journal_file_close(f);
319                 return s->system_journal;
320         }
321
322         return f;
323 }
324
325 static void server_rotate(Server *s) {
326         JournalFile *f;
327         void *k;
328         Iterator i;
329         int r;
330
331         log_info("Rotating...");
332
333         if (s->runtime_journal) {
334                 r = journal_file_rotate(&s->runtime_journal);
335                 if (r < 0)
336                         if (s->runtime_journal)
337                                 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
338                         else
339                                 log_error("Failed to create new runtime journal: %s", strerror(-r));
340                 else
341                         server_fix_perms(s, s->runtime_journal, 0);
342         }
343
344         if (s->system_journal) {
345                 r = journal_file_rotate(&s->system_journal);
346                 if (r < 0)
347                         if (s->system_journal)
348                                 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
349                         else
350                                 log_error("Failed to create new system journal: %s", strerror(-r));
351
352                 else
353                         server_fix_perms(s, s->system_journal, 0);
354         }
355
356         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
357                 r = journal_file_rotate(&f);
358                 if (r < 0)
359                         if (f->path)
360                                 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
361                         else
362                                 log_error("Failed to create user journal: %s", strerror(-r));
363                 else {
364                         hashmap_replace(s->user_journals, k, f);
365                         server_fix_perms(s, s->system_journal, PTR_TO_UINT32(k));
366                 }
367         }
368 }
369
370 static void server_vacuum(Server *s) {
371         char *p;
372         char ids[33];
373         sd_id128_t machine;
374         int r;
375
376         log_info("Vacuuming...");
377
378         r = sd_id128_get_machine(&machine);
379         if (r < 0) {
380                 log_error("Failed to get machine ID: %s", strerror(-r));
381                 return;
382         }
383
384         sd_id128_to_string(machine, ids);
385
386         if (s->system_journal) {
387                 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
388                         log_error("Out of memory.");
389                         return;
390                 }
391
392                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
393                 if (r < 0 && r != -ENOENT)
394                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
395                 free(p);
396         }
397
398
399         if (s->runtime_journal) {
400                 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
401                         log_error("Out of memory.");
402                         return;
403                 }
404
405                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
406                 if (r < 0 && r != -ENOENT)
407                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
408                 free(p);
409         }
410
411         s->cached_available_space_timestamp = 0;
412 }
413
414 static char *shortened_cgroup_path(pid_t pid) {
415         int r;
416         char *process_path, *init_path, *path;
417
418         assert(pid > 0);
419
420         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
421         if (r < 0)
422                 return NULL;
423
424         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
425         if (r < 0) {
426                 free(process_path);
427                 return NULL;
428         }
429
430         if (endswith(init_path, "/system"))
431                 init_path[strlen(init_path) - 7] = 0;
432         else if (streq(init_path, "/"))
433                 init_path[0] = 0;
434
435         if (startswith(process_path, init_path)) {
436                 char *p;
437
438                 p = strdup(process_path + strlen(init_path));
439                 if (!p) {
440                         free(process_path);
441                         free(init_path);
442                         return NULL;
443                 }
444                 path = p;
445         } else {
446                 path = process_path;
447                 process_path = NULL;
448         }
449
450         free(process_path);
451         free(init_path);
452
453         return path;
454 }
455
456 static void dispatch_message_real(
457                 Server *s,
458                 struct iovec *iovec, unsigned n, unsigned m,
459                 struct ucred *ucred,
460                 struct timeval *tv,
461                 const char *label, size_t label_len) {
462
463         char *pid = NULL, *uid = NULL, *gid = NULL,
464                 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
465                 *comm = NULL, *cmdline = NULL, *hostname = NULL,
466                 *audit_session = NULL, *audit_loginuid = NULL,
467                 *exe = NULL, *cgroup = NULL, *session = NULL,
468                 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
469
470         char idbuf[33];
471         sd_id128_t id;
472         int r;
473         char *t;
474         uid_t loginuid = 0, realuid = 0;
475         JournalFile *f;
476         bool vacuumed = false;
477
478         assert(s);
479         assert(iovec);
480         assert(n > 0);
481         assert(n + N_IOVEC_META_FIELDS <= m);
482
483         if (ucred) {
484                 uint32_t audit;
485 #ifdef HAVE_LOGIND
486                 uid_t owner;
487 #endif
488
489                 realuid = ucred->uid;
490
491                 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
492                         IOVEC_SET_STRING(iovec[n++], pid);
493
494                 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
495                         IOVEC_SET_STRING(iovec[n++], uid);
496
497                 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
498                         IOVEC_SET_STRING(iovec[n++], gid);
499
500                 r = get_process_comm(ucred->pid, &t);
501                 if (r >= 0) {
502                         comm = strappend("_COMM=", t);
503                         free(t);
504
505                         if (comm)
506                                 IOVEC_SET_STRING(iovec[n++], comm);
507                 }
508
509                 r = get_process_exe(ucred->pid, &t);
510                 if (r >= 0) {
511                         exe = strappend("_EXE=", t);
512                         free(t);
513
514                         if (exe)
515                                 IOVEC_SET_STRING(iovec[n++], exe);
516                 }
517
518                 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
519                 if (r >= 0) {
520                         cmdline = strappend("_CMDLINE=", t);
521                         free(t);
522
523                         if (cmdline)
524                                 IOVEC_SET_STRING(iovec[n++], cmdline);
525                 }
526
527                 r = audit_session_from_pid(ucred->pid, &audit);
528                 if (r >= 0)
529                         if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
530                                 IOVEC_SET_STRING(iovec[n++], audit_session);
531
532                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
533                 if (r >= 0)
534                         if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
535                                 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
536
537                 t = shortened_cgroup_path(ucred->pid);
538                 if (t) {
539                         cgroup = strappend("_SYSTEMD_CGROUP=", t);
540                         free(t);
541
542                         if (cgroup)
543                                 IOVEC_SET_STRING(iovec[n++], cgroup);
544                 }
545
546 #ifdef HAVE_LOGIND
547                 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
548                         session = strappend("_SYSTEMD_SESSION=", t);
549                         free(t);
550
551                         if (session)
552                                 IOVEC_SET_STRING(iovec[n++], session);
553                 }
554
555                 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
556                         if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
557                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
558 #endif
559
560                 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
561                         unit = strappend("_SYSTEMD_UNIT=", t);
562                         free(t);
563
564                         if (unit)
565                                 IOVEC_SET_STRING(iovec[n++], unit);
566                 }
567
568 #ifdef HAVE_SELINUX
569                 if (label) {
570                         selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
571                         if (selinux_context) {
572                                 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
573                                 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
574                                 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
575                                 IOVEC_SET_STRING(iovec[n++], selinux_context);
576                         }
577                 } else {
578                         security_context_t con;
579
580                         if (getpidcon(ucred->pid, &con) >= 0) {
581                                 selinux_context = strappend("_SELINUX_CONTEXT=", con);
582                                 if (selinux_context)
583                                         IOVEC_SET_STRING(iovec[n++], selinux_context);
584
585                                 freecon(con);
586                         }
587                 }
588 #endif
589         }
590
591         if (tv) {
592                 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
593                              (unsigned long long) timeval_load(tv)) >= 0)
594                         IOVEC_SET_STRING(iovec[n++], source_time);
595         }
596
597         /* Note that strictly speaking storing the boot id here is
598          * redundant since the entry includes this in-line
599          * anyway. However, we need this indexed, too. */
600         r = sd_id128_get_boot(&id);
601         if (r >= 0)
602                 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
603                         IOVEC_SET_STRING(iovec[n++], boot_id);
604
605         r = sd_id128_get_machine(&id);
606         if (r >= 0)
607                 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
608                         IOVEC_SET_STRING(iovec[n++], machine_id);
609
610         t = gethostname_malloc();
611         if (t) {
612                 hostname = strappend("_HOSTNAME=", t);
613                 free(t);
614                 if (hostname)
615                         IOVEC_SET_STRING(iovec[n++], hostname);
616         }
617
618         assert(n <= m);
619
620         server_flush_to_var(s);
621
622 retry:
623         f = find_journal(s, realuid == 0 ? 0 : loginuid);
624         if (!f)
625                 log_warning("Dropping message, as we can't find a place to store the data.");
626         else {
627                 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
628
629                 if ((r == -E2BIG || /* hit limit */
630                      r == -EFBIG || /* hit fs limit */
631                      r == -EDQUOT || /* quota hit */
632                      r == -ENOSPC || /* disk full */
633                      r == -EBADMSG || /* corrupted */
634                      r == -ENODATA || /* truncated */
635                      r == -EHOSTDOWN || /* other machine */
636                      r == -EPROTONOSUPPORT) && /* unsupported feature */
637                     !vacuumed) {
638
639                         if (r == -E2BIG)
640                                 log_info("Allocation limit reached, rotating.");
641                         else
642                                 log_warning("Journal file corrupted, rotating.");
643
644                         server_rotate(s);
645                         server_vacuum(s);
646                         vacuumed = true;
647
648                         log_info("Retrying write.");
649                         goto retry;
650                 }
651
652                 if (r < 0)
653                         log_error("Failed to write entry, ignoring: %s", strerror(-r));
654         }
655
656         free(pid);
657         free(uid);
658         free(gid);
659         free(comm);
660         free(exe);
661         free(cmdline);
662         free(source_time);
663         free(boot_id);
664         free(machine_id);
665         free(hostname);
666         free(audit_session);
667         free(audit_loginuid);
668         free(cgroup);
669         free(session);
670         free(owner_uid);
671         free(unit);
672         free(selinux_context);
673 }
674
675 static void driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
676         char mid[11 + 32 + 1];
677         char buffer[16 + LINE_MAX + 1];
678         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
679         int n = 0;
680         va_list ap;
681         struct ucred ucred;
682
683         assert(s);
684         assert(format);
685
686         IOVEC_SET_STRING(iovec[n++], "PRIORITY=5");
687         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
688
689         memcpy(buffer, "MESSAGE=", 8);
690         va_start(ap, format);
691         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
692         va_end(ap);
693         char_array_0(buffer);
694         IOVEC_SET_STRING(iovec[n++], buffer);
695
696         snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
697         char_array_0(mid);
698         IOVEC_SET_STRING(iovec[n++], mid);
699
700         zero(ucred);
701         ucred.pid = getpid();
702         ucred.uid = getuid();
703         ucred.gid = getgid();
704
705         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0);
706 }
707
708 static void dispatch_message(Server *s,
709                              struct iovec *iovec, unsigned n, unsigned m,
710                              struct ucred *ucred,
711                              struct timeval *tv,
712                              const char *label, size_t label_len,
713                              int priority) {
714         int rl;
715         char *path = NULL, *c;
716
717         assert(s);
718         assert(iovec || n == 0);
719
720         if (n == 0)
721                 return;
722
723         if (!ucred)
724                 goto finish;
725
726         path = shortened_cgroup_path(ucred->pid);
727         if (!path)
728                 goto finish;
729
730         /* example: /user/lennart/3/foobar
731          *          /system/dbus.service/foobar
732          *
733          * So let's cut of everything past the third /, since that is
734          * wher user directories start */
735
736         c = strchr(path, '/');
737         if (c) {
738                 c = strchr(c+1, '/');
739                 if (c) {
740                         c = strchr(c+1, '/');
741                         if (c)
742                                 *c = 0;
743                 }
744         }
745
746         rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
747
748         if (rl == 0) {
749                 free(path);
750                 return;
751         }
752
753         /* Write a suppression message if we suppressed something */
754         if (rl > 1)
755                 driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
756
757         free(path);
758
759 finish:
760         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len);
761 }
762
763 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
764         struct msghdr msghdr;
765         struct cmsghdr *cmsg;
766         union {
767                 struct cmsghdr cmsghdr;
768                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
769         } control;
770         union sockaddr_union sa;
771
772         assert(s);
773         assert(iovec);
774         assert(n_iovec > 0);
775
776         zero(msghdr);
777         msghdr.msg_iov = (struct iovec*) iovec;
778         msghdr.msg_iovlen = n_iovec;
779
780         zero(sa);
781         sa.un.sun_family = AF_UNIX;
782         strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
783         msghdr.msg_name = &sa;
784         msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
785
786         if (ucred) {
787                 zero(control);
788                 msghdr.msg_control = &control;
789                 msghdr.msg_controllen = sizeof(control);
790
791                 cmsg = CMSG_FIRSTHDR(&msghdr);
792                 cmsg->cmsg_level = SOL_SOCKET;
793                 cmsg->cmsg_type = SCM_CREDENTIALS;
794                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
795                 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
796                 msghdr.msg_controllen = cmsg->cmsg_len;
797         }
798
799         /* Forward the syslog message we received via /dev/log to
800          * /run/systemd/syslog. Unfortunately we currently can't set
801          * the SO_TIMESTAMP auxiliary data, and hence we don't. */
802
803         if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
804                 return;
805
806         /* The socket is full? I guess the syslog implementation is
807          * too slow, and we shouldn't wait for that... */
808         if (errno == EAGAIN)
809                 return;
810
811         if (ucred && errno == ESRCH) {
812                 struct ucred u;
813
814                 /* Hmm, presumably the sender process vanished
815                  * by now, so let's fix it as good as we
816                  * can, and retry */
817
818                 u = *ucred;
819                 u.pid = getpid();
820                 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
821
822                 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
823                         return;
824
825                 if (errno == EAGAIN)
826                         return;
827         }
828
829         log_debug("Failed to forward syslog message: %m");
830 }
831
832 static void forward_syslog_raw(Server *s, const char *buffer, struct ucred *ucred, struct timeval *tv) {
833         struct iovec iovec;
834
835         assert(s);
836         assert(buffer);
837
838         IOVEC_SET_STRING(iovec, buffer);
839         forward_syslog_iovec(s, &iovec, 1, ucred, tv);
840 }
841
842 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
843         struct iovec iovec[5];
844         char header_priority[6], header_time[64], header_pid[16];
845         int n = 0;
846         time_t t;
847         struct tm *tm;
848         char *ident_buf = NULL;
849
850         assert(s);
851         assert(priority >= 0);
852         assert(priority <= 999);
853         assert(message);
854
855         /* First: priority field */
856         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
857         char_array_0(header_priority);
858         IOVEC_SET_STRING(iovec[n++], header_priority);
859
860         /* Second: timestamp */
861         t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
862         tm = localtime(&t);
863         if (!tm)
864                 return;
865         if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
866                 return;
867         IOVEC_SET_STRING(iovec[n++], header_time);
868
869         /* Third: identifier and PID */
870         if (ucred) {
871                 if (!identifier) {
872                         get_process_comm(ucred->pid, &ident_buf);
873                         identifier = ident_buf;
874                 }
875
876                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
877                 char_array_0(header_pid);
878
879                 if (identifier)
880                         IOVEC_SET_STRING(iovec[n++], identifier);
881
882                 IOVEC_SET_STRING(iovec[n++], header_pid);
883         } else if (identifier) {
884                 IOVEC_SET_STRING(iovec[n++], identifier);
885                 IOVEC_SET_STRING(iovec[n++], ": ");
886         }
887
888         /* Fourth: message */
889         IOVEC_SET_STRING(iovec[n++], message);
890
891         forward_syslog_iovec(s, iovec, n, ucred, tv);
892
893         free(ident_buf);
894 }
895
/* Returns priority unchanged if it carries a non-zero facility; otherwise
 * (facility bits all zero, i.e. the kernel facility) returns its level
 * combined with LOG_USER. */
static int fixup_priority(int priority) {
        return (priority & LOG_FACMASK) != 0
                ? priority
                : ((priority & LOG_PRIMASK) | LOG_USER);
}
903
904 static void forward_kmsg(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
905         struct iovec iovec[5];
906         char header_priority[6], header_pid[16];
907         int n = 0;
908         char *ident_buf = NULL;
909         int fd;
910
911         assert(s);
912         assert(priority >= 0);
913         assert(priority <= 999);
914         assert(message);
915
916         /* Never allow messages with kernel facility to be written to
917          * kmsg, regardless where the data comes from. */
918         priority = fixup_priority(priority);
919
920         /* First: priority field */
921         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
922         char_array_0(header_priority);
923         IOVEC_SET_STRING(iovec[n++], header_priority);
924
925         /* Second: identifier and PID */
926         if (ucred) {
927                 if (!identifier) {
928                         get_process_comm(ucred->pid, &ident_buf);
929                         identifier = ident_buf;
930                 }
931
932                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
933                 char_array_0(header_pid);
934
935                 if (identifier)
936                         IOVEC_SET_STRING(iovec[n++], identifier);
937
938                 IOVEC_SET_STRING(iovec[n++], header_pid);
939         } else if (identifier) {
940                 IOVEC_SET_STRING(iovec[n++], identifier);
941                 IOVEC_SET_STRING(iovec[n++], ": ");
942         }
943
944         /* Fourth: message */
945         IOVEC_SET_STRING(iovec[n++], message);
946         IOVEC_SET_STRING(iovec[n++], "\n");
947
948         fd = open("/dev/kmsg", O_WRONLY|O_NOCTTY|O_CLOEXEC);
949         if (fd < 0) {
950                 log_debug("Failed to open /dev/kmsg for logging: %s", strerror(errno));
951                 goto finish;
952         }
953
954         if (writev(fd, iovec, n) < 0)
955                 log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
956
957         close_nointr_nofail(fd);
958
959 finish:
960         free(ident_buf);
961 }
962
963 static void forward_console(Server *s, const char *identifier, const char *message, struct ucred *ucred) {
964         struct iovec iovec[4];
965         char header_pid[16];
966         int n = 0, fd;
967         char *ident_buf = NULL;
968
969         assert(s);
970         assert(message);
971
972         /* First: identifier and PID */
973         if (ucred) {
974                 if (!identifier) {
975                         get_process_comm(ucred->pid, &ident_buf);
976                         identifier = ident_buf;
977                 }
978
979                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
980                 char_array_0(header_pid);
981
982                 if (identifier)
983                         IOVEC_SET_STRING(iovec[n++], identifier);
984
985                 IOVEC_SET_STRING(iovec[n++], header_pid);
986         } else if (identifier) {
987                 IOVEC_SET_STRING(iovec[n++], identifier);
988                 IOVEC_SET_STRING(iovec[n++], ": ");
989         }
990
991         /* Third: message */
992         IOVEC_SET_STRING(iovec[n++], message);
993         IOVEC_SET_STRING(iovec[n++], "\n");
994
995         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
996         if (fd < 0) {
997                 log_debug("Failed to open /dev/console for logging: %s", strerror(errno));
998                 goto finish;
999         }
1000
1001         if (writev(fd, iovec, n) < 0)
1002                 log_debug("Failed to write to /dev/console for logging: %s", strerror(errno));
1003
1004         close_nointr_nofail(fd);
1005
1006 finish:
1007         free(ident_buf);
1008 }
1009
1010 static void read_identifier(const char **buf, char **identifier, char **pid) {
1011         const char *p;
1012         char *t;
1013         size_t l, e;
1014
1015         assert(buf);
1016         assert(identifier);
1017         assert(pid);
1018
1019         p = *buf;
1020
1021         p += strspn(p, WHITESPACE);
1022         l = strcspn(p, WHITESPACE);
1023
1024         if (l <= 0 ||
1025             p[l-1] != ':')
1026                 return;
1027
1028         e = l;
1029         l--;
1030
1031         if (p[l-1] == ']') {
1032                 size_t k = l-1;
1033
1034                 for (;;) {
1035
1036                         if (p[k] == '[') {
1037                                 t = strndup(p+k+1, l-k-2);
1038                                 if (t)
1039                                         *pid = t;
1040
1041                                 l = k;
1042                                 break;
1043                         }
1044
1045                         if (k == 0)
1046                                 break;
1047
1048                         k--;
1049                 }
1050         }
1051
1052         t = strndup(p, l);
1053         if (t)
1054                 *identifier = t;
1055
1056         *buf = p + e;
1057         *buf += strspn(*buf, WHITESPACE);
1058 }
1059
1060 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv, const char *label, size_t label_len) {
1061         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
1062         struct iovec iovec[N_IOVEC_META_FIELDS + 6];
1063         unsigned n = 0;
1064         int priority = LOG_USER | LOG_INFO;
1065         char *identifier = NULL, *pid = NULL;
1066
1067         assert(s);
1068         assert(buf);
1069
1070         if (s->forward_to_syslog)
1071                 forward_syslog_raw(s, buf, ucred, tv);
1072
1073         parse_syslog_priority((char**) &buf, &priority);
1074         skip_syslog_date((char**) &buf);
1075         read_identifier(&buf, &identifier, &pid);
1076
1077         if (s->forward_to_kmsg)
1078                 forward_kmsg(s, priority, identifier, buf, ucred);
1079
1080         if (s->forward_to_console)
1081                 forward_console(s, identifier, buf, ucred);
1082
1083         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
1084
1085         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1086                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1087
1088         if (priority & LOG_FACMASK)
1089                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1090                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1091
1092         if (identifier) {
1093                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1094                 if (syslog_identifier)
1095                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1096         }
1097
1098         if (pid) {
1099                 syslog_pid = strappend("SYSLOG_PID=", pid);
1100                 if (syslog_pid)
1101                         IOVEC_SET_STRING(iovec[n++], syslog_pid);
1102         }
1103
1104         message = strappend("MESSAGE=", buf);
1105         if (message)
1106                 IOVEC_SET_STRING(iovec[n++], message);
1107
1108         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, label, label_len, priority);
1109
1110         free(message);
1111         free(identifier);
1112         free(pid);
1113         free(syslog_priority);
1114         free(syslog_facility);
1115         free(syslog_identifier);
1116 }
1117
/* Tells whether the first l bytes of p form a field name that clients
 * may set themselves.
 *
 * This roughly follows the POSIX syntax recommendations for environment
 * variable names, plus a few additional restrictions:
 *
 * http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
 *
 * Accepted are names of 1 to 64 characters made up of A-Z, 0-9 and '_'
 * only, which begin neither with a digit nor with an underscore (the
 * latter is reserved for protected fields). */
static bool valid_user_field(const char *p, size_t l) {
        size_t i;

        /* Neither empty nor overly long names */
        if (l == 0 || l > 64)
                return false;

        /* The first character is restricted further: no underscore
         * (protected fields) and no digit */
        if (p[0] == '_')
                return false;

        if (p[0] >= '0' && p[0] <= '9')
                return false;

        for (i = 0; i < l; i++) {
                char c = p[i];
                bool is_upper = c >= 'A' && c <= 'Z';
                bool is_digit = c >= '0' && c <= '9';

                if (!is_upper && !is_digit && c != '_')
                        return false;
        }

        return true;
}
1152
1153 static void process_native_message(
1154                 Server *s,
1155                 const void *buffer, size_t buffer_size,
1156                 struct ucred *ucred,
1157                 struct timeval *tv,
1158                 const char *label, size_t label_len) {
1159
1160         struct iovec *iovec = NULL;
1161         unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1162         const char *p;
1163         size_t remaining;
1164         int priority = LOG_INFO;
1165         char *identifier = NULL, *message = NULL;
1166
1167         assert(s);
1168         assert(buffer || buffer_size == 0);
1169
1170         p = buffer;
1171         remaining = buffer_size;
1172
1173         while (remaining > 0) {
1174                 const char *e, *q;
1175
1176                 e = memchr(p, '\n', remaining);
1177
1178                 if (!e) {
1179                         /* Trailing noise, let's ignore it, and flush what we collected */
1180                         log_debug("Received message with trailing noise, ignoring.");
1181                         break;
1182                 }
1183
1184                 if (e == p) {
1185                         /* Entry separator */
1186                         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, priority);
1187                         n = 0;
1188                         priority = LOG_INFO;
1189
1190                         p++;
1191                         remaining--;
1192                         continue;
1193                 }
1194
1195                 if (*p == '.' || *p == '#') {
1196                         /* Ignore control commands for now, and
1197                          * comments too. */
1198                         remaining -= (e - p) + 1;
1199                         p = e + 1;
1200                         continue;
1201                 }
1202
1203                 /* A property follows */
1204
1205                 if (n+N_IOVEC_META_FIELDS >= m) {
1206                         struct iovec *c;
1207                         unsigned u;
1208
1209                         u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1210                         c = realloc(iovec, u * sizeof(struct iovec));
1211                         if (!c) {
1212                                 log_error("Out of memory");
1213                                 break;
1214                         }
1215
1216                         iovec = c;
1217                         m = u;
1218                 }
1219
1220                 q = memchr(p, '=', e - p);
1221                 if (q) {
1222                         if (valid_user_field(p, q - p)) {
1223                                 size_t l;
1224
1225                                 l = e - p;
1226
1227                                 /* If the field name starts with an
1228                                  * underscore, skip the variable,
1229                                  * since that indidates a trusted
1230                                  * field */
1231                                 iovec[n].iov_base = (char*) p;
1232                                 iovec[n].iov_len = l;
1233                                 n++;
1234
1235                                 /* We need to determine the priority
1236                                  * of this entry for the rate limiting
1237                                  * logic */
1238                                 if (l == 10 &&
1239                                     memcmp(p, "PRIORITY=", 9) == 0 &&
1240                                     p[9] >= '0' && p[9] <= '9')
1241                                         priority = (priority & LOG_FACMASK) | (p[9] - '0');
1242
1243                                 else if (l == 17 &&
1244                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1245                                          p[16] >= '0' && p[16] <= '9')
1246                                         priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1247
1248                                 else if (l == 18 &&
1249                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1250                                          p[16] >= '0' && p[16] <= '9' &&
1251                                          p[17] >= '0' && p[17] <= '9')
1252                                         priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1253
1254                                 else if (l >= 19 &&
1255                                          memcmp(p, "SYSLOG_IDENTIFIER=", 18) == 0) {
1256                                         char *t;
1257
1258                                         t = strndup(p + 18, l - 18);
1259                                         if (t) {
1260                                                 free(identifier);
1261                                                 identifier = t;
1262                                         }
1263                                 } else if (l >= 8 &&
1264                                            memcmp(p, "MESSAGE=", 8) == 0) {
1265                                         char *t;
1266
1267                                         t = strndup(p + 8, l - 8);
1268                                         if (t) {
1269                                                 free(message);
1270                                                 message = t;
1271                                         }
1272                                 }
1273                         }
1274
1275                         remaining -= (e - p) + 1;
1276                         p = e + 1;
1277                         continue;
1278                 } else {
1279                         le64_t l_le;
1280                         uint64_t l;
1281                         char *k;
1282
1283                         if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1284                                 log_debug("Failed to parse message, ignoring.");
1285                                 break;
1286                         }
1287
1288                         memcpy(&l_le, e + 1, sizeof(uint64_t));
1289                         l = le64toh(l_le);
1290
1291                         if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1292                             e[1+sizeof(uint64_t)+l] != '\n') {
1293                                 log_debug("Failed to parse message, ignoring.");
1294                                 break;
1295                         }
1296
1297                         k = malloc((e - p) + 1 + l);
1298                         if (!k) {
1299                                 log_error("Out of memory");
1300                                 break;
1301                         }
1302
1303                         memcpy(k, p, e - p);
1304                         k[e - p] = '=';
1305                         memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1306
1307                         if (valid_user_field(p, e - p)) {
1308                                 iovec[n].iov_base = k;
1309                                 iovec[n].iov_len = (e - p) + 1 + l;
1310                                 n++;
1311                         } else
1312                                 free(k);
1313
1314                         remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1315                         p = e + 1 + sizeof(uint64_t) + l + 1;
1316                 }
1317         }
1318
1319         if (n <= 0)
1320                 goto finish;
1321
1322         tn = n++;
1323         IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1324
1325         if (message) {
1326                 if (s->forward_to_syslog)
1327                         forward_syslog(s, priority, identifier, message, ucred, tv);
1328
1329                 if (s->forward_to_kmsg)
1330                         forward_kmsg(s, priority, identifier, message, ucred);
1331
1332                 if (s->forward_to_console)
1333                         forward_console(s, identifier, message, ucred);
1334         }
1335
1336         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, priority);
1337
1338 finish:
1339         for (j = 0; j < n; j++)  {
1340                 if (j == tn)
1341                         continue;
1342
1343                 if (iovec[j].iov_base < buffer ||
1344                     (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1345                         free(iovec[j].iov_base);
1346         }
1347
1348         free(iovec);
1349         free(identifier);
1350         free(message);
1351 }
1352
1353 static void process_native_file(
1354                 Server *s,
1355                 int fd,
1356                 struct ucred *ucred,
1357                 struct timeval *tv,
1358                 const char *label, size_t label_len) {
1359
1360         struct stat st;
1361         void *p;
1362         ssize_t n;
1363
1364         assert(s);
1365         assert(fd >= 0);
1366
1367         /* Data is in the passed file, since it didn't fit in a
1368          * datagram. We can't map the file here, since clients might
1369          * then truncate it and trigger a SIGBUS for us. So let's
1370          * stupidly read it */
1371
1372         if (fstat(fd, &st) < 0) {
1373                 log_error("Failed to stat passed file, ignoring: %m");
1374                 return;
1375         }
1376
1377         if (!S_ISREG(st.st_mode)) {
1378                 log_error("File passed is not regular. Ignoring.");
1379                 return;
1380         }
1381
1382         if (st.st_size <= 0)
1383                 return;
1384
1385         if (st.st_size > ENTRY_SIZE_MAX) {
1386                 log_error("File passed too large. Ignoring.");
1387                 return;
1388         }
1389
1390         p = malloc(st.st_size);
1391         if (!p) {
1392                 log_error("Out of memory");
1393                 return;
1394         }
1395
1396         n = pread(fd, p, st.st_size, 0);
1397         if (n < 0)
1398                 log_error("Failed to read file, ignoring: %s", strerror(-n));
1399         else if (n > 0)
1400                 process_native_message(s, p, n, ucred, tv, label, label_len);
1401
1402         free(p);
1403 }
1404
1405 static int stdout_stream_log(StdoutStream *s, const char *p) {
1406         struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1407         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1408         unsigned n = 0;
1409         int priority;
1410         char *label = NULL;
1411         size_t label_len = 0;
1412
1413         assert(s);
1414         assert(p);
1415
1416         if (isempty(p))
1417                 return 0;
1418
1419         priority = s->priority;
1420
1421         if (s->level_prefix)
1422                 parse_syslog_priority((char**) &p, &priority);
1423
1424         if (s->forward_to_syslog || s->server->forward_to_syslog)
1425                 forward_syslog(s->server, fixup_priority(priority), s->identifier, p, &s->ucred, NULL);
1426
1427         if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1428                 forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1429
1430         if (s->forward_to_console || s->server->forward_to_console)
1431                 forward_console(s->server, s->identifier, p, &s->ucred);
1432
1433         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1434
1435         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1436                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1437
1438         if (priority & LOG_FACMASK)
1439                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1440                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1441
1442         if (s->identifier) {
1443                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1444                 if (syslog_identifier)
1445                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1446         }
1447
1448         message = strappend("MESSAGE=", p);
1449         if (message)
1450                 IOVEC_SET_STRING(iovec[n++], message);
1451
1452 #ifdef HAVE_SELINUX
1453         if (s->security_context) {
1454                 label = (char*) s->security_context;
1455                 label_len = strlen((char*) s->security_context);
1456         }
1457 #endif
1458
1459         dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, label, label_len, priority);
1460
1461         free(message);
1462         free(syslog_priority);
1463         free(syslog_facility);
1464         free(syslog_identifier);
1465
1466         return 0;
1467 }
1468
1469 static int stdout_stream_line(StdoutStream *s, char *p) {
1470         int r;
1471
1472         assert(s);
1473         assert(p);
1474
1475         p = strstrip(p);
1476
1477         switch (s->state) {
1478
1479         case STDOUT_STREAM_IDENTIFIER:
1480                 if (isempty(p))
1481                         s->identifier = NULL;
1482                 else  {
1483                         s->identifier = strdup(p);
1484                         if (!s->identifier) {
1485                                 log_error("Out of memory");
1486                                 return -ENOMEM;
1487                         }
1488                 }
1489
1490                 s->state = STDOUT_STREAM_PRIORITY;
1491                 return 0;
1492
1493         case STDOUT_STREAM_PRIORITY:
1494                 r = safe_atoi(p, &s->priority);
1495                 if (r < 0 || s->priority <= 0 || s->priority >= 999) {
1496                         log_warning("Failed to parse log priority line.");
1497                         return -EINVAL;
1498                 }
1499
1500                 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1501                 return 0;
1502
1503         case STDOUT_STREAM_LEVEL_PREFIX:
1504                 r = parse_boolean(p);
1505                 if (r < 0) {
1506                         log_warning("Failed to parse level prefix line.");
1507                         return -EINVAL;
1508                 }
1509
1510                 s->level_prefix = !!r;
1511                 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1512                 return 0;
1513
1514         case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1515                 r = parse_boolean(p);
1516                 if (r < 0) {
1517                         log_warning("Failed to parse forward to syslog line.");
1518                         return -EINVAL;
1519                 }
1520
1521                 s->forward_to_syslog = !!r;
1522                 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1523                 return 0;
1524
1525         case STDOUT_STREAM_FORWARD_TO_KMSG:
1526                 r = parse_boolean(p);
1527                 if (r < 0) {
1528                         log_warning("Failed to parse copy to kmsg line.");
1529                         return -EINVAL;
1530                 }
1531
1532                 s->forward_to_kmsg = !!r;
1533                 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1534                 return 0;
1535
1536         case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1537                 r = parse_boolean(p);
1538                 if (r < 0) {
1539                         log_warning("Failed to parse copy to console line.");
1540                         return -EINVAL;
1541                 }
1542
1543                 s->forward_to_console = !!r;
1544                 s->state = STDOUT_STREAM_RUNNING;
1545                 return 0;
1546
1547         case STDOUT_STREAM_RUNNING:
1548                 return stdout_stream_log(s, p);
1549         }
1550
1551         assert_not_reached("Unknown stream state");
1552 }
1553
1554 static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
1555         char *p;
1556         size_t remaining;
1557         int r;
1558
1559         assert(s);
1560
1561         p = s->buffer;
1562         remaining = s->length;
1563         for (;;) {
1564                 char *end;
1565                 size_t skip;
1566
1567                 end = memchr(p, '\n', remaining);
1568                 if (end)
1569                         skip = end - p + 1;
1570                 else if (remaining >= sizeof(s->buffer) - 1) {
1571                         end = p + sizeof(s->buffer) - 1;
1572                         skip = remaining;
1573                 } else
1574                         break;
1575
1576                 *end = 0;
1577
1578                 r = stdout_stream_line(s, p);
1579                 if (r < 0)
1580                         return r;
1581
1582                 remaining -= skip;
1583                 p += skip;
1584         }
1585
1586         if (force_flush && remaining > 0) {
1587                 p[remaining] = 0;
1588                 r = stdout_stream_line(s, p);
1589                 if (r < 0)
1590                         return r;
1591
1592                 p += remaining;
1593                 remaining = 0;
1594         }
1595
1596         if (p > s->buffer) {
1597                 memmove(s->buffer, p, remaining);
1598                 s->length = remaining;
1599         }
1600
1601         return 0;
1602 }
1603
1604 static int stdout_stream_process(StdoutStream *s) {
1605         ssize_t l;
1606         int r;
1607
1608         assert(s);
1609
1610         l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1611         if (l < 0) {
1612
1613                 if (errno == EAGAIN)
1614                         return 0;
1615
1616                 log_warning("Failed to read from stream: %m");
1617                 return -errno;
1618         }
1619
1620         if (l == 0) {
1621                 r = stdout_stream_scan(s, true);
1622                 if (r < 0)
1623                         return r;
1624
1625                 return 0;
1626         }
1627
1628         s->length += l;
1629         r = stdout_stream_scan(s, false);
1630         if (r < 0)
1631                 return r;
1632
1633         return 1;
1634
1635 }
1636
1637 static void stdout_stream_free(StdoutStream *s) {
1638         assert(s);
1639
1640         if (s->server) {
1641                 assert(s->server->n_stdout_streams > 0);
1642                 s->server->n_stdout_streams --;
1643                 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
1644         }
1645
1646         if (s->fd >= 0) {
1647                 if (s->server)
1648                         epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1649
1650                 close_nointr_nofail(s->fd);
1651         }
1652
1653 #ifdef HAVE_SELINUX
1654         if (s->security_context)
1655                 freecon(s->security_context);
1656 #endif
1657
1658         free(s->identifier);
1659         free(s);
1660 }
1661
1662 static int stdout_stream_new(Server *s) {
1663         StdoutStream *stream;
1664         int fd, r;
1665         socklen_t len;
1666         struct epoll_event ev;
1667
1668         assert(s);
1669
1670         fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1671         if (fd < 0) {
1672                 if (errno == EAGAIN)
1673                         return 0;
1674
1675                 log_error("Failed to accept stdout connection: %m");
1676                 return -errno;
1677         }
1678
1679         if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1680                 log_warning("Too many stdout streams, refusing connection.");
1681                 close_nointr_nofail(fd);
1682                 return 0;
1683         }
1684
1685         stream = new0(StdoutStream, 1);
1686         if (!stream) {
1687                 log_error("Out of memory.");
1688                 close_nointr_nofail(fd);
1689                 return -ENOMEM;
1690         }
1691
1692         stream->fd = fd;
1693
1694         len = sizeof(stream->ucred);
1695         if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1696                 log_error("Failed to determine peer credentials: %m");
1697                 r = -errno;
1698                 goto fail;
1699         }
1700
1701 #ifdef HAVE_SELINUX
1702         if (getpeercon(fd, &stream->security_context) < 0 && errno != ENOPROTOOPT)
1703                 log_error("Failed to determine peer security context: %m");
1704 #endif
1705
1706         if (shutdown(fd, SHUT_WR) < 0) {
1707                 log_error("Failed to shutdown writing side of socket: %m");
1708                 r = -errno;
1709                 goto fail;
1710         }
1711
1712         zero(ev);
1713         ev.data.ptr = stream;
1714         ev.events = EPOLLIN;
1715         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1716                 log_error("Failed to add stream to event loop: %m");
1717                 r = -errno;
1718                 goto fail;
1719         }
1720
1721         stream->server = s;
1722         LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1723         s->n_stdout_streams ++;
1724
1725         return 0;
1726
1727 fail:
1728         stdout_stream_free(stream);
1729         return r;
1730 }
1731
1732 static int parse_kernel_timestamp(char **_p, usec_t *t) {
1733         usec_t r;
1734         int k, i;
1735         char *p;
1736
1737         assert(_p);
1738         assert(*_p);
1739         assert(t);
1740
1741         p = *_p;
1742
1743         if (strlen(p) < 14 || p[0] != '[' || p[13] != ']' || p[6] != '.')
1744                 return 0;
1745
1746         r = 0;
1747
1748         for (i = 1; i <= 5; i++) {
1749                 r *= 10;
1750
1751                 if (p[i] == ' ')
1752                         continue;
1753
1754                 k = undecchar(p[i]);
1755                 if (k < 0)
1756                         return 0;
1757
1758                 r += k;
1759         }
1760
1761         for (i = 7; i <= 12; i++) {
1762                 r *= 10;
1763
1764                 k = undecchar(p[i]);
1765                 if (k < 0)
1766                         return 0;
1767
1768                 r += k;
1769         }
1770
1771         *t = r;
1772         *_p += 14;
1773         *_p += strspn(*_p, WHITESPACE);
1774
1775         return 1;
1776 }
1777
/* Returns true if the string pid can be parsed by parse_pid() and
 * denotes the PID of the calling process, false otherwise. */
static bool is_us(const char *pid) {
        pid_t parsed;

        assert(pid);

        return parse_pid(pid, &parsed) >= 0 && parsed == getpid();
}
1788
1789 static void proc_kmsg_line(Server *s, const char *p) {
1790         struct iovec iovec[N_IOVEC_META_FIELDS + 7];
1791         char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
1792         int priority = LOG_KERN | LOG_INFO;
1793         unsigned n = 0;
1794         usec_t usec;
1795         char *identifier = NULL, *pid = NULL;
1796
1797         assert(s);
1798         assert(p);
1799
1800         if (isempty(p))
1801                 return;
1802
1803         parse_syslog_priority((char **) &p, &priority);
1804
1805         if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
1806                 return;
1807
1808         if (parse_kernel_timestamp((char **) &p, &usec) > 0) {
1809                 if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
1810                              (unsigned long long) usec) >= 0)
1811                         IOVEC_SET_STRING(iovec[n++], source_time);
1812         }
1813
1814         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
1815
1816         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1817                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1818
1819         if ((priority & LOG_FACMASK) == LOG_KERN) {
1820
1821                 if (s->forward_to_syslog)
1822                         forward_syslog(s, priority, "kernel", p, NULL, NULL);
1823
1824                 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
1825         } else {
1826                 read_identifier(&p, &identifier, &pid);
1827
1828                 /* Avoid any messages we generated ourselves via
1829                  * log_info() and friends. */
1830                 if (pid && is_us(pid))
1831                         goto finish;
1832
1833                 if (s->forward_to_syslog)
1834                         forward_syslog(s, priority, identifier, p, NULL, NULL);
1835
1836                 if (identifier) {
1837                         syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1838                         if (syslog_identifier)
1839                                 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1840                 }
1841
1842                 if (pid) {
1843                         syslog_pid = strappend("SYSLOG_PID=", pid);
1844                         if (syslog_pid)
1845                                 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1846                 }
1847
1848                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1849                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1850         }
1851
1852         message = strappend("MESSAGE=", p);
1853         if (message)
1854                 IOVEC_SET_STRING(iovec[n++], message);
1855
1856         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, priority);
1857
1858 finish:
1859         free(message);
1860         free(syslog_priority);
1861         free(syslog_identifier);
1862         free(syslog_pid);
1863         free(syslog_facility);
1864         free(source_time);
1865         free(identifier);
1866         free(pid);
1867 }
1868
1869 static void proc_kmsg_scan(Server *s) {
1870         char *p;
1871         size_t remaining;
1872
1873         assert(s);
1874
1875         p = s->proc_kmsg_buffer;
1876         remaining = s->proc_kmsg_length;
1877         for (;;) {
1878                 char *end;
1879                 size_t skip;
1880
1881                 end = memchr(p, '\n', remaining);
1882                 if (end)
1883                         skip = end - p + 1;
1884                 else if (remaining >= sizeof(s->proc_kmsg_buffer) - 1) {
1885                         end = p + sizeof(s->proc_kmsg_buffer) - 1;
1886                         skip = remaining;
1887                 } else
1888                         break;
1889
1890                 *end = 0;
1891
1892                 proc_kmsg_line(s, p);
1893
1894                 remaining -= skip;
1895                 p += skip;
1896         }
1897
1898         if (p > s->proc_kmsg_buffer) {
1899                 memmove(s->proc_kmsg_buffer, p, remaining);
1900                 s->proc_kmsg_length = remaining;
1901         }
1902 }
1903
1904 static int system_journal_open(Server *s) {
1905         int r;
1906         char *fn;
1907         sd_id128_t machine;
1908         char ids[33];
1909
1910         r = sd_id128_get_machine(&machine);
1911         if (r < 0)
1912                 return r;
1913
1914         sd_id128_to_string(machine, ids);
1915
1916         if (!s->system_journal) {
1917
1918                 /* First try to create the machine path, but not the prefix */
1919                 fn = strappend("/var/log/journal/", ids);
1920                 if (!fn)
1921                         return -ENOMEM;
1922                 (void) mkdir(fn, 0755);
1923                 free(fn);
1924
1925                 /* The create the system journal file */
1926                 fn = join("/var/log/journal/", ids, "/system.journal", NULL);
1927                 if (!fn)
1928                         return -ENOMEM;
1929
1930                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, NULL, &s->system_journal);
1931                 free(fn);
1932
1933                 if (r >= 0) {
1934                         journal_default_metrics(&s->system_metrics, s->system_journal->fd);
1935
1936                         s->system_journal->metrics = s->system_metrics;
1937                         s->system_journal->compress = s->compress;
1938
1939                         server_fix_perms(s, s->system_journal, 0);
1940                 } else if (r < 0) {
1941
1942                         if (r != -ENOENT && r != -EROFS)
1943                                 log_warning("Failed to open system journal: %s", strerror(-r));
1944
1945                         r = 0;
1946                 }
1947         }
1948
1949         if (!s->runtime_journal) {
1950
1951                 fn = join("/run/log/journal/", ids, "/system.journal", NULL);
1952                 if (!fn)
1953                         return -ENOMEM;
1954
1955                 if (s->system_journal) {
1956
1957                         /* Try to open the runtime journal, but only
1958                          * if it already exists, so that we can flush
1959                          * it into the system journal */
1960
1961                         r = journal_file_open(fn, O_RDWR, 0640, NULL, &s->runtime_journal);
1962                         free(fn);
1963
1964                         if (r < 0) {
1965                                 if (r != -ENOENT)
1966                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
1967
1968                                 r = 0;
1969                         }
1970
1971                 } else {
1972
1973                         /* OK, we really need the runtime journal, so create
1974                          * it if necessary. */
1975
1976                         (void) mkdir_parents(fn, 0755);
1977                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, NULL, &s->runtime_journal);
1978                         free(fn);
1979
1980                         if (r < 0) {
1981                                 log_error("Failed to open runtime journal: %s", strerror(-r));
1982                                 return r;
1983                         }
1984                 }
1985
1986                 if (s->runtime_journal) {
1987                         journal_default_metrics(&s->runtime_metrics, s->runtime_journal->fd);
1988
1989                         s->runtime_journal->metrics = s->runtime_metrics;
1990                         s->runtime_journal->compress = s->compress;
1991
1992                         server_fix_perms(s, s->runtime_journal, 0);
1993                 }
1994         }
1995
1996         return r;
1997 }
1998
1999 static int server_flush_to_var(Server *s) {
2000         char path[] = "/run/log/journal/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
2001         Object *o = NULL;
2002         int r;
2003         sd_id128_t machine;
2004         sd_journal *j;
2005         usec_t ts;
2006
2007         assert(s);
2008
2009         if (!s->runtime_journal)
2010                 return 0;
2011
2012         ts = now(CLOCK_MONOTONIC);
2013         if (s->var_available_timestamp + RECHECK_VAR_AVAILABLE_USEC > ts)
2014                 return 0;
2015
2016         s->var_available_timestamp = ts;
2017
2018         system_journal_open(s);
2019
2020         if (!s->system_journal)
2021                 return 0;
2022
2023         log_info("Flushing to /var...");
2024
2025         r = sd_id128_get_machine(&machine);
2026         if (r < 0) {
2027                 log_error("Failed to get machine id: %s", strerror(-r));
2028                 return r;
2029         }
2030
2031         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
2032         if (r < 0) {
2033                 log_error("Failed to read runtime journal: %s", strerror(-r));
2034                 return r;
2035         }
2036
2037         SD_JOURNAL_FOREACH(j) {
2038                 JournalFile *f;
2039
2040                 f = j->current_file;
2041                 assert(f && f->current_offset > 0);
2042
2043                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2044                 if (r < 0) {
2045                         log_error("Can't read entry: %s", strerror(-r));
2046                         goto finish;
2047                 }
2048
2049                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2050                 if (r == -E2BIG) {
2051                         log_info("Allocation limit reached.");
2052
2053                         journal_file_post_change(s->system_journal);
2054                         server_rotate(s);
2055                         server_vacuum(s);
2056
2057                         r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2058                 }
2059
2060                 if (r < 0) {
2061                         log_error("Can't write entry: %s", strerror(-r));
2062                         goto finish;
2063                 }
2064         }
2065
2066 finish:
2067         journal_file_post_change(s->system_journal);
2068
2069         journal_file_close(s->runtime_journal);
2070         s->runtime_journal = NULL;
2071
2072         if (r >= 0) {
2073                 sd_id128_to_string(machine, path + 17);
2074                 rm_rf(path, false, true, false);
2075         }
2076
2077         return r;
2078 }
2079
2080 static int server_read_proc_kmsg(Server *s) {
2081         ssize_t l;
2082         assert(s);
2083         assert(s->proc_kmsg_fd >= 0);
2084
2085         l = read(s->proc_kmsg_fd, s->proc_kmsg_buffer + s->proc_kmsg_length, sizeof(s->proc_kmsg_buffer) - 1 - s->proc_kmsg_length);
2086         if (l < 0) {
2087
2088                 if (errno == EAGAIN || errno == EINTR)
2089                         return 0;
2090
2091                 log_error("Failed to read from kernel: %m");
2092                 return -errno;
2093         }
2094
2095         s->proc_kmsg_length += l;
2096
2097         proc_kmsg_scan(s);
2098         return 1;
2099 }
2100
2101 static int server_flush_proc_kmsg(Server *s) {
2102         int r;
2103
2104         assert(s);
2105
2106         if (s->proc_kmsg_fd < 0)
2107                 return 0;
2108
2109         log_info("Flushing /proc/kmsg...");
2110
2111         for (;;) {
2112                 r = server_read_proc_kmsg(s);
2113                 if (r < 0)
2114                         return r;
2115
2116                 if (r == 0)
2117                         break;
2118         }
2119
2120         return 0;
2121 }
2122
2123 static int process_event(Server *s, struct epoll_event *ev) {
2124         assert(s);
2125
2126         if (ev->data.fd == s->signal_fd) {
2127                 struct signalfd_siginfo sfsi;
2128                 ssize_t n;
2129
2130                 if (ev->events != EPOLLIN) {
2131                         log_info("Got invalid event from epoll.");
2132                         return -EIO;
2133                 }
2134
2135                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
2136                 if (n != sizeof(sfsi)) {
2137
2138                         if (n >= 0)
2139                                 return -EIO;
2140
2141                         if (errno == EINTR || errno == EAGAIN)
2142                                 return 1;
2143
2144                         return -errno;
2145                 }
2146
2147                 if (sfsi.ssi_signo == SIGUSR1) {
2148                         server_flush_to_var(s);
2149                         return 0;
2150                 }
2151
2152                 log_debug("Received SIG%s", signal_to_string(sfsi.ssi_signo));
2153                 return 0;
2154
2155         } else if (ev->data.fd == s->proc_kmsg_fd) {
2156                 int r;
2157
2158                 if (ev->events != EPOLLIN) {
2159                         log_info("Got invalid event from epoll.");
2160                         return -EIO;
2161                 }
2162
2163                 r = server_read_proc_kmsg(s);
2164                 if (r < 0)
2165                         return r;
2166
2167                 return 1;
2168
2169         } else if (ev->data.fd == s->native_fd ||
2170                    ev->data.fd == s->syslog_fd) {
2171
2172                 if (ev->events != EPOLLIN) {
2173                         log_info("Got invalid event from epoll.");
2174                         return -EIO;
2175                 }
2176
2177                 for (;;) {
2178                         struct msghdr msghdr;
2179                         struct iovec iovec;
2180                         struct ucred *ucred = NULL;
2181                         struct timeval *tv = NULL;
2182                         struct cmsghdr *cmsg;
2183                         char *label = NULL;
2184                         size_t label_len = 0;
2185                         union {
2186                                 struct cmsghdr cmsghdr;
2187
2188                                 /* We use NAME_MAX space for the
2189                                  * SELinux label here. The kernel
2190                                  * currently enforces no limit, but
2191                                  * according to suggestions from the
2192                                  * SELinux people this will change and
2193                                  * it will probably be identical to
2194                                  * NAME_MAX. For now we use that, but
2195                                  * this should be updated one day when
2196                                  * the final limit is known.*/
2197                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
2198                                             CMSG_SPACE(sizeof(struct timeval)) +
2199                                             CMSG_SPACE(sizeof(int)) + /* fd */
2200                                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
2201                         } control;
2202                         ssize_t n;
2203                         int v;
2204                         int *fds = NULL;
2205                         unsigned n_fds = 0;
2206
2207                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
2208                                 log_error("SIOCINQ failed: %m");
2209                                 return -errno;
2210                         }
2211
2212                         if (s->buffer_size < (size_t) v) {
2213                                 void *b;
2214                                 size_t l;
2215
2216                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
2217                                 b = realloc(s->buffer, l+1);
2218
2219                                 if (!b) {
2220                                         log_error("Couldn't increase buffer.");
2221                                         return -ENOMEM;
2222                                 }
2223
2224                                 s->buffer_size = l;
2225                                 s->buffer = b;
2226                         }
2227
2228                         zero(iovec);
2229                         iovec.iov_base = s->buffer;
2230                         iovec.iov_len = s->buffer_size;
2231
2232                         zero(control);
2233                         zero(msghdr);
2234                         msghdr.msg_iov = &iovec;
2235                         msghdr.msg_iovlen = 1;
2236                         msghdr.msg_control = &control;
2237                         msghdr.msg_controllen = sizeof(control);
2238
2239                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
2240                         if (n < 0) {
2241
2242                                 if (errno == EINTR || errno == EAGAIN)
2243                                         return 1;
2244
2245                                 log_error("recvmsg() failed: %m");
2246                                 return -errno;
2247                         }
2248
2249                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
2250
2251                                 if (cmsg->cmsg_level == SOL_SOCKET &&
2252                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
2253                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
2254                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
2255                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2256                                          cmsg->cmsg_type == SCM_SECURITY) {
2257                                         label = (char*) CMSG_DATA(cmsg);
2258                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
2259                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
2260                                          cmsg->cmsg_type == SO_TIMESTAMP &&
2261                                          cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
2262                                         tv = (struct timeval*) CMSG_DATA(cmsg);
2263                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2264                                          cmsg->cmsg_type == SCM_RIGHTS) {
2265                                         fds = (int*) CMSG_DATA(cmsg);
2266                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
2267                                 }
2268                         }
2269
2270                         if (ev->data.fd == s->syslog_fd) {
2271                                 char *e;
2272
2273                                 if (n > 0 && n_fds == 0) {
2274                                         e = memchr(s->buffer, '\n', n);
2275                                         if (e)
2276                                                 *e = 0;
2277                                         else
2278                                                 s->buffer[n] = 0;
2279
2280                                         process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
2281                                 } else if (n_fds > 0)
2282                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
2283
2284                         } else {
2285                                 if (n > 0 && n_fds == 0)
2286                                         process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
2287                                 else if (n == 0 && n_fds == 1)
2288                                         process_native_file(s, fds[0], ucred, tv, label, label_len);
2289                                 else if (n_fds > 0)
2290                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
2291                         }
2292
2293                         close_many(fds, n_fds);
2294                 }
2295
2296                 return 1;
2297
2298         } else if (ev->data.fd == s->stdout_fd) {
2299
2300                 if (ev->events != EPOLLIN) {
2301                         log_info("Got invalid event from epoll.");
2302                         return -EIO;
2303                 }
2304
2305                 stdout_stream_new(s);
2306                 return 1;
2307
2308         } else {
2309                 StdoutStream *stream;
2310
2311                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
2312                         log_info("Got invalid event from epoll.");
2313                         return -EIO;
2314                 }
2315
2316                 /* If it is none of the well-known fds, it must be an
2317                  * stdout stream fd. Note that this is a bit ugly here
2318                  * (since we rely that none of the well-known fds
2319                  * could be interpreted as pointer), but nonetheless
2320                  * safe, since the well-known fds would never get an
2321                  * fd > 4096, i.e. beyond the first memory page */
2322
2323                 stream = ev->data.ptr;
2324
2325                 if (stdout_stream_process(stream) <= 0)
2326                         stdout_stream_free(stream);
2327
2328                 return 1;
2329         }
2330
2331         log_error("Unknown event.");
2332         return 0;
2333 }
2334
2335 static int open_syslog_socket(Server *s) {
2336         union sockaddr_union sa;
2337         int one, r;
2338         struct epoll_event ev;
2339
2340         assert(s);
2341
2342         if (s->syslog_fd < 0) {
2343
2344                 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2345                 if (s->syslog_fd < 0) {
2346                         log_error("socket() failed: %m");
2347                         return -errno;
2348                 }
2349
2350                 zero(sa);
2351                 sa.un.sun_family = AF_UNIX;
2352                 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
2353
2354                 unlink(sa.un.sun_path);
2355
2356                 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2357                 if (r < 0) {
2358                         log_error("bind() failed: %m");
2359                         return -errno;
2360                 }
2361
2362                 chmod(sa.un.sun_path, 0666);
2363         } else
2364                 fd_nonblock(s->syslog_fd, 1);
2365
2366         one = 1;
2367         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2368         if (r < 0) {
2369                 log_error("SO_PASSCRED failed: %m");
2370                 return -errno;
2371         }
2372
2373 #ifdef HAVE_SELINUX
2374         one = 1;
2375         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2376         if (r < 0)
2377                 log_warning("SO_PASSSEC failed: %m");
2378 #endif
2379
2380         one = 1;
2381         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2382         if (r < 0) {
2383                 log_error("SO_TIMESTAMP failed: %m");
2384                 return -errno;
2385         }
2386
2387         zero(ev);
2388         ev.events = EPOLLIN;
2389         ev.data.fd = s->syslog_fd;
2390         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2391                 log_error("Failed to add syslog server fd to epoll object: %m");
2392                 return -errno;
2393         }
2394
2395         return 0;
2396 }
2397
2398 static int open_native_socket(Server*s) {
2399         union sockaddr_union sa;
2400         int one, r;
2401         struct epoll_event ev;
2402
2403         assert(s);
2404
2405         if (s->native_fd < 0) {
2406
2407                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2408                 if (s->native_fd < 0) {
2409                         log_error("socket() failed: %m");
2410                         return -errno;
2411                 }
2412
2413                 zero(sa);
2414                 sa.un.sun_family = AF_UNIX;
2415                 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
2416
2417                 unlink(sa.un.sun_path);
2418
2419                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2420                 if (r < 0) {
2421                         log_error("bind() failed: %m");
2422                         return -errno;
2423                 }
2424
2425                 chmod(sa.un.sun_path, 0666);
2426         } else
2427                 fd_nonblock(s->native_fd, 1);
2428
2429         one = 1;
2430         r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2431         if (r < 0) {
2432                 log_error("SO_PASSCRED failed: %m");
2433                 return -errno;
2434         }
2435
2436 #ifdef HAVE_SELINUX
2437         one = 1;
2438         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2439         if (r < 0)
2440                 log_warning("SO_PASSSEC failed: %m");
2441 #endif
2442
2443         one = 1;
2444         r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2445         if (r < 0) {
2446                 log_error("SO_TIMESTAMP failed: %m");
2447                 return -errno;
2448         }
2449
2450         zero(ev);
2451         ev.events = EPOLLIN;
2452         ev.data.fd = s->native_fd;
2453         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2454                 log_error("Failed to add native server fd to epoll object: %m");
2455                 return -errno;
2456         }
2457
2458         return 0;
2459 }
2460
2461 static int open_stdout_socket(Server *s) {
2462         union sockaddr_union sa;
2463         int r;
2464         struct epoll_event ev;
2465
2466         assert(s);
2467
2468         if (s->stdout_fd < 0) {
2469
2470                 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2471                 if (s->stdout_fd < 0) {
2472                         log_error("socket() failed: %m");
2473                         return -errno;
2474                 }
2475
2476                 zero(sa);
2477                 sa.un.sun_family = AF_UNIX;
2478                 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
2479
2480                 unlink(sa.un.sun_path);
2481
2482                 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2483                 if (r < 0) {
2484                         log_error("bind() failed: %m");
2485                         return -errno;
2486                 }
2487
2488                 chmod(sa.un.sun_path, 0666);
2489
2490                 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2491                         log_error("liste() failed: %m");
2492                         return -errno;
2493                 }
2494         } else
2495                 fd_nonblock(s->stdout_fd, 1);
2496
2497         zero(ev);
2498         ev.events = EPOLLIN;
2499         ev.data.fd = s->stdout_fd;
2500         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2501                 log_error("Failed to add stdout server fd to epoll object: %m");
2502                 return -errno;
2503         }
2504
2505         return 0;
2506 }
2507
2508 static int open_proc_kmsg(Server *s) {
2509         struct epoll_event ev;
2510
2511         assert(s);
2512
2513         if (!s->import_proc_kmsg)
2514                 return 0;
2515
2516         s->proc_kmsg_fd = open("/proc/kmsg", O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2517         if (s->proc_kmsg_fd < 0) {
2518                 log_warning("Failed to open /proc/kmsg, ignoring: %m");
2519                 return 0;
2520         }
2521
2522         zero(ev);
2523         ev.events = EPOLLIN;
2524         ev.data.fd = s->proc_kmsg_fd;
2525         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->proc_kmsg_fd, &ev) < 0) {
2526                 log_error("Failed to add /proc/kmsg fd to epoll object: %m");
2527                 return -errno;
2528         }
2529
2530         return 0;
2531 }
2532
2533 static int open_signalfd(Server *s) {
2534         sigset_t mask;
2535         struct epoll_event ev;
2536
2537         assert(s);
2538
2539         assert_se(sigemptyset(&mask) == 0);
2540         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, -1);
2541         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2542
2543         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2544         if (s->signal_fd < 0) {
2545                 log_error("signalfd(): %m");
2546                 return -errno;
2547         }
2548
2549         zero(ev);
2550         ev.events = EPOLLIN;
2551         ev.data.fd = s->signal_fd;
2552
2553         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2554                 log_error("epoll_ctl(): %m");
2555                 return -errno;
2556         }
2557
2558         return 0;
2559 }
2560
2561 static int server_parse_proc_cmdline(Server *s) {
2562         char *line, *w, *state;
2563         int r;
2564         size_t l;
2565
2566         if (detect_container(NULL) > 0)
2567                 return 0;
2568
2569         r = read_one_line_file("/proc/cmdline", &line);
2570         if (r < 0) {
2571                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2572                 return 0;
2573         }
2574
2575         FOREACH_WORD_QUOTED(w, l, line, state) {
2576                 char *word;
2577
2578                 word = strndup(w, l);
2579                 if (!word) {
2580                         r = -ENOMEM;
2581                         goto finish;
2582                 }
2583
2584                 if (startswith(word, "systemd_journald.forward_to_syslog=")) {
2585                         r = parse_boolean(word + 35);
2586                         if (r < 0)
2587                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2588                         else
2589                                 s->forward_to_syslog = r;
2590                 } else if (startswith(word, "systemd_journald.forward_to_kmsg=")) {
2591                         r = parse_boolean(word + 33);
2592                         if (r < 0)
2593                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2594                         else
2595                                 s->forward_to_kmsg = r;
2596                 } else if (startswith(word, "systemd_journald.forward_to_console=")) {
2597                         r = parse_boolean(word + 36);
2598                         if (r < 0)
2599                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2600                         else
2601                                 s->forward_to_console = r;
2602                 }
2603
2604                 free(word);
2605         }
2606
2607         r = 0;
2608
2609 finish:
2610         free(line);
2611         return r;
2612 }
2613
2614 static int server_parse_config_file(Server *s) {
2615         FILE *f;
2616         const char *fn;
2617         int r;
2618
2619         assert(s);
2620
2621         fn = "/etc/systemd/journald.conf";
2622         f = fopen(fn, "re");
2623         if (!f) {
2624                 if (errno == ENOENT)
2625                         return 0;
2626
2627                 log_warning("Failed to open configuration file %s: %m", fn);
2628                 return -errno;
2629         }
2630
2631         r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2632         if (r < 0)
2633                 log_warning("Failed to parse configuration file: %s", strerror(-r));
2634
2635         fclose(f);
2636
2637         return r;
2638 }
2639
2640 static int server_init(Server *s) {
2641         int n, r, fd;
2642
2643         assert(s);
2644
2645         zero(*s);
2646         s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->proc_kmsg_fd = -1;
2647         s->compress = true;
2648
2649         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
2650         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
2651
2652         s->forward_to_syslog = true;
2653         s->import_proc_kmsg = true;
2654
2655         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
2656         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
2657
2658         server_parse_config_file(s);
2659         server_parse_proc_cmdline(s);
2660
2661         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
2662         if (!s->user_journals) {
2663                 log_error("Out of memory.");
2664                 return -ENOMEM;
2665         }
2666
2667         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2668         if (s->epoll_fd < 0) {
2669                 log_error("Failed to create epoll object: %m");
2670                 return -errno;
2671         }
2672
2673         n = sd_listen_fds(true);
2674         if (n < 0) {
2675                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
2676                 return n;
2677         }
2678
2679         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2680
2681                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
2682
2683                         if (s->native_fd >= 0) {
2684                                 log_error("Too many native sockets passed.");
2685                                 return -EINVAL;
2686                         }
2687
2688                         s->native_fd = fd;
2689
2690                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
2691
2692                         if (s->stdout_fd >= 0) {
2693                                 log_error("Too many stdout sockets passed.");
2694                                 return -EINVAL;
2695                         }
2696
2697                         s->stdout_fd = fd;
2698
2699                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
2700
2701                         if (s->syslog_fd >= 0) {
2702                                 log_error("Too many /dev/log sockets passed.");
2703                                 return -EINVAL;
2704                         }
2705
2706                         s->syslog_fd = fd;
2707
2708                 } else {
2709                         log_error("Unknown socket passed.");
2710                         return -EINVAL;
2711                 }
2712         }
2713
2714         r = open_syslog_socket(s);
2715         if (r < 0)
2716                 return r;
2717
2718         r = open_native_socket(s);
2719         if (r < 0)
2720                 return r;
2721
2722         r = open_stdout_socket(s);
2723         if (r < 0)
2724                 return r;
2725
2726         r = open_proc_kmsg(s);
2727         if (r < 0)
2728                 return r;
2729
2730         r = open_signalfd(s);
2731         if (r < 0)
2732                 return r;
2733
2734         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2735         if (!s->rate_limit)
2736                 return -ENOMEM;
2737
2738         r = system_journal_open(s);
2739         if (r < 0)
2740                 return r;
2741
2742         return 0;
2743 }
2744
2745 static void server_done(Server *s) {
2746         JournalFile *f;
2747         assert(s);
2748
2749         while (s->stdout_streams)
2750                 stdout_stream_free(s->stdout_streams);
2751
2752         if (s->system_journal)
2753                 journal_file_close(s->system_journal);
2754
2755         if (s->runtime_journal)
2756                 journal_file_close(s->runtime_journal);
2757
2758         while ((f = hashmap_steal_first(s->user_journals)))
2759                 journal_file_close(f);
2760
2761         hashmap_free(s->user_journals);
2762
2763         if (s->epoll_fd >= 0)
2764                 close_nointr_nofail(s->epoll_fd);
2765
2766         if (s->signal_fd >= 0)
2767                 close_nointr_nofail(s->signal_fd);
2768
2769         if (s->syslog_fd >= 0)
2770                 close_nointr_nofail(s->syslog_fd);
2771
2772         if (s->native_fd >= 0)
2773                 close_nointr_nofail(s->native_fd);
2774
2775         if (s->stdout_fd >= 0)
2776                 close_nointr_nofail(s->stdout_fd);
2777
2778         if (s->proc_kmsg_fd >= 0)
2779                 close_nointr_nofail(s->proc_kmsg_fd);
2780
2781         if (s->rate_limit)
2782                 journal_rate_limit_free(s->rate_limit);
2783
2784         free(s->buffer);
2785 }
2786
2787 int main(int argc, char *argv[]) {
2788         Server server;
2789         int r;
2790
2791         /* if (getppid() != 1) { */
2792         /*         log_error("This program should be invoked by init only."); */
2793         /*         return EXIT_FAILURE; */
2794         /* } */
2795
2796         if (argc > 1) {
2797                 log_error("This program does not take arguments.");
2798                 return EXIT_FAILURE;
2799         }
2800
2801         log_set_target(LOG_TARGET_SAFE);
2802         log_set_facility(LOG_SYSLOG);
2803         log_parse_environment();
2804         log_open();
2805
2806         umask(0022);
2807
2808         r = server_init(&server);
2809         if (r < 0)
2810                 goto finish;
2811
2812         server_vacuum(&server);
2813         server_flush_to_var(&server);
2814         server_flush_proc_kmsg(&server);
2815
2816         log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
2817         driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
2818
2819         sd_notify(false,
2820                   "READY=1\n"
2821                   "STATUS=Processing requests...");
2822
2823         for (;;) {
2824                 struct epoll_event event;
2825
2826                 r = epoll_wait(server.epoll_fd, &event, 1, -1);
2827                 if (r < 0) {
2828
2829                         if (errno == EINTR)
2830                                 continue;
2831
2832                         log_error("epoll_wait() failed: %m");
2833                         r = -errno;
2834                         goto finish;
2835                 } else if (r == 0)
2836                         break;
2837
2838                 r = process_event(&server, &event);
2839                 if (r < 0)
2840                         goto finish;
2841                 else if (r == 0)
2842                         break;
2843         }
2844
2845         log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
2846         driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
2847
2848 finish:
2849         sd_notify(false,
2850                   "STATUS=Shutting down...");
2851
2852         server_done(&server);
2853
2854         return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
2855 }