chiark / gitweb /
fix a couple of things found with the llvm static analyzer
[elogind.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
32
33 #include <systemd/sd-journal.h>
34 #include <systemd/sd-login.h>
35 #include <systemd/sd-messages.h>
36 #include <systemd/sd-daemon.h>
37
38 #include "mkdir.h"
39 #include "hashmap.h"
40 #include "journal-file.h"
41 #include "socket-util.h"
42 #include "cgroup-util.h"
43 #include "list.h"
44 #include "journal-rate-limit.h"
45 #include "journal-internal.h"
46 #include "conf-parser.h"
47 #include "journald.h"
48 #include "virt.h"
49 #include "missing.h"
50
51 #ifdef HAVE_ACL
52 #include <sys/acl.h>
53 #include <acl/libacl.h>
54 #include "acl-util.h"
55 #endif
56
57 #ifdef HAVE_SELINUX
58 #include <selinux/selinux.h>
59 #endif
60
/* Upper bound on the number of per-user journal files kept open at the
 * same time; find_journal() closes one before opening another once this
 * many are open. */
#define USER_JOURNALS_MAX 1024
/* NOTE(review): presumably the cap on concurrently connected stdout
 * streams -- its user is not in this part of the file; confirm. */
#define STDOUT_STREAMS_MAX 4096

/* NOTE(review): presumably the defaults handed to the rate limiter
 * (window length in usec, messages allowed per window) -- confirm
 * against the server setup code. */
#define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 200

/* How long a value computed by available_space() is served from the
 * cache before the journal directory is scanned again. */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)

/* NOTE(review): looks like the minimum interval between checks whether
 * /var has become available -- its user is not in this part of the
 * file; confirm. */
#define RECHECK_VAR_AVAILABLE_USEC (30*USEC_PER_SEC)

/* Number of spare iovec slots a caller of dispatch_message_real() must
 * leave free after its own fields; that function asserts
 * n + N_IOVEC_META_FIELDS <= m before appending its metadata fields. */
#define N_IOVEC_META_FIELDS 17

/* NOTE(review): presumably the largest entry size (32 MiB) accepted
 * from a client -- its user is not in this part of the file; confirm. */
#define ENTRY_SIZE_MAX (1024*1024*32)
74
/* State of a StdoutStream (stored in its 'state' member).
 *
 * NOTE(review): the names suggest that a stream first transmits a series
 * of header values (identifier, priority, level-prefix flag and the
 * three forwarding flags) and then enters STDOUT_STREAM_RUNNING for the
 * actual payload; the code driving these transitions is not in this part
 * of the file -- confirm there. */
typedef enum StdoutStreamState {
        STDOUT_STREAM_IDENTIFIER,
        STDOUT_STREAM_PRIORITY,
        STDOUT_STREAM_LEVEL_PREFIX,
        STDOUT_STREAM_FORWARD_TO_SYSLOG,
        STDOUT_STREAM_FORWARD_TO_KMSG,
        STDOUT_STREAM_FORWARD_TO_CONSOLE,
        STDOUT_STREAM_RUNNING
} StdoutStreamState;
84
/* Per-connection bookkeeping for one stdout stream.
 *
 * NOTE(review): the field descriptions below are inferred from names and
 * types only; the code that fills and consumes this structure is not in
 * this part of the file -- confirm against it. */
struct StdoutStream {
        Server *server;                 /* back pointer to the owning server */
        StdoutStreamState state;        /* current StdoutStreamState of this stream */

        int fd;                         /* presumably the connection's file descriptor */

        struct ucred ucred;             /* presumably the peer's credentials */
#ifdef HAVE_SELINUX
        security_context_t security_context;    /* only present in SELinux builds */
#endif

        char *identifier;
        int priority;
        /* Single-bit flags; they correspond by name to the
         * STDOUT_STREAM_LEVEL_PREFIX / STDOUT_STREAM_FORWARD_TO_* states. */
        bool level_prefix:1;
        bool forward_to_syslog:1;
        bool forward_to_kmsg:1;
        bool forward_to_console:1;

        /* Room for LINE_MAX bytes plus one extra byte; 'length' is
         * presumably the number of bytes currently held in 'buffer'. */
        char buffer[LINE_MAX+1];
        size_t length;

        /* Linkage generated by the list.h macro under the name
         * 'stdout_stream'. */
        LIST_FIELDS(StdoutStream, stdout_stream);
};
108
109 static int server_flush_to_var(Server *s);
110
111 static uint64_t available_space(Server *s) {
112         char ids[33], *p;
113         const char *f;
114         sd_id128_t machine;
115         struct statvfs ss;
116         uint64_t sum = 0, avail = 0, ss_avail = 0;
117         int r;
118         DIR *d;
119         usec_t ts;
120         JournalMetrics *m;
121
122         ts = now(CLOCK_MONOTONIC);
123
124         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
125                 return s->cached_available_space;
126
127         r = sd_id128_get_machine(&machine);
128         if (r < 0)
129                 return 0;
130
131         if (s->system_journal) {
132                 f = "/var/log/journal/";
133                 m = &s->system_metrics;
134         } else {
135                 f = "/run/log/journal/";
136                 m = &s->runtime_metrics;
137         }
138
139         assert(m);
140
141         p = strappend(f, sd_id128_to_string(machine, ids));
142         if (!p)
143                 return 0;
144
145         d = opendir(p);
146         free(p);
147
148         if (!d)
149                 return 0;
150
151         if (fstatvfs(dirfd(d), &ss) < 0)
152                 goto finish;
153
154         for (;;) {
155                 struct stat st;
156                 struct dirent buf, *de;
157
158                 r = readdir_r(d, &buf, &de);
159                 if (r != 0)
160                         break;
161
162                 if (!de)
163                         break;
164
165                 if (!endswith(de->d_name, ".journal") &&
166                     !endswith(de->d_name, ".journal~"))
167                         continue;
168
169                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
170                         continue;
171
172                 if (!S_ISREG(st.st_mode))
173                         continue;
174
175                 sum += (uint64_t) st.st_blocks * 512UL;
176         }
177
178         avail = sum >= m->max_use ? 0 : m->max_use - sum;
179
180         ss_avail = ss.f_bsize * ss.f_bavail;
181
182         ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
183
184         if (ss_avail < avail)
185                 avail = ss_avail;
186
187         s->cached_available_space = avail;
188         s->cached_available_space_timestamp = ts;
189
190 finish:
191         closedir(d);
192
193         return avail;
194 }
195
196 static void server_read_file_gid(Server *s) {
197         const char *adm = "adm";
198         int r;
199
200         assert(s);
201
202         if (s->file_gid_valid)
203                 return;
204
205         r = get_group_creds(&adm, &s->file_gid);
206         if (r < 0)
207                 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
208
209         /* if we couldn't read the gid, then it will be 0, but that's
210          * fine and we shouldn't try to resolve the group again, so
211          * let's just pretend it worked right-away. */
212         s->file_gid_valid = true;
213 }
214
215 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
216         int r;
217 #ifdef HAVE_ACL
218         acl_t acl;
219         acl_entry_t entry;
220         acl_permset_t permset;
221 #endif
222
223         assert(f);
224
225         server_read_file_gid(s);
226
227         r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
228         if (r < 0)
229                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
230
231 #ifdef HAVE_ACL
232         if (uid <= 0)
233                 return;
234
235         acl = acl_get_fd(f->fd);
236         if (!acl) {
237                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
238                 return;
239         }
240
241         r = acl_find_uid(acl, uid, &entry);
242         if (r <= 0) {
243
244                 if (acl_create_entry(&acl, &entry) < 0 ||
245                     acl_set_tag_type(entry, ACL_USER) < 0 ||
246                     acl_set_qualifier(entry, &uid) < 0) {
247                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
248                         goto finish;
249                 }
250         }
251
252         if (acl_get_permset(entry, &permset) < 0 ||
253             acl_add_perm(permset, ACL_READ) < 0 ||
254             acl_calc_mask(&acl) < 0) {
255                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
256                 goto finish;
257         }
258
259         if (acl_set_fd(f->fd, acl) < 0)
260                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
261
262 finish:
263         acl_free(acl);
264 #endif
265 }
266
267 static JournalFile* find_journal(Server *s, uid_t uid) {
268         char *p;
269         int r;
270         JournalFile *f;
271         char ids[33];
272         sd_id128_t machine;
273
274         assert(s);
275
276         /* We split up user logs only on /var, not on /run. If the
277          * runtime file is open, we write to it exclusively, in order
278          * to guarantee proper order as soon as we flush /run to
279          * /var and close the runtime file. */
280
281         if (s->runtime_journal)
282                 return s->runtime_journal;
283
284         if (uid <= 0)
285                 return s->system_journal;
286
287         r = sd_id128_get_machine(&machine);
288         if (r < 0)
289                 return s->system_journal;
290
291         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
292         if (f)
293                 return f;
294
295         if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0)
296                 return s->system_journal;
297
298         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
299                 /* Too many open? Then let's close one */
300                 f = hashmap_steal_first(s->user_journals);
301                 assert(f);
302                 journal_file_close(f);
303         }
304
305         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->system_journal, &f);
306         free(p);
307
308         if (r < 0)
309                 return s->system_journal;
310
311         server_fix_perms(s, f, uid);
312
313         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
314         if (r < 0) {
315                 journal_file_close(f);
316                 return s->system_journal;
317         }
318
319         return f;
320 }
321
322 static void server_rotate(Server *s) {
323         JournalFile *f;
324         void *k;
325         Iterator i;
326         int r;
327
328         log_info("Rotating...");
329
330         if (s->runtime_journal) {
331                 r = journal_file_rotate(&s->runtime_journal);
332                 if (r < 0)
333                         log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
334                 else
335                         server_fix_perms(s, s->runtime_journal, 0);
336         }
337
338         if (s->system_journal) {
339                 r = journal_file_rotate(&s->system_journal);
340                 if (r < 0)
341                         log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
342                 else
343                         server_fix_perms(s, s->system_journal, 0);
344         }
345
346         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
347                 r = journal_file_rotate(&f);
348                 if (r < 0)
349                         log_error("Failed to rotate %s: %s", f->path, strerror(-r));
350                 else {
351                         hashmap_replace(s->user_journals, k, f);
352                         server_fix_perms(s, s->system_journal, PTR_TO_UINT32(k));
353                 }
354         }
355 }
356
357 static void server_vacuum(Server *s) {
358         char *p;
359         char ids[33];
360         sd_id128_t machine;
361         int r;
362
363         log_info("Vacuuming...");
364
365         r = sd_id128_get_machine(&machine);
366         if (r < 0) {
367                 log_error("Failed to get machine ID: %s", strerror(-r));
368                 return;
369         }
370
371         sd_id128_to_string(machine, ids);
372
373         if (s->system_journal) {
374                 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
375                         log_error("Out of memory.");
376                         return;
377                 }
378
379                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
380                 if (r < 0 && r != -ENOENT)
381                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
382                 free(p);
383         }
384
385
386         if (s->runtime_journal) {
387                 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
388                         log_error("Out of memory.");
389                         return;
390                 }
391
392                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
393                 if (r < 0 && r != -ENOENT)
394                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
395                 free(p);
396         }
397
398         s->cached_available_space_timestamp = 0;
399 }
400
401 static char *shortened_cgroup_path(pid_t pid) {
402         int r;
403         char *process_path, *init_path, *path;
404
405         assert(pid > 0);
406
407         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
408         if (r < 0)
409                 return NULL;
410
411         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
412         if (r < 0) {
413                 free(process_path);
414                 return NULL;
415         }
416
417         if (endswith(init_path, "/system"))
418                 init_path[strlen(init_path) - 7] = 0;
419         else if (streq(init_path, "/"))
420                 init_path[0] = 0;
421
422         if (startswith(process_path, init_path)) {
423                 char *p;
424
425                 p = strdup(process_path + strlen(init_path));
426                 if (!p) {
427                         free(process_path);
428                         free(init_path);
429                         return NULL;
430                 }
431                 path = p;
432         } else {
433                 path = process_path;
434                 process_path = NULL;
435         }
436
437         free(process_path);
438         free(init_path);
439
440         return path;
441 }
442
443 static void dispatch_message_real(
444                 Server *s,
445                 struct iovec *iovec, unsigned n, unsigned m,
446                 struct ucred *ucred,
447                 struct timeval *tv,
448                 const char *label, size_t label_len) {
449
450         char *pid = NULL, *uid = NULL, *gid = NULL,
451                 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
452                 *comm = NULL, *cmdline = NULL, *hostname = NULL,
453                 *audit_session = NULL, *audit_loginuid = NULL,
454                 *exe = NULL, *cgroup = NULL, *session = NULL,
455                 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
456
457         char idbuf[33];
458         sd_id128_t id;
459         int r;
460         char *t;
461         uid_t loginuid = 0, realuid = 0;
462         JournalFile *f;
463         bool vacuumed = false;
464
465         assert(s);
466         assert(iovec);
467         assert(n > 0);
468         assert(n + N_IOVEC_META_FIELDS <= m);
469
470         if (ucred) {
471                 uint32_t audit;
472                 uid_t owner;
473
474                 realuid = ucred->uid;
475
476                 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
477                         IOVEC_SET_STRING(iovec[n++], pid);
478
479                 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
480                         IOVEC_SET_STRING(iovec[n++], uid);
481
482                 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
483                         IOVEC_SET_STRING(iovec[n++], gid);
484
485                 r = get_process_comm(ucred->pid, &t);
486                 if (r >= 0) {
487                         comm = strappend("_COMM=", t);
488                         free(t);
489
490                         if (comm)
491                                 IOVEC_SET_STRING(iovec[n++], comm);
492                 }
493
494                 r = get_process_exe(ucred->pid, &t);
495                 if (r >= 0) {
496                         exe = strappend("_EXE=", t);
497                         free(t);
498
499                         if (exe)
500                                 IOVEC_SET_STRING(iovec[n++], exe);
501                 }
502
503                 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
504                 if (r >= 0) {
505                         cmdline = strappend("_CMDLINE=", t);
506                         free(t);
507
508                         if (cmdline)
509                                 IOVEC_SET_STRING(iovec[n++], cmdline);
510                 }
511
512                 r = audit_session_from_pid(ucred->pid, &audit);
513                 if (r >= 0)
514                         if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
515                                 IOVEC_SET_STRING(iovec[n++], audit_session);
516
517                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
518                 if (r >= 0)
519                         if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
520                                 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
521
522                 t = shortened_cgroup_path(ucred->pid);
523                 if (t) {
524                         cgroup = strappend("_SYSTEMD_CGROUP=", t);
525                         free(t);
526
527                         if (cgroup)
528                                 IOVEC_SET_STRING(iovec[n++], cgroup);
529                 }
530
531                 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
532                         session = strappend("_SYSTEMD_SESSION=", t);
533                         free(t);
534
535                         if (session)
536                                 IOVEC_SET_STRING(iovec[n++], session);
537                 }
538
539                 if (sd_pid_get_unit(ucred->pid, &t) >= 0) {
540                         unit = strappend("_SYSTEMD_UNIT=", t);
541                         free(t);
542
543                         if (unit)
544                                 IOVEC_SET_STRING(iovec[n++], unit);
545                 }
546
547                 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
548                         if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
549                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
550
551 #ifdef HAVE_SELINUX
552                 if (label) {
553                         selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
554                         if (selinux_context) {
555                                 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
556                                 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
557                                 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
558                                 IOVEC_SET_STRING(iovec[n++], selinux_context);
559                         }
560                 } else {
561                         security_context_t con;
562
563                         if (getpidcon(ucred->pid, &con) >= 0) {
564                                 selinux_context = strappend("_SELINUX_CONTEXT=", con);
565                                 if (selinux_context)
566                                         IOVEC_SET_STRING(iovec[n++], selinux_context);
567
568                                 freecon(con);
569                         }
570                 }
571 #endif
572         }
573
574         if (tv) {
575                 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
576                              (unsigned long long) timeval_load(tv)) >= 0)
577                         IOVEC_SET_STRING(iovec[n++], source_time);
578         }
579
580         /* Note that strictly speaking storing the boot id here is
581          * redundant since the entry includes this in-line
582          * anyway. However, we need this indexed, too. */
583         r = sd_id128_get_boot(&id);
584         if (r >= 0)
585                 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
586                         IOVEC_SET_STRING(iovec[n++], boot_id);
587
588         r = sd_id128_get_machine(&id);
589         if (r >= 0)
590                 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
591                         IOVEC_SET_STRING(iovec[n++], machine_id);
592
593         t = gethostname_malloc();
594         if (t) {
595                 hostname = strappend("_HOSTNAME=", t);
596                 free(t);
597                 if (hostname)
598                         IOVEC_SET_STRING(iovec[n++], hostname);
599         }
600
601         assert(n <= m);
602
603         server_flush_to_var(s);
604
605 retry:
606         f = find_journal(s, realuid == 0 ? 0 : loginuid);
607         if (!f)
608                 log_warning("Dropping message, as we can't find a place to store the data.");
609         else {
610                 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
611
612                 if ((r == -E2BIG || /* hit limit */
613                      r == -EFBIG || /* hit fs limit */
614                      r == -EDQUOT || /* quota hit */
615                      r == -ENOSPC || /* disk full */
616                      r == -EBADMSG || /* corrupted */
617                      r == -ENODATA || /* truncated */
618                      r == -EHOSTDOWN || /* other machine */
619                      r == -EPROTONOSUPPORT) && /* unsupported feature */
620                     !vacuumed) {
621
622                         if (r == -E2BIG)
623                                 log_info("Allocation limit reached, rotating.");
624                         else
625                                 log_warning("Journal file corrupted, rotating.");
626
627                         server_rotate(s);
628                         server_vacuum(s);
629                         vacuumed = true;
630
631                         log_info("Retrying write.");
632                         goto retry;
633                 }
634
635                 if (r < 0)
636                         log_error("Failed to write entry, ignoring: %s", strerror(-r));
637         }
638
639         free(pid);
640         free(uid);
641         free(gid);
642         free(comm);
643         free(exe);
644         free(cmdline);
645         free(source_time);
646         free(boot_id);
647         free(machine_id);
648         free(hostname);
649         free(audit_session);
650         free(audit_loginuid);
651         free(cgroup);
652         free(session);
653         free(owner_uid);
654         free(unit);
655         free(selinux_context);
656 }
657
658 static void driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
659         char mid[11 + 32 + 1];
660         char buffer[16 + LINE_MAX + 1];
661         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
662         int n = 0;
663         va_list ap;
664         struct ucred ucred;
665
666         assert(s);
667         assert(format);
668
669         IOVEC_SET_STRING(iovec[n++], "PRIORITY=5");
670         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
671
672         memcpy(buffer, "MESSAGE=", 8);
673         va_start(ap, format);
674         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
675         va_end(ap);
676         char_array_0(buffer);
677         IOVEC_SET_STRING(iovec[n++], buffer);
678
679         snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
680         char_array_0(mid);
681         IOVEC_SET_STRING(iovec[n++], mid);
682
683         zero(ucred);
684         ucred.pid = getpid();
685         ucred.uid = getuid();
686         ucred.gid = getgid();
687
688         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0);
689 }
690
691 static void dispatch_message(Server *s,
692                              struct iovec *iovec, unsigned n, unsigned m,
693                              struct ucred *ucred,
694                              struct timeval *tv,
695                              const char *label, size_t label_len,
696                              int priority) {
697         int rl;
698         char *path = NULL, *c;
699
700         assert(s);
701         assert(iovec || n == 0);
702
703         if (n == 0)
704                 return;
705
706         if (!ucred)
707                 goto finish;
708
709         path = shortened_cgroup_path(ucred->pid);
710         if (!path)
711                 goto finish;
712
713         /* example: /user/lennart/3/foobar
714          *          /system/dbus.service/foobar
715          *
716          * So let's cut of everything past the third /, since that is
717          * wher user directories start */
718
719         c = strchr(path, '/');
720         if (c) {
721                 c = strchr(c+1, '/');
722                 if (c) {
723                         c = strchr(c+1, '/');
724                         if (c)
725                                 *c = 0;
726                 }
727         }
728
729         rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
730
731         if (rl == 0) {
732                 free(path);
733                 return;
734         }
735
736         /* Write a suppression message if we suppressed something */
737         if (rl > 1)
738                 driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
739
740         free(path);
741
742 finish:
743         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len);
744 }
745
746 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
747         struct msghdr msghdr;
748         struct cmsghdr *cmsg;
749         union {
750                 struct cmsghdr cmsghdr;
751                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
752         } control;
753         union sockaddr_union sa;
754
755         assert(s);
756         assert(iovec);
757         assert(n_iovec > 0);
758
759         zero(msghdr);
760         msghdr.msg_iov = (struct iovec*) iovec;
761         msghdr.msg_iovlen = n_iovec;
762
763         zero(sa);
764         sa.un.sun_family = AF_UNIX;
765         strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
766         msghdr.msg_name = &sa;
767         msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
768
769         if (ucred) {
770                 zero(control);
771                 msghdr.msg_control = &control;
772                 msghdr.msg_controllen = sizeof(control);
773
774                 cmsg = CMSG_FIRSTHDR(&msghdr);
775                 cmsg->cmsg_level = SOL_SOCKET;
776                 cmsg->cmsg_type = SCM_CREDENTIALS;
777                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
778                 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
779                 msghdr.msg_controllen = cmsg->cmsg_len;
780         }
781
782         /* Forward the syslog message we received via /dev/log to
783          * /run/systemd/syslog. Unfortunately we currently can't set
784          * the SO_TIMESTAMP auxiliary data, and hence we don't. */
785
786         if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
787                 return;
788
789         /* The socket is full? I guess the syslog implementation is
790          * too slow, and we shouldn't wait for that... */
791         if (errno == EAGAIN)
792                 return;
793
794         if (ucred && errno == ESRCH) {
795                 struct ucred u;
796
797                 /* Hmm, presumably the sender process vanished
798                  * by now, so let's fix it as good as we
799                  * can, and retry */
800
801                 u = *ucred;
802                 u.pid = getpid();
803                 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
804
805                 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
806                         return;
807
808                 if (errno == EAGAIN)
809                         return;
810         }
811
812         log_debug("Failed to forward syslog message: %m");
813 }
814
815 static void forward_syslog_raw(Server *s, const char *buffer, struct ucred *ucred, struct timeval *tv) {
816         struct iovec iovec;
817
818         assert(s);
819         assert(buffer);
820
821         IOVEC_SET_STRING(iovec, buffer);
822         forward_syslog_iovec(s, &iovec, 1, ucred, tv);
823 }
824
825 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
826         struct iovec iovec[5];
827         char header_priority[6], header_time[64], header_pid[16];
828         int n = 0;
829         time_t t;
830         struct tm *tm;
831         char *ident_buf = NULL;
832
833         assert(s);
834         assert(priority >= 0);
835         assert(priority <= 999);
836         assert(message);
837
838         /* First: priority field */
839         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
840         char_array_0(header_priority);
841         IOVEC_SET_STRING(iovec[n++], header_priority);
842
843         /* Second: timestamp */
844         t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
845         tm = localtime(&t);
846         if (!tm)
847                 return;
848         if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
849                 return;
850         IOVEC_SET_STRING(iovec[n++], header_time);
851
852         /* Third: identifier and PID */
853         if (ucred) {
854                 if (!identifier) {
855                         get_process_comm(ucred->pid, &ident_buf);
856                         identifier = ident_buf;
857                 }
858
859                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
860                 char_array_0(header_pid);
861
862                 if (identifier)
863                         IOVEC_SET_STRING(iovec[n++], identifier);
864
865                 IOVEC_SET_STRING(iovec[n++], header_pid);
866         } else if (identifier) {
867                 IOVEC_SET_STRING(iovec[n++], identifier);
868                 IOVEC_SET_STRING(iovec[n++], ": ");
869         }
870
871         /* Fourth: message */
872         IOVEC_SET_STRING(iovec[n++], message);
873
874         forward_syslog_iovec(s, iovec, n, ucred, tv);
875
876         free(ident_buf);
877 }
878
/* Makes sure a syslog priority value carries a non-zero facility: if
 * the facility bits are all zero (which is the kernel facility),
 * LOG_USER is substituted and only the level bits are kept. Any other
 * value is returned unmodified. */
static int fixup_priority(int priority) {
        return (priority & LOG_FACMASK) != 0
                ? priority
                : (priority & LOG_PRIMASK) | LOG_USER;
}
886
887 static void forward_kmsg(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
888         struct iovec iovec[5];
889         char header_priority[6], header_pid[16];
890         int n = 0;
891         char *ident_buf = NULL;
892         int fd;
893
894         assert(s);
895         assert(priority >= 0);
896         assert(priority <= 999);
897         assert(message);
898
899         /* Never allow messages with kernel facility to be written to
900          * kmsg, regardless where the data comes from. */
901         priority = fixup_priority(priority);
902
903         /* First: priority field */
904         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
905         char_array_0(header_priority);
906         IOVEC_SET_STRING(iovec[n++], header_priority);
907
908         /* Second: identifier and PID */
909         if (ucred) {
910                 if (!identifier) {
911                         get_process_comm(ucred->pid, &ident_buf);
912                         identifier = ident_buf;
913                 }
914
915                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
916                 char_array_0(header_pid);
917
918                 if (identifier)
919                         IOVEC_SET_STRING(iovec[n++], identifier);
920
921                 IOVEC_SET_STRING(iovec[n++], header_pid);
922         } else if (identifier) {
923                 IOVEC_SET_STRING(iovec[n++], identifier);
924                 IOVEC_SET_STRING(iovec[n++], ": ");
925         }
926
927         /* Fourth: message */
928         IOVEC_SET_STRING(iovec[n++], message);
929         IOVEC_SET_STRING(iovec[n++], "\n");
930
931         fd = open("/dev/kmsg", O_WRONLY|O_NOCTTY|O_CLOEXEC);
932         if (fd < 0) {
933                 log_debug("Failed to open /dev/kmsg for logging: %s", strerror(errno));
934                 goto finish;
935         }
936
937         if (writev(fd, iovec, n) < 0)
938                 log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
939
940         close_nointr_nofail(fd);
941
942 finish:
943         free(ident_buf);
944 }
945
946 static void forward_console(Server *s, const char *identifier, const char *message, struct ucred *ucred) {
947         struct iovec iovec[4];
948         char header_pid[16];
949         int n = 0, fd;
950         char *ident_buf = NULL;
951
952         assert(s);
953         assert(message);
954
955         /* First: identifier and PID */
956         if (ucred) {
957                 if (!identifier) {
958                         get_process_comm(ucred->pid, &ident_buf);
959                         identifier = ident_buf;
960                 }
961
962                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
963                 char_array_0(header_pid);
964
965                 if (identifier)
966                         IOVEC_SET_STRING(iovec[n++], identifier);
967
968                 IOVEC_SET_STRING(iovec[n++], header_pid);
969         } else if (identifier) {
970                 IOVEC_SET_STRING(iovec[n++], identifier);
971                 IOVEC_SET_STRING(iovec[n++], ": ");
972         }
973
974         /* Third: message */
975         IOVEC_SET_STRING(iovec[n++], message);
976         IOVEC_SET_STRING(iovec[n++], "\n");
977
978         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
979         if (fd < 0) {
980                 log_debug("Failed to open /dev/console for logging: %s", strerror(errno));
981                 goto finish;
982         }
983
984         if (writev(fd, iovec, n) < 0)
985                 log_debug("Failed to write to /dev/console for logging: %s", strerror(errno));
986
987         close_nointr_nofail(fd);
988
989 finish:
990         free(ident_buf);
991 }
992
993 static void read_identifier(const char **buf, char **identifier, char **pid) {
994         const char *p;
995         char *t;
996         size_t l, e;
997
998         assert(buf);
999         assert(identifier);
1000         assert(pid);
1001
1002         p = *buf;
1003
1004         p += strspn(p, WHITESPACE);
1005         l = strcspn(p, WHITESPACE);
1006
1007         if (l <= 0 ||
1008             p[l-1] != ':')
1009                 return;
1010
1011         e = l;
1012         l--;
1013
1014         if (p[l-1] == ']') {
1015                 size_t k = l-1;
1016
1017                 for (;;) {
1018
1019                         if (p[k] == '[') {
1020                                 t = strndup(p+k+1, l-k-2);
1021                                 if (t)
1022                                         *pid = t;
1023
1024                                 l = k;
1025                                 break;
1026                         }
1027
1028                         if (k == 0)
1029                                 break;
1030
1031                         k--;
1032                 }
1033         }
1034
1035         t = strndup(p, l);
1036         if (t)
1037                 *identifier = t;
1038
1039         *buf = p + e;
1040         *buf += strspn(*buf, WHITESPACE);
1041 }
1042
1043 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv, const char *label, size_t label_len) {
1044         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
1045         struct iovec iovec[N_IOVEC_META_FIELDS + 6];
1046         unsigned n = 0;
1047         int priority = LOG_USER | LOG_INFO;
1048         char *identifier = NULL, *pid = NULL;
1049
1050         assert(s);
1051         assert(buf);
1052
1053         if (s->forward_to_syslog)
1054                 forward_syslog_raw(s, buf, ucred, tv);
1055
1056         parse_syslog_priority((char**) &buf, &priority);
1057         skip_syslog_date((char**) &buf);
1058         read_identifier(&buf, &identifier, &pid);
1059
1060         if (s->forward_to_kmsg)
1061                 forward_kmsg(s, priority, identifier, buf, ucred);
1062
1063         if (s->forward_to_console)
1064                 forward_console(s, identifier, buf, ucred);
1065
1066         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
1067
1068         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1069                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1070
1071         if (priority & LOG_FACMASK)
1072                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1073                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1074
1075         if (identifier) {
1076                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1077                 if (syslog_identifier)
1078                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1079         }
1080
1081         if (pid) {
1082                 syslog_pid = strappend("SYSLOG_PID=", pid);
1083                 if (syslog_pid)
1084                         IOVEC_SET_STRING(iovec[n++], syslog_pid);
1085         }
1086
1087         message = strappend("MESSAGE=", buf);
1088         if (message)
1089                 IOVEC_SET_STRING(iovec[n++], message);
1090
1091         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, label, label_len, priority);
1092
1093         free(message);
1094         free(identifier);
1095         free(pid);
1096         free(syslog_priority);
1097         free(syslog_facility);
1098         free(syslog_identifier);
1099 }
1100
/* Checks whether the 'l' bytes at 'p' form a field name that clients
 * are allowed to set.
 *
 * This roughly follows the POSIX syntax recommendations for
 * environment variable names, with a few extra restrictions:
 *
 *   http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
 *
 * Returns true if the name is acceptable, false otherwise. */
static bool valid_user_field(const char *p, size_t l) {
        size_t i;

        /* Neither empty names nor names longer than 64 chars */
        if (l == 0 || l > 64)
                return false;

        /* A leading underscore marks protected fields, and a leading
         * digit is not allowed either */
        if (p[0] == '_' || (p[0] >= '0' && p[0] <= '9'))
                return false;

        /* Everything must be one of A-Z, 0-9 or '_' */
        for (i = 0; i < l; i++) {
                char c = p[i];

                if (c >= 'A' && c <= 'Z')
                        continue;

                if (c >= '0' && c <= '9')
                        continue;

                if (c == '_')
                        continue;

                return false;
        }

        return true;
}
1135
1136 static void process_native_message(
1137                 Server *s,
1138                 const void *buffer, size_t buffer_size,
1139                 struct ucred *ucred,
1140                 struct timeval *tv,
1141                 const char *label, size_t label_len) {
1142
1143         struct iovec *iovec = NULL;
1144         unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1145         const char *p;
1146         size_t remaining;
1147         int priority = LOG_INFO;
1148         char *identifier = NULL, *message = NULL;
1149
1150         assert(s);
1151         assert(buffer || buffer_size == 0);
1152
1153         p = buffer;
1154         remaining = buffer_size;
1155
1156         while (remaining > 0) {
1157                 const char *e, *q;
1158
1159                 e = memchr(p, '\n', remaining);
1160
1161                 if (!e) {
1162                         /* Trailing noise, let's ignore it, and flush what we collected */
1163                         log_debug("Received message with trailing noise, ignoring.");
1164                         break;
1165                 }
1166
1167                 if (e == p) {
1168                         /* Entry separator */
1169                         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, priority);
1170                         n = 0;
1171                         priority = LOG_INFO;
1172
1173                         p++;
1174                         remaining--;
1175                         continue;
1176                 }
1177
1178                 if (*p == '.' || *p == '#') {
1179                         /* Ignore control commands for now, and
1180                          * comments too. */
1181                         remaining -= (e - p) + 1;
1182                         p = e + 1;
1183                         continue;
1184                 }
1185
1186                 /* A property follows */
1187
1188                 if (n+N_IOVEC_META_FIELDS >= m) {
1189                         struct iovec *c;
1190                         unsigned u;
1191
1192                         u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1193                         c = realloc(iovec, u * sizeof(struct iovec));
1194                         if (!c) {
1195                                 log_error("Out of memory");
1196                                 break;
1197                         }
1198
1199                         iovec = c;
1200                         m = u;
1201                 }
1202
1203                 q = memchr(p, '=', e - p);
1204                 if (q) {
1205                         if (valid_user_field(p, q - p)) {
1206                                 size_t l;
1207
1208                                 l = e - p;
1209
1210                                 /* If the field name starts with an
1211                                  * underscore, skip the variable,
1212                                  * since that indidates a trusted
1213                                  * field */
1214                                 iovec[n].iov_base = (char*) p;
1215                                 iovec[n].iov_len = l;
1216                                 n++;
1217
1218                                 /* We need to determine the priority
1219                                  * of this entry for the rate limiting
1220                                  * logic */
1221                                 if (l == 10 &&
1222                                     memcmp(p, "PRIORITY=", 9) == 0 &&
1223                                     p[9] >= '0' && p[9] <= '9')
1224                                         priority = (priority & LOG_FACMASK) | (p[9] - '0');
1225
1226                                 else if (l == 17 &&
1227                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1228                                          p[16] >= '0' && p[16] <= '9')
1229                                         priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1230
1231                                 else if (l == 18 &&
1232                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1233                                          p[16] >= '0' && p[16] <= '9' &&
1234                                          p[17] >= '0' && p[17] <= '9')
1235                                         priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1236
1237                                 else if (l >= 12 &&
1238                                          memcmp(p, "SYSLOG_IDENTIFIER=", 11) == 0) {
1239                                         char *t;
1240
1241                                         t = strndup(p + 11, l - 11);
1242                                         if (t) {
1243                                                 free(identifier);
1244                                                 identifier = t;
1245                                         }
1246                                 } else if (l >= 8 &&
1247                                            memcmp(p, "MESSAGE=", 8) == 0) {
1248                                         char *t;
1249
1250                                         t = strndup(p + 8, l - 8);
1251                                         if (t) {
1252                                                 free(message);
1253                                                 message = t;
1254                                         }
1255                                 }
1256                         }
1257
1258                         remaining -= (e - p) + 1;
1259                         p = e + 1;
1260                         continue;
1261                 } else {
1262                         le64_t l_le;
1263                         uint64_t l;
1264                         char *k;
1265
1266                         if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1267                                 log_debug("Failed to parse message, ignoring.");
1268                                 break;
1269                         }
1270
1271                         memcpy(&l_le, e + 1, sizeof(uint64_t));
1272                         l = le64toh(l_le);
1273
1274                         if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1275                             e[1+sizeof(uint64_t)+l] != '\n') {
1276                                 log_debug("Failed to parse message, ignoring.");
1277                                 break;
1278                         }
1279
1280                         k = malloc((e - p) + 1 + l);
1281                         if (!k) {
1282                                 log_error("Out of memory");
1283                                 break;
1284                         }
1285
1286                         memcpy(k, p, e - p);
1287                         k[e - p] = '=';
1288                         memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1289
1290                         if (valid_user_field(p, e - p)) {
1291                                 iovec[n].iov_base = k;
1292                                 iovec[n].iov_len = (e - p) + 1 + l;
1293                                 n++;
1294                         } else
1295                                 free(k);
1296
1297                         remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1298                         p = e + 1 + sizeof(uint64_t) + l + 1;
1299                 }
1300         }
1301
1302         if (n <= 0)
1303                 goto finish;
1304
1305         tn = n++;
1306         IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1307
1308         if (message) {
1309                 if (s->forward_to_syslog)
1310                         forward_syslog(s, priority, identifier, message, ucred, tv);
1311
1312                 if (s->forward_to_kmsg)
1313                         forward_kmsg(s, priority, identifier, message, ucred);
1314
1315                 if (s->forward_to_console)
1316                         forward_console(s, identifier, message, ucred);
1317         }
1318
1319         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, priority);
1320
1321 finish:
1322         for (j = 0; j < n; j++)  {
1323                 if (j == tn)
1324                         continue;
1325
1326                 if (iovec[j].iov_base < buffer ||
1327                     (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1328                         free(iovec[j].iov_base);
1329         }
1330
1331         free(iovec);
1332         free(identifier);
1333         free(message);
1334 }
1335
1336 static void process_native_file(
1337                 Server *s,
1338                 int fd,
1339                 struct ucred *ucred,
1340                 struct timeval *tv,
1341                 const char *label, size_t label_len) {
1342
1343         struct stat st;
1344         void *p;
1345         ssize_t n;
1346
1347         assert(s);
1348         assert(fd >= 0);
1349
1350         /* Data is in the passed file, since it didn't fit in a
1351          * datagram. We can't map the file here, since clients might
1352          * then truncate it and trigger a SIGBUS for us. So let's
1353          * stupidly read it */
1354
1355         if (fstat(fd, &st) < 0) {
1356                 log_error("Failed to stat passed file, ignoring: %m");
1357                 return;
1358         }
1359
1360         if (!S_ISREG(st.st_mode)) {
1361                 log_error("File passed is not regular. Ignoring.");
1362                 return;
1363         }
1364
1365         if (st.st_size <= 0)
1366                 return;
1367
1368         if (st.st_size > ENTRY_SIZE_MAX) {
1369                 log_error("File passed too large. Ignoring.");
1370                 return;
1371         }
1372
1373         p = malloc(st.st_size);
1374         if (!p) {
1375                 log_error("Out of memory");
1376                 return;
1377         }
1378
1379         n = pread(fd, p, st.st_size, 0);
1380         if (n < 0)
1381                 log_error("Failed to read file, ignoring: %s", strerror(-n));
1382         else if (n > 0)
1383                 process_native_message(s, p, n, ucred, tv, label, label_len);
1384
1385         free(p);
1386 }
1387
1388 static int stdout_stream_log(StdoutStream *s, const char *p) {
1389         struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1390         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1391         unsigned n = 0;
1392         int priority;
1393         char *label = NULL;
1394         size_t label_len = 0;
1395
1396         assert(s);
1397         assert(p);
1398
1399         if (isempty(p))
1400                 return 0;
1401
1402         priority = s->priority;
1403
1404         if (s->level_prefix)
1405                 parse_syslog_priority((char**) &p, &priority);
1406
1407         if (s->forward_to_syslog || s->server->forward_to_syslog)
1408                 forward_syslog(s->server, fixup_priority(priority), s->identifier, p, &s->ucred, NULL);
1409
1410         if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1411                 forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1412
1413         if (s->forward_to_console || s->server->forward_to_console)
1414                 forward_console(s->server, s->identifier, p, &s->ucred);
1415
1416         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1417
1418         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1419                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1420
1421         if (priority & LOG_FACMASK)
1422                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1423                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1424
1425         if (s->identifier) {
1426                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1427                 if (syslog_identifier)
1428                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1429         }
1430
1431         message = strappend("MESSAGE=", p);
1432         if (message)
1433                 IOVEC_SET_STRING(iovec[n++], message);
1434
1435 #ifdef HAVE_SELINUX
1436         if (s->security_context) {
1437                 label = (char*) s->security_context;
1438                 label_len = strlen((char*) s->security_context);
1439         }
1440 #endif
1441
1442         dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, label, label_len, priority);
1443
1444         free(message);
1445         free(syslog_priority);
1446         free(syslog_facility);
1447         free(syslog_identifier);
1448
1449         return 0;
1450 }
1451
1452 static int stdout_stream_line(StdoutStream *s, char *p) {
1453         int r;
1454
1455         assert(s);
1456         assert(p);
1457
1458         p = strstrip(p);
1459
1460         switch (s->state) {
1461
1462         case STDOUT_STREAM_IDENTIFIER:
1463                 if (isempty(p))
1464                         s->identifier = NULL;
1465                 else  {
1466                         s->identifier = strdup(p);
1467                         if (!s->identifier) {
1468                                 log_error("Out of memory");
1469                                 return -ENOMEM;
1470                         }
1471                 }
1472
1473                 s->state = STDOUT_STREAM_PRIORITY;
1474                 return 0;
1475
1476         case STDOUT_STREAM_PRIORITY:
1477                 r = safe_atoi(p, &s->priority);
1478                 if (r < 0 || s->priority <= 0 || s->priority >= 999) {
1479                         log_warning("Failed to parse log priority line.");
1480                         return -EINVAL;
1481                 }
1482
1483                 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1484                 return 0;
1485
1486         case STDOUT_STREAM_LEVEL_PREFIX:
1487                 r = parse_boolean(p);
1488                 if (r < 0) {
1489                         log_warning("Failed to parse level prefix line.");
1490                         return -EINVAL;
1491                 }
1492
1493                 s->level_prefix = !!r;
1494                 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1495                 return 0;
1496
1497         case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1498                 r = parse_boolean(p);
1499                 if (r < 0) {
1500                         log_warning("Failed to parse forward to syslog line.");
1501                         return -EINVAL;
1502                 }
1503
1504                 s->forward_to_syslog = !!r;
1505                 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1506                 return 0;
1507
1508         case STDOUT_STREAM_FORWARD_TO_KMSG:
1509                 r = parse_boolean(p);
1510                 if (r < 0) {
1511                         log_warning("Failed to parse copy to kmsg line.");
1512                         return -EINVAL;
1513                 }
1514
1515                 s->forward_to_kmsg = !!r;
1516                 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1517                 return 0;
1518
1519         case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1520                 r = parse_boolean(p);
1521                 if (r < 0) {
1522                         log_warning("Failed to parse copy to console line.");
1523                         return -EINVAL;
1524                 }
1525
1526                 s->forward_to_console = !!r;
1527                 s->state = STDOUT_STREAM_RUNNING;
1528                 return 0;
1529
1530         case STDOUT_STREAM_RUNNING:
1531                 return stdout_stream_log(s, p);
1532         }
1533
1534         assert_not_reached("Unknown stream state");
1535 }
1536
1537 static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
1538         char *p;
1539         size_t remaining;
1540         int r;
1541
1542         assert(s);
1543
1544         p = s->buffer;
1545         remaining = s->length;
1546         for (;;) {
1547                 char *end;
1548                 size_t skip;
1549
1550                 end = memchr(p, '\n', remaining);
1551                 if (end)
1552                         skip = end - p + 1;
1553                 else if (remaining >= sizeof(s->buffer) - 1) {
1554                         end = p + sizeof(s->buffer) - 1;
1555                         skip = remaining;
1556                 } else
1557                         break;
1558
1559                 *end = 0;
1560
1561                 r = stdout_stream_line(s, p);
1562                 if (r < 0)
1563                         return r;
1564
1565                 remaining -= skip;
1566                 p += skip;
1567         }
1568
1569         if (force_flush && remaining > 0) {
1570                 p[remaining] = 0;
1571                 r = stdout_stream_line(s, p);
1572                 if (r < 0)
1573                         return r;
1574
1575                 p += remaining;
1576                 remaining = 0;
1577         }
1578
1579         if (p > s->buffer) {
1580                 memmove(s->buffer, p, remaining);
1581                 s->length = remaining;
1582         }
1583
1584         return 0;
1585 }
1586
1587 static int stdout_stream_process(StdoutStream *s) {
1588         ssize_t l;
1589         int r;
1590
1591         assert(s);
1592
1593         l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1594         if (l < 0) {
1595
1596                 if (errno == EAGAIN)
1597                         return 0;
1598
1599                 log_warning("Failed to read from stream: %m");
1600                 return -errno;
1601         }
1602
1603         if (l == 0) {
1604                 r = stdout_stream_scan(s, true);
1605                 if (r < 0)
1606                         return r;
1607
1608                 return 0;
1609         }
1610
1611         s->length += l;
1612         r = stdout_stream_scan(s, false);
1613         if (r < 0)
1614                 return r;
1615
1616         return 1;
1617
1618 }
1619
1620 static void stdout_stream_free(StdoutStream *s) {
1621         assert(s);
1622
1623         if (s->server) {
1624                 assert(s->server->n_stdout_streams > 0);
1625                 s->server->n_stdout_streams --;
1626                 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
1627         }
1628
1629         if (s->fd >= 0) {
1630                 if (s->server)
1631                         epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1632
1633                 close_nointr_nofail(s->fd);
1634         }
1635
1636 #ifdef HAVE_SELINUX
1637         if (s->security_context)
1638                 freecon(s->security_context);
1639 #endif
1640
1641         free(s->identifier);
1642         free(s);
1643 }
1644
1645 static int stdout_stream_new(Server *s) {
1646         StdoutStream *stream;
1647         int fd, r;
1648         socklen_t len;
1649         struct epoll_event ev;
1650
1651         assert(s);
1652
1653         fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1654         if (fd < 0) {
1655                 if (errno == EAGAIN)
1656                         return 0;
1657
1658                 log_error("Failed to accept stdout connection: %m");
1659                 return -errno;
1660         }
1661
1662         if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1663                 log_warning("Too many stdout streams, refusing connection.");
1664                 close_nointr_nofail(fd);
1665                 return 0;
1666         }
1667
1668         stream = new0(StdoutStream, 1);
1669         if (!stream) {
1670                 log_error("Out of memory.");
1671                 close_nointr_nofail(fd);
1672                 return -ENOMEM;
1673         }
1674
1675         stream->fd = fd;
1676
1677         len = sizeof(stream->ucred);
1678         if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1679                 log_error("Failed to determine peer credentials: %m");
1680                 r = -errno;
1681                 goto fail;
1682         }
1683
1684 #ifdef HAVE_SELINUX
1685         if (getpeercon(fd, &stream->security_context) < 0)
1686                 log_error("Failed to determine peer security context.");
1687 #endif
1688
1689         if (shutdown(fd, SHUT_WR) < 0) {
1690                 log_error("Failed to shutdown writing side of socket: %m");
1691                 r = -errno;
1692                 goto fail;
1693         }
1694
1695         zero(ev);
1696         ev.data.ptr = stream;
1697         ev.events = EPOLLIN;
1698         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1699                 log_error("Failed to add stream to event loop: %m");
1700                 r = -errno;
1701                 goto fail;
1702         }
1703
1704         stream->server = s;
1705         LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1706         s->n_stdout_streams ++;
1707
1708         return 0;
1709
1710 fail:
1711         stdout_stream_free(stream);
1712         return r;
1713 }
1714
1715 static int parse_kernel_timestamp(char **_p, usec_t *t) {
1716         usec_t r;
1717         int k, i;
1718         char *p;
1719
1720         assert(_p);
1721         assert(*_p);
1722         assert(t);
1723
1724         p = *_p;
1725
1726         if (strlen(p) < 14 || p[0] != '[' || p[13] != ']' || p[6] != '.')
1727                 return 0;
1728
1729         r = 0;
1730
1731         for (i = 1; i <= 5; i++) {
1732                 r *= 10;
1733
1734                 if (p[i] == ' ')
1735                         continue;
1736
1737                 k = undecchar(p[i]);
1738                 if (k < 0)
1739                         return 0;
1740
1741                 r += k;
1742         }
1743
1744         for (i = 7; i <= 12; i++) {
1745                 r *= 10;
1746
1747                 k = undecchar(p[i]);
1748                 if (k < 0)
1749                         return 0;
1750
1751                 r += k;
1752         }
1753
1754         *t = r;
1755         *_p += 14;
1756         *_p += strspn(*_p, WHITESPACE);
1757
1758         return 1;
1759 }
1760
1761 static void proc_kmsg_line(Server *s, const char *p) {
1762         struct iovec iovec[N_IOVEC_META_FIELDS + 7];
1763         char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
1764         int priority = LOG_KERN | LOG_INFO;
1765         unsigned n = 0;
1766         usec_t usec;
1767         char *identifier = NULL, *pid = NULL;
1768
1769         assert(s);
1770         assert(p);
1771
1772         if (isempty(p))
1773                 return;
1774
1775         parse_syslog_priority((char **) &p, &priority);
1776
1777         if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
1778                 return;
1779
1780         if (parse_kernel_timestamp((char **) &p, &usec) > 0) {
1781                 if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
1782                              (unsigned long long) usec) >= 0)
1783                         IOVEC_SET_STRING(iovec[n++], source_time);
1784         }
1785
1786         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
1787
1788         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1789                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1790
1791         if ((priority & LOG_FACMASK) == LOG_KERN) {
1792
1793                 if (s->forward_to_syslog)
1794                         forward_syslog(s, priority, "kernel", p, NULL, NULL);
1795
1796                 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
1797         } else {
1798                 read_identifier(&p, &identifier, &pid);
1799
1800                 if (s->forward_to_syslog)
1801                         forward_syslog(s, priority, identifier, p, NULL, NULL);
1802
1803                 if (identifier) {
1804                         syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1805                         if (syslog_identifier)
1806                                 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1807                 }
1808
1809                 if (pid) {
1810                         syslog_pid = strappend("SYSLOG_PID=", pid);
1811                         if (syslog_pid)
1812                                 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1813                 }
1814
1815                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1816                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1817         }
1818
1819         message = strappend("MESSAGE=", p);
1820         if (message)
1821                 IOVEC_SET_STRING(iovec[n++], message);
1822
1823         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, priority);
1824
1825         free(message);
1826         free(syslog_priority);
1827         free(syslog_identifier);
1828         free(syslog_pid);
1829         free(syslog_facility);
1830         free(source_time);
1831         free(identifier);
1832         free(pid);
1833 }
1834
1835 static void proc_kmsg_scan(Server *s) {
1836         char *p;
1837         size_t remaining;
1838
1839         assert(s);
1840
1841         p = s->proc_kmsg_buffer;
1842         remaining = s->proc_kmsg_length;
1843         for (;;) {
1844                 char *end;
1845                 size_t skip;
1846
1847                 end = memchr(p, '\n', remaining);
1848                 if (end)
1849                         skip = end - p + 1;
1850                 else if (remaining >= sizeof(s->proc_kmsg_buffer) - 1) {
1851                         end = p + sizeof(s->proc_kmsg_buffer) - 1;
1852                         skip = remaining;
1853                 } else
1854                         break;
1855
1856                 *end = 0;
1857
1858                 proc_kmsg_line(s, p);
1859
1860                 remaining -= skip;
1861                 p += skip;
1862         }
1863
1864         if (p > s->proc_kmsg_buffer) {
1865                 memmove(s->proc_kmsg_buffer, p, remaining);
1866                 s->proc_kmsg_length = remaining;
1867         }
1868 }
1869
1870 static int system_journal_open(Server *s) {
1871         int r;
1872         char *fn;
1873         sd_id128_t machine;
1874         char ids[33];
1875
1876         r = sd_id128_get_machine(&machine);
1877         if (r < 0)
1878                 return r;
1879
1880         sd_id128_to_string(machine, ids);
1881
1882         if (!s->system_journal) {
1883
1884                 /* First try to create the machine path, but not the prefix */
1885                 fn = strappend("/var/log/journal/", ids);
1886                 if (!fn)
1887                         return -ENOMEM;
1888                 (void) mkdir(fn, 0755);
1889                 free(fn);
1890
1891                 /* The create the system journal file */
1892                 fn = join("/var/log/journal/", ids, "/system.journal", NULL);
1893                 if (!fn)
1894                         return -ENOMEM;
1895
1896                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, NULL, &s->system_journal);
1897                 free(fn);
1898
1899                 if (r >= 0) {
1900                         journal_default_metrics(&s->system_metrics, s->system_journal->fd);
1901
1902                         s->system_journal->metrics = s->system_metrics;
1903                         s->system_journal->compress = s->compress;
1904
1905                         server_fix_perms(s, s->system_journal, 0);
1906                 } else if (r < 0) {
1907
1908                         if (r != -ENOENT && r != -EROFS)
1909                                 log_warning("Failed to open system journal: %s", strerror(-r));
1910
1911                         r = 0;
1912                 }
1913         }
1914
1915         if (!s->runtime_journal) {
1916
1917                 fn = join("/run/log/journal/", ids, "/system.journal", NULL);
1918                 if (!fn)
1919                         return -ENOMEM;
1920
1921                 if (s->system_journal) {
1922
1923                         /* Try to open the runtime journal, but only
1924                          * if it already exists, so that we can flush
1925                          * it into the system journal */
1926
1927                         r = journal_file_open(fn, O_RDWR, 0640, NULL, &s->runtime_journal);
1928                         free(fn);
1929
1930                         if (r < 0) {
1931                                 if (r != -ENOENT)
1932                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
1933
1934                                 r = 0;
1935                         }
1936
1937                 } else {
1938
1939                         /* OK, we really need the runtime journal, so create
1940                          * it if necessary. */
1941
1942                         (void) mkdir_parents(fn, 0755);
1943                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, NULL, &s->runtime_journal);
1944                         free(fn);
1945
1946                         if (r < 0) {
1947                                 log_error("Failed to open runtime journal: %s", strerror(-r));
1948                                 return r;
1949                         }
1950                 }
1951
1952                 if (s->runtime_journal) {
1953                         journal_default_metrics(&s->runtime_metrics, s->runtime_journal->fd);
1954
1955                         s->runtime_journal->metrics = s->runtime_metrics;
1956                         s->runtime_journal->compress = s->compress;
1957
1958                         server_fix_perms(s, s->runtime_journal, 0);
1959                 }
1960         }
1961
1962         return r;
1963 }
1964
1965 static int server_flush_to_var(Server *s) {
1966         char path[] = "/run/log/journal/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
1967         Object *o = NULL;
1968         int r;
1969         sd_id128_t machine;
1970         sd_journal *j;
1971         usec_t ts;
1972
1973         assert(s);
1974
1975         if (!s->runtime_journal)
1976                 return 0;
1977
1978         ts = now(CLOCK_MONOTONIC);
1979         if (s->var_available_timestamp + RECHECK_VAR_AVAILABLE_USEC > ts)
1980                 return 0;
1981
1982         s->var_available_timestamp = ts;
1983
1984         system_journal_open(s);
1985
1986         if (!s->system_journal)
1987                 return 0;
1988
1989         log_info("Flushing to /var...");
1990
1991         r = sd_id128_get_machine(&machine);
1992         if (r < 0) {
1993                 log_error("Failed to get machine id: %s", strerror(-r));
1994                 return r;
1995         }
1996
1997         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1998         if (r < 0) {
1999                 log_error("Failed to read runtime journal: %s", strerror(-r));
2000                 return r;
2001         }
2002
2003         SD_JOURNAL_FOREACH(j) {
2004                 JournalFile *f;
2005
2006                 f = j->current_file;
2007                 assert(f && f->current_offset > 0);
2008
2009                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2010                 if (r < 0) {
2011                         log_error("Can't read entry: %s", strerror(-r));
2012                         goto finish;
2013                 }
2014
2015                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2016                 if (r == -E2BIG) {
2017                         log_info("Allocation limit reached.");
2018
2019                         journal_file_post_change(s->system_journal);
2020                         server_rotate(s);
2021                         server_vacuum(s);
2022
2023                         r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2024                 }
2025
2026                 if (r < 0) {
2027                         log_error("Can't write entry: %s", strerror(-r));
2028                         goto finish;
2029                 }
2030         }
2031
2032 finish:
2033         journal_file_post_change(s->system_journal);
2034
2035         journal_file_close(s->runtime_journal);
2036         s->runtime_journal = NULL;
2037
2038         if (r >= 0) {
2039                 sd_id128_to_string(machine, path + 17);
2040                 rm_rf(path, false, true, false);
2041         }
2042
2043         return r;
2044 }
2045
2046 static int server_read_proc_kmsg(Server *s) {
2047         ssize_t l;
2048         assert(s);
2049         assert(s->proc_kmsg_fd >= 0);
2050
2051         l = read(s->proc_kmsg_fd, s->proc_kmsg_buffer + s->proc_kmsg_length, sizeof(s->proc_kmsg_buffer) - 1 - s->proc_kmsg_length);
2052         if (l < 0) {
2053
2054                 if (errno == EAGAIN || errno == EINTR)
2055                         return 0;
2056
2057                 log_error("Failed to read from kernel: %m");
2058                 return -errno;
2059         }
2060
2061         s->proc_kmsg_length += l;
2062
2063         proc_kmsg_scan(s);
2064         return 1;
2065 }
2066
2067 static int server_flush_proc_kmsg(Server *s) {
2068         int r;
2069
2070         assert(s);
2071
2072         if (s->proc_kmsg_fd < 0)
2073                 return 0;
2074
2075         log_info("Flushing /proc/kmsg...");
2076
2077         for (;;) {
2078                 r = server_read_proc_kmsg(s);
2079                 if (r < 0)
2080                         return r;
2081
2082                 if (r == 0)
2083                         break;
2084         }
2085
2086         return 0;
2087 }
2088
2089 static int process_event(Server *s, struct epoll_event *ev) {
2090         assert(s);
2091
2092         if (ev->data.fd == s->signal_fd) {
2093                 struct signalfd_siginfo sfsi;
2094                 ssize_t n;
2095
2096                 if (ev->events != EPOLLIN) {
2097                         log_info("Got invalid event from epoll.");
2098                         return -EIO;
2099                 }
2100
2101                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
2102                 if (n != sizeof(sfsi)) {
2103
2104                         if (n >= 0)
2105                                 return -EIO;
2106
2107                         if (errno == EINTR || errno == EAGAIN)
2108                                 return 1;
2109
2110                         return -errno;
2111                 }
2112
2113                 if (sfsi.ssi_signo == SIGUSR1) {
2114                         server_flush_to_var(s);
2115                         return 0;
2116                 }
2117
2118                 log_debug("Received SIG%s", signal_to_string(sfsi.ssi_signo));
2119                 return 0;
2120
2121         } else if (ev->data.fd == s->proc_kmsg_fd) {
2122                 int r;
2123
2124                 if (ev->events != EPOLLIN) {
2125                         log_info("Got invalid event from epoll.");
2126                         return -EIO;
2127                 }
2128
2129                 r = server_read_proc_kmsg(s);
2130                 if (r < 0)
2131                         return r;
2132
2133                 return 1;
2134
2135         } else if (ev->data.fd == s->native_fd ||
2136                    ev->data.fd == s->syslog_fd) {
2137
2138                 if (ev->events != EPOLLIN) {
2139                         log_info("Got invalid event from epoll.");
2140                         return -EIO;
2141                 }
2142
2143                 for (;;) {
2144                         struct msghdr msghdr;
2145                         struct iovec iovec;
2146                         struct ucred *ucred = NULL;
2147                         struct timeval *tv = NULL;
2148                         struct cmsghdr *cmsg;
2149                         char *label = NULL;
2150                         size_t label_len = 0;
2151                         union {
2152                                 struct cmsghdr cmsghdr;
2153
2154                                 /* We use NAME_MAX space for the
2155                                  * SELinux label here. The kernel
2156                                  * currently enforces no limit, but
2157                                  * according to suggestions from the
2158                                  * SELinux people this will change and
2159                                  * it will probably be identical to
2160                                  * NAME_MAX. For now we use that, but
2161                                  * this should be updated one day when
2162                                  * the final limit is known.*/
2163                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
2164                                             CMSG_SPACE(sizeof(struct timeval)) +
2165                                             CMSG_SPACE(sizeof(int)) + /* fd */
2166                                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
2167                         } control;
2168                         ssize_t n;
2169                         int v;
2170                         int *fds = NULL;
2171                         unsigned n_fds = 0;
2172
2173                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
2174                                 log_error("SIOCINQ failed: %m");
2175                                 return -errno;
2176                         }
2177
2178                         if (s->buffer_size < (size_t) v) {
2179                                 void *b;
2180                                 size_t l;
2181
2182                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
2183                                 b = realloc(s->buffer, l+1);
2184
2185                                 if (!b) {
2186                                         log_error("Couldn't increase buffer.");
2187                                         return -ENOMEM;
2188                                 }
2189
2190                                 s->buffer_size = l;
2191                                 s->buffer = b;
2192                         }
2193
2194                         zero(iovec);
2195                         iovec.iov_base = s->buffer;
2196                         iovec.iov_len = s->buffer_size;
2197
2198                         zero(control);
2199                         zero(msghdr);
2200                         msghdr.msg_iov = &iovec;
2201                         msghdr.msg_iovlen = 1;
2202                         msghdr.msg_control = &control;
2203                         msghdr.msg_controllen = sizeof(control);
2204
2205                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
2206                         if (n < 0) {
2207
2208                                 if (errno == EINTR || errno == EAGAIN)
2209                                         return 1;
2210
2211                                 log_error("recvmsg() failed: %m");
2212                                 return -errno;
2213                         }
2214
2215                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
2216
2217                                 if (cmsg->cmsg_level == SOL_SOCKET &&
2218                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
2219                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
2220                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
2221                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2222                                          cmsg->cmsg_type == SCM_SECURITY) {
2223                                         label = (char*) CMSG_DATA(cmsg);
2224                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
2225                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
2226                                          cmsg->cmsg_type == SO_TIMESTAMP &&
2227                                          cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
2228                                         tv = (struct timeval*) CMSG_DATA(cmsg);
2229                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2230                                          cmsg->cmsg_type == SCM_RIGHTS) {
2231                                         fds = (int*) CMSG_DATA(cmsg);
2232                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
2233                                 }
2234                         }
2235
2236                         if (ev->data.fd == s->syslog_fd) {
2237                                 char *e;
2238
2239                                 if (n > 0 && n_fds == 0) {
2240                                         e = memchr(s->buffer, '\n', n);
2241                                         if (e)
2242                                                 *e = 0;
2243                                         else
2244                                                 s->buffer[n] = 0;
2245
2246                                         process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
2247                                 } else if (n_fds > 0)
2248                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
2249
2250                         } else {
2251                                 if (n > 0 && n_fds == 0)
2252                                         process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
2253                                 else if (n == 0 && n_fds == 1)
2254                                         process_native_file(s, fds[0], ucred, tv, label, label_len);
2255                                 else if (n_fds > 0)
2256                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
2257                         }
2258
2259                         close_many(fds, n_fds);
2260                 }
2261
2262                 return 1;
2263
2264         } else if (ev->data.fd == s->stdout_fd) {
2265
2266                 if (ev->events != EPOLLIN) {
2267                         log_info("Got invalid event from epoll.");
2268                         return -EIO;
2269                 }
2270
2271                 stdout_stream_new(s);
2272                 return 1;
2273
2274         } else {
2275                 StdoutStream *stream;
2276
2277                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
2278                         log_info("Got invalid event from epoll.");
2279                         return -EIO;
2280                 }
2281
2282                 /* If it is none of the well-known fds, it must be an
2283                  * stdout stream fd. Note that this is a bit ugly here
2284                  * (since we rely that none of the well-known fds
2285                  * could be interpreted as pointer), but nonetheless
2286                  * safe, since the well-known fds would never get an
2287                  * fd > 4096, i.e. beyond the first memory page */
2288
2289                 stream = ev->data.ptr;
2290
2291                 if (stdout_stream_process(stream) <= 0)
2292                         stdout_stream_free(stream);
2293
2294                 return 1;
2295         }
2296
2297         log_error("Unknown event.");
2298         return 0;
2299 }
2300
2301 static int open_syslog_socket(Server *s) {
2302         union sockaddr_union sa;
2303         int one, r;
2304         struct epoll_event ev;
2305
2306         assert(s);
2307
2308         if (s->syslog_fd < 0) {
2309
2310                 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2311                 if (s->syslog_fd < 0) {
2312                         log_error("socket() failed: %m");
2313                         return -errno;
2314                 }
2315
2316                 zero(sa);
2317                 sa.un.sun_family = AF_UNIX;
2318                 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
2319
2320                 unlink(sa.un.sun_path);
2321
2322                 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2323                 if (r < 0) {
2324                         log_error("bind() failed: %m");
2325                         return -errno;
2326                 }
2327
2328                 chmod(sa.un.sun_path, 0666);
2329         } else
2330                 fd_nonblock(s->syslog_fd, 1);
2331
2332         one = 1;
2333         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2334         if (r < 0) {
2335                 log_error("SO_PASSCRED failed: %m");
2336                 return -errno;
2337         }
2338
2339 #ifdef HAVE_SELINUX
2340         one = 1;
2341         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2342         if (r < 0)
2343                 log_warning("SO_PASSSEC failed: %m");
2344 #endif
2345
2346         one = 1;
2347         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2348         if (r < 0) {
2349                 log_error("SO_TIMESTAMP failed: %m");
2350                 return -errno;
2351         }
2352
2353         zero(ev);
2354         ev.events = EPOLLIN;
2355         ev.data.fd = s->syslog_fd;
2356         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2357                 log_error("Failed to add syslog server fd to epoll object: %m");
2358                 return -errno;
2359         }
2360
2361         return 0;
2362 }
2363
2364 static int open_native_socket(Server*s) {
2365         union sockaddr_union sa;
2366         int one, r;
2367         struct epoll_event ev;
2368
2369         assert(s);
2370
2371         if (s->native_fd < 0) {
2372
2373                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2374                 if (s->native_fd < 0) {
2375                         log_error("socket() failed: %m");
2376                         return -errno;
2377                 }
2378
2379                 zero(sa);
2380                 sa.un.sun_family = AF_UNIX;
2381                 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
2382
2383                 unlink(sa.un.sun_path);
2384
2385                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2386                 if (r < 0) {
2387                         log_error("bind() failed: %m");
2388                         return -errno;
2389                 }
2390
2391                 chmod(sa.un.sun_path, 0666);
2392         } else
2393                 fd_nonblock(s->native_fd, 1);
2394
2395         one = 1;
2396         r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2397         if (r < 0) {
2398                 log_error("SO_PASSCRED failed: %m");
2399                 return -errno;
2400         }
2401
2402 #ifdef HAVE_SELINUX
2403         one = 1;
2404         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2405         if (r < 0)
2406                 log_warning("SO_PASSSEC failed: %m");
2407 #endif
2408
2409         one = 1;
2410         r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2411         if (r < 0) {
2412                 log_error("SO_TIMESTAMP failed: %m");
2413                 return -errno;
2414         }
2415
2416         zero(ev);
2417         ev.events = EPOLLIN;
2418         ev.data.fd = s->native_fd;
2419         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2420                 log_error("Failed to add native server fd to epoll object: %m");
2421                 return -errno;
2422         }
2423
2424         return 0;
2425 }
2426
2427 static int open_stdout_socket(Server *s) {
2428         union sockaddr_union sa;
2429         int r;
2430         struct epoll_event ev;
2431
2432         assert(s);
2433
2434         if (s->stdout_fd < 0) {
2435
2436                 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2437                 if (s->stdout_fd < 0) {
2438                         log_error("socket() failed: %m");
2439                         return -errno;
2440                 }
2441
2442                 zero(sa);
2443                 sa.un.sun_family = AF_UNIX;
2444                 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
2445
2446                 unlink(sa.un.sun_path);
2447
2448                 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2449                 if (r < 0) {
2450                         log_error("bind() failed: %m");
2451                         return -errno;
2452                 }
2453
2454                 chmod(sa.un.sun_path, 0666);
2455
2456                 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2457                         log_error("liste() failed: %m");
2458                         return -errno;
2459                 }
2460         } else
2461                 fd_nonblock(s->stdout_fd, 1);
2462
2463         zero(ev);
2464         ev.events = EPOLLIN;
2465         ev.data.fd = s->stdout_fd;
2466         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2467                 log_error("Failed to add stdout server fd to epoll object: %m");
2468                 return -errno;
2469         }
2470
2471         return 0;
2472 }
2473
2474 static int open_proc_kmsg(Server *s) {
2475         struct epoll_event ev;
2476
2477         assert(s);
2478
2479         if (!s->import_proc_kmsg)
2480                 return 0;
2481
2482
2483         s->proc_kmsg_fd = open("/proc/kmsg", O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2484         if (s->proc_kmsg_fd < 0) {
2485                 log_warning("Failed to open /proc/kmsg, ignoring: %m");
2486                 return 0;
2487         }
2488
2489         zero(ev);
2490         ev.events = EPOLLIN;
2491         ev.data.fd = s->proc_kmsg_fd;
2492         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->proc_kmsg_fd, &ev) < 0) {
2493                 log_error("Failed to add /proc/kmsg fd to epoll object: %m");
2494                 return -errno;
2495         }
2496
2497         return 0;
2498 }
2499
2500 static int open_signalfd(Server *s) {
2501         sigset_t mask;
2502         struct epoll_event ev;
2503
2504         assert(s);
2505
2506         assert_se(sigemptyset(&mask) == 0);
2507         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, -1);
2508         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2509
2510         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2511         if (s->signal_fd < 0) {
2512                 log_error("signalfd(): %m");
2513                 return -errno;
2514         }
2515
2516         zero(ev);
2517         ev.events = EPOLLIN;
2518         ev.data.fd = s->signal_fd;
2519
2520         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2521                 log_error("epoll_ctl(): %m");
2522                 return -errno;
2523         }
2524
2525         return 0;
2526 }
2527
2528 static int server_parse_proc_cmdline(Server *s) {
2529         char *line, *w, *state;
2530         int r;
2531         size_t l;
2532
2533         if (detect_container(NULL) > 0)
2534                 return 0;
2535
2536         r = read_one_line_file("/proc/cmdline", &line);
2537         if (r < 0) {
2538                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2539                 return 0;
2540         }
2541
2542         FOREACH_WORD_QUOTED(w, l, line, state) {
2543                 char *word;
2544
2545                 word = strndup(w, l);
2546                 if (!word) {
2547                         r = -ENOMEM;
2548                         goto finish;
2549                 }
2550
2551                 if (startswith(word, "systemd_journald.forward_to_syslog=")) {
2552                         r = parse_boolean(word + 35);
2553                         if (r < 0)
2554                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2555                         else
2556                                 s->forward_to_syslog = r;
2557                 } else if (startswith(word, "systemd_journald.forward_to_kmsg=")) {
2558                         r = parse_boolean(word + 33);
2559                         if (r < 0)
2560                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2561                         else
2562                                 s->forward_to_kmsg = r;
2563                 } else if (startswith(word, "systemd_journald.forward_to_console=")) {
2564                         r = parse_boolean(word + 36);
2565                         if (r < 0)
2566                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2567                         else
2568                                 s->forward_to_console = r;
2569                 }
2570
2571                 free(word);
2572         }
2573
2574         r = 0;
2575
2576 finish:
2577         free(line);
2578         return r;
2579 }
2580
2581 static int server_parse_config_file(Server *s) {
2582         FILE *f;
2583         const char *fn;
2584         int r;
2585
2586         assert(s);
2587
2588         fn = "/etc/systemd/journald.conf";
2589         f = fopen(fn, "re");
2590         if (!f) {
2591                 if (errno == ENOENT)
2592                         return 0;
2593
2594                 log_warning("Failed to open configuration file %s: %m", fn);
2595                 return -errno;
2596         }
2597
2598         r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2599         if (r < 0)
2600                 log_warning("Failed to parse configuration file: %s", strerror(-r));
2601
2602         fclose(f);
2603
2604         return r;
2605 }
2606
2607 static int server_init(Server *s) {
2608         int n, r, fd;
2609
2610         assert(s);
2611
2612         zero(*s);
2613         s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->proc_kmsg_fd = -1;
2614         s->compress = true;
2615
2616         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
2617         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
2618
2619         s->forward_to_syslog = true;
2620         s->import_proc_kmsg = true;
2621
2622         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
2623         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
2624
2625         server_parse_config_file(s);
2626         server_parse_proc_cmdline(s);
2627
2628         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
2629         if (!s->user_journals) {
2630                 log_error("Out of memory.");
2631                 return -ENOMEM;
2632         }
2633
2634         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2635         if (s->epoll_fd < 0) {
2636                 log_error("Failed to create epoll object: %m");
2637                 return -errno;
2638         }
2639
2640         n = sd_listen_fds(true);
2641         if (n < 0) {
2642                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
2643                 return n;
2644         }
2645
2646         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2647
2648                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
2649
2650                         if (s->native_fd >= 0) {
2651                                 log_error("Too many native sockets passed.");
2652                                 return -EINVAL;
2653                         }
2654
2655                         s->native_fd = fd;
2656
2657                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
2658
2659                         if (s->stdout_fd >= 0) {
2660                                 log_error("Too many stdout sockets passed.");
2661                                 return -EINVAL;
2662                         }
2663
2664                         s->stdout_fd = fd;
2665
2666                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
2667
2668                         if (s->syslog_fd >= 0) {
2669                                 log_error("Too many /dev/log sockets passed.");
2670                                 return -EINVAL;
2671                         }
2672
2673                         s->syslog_fd = fd;
2674
2675                 } else {
2676                         log_error("Unknown socket passed.");
2677                         return -EINVAL;
2678                 }
2679         }
2680
2681         r = open_syslog_socket(s);
2682         if (r < 0)
2683                 return r;
2684
2685         r = open_native_socket(s);
2686         if (r < 0)
2687                 return r;
2688
2689         r = open_stdout_socket(s);
2690         if (r < 0)
2691                 return r;
2692
2693         r = open_proc_kmsg(s);
2694         if (r < 0)
2695                 return r;
2696
2697         r = open_signalfd(s);
2698         if (r < 0)
2699                 return r;
2700
2701         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2702         if (!s->rate_limit)
2703                 return -ENOMEM;
2704
2705         r = system_journal_open(s);
2706         if (r < 0)
2707                 return r;
2708
2709         return 0;
2710 }
2711
2712 static void server_done(Server *s) {
2713         JournalFile *f;
2714         assert(s);
2715
2716         while (s->stdout_streams)
2717                 stdout_stream_free(s->stdout_streams);
2718
2719         if (s->system_journal)
2720                 journal_file_close(s->system_journal);
2721
2722         if (s->runtime_journal)
2723                 journal_file_close(s->runtime_journal);
2724
2725         while ((f = hashmap_steal_first(s->user_journals)))
2726                 journal_file_close(f);
2727
2728         hashmap_free(s->user_journals);
2729
2730         if (s->epoll_fd >= 0)
2731                 close_nointr_nofail(s->epoll_fd);
2732
2733         if (s->signal_fd >= 0)
2734                 close_nointr_nofail(s->signal_fd);
2735
2736         if (s->syslog_fd >= 0)
2737                 close_nointr_nofail(s->syslog_fd);
2738
2739         if (s->native_fd >= 0)
2740                 close_nointr_nofail(s->native_fd);
2741
2742         if (s->stdout_fd >= 0)
2743                 close_nointr_nofail(s->stdout_fd);
2744
2745         if (s->proc_kmsg_fd >= 0)
2746                 close_nointr_nofail(s->proc_kmsg_fd);
2747
2748         if (s->rate_limit)
2749                 journal_rate_limit_free(s->rate_limit);
2750
2751         free(s->buffer);
2752 }
2753
2754 int main(int argc, char *argv[]) {
2755         Server server;
2756         int r;
2757
2758         /* if (getppid() != 1) { */
2759         /*         log_error("This program should be invoked by init only."); */
2760         /*         return EXIT_FAILURE; */
2761         /* } */
2762
2763         if (argc > 1) {
2764                 log_error("This program does not take arguments.");
2765                 return EXIT_FAILURE;
2766         }
2767
2768         log_set_target(LOG_TARGET_CONSOLE);
2769         log_set_facility(LOG_SYSLOG);
2770         log_parse_environment();
2771         log_open();
2772
2773         umask(0022);
2774
2775         r = server_init(&server);
2776         if (r < 0)
2777                 goto finish;
2778
2779         server_vacuum(&server);
2780         server_flush_to_var(&server);
2781         server_flush_proc_kmsg(&server);
2782
2783         log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
2784         driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
2785
2786         sd_notify(false,
2787                   "READY=1\n"
2788                   "STATUS=Processing requests...");
2789
2790         for (;;) {
2791                 struct epoll_event event;
2792
2793                 r = epoll_wait(server.epoll_fd, &event, 1, -1);
2794                 if (r < 0) {
2795
2796                         if (errno == EINTR)
2797                                 continue;
2798
2799                         log_error("epoll_wait() failed: %m");
2800                         r = -errno;
2801                         goto finish;
2802                 } else if (r == 0)
2803                         break;
2804
2805                 r = process_event(&server, &event);
2806                 if (r < 0)
2807                         goto finish;
2808                 else if (r == 0)
2809                         break;
2810         }
2811
2812         log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
2813         driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
2814
2815 finish:
2816         sd_notify(false,
2817                   "STATUS=Shutting down...");
2818
2819         server_done(&server);
2820
2821         return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
2822 }