chiark / gitweb /
e5bcc2620399a5a0922408667c7f9a7934521e37
[elogind.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 2 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
32 #include <sys/user.h>
33
34 #include <systemd/sd-journal.h>
35 #include <systemd/sd-login.h>
36 #include <systemd/sd-messages.h>
37 #include <systemd/sd-daemon.h>
38
39 #include "hashmap.h"
40 #include "journal-file.h"
41 #include "socket-util.h"
42 #include "cgroup-util.h"
43 #include "list.h"
44 #include "journal-rate-limit.h"
45 #include "journal-internal.h"
46 #include "conf-parser.h"
47 #include "journald.h"
48 #include "virt.h"
49 #include "missing.h"
50
51 #ifdef HAVE_ACL
52 #include <sys/acl.h>
53 #include <acl/libacl.h>
54 #include "acl-util.h"
55 #endif
56
57 #ifdef HAVE_SELINUX
58 #include <selinux/selinux.h>
59 #endif
60
/* Upper bound on the number of per-user journal files kept open at
 * once; find_journal() closes an arbitrary one when the limit is hit. */
#define USER_JOURNALS_MAX 1024
/* NOTE(review): presumably the cap on concurrently connected stdout
 * streams; the code enforcing it is not in this part of the file. */
#define STDOUT_STREAMS_MAX 4096

/* Default rate limiting parameters. NOTE(review): where these are
 * applied is not visible in this part of the file. */
#define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 200

/* How long available_space() keeps serving its cached result before
 * it walks the journal directory again. */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)

/* NOTE(review): not referenced in this part of the file; presumably
 * the interval for re-checking whether /var has become available. */
#define RECHECK_VAR_AVAILABLE_USEC (30*USEC_PER_SEC)

/* Number of spare iovec slots callers of dispatch_message_real() must
 * reserve: that function appends at most this many metadata fields. */
#define N_IOVEC_META_FIELDS 17

/* NOTE(review): presumably the maximum accepted size of one entry, in
 * bytes (32 MiB); the check using it is not in this part of the file. */
#define ENTRY_SIZE_MAX (1024*1024*32)
74
/* State of a StdoutStream connection.
 *
 * NOTE(review): the names suggest that a client first sends a series
 * of header fields (identifier, priority, level prefix, forwarding
 * flags) in this order and that STDOUT_STREAM_RUNNING is reached once
 * the header is complete; the code driving this state machine is not
 * in this part of the file, so confirm against it. */
typedef enum StdoutStreamState {
        STDOUT_STREAM_IDENTIFIER,
        STDOUT_STREAM_PRIORITY,
        STDOUT_STREAM_LEVEL_PREFIX,
        STDOUT_STREAM_FORWARD_TO_SYSLOG,
        STDOUT_STREAM_FORWARD_TO_KMSG,
        STDOUT_STREAM_FORWARD_TO_CONSOLE,
        STDOUT_STREAM_RUNNING
} StdoutStreamState;
84
/* Per-connection state of one stdout stream client. */
struct StdoutStream {
        /* Owning server and current position in the StdoutStreamState
         * state machine */
        Server *server;
        StdoutStreamState state;

        /* File descriptor of the connection */
        int fd;

        /* Credentials of the peer and, if built with SELinux support,
         * its security context */
        struct ucred ucred;
#ifdef HAVE_SELINUX
        security_context_t security_context;
#endif

        /* Per-stream settings; each one has a matching
         * StdoutStreamState value. NOTE(review): presumably filled in
         * from the stream header -- confirm against the parser. */
        char *identifier;
        int priority;
        bool level_prefix:1;
        bool forward_to_syslog:1;
        bool forward_to_kmsg:1;
        bool forward_to_console:1;

        /* Input buffer with room for LINE_MAX bytes plus one more.
         * NOTE(review): 'length' is presumably the number of bytes
         * currently held in 'buffer' -- confirm. */
        char buffer[LINE_MAX+1];
        size_t length;

        /* Linkage for the list of streams this object is a member of */
        LIST_FIELDS(StdoutStream, stdout_stream);
};
108
/* Forward declaration: dispatch_message_real() calls this before it
 * writes an entry; the definition is further down in the file. */
static int server_flush_to_var(Server *s);
110
111 static uint64_t available_space(Server *s) {
112         char ids[33], *p;
113         const char *f;
114         sd_id128_t machine;
115         struct statvfs ss;
116         uint64_t sum = 0, avail = 0, ss_avail = 0;
117         int r;
118         DIR *d;
119         usec_t ts;
120         JournalMetrics *m;
121
122         ts = now(CLOCK_MONOTONIC);
123
124         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
125                 return s->cached_available_space;
126
127         r = sd_id128_get_machine(&machine);
128         if (r < 0)
129                 return 0;
130
131         if (s->system_journal) {
132                 f = "/var/log/journal/";
133                 m = &s->system_metrics;
134         } else {
135                 f = "/run/log/journal/";
136                 m = &s->runtime_metrics;
137         }
138
139         assert(m);
140
141         p = strappend(f, sd_id128_to_string(machine, ids));
142         if (!p)
143                 return 0;
144
145         d = opendir(p);
146         free(p);
147
148         if (!d)
149                 return 0;
150
151         if (fstatvfs(dirfd(d), &ss) < 0)
152                 goto finish;
153
154         for (;;) {
155                 struct stat st;
156                 struct dirent buf, *de;
157                 int k;
158
159                 k = readdir_r(d, &buf, &de);
160                 if (k != 0) {
161                         r = -k;
162                         goto finish;
163                 }
164
165                 if (!de)
166                         break;
167
168                 if (!endswith(de->d_name, ".journal") &&
169                     !endswith(de->d_name, ".journal~"))
170                         continue;
171
172                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
173                         continue;
174
175                 if (!S_ISREG(st.st_mode))
176                         continue;
177
178                 sum += (uint64_t) st.st_blocks * 512UL;
179         }
180
181         avail = sum >= m->max_use ? 0 : m->max_use - sum;
182
183         ss_avail = ss.f_bsize * ss.f_bavail;
184
185         ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
186
187         if (ss_avail < avail)
188                 avail = ss_avail;
189
190         s->cached_available_space = avail;
191         s->cached_available_space_timestamp = ts;
192
193 finish:
194         closedir(d);
195
196         return avail;
197 }
198
199 static void server_read_file_gid(Server *s) {
200         const char *adm = "adm";
201         int r;
202
203         assert(s);
204
205         if (s->file_gid_valid)
206                 return;
207
208         r = get_group_creds(&adm, &s->file_gid);
209         if (r < 0)
210                 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
211
212         /* if we couldn't read the gid, then it will be 0, but that's
213          * fine and we shouldn't try to resolve the group again, so
214          * let's just pretend it worked right-away. */
215         s->file_gid_valid = true;
216 }
217
218 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
219         int r;
220 #ifdef HAVE_ACL
221         acl_t acl;
222         acl_entry_t entry;
223         acl_permset_t permset;
224 #endif
225
226         assert(f);
227
228         server_read_file_gid(s);
229
230         r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
231         if (r < 0)
232                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
233
234 #ifdef HAVE_ACL
235         if (uid <= 0)
236                 return;
237
238         acl = acl_get_fd(f->fd);
239         if (!acl) {
240                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
241                 return;
242         }
243
244         r = acl_find_uid(acl, uid, &entry);
245         if (r <= 0) {
246
247                 if (acl_create_entry(&acl, &entry) < 0 ||
248                     acl_set_tag_type(entry, ACL_USER) < 0 ||
249                     acl_set_qualifier(entry, &uid) < 0) {
250                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
251                         goto finish;
252                 }
253         }
254
255         if (acl_get_permset(entry, &permset) < 0 ||
256             acl_add_perm(permset, ACL_READ) < 0 ||
257             acl_calc_mask(&acl) < 0) {
258                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
259                 goto finish;
260         }
261
262         if (acl_set_fd(f->fd, acl) < 0)
263                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
264
265 finish:
266         acl_free(acl);
267 #endif
268 }
269
270 static JournalFile* find_journal(Server *s, uid_t uid) {
271         char *p;
272         int r;
273         JournalFile *f;
274         char ids[33];
275         sd_id128_t machine;
276
277         assert(s);
278
279         /* We split up user logs only on /var, not on /run. If the
280          * runtime file is open, we write to it exclusively, in order
281          * to guarantee proper order as soon as we flush /run to
282          * /var and close the runtime file. */
283
284         if (s->runtime_journal)
285                 return s->runtime_journal;
286
287         if (uid <= 0)
288                 return s->system_journal;
289
290         r = sd_id128_get_machine(&machine);
291         if (r < 0)
292                 return s->system_journal;
293
294         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
295         if (f)
296                 return f;
297
298         if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0)
299                 return s->system_journal;
300
301         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
302                 /* Too many open? Then let's close one */
303                 f = hashmap_steal_first(s->user_journals);
304                 assert(f);
305                 journal_file_close(f);
306         }
307
308         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->system_journal, &f);
309         free(p);
310
311         if (r < 0)
312                 return s->system_journal;
313
314         server_fix_perms(s, f, uid);
315
316         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
317         if (r < 0) {
318                 journal_file_close(f);
319                 return s->system_journal;
320         }
321
322         return f;
323 }
324
325 static void server_rotate(Server *s) {
326         JournalFile *f;
327         void *k;
328         Iterator i;
329         int r;
330
331         log_info("Rotating...");
332
333         if (s->runtime_journal) {
334                 r = journal_file_rotate(&s->runtime_journal);
335                 if (r < 0)
336                         log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
337         }
338
339         if (s->system_journal) {
340                 r = journal_file_rotate(&s->system_journal);
341                 if (r < 0)
342                         log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
343         }
344
345         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
346                 r = journal_file_rotate(&f);
347                 if (r < 0)
348                         log_error("Failed to rotate %s: %s", f->path, strerror(-r));
349                 else
350                         hashmap_replace(s->user_journals, k, f);
351         }
352 }
353
354 static void server_vacuum(Server *s) {
355         char *p;
356         char ids[33];
357         sd_id128_t machine;
358         int r;
359
360         log_info("Vacuuming...");
361
362         r = sd_id128_get_machine(&machine);
363         if (r < 0) {
364                 log_error("Failed to get machine ID: %s", strerror(-r));
365                 return;
366         }
367
368         sd_id128_to_string(machine, ids);
369
370         if (s->system_journal) {
371                 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
372                         log_error("Out of memory.");
373                         return;
374                 }
375
376                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
377                 if (r < 0 && r != -ENOENT)
378                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
379                 free(p);
380         }
381
382
383         if (s->runtime_journal) {
384                 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
385                         log_error("Out of memory.");
386                         return;
387                 }
388
389                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
390                 if (r < 0 && r != -ENOENT)
391                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
392                 free(p);
393         }
394
395         s->cached_available_space_timestamp = 0;
396 }
397
398 static char *shortened_cgroup_path(pid_t pid) {
399         int r;
400         char *process_path, *init_path, *path;
401
402         assert(pid > 0);
403
404         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
405         if (r < 0)
406                 return NULL;
407
408         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
409         if (r < 0) {
410                 free(process_path);
411                 return NULL;
412         }
413
414         if (endswith(init_path, "/system"))
415                 init_path[strlen(init_path) - 7] = 0;
416         else if (streq(init_path, "/"))
417                 init_path[0] = 0;
418
419         if (startswith(process_path, init_path)) {
420                 char *p;
421
422                 p = strdup(process_path + strlen(init_path));
423                 if (!p) {
424                         free(process_path);
425                         free(init_path);
426                         return NULL;
427                 }
428                 path = p;
429         } else {
430                 path = process_path;
431                 process_path = NULL;
432         }
433
434         free(process_path);
435         free(init_path);
436
437         return path;
438 }
439
440 static void dispatch_message_real(
441                 Server *s,
442                 struct iovec *iovec, unsigned n, unsigned m,
443                 struct ucred *ucred,
444                 struct timeval *tv,
445                 const char *label, size_t label_len) {
446
447         char *pid = NULL, *uid = NULL, *gid = NULL,
448                 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
449                 *comm = NULL, *cmdline = NULL, *hostname = NULL,
450                 *audit_session = NULL, *audit_loginuid = NULL,
451                 *exe = NULL, *cgroup = NULL, *session = NULL,
452                 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
453
454         char idbuf[33];
455         sd_id128_t id;
456         int r;
457         char *t;
458         uid_t loginuid = 0, realuid = 0;
459         JournalFile *f;
460         bool vacuumed = false;
461
462         assert(s);
463         assert(iovec);
464         assert(n > 0);
465         assert(n + N_IOVEC_META_FIELDS <= m);
466
467         if (ucred) {
468                 uint32_t audit;
469                 uid_t owner;
470
471                 realuid = ucred->uid;
472
473                 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
474                         IOVEC_SET_STRING(iovec[n++], pid);
475
476                 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
477                         IOVEC_SET_STRING(iovec[n++], uid);
478
479                 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
480                         IOVEC_SET_STRING(iovec[n++], gid);
481
482                 r = get_process_comm(ucred->pid, &t);
483                 if (r >= 0) {
484                         comm = strappend("_COMM=", t);
485                         free(t);
486
487                         if (comm)
488                                 IOVEC_SET_STRING(iovec[n++], comm);
489                 }
490
491                 r = get_process_exe(ucred->pid, &t);
492                 if (r >= 0) {
493                         exe = strappend("_EXE=", t);
494                         free(t);
495
496                         if (exe)
497                                 IOVEC_SET_STRING(iovec[n++], exe);
498                 }
499
500                 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
501                 if (r >= 0) {
502                         cmdline = strappend("_CMDLINE=", t);
503                         free(t);
504
505                         if (cmdline)
506                                 IOVEC_SET_STRING(iovec[n++], cmdline);
507                 }
508
509                 r = audit_session_from_pid(ucred->pid, &audit);
510                 if (r >= 0)
511                         if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
512                                 IOVEC_SET_STRING(iovec[n++], audit_session);
513
514                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
515                 if (r >= 0)
516                         if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
517                                 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
518
519                 t = shortened_cgroup_path(ucred->pid);
520                 if (t) {
521                         cgroup = strappend("_SYSTEMD_CGROUP=", t);
522                         free(t);
523
524                         if (cgroup)
525                                 IOVEC_SET_STRING(iovec[n++], cgroup);
526                 }
527
528                 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
529                         session = strappend("_SYSTEMD_SESSION=", t);
530                         free(t);
531
532                         if (session)
533                                 IOVEC_SET_STRING(iovec[n++], session);
534                 }
535
536                 if (sd_pid_get_unit(ucred->pid, &t) >= 0) {
537                         unit = strappend("_SYSTEMD_UNIT=", t);
538                         free(t);
539
540                         if (unit)
541                                 IOVEC_SET_STRING(iovec[n++], unit);
542                 }
543
544                 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
545                         if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
546                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
547
548 #ifdef HAVE_SELINUX
549                 if (label) {
550                         selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
551                         if (selinux_context) {
552                                 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
553                                 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
554                                 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
555                                 IOVEC_SET_STRING(iovec[n++], selinux_context);
556                         }
557                 } else {
558                         security_context_t con;
559
560                         if (getpidcon(ucred->pid, &con) >= 0) {
561                                 selinux_context = strappend("_SELINUX_CONTEXT=", con);
562                                 if (selinux_context)
563                                         IOVEC_SET_STRING(iovec[n++], selinux_context);
564
565                                 freecon(con);
566                         }
567                 }
568 #endif
569         }
570
571         if (tv) {
572                 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
573                              (unsigned long long) timeval_load(tv)) >= 0)
574                         IOVEC_SET_STRING(iovec[n++], source_time);
575         }
576
577         /* Note that strictly speaking storing the boot id here is
578          * redundant since the entry includes this in-line
579          * anyway. However, we need this indexed, too. */
580         r = sd_id128_get_boot(&id);
581         if (r >= 0)
582                 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
583                         IOVEC_SET_STRING(iovec[n++], boot_id);
584
585         r = sd_id128_get_machine(&id);
586         if (r >= 0)
587                 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
588                         IOVEC_SET_STRING(iovec[n++], machine_id);
589
590         t = gethostname_malloc();
591         if (t) {
592                 hostname = strappend("_HOSTNAME=", t);
593                 free(t);
594                 if (hostname)
595                         IOVEC_SET_STRING(iovec[n++], hostname);
596         }
597
598         assert(n <= m);
599
600         server_flush_to_var(s);
601
602 retry:
603         f = find_journal(s, realuid == 0 ? 0 : loginuid);
604         if (!f)
605                 log_warning("Dropping message, as we can't find a place to store the data.");
606         else {
607                 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
608
609                 if ((r == -EBADMSG || r == -E2BIG) && !vacuumed) {
610
611                         if (r == -E2BIG)
612                                 log_info("Allocation limit reached, rotating.");
613                         else
614                                 log_warning("Journal file corrupted, rotating.");
615
616                         server_rotate(s);
617                         server_vacuum(s);
618                         vacuumed = true;
619
620                         log_info("Retrying write.");
621                         goto retry;
622                 }
623
624                 if (r < 0)
625                         log_error("Failed to write entry, ignoring: %s", strerror(-r));
626         }
627
628         free(pid);
629         free(uid);
630         free(gid);
631         free(comm);
632         free(exe);
633         free(cmdline);
634         free(source_time);
635         free(boot_id);
636         free(machine_id);
637         free(hostname);
638         free(audit_session);
639         free(audit_loginuid);
640         free(cgroup);
641         free(session);
642         free(owner_uid);
643         free(unit);
644         free(selinux_context);
645 }
646
647 static void driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
648         char mid[11 + 32 + 1];
649         char buffer[16 + LINE_MAX + 1];
650         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
651         int n = 0;
652         va_list ap;
653         struct ucred ucred;
654
655         assert(s);
656         assert(format);
657
658         IOVEC_SET_STRING(iovec[n++], "PRIORITY=5");
659         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
660
661         memcpy(buffer, "MESSAGE=", 8);
662         va_start(ap, format);
663         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
664         va_end(ap);
665         char_array_0(buffer);
666         IOVEC_SET_STRING(iovec[n++], buffer);
667
668         snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
669         char_array_0(mid);
670         IOVEC_SET_STRING(iovec[n++], mid);
671
672         zero(ucred);
673         ucred.pid = getpid();
674         ucred.uid = getuid();
675         ucred.gid = getgid();
676
677         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0);
678 }
679
680 static void dispatch_message(Server *s,
681                              struct iovec *iovec, unsigned n, unsigned m,
682                              struct ucred *ucred,
683                              struct timeval *tv,
684                              const char *label, size_t label_len,
685                              int priority) {
686         int rl;
687         char *path = NULL, *c;
688
689         assert(s);
690         assert(iovec || n == 0);
691
692         if (n == 0)
693                 return;
694
695         if (!ucred)
696                 goto finish;
697
698         path = shortened_cgroup_path(ucred->pid);
699         if (!path)
700                 goto finish;
701
702         /* example: /user/lennart/3/foobar
703          *          /system/dbus.service/foobar
704          *
705          * So let's cut of everything past the third /, since that is
706          * wher user directories start */
707
708         c = strchr(path, '/');
709         if (c) {
710                 c = strchr(c+1, '/');
711                 if (c) {
712                         c = strchr(c+1, '/');
713                         if (c)
714                                 *c = 0;
715                 }
716         }
717
718         rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
719
720         if (rl == 0) {
721                 free(path);
722                 return;
723         }
724
725         /* Write a suppression message if we suppressed something */
726         if (rl > 1)
727                 driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
728
729         free(path);
730
731 finish:
732         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len);
733 }
734
735 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
736         struct msghdr msghdr;
737         struct cmsghdr *cmsg;
738         union {
739                 struct cmsghdr cmsghdr;
740                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
741         } control;
742         union sockaddr_union sa;
743
744         assert(s);
745         assert(iovec);
746         assert(n_iovec > 0);
747
748         zero(msghdr);
749         msghdr.msg_iov = (struct iovec*) iovec;
750         msghdr.msg_iovlen = n_iovec;
751
752         zero(sa);
753         sa.un.sun_family = AF_UNIX;
754         strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
755         msghdr.msg_name = &sa;
756         msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
757
758         if (ucred) {
759                 zero(control);
760                 msghdr.msg_control = &control;
761                 msghdr.msg_controllen = sizeof(control);
762
763                 cmsg = CMSG_FIRSTHDR(&msghdr);
764                 cmsg->cmsg_level = SOL_SOCKET;
765                 cmsg->cmsg_type = SCM_CREDENTIALS;
766                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
767                 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
768                 msghdr.msg_controllen = cmsg->cmsg_len;
769         }
770
771         /* Forward the syslog message we received via /dev/log to
772          * /run/systemd/syslog. Unfortunately we currently can't set
773          * the SO_TIMESTAMP auxiliary data, and hence we don't. */
774
775         if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
776                 return;
777
778         /* The socket is full? I guess the syslog implementation is
779          * too slow, and we shouldn't wait for that... */
780         if (errno == EAGAIN)
781                 return;
782
783         if (ucred && errno == ESRCH) {
784                 struct ucred u;
785
786                 /* Hmm, presumably the sender process vanished
787                  * by now, so let's fix it as good as we
788                  * can, and retry */
789
790                 u = *ucred;
791                 u.pid = getpid();
792                 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
793
794                 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
795                         return;
796
797                 if (errno == EAGAIN)
798                         return;
799         }
800
801         log_debug("Failed to forward syslog message: %m");
802 }
803
804 static void forward_syslog_raw(Server *s, const char *buffer, struct ucred *ucred, struct timeval *tv) {
805         struct iovec iovec;
806
807         assert(s);
808         assert(buffer);
809
810         IOVEC_SET_STRING(iovec, buffer);
811         forward_syslog_iovec(s, &iovec, 1, ucred, tv);
812 }
813
814 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
815         struct iovec iovec[5];
816         char header_priority[6], header_time[64], header_pid[16];
817         int n = 0;
818         time_t t;
819         struct tm *tm;
820         char *ident_buf = NULL;
821
822         assert(s);
823         assert(priority >= 0);
824         assert(priority <= 999);
825         assert(message);
826
827         /* First: priority field */
828         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
829         char_array_0(header_priority);
830         IOVEC_SET_STRING(iovec[n++], header_priority);
831
832         /* Second: timestamp */
833         t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
834         tm = localtime(&t);
835         if (!tm)
836                 return;
837         if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
838                 return;
839         IOVEC_SET_STRING(iovec[n++], header_time);
840
841         /* Third: identifier and PID */
842         if (ucred) {
843                 if (!identifier) {
844                         get_process_comm(ucred->pid, &ident_buf);
845                         identifier = ident_buf;
846                 }
847
848                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
849                 char_array_0(header_pid);
850
851                 if (identifier)
852                         IOVEC_SET_STRING(iovec[n++], identifier);
853
854                 IOVEC_SET_STRING(iovec[n++], header_pid);
855         } else if (identifier) {
856                 IOVEC_SET_STRING(iovec[n++], identifier);
857                 IOVEC_SET_STRING(iovec[n++], ": ");
858         }
859
860         /* Fourth: message */
861         IOVEC_SET_STRING(iovec[n++], message);
862
863         forward_syslog_iovec(s, iovec, n, ucred, tv);
864
865         free(ident_buf);
866 }
867
/* Returns the priority value unmodified if it carries a non-zero
 * facility. Otherwise returns its level combined with the LOG_USER
 * facility. */
static int fixup_priority(int priority) {
        return (priority & LOG_FACMASK) != 0
                ? priority
                : ((priority & LOG_PRIMASK) | LOG_USER);
}
875
876 static void forward_kmsg(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
877         struct iovec iovec[5];
878         char header_priority[6], header_pid[16];
879         int n = 0;
880         char *ident_buf = NULL;
881         int fd;
882
883         assert(s);
884         assert(priority >= 0);
885         assert(priority <= 999);
886         assert(message);
887
888         /* Never allow messages with kernel facility to be written to
889          * kmsg, regardless where the data comes from. */
890         priority = fixup_priority(priority);
891
892         /* First: priority field */
893         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
894         char_array_0(header_priority);
895         IOVEC_SET_STRING(iovec[n++], header_priority);
896
897         /* Second: identifier and PID */
898         if (ucred) {
899                 if (!identifier) {
900                         get_process_comm(ucred->pid, &ident_buf);
901                         identifier = ident_buf;
902                 }
903
904                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
905                 char_array_0(header_pid);
906
907                 if (identifier)
908                         IOVEC_SET_STRING(iovec[n++], identifier);
909
910                 IOVEC_SET_STRING(iovec[n++], header_pid);
911         } else if (identifier) {
912                 IOVEC_SET_STRING(iovec[n++], identifier);
913                 IOVEC_SET_STRING(iovec[n++], ": ");
914         }
915
916         /* Fourth: message */
917         IOVEC_SET_STRING(iovec[n++], message);
918         IOVEC_SET_STRING(iovec[n++], "\n");
919
920         fd = open("/dev/kmsg", O_WRONLY|O_NOCTTY|O_CLOEXEC);
921         if (fd < 0) {
922                 log_debug("Failed to open /dev/kmsg for logging: %s", strerror(errno));
923                 goto finish;
924         }
925
926         if (writev(fd, iovec, n) < 0)
927                 log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
928
929         close_nointr_nofail(fd);
930
931 finish:
932         free(ident_buf);
933 }
934
935 static void forward_console(Server *s, const char *identifier, const char *message, struct ucred *ucred) {
936         struct iovec iovec[4];
937         char header_pid[16];
938         int n = 0, fd;
939         char *ident_buf = NULL;
940
941         assert(s);
942         assert(message);
943
944         /* First: identifier and PID */
945         if (ucred) {
946                 if (!identifier) {
947                         get_process_comm(ucred->pid, &ident_buf);
948                         identifier = ident_buf;
949                 }
950
951                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
952                 char_array_0(header_pid);
953
954                 if (identifier)
955                         IOVEC_SET_STRING(iovec[n++], identifier);
956
957                 IOVEC_SET_STRING(iovec[n++], header_pid);
958         } else if (identifier) {
959                 IOVEC_SET_STRING(iovec[n++], identifier);
960                 IOVEC_SET_STRING(iovec[n++], ": ");
961         }
962
963         /* Third: message */
964         IOVEC_SET_STRING(iovec[n++], message);
965         IOVEC_SET_STRING(iovec[n++], "\n");
966
967         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
968         if (fd < 0) {
969                 log_debug("Failed to open /dev/console for logging: %s", strerror(errno));
970                 goto finish;
971         }
972
973         if (writev(fd, iovec, n) < 0)
974                 log_debug("Failed to write to /dev/console for logging: %s", strerror(errno));
975
976         close_nointr_nofail(fd);
977
978 finish:
979         free(ident_buf);
980 }
981
/* Parses a leading "identifier:" or "identifier[pid]:" token off *buf.
 *
 * Leading whitespace is skipped; the first whitespace-delimited token
 * is only accepted if it ends in ':'. On a match, *identifier receives
 * a newly allocated copy of the identifier, *pid a newly allocated copy
 * of the text between '[' and ']' (if such a suffix is present), and
 * *buf is advanced past the token and any whitespace following it. The
 * caller owns and frees both strings. If the token does not end in ':'
 * nothing is modified. Allocation failures are silently tolerated: the
 * corresponding output is simply left untouched. */
static void read_identifier(const char **buf, char **identifier, char **pid) {
        const char *p;
        char *t;
        size_t l, e;

        assert(buf);
        assert(identifier);
        assert(pid);

        p = *buf;

        p += strspn(p, WHITESPACE);
        l = strcspn(p, WHITESPACE);

        if (l <= 0 ||
            p[l-1] != ':')
                return;

        /* e is the full token length including the colon, l the length
         * without it. */
        e = l;
        l--;

        /* The token may consist of nothing but the colon, in which case
         * l is 0 now and p[l-1] would lie before the token. */
        if (l > 0 && p[l-1] == ']') {
                size_t k = l-1;

                /* Scan backwards for the matching '[' */
                for (;;) {

                        if (p[k] == '[') {
                                t = strndup(p+k+1, l-k-2);
                                if (t)
                                        *pid = t;

                                /* The identifier ends where the PID suffix starts */
                                l = k;
                                break;
                        }

                        if (k == 0)
                                break;

                        k--;
                }
        }

        t = strndup(p, l);
        if (t)
                *identifier = t;

        *buf = p + e;
        *buf += strspn(*buf, WHITESPACE);
}
1031
1032 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv, const char *label, size_t label_len) {
1033         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
1034         struct iovec iovec[N_IOVEC_META_FIELDS + 6];
1035         unsigned n = 0;
1036         int priority = LOG_USER | LOG_INFO;
1037         char *identifier = NULL, *pid = NULL;
1038
1039         assert(s);
1040         assert(buf);
1041
1042         if (s->forward_to_syslog)
1043                 forward_syslog_raw(s, buf, ucred, tv);
1044
1045         parse_syslog_priority((char**) &buf, &priority);
1046         skip_syslog_date((char**) &buf);
1047         read_identifier(&buf, &identifier, &pid);
1048
1049         if (s->forward_to_kmsg)
1050                 forward_kmsg(s, priority, identifier, buf, ucred);
1051
1052         if (s->forward_to_console)
1053                 forward_console(s, identifier, buf, ucred);
1054
1055         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
1056
1057         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1058                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1059
1060         if (priority & LOG_FACMASK)
1061                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1062                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1063
1064         if (identifier) {
1065                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1066                 if (syslog_identifier)
1067                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1068         }
1069
1070         if (pid) {
1071                 syslog_pid = strappend("SYSLOG_PID=", pid);
1072                 if (syslog_pid)
1073                         IOVEC_SET_STRING(iovec[n++], syslog_pid);
1074         }
1075
1076         message = strappend("MESSAGE=", buf);
1077         if (message)
1078                 IOVEC_SET_STRING(iovec[n++], message);
1079
1080         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, label, label_len, priority);
1081
1082         free(message);
1083         free(identifier);
1084         free(pid);
1085         free(syslog_priority);
1086         free(syslog_facility);
1087         free(syslog_identifier);
1088 }
1089
/* Checks whether the l bytes at p form a field name clients may set.
 *
 * This loosely follows the POSIX syntax recommendations for environment
 * variable names, with a few extra restrictions:
 *
 * http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
 *
 * A name is accepted if it is 1 to 64 characters long, does not start
 * with an underscore (those names are protected) or a digit, and
 * consists solely of A-Z, 0-9 and '_'. */
static bool valid_user_field(const char *p, size_t l) {
        size_t i;

        /* Neither empty nor overly long names */
        if (l == 0 || l > 64)
                return false;

        /* A leading underscore marks a protected field, and a leading
         * digit is not allowed either */
        if (p[0] == '_' || (p[0] >= '0' && p[0] <= '9'))
                return false;

        for (i = 0; i < l; i++) {
                const char c = p[i];

                if (c >= 'A' && c <= 'Z')
                        continue;

                if (c >= '0' && c <= '9')
                        continue;

                if (c == '_')
                        continue;

                return false;
        }

        return true;
}
1124
1125 static void process_native_message(
1126                 Server *s,
1127                 const void *buffer, size_t buffer_size,
1128                 struct ucred *ucred,
1129                 struct timeval *tv,
1130                 const char *label, size_t label_len) {
1131
1132         struct iovec *iovec = NULL;
1133         unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1134         const char *p;
1135         size_t remaining;
1136         int priority = LOG_INFO;
1137         char *identifier = NULL, *message = NULL;
1138
1139         assert(s);
1140         assert(buffer || n == 0);
1141
1142         p = buffer;
1143         remaining = buffer_size;
1144
1145         while (remaining > 0) {
1146                 const char *e, *q;
1147
1148                 e = memchr(p, '\n', remaining);
1149
1150                 if (!e) {
1151                         /* Trailing noise, let's ignore it, and flush what we collected */
1152                         log_debug("Received message with trailing noise, ignoring.");
1153                         break;
1154                 }
1155
1156                 if (e == p) {
1157                         /* Entry separator */
1158                         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, priority);
1159                         n = 0;
1160                         priority = LOG_INFO;
1161
1162                         p++;
1163                         remaining--;
1164                         continue;
1165                 }
1166
1167                 if (*p == '.' || *p == '#') {
1168                         /* Ignore control commands for now, and
1169                          * comments too. */
1170                         remaining -= (e - p) + 1;
1171                         p = e + 1;
1172                         continue;
1173                 }
1174
1175                 /* A property follows */
1176
1177                 if (n+N_IOVEC_META_FIELDS >= m) {
1178                         struct iovec *c;
1179                         unsigned u;
1180
1181                         u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1182                         c = realloc(iovec, u * sizeof(struct iovec));
1183                         if (!c) {
1184                                 log_error("Out of memory");
1185                                 break;
1186                         }
1187
1188                         iovec = c;
1189                         m = u;
1190                 }
1191
1192                 q = memchr(p, '=', e - p);
1193                 if (q) {
1194                         if (valid_user_field(p, q - p)) {
1195                                 size_t l;
1196
1197                                 l = e - p;
1198
1199                                 /* If the field name starts with an
1200                                  * underscore, skip the variable,
1201                                  * since that indidates a trusted
1202                                  * field */
1203                                 iovec[n].iov_base = (char*) p;
1204                                 iovec[n].iov_len = l;
1205                                 n++;
1206
1207                                 /* We need to determine the priority
1208                                  * of this entry for the rate limiting
1209                                  * logic */
1210                                 if (l == 10 &&
1211                                     memcmp(p, "PRIORITY=", 9) == 0 &&
1212                                     p[9] >= '0' && p[9] <= '9')
1213                                         priority = (priority & LOG_FACMASK) | (p[9] - '0');
1214
1215                                 else if (l == 17 &&
1216                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1217                                          p[16] >= '0' && p[16] <= '9')
1218                                         priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1219
1220                                 else if (l == 18 &&
1221                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1222                                          p[16] >= '0' && p[16] <= '9' &&
1223                                          p[17] >= '0' && p[17] <= '9')
1224                                         priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1225
1226                                 else if (l >= 12 &&
1227                                          memcmp(p, "SYSLOG_IDENTIFIER=", 11) == 0) {
1228                                         char *t;
1229
1230                                         t = strndup(p + 11, l - 11);
1231                                         if (t) {
1232                                                 free(identifier);
1233                                                 identifier = t;
1234                                         }
1235                                 } else if (l >= 8 &&
1236                                            memcmp(p, "MESSAGE=", 8) == 0) {
1237                                         char *t;
1238
1239                                         t = strndup(p + 8, l - 8);
1240                                         if (t) {
1241                                                 free(message);
1242                                                 message = t;
1243                                         }
1244                                 }
1245                         }
1246
1247                         remaining -= (e - p) + 1;
1248                         p = e + 1;
1249                         continue;
1250                 } else {
1251                         uint64_t l;
1252                         char *k;
1253
1254                         if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1255                                 log_debug("Failed to parse message, ignoring.");
1256                                 break;
1257                         }
1258
1259                         memcpy(&l, e + 1, sizeof(uint64_t));
1260                         l = le64toh(l);
1261
1262                         if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1263                             e[1+sizeof(uint64_t)+l] != '\n') {
1264                                 log_debug("Failed to parse message, ignoring.");
1265                                 break;
1266                         }
1267
1268                         k = malloc((e - p) + 1 + l);
1269                         if (!k) {
1270                                 log_error("Out of memory");
1271                                 break;
1272                         }
1273
1274                         memcpy(k, p, e - p);
1275                         k[e - p] = '=';
1276                         memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1277
1278                         if (valid_user_field(p, e - p)) {
1279                                 iovec[n].iov_base = k;
1280                                 iovec[n].iov_len = (e - p) + 1 + l;
1281                                 n++;
1282                         } else
1283                                 free(k);
1284
1285                         remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1286                         p = e + 1 + sizeof(uint64_t) + l + 1;
1287                 }
1288         }
1289
1290         if (n <= 0)
1291                 goto finish;
1292
1293         tn = n++;
1294         IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1295
1296         if (message) {
1297                 if (s->forward_to_syslog)
1298                         forward_syslog(s, priority, identifier, message, ucred, tv);
1299
1300                 if (s->forward_to_kmsg)
1301                         forward_kmsg(s, priority, identifier, message, ucred);
1302
1303                 if (s->forward_to_console)
1304                         forward_console(s, identifier, message, ucred);
1305         }
1306
1307         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, priority);
1308
1309 finish:
1310         for (j = 0; j < n; j++)  {
1311                 if (j == tn)
1312                         continue;
1313
1314                 if (iovec[j].iov_base < buffer ||
1315                     (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1316                         free(iovec[j].iov_base);
1317         }
1318
1319         free(iovec);
1320         free(identifier);
1321         free(message);
1322 }
1323
1324 static void process_native_file(
1325                 Server *s,
1326                 int fd,
1327                 struct ucred *ucred,
1328                 struct timeval *tv,
1329                 const char *label, size_t label_len) {
1330
1331         struct stat st;
1332         void *p;
1333         ssize_t n;
1334
1335         assert(s);
1336         assert(fd >= 0);
1337
1338         /* Data is in the passed file, since it didn't fit in a
1339          * datagram. We can't map the file here, since clients might
1340          * then truncate it and trigger a SIGBUS for us. So let's
1341          * stupidly read it */
1342
1343         if (fstat(fd, &st) < 0) {
1344                 log_error("Failed to stat passed file, ignoring: %m");
1345                 return;
1346         }
1347
1348         if (!S_ISREG(st.st_mode)) {
1349                 log_error("File passed is not regular. Ignoring.");
1350                 return;
1351         }
1352
1353         if (st.st_size <= 0)
1354                 return;
1355
1356         if (st.st_size > ENTRY_SIZE_MAX) {
1357                 log_error("File passed too large. Ignoring.");
1358                 return;
1359         }
1360
1361         p = malloc(st.st_size);
1362         if (!p) {
1363                 log_error("Out of memory");
1364                 return;
1365         }
1366
1367         n = pread(fd, p, st.st_size, 0);
1368         if (n < 0)
1369                 log_error("Failed to read file, ignoring: %s", strerror(-n));
1370         else if (n > 0)
1371                 process_native_message(s, p, n, ucred, tv, label, label_len);
1372
1373         free(p);
1374 }
1375
1376 static int stdout_stream_log(StdoutStream *s, const char *p) {
1377         struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1378         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1379         unsigned n = 0;
1380         int priority;
1381         char *label = NULL;
1382         size_t label_len = 0;
1383
1384         assert(s);
1385         assert(p);
1386
1387         if (isempty(p))
1388                 return 0;
1389
1390         priority = s->priority;
1391
1392         if (s->level_prefix)
1393                 parse_syslog_priority((char**) &p, &priority);
1394
1395         if (s->forward_to_syslog || s->server->forward_to_syslog)
1396                 forward_syslog(s->server, fixup_priority(priority), s->identifier, p, &s->ucred, NULL);
1397
1398         if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1399                 forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1400
1401         if (s->forward_to_console || s->server->forward_to_console)
1402                 forward_console(s->server, s->identifier, p, &s->ucred);
1403
1404         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1405
1406         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1407                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1408
1409         if (priority & LOG_FACMASK)
1410                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1411                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1412
1413         if (s->identifier) {
1414                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1415                 if (syslog_identifier)
1416                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1417         }
1418
1419         message = strappend("MESSAGE=", p);
1420         if (message)
1421                 IOVEC_SET_STRING(iovec[n++], message);
1422
1423 #ifdef HAVE_SELINUX
1424         if (s->security_context) {
1425                 label = (char*) s->security_context;
1426                 label_len = strlen((char*) s->security_context);
1427         }
1428 #endif
1429
1430         dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, label, label_len, priority);
1431
1432         free(message);
1433         free(syslog_priority);
1434         free(syslog_facility);
1435         free(syslog_identifier);
1436
1437         return 0;
1438 }
1439
1440 static int stdout_stream_line(StdoutStream *s, char *p) {
1441         int r;
1442
1443         assert(s);
1444         assert(p);
1445
1446         p = strstrip(p);
1447
1448         switch (s->state) {
1449
1450         case STDOUT_STREAM_IDENTIFIER:
1451                 if (isempty(p))
1452                         s->identifier = NULL;
1453                 else  {
1454                         s->identifier = strdup(p);
1455                         if (!s->identifier) {
1456                                 log_error("Out of memory");
1457                                 return -ENOMEM;
1458                         }
1459                 }
1460
1461                 s->state = STDOUT_STREAM_PRIORITY;
1462                 return 0;
1463
1464         case STDOUT_STREAM_PRIORITY:
1465                 r = safe_atoi(p, &s->priority);
1466                 if (r < 0 || s->priority <= 0 || s->priority >= 999) {
1467                         log_warning("Failed to parse log priority line.");
1468                         return -EINVAL;
1469                 }
1470
1471                 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1472                 return 0;
1473
1474         case STDOUT_STREAM_LEVEL_PREFIX:
1475                 r = parse_boolean(p);
1476                 if (r < 0) {
1477                         log_warning("Failed to parse level prefix line.");
1478                         return -EINVAL;
1479                 }
1480
1481                 s->level_prefix = !!r;
1482                 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1483                 return 0;
1484
1485         case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1486                 r = parse_boolean(p);
1487                 if (r < 0) {
1488                         log_warning("Failed to parse forward to syslog line.");
1489                         return -EINVAL;
1490                 }
1491
1492                 s->forward_to_syslog = !!r;
1493                 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1494                 return 0;
1495
1496         case STDOUT_STREAM_FORWARD_TO_KMSG:
1497                 r = parse_boolean(p);
1498                 if (r < 0) {
1499                         log_warning("Failed to parse copy to kmsg line.");
1500                         return -EINVAL;
1501                 }
1502
1503                 s->forward_to_kmsg = !!r;
1504                 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1505                 return 0;
1506
1507         case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1508                 r = parse_boolean(p);
1509                 if (r < 0) {
1510                         log_warning("Failed to parse copy to console line.");
1511                         return -EINVAL;
1512                 }
1513
1514                 s->forward_to_console = !!r;
1515                 s->state = STDOUT_STREAM_RUNNING;
1516                 return 0;
1517
1518         case STDOUT_STREAM_RUNNING:
1519                 return stdout_stream_log(s, p);
1520         }
1521
1522         assert_not_reached("Unknown stream state");
1523 }
1524
1525 static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
1526         char *p;
1527         size_t remaining;
1528         int r;
1529
1530         assert(s);
1531
1532         p = s->buffer;
1533         remaining = s->length;
1534         for (;;) {
1535                 char *end;
1536                 size_t skip;
1537
1538                 end = memchr(p, '\n', remaining);
1539                 if (end)
1540                         skip = end - p + 1;
1541                 else if (remaining >= sizeof(s->buffer) - 1) {
1542                         end = p + sizeof(s->buffer) - 1;
1543                         skip = remaining;
1544                 } else
1545                         break;
1546
1547                 *end = 0;
1548
1549                 r = stdout_stream_line(s, p);
1550                 if (r < 0)
1551                         return r;
1552
1553                 remaining -= skip;
1554                 p += skip;
1555         }
1556
1557         if (force_flush && remaining > 0) {
1558                 p[remaining] = 0;
1559                 r = stdout_stream_line(s, p);
1560                 if (r < 0)
1561                         return r;
1562
1563                 p += remaining;
1564                 remaining = 0;
1565         }
1566
1567         if (p > s->buffer) {
1568                 memmove(s->buffer, p, remaining);
1569                 s->length = remaining;
1570         }
1571
1572         return 0;
1573 }
1574
1575 static int stdout_stream_process(StdoutStream *s) {
1576         ssize_t l;
1577         int r;
1578
1579         assert(s);
1580
1581         l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1582         if (l < 0) {
1583
1584                 if (errno == EAGAIN)
1585                         return 0;
1586
1587                 log_warning("Failed to read from stream: %m");
1588                 return -errno;
1589         }
1590
1591         if (l == 0) {
1592                 r = stdout_stream_scan(s, true);
1593                 if (r < 0)
1594                         return r;
1595
1596                 return 0;
1597         }
1598
1599         s->length += l;
1600         r = stdout_stream_scan(s, false);
1601         if (r < 0)
1602                 return r;
1603
1604         return 1;
1605
1606 }
1607
1608 static void stdout_stream_free(StdoutStream *s) {
1609         assert(s);
1610
1611         if (s->server) {
1612                 assert(s->server->n_stdout_streams > 0);
1613                 s->server->n_stdout_streams --;
1614                 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
1615         }
1616
1617         if (s->fd >= 0) {
1618                 if (s->server)
1619                         epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1620
1621                 close_nointr_nofail(s->fd);
1622         }
1623
1624 #ifdef HAVE_SELINUX
1625         if (s->security_context)
1626                 freecon(s->security_context);
1627 #endif
1628
1629         free(s->identifier);
1630         free(s);
1631 }
1632
1633 static int stdout_stream_new(Server *s) {
1634         StdoutStream *stream;
1635         int fd, r;
1636         socklen_t len;
1637         struct epoll_event ev;
1638
1639         assert(s);
1640
1641         fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1642         if (fd < 0) {
1643                 if (errno == EAGAIN)
1644                         return 0;
1645
1646                 log_error("Failed to accept stdout connection: %m");
1647                 return -errno;
1648         }
1649
1650         if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1651                 log_warning("Too many stdout streams, refusing connection.");
1652                 close_nointr_nofail(fd);
1653                 return 0;
1654         }
1655
1656         stream = new0(StdoutStream, 1);
1657         if (!stream) {
1658                 log_error("Out of memory.");
1659                 close_nointr_nofail(fd);
1660                 return -ENOMEM;
1661         }
1662
1663         stream->fd = fd;
1664
1665         len = sizeof(stream->ucred);
1666         if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1667                 log_error("Failed to determine peer credentials: %m");
1668                 r = -errno;
1669                 goto fail;
1670         }
1671
1672 #ifdef HAVE_SELINUX
1673         if (getpeercon(fd, &stream->security_context) < 0)
1674                 log_error("Failed to determine peer security context.");
1675 #endif
1676
1677         if (shutdown(fd, SHUT_WR) < 0) {
1678                 log_error("Failed to shutdown writing side of socket: %m");
1679                 r = -errno;
1680                 goto fail;
1681         }
1682
1683         zero(ev);
1684         ev.data.ptr = stream;
1685         ev.events = EPOLLIN;
1686         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1687                 log_error("Failed to add stream to event loop: %m");
1688                 r = -errno;
1689                 goto fail;
1690         }
1691
1692         stream->server = s;
1693         LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1694         s->n_stdout_streams ++;
1695
1696         return 0;
1697
1698 fail:
1699         stdout_stream_free(stream);
1700         return r;
1701 }
1702
1703 static int parse_kernel_timestamp(char **_p, usec_t *t) {
1704         usec_t r;
1705         int k, i;
1706         char *p;
1707
1708         assert(_p);
1709         assert(*_p);
1710         assert(t);
1711
1712         p = *_p;
1713
1714         if (strlen(p) < 14 || p[0] != '[' || p[13] != ']' || p[6] != '.')
1715                 return 0;
1716
1717         r = 0;
1718
1719         for (i = 1; i <= 5; i++) {
1720                 r *= 10;
1721
1722                 if (p[i] == ' ')
1723                         continue;
1724
1725                 k = undecchar(p[i]);
1726                 if (k < 0)
1727                         return 0;
1728
1729                 r += k;
1730         }
1731
1732         for (i = 7; i <= 12; i++) {
1733                 r *= 10;
1734
1735                 k = undecchar(p[i]);
1736                 if (k < 0)
1737                         return 0;
1738
1739                 r += k;
1740         }
1741
1742         *t = r;
1743         *_p += 14;
1744         *_p += strspn(*_p, WHITESPACE);
1745
1746         return 1;
1747 }
1748
1749 static void proc_kmsg_line(Server *s, const char *p) {
1750         struct iovec iovec[N_IOVEC_META_FIELDS + 7];
1751         char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
1752         int priority = LOG_KERN | LOG_INFO;
1753         unsigned n = 0;
1754         usec_t usec;
1755         char *identifier = NULL, *pid = NULL;
1756
1757         assert(s);
1758         assert(p);
1759
1760         if (isempty(p))
1761                 return;
1762
1763         parse_syslog_priority((char **) &p, &priority);
1764
1765         if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
1766                 return;
1767
1768         if (parse_kernel_timestamp((char **) &p, &usec) > 0) {
1769                 if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
1770                              (unsigned long long) usec) >= 0)
1771                         IOVEC_SET_STRING(iovec[n++], source_time);
1772         }
1773
1774         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
1775
1776         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1777                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1778
1779         if ((priority & LOG_FACMASK) == LOG_KERN) {
1780
1781                 if (s->forward_to_syslog)
1782                         forward_syslog(s, priority, "kernel", p, NULL, NULL);
1783
1784                 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
1785         } else {
1786                 read_identifier(&p, &identifier, &pid);
1787
1788                 if (s->forward_to_syslog)
1789                         forward_syslog(s, priority, identifier, p, NULL, NULL);
1790
1791                 if (identifier) {
1792                         syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1793                         if (syslog_identifier)
1794                                 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1795                 }
1796
1797                 if (pid) {
1798                         syslog_pid = strappend("SYSLOG_PID=", pid);
1799                         if (syslog_pid)
1800                                 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1801                 }
1802
1803                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1804                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1805         }
1806
1807         message = strappend("MESSAGE=", p);
1808         if (message)
1809                 IOVEC_SET_STRING(iovec[n++], message);
1810
1811         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, priority);
1812
1813         free(message);
1814         free(syslog_priority);
1815         free(syslog_identifier);
1816         free(syslog_pid);
1817         free(syslog_facility);
1818         free(source_time);
1819         free(identifier);
1820         free(pid);
1821 }
1822
1823 static void proc_kmsg_scan(Server *s) {
1824         char *p;
1825         size_t remaining;
1826
1827         assert(s);
1828
1829         p = s->proc_kmsg_buffer;
1830         remaining = s->proc_kmsg_length;
1831         for (;;) {
1832                 char *end;
1833                 size_t skip;
1834
1835                 end = memchr(p, '\n', remaining);
1836                 if (end)
1837                         skip = end - p + 1;
1838                 else if (remaining >= sizeof(s->proc_kmsg_buffer) - 1) {
1839                         end = p + sizeof(s->proc_kmsg_buffer) - 1;
1840                         skip = remaining;
1841                 } else
1842                         break;
1843
1844                 *end = 0;
1845
1846                 proc_kmsg_line(s, p);
1847
1848                 remaining -= skip;
1849                 p += skip;
1850         }
1851
1852         if (p > s->proc_kmsg_buffer) {
1853                 memmove(s->proc_kmsg_buffer, p, remaining);
1854                 s->proc_kmsg_length = remaining;
1855         }
1856 }
1857
1858 static int system_journal_open(Server *s) {
1859         int r;
1860         char *fn;
1861         sd_id128_t machine;
1862         char ids[33];
1863
1864         r = sd_id128_get_machine(&machine);
1865         if (r < 0)
1866                 return r;
1867
1868         sd_id128_to_string(machine, ids);
1869
1870         if (!s->system_journal) {
1871
1872                 /* First try to create the machine path, but not the prefix */
1873                 fn = strappend("/var/log/journal/", ids);
1874                 if (!fn)
1875                         return -ENOMEM;
1876                 (void) mkdir(fn, 0755);
1877                 free(fn);
1878
1879                 /* The create the system journal file */
1880                 fn = join("/var/log/journal/", ids, "/system.journal", NULL);
1881                 if (!fn)
1882                         return -ENOMEM;
1883
1884                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, NULL, &s->system_journal);
1885                 free(fn);
1886
1887                 if (r >= 0) {
1888                         journal_default_metrics(&s->system_metrics, s->system_journal->fd);
1889
1890                         s->system_journal->metrics = s->system_metrics;
1891                         s->system_journal->compress = s->compress;
1892
1893                         server_fix_perms(s, s->system_journal, 0);
1894                 } else if (r < 0) {
1895
1896                         if (r != -ENOENT && r != -EROFS)
1897                                 log_warning("Failed to open system journal: %s", strerror(-r));
1898
1899                         r = 0;
1900                 }
1901         }
1902
1903         if (!s->runtime_journal) {
1904
1905                 fn = join("/run/log/journal/", ids, "/system.journal", NULL);
1906                 if (!fn)
1907                         return -ENOMEM;
1908
1909                 if (s->system_journal) {
1910
1911                         /* Try to open the runtime journal, but only
1912                          * if it already exists, so that we can flush
1913                          * it into the system journal */
1914
1915                         r = journal_file_open_reliably(fn, O_RDWR, 0640, NULL, &s->runtime_journal);
1916                         free(fn);
1917
1918                         if (r < 0) {
1919                                 if (r != -ENOENT)
1920                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
1921
1922                                 r = 0;
1923                         }
1924
1925                 } else {
1926
1927                         /* OK, we really need the runtime journal, so create
1928                          * it if necessary. */
1929
1930                         (void) mkdir_parents(fn, 0755);
1931                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, NULL, &s->runtime_journal);
1932                         free(fn);
1933
1934                         if (r < 0) {
1935                                 log_error("Failed to open runtime journal: %s", strerror(-r));
1936                                 return r;
1937                         }
1938                 }
1939
1940                 if (s->runtime_journal) {
1941                         journal_default_metrics(&s->runtime_metrics, s->runtime_journal->fd);
1942
1943                         s->runtime_journal->metrics = s->runtime_metrics;
1944                         s->runtime_journal->compress = s->compress;
1945
1946                         server_fix_perms(s, s->runtime_journal, 0);
1947                 }
1948         }
1949
1950         return r;
1951 }
1952
1953 static int server_flush_to_var(Server *s) {
1954         char path[] = "/run/log/journal/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
1955         Object *o = NULL;
1956         int r;
1957         sd_id128_t machine;
1958         sd_journal *j;
1959         usec_t ts;
1960
1961         assert(s);
1962
1963         if (!s->runtime_journal)
1964                 return 0;
1965
1966         ts = now(CLOCK_MONOTONIC);
1967         if (s->var_available_timestamp + RECHECK_VAR_AVAILABLE_USEC > ts)
1968                 return 0;
1969
1970         s->var_available_timestamp = ts;
1971
1972         system_journal_open(s);
1973
1974         if (!s->system_journal)
1975                 return 0;
1976
1977         log_info("Flushing to /var...");
1978
1979         r = sd_id128_get_machine(&machine);
1980         if (r < 0) {
1981                 log_error("Failed to get machine id: %s", strerror(-r));
1982                 return r;
1983         }
1984
1985         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1986         if (r < 0) {
1987                 log_error("Failed to read runtime journal: %s", strerror(-r));
1988                 return r;
1989         }
1990
1991         SD_JOURNAL_FOREACH(j) {
1992                 JournalFile *f;
1993
1994                 f = j->current_file;
1995                 assert(f && f->current_offset > 0);
1996
1997                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1998                 if (r < 0) {
1999                         log_error("Can't read entry: %s", strerror(-r));
2000                         goto finish;
2001                 }
2002
2003                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2004                 if (r == -E2BIG) {
2005                         log_info("Allocation limit reached.");
2006
2007                         journal_file_post_change(s->system_journal);
2008                         server_rotate(s);
2009                         server_vacuum(s);
2010
2011                         r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2012                 }
2013
2014                 if (r < 0) {
2015                         log_error("Can't write entry: %s", strerror(-r));
2016                         goto finish;
2017                 }
2018         }
2019
2020 finish:
2021         journal_file_post_change(s->system_journal);
2022
2023         journal_file_close(s->runtime_journal);
2024         s->runtime_journal = NULL;
2025
2026         if (r >= 0) {
2027                 sd_id128_to_string(machine, path + 17);
2028                 rm_rf(path, false, true, false);
2029         }
2030
2031         return r;
2032 }
2033
2034 static int server_read_proc_kmsg(Server *s) {
2035         ssize_t l;
2036         assert(s);
2037         assert(s->proc_kmsg_fd >= 0);
2038
2039         l = read(s->proc_kmsg_fd, s->proc_kmsg_buffer + s->proc_kmsg_length, sizeof(s->proc_kmsg_buffer) - 1 - s->proc_kmsg_length);
2040         if (l < 0) {
2041
2042                 if (errno == EAGAIN || errno == EINTR)
2043                         return 0;
2044
2045                 log_error("Failed to read from kernel: %m");
2046                 return -errno;
2047         }
2048
2049         s->proc_kmsg_length += l;
2050
2051         proc_kmsg_scan(s);
2052         return 1;
2053 }
2054
2055 static int server_flush_proc_kmsg(Server *s) {
2056         int r;
2057
2058         assert(s);
2059
2060         if (s->proc_kmsg_fd < 0)
2061                 return 0;
2062
2063         log_info("Flushing /proc/kmsg...");
2064
2065         for (;;) {
2066                 r = server_read_proc_kmsg(s);
2067                 if (r < 0)
2068                         return r;
2069
2070                 if (r == 0)
2071                         break;
2072         }
2073
2074         return 0;
2075 }
2076
2077 static int process_event(Server *s, struct epoll_event *ev) {
2078         assert(s);
2079
2080         if (ev->data.fd == s->signal_fd) {
2081                 struct signalfd_siginfo sfsi;
2082                 ssize_t n;
2083
2084                 if (ev->events != EPOLLIN) {
2085                         log_info("Got invalid event from epoll.");
2086                         return -EIO;
2087                 }
2088
2089                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
2090                 if (n != sizeof(sfsi)) {
2091
2092                         if (n >= 0)
2093                                 return -EIO;
2094
2095                         if (errno == EINTR || errno == EAGAIN)
2096                                 return 1;
2097
2098                         return -errno;
2099                 }
2100
2101                 if (sfsi.ssi_signo == SIGUSR1) {
2102                         server_flush_to_var(s);
2103                         return 0;
2104                 }
2105
2106                 log_debug("Received SIG%s", signal_to_string(sfsi.ssi_signo));
2107                 return 0;
2108
2109         } else if (ev->data.fd == s->proc_kmsg_fd) {
2110                 int r;
2111
2112                 if (ev->events != EPOLLIN) {
2113                         log_info("Got invalid event from epoll.");
2114                         return -EIO;
2115                 }
2116
2117                 r = server_read_proc_kmsg(s);
2118                 if (r < 0)
2119                         return r;
2120
2121                 return 1;
2122
2123         } else if (ev->data.fd == s->native_fd ||
2124                    ev->data.fd == s->syslog_fd) {
2125
2126                 if (ev->events != EPOLLIN) {
2127                         log_info("Got invalid event from epoll.");
2128                         return -EIO;
2129                 }
2130
2131                 for (;;) {
2132                         struct msghdr msghdr;
2133                         struct iovec iovec;
2134                         struct ucred *ucred = NULL;
2135                         struct timeval *tv = NULL;
2136                         struct cmsghdr *cmsg;
2137                         char *label = NULL;
2138                         size_t label_len = 0;
2139                         union {
2140                                 struct cmsghdr cmsghdr;
2141                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
2142                                             CMSG_SPACE(sizeof(struct timeval)) +
2143                                             CMSG_SPACE(sizeof(int)) +
2144                                             CMSG_SPACE(PAGE_SIZE)]; /* selinux label */
2145                         } control;
2146                         ssize_t n;
2147                         int v;
2148                         int *fds = NULL;
2149                         unsigned n_fds = 0;
2150
2151                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
2152                                 log_error("SIOCINQ failed: %m");
2153                                 return -errno;
2154                         }
2155
2156                         if (s->buffer_size < (size_t) v) {
2157                                 void *b;
2158                                 size_t l;
2159
2160                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
2161                                 b = realloc(s->buffer, l+1);
2162
2163                                 if (!b) {
2164                                         log_error("Couldn't increase buffer.");
2165                                         return -ENOMEM;
2166                                 }
2167
2168                                 s->buffer_size = l;
2169                                 s->buffer = b;
2170                         }
2171
2172                         zero(iovec);
2173                         iovec.iov_base = s->buffer;
2174                         iovec.iov_len = s->buffer_size;
2175
2176                         zero(control);
2177                         zero(msghdr);
2178                         msghdr.msg_iov = &iovec;
2179                         msghdr.msg_iovlen = 1;
2180                         msghdr.msg_control = &control;
2181                         msghdr.msg_controllen = sizeof(control);
2182
2183                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
2184                         if (n < 0) {
2185
2186                                 if (errno == EINTR || errno == EAGAIN)
2187                                         return 1;
2188
2189                                 log_error("recvmsg() failed: %m");
2190                                 return -errno;
2191                         }
2192
2193                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
2194
2195                                 if (cmsg->cmsg_level == SOL_SOCKET &&
2196                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
2197                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
2198                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
2199                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2200                                          cmsg->cmsg_type == SCM_SECURITY) {
2201                                         label = (char*) CMSG_DATA(cmsg);
2202                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
2203                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
2204                                          cmsg->cmsg_type == SO_TIMESTAMP &&
2205                                          cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
2206                                         tv = (struct timeval*) CMSG_DATA(cmsg);
2207                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2208                                          cmsg->cmsg_type == SCM_RIGHTS) {
2209                                         fds = (int*) CMSG_DATA(cmsg);
2210                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
2211                                 }
2212                         }
2213
2214                         if (ev->data.fd == s->syslog_fd) {
2215                                 char *e;
2216
2217                                 if (n > 0 && n_fds == 0) {
2218                                         e = memchr(s->buffer, '\n', n);
2219                                         if (e)
2220                                                 *e = 0;
2221                                         else
2222                                                 s->buffer[n] = 0;
2223
2224                                         process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
2225                                 } else if (n_fds > 0)
2226                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
2227
2228                         } else {
2229                                 if (n > 0 && n_fds == 0)
2230                                         process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
2231                                 else if (n == 0 && n_fds == 1)
2232                                         process_native_file(s, fds[0], ucred, tv, label, label_len);
2233                                 else if (n_fds > 0)
2234                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
2235                         }
2236
2237                         close_many(fds, n_fds);
2238                 }
2239
2240                 return 1;
2241
2242         } else if (ev->data.fd == s->stdout_fd) {
2243
2244                 if (ev->events != EPOLLIN) {
2245                         log_info("Got invalid event from epoll.");
2246                         return -EIO;
2247                 }
2248
2249                 stdout_stream_new(s);
2250                 return 1;
2251
2252         } else {
2253                 StdoutStream *stream;
2254
2255                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
2256                         log_info("Got invalid event from epoll.");
2257                         return -EIO;
2258                 }
2259
2260                 /* If it is none of the well-known fds, it must be an
2261                  * stdout stream fd. Note that this is a bit ugly here
2262                  * (since we rely that none of the well-known fds
2263                  * could be interpreted as pointer), but nonetheless
2264                  * safe, since the well-known fds would never get an
2265                  * fd > 4096, i.e. beyond the first memory page */
2266
2267                 stream = ev->data.ptr;
2268
2269                 if (stdout_stream_process(stream) <= 0)
2270                         stdout_stream_free(stream);
2271
2272                 return 1;
2273         }
2274
2275         log_error("Unknown event.");
2276         return 0;
2277 }
2278
2279 static int open_syslog_socket(Server *s) {
2280         union sockaddr_union sa;
2281         int one, r;
2282         struct epoll_event ev;
2283
2284         assert(s);
2285
2286         if (s->syslog_fd < 0) {
2287
2288                 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2289                 if (s->syslog_fd < 0) {
2290                         log_error("socket() failed: %m");
2291                         return -errno;
2292                 }
2293
2294                 zero(sa);
2295                 sa.un.sun_family = AF_UNIX;
2296                 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
2297
2298                 unlink(sa.un.sun_path);
2299
2300                 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2301                 if (r < 0) {
2302                         log_error("bind() failed: %m");
2303                         return -errno;
2304                 }
2305
2306                 chmod(sa.un.sun_path, 0666);
2307         } else
2308                 fd_nonblock(s->syslog_fd, 1);
2309
2310         one = 1;
2311         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2312         if (r < 0) {
2313                 log_error("SO_PASSCRED failed: %m");
2314                 return -errno;
2315         }
2316
2317 #ifdef HAVE_SELINUX
2318         one = 1;
2319         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2320         if (r < 0)
2321                 log_warning("SO_PASSSEC failed: %m");
2322 #endif
2323
2324         one = 1;
2325         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2326         if (r < 0) {
2327                 log_error("SO_TIMESTAMP failed: %m");
2328                 return -errno;
2329         }
2330
2331         zero(ev);
2332         ev.events = EPOLLIN;
2333         ev.data.fd = s->syslog_fd;
2334         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2335                 log_error("Failed to add syslog server fd to epoll object: %m");
2336                 return -errno;
2337         }
2338
2339         return 0;
2340 }
2341
2342 static int open_native_socket(Server*s) {
2343         union sockaddr_union sa;
2344         int one, r;
2345         struct epoll_event ev;
2346
2347         assert(s);
2348
2349         if (s->native_fd < 0) {
2350
2351                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2352                 if (s->native_fd < 0) {
2353                         log_error("socket() failed: %m");
2354                         return -errno;
2355                 }
2356
2357                 zero(sa);
2358                 sa.un.sun_family = AF_UNIX;
2359                 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
2360
2361                 unlink(sa.un.sun_path);
2362
2363                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2364                 if (r < 0) {
2365                         log_error("bind() failed: %m");
2366                         return -errno;
2367                 }
2368
2369                 chmod(sa.un.sun_path, 0666);
2370         } else
2371                 fd_nonblock(s->native_fd, 1);
2372
2373         one = 1;
2374         r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2375         if (r < 0) {
2376                 log_error("SO_PASSCRED failed: %m");
2377                 return -errno;
2378         }
2379
2380 #ifdef HAVE_SELINUX
2381         one = 1;
2382         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2383         if (r < 0)
2384                 log_warning("SO_PASSSEC failed: %m");
2385 #endif
2386
2387         one = 1;
2388         r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2389         if (r < 0) {
2390                 log_error("SO_TIMESTAMP failed: %m");
2391                 return -errno;
2392         }
2393
2394         zero(ev);
2395         ev.events = EPOLLIN;
2396         ev.data.fd = s->native_fd;
2397         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2398                 log_error("Failed to add native server fd to epoll object: %m");
2399                 return -errno;
2400         }
2401
2402         return 0;
2403 }
2404
2405 static int open_stdout_socket(Server *s) {
2406         union sockaddr_union sa;
2407         int r;
2408         struct epoll_event ev;
2409
2410         assert(s);
2411
2412         if (s->stdout_fd < 0) {
2413
2414                 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2415                 if (s->stdout_fd < 0) {
2416                         log_error("socket() failed: %m");
2417                         return -errno;
2418                 }
2419
2420                 zero(sa);
2421                 sa.un.sun_family = AF_UNIX;
2422                 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
2423
2424                 unlink(sa.un.sun_path);
2425
2426                 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2427                 if (r < 0) {
2428                         log_error("bind() failed: %m");
2429                         return -errno;
2430                 }
2431
2432                 chmod(sa.un.sun_path, 0666);
2433
2434                 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2435                         log_error("liste() failed: %m");
2436                         return -errno;
2437                 }
2438         } else
2439                 fd_nonblock(s->stdout_fd, 1);
2440
2441         zero(ev);
2442         ev.events = EPOLLIN;
2443         ev.data.fd = s->stdout_fd;
2444         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2445                 log_error("Failed to add stdout server fd to epoll object: %m");
2446                 return -errno;
2447         }
2448
2449         return 0;
2450 }
2451
2452 static int open_proc_kmsg(Server *s) {
2453         struct epoll_event ev;
2454
2455         assert(s);
2456
2457         if (!s->import_proc_kmsg)
2458                 return 0;
2459
2460
2461         s->proc_kmsg_fd = open("/proc/kmsg", O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2462         if (s->proc_kmsg_fd < 0) {
2463                 log_warning("Failed to open /proc/kmsg, ignoring: %m");
2464                 return 0;
2465         }
2466
2467         zero(ev);
2468         ev.events = EPOLLIN;
2469         ev.data.fd = s->proc_kmsg_fd;
2470         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->proc_kmsg_fd, &ev) < 0) {
2471                 log_error("Failed to add /proc/kmsg fd to epoll object: %m");
2472                 return -errno;
2473         }
2474
2475         return 0;
2476 }
2477
2478 static int open_signalfd(Server *s) {
2479         sigset_t mask;
2480         struct epoll_event ev;
2481
2482         assert(s);
2483
2484         assert_se(sigemptyset(&mask) == 0);
2485         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, -1);
2486         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2487
2488         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2489         if (s->signal_fd < 0) {
2490                 log_error("signalfd(): %m");
2491                 return -errno;
2492         }
2493
2494         zero(ev);
2495         ev.events = EPOLLIN;
2496         ev.data.fd = s->signal_fd;
2497
2498         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2499                 log_error("epoll_ctl(): %m");
2500                 return -errno;
2501         }
2502
2503         return 0;
2504 }
2505
2506 static int server_parse_proc_cmdline(Server *s) {
2507         char *line, *w, *state;
2508         int r;
2509         size_t l;
2510
2511         if (detect_container(NULL) > 0)
2512                 return 0;
2513
2514         r = read_one_line_file("/proc/cmdline", &line);
2515         if (r < 0) {
2516                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2517                 return 0;
2518         }
2519
2520         FOREACH_WORD_QUOTED(w, l, line, state) {
2521                 char *word;
2522
2523                 word = strndup(w, l);
2524                 if (!word) {
2525                         r = -ENOMEM;
2526                         goto finish;
2527                 }
2528
2529                 if (startswith(word, "systemd_journald.forward_to_syslog=")) {
2530                         r = parse_boolean(word + 35);
2531                         if (r < 0)
2532                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2533                         else
2534                                 s->forward_to_syslog = r;
2535                 } else if (startswith(word, "systemd_journald.forward_to_kmsg=")) {
2536                         r = parse_boolean(word + 33);
2537                         if (r < 0)
2538                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2539                         else
2540                                 s->forward_to_kmsg = r;
2541                 } else if (startswith(word, "systemd_journald.forward_to_console=")) {
2542                         r = parse_boolean(word + 36);
2543                         if (r < 0)
2544                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2545                         else
2546                                 s->forward_to_console = r;
2547                 }
2548
2549                 free(word);
2550         }
2551
2552         r = 0;
2553
2554 finish:
2555         free(line);
2556         return r;
2557 }
2558
2559 static int server_parse_config_file(Server *s) {
2560         FILE *f;
2561         const char *fn;
2562         int r;
2563
2564         assert(s);
2565
2566         fn = "/etc/systemd/systemd-journald.conf";
2567         f = fopen(fn, "re");
2568         if (!f) {
2569                 if (errno == ENOENT)
2570                         return 0;
2571
2572                 log_warning("Failed to open configuration file %s: %m", fn);
2573                 return -errno;
2574         }
2575
2576         r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2577         if (r < 0)
2578                 log_warning("Failed to parse configuration file: %s", strerror(-r));
2579
2580         fclose(f);
2581
2582         return r;
2583 }
2584
2585 static int server_init(Server *s) {
2586         int n, r, fd;
2587
2588         assert(s);
2589
2590         zero(*s);
2591         s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->proc_kmsg_fd = -1;
2592         s->compress = true;
2593
2594         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
2595         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
2596
2597         s->forward_to_syslog = true;
2598         s->import_proc_kmsg = true;
2599
2600         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
2601         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
2602
2603         server_parse_config_file(s);
2604         server_parse_proc_cmdline(s);
2605
2606         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
2607         if (!s->user_journals) {
2608                 log_error("Out of memory.");
2609                 return -ENOMEM;
2610         }
2611
2612         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2613         if (s->epoll_fd < 0) {
2614                 log_error("Failed to create epoll object: %m");
2615                 return -errno;
2616         }
2617
2618         n = sd_listen_fds(true);
2619         if (n < 0) {
2620                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
2621                 return n;
2622         }
2623
2624         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2625
2626                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
2627
2628                         if (s->native_fd >= 0) {
2629                                 log_error("Too many native sockets passed.");
2630                                 return -EINVAL;
2631                         }
2632
2633                         s->native_fd = fd;
2634
2635                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
2636
2637                         if (s->stdout_fd >= 0) {
2638                                 log_error("Too many stdout sockets passed.");
2639                                 return -EINVAL;
2640                         }
2641
2642                         s->stdout_fd = fd;
2643
2644                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
2645
2646                         if (s->syslog_fd >= 0) {
2647                                 log_error("Too many /dev/log sockets passed.");
2648                                 return -EINVAL;
2649                         }
2650
2651                         s->syslog_fd = fd;
2652
2653                 } else {
2654                         log_error("Unknown socket passed.");
2655                         return -EINVAL;
2656                 }
2657         }
2658
2659         r = open_syslog_socket(s);
2660         if (r < 0)
2661                 return r;
2662
2663         r = open_native_socket(s);
2664         if (r < 0)
2665                 return r;
2666
2667         r = open_stdout_socket(s);
2668         if (r < 0)
2669                 return r;
2670
2671         r = open_proc_kmsg(s);
2672         if (r < 0)
2673                 return r;
2674
2675         r = open_signalfd(s);
2676         if (r < 0)
2677                 return r;
2678
2679         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2680         if (!s->rate_limit)
2681                 return -ENOMEM;
2682
2683         r = system_journal_open(s);
2684         if (r < 0)
2685                 return r;
2686
2687         return 0;
2688 }
2689
2690 static void server_done(Server *s) {
2691         JournalFile *f;
2692         assert(s);
2693
2694         while (s->stdout_streams)
2695                 stdout_stream_free(s->stdout_streams);
2696
2697         if (s->system_journal)
2698                 journal_file_close(s->system_journal);
2699
2700         if (s->runtime_journal)
2701                 journal_file_close(s->runtime_journal);
2702
2703         while ((f = hashmap_steal_first(s->user_journals)))
2704                 journal_file_close(f);
2705
2706         hashmap_free(s->user_journals);
2707
2708         if (s->epoll_fd >= 0)
2709                 close_nointr_nofail(s->epoll_fd);
2710
2711         if (s->signal_fd >= 0)
2712                 close_nointr_nofail(s->signal_fd);
2713
2714         if (s->syslog_fd >= 0)
2715                 close_nointr_nofail(s->syslog_fd);
2716
2717         if (s->native_fd >= 0)
2718                 close_nointr_nofail(s->native_fd);
2719
2720         if (s->stdout_fd >= 0)
2721                 close_nointr_nofail(s->stdout_fd);
2722
2723         if (s->proc_kmsg_fd >= 0)
2724                 close_nointr_nofail(s->proc_kmsg_fd);
2725
2726         if (s->rate_limit)
2727                 journal_rate_limit_free(s->rate_limit);
2728
2729         free(s->buffer);
2730 }
2731
2732 int main(int argc, char *argv[]) {
2733         Server server;
2734         int r;
2735
2736         /* if (getppid() != 1) { */
2737         /*         log_error("This program should be invoked by init only."); */
2738         /*         return EXIT_FAILURE; */
2739         /* } */
2740
2741         if (argc > 1) {
2742                 log_error("This program does not take arguments.");
2743                 return EXIT_FAILURE;
2744         }
2745
2746         log_set_target(LOG_TARGET_CONSOLE);
2747         log_parse_environment();
2748         log_open();
2749
2750         umask(0022);
2751
2752         r = server_init(&server);
2753         if (r < 0)
2754                 goto finish;
2755
2756         server_vacuum(&server);
2757         server_flush_to_var(&server);
2758         server_flush_proc_kmsg(&server);
2759
2760         log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
2761         driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
2762
2763         sd_notify(false,
2764                   "READY=1\n"
2765                   "STATUS=Processing requests...");
2766
2767         for (;;) {
2768                 struct epoll_event event;
2769
2770                 r = epoll_wait(server.epoll_fd, &event, 1, -1);
2771                 if (r < 0) {
2772
2773                         if (errno == EINTR)
2774                                 continue;
2775
2776                         log_error("epoll_wait() failed: %m");
2777                         r = -errno;
2778                         goto finish;
2779                 } else if (r == 0)
2780                         break;
2781
2782                 r = process_event(&server, &event);
2783                 if (r < 0)
2784                         goto finish;
2785                 else if (r == 0)
2786                         break;
2787         }
2788
2789         log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
2790         driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
2791
2792 finish:
2793         sd_notify(false,
2794                   "STATUS=Shutting down...");
2795
2796         server_done(&server);
2797
2798         return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
2799 }