chiark / gitweb /
journald: fix calculation of disk space
[elogind.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 2 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
32 #include <sys/user.h>
33
34 #include <systemd/sd-journal.h>
35 #include <systemd/sd-login.h>
36 #include <systemd/sd-messages.h>
37 #include <systemd/sd-daemon.h>
38
39 #include "hashmap.h"
40 #include "journal-file.h"
41 #include "socket-util.h"
42 #include "cgroup-util.h"
43 #include "list.h"
44 #include "journal-rate-limit.h"
45 #include "journal-internal.h"
46 #include "conf-parser.h"
47 #include "journald.h"
48 #include "virt.h"
49 #include "missing.h"
50
51 #ifdef HAVE_ACL
52 #include <sys/acl.h>
53 #include <acl/libacl.h>
54 #include "acl-util.h"
55 #endif
56
57 #ifdef HAVE_SELINUX
58 #include <selinux/selinux.h>
59 #endif
60
/* Upper bound on the number of per-user journal files kept open at the
 * same time; find_journal() closes files until it is below this limit. */
#define USER_JOURNALS_MAX 1024

/* NOTE(review): presumably the maximum number of concurrently connected
 * stdout stream clients -- the user is outside this part of the file. */
#define STDOUT_STREAMS_MAX 4096

/* Default rate limiting parameters (interval in usec, burst in messages). */
#define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 200

/* How long a value computed by available_space() stays cached. */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)

#define RECHECK_VAR_AVAILABLE_USEC (30*USEC_PER_SEC)

/* Number of extra iovec slots dispatch_message_real() may fill in with
 * trusted metadata; callers must reserve that many free entries. */
#define N_IOVEC_META_FIELDS 17

/* 32 MiB */
#define ENTRY_SIZE_MAX (32*1024*1024)
74
/* State of a StdoutStream connection.
 *
 * NOTE(review): the names suggest that every state before
 * STDOUT_STREAM_RUNNING corresponds to one header field that is read
 * from the client, in this order, before log payload follows -- confirm
 * against the stream parser, which is outside this part of the file. */
typedef enum StdoutStreamState {
        STDOUT_STREAM_IDENTIFIER = 0,
        STDOUT_STREAM_PRIORITY,
        STDOUT_STREAM_LEVEL_PREFIX,
        STDOUT_STREAM_FORWARD_TO_SYSLOG,
        STDOUT_STREAM_FORWARD_TO_KMSG,
        STDOUT_STREAM_FORWARD_TO_CONSOLE,
        STDOUT_STREAM_RUNNING
} StdoutStreamState;
84
85 struct StdoutStream {
86         Server *server;
87         StdoutStreamState state;
88
89         int fd;
90
91         struct ucred ucred;
92 #ifdef HAVE_SELINUX
93         security_context_t security_context;
94 #endif
95
96         char *identifier;
97         int priority;
98         bool level_prefix:1;
99         bool forward_to_syslog:1;
100         bool forward_to_kmsg:1;
101         bool forward_to_console:1;
102
103         char buffer[LINE_MAX+1];
104         size_t length;
105
106         LIST_FIELDS(StdoutStream, stdout_stream);
107 };
108
109 static int server_flush_to_var(Server *s);
110
111 static uint64_t available_space(Server *s) {
112         char ids[33], *p;
113         const char *f;
114         sd_id128_t machine;
115         struct statvfs ss;
116         uint64_t sum = 0, avail = 0, ss_avail = 0;
117         int r;
118         DIR *d;
119         usec_t ts;
120         JournalMetrics *m;
121
122         ts = now(CLOCK_MONOTONIC);
123
124         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
125                 return s->cached_available_space;
126
127         r = sd_id128_get_machine(&machine);
128         if (r < 0)
129                 return 0;
130
131         if (s->system_journal) {
132                 f = "/var/log/journal/";
133                 m = &s->system_metrics;
134         } else {
135                 f = "/run/log/journal/";
136                 m = &s->runtime_metrics;
137         }
138
139         assert(m);
140
141         p = strappend(f, sd_id128_to_string(machine, ids));
142         if (!p)
143                 return 0;
144
145         d = opendir(p);
146         free(p);
147
148         if (!d)
149                 return 0;
150
151         if (fstatvfs(dirfd(d), &ss) < 0)
152                 goto finish;
153
154         for (;;) {
155                 struct stat st;
156                 struct dirent buf, *de;
157                 int k;
158
159                 k = readdir_r(d, &buf, &de);
160                 if (k != 0) {
161                         r = -k;
162                         goto finish;
163                 }
164
165                 if (!de)
166                         break;
167
168                 if (!dirent_is_file_with_suffix(de, ".journal"))
169                         continue;
170
171                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
172                         continue;
173
174                 if (!S_ISREG(st.st_mode))
175                         continue;
176
177                 sum += (uint64_t) st.st_blocks * 512UL;
178         }
179
180         avail = sum >= m->max_use ? 0 : m->max_use - sum;
181
182         ss_avail = ss.f_bsize * ss.f_bavail;
183
184         ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
185
186         if (ss_avail < avail)
187                 avail = ss_avail;
188
189         s->cached_available_space = avail;
190         s->cached_available_space_timestamp = ts;
191
192 finish:
193         closedir(d);
194
195         return avail;
196 }
197
198 static void server_read_file_gid(Server *s) {
199         const char *adm = "adm";
200         int r;
201
202         assert(s);
203
204         if (s->file_gid_valid)
205                 return;
206
207         r = get_group_creds(&adm, &s->file_gid);
208         if (r < 0)
209                 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
210
211         /* if we couldn't read the gid, then it will be 0, but that's
212          * fine and we shouldn't try to resolve the group again, so
213          * let's just pretend it worked right-away. */
214         s->file_gid_valid = true;
215 }
216
217 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
218         int r;
219 #ifdef HAVE_ACL
220         acl_t acl;
221         acl_entry_t entry;
222         acl_permset_t permset;
223 #endif
224
225         assert(f);
226
227         server_read_file_gid(s);
228
229         r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
230         if (r < 0)
231                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
232
233 #ifdef HAVE_ACL
234         if (uid <= 0)
235                 return;
236
237         acl = acl_get_fd(f->fd);
238         if (!acl) {
239                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
240                 return;
241         }
242
243         r = acl_find_uid(acl, uid, &entry);
244         if (r <= 0) {
245
246                 if (acl_create_entry(&acl, &entry) < 0 ||
247                     acl_set_tag_type(entry, ACL_USER) < 0 ||
248                     acl_set_qualifier(entry, &uid) < 0) {
249                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
250                         goto finish;
251                 }
252         }
253
254         if (acl_get_permset(entry, &permset) < 0 ||
255             acl_add_perm(permset, ACL_READ) < 0 ||
256             acl_calc_mask(&acl) < 0) {
257                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
258                 goto finish;
259         }
260
261         if (acl_set_fd(f->fd, acl) < 0)
262                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
263
264 finish:
265         acl_free(acl);
266 #endif
267 }
268
269 static JournalFile* find_journal(Server *s, uid_t uid) {
270         char *p;
271         int r;
272         JournalFile *f;
273         char ids[33];
274         sd_id128_t machine;
275
276         assert(s);
277
278         /* We split up user logs only on /var, not on /run. If the
279          * runtime file is open, we write to it exclusively, in order
280          * to guarantee proper order as soon as we flush /run to
281          * /var and close the runtime file. */
282
283         if (s->runtime_journal)
284                 return s->runtime_journal;
285
286         if (uid <= 0)
287                 return s->system_journal;
288
289         r = sd_id128_get_machine(&machine);
290         if (r < 0)
291                 return s->system_journal;
292
293         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
294         if (f)
295                 return f;
296
297         if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0)
298                 return s->system_journal;
299
300         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
301                 /* Too many open? Then let's close one */
302                 f = hashmap_steal_first(s->user_journals);
303                 assert(f);
304                 journal_file_close(f);
305         }
306
307         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->system_journal, &f);
308         free(p);
309
310         if (r < 0)
311                 return s->system_journal;
312
313         server_fix_perms(s, f, uid);
314
315         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
316         if (r < 0) {
317                 journal_file_close(f);
318                 return s->system_journal;
319         }
320
321         return f;
322 }
323
324 static void server_rotate(Server *s) {
325         JournalFile *f;
326         void *k;
327         Iterator i;
328         int r;
329
330         log_info("Rotating...");
331
332         if (s->runtime_journal) {
333                 r = journal_file_rotate(&s->runtime_journal);
334                 if (r < 0)
335                         log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
336         }
337
338         if (s->system_journal) {
339                 r = journal_file_rotate(&s->system_journal);
340                 if (r < 0)
341                         log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
342         }
343
344         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
345                 r = journal_file_rotate(&f);
346                 if (r < 0)
347                         log_error("Failed to rotate %s: %s", f->path, strerror(-r));
348                 else
349                         hashmap_replace(s->user_journals, k, f);
350         }
351 }
352
353 static void server_vacuum(Server *s) {
354         char *p;
355         char ids[33];
356         sd_id128_t machine;
357         int r;
358
359         log_info("Vacuuming...");
360
361         r = sd_id128_get_machine(&machine);
362         if (r < 0) {
363                 log_error("Failed to get machine ID: %s", strerror(-r));
364                 return;
365         }
366
367         sd_id128_to_string(machine, ids);
368
369         if (s->system_journal) {
370                 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
371                         log_error("Out of memory.");
372                         return;
373                 }
374
375                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
376                 if (r < 0 && r != -ENOENT)
377                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
378                 free(p);
379         }
380
381
382         if (s->runtime_journal) {
383                 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
384                         log_error("Out of memory.");
385                         return;
386                 }
387
388                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
389                 if (r < 0 && r != -ENOENT)
390                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
391                 free(p);
392         }
393
394         s->cached_available_space_timestamp = 0;
395 }
396
397 static char *shortened_cgroup_path(pid_t pid) {
398         int r;
399         char *process_path, *init_path, *path;
400
401         assert(pid > 0);
402
403         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
404         if (r < 0)
405                 return NULL;
406
407         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
408         if (r < 0) {
409                 free(process_path);
410                 return NULL;
411         }
412
413         if (endswith(init_path, "/system"))
414                 init_path[strlen(init_path) - 7] = 0;
415         else if (streq(init_path, "/"))
416                 init_path[0] = 0;
417
418         if (startswith(process_path, init_path)) {
419                 char *p;
420
421                 p = strdup(process_path + strlen(init_path));
422                 if (!p) {
423                         free(process_path);
424                         free(init_path);
425                         return NULL;
426                 }
427                 path = p;
428         } else {
429                 path = process_path;
430                 process_path = NULL;
431         }
432
433         free(process_path);
434         free(init_path);
435
436         return path;
437 }
438
439 static void dispatch_message_real(
440                 Server *s,
441                 struct iovec *iovec, unsigned n, unsigned m,
442                 struct ucred *ucred,
443                 struct timeval *tv,
444                 const char *label, size_t label_len) {
445
446         char *pid = NULL, *uid = NULL, *gid = NULL,
447                 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
448                 *comm = NULL, *cmdline = NULL, *hostname = NULL,
449                 *audit_session = NULL, *audit_loginuid = NULL,
450                 *exe = NULL, *cgroup = NULL, *session = NULL,
451                 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
452
453         char idbuf[33];
454         sd_id128_t id;
455         int r;
456         char *t;
457         uid_t loginuid = 0, realuid = 0;
458         JournalFile *f;
459         bool vacuumed = false;
460
461         assert(s);
462         assert(iovec);
463         assert(n > 0);
464         assert(n + N_IOVEC_META_FIELDS <= m);
465
466         if (ucred) {
467                 uint32_t audit;
468                 uid_t owner;
469
470                 realuid = ucred->uid;
471
472                 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
473                         IOVEC_SET_STRING(iovec[n++], pid);
474
475                 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
476                         IOVEC_SET_STRING(iovec[n++], uid);
477
478                 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
479                         IOVEC_SET_STRING(iovec[n++], gid);
480
481                 r = get_process_comm(ucred->pid, &t);
482                 if (r >= 0) {
483                         comm = strappend("_COMM=", t);
484                         free(t);
485
486                         if (comm)
487                                 IOVEC_SET_STRING(iovec[n++], comm);
488                 }
489
490                 r = get_process_exe(ucred->pid, &t);
491                 if (r >= 0) {
492                         exe = strappend("_EXE=", t);
493                         free(t);
494
495                         if (exe)
496                                 IOVEC_SET_STRING(iovec[n++], exe);
497                 }
498
499                 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
500                 if (r >= 0) {
501                         cmdline = strappend("_CMDLINE=", t);
502                         free(t);
503
504                         if (cmdline)
505                                 IOVEC_SET_STRING(iovec[n++], cmdline);
506                 }
507
508                 r = audit_session_from_pid(ucred->pid, &audit);
509                 if (r >= 0)
510                         if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
511                                 IOVEC_SET_STRING(iovec[n++], audit_session);
512
513                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
514                 if (r >= 0)
515                         if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
516                                 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
517
518                 t = shortened_cgroup_path(ucred->pid);
519                 if (t) {
520                         cgroup = strappend("_SYSTEMD_CGROUP=", t);
521                         free(t);
522
523                         if (cgroup)
524                                 IOVEC_SET_STRING(iovec[n++], cgroup);
525                 }
526
527                 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
528                         session = strappend("_SYSTEMD_SESSION=", t);
529                         free(t);
530
531                         if (session)
532                                 IOVEC_SET_STRING(iovec[n++], session);
533                 }
534
535                 if (sd_pid_get_unit(ucred->pid, &t) >= 0) {
536                         unit = strappend("_SYSTEMD_UNIT=", t);
537                         free(t);
538
539                         if (unit)
540                                 IOVEC_SET_STRING(iovec[n++], unit);
541                 }
542
543                 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
544                         if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
545                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
546
547 #ifdef HAVE_SELINUX
548                 if (label) {
549                         selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
550                         if (selinux_context) {
551                                 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
552                                 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
553                                 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
554                                 IOVEC_SET_STRING(iovec[n++], selinux_context);
555                         }
556                 } else {
557                         security_context_t con;
558
559                         if (getpidcon(ucred->pid, &con) >= 0) {
560                                 selinux_context = strappend("_SELINUX_CONTEXT=", con);
561                                 if (selinux_context)
562                                         IOVEC_SET_STRING(iovec[n++], selinux_context);
563
564                                 freecon(con);
565                         }
566                 }
567 #endif
568         }
569
570         if (tv) {
571                 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
572                              (unsigned long long) timeval_load(tv)) >= 0)
573                         IOVEC_SET_STRING(iovec[n++], source_time);
574         }
575
576         /* Note that strictly speaking storing the boot id here is
577          * redundant since the entry includes this in-line
578          * anyway. However, we need this indexed, too. */
579         r = sd_id128_get_boot(&id);
580         if (r >= 0)
581                 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
582                         IOVEC_SET_STRING(iovec[n++], boot_id);
583
584         r = sd_id128_get_machine(&id);
585         if (r >= 0)
586                 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
587                         IOVEC_SET_STRING(iovec[n++], machine_id);
588
589         t = gethostname_malloc();
590         if (t) {
591                 hostname = strappend("_HOSTNAME=", t);
592                 free(t);
593                 if (hostname)
594                         IOVEC_SET_STRING(iovec[n++], hostname);
595         }
596
597         assert(n <= m);
598
599         server_flush_to_var(s);
600
601 retry:
602         f = find_journal(s, realuid == 0 ? 0 : loginuid);
603         if (!f)
604                 log_warning("Dropping message, as we can't find a place to store the data.");
605         else {
606                 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
607
608                 if ((r == -EBADMSG || r == -E2BIG) && !vacuumed) {
609
610                         if (r == -E2BIG)
611                                 log_info("Allocation limit reached, rotating.");
612                         else
613                                 log_warning("Journal file corrupted, rotating.");
614
615                         server_rotate(s);
616                         server_vacuum(s);
617                         vacuumed = true;
618
619                         log_info("Retrying write.");
620                         goto retry;
621                 }
622
623                 if (r < 0)
624                         log_error("Failed to write entry, ignoring: %s", strerror(-r));
625         }
626
627         free(pid);
628         free(uid);
629         free(gid);
630         free(comm);
631         free(exe);
632         free(cmdline);
633         free(source_time);
634         free(boot_id);
635         free(machine_id);
636         free(hostname);
637         free(audit_session);
638         free(audit_loginuid);
639         free(cgroup);
640         free(session);
641         free(owner_uid);
642         free(unit);
643         free(selinux_context);
644 }
645
646 static void driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
647         char mid[11 + 32 + 1];
648         char buffer[16 + LINE_MAX + 1];
649         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
650         int n = 0;
651         va_list ap;
652         struct ucred ucred;
653
654         assert(s);
655         assert(format);
656
657         IOVEC_SET_STRING(iovec[n++], "PRIORITY=5");
658         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
659
660         memcpy(buffer, "MESSAGE=", 8);
661         va_start(ap, format);
662         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
663         va_end(ap);
664         char_array_0(buffer);
665         IOVEC_SET_STRING(iovec[n++], buffer);
666
667         snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
668         char_array_0(mid);
669         IOVEC_SET_STRING(iovec[n++], mid);
670
671         zero(ucred);
672         ucred.pid = getpid();
673         ucred.uid = getuid();
674         ucred.gid = getgid();
675
676         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0);
677 }
678
679 static void dispatch_message(Server *s,
680                              struct iovec *iovec, unsigned n, unsigned m,
681                              struct ucred *ucred,
682                              struct timeval *tv,
683                              const char *label, size_t label_len,
684                              int priority) {
685         int rl;
686         char *path = NULL, *c;
687
688         assert(s);
689         assert(iovec || n == 0);
690
691         if (n == 0)
692                 return;
693
694         if (!ucred)
695                 goto finish;
696
697         path = shortened_cgroup_path(ucred->pid);
698         if (!path)
699                 goto finish;
700
701         /* example: /user/lennart/3/foobar
702          *          /system/dbus.service/foobar
703          *
704          * So let's cut of everything past the third /, since that is
705          * wher user directories start */
706
707         c = strchr(path, '/');
708         if (c) {
709                 c = strchr(c+1, '/');
710                 if (c) {
711                         c = strchr(c+1, '/');
712                         if (c)
713                                 *c = 0;
714                 }
715         }
716
717         rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
718
719         if (rl == 0) {
720                 free(path);
721                 return;
722         }
723
724         /* Write a suppression message if we suppressed something */
725         if (rl > 1)
726                 driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
727
728         free(path);
729
730 finish:
731         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len);
732 }
733
734 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
735         struct msghdr msghdr;
736         struct cmsghdr *cmsg;
737         union {
738                 struct cmsghdr cmsghdr;
739                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
740         } control;
741         union sockaddr_union sa;
742
743         assert(s);
744         assert(iovec);
745         assert(n_iovec > 0);
746
747         zero(msghdr);
748         msghdr.msg_iov = (struct iovec*) iovec;
749         msghdr.msg_iovlen = n_iovec;
750
751         zero(sa);
752         sa.un.sun_family = AF_UNIX;
753         strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
754         msghdr.msg_name = &sa;
755         msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
756
757         if (ucred) {
758                 zero(control);
759                 msghdr.msg_control = &control;
760                 msghdr.msg_controllen = sizeof(control);
761
762                 cmsg = CMSG_FIRSTHDR(&msghdr);
763                 cmsg->cmsg_level = SOL_SOCKET;
764                 cmsg->cmsg_type = SCM_CREDENTIALS;
765                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
766                 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
767                 msghdr.msg_controllen = cmsg->cmsg_len;
768         }
769
770         /* Forward the syslog message we received via /dev/log to
771          * /run/systemd/syslog. Unfortunately we currently can't set
772          * the SO_TIMESTAMP auxiliary data, and hence we don't. */
773
774         if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
775                 return;
776
777         /* The socket is full? I guess the syslog implementation is
778          * too slow, and we shouldn't wait for that... */
779         if (errno == EAGAIN)
780                 return;
781
782         if (ucred && errno == ESRCH) {
783                 struct ucred u;
784
785                 /* Hmm, presumably the sender process vanished
786                  * by now, so let's fix it as good as we
787                  * can, and retry */
788
789                 u = *ucred;
790                 u.pid = getpid();
791                 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
792
793                 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
794                         return;
795
796                 if (errno == EAGAIN)
797                         return;
798         }
799
800         log_debug("Failed to forward syslog message: %m");
801 }
802
803 static void forward_syslog_raw(Server *s, const char *buffer, struct ucred *ucred, struct timeval *tv) {
804         struct iovec iovec;
805
806         assert(s);
807         assert(buffer);
808
809         IOVEC_SET_STRING(iovec, buffer);
810         forward_syslog_iovec(s, &iovec, 1, ucred, tv);
811 }
812
813 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
814         struct iovec iovec[5];
815         char header_priority[6], header_time[64], header_pid[16];
816         int n = 0;
817         time_t t;
818         struct tm *tm;
819         char *ident_buf = NULL;
820
821         assert(s);
822         assert(priority >= 0);
823         assert(priority <= 999);
824         assert(message);
825
826         /* First: priority field */
827         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
828         char_array_0(header_priority);
829         IOVEC_SET_STRING(iovec[n++], header_priority);
830
831         /* Second: timestamp */
832         t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
833         tm = localtime(&t);
834         if (!tm)
835                 return;
836         if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
837                 return;
838         IOVEC_SET_STRING(iovec[n++], header_time);
839
840         /* Third: identifier and PID */
841         if (ucred) {
842                 if (!identifier) {
843                         get_process_comm(ucred->pid, &ident_buf);
844                         identifier = ident_buf;
845                 }
846
847                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
848                 char_array_0(header_pid);
849
850                 if (identifier)
851                         IOVEC_SET_STRING(iovec[n++], identifier);
852
853                 IOVEC_SET_STRING(iovec[n++], header_pid);
854         } else if (identifier) {
855                 IOVEC_SET_STRING(iovec[n++], identifier);
856                 IOVEC_SET_STRING(iovec[n++], ": ");
857         }
858
859         /* Fourth: message */
860         IOVEC_SET_STRING(iovec[n++], message);
861
862         forward_syslog_iovec(s, iovec, n, ucred, tv);
863
864         free(ident_buf);
865 }
866
/* Returns priority with the facility replaced by LOG_USER if the
 * facility part is 0 (LOG_KERN); any other value is returned as is. */
static int fixup_priority(int priority) {
        return (priority & LOG_FACMASK) == 0
                ? ((priority & LOG_PRIMASK) | LOG_USER)
                : priority;
}
874
875 static void forward_kmsg(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
876         struct iovec iovec[5];
877         char header_priority[6], header_pid[16];
878         int n = 0;
879         char *ident_buf = NULL;
880         int fd;
881
882         assert(s);
883         assert(priority >= 0);
884         assert(priority <= 999);
885         assert(message);
886
887         /* Never allow messages with kernel facility to be written to
888          * kmsg, regardless where the data comes from. */
889         priority = fixup_priority(priority);
890
891         /* First: priority field */
892         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
893         char_array_0(header_priority);
894         IOVEC_SET_STRING(iovec[n++], header_priority);
895
896         /* Second: identifier and PID */
897         if (ucred) {
898                 if (!identifier) {
899                         get_process_comm(ucred->pid, &ident_buf);
900                         identifier = ident_buf;
901                 }
902
903                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
904                 char_array_0(header_pid);
905
906                 if (identifier)
907                         IOVEC_SET_STRING(iovec[n++], identifier);
908
909                 IOVEC_SET_STRING(iovec[n++], header_pid);
910         } else if (identifier) {
911                 IOVEC_SET_STRING(iovec[n++], identifier);
912                 IOVEC_SET_STRING(iovec[n++], ": ");
913         }
914
915         /* Fourth: message */
916         IOVEC_SET_STRING(iovec[n++], message);
917         IOVEC_SET_STRING(iovec[n++], "\n");
918
919         fd = open("/dev/kmsg", O_WRONLY|O_NOCTTY|O_CLOEXEC);
920         if (fd < 0) {
921                 log_debug("Failed to open /dev/kmsg for logging: %s", strerror(errno));
922                 goto finish;
923         }
924
925         if (writev(fd, iovec, n) < 0)
926                 log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
927
928         close_nointr_nofail(fd);
929
930 finish:
931         free(ident_buf);
932 }
933
934 static void forward_console(Server *s, const char *identifier, const char *message, struct ucred *ucred) {
935         struct iovec iovec[4];
936         char header_pid[16];
937         int n = 0, fd;
938         char *ident_buf = NULL;
939
940         assert(s);
941         assert(message);
942
943         /* First: identifier and PID */
944         if (ucred) {
945                 if (!identifier) {
946                         get_process_comm(ucred->pid, &ident_buf);
947                         identifier = ident_buf;
948                 }
949
950                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
951                 char_array_0(header_pid);
952
953                 if (identifier)
954                         IOVEC_SET_STRING(iovec[n++], identifier);
955
956                 IOVEC_SET_STRING(iovec[n++], header_pid);
957         } else if (identifier) {
958                 IOVEC_SET_STRING(iovec[n++], identifier);
959                 IOVEC_SET_STRING(iovec[n++], ": ");
960         }
961
962         /* Third: message */
963         IOVEC_SET_STRING(iovec[n++], message);
964         IOVEC_SET_STRING(iovec[n++], "\n");
965
966         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
967         if (fd < 0) {
968                 log_debug("Failed to open /dev/console for logging: %s", strerror(errno));
969                 goto finish;
970         }
971
972         if (writev(fd, iovec, n) < 0)
973                 log_debug("Failed to write to /dev/console for logging: %s", strerror(errno));
974
975         close_nointr_nofail(fd);
976
977 finish:
978         free(ident_buf);
979 }
980
981 static void read_identifier(const char **buf, char **identifier, char **pid) {
982         const char *p;
983         char *t;
984         size_t l, e;
985
986         assert(buf);
987         assert(identifier);
988         assert(pid);
989
990         p = *buf;
991
992         p += strspn(p, WHITESPACE);
993         l = strcspn(p, WHITESPACE);
994
995         if (l <= 0 ||
996             p[l-1] != ':')
997                 return;
998
999         e = l;
1000         l--;
1001
1002         if (p[l-1] == ']') {
1003                 size_t k = l-1;
1004
1005                 for (;;) {
1006
1007                         if (p[k] == '[') {
1008                                 t = strndup(p+k+1, l-k-2);
1009                                 if (t)
1010                                         *pid = t;
1011
1012                                 l = k;
1013                                 break;
1014                         }
1015
1016                         if (k == 0)
1017                                 break;
1018
1019                         k--;
1020                 }
1021         }
1022
1023         t = strndup(p, l);
1024         if (t)
1025                 *identifier = t;
1026
1027         *buf = p + e;
1028         *buf += strspn(*buf, WHITESPACE);
1029 }
1030
1031 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv, const char *label, size_t label_len) {
1032         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
1033         struct iovec iovec[N_IOVEC_META_FIELDS + 6];
1034         unsigned n = 0;
1035         int priority = LOG_USER | LOG_INFO;
1036         char *identifier = NULL, *pid = NULL;
1037
1038         assert(s);
1039         assert(buf);
1040
1041         if (s->forward_to_syslog)
1042                 forward_syslog_raw(s, buf, ucred, tv);
1043
1044         parse_syslog_priority((char**) &buf, &priority);
1045         skip_syslog_date((char**) &buf);
1046         read_identifier(&buf, &identifier, &pid);
1047
1048         if (s->forward_to_kmsg)
1049                 forward_kmsg(s, priority, identifier, buf, ucred);
1050
1051         if (s->forward_to_console)
1052                 forward_console(s, identifier, buf, ucred);
1053
1054         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
1055
1056         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1057                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1058
1059         if (priority & LOG_FACMASK)
1060                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1061                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1062
1063         if (identifier) {
1064                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1065                 if (syslog_identifier)
1066                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1067         }
1068
1069         if (pid) {
1070                 syslog_pid = strappend("SYSLOG_PID=", pid);
1071                 if (syslog_pid)
1072                         IOVEC_SET_STRING(iovec[n++], syslog_pid);
1073         }
1074
1075         message = strappend("MESSAGE=", buf);
1076         if (message)
1077                 IOVEC_SET_STRING(iovec[n++], message);
1078
1079         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, label, label_len, priority);
1080
1081         free(message);
1082         free(identifier);
1083         free(pid);
1084         free(syslog_priority);
1085         free(syslog_facility);
1086         free(syslog_identifier);
1087 }
1088
/* Checks whether the l bytes at p form a field name that clients are
 * allowed to set.
 *
 * This roughly follows the POSIX syntax recommendations for
 * environment variable names, with a few extra restrictions:
 *
 * http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
 *
 * A valid name is 1 to 64 bytes long, consists only of A-Z, 0-9 and
 * '_', and begins with neither a digit nor an underscore (names with
 * a leading underscore are protected). */
static bool valid_user_field(const char *p, size_t l) {
        size_t i;

        /* Reject empty names and names longer than 64 chars */
        if (l == 0 || l > 64)
                return false;

        /* Leading underscore: protected. Leading digit: not allowed. */
        if (p[0] == '_')
                return false;

        if (p[0] >= '0' && p[0] <= '9')
                return false;

        /* Every character must be an uppercase letter, a digit or '_' */
        for (i = 0; i < l; i++) {
                char c = p[i];
                bool is_upper = c >= 'A' && c <= 'Z';
                bool is_digit = c >= '0' && c <= '9';

                if (!is_upper && !is_digit && c != '_')
                        return false;
        }

        return true;
}
1123
1124 static void process_native_message(
1125                 Server *s,
1126                 const void *buffer, size_t buffer_size,
1127                 struct ucred *ucred,
1128                 struct timeval *tv,
1129                 const char *label, size_t label_len) {
1130
1131         struct iovec *iovec = NULL;
1132         unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1133         const char *p;
1134         size_t remaining;
1135         int priority = LOG_INFO;
1136         char *identifier = NULL, *message = NULL;
1137
1138         assert(s);
1139         assert(buffer || n == 0);
1140
1141         p = buffer;
1142         remaining = buffer_size;
1143
1144         while (remaining > 0) {
1145                 const char *e, *q;
1146
1147                 e = memchr(p, '\n', remaining);
1148
1149                 if (!e) {
1150                         /* Trailing noise, let's ignore it, and flush what we collected */
1151                         log_debug("Received message with trailing noise, ignoring.");
1152                         break;
1153                 }
1154
1155                 if (e == p) {
1156                         /* Entry separator */
1157                         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, priority);
1158                         n = 0;
1159                         priority = LOG_INFO;
1160
1161                         p++;
1162                         remaining--;
1163                         continue;
1164                 }
1165
1166                 if (*p == '.' || *p == '#') {
1167                         /* Ignore control commands for now, and
1168                          * comments too. */
1169                         remaining -= (e - p) + 1;
1170                         p = e + 1;
1171                         continue;
1172                 }
1173
1174                 /* A property follows */
1175
1176                 if (n+N_IOVEC_META_FIELDS >= m) {
1177                         struct iovec *c;
1178                         unsigned u;
1179
1180                         u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1181                         c = realloc(iovec, u * sizeof(struct iovec));
1182                         if (!c) {
1183                                 log_error("Out of memory");
1184                                 break;
1185                         }
1186
1187                         iovec = c;
1188                         m = u;
1189                 }
1190
1191                 q = memchr(p, '=', e - p);
1192                 if (q) {
1193                         if (valid_user_field(p, q - p)) {
1194                                 size_t l;
1195
1196                                 l = e - p;
1197
1198                                 /* If the field name starts with an
1199                                  * underscore, skip the variable,
1200                                  * since that indidates a trusted
1201                                  * field */
1202                                 iovec[n].iov_base = (char*) p;
1203                                 iovec[n].iov_len = l;
1204                                 n++;
1205
1206                                 /* We need to determine the priority
1207                                  * of this entry for the rate limiting
1208                                  * logic */
1209                                 if (l == 10 &&
1210                                     memcmp(p, "PRIORITY=", 9) == 0 &&
1211                                     p[9] >= '0' && p[9] <= '9')
1212                                         priority = (priority & LOG_FACMASK) | (p[9] - '0');
1213
1214                                 else if (l == 17 &&
1215                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1216                                          p[16] >= '0' && p[16] <= '9')
1217                                         priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1218
1219                                 else if (l == 18 &&
1220                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1221                                          p[16] >= '0' && p[16] <= '9' &&
1222                                          p[17] >= '0' && p[17] <= '9')
1223                                         priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1224
1225                                 else if (l >= 12 &&
1226                                          memcmp(p, "SYSLOG_IDENTIFIER=", 11) == 0) {
1227                                         char *t;
1228
1229                                         t = strndup(p + 11, l - 11);
1230                                         if (t) {
1231                                                 free(identifier);
1232                                                 identifier = t;
1233                                         }
1234                                 } else if (l >= 8 &&
1235                                            memcmp(p, "MESSAGE=", 8) == 0) {
1236                                         char *t;
1237
1238                                         t = strndup(p + 8, l - 8);
1239                                         if (t) {
1240                                                 free(message);
1241                                                 message = t;
1242                                         }
1243                                 }
1244                         }
1245
1246                         remaining -= (e - p) + 1;
1247                         p = e + 1;
1248                         continue;
1249                 } else {
1250                         uint64_t l;
1251                         char *k;
1252
1253                         if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1254                                 log_debug("Failed to parse message, ignoring.");
1255                                 break;
1256                         }
1257
1258                         memcpy(&l, e + 1, sizeof(uint64_t));
1259                         l = le64toh(l);
1260
1261                         if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1262                             e[1+sizeof(uint64_t)+l] != '\n') {
1263                                 log_debug("Failed to parse message, ignoring.");
1264                                 break;
1265                         }
1266
1267                         k = malloc((e - p) + 1 + l);
1268                         if (!k) {
1269                                 log_error("Out of memory");
1270                                 break;
1271                         }
1272
1273                         memcpy(k, p, e - p);
1274                         k[e - p] = '=';
1275                         memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1276
1277                         if (valid_user_field(p, e - p)) {
1278                                 iovec[n].iov_base = k;
1279                                 iovec[n].iov_len = (e - p) + 1 + l;
1280                                 n++;
1281                         } else
1282                                 free(k);
1283
1284                         remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1285                         p = e + 1 + sizeof(uint64_t) + l + 1;
1286                 }
1287         }
1288
1289         if (n <= 0)
1290                 goto finish;
1291
1292         tn = n++;
1293         IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1294
1295         if (message) {
1296                 if (s->forward_to_syslog)
1297                         forward_syslog(s, priority, identifier, message, ucred, tv);
1298
1299                 if (s->forward_to_kmsg)
1300                         forward_kmsg(s, priority, identifier, message, ucred);
1301
1302                 if (s->forward_to_console)
1303                         forward_console(s, identifier, message, ucred);
1304         }
1305
1306         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, priority);
1307
1308 finish:
1309         for (j = 0; j < n; j++)  {
1310                 if (j == tn)
1311                         continue;
1312
1313                 if (iovec[j].iov_base < buffer ||
1314                     (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1315                         free(iovec[j].iov_base);
1316         }
1317
1318         free(iovec);
1319         free(identifier);
1320         free(message);
1321 }
1322
1323 static void process_native_file(
1324                 Server *s,
1325                 int fd,
1326                 struct ucred *ucred,
1327                 struct timeval *tv,
1328                 const char *label, size_t label_len) {
1329
1330         struct stat st;
1331         void *p;
1332         ssize_t n;
1333
1334         assert(s);
1335         assert(fd >= 0);
1336
1337         /* Data is in the passed file, since it didn't fit in a
1338          * datagram. We can't map the file here, since clients might
1339          * then truncate it and trigger a SIGBUS for us. So let's
1340          * stupidly read it */
1341
1342         if (fstat(fd, &st) < 0) {
1343                 log_error("Failed to stat passed file, ignoring: %m");
1344                 return;
1345         }
1346
1347         if (!S_ISREG(st.st_mode)) {
1348                 log_error("File passed is not regular. Ignoring.");
1349                 return;
1350         }
1351
1352         if (st.st_size <= 0)
1353                 return;
1354
1355         if (st.st_size > ENTRY_SIZE_MAX) {
1356                 log_error("File passed too large. Ignoring.");
1357                 return;
1358         }
1359
1360         p = malloc(st.st_size);
1361         if (!p) {
1362                 log_error("Out of memory");
1363                 return;
1364         }
1365
1366         n = pread(fd, p, st.st_size, 0);
1367         if (n < 0)
1368                 log_error("Failed to read file, ignoring: %s", strerror(-n));
1369         else if (n > 0)
1370                 process_native_message(s, p, n, ucred, tv, label, label_len);
1371
1372         free(p);
1373 }
1374
1375 static int stdout_stream_log(StdoutStream *s, const char *p) {
1376         struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1377         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1378         unsigned n = 0;
1379         int priority;
1380         char *label = NULL;
1381         size_t label_len = 0;
1382
1383         assert(s);
1384         assert(p);
1385
1386         if (isempty(p))
1387                 return 0;
1388
1389         priority = s->priority;
1390
1391         if (s->level_prefix)
1392                 parse_syslog_priority((char**) &p, &priority);
1393
1394         if (s->forward_to_syslog || s->server->forward_to_syslog)
1395                 forward_syslog(s->server, fixup_priority(priority), s->identifier, p, &s->ucred, NULL);
1396
1397         if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1398                 forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1399
1400         if (s->forward_to_console || s->server->forward_to_console)
1401                 forward_console(s->server, s->identifier, p, &s->ucred);
1402
1403         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1404
1405         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1406                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1407
1408         if (priority & LOG_FACMASK)
1409                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1410                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1411
1412         if (s->identifier) {
1413                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1414                 if (syslog_identifier)
1415                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1416         }
1417
1418         message = strappend("MESSAGE=", p);
1419         if (message)
1420                 IOVEC_SET_STRING(iovec[n++], message);
1421
1422 #ifdef HAVE_SELINUX
1423         if (s->security_context) {
1424                 label = (char*) s->security_context;
1425                 label_len = strlen((char*) s->security_context);
1426         }
1427 #endif
1428
1429         dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, label, label_len, priority);
1430
1431         free(message);
1432         free(syslog_priority);
1433         free(syslog_facility);
1434         free(syslog_identifier);
1435
1436         return 0;
1437 }
1438
1439 static int stdout_stream_line(StdoutStream *s, char *p) {
1440         int r;
1441
1442         assert(s);
1443         assert(p);
1444
1445         p = strstrip(p);
1446
1447         switch (s->state) {
1448
1449         case STDOUT_STREAM_IDENTIFIER:
1450                 if (isempty(p))
1451                         s->identifier = NULL;
1452                 else  {
1453                         s->identifier = strdup(p);
1454                         if (!s->identifier) {
1455                                 log_error("Out of memory");
1456                                 return -ENOMEM;
1457                         }
1458                 }
1459
1460                 s->state = STDOUT_STREAM_PRIORITY;
1461                 return 0;
1462
1463         case STDOUT_STREAM_PRIORITY:
1464                 r = safe_atoi(p, &s->priority);
1465                 if (r < 0 || s->priority <= 0 || s->priority >= 999) {
1466                         log_warning("Failed to parse log priority line.");
1467                         return -EINVAL;
1468                 }
1469
1470                 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1471                 return 0;
1472
1473         case STDOUT_STREAM_LEVEL_PREFIX:
1474                 r = parse_boolean(p);
1475                 if (r < 0) {
1476                         log_warning("Failed to parse level prefix line.");
1477                         return -EINVAL;
1478                 }
1479
1480                 s->level_prefix = !!r;
1481                 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1482                 return 0;
1483
1484         case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1485                 r = parse_boolean(p);
1486                 if (r < 0) {
1487                         log_warning("Failed to parse forward to syslog line.");
1488                         return -EINVAL;
1489                 }
1490
1491                 s->forward_to_syslog = !!r;
1492                 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1493                 return 0;
1494
1495         case STDOUT_STREAM_FORWARD_TO_KMSG:
1496                 r = parse_boolean(p);
1497                 if (r < 0) {
1498                         log_warning("Failed to parse copy to kmsg line.");
1499                         return -EINVAL;
1500                 }
1501
1502                 s->forward_to_kmsg = !!r;
1503                 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1504                 return 0;
1505
1506         case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1507                 r = parse_boolean(p);
1508                 if (r < 0) {
1509                         log_warning("Failed to parse copy to console line.");
1510                         return -EINVAL;
1511                 }
1512
1513                 s->forward_to_console = !!r;
1514                 s->state = STDOUT_STREAM_RUNNING;
1515                 return 0;
1516
1517         case STDOUT_STREAM_RUNNING:
1518                 return stdout_stream_log(s, p);
1519         }
1520
1521         assert_not_reached("Unknown stream state");
1522 }
1523
1524 static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
1525         char *p;
1526         size_t remaining;
1527         int r;
1528
1529         assert(s);
1530
1531         p = s->buffer;
1532         remaining = s->length;
1533         for (;;) {
1534                 char *end;
1535                 size_t skip;
1536
1537                 end = memchr(p, '\n', remaining);
1538                 if (end)
1539                         skip = end - p + 1;
1540                 else if (remaining >= sizeof(s->buffer) - 1) {
1541                         end = p + sizeof(s->buffer) - 1;
1542                         skip = remaining;
1543                 } else
1544                         break;
1545
1546                 *end = 0;
1547
1548                 r = stdout_stream_line(s, p);
1549                 if (r < 0)
1550                         return r;
1551
1552                 remaining -= skip;
1553                 p += skip;
1554         }
1555
1556         if (force_flush && remaining > 0) {
1557                 p[remaining] = 0;
1558                 r = stdout_stream_line(s, p);
1559                 if (r < 0)
1560                         return r;
1561
1562                 p += remaining;
1563                 remaining = 0;
1564         }
1565
1566         if (p > s->buffer) {
1567                 memmove(s->buffer, p, remaining);
1568                 s->length = remaining;
1569         }
1570
1571         return 0;
1572 }
1573
1574 static int stdout_stream_process(StdoutStream *s) {
1575         ssize_t l;
1576         int r;
1577
1578         assert(s);
1579
1580         l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1581         if (l < 0) {
1582
1583                 if (errno == EAGAIN)
1584                         return 0;
1585
1586                 log_warning("Failed to read from stream: %m");
1587                 return -errno;
1588         }
1589
1590         if (l == 0) {
1591                 r = stdout_stream_scan(s, true);
1592                 if (r < 0)
1593                         return r;
1594
1595                 return 0;
1596         }
1597
1598         s->length += l;
1599         r = stdout_stream_scan(s, false);
1600         if (r < 0)
1601                 return r;
1602
1603         return 1;
1604
1605 }
1606
1607 static void stdout_stream_free(StdoutStream *s) {
1608         assert(s);
1609
1610         if (s->server) {
1611                 assert(s->server->n_stdout_streams > 0);
1612                 s->server->n_stdout_streams --;
1613                 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
1614         }
1615
1616         if (s->fd >= 0) {
1617                 if (s->server)
1618                         epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1619
1620                 close_nointr_nofail(s->fd);
1621         }
1622
1623 #ifdef HAVE_SELINUX
1624         if (s->security_context)
1625                 freecon(s->security_context);
1626 #endif
1627
1628         free(s->identifier);
1629         free(s);
1630 }
1631
1632 static int stdout_stream_new(Server *s) {
1633         StdoutStream *stream;
1634         int fd, r;
1635         socklen_t len;
1636         struct epoll_event ev;
1637
1638         assert(s);
1639
1640         fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1641         if (fd < 0) {
1642                 if (errno == EAGAIN)
1643                         return 0;
1644
1645                 log_error("Failed to accept stdout connection: %m");
1646                 return -errno;
1647         }
1648
1649         if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1650                 log_warning("Too many stdout streams, refusing connection.");
1651                 close_nointr_nofail(fd);
1652                 return 0;
1653         }
1654
1655         stream = new0(StdoutStream, 1);
1656         if (!stream) {
1657                 log_error("Out of memory.");
1658                 close_nointr_nofail(fd);
1659                 return -ENOMEM;
1660         }
1661
1662         stream->fd = fd;
1663
1664         len = sizeof(stream->ucred);
1665         if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1666                 log_error("Failed to determine peer credentials: %m");
1667                 r = -errno;
1668                 goto fail;
1669         }
1670
1671 #ifdef HAVE_SELINUX
1672         if (getpeercon(fd, &stream->security_context) < 0)
1673                 log_error("Failed to determine peer security context.");
1674 #endif
1675
1676         if (shutdown(fd, SHUT_WR) < 0) {
1677                 log_error("Failed to shutdown writing side of socket: %m");
1678                 r = -errno;
1679                 goto fail;
1680         }
1681
1682         zero(ev);
1683         ev.data.ptr = stream;
1684         ev.events = EPOLLIN;
1685         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1686                 log_error("Failed to add stream to event loop: %m");
1687                 r = -errno;
1688                 goto fail;
1689         }
1690
1691         stream->server = s;
1692         LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1693         s->n_stdout_streams ++;
1694
1695         return 0;
1696
1697 fail:
1698         stdout_stream_free(stream);
1699         return r;
1700 }
1701
1702 static int parse_kernel_timestamp(char **_p, usec_t *t) {
1703         usec_t r;
1704         int k, i;
1705         char *p;
1706
1707         assert(_p);
1708         assert(*_p);
1709         assert(t);
1710
1711         p = *_p;
1712
1713         if (strlen(p) < 14 || p[0] != '[' || p[13] != ']' || p[6] != '.')
1714                 return 0;
1715
1716         r = 0;
1717
1718         for (i = 1; i <= 5; i++) {
1719                 r *= 10;
1720
1721                 if (p[i] == ' ')
1722                         continue;
1723
1724                 k = undecchar(p[i]);
1725                 if (k < 0)
1726                         return 0;
1727
1728                 r += k;
1729         }
1730
1731         for (i = 7; i <= 12; i++) {
1732                 r *= 10;
1733
1734                 k = undecchar(p[i]);
1735                 if (k < 0)
1736                         return 0;
1737
1738                 r += k;
1739         }
1740
1741         *t = r;
1742         *_p += 14;
1743         *_p += strspn(*_p, WHITESPACE);
1744
1745         return 1;
1746 }
1747
1748 static void proc_kmsg_line(Server *s, const char *p) {
1749         struct iovec iovec[N_IOVEC_META_FIELDS + 7];
1750         char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
1751         int priority = LOG_KERN | LOG_INFO;
1752         unsigned n = 0;
1753         usec_t usec;
1754         char *identifier = NULL, *pid = NULL;
1755
1756         assert(s);
1757         assert(p);
1758
1759         if (isempty(p))
1760                 return;
1761
1762         parse_syslog_priority((char **) &p, &priority);
1763
1764         if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
1765                 return;
1766
1767         if (parse_kernel_timestamp((char **) &p, &usec) > 0) {
1768                 if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
1769                              (unsigned long long) usec) >= 0)
1770                         IOVEC_SET_STRING(iovec[n++], source_time);
1771         }
1772
1773         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
1774
1775         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1776                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1777
1778         if ((priority & LOG_FACMASK) == LOG_KERN) {
1779
1780                 if (s->forward_to_syslog)
1781                         forward_syslog(s, priority, "kernel", p, NULL, NULL);
1782
1783                 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
1784         } else {
1785                 read_identifier(&p, &identifier, &pid);
1786
1787                 if (s->forward_to_syslog)
1788                         forward_syslog(s, priority, identifier, p, NULL, NULL);
1789
1790                 if (identifier) {
1791                         syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1792                         if (syslog_identifier)
1793                                 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1794                 }
1795
1796                 if (pid) {
1797                         syslog_pid = strappend("SYSLOG_PID=", pid);
1798                         if (syslog_pid)
1799                                 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1800                 }
1801
1802                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1803                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1804         }
1805
1806         message = strappend("MESSAGE=", p);
1807         if (message)
1808                 IOVEC_SET_STRING(iovec[n++], message);
1809
1810         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, priority);
1811
1812         free(message);
1813         free(syslog_priority);
1814         free(syslog_identifier);
1815         free(syslog_pid);
1816         free(syslog_facility);
1817         free(source_time);
1818         free(identifier);
1819         free(pid);
1820 }
1821
1822 static void proc_kmsg_scan(Server *s) {
1823         char *p;
1824         size_t remaining;
1825
1826         assert(s);
1827
1828         p = s->proc_kmsg_buffer;
1829         remaining = s->proc_kmsg_length;
1830         for (;;) {
1831                 char *end;
1832                 size_t skip;
1833
1834                 end = memchr(p, '\n', remaining);
1835                 if (end)
1836                         skip = end - p + 1;
1837                 else if (remaining >= sizeof(s->proc_kmsg_buffer) - 1) {
1838                         end = p + sizeof(s->proc_kmsg_buffer) - 1;
1839                         skip = remaining;
1840                 } else
1841                         break;
1842
1843                 *end = 0;
1844
1845                 proc_kmsg_line(s, p);
1846
1847                 remaining -= skip;
1848                 p += skip;
1849         }
1850
1851         if (p > s->proc_kmsg_buffer) {
1852                 memmove(s->proc_kmsg_buffer, p, remaining);
1853                 s->proc_kmsg_length = remaining;
1854         }
1855 }
1856
1857 static int system_journal_open(Server *s) {
1858         int r;
1859         char *fn;
1860         sd_id128_t machine;
1861         char ids[33];
1862
1863         r = sd_id128_get_machine(&machine);
1864         if (r < 0)
1865                 return r;
1866
1867         sd_id128_to_string(machine, ids);
1868
1869         if (!s->system_journal) {
1870
1871                 /* First try to create the machine path, but not the prefix */
1872                 fn = strappend("/var/log/journal/", ids);
1873                 if (!fn)
1874                         return -ENOMEM;
1875                 (void) mkdir(fn, 0755);
1876                 free(fn);
1877
1878                 /* The create the system journal file */
1879                 fn = join("/var/log/journal/", ids, "/system.journal", NULL);
1880                 if (!fn)
1881                         return -ENOMEM;
1882
1883                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, NULL, &s->system_journal);
1884                 free(fn);
1885
1886                 if (r >= 0) {
1887                         journal_default_metrics(&s->system_metrics, s->system_journal->fd);
1888
1889                         s->system_journal->metrics = s->system_metrics;
1890                         s->system_journal->compress = s->compress;
1891
1892                         server_fix_perms(s, s->system_journal, 0);
1893                 } else if (r < 0) {
1894
1895                         if (r != -ENOENT && r != -EROFS)
1896                                 log_warning("Failed to open system journal: %s", strerror(-r));
1897
1898                         r = 0;
1899                 }
1900         }
1901
1902         if (!s->runtime_journal) {
1903
1904                 fn = join("/run/log/journal/", ids, "/system.journal", NULL);
1905                 if (!fn)
1906                         return -ENOMEM;
1907
1908                 if (s->system_journal) {
1909
1910                         /* Try to open the runtime journal, but only
1911                          * if it already exists, so that we can flush
1912                          * it into the system journal */
1913
1914                         r = journal_file_open_reliably(fn, O_RDWR, 0640, NULL, &s->runtime_journal);
1915                         free(fn);
1916
1917                         if (r < 0) {
1918                                 if (r != -ENOENT)
1919                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
1920
1921                                 r = 0;
1922                         }
1923
1924                 } else {
1925
1926                         /* OK, we really need the runtime journal, so create
1927                          * it if necessary. */
1928
1929                         (void) mkdir_parents(fn, 0755);
1930                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, NULL, &s->runtime_journal);
1931                         free(fn);
1932
1933                         if (r < 0) {
1934                                 log_error("Failed to open runtime journal: %s", strerror(-r));
1935                                 return r;
1936                         }
1937                 }
1938
1939                 if (s->runtime_journal) {
1940                         journal_default_metrics(&s->runtime_metrics, s->runtime_journal->fd);
1941
1942                         s->runtime_journal->metrics = s->runtime_metrics;
1943                         s->runtime_journal->compress = s->compress;
1944
1945                         server_fix_perms(s, s->runtime_journal, 0);
1946                 }
1947         }
1948
1949         return r;
1950 }
1951
1952 static int server_flush_to_var(Server *s) {
1953         char path[] = "/run/log/journal/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
1954         Object *o = NULL;
1955         int r;
1956         sd_id128_t machine;
1957         sd_journal *j;
1958         usec_t ts;
1959
1960         assert(s);
1961
1962         if (!s->runtime_journal)
1963                 return 0;
1964
1965         ts = now(CLOCK_MONOTONIC);
1966         if (s->var_available_timestamp + RECHECK_VAR_AVAILABLE_USEC > ts)
1967                 return 0;
1968
1969         s->var_available_timestamp = ts;
1970
1971         system_journal_open(s);
1972
1973         if (!s->system_journal)
1974                 return 0;
1975
1976         log_info("Flushing to /var...");
1977
1978         r = sd_id128_get_machine(&machine);
1979         if (r < 0) {
1980                 log_error("Failed to get machine id: %s", strerror(-r));
1981                 return r;
1982         }
1983
1984         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1985         if (r < 0) {
1986                 log_error("Failed to read runtime journal: %s", strerror(-r));
1987                 return r;
1988         }
1989
1990         SD_JOURNAL_FOREACH(j) {
1991                 JournalFile *f;
1992
1993                 f = j->current_file;
1994                 assert(f && f->current_offset > 0);
1995
1996                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1997                 if (r < 0) {
1998                         log_error("Can't read entry: %s", strerror(-r));
1999                         goto finish;
2000                 }
2001
2002                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2003                 if (r == -E2BIG) {
2004                         log_info("Allocation limit reached.");
2005
2006                         journal_file_post_change(s->system_journal);
2007                         server_rotate(s);
2008                         server_vacuum(s);
2009
2010                         r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2011                 }
2012
2013                 if (r < 0) {
2014                         log_error("Can't write entry: %s", strerror(-r));
2015                         goto finish;
2016                 }
2017         }
2018
2019 finish:
2020         journal_file_post_change(s->system_journal);
2021
2022         journal_file_close(s->runtime_journal);
2023         s->runtime_journal = NULL;
2024
2025         if (r >= 0) {
2026                 sd_id128_to_string(machine, path + 17);
2027                 rm_rf(path, false, true, false);
2028         }
2029
2030         return r;
2031 }
2032
2033 static int server_read_proc_kmsg(Server *s) {
2034         ssize_t l;
2035         assert(s);
2036         assert(s->proc_kmsg_fd >= 0);
2037
2038         l = read(s->proc_kmsg_fd, s->proc_kmsg_buffer + s->proc_kmsg_length, sizeof(s->proc_kmsg_buffer) - 1 - s->proc_kmsg_length);
2039         if (l < 0) {
2040
2041                 if (errno == EAGAIN || errno == EINTR)
2042                         return 0;
2043
2044                 log_error("Failed to read from kernel: %m");
2045                 return -errno;
2046         }
2047
2048         s->proc_kmsg_length += l;
2049
2050         proc_kmsg_scan(s);
2051         return 1;
2052 }
2053
2054 static int server_flush_proc_kmsg(Server *s) {
2055         int r;
2056
2057         assert(s);
2058
2059         if (s->proc_kmsg_fd < 0)
2060                 return 0;
2061
2062         log_info("Flushing /proc/kmsg...");
2063
2064         for (;;) {
2065                 r = server_read_proc_kmsg(s);
2066                 if (r < 0)
2067                         return r;
2068
2069                 if (r == 0)
2070                         break;
2071         }
2072
2073         return 0;
2074 }
2075
2076 static int process_event(Server *s, struct epoll_event *ev) {
2077         assert(s);
2078
2079         if (ev->data.fd == s->signal_fd) {
2080                 struct signalfd_siginfo sfsi;
2081                 ssize_t n;
2082
2083                 if (ev->events != EPOLLIN) {
2084                         log_info("Got invalid event from epoll.");
2085                         return -EIO;
2086                 }
2087
2088                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
2089                 if (n != sizeof(sfsi)) {
2090
2091                         if (n >= 0)
2092                                 return -EIO;
2093
2094                         if (errno == EINTR || errno == EAGAIN)
2095                                 return 1;
2096
2097                         return -errno;
2098                 }
2099
2100                 if (sfsi.ssi_signo == SIGUSR1) {
2101                         server_flush_to_var(s);
2102                         return 0;
2103                 }
2104
2105                 log_debug("Received SIG%s", signal_to_string(sfsi.ssi_signo));
2106                 return 0;
2107
2108         } else if (ev->data.fd == s->proc_kmsg_fd) {
2109                 int r;
2110
2111                 if (ev->events != EPOLLIN) {
2112                         log_info("Got invalid event from epoll.");
2113                         return -EIO;
2114                 }
2115
2116                 r = server_read_proc_kmsg(s);
2117                 if (r < 0)
2118                         return r;
2119
2120                 return 1;
2121
2122         } else if (ev->data.fd == s->native_fd ||
2123                    ev->data.fd == s->syslog_fd) {
2124
2125                 if (ev->events != EPOLLIN) {
2126                         log_info("Got invalid event from epoll.");
2127                         return -EIO;
2128                 }
2129
2130                 for (;;) {
2131                         struct msghdr msghdr;
2132                         struct iovec iovec;
2133                         struct ucred *ucred = NULL;
2134                         struct timeval *tv = NULL;
2135                         struct cmsghdr *cmsg;
2136                         char *label = NULL;
2137                         size_t label_len = 0;
2138                         union {
2139                                 struct cmsghdr cmsghdr;
2140                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
2141                                             CMSG_SPACE(sizeof(struct timeval)) +
2142                                             CMSG_SPACE(sizeof(int)) +
2143                                             CMSG_SPACE(PAGE_SIZE)]; /* selinux label */
2144                         } control;
2145                         ssize_t n;
2146                         int v;
2147                         int *fds = NULL;
2148                         unsigned n_fds = 0;
2149
2150                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
2151                                 log_error("SIOCINQ failed: %m");
2152                                 return -errno;
2153                         }
2154
2155                         if (s->buffer_size < (size_t) v) {
2156                                 void *b;
2157                                 size_t l;
2158
2159                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
2160                                 b = realloc(s->buffer, l+1);
2161
2162                                 if (!b) {
2163                                         log_error("Couldn't increase buffer.");
2164                                         return -ENOMEM;
2165                                 }
2166
2167                                 s->buffer_size = l;
2168                                 s->buffer = b;
2169                         }
2170
2171                         zero(iovec);
2172                         iovec.iov_base = s->buffer;
2173                         iovec.iov_len = s->buffer_size;
2174
2175                         zero(control);
2176                         zero(msghdr);
2177                         msghdr.msg_iov = &iovec;
2178                         msghdr.msg_iovlen = 1;
2179                         msghdr.msg_control = &control;
2180                         msghdr.msg_controllen = sizeof(control);
2181
2182                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
2183                         if (n < 0) {
2184
2185                                 if (errno == EINTR || errno == EAGAIN)
2186                                         return 1;
2187
2188                                 log_error("recvmsg() failed: %m");
2189                                 return -errno;
2190                         }
2191
2192                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
2193
2194                                 if (cmsg->cmsg_level == SOL_SOCKET &&
2195                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
2196                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
2197                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
2198                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2199                                          cmsg->cmsg_type == SCM_SECURITY) {
2200                                         label = (char*) CMSG_DATA(cmsg);
2201                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
2202                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
2203                                          cmsg->cmsg_type == SO_TIMESTAMP &&
2204                                          cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
2205                                         tv = (struct timeval*) CMSG_DATA(cmsg);
2206                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2207                                          cmsg->cmsg_type == SCM_RIGHTS) {
2208                                         fds = (int*) CMSG_DATA(cmsg);
2209                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
2210                                 }
2211                         }
2212
2213                         if (ev->data.fd == s->syslog_fd) {
2214                                 char *e;
2215
2216                                 if (n > 0 && n_fds == 0) {
2217                                         e = memchr(s->buffer, '\n', n);
2218                                         if (e)
2219                                                 *e = 0;
2220                                         else
2221                                                 s->buffer[n] = 0;
2222
2223                                         process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
2224                                 } else if (n_fds > 0)
2225                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
2226
2227                         } else {
2228                                 if (n > 0 && n_fds == 0)
2229                                         process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
2230                                 else if (n == 0 && n_fds == 1)
2231                                         process_native_file(s, fds[0], ucred, tv, label, label_len);
2232                                 else if (n_fds > 0)
2233                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
2234                         }
2235
2236                         close_many(fds, n_fds);
2237                 }
2238
2239                 return 1;
2240
2241         } else if (ev->data.fd == s->stdout_fd) {
2242
2243                 if (ev->events != EPOLLIN) {
2244                         log_info("Got invalid event from epoll.");
2245                         return -EIO;
2246                 }
2247
2248                 stdout_stream_new(s);
2249                 return 1;
2250
2251         } else {
2252                 StdoutStream *stream;
2253
2254                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
2255                         log_info("Got invalid event from epoll.");
2256                         return -EIO;
2257                 }
2258
2259                 /* If it is none of the well-known fds, it must be an
2260                  * stdout stream fd. Note that this is a bit ugly here
2261                  * (since we rely that none of the well-known fds
2262                  * could be interpreted as pointer), but nonetheless
2263                  * safe, since the well-known fds would never get an
2264                  * fd > 4096, i.e. beyond the first memory page */
2265
2266                 stream = ev->data.ptr;
2267
2268                 if (stdout_stream_process(stream) <= 0)
2269                         stdout_stream_free(stream);
2270
2271                 return 1;
2272         }
2273
2274         log_error("Unknown event.");
2275         return 0;
2276 }
2277
2278 static int open_syslog_socket(Server *s) {
2279         union sockaddr_union sa;
2280         int one, r;
2281         struct epoll_event ev;
2282
2283         assert(s);
2284
2285         if (s->syslog_fd < 0) {
2286
2287                 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2288                 if (s->syslog_fd < 0) {
2289                         log_error("socket() failed: %m");
2290                         return -errno;
2291                 }
2292
2293                 zero(sa);
2294                 sa.un.sun_family = AF_UNIX;
2295                 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
2296
2297                 unlink(sa.un.sun_path);
2298
2299                 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2300                 if (r < 0) {
2301                         log_error("bind() failed: %m");
2302                         return -errno;
2303                 }
2304
2305                 chmod(sa.un.sun_path, 0666);
2306         } else
2307                 fd_nonblock(s->syslog_fd, 1);
2308
2309         one = 1;
2310         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2311         if (r < 0) {
2312                 log_error("SO_PASSCRED failed: %m");
2313                 return -errno;
2314         }
2315
2316 #ifdef HAVE_SELINUX
2317         one = 1;
2318         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2319         if (r < 0)
2320                 log_warning("SO_PASSSEC failed: %m");
2321 #endif
2322
2323         one = 1;
2324         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2325         if (r < 0) {
2326                 log_error("SO_TIMESTAMP failed: %m");
2327                 return -errno;
2328         }
2329
2330         zero(ev);
2331         ev.events = EPOLLIN;
2332         ev.data.fd = s->syslog_fd;
2333         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2334                 log_error("Failed to add syslog server fd to epoll object: %m");
2335                 return -errno;
2336         }
2337
2338         return 0;
2339 }
2340
2341 static int open_native_socket(Server*s) {
2342         union sockaddr_union sa;
2343         int one, r;
2344         struct epoll_event ev;
2345
2346         assert(s);
2347
2348         if (s->native_fd < 0) {
2349
2350                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2351                 if (s->native_fd < 0) {
2352                         log_error("socket() failed: %m");
2353                         return -errno;
2354                 }
2355
2356                 zero(sa);
2357                 sa.un.sun_family = AF_UNIX;
2358                 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
2359
2360                 unlink(sa.un.sun_path);
2361
2362                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2363                 if (r < 0) {
2364                         log_error("bind() failed: %m");
2365                         return -errno;
2366                 }
2367
2368                 chmod(sa.un.sun_path, 0666);
2369         } else
2370                 fd_nonblock(s->native_fd, 1);
2371
2372         one = 1;
2373         r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2374         if (r < 0) {
2375                 log_error("SO_PASSCRED failed: %m");
2376                 return -errno;
2377         }
2378
2379 #ifdef HAVE_SELINUX
2380         one = 1;
2381         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2382         if (r < 0)
2383                 log_warning("SO_PASSSEC failed: %m");
2384 #endif
2385
2386         one = 1;
2387         r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2388         if (r < 0) {
2389                 log_error("SO_TIMESTAMP failed: %m");
2390                 return -errno;
2391         }
2392
2393         zero(ev);
2394         ev.events = EPOLLIN;
2395         ev.data.fd = s->native_fd;
2396         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2397                 log_error("Failed to add native server fd to epoll object: %m");
2398                 return -errno;
2399         }
2400
2401         return 0;
2402 }
2403
2404 static int open_stdout_socket(Server *s) {
2405         union sockaddr_union sa;
2406         int r;
2407         struct epoll_event ev;
2408
2409         assert(s);
2410
2411         if (s->stdout_fd < 0) {
2412
2413                 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2414                 if (s->stdout_fd < 0) {
2415                         log_error("socket() failed: %m");
2416                         return -errno;
2417                 }
2418
2419                 zero(sa);
2420                 sa.un.sun_family = AF_UNIX;
2421                 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
2422
2423                 unlink(sa.un.sun_path);
2424
2425                 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2426                 if (r < 0) {
2427                         log_error("bind() failed: %m");
2428                         return -errno;
2429                 }
2430
2431                 chmod(sa.un.sun_path, 0666);
2432
2433                 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2434                         log_error("liste() failed: %m");
2435                         return -errno;
2436                 }
2437         } else
2438                 fd_nonblock(s->stdout_fd, 1);
2439
2440         zero(ev);
2441         ev.events = EPOLLIN;
2442         ev.data.fd = s->stdout_fd;
2443         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2444                 log_error("Failed to add stdout server fd to epoll object: %m");
2445                 return -errno;
2446         }
2447
2448         return 0;
2449 }
2450
2451 static int open_proc_kmsg(Server *s) {
2452         struct epoll_event ev;
2453
2454         assert(s);
2455
2456         if (!s->import_proc_kmsg)
2457                 return 0;
2458
2459
2460         s->proc_kmsg_fd = open("/proc/kmsg", O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2461         if (s->proc_kmsg_fd < 0) {
2462                 log_warning("Failed to open /proc/kmsg, ignoring: %m");
2463                 return 0;
2464         }
2465
2466         zero(ev);
2467         ev.events = EPOLLIN;
2468         ev.data.fd = s->proc_kmsg_fd;
2469         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->proc_kmsg_fd, &ev) < 0) {
2470                 log_error("Failed to add /proc/kmsg fd to epoll object: %m");
2471                 return -errno;
2472         }
2473
2474         return 0;
2475 }
2476
2477 static int open_signalfd(Server *s) {
2478         sigset_t mask;
2479         struct epoll_event ev;
2480
2481         assert(s);
2482
2483         assert_se(sigemptyset(&mask) == 0);
2484         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, -1);
2485         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2486
2487         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2488         if (s->signal_fd < 0) {
2489                 log_error("signalfd(): %m");
2490                 return -errno;
2491         }
2492
2493         zero(ev);
2494         ev.events = EPOLLIN;
2495         ev.data.fd = s->signal_fd;
2496
2497         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2498                 log_error("epoll_ctl(): %m");
2499                 return -errno;
2500         }
2501
2502         return 0;
2503 }
2504
2505 static int server_parse_proc_cmdline(Server *s) {
2506         char *line, *w, *state;
2507         int r;
2508         size_t l;
2509
2510         if (detect_container(NULL) > 0)
2511                 return 0;
2512
2513         r = read_one_line_file("/proc/cmdline", &line);
2514         if (r < 0) {
2515                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2516                 return 0;
2517         }
2518
2519         FOREACH_WORD_QUOTED(w, l, line, state) {
2520                 char *word;
2521
2522                 word = strndup(w, l);
2523                 if (!word) {
2524                         r = -ENOMEM;
2525                         goto finish;
2526                 }
2527
2528                 if (startswith(word, "systemd_journald.forward_to_syslog=")) {
2529                         r = parse_boolean(word + 35);
2530                         if (r < 0)
2531                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2532                         else
2533                                 s->forward_to_syslog = r;
2534                 } else if (startswith(word, "systemd_journald.forward_to_kmsg=")) {
2535                         r = parse_boolean(word + 33);
2536                         if (r < 0)
2537                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2538                         else
2539                                 s->forward_to_kmsg = r;
2540                 } else if (startswith(word, "systemd_journald.forward_to_console=")) {
2541                         r = parse_boolean(word + 36);
2542                         if (r < 0)
2543                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2544                         else
2545                                 s->forward_to_console = r;
2546                 }
2547
2548                 free(word);
2549         }
2550
2551         r = 0;
2552
2553 finish:
2554         free(line);
2555         return r;
2556 }
2557
2558 static int server_parse_config_file(Server *s) {
2559         FILE *f;
2560         const char *fn;
2561         int r;
2562
2563         assert(s);
2564
2565         fn = "/etc/systemd/systemd-journald.conf";
2566         f = fopen(fn, "re");
2567         if (!f) {
2568                 if (errno == ENOENT)
2569                         return 0;
2570
2571                 log_warning("Failed to open configuration file %s: %m", fn);
2572                 return -errno;
2573         }
2574
2575         r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2576         if (r < 0)
2577                 log_warning("Failed to parse configuration file: %s", strerror(-r));
2578
2579         fclose(f);
2580
2581         return r;
2582 }
2583
2584 static int server_init(Server *s) {
2585         int n, r, fd;
2586
2587         assert(s);
2588
2589         zero(*s);
2590         s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->proc_kmsg_fd = -1;
2591         s->compress = true;
2592
2593         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
2594         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
2595
2596         s->forward_to_syslog = true;
2597         s->import_proc_kmsg = true;
2598
2599         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
2600         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
2601
2602         server_parse_config_file(s);
2603         server_parse_proc_cmdline(s);
2604
2605         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
2606         if (!s->user_journals) {
2607                 log_error("Out of memory.");
2608                 return -ENOMEM;
2609         }
2610
2611         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2612         if (s->epoll_fd < 0) {
2613                 log_error("Failed to create epoll object: %m");
2614                 return -errno;
2615         }
2616
2617         n = sd_listen_fds(true);
2618         if (n < 0) {
2619                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
2620                 return n;
2621         }
2622
2623         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2624
2625                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
2626
2627                         if (s->native_fd >= 0) {
2628                                 log_error("Too many native sockets passed.");
2629                                 return -EINVAL;
2630                         }
2631
2632                         s->native_fd = fd;
2633
2634                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
2635
2636                         if (s->stdout_fd >= 0) {
2637                                 log_error("Too many stdout sockets passed.");
2638                                 return -EINVAL;
2639                         }
2640
2641                         s->stdout_fd = fd;
2642
2643                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
2644
2645                         if (s->syslog_fd >= 0) {
2646                                 log_error("Too many /dev/log sockets passed.");
2647                                 return -EINVAL;
2648                         }
2649
2650                         s->syslog_fd = fd;
2651
2652                 } else {
2653                         log_error("Unknown socket passed.");
2654                         return -EINVAL;
2655                 }
2656         }
2657
2658         r = open_syslog_socket(s);
2659         if (r < 0)
2660                 return r;
2661
2662         r = open_native_socket(s);
2663         if (r < 0)
2664                 return r;
2665
2666         r = open_stdout_socket(s);
2667         if (r < 0)
2668                 return r;
2669
2670         r = open_proc_kmsg(s);
2671         if (r < 0)
2672                 return r;
2673
2674         r = open_signalfd(s);
2675         if (r < 0)
2676                 return r;
2677
2678         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2679         if (!s->rate_limit)
2680                 return -ENOMEM;
2681
2682         r = system_journal_open(s);
2683         if (r < 0)
2684                 return r;
2685
2686         return 0;
2687 }
2688
2689 static void server_done(Server *s) {
2690         JournalFile *f;
2691         assert(s);
2692
2693         while (s->stdout_streams)
2694                 stdout_stream_free(s->stdout_streams);
2695
2696         if (s->system_journal)
2697                 journal_file_close(s->system_journal);
2698
2699         if (s->runtime_journal)
2700                 journal_file_close(s->runtime_journal);
2701
2702         while ((f = hashmap_steal_first(s->user_journals)))
2703                 journal_file_close(f);
2704
2705         hashmap_free(s->user_journals);
2706
2707         if (s->epoll_fd >= 0)
2708                 close_nointr_nofail(s->epoll_fd);
2709
2710         if (s->signal_fd >= 0)
2711                 close_nointr_nofail(s->signal_fd);
2712
2713         if (s->syslog_fd >= 0)
2714                 close_nointr_nofail(s->syslog_fd);
2715
2716         if (s->native_fd >= 0)
2717                 close_nointr_nofail(s->native_fd);
2718
2719         if (s->stdout_fd >= 0)
2720                 close_nointr_nofail(s->stdout_fd);
2721
2722         if (s->proc_kmsg_fd >= 0)
2723                 close_nointr_nofail(s->proc_kmsg_fd);
2724
2725         if (s->rate_limit)
2726                 journal_rate_limit_free(s->rate_limit);
2727
2728         free(s->buffer);
2729 }
2730
2731 int main(int argc, char *argv[]) {
2732         Server server;
2733         int r;
2734
2735         /* if (getppid() != 1) { */
2736         /*         log_error("This program should be invoked by init only."); */
2737         /*         return EXIT_FAILURE; */
2738         /* } */
2739
2740         if (argc > 1) {
2741                 log_error("This program does not take arguments.");
2742                 return EXIT_FAILURE;
2743         }
2744
2745         log_set_target(LOG_TARGET_CONSOLE);
2746         log_parse_environment();
2747         log_open();
2748
2749         umask(0022);
2750
2751         r = server_init(&server);
2752         if (r < 0)
2753                 goto finish;
2754
2755         server_vacuum(&server);
2756         server_flush_to_var(&server);
2757         server_flush_proc_kmsg(&server);
2758
2759         log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
2760         driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
2761
2762         sd_notify(false,
2763                   "READY=1\n"
2764                   "STATUS=Processing requests...");
2765
2766         for (;;) {
2767                 struct epoll_event event;
2768
2769                 r = epoll_wait(server.epoll_fd, &event, 1, -1);
2770                 if (r < 0) {
2771
2772                         if (errno == EINTR)
2773                                 continue;
2774
2775                         log_error("epoll_wait() failed: %m");
2776                         r = -errno;
2777                         goto finish;
2778                 } else if (r == 0)
2779                         break;
2780
2781                 r = process_event(&server, &event);
2782                 if (r < 0)
2783                         goto finish;
2784                 else if (r == 0)
2785                         break;
2786         }
2787
2788         log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
2789         driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
2790
2791 finish:
2792         sd_notify(false,
2793                   "STATUS=Shutting down...");
2794
2795         server_done(&server);
2796
2797         return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
2798 }