chiark / gitweb /
619a98d1f2db252bef61193d3812ea6bbffe63a2
[elogind.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
32
33 #include <systemd/sd-journal.h>
34 #include <systemd/sd-messages.h>
35 #include <systemd/sd-daemon.h>
36
37 #ifdef HAVE_LOGIND
38 #include <systemd/sd-login.h>
39 #endif
40
41 #include "mkdir.h"
42 #include "hashmap.h"
43 #include "journal-file.h"
44 #include "socket-util.h"
45 #include "cgroup-util.h"
46 #include "list.h"
47 #include "journal-rate-limit.h"
48 #include "journal-internal.h"
49 #include "conf-parser.h"
50 #include "journald.h"
51 #include "virt.h"
52 #include "missing.h"
53
54 #ifdef HAVE_ACL
55 #include <sys/acl.h>
56 #include <acl/libacl.h>
57 #include "acl-util.h"
58 #endif
59
60 #ifdef HAVE_SELINUX
61 #include <selinux/selinux.h>
62 #endif
63
/* Upper bound on the number of per-user journal files kept open at
 * once; find_journal() closes one before opening another when this
 * many are already open. */
#define USER_JOURNALS_MAX 1024

/* NOTE(review): presumably the cap on concurrent stdout stream
 * connections -- the code using it is outside this section. */
#define STDOUT_STREAMS_MAX 4096

/* Default rate limiting parameters (interval length and burst size). */
#define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 200

/* How long a value computed by available_space() stays cached. */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)

/* NOTE(review): looks like the re-check interval for /var becoming
 * available -- confirm against server_flush_to_var(). */
#define RECHECK_VAR_AVAILABLE_USEC (30*USEC_PER_SEC)

/* Number of iovec slots dispatch_message_real() may append as trusted
 * meta data fields; callers must reserve this many extra entries. */
#define N_IOVEC_META_FIELDS 17

/* 32 MiB. NOTE(review): presumably the largest entry accepted from a
 * client -- the code using it is outside this section. */
#define ENTRY_SIZE_MAX (1024*1024*32)
77
/* State of a stdout stream connection.
 *
 * NOTE(review): the enumerators mirror the per-stream settings stored
 * in struct StdoutStream (identifier, unit_id, priority, ...), so these
 * are presumably the header fields expected in this order before the
 * stream switches to STDOUT_STREAM_RUNNING -- the state machine itself
 * is outside this section, confirm there. */
typedef enum StdoutStreamState {
        STDOUT_STREAM_IDENTIFIER = 0,
        STDOUT_STREAM_UNIT_ID,
        STDOUT_STREAM_PRIORITY,
        STDOUT_STREAM_LEVEL_PREFIX,
        STDOUT_STREAM_FORWARD_TO_SYSLOG,
        STDOUT_STREAM_FORWARD_TO_KMSG,
        STDOUT_STREAM_FORWARD_TO_CONSOLE,
        STDOUT_STREAM_RUNNING
} StdoutStreamState;
88
89 struct StdoutStream {
90         Server *server;
91         StdoutStreamState state;
92
93         int fd;
94
95         struct ucred ucred;
96 #ifdef HAVE_SELINUX
97         security_context_t security_context;
98 #endif
99
100         char *identifier;
101         char *unit_id;
102         int priority;
103         bool level_prefix:1;
104         bool forward_to_syslog:1;
105         bool forward_to_kmsg:1;
106         bool forward_to_console:1;
107
108         char buffer[LINE_MAX+1];
109         size_t length;
110
111         LIST_FIELDS(StdoutStream, stdout_stream);
112 };
113
114 static int server_flush_to_var(Server *s);
115
116 static uint64_t available_space(Server *s) {
117         char ids[33], *p;
118         const char *f;
119         sd_id128_t machine;
120         struct statvfs ss;
121         uint64_t sum = 0, avail = 0, ss_avail = 0;
122         int r;
123         DIR *d;
124         usec_t ts;
125         JournalMetrics *m;
126
127         ts = now(CLOCK_MONOTONIC);
128
129         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
130                 return s->cached_available_space;
131
132         r = sd_id128_get_machine(&machine);
133         if (r < 0)
134                 return 0;
135
136         if (s->system_journal) {
137                 f = "/var/log/journal/";
138                 m = &s->system_metrics;
139         } else {
140                 f = "/run/log/journal/";
141                 m = &s->runtime_metrics;
142         }
143
144         assert(m);
145
146         p = strappend(f, sd_id128_to_string(machine, ids));
147         if (!p)
148                 return 0;
149
150         d = opendir(p);
151         free(p);
152
153         if (!d)
154                 return 0;
155
156         if (fstatvfs(dirfd(d), &ss) < 0)
157                 goto finish;
158
159         for (;;) {
160                 struct stat st;
161                 struct dirent buf, *de;
162
163                 r = readdir_r(d, &buf, &de);
164                 if (r != 0)
165                         break;
166
167                 if (!de)
168                         break;
169
170                 if (!endswith(de->d_name, ".journal") &&
171                     !endswith(de->d_name, ".journal~"))
172                         continue;
173
174                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
175                         continue;
176
177                 if (!S_ISREG(st.st_mode))
178                         continue;
179
180                 sum += (uint64_t) st.st_blocks * 512UL;
181         }
182
183         avail = sum >= m->max_use ? 0 : m->max_use - sum;
184
185         ss_avail = ss.f_bsize * ss.f_bavail;
186
187         ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
188
189         if (ss_avail < avail)
190                 avail = ss_avail;
191
192         s->cached_available_space = avail;
193         s->cached_available_space_timestamp = ts;
194
195 finish:
196         closedir(d);
197
198         return avail;
199 }
200
201 static void server_read_file_gid(Server *s) {
202         const char *adm = "adm";
203         int r;
204
205         assert(s);
206
207         if (s->file_gid_valid)
208                 return;
209
210         r = get_group_creds(&adm, &s->file_gid);
211         if (r < 0)
212                 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
213
214         /* if we couldn't read the gid, then it will be 0, but that's
215          * fine and we shouldn't try to resolve the group again, so
216          * let's just pretend it worked right-away. */
217         s->file_gid_valid = true;
218 }
219
220 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
221         int r;
222 #ifdef HAVE_ACL
223         acl_t acl;
224         acl_entry_t entry;
225         acl_permset_t permset;
226 #endif
227
228         assert(f);
229
230         server_read_file_gid(s);
231
232         r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
233         if (r < 0)
234                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
235
236 #ifdef HAVE_ACL
237         if (uid <= 0)
238                 return;
239
240         acl = acl_get_fd(f->fd);
241         if (!acl) {
242                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
243                 return;
244         }
245
246         r = acl_find_uid(acl, uid, &entry);
247         if (r <= 0) {
248
249                 if (acl_create_entry(&acl, &entry) < 0 ||
250                     acl_set_tag_type(entry, ACL_USER) < 0 ||
251                     acl_set_qualifier(entry, &uid) < 0) {
252                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
253                         goto finish;
254                 }
255         }
256
257         if (acl_get_permset(entry, &permset) < 0 ||
258             acl_add_perm(permset, ACL_READ) < 0 ||
259             acl_calc_mask(&acl) < 0) {
260                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
261                 goto finish;
262         }
263
264         if (acl_set_fd(f->fd, acl) < 0)
265                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
266
267 finish:
268         acl_free(acl);
269 #endif
270 }
271
272 static JournalFile* find_journal(Server *s, uid_t uid) {
273         char *p;
274         int r;
275         JournalFile *f;
276         char ids[33];
277         sd_id128_t machine;
278
279         assert(s);
280
281         /* We split up user logs only on /var, not on /run. If the
282          * runtime file is open, we write to it exclusively, in order
283          * to guarantee proper order as soon as we flush /run to
284          * /var and close the runtime file. */
285
286         if (s->runtime_journal)
287                 return s->runtime_journal;
288
289         if (uid <= 0)
290                 return s->system_journal;
291
292         r = sd_id128_get_machine(&machine);
293         if (r < 0)
294                 return s->system_journal;
295
296         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
297         if (f)
298                 return f;
299
300         if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0)
301                 return s->system_journal;
302
303         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
304                 /* Too many open? Then let's close one */
305                 f = hashmap_steal_first(s->user_journals);
306                 assert(f);
307                 journal_file_close(f);
308         }
309
310         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->system_journal, &f);
311         free(p);
312
313         if (r < 0)
314                 return s->system_journal;
315
316         server_fix_perms(s, f, uid);
317
318         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
319         if (r < 0) {
320                 journal_file_close(f);
321                 return s->system_journal;
322         }
323
324         return f;
325 }
326
327 static void server_rotate(Server *s) {
328         JournalFile *f;
329         void *k;
330         Iterator i;
331         int r;
332
333         log_info("Rotating...");
334
335         if (s->runtime_journal) {
336                 r = journal_file_rotate(&s->runtime_journal);
337                 if (r < 0)
338                         if (s->runtime_journal)
339                                 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
340                         else
341                                 log_error("Failed to create new runtime journal: %s", strerror(-r));
342                 else
343                         server_fix_perms(s, s->runtime_journal, 0);
344         }
345
346         if (s->system_journal) {
347                 r = journal_file_rotate(&s->system_journal);
348                 if (r < 0)
349                         if (s->system_journal)
350                                 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
351                         else
352                                 log_error("Failed to create new system journal: %s", strerror(-r));
353
354                 else
355                         server_fix_perms(s, s->system_journal, 0);
356         }
357
358         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
359                 r = journal_file_rotate(&f);
360                 if (r < 0)
361                         if (f->path)
362                                 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
363                         else
364                                 log_error("Failed to create user journal: %s", strerror(-r));
365                 else {
366                         hashmap_replace(s->user_journals, k, f);
367                         server_fix_perms(s, s->system_journal, PTR_TO_UINT32(k));
368                 }
369         }
370 }
371
372 static void server_vacuum(Server *s) {
373         char *p;
374         char ids[33];
375         sd_id128_t machine;
376         int r;
377
378         log_info("Vacuuming...");
379
380         r = sd_id128_get_machine(&machine);
381         if (r < 0) {
382                 log_error("Failed to get machine ID: %s", strerror(-r));
383                 return;
384         }
385
386         sd_id128_to_string(machine, ids);
387
388         if (s->system_journal) {
389                 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
390                         log_error("Out of memory.");
391                         return;
392                 }
393
394                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
395                 if (r < 0 && r != -ENOENT)
396                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
397                 free(p);
398         }
399
400
401         if (s->runtime_journal) {
402                 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
403                         log_error("Out of memory.");
404                         return;
405                 }
406
407                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
408                 if (r < 0 && r != -ENOENT)
409                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
410                 free(p);
411         }
412
413         s->cached_available_space_timestamp = 0;
414 }
415
416 static char *shortened_cgroup_path(pid_t pid) {
417         int r;
418         char *process_path, *init_path, *path;
419
420         assert(pid > 0);
421
422         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
423         if (r < 0)
424                 return NULL;
425
426         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
427         if (r < 0) {
428                 free(process_path);
429                 return NULL;
430         }
431
432         if (endswith(init_path, "/system"))
433                 init_path[strlen(init_path) - 7] = 0;
434         else if (streq(init_path, "/"))
435                 init_path[0] = 0;
436
437         if (startswith(process_path, init_path)) {
438                 char *p;
439
440                 p = strdup(process_path + strlen(init_path));
441                 if (!p) {
442                         free(process_path);
443                         free(init_path);
444                         return NULL;
445                 }
446                 path = p;
447         } else {
448                 path = process_path;
449                 process_path = NULL;
450         }
451
452         free(process_path);
453         free(init_path);
454
455         return path;
456 }
457
458 static void dispatch_message_real(
459                 Server *s,
460                 struct iovec *iovec, unsigned n, unsigned m,
461                 struct ucred *ucred,
462                 struct timeval *tv,
463                 const char *label, size_t label_len, const char *unit_id) {
464
465         char *pid = NULL, *uid = NULL, *gid = NULL,
466                 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
467                 *comm = NULL, *cmdline = NULL, *hostname = NULL,
468                 *audit_session = NULL, *audit_loginuid = NULL,
469                 *exe = NULL, *cgroup = NULL, *session = NULL,
470                 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
471
472         char idbuf[33];
473         sd_id128_t id;
474         int r;
475         char *t;
476         uid_t loginuid = 0, realuid = 0;
477         JournalFile *f;
478         bool vacuumed = false;
479
480         assert(s);
481         assert(iovec);
482         assert(n > 0);
483         assert(n + N_IOVEC_META_FIELDS <= m);
484
485         if (ucred) {
486                 uint32_t audit;
487 #ifdef HAVE_LOGIND
488                 uid_t owner;
489 #endif
490
491                 realuid = ucred->uid;
492
493                 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
494                         IOVEC_SET_STRING(iovec[n++], pid);
495
496                 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
497                         IOVEC_SET_STRING(iovec[n++], uid);
498
499                 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
500                         IOVEC_SET_STRING(iovec[n++], gid);
501
502                 r = get_process_comm(ucred->pid, &t);
503                 if (r >= 0) {
504                         comm = strappend("_COMM=", t);
505                         free(t);
506
507                         if (comm)
508                                 IOVEC_SET_STRING(iovec[n++], comm);
509                 }
510
511                 r = get_process_exe(ucred->pid, &t);
512                 if (r >= 0) {
513                         exe = strappend("_EXE=", t);
514                         free(t);
515
516                         if (exe)
517                                 IOVEC_SET_STRING(iovec[n++], exe);
518                 }
519
520                 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
521                 if (r >= 0) {
522                         cmdline = strappend("_CMDLINE=", t);
523                         free(t);
524
525                         if (cmdline)
526                                 IOVEC_SET_STRING(iovec[n++], cmdline);
527                 }
528
529                 r = audit_session_from_pid(ucred->pid, &audit);
530                 if (r >= 0)
531                         if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
532                                 IOVEC_SET_STRING(iovec[n++], audit_session);
533
534                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
535                 if (r >= 0)
536                         if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
537                                 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
538
539                 t = shortened_cgroup_path(ucred->pid);
540                 if (t) {
541                         cgroup = strappend("_SYSTEMD_CGROUP=", t);
542                         free(t);
543
544                         if (cgroup)
545                                 IOVEC_SET_STRING(iovec[n++], cgroup);
546                 }
547
548 #ifdef HAVE_LOGIND
549                 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
550                         session = strappend("_SYSTEMD_SESSION=", t);
551                         free(t);
552
553                         if (session)
554                                 IOVEC_SET_STRING(iovec[n++], session);
555                 }
556
557                 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
558                         if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
559                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
560 #endif
561
562                 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
563                         unit = strappend("_SYSTEMD_UNIT=", t);
564                         free(t);
565                 } else if (unit_id)
566                         unit = strappend("_SYSTEMD_UNIT=", unit_id);
567
568                 if (unit)
569                         IOVEC_SET_STRING(iovec[n++], unit);
570
571 #ifdef HAVE_SELINUX
572                 if (label) {
573                         selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
574                         if (selinux_context) {
575                                 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
576                                 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
577                                 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
578                                 IOVEC_SET_STRING(iovec[n++], selinux_context);
579                         }
580                 } else {
581                         security_context_t con;
582
583                         if (getpidcon(ucred->pid, &con) >= 0) {
584                                 selinux_context = strappend("_SELINUX_CONTEXT=", con);
585                                 if (selinux_context)
586                                         IOVEC_SET_STRING(iovec[n++], selinux_context);
587
588                                 freecon(con);
589                         }
590                 }
591 #endif
592         }
593
594         if (tv) {
595                 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
596                              (unsigned long long) timeval_load(tv)) >= 0)
597                         IOVEC_SET_STRING(iovec[n++], source_time);
598         }
599
600         /* Note that strictly speaking storing the boot id here is
601          * redundant since the entry includes this in-line
602          * anyway. However, we need this indexed, too. */
603         r = sd_id128_get_boot(&id);
604         if (r >= 0)
605                 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
606                         IOVEC_SET_STRING(iovec[n++], boot_id);
607
608         r = sd_id128_get_machine(&id);
609         if (r >= 0)
610                 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
611                         IOVEC_SET_STRING(iovec[n++], machine_id);
612
613         t = gethostname_malloc();
614         if (t) {
615                 hostname = strappend("_HOSTNAME=", t);
616                 free(t);
617                 if (hostname)
618                         IOVEC_SET_STRING(iovec[n++], hostname);
619         }
620
621         assert(n <= m);
622
623         server_flush_to_var(s);
624
625 retry:
626         f = find_journal(s, realuid == 0 ? 0 : loginuid);
627         if (!f)
628                 log_warning("Dropping message, as we can't find a place to store the data.");
629         else {
630                 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
631
632                 if ((r == -E2BIG || /* hit limit */
633                      r == -EFBIG || /* hit fs limit */
634                      r == -EDQUOT || /* quota hit */
635                      r == -ENOSPC || /* disk full */
636                      r == -EBADMSG || /* corrupted */
637                      r == -ENODATA || /* truncated */
638                      r == -EHOSTDOWN || /* other machine */
639                      r == -EPROTONOSUPPORT) && /* unsupported feature */
640                     !vacuumed) {
641
642                         if (r == -E2BIG)
643                                 log_info("Allocation limit reached, rotating.");
644                         else
645                                 log_warning("Journal file corrupted, rotating.");
646
647                         server_rotate(s);
648                         server_vacuum(s);
649                         vacuumed = true;
650
651                         log_info("Retrying write.");
652                         goto retry;
653                 }
654
655                 if (r < 0)
656                         log_error("Failed to write entry, ignoring: %s", strerror(-r));
657         }
658
659         free(pid);
660         free(uid);
661         free(gid);
662         free(comm);
663         free(exe);
664         free(cmdline);
665         free(source_time);
666         free(boot_id);
667         free(machine_id);
668         free(hostname);
669         free(audit_session);
670         free(audit_loginuid);
671         free(cgroup);
672         free(session);
673         free(owner_uid);
674         free(unit);
675         free(selinux_context);
676 }
677
678 static void driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
679         char mid[11 + 32 + 1];
680         char buffer[16 + LINE_MAX + 1];
681         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
682         int n = 0;
683         va_list ap;
684         struct ucred ucred;
685
686         assert(s);
687         assert(format);
688
689         IOVEC_SET_STRING(iovec[n++], "PRIORITY=5");
690         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
691
692         memcpy(buffer, "MESSAGE=", 8);
693         va_start(ap, format);
694         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
695         va_end(ap);
696         char_array_0(buffer);
697         IOVEC_SET_STRING(iovec[n++], buffer);
698
699         snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
700         char_array_0(mid);
701         IOVEC_SET_STRING(iovec[n++], mid);
702
703         zero(ucred);
704         ucred.pid = getpid();
705         ucred.uid = getuid();
706         ucred.gid = getgid();
707
708         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
709 }
710
711 static void dispatch_message(Server *s,
712                              struct iovec *iovec, unsigned n, unsigned m,
713                              struct ucred *ucred,
714                              struct timeval *tv,
715                              const char *label, size_t label_len,
716                              const char *unit_id,
717                              int priority) {
718         int rl;
719         char *path = NULL, *c;
720
721         assert(s);
722         assert(iovec || n == 0);
723
724         if (n == 0)
725                 return;
726
727         if (LOG_PRI(priority) > s->max_level_store)
728                 return;
729
730         if (!ucred)
731                 goto finish;
732
733         path = shortened_cgroup_path(ucred->pid);
734         if (!path)
735                 goto finish;
736
737         /* example: /user/lennart/3/foobar
738          *          /system/dbus.service/foobar
739          *
740          * So let's cut of everything past the third /, since that is
741          * wher user directories start */
742
743         c = strchr(path, '/');
744         if (c) {
745                 c = strchr(c+1, '/');
746                 if (c) {
747                         c = strchr(c+1, '/');
748                         if (c)
749                                 *c = 0;
750                 }
751         }
752
753         rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
754
755         if (rl == 0) {
756                 free(path);
757                 return;
758         }
759
760         /* Write a suppression message if we suppressed something */
761         if (rl > 1)
762                 driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
763
764         free(path);
765
766 finish:
767         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
768 }
769
770 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
771         struct msghdr msghdr;
772         struct cmsghdr *cmsg;
773         union {
774                 struct cmsghdr cmsghdr;
775                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
776         } control;
777         union sockaddr_union sa;
778
779         assert(s);
780         assert(iovec);
781         assert(n_iovec > 0);
782
783         zero(msghdr);
784         msghdr.msg_iov = (struct iovec*) iovec;
785         msghdr.msg_iovlen = n_iovec;
786
787         zero(sa);
788         sa.un.sun_family = AF_UNIX;
789         strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
790         msghdr.msg_name = &sa;
791         msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
792
793         if (ucred) {
794                 zero(control);
795                 msghdr.msg_control = &control;
796                 msghdr.msg_controllen = sizeof(control);
797
798                 cmsg = CMSG_FIRSTHDR(&msghdr);
799                 cmsg->cmsg_level = SOL_SOCKET;
800                 cmsg->cmsg_type = SCM_CREDENTIALS;
801                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
802                 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
803                 msghdr.msg_controllen = cmsg->cmsg_len;
804         }
805
806         /* Forward the syslog message we received via /dev/log to
807          * /run/systemd/syslog. Unfortunately we currently can't set
808          * the SO_TIMESTAMP auxiliary data, and hence we don't. */
809
810         if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
811                 return;
812
813         /* The socket is full? I guess the syslog implementation is
814          * too slow, and we shouldn't wait for that... */
815         if (errno == EAGAIN)
816                 return;
817
818         if (ucred && errno == ESRCH) {
819                 struct ucred u;
820
821                 /* Hmm, presumably the sender process vanished
822                  * by now, so let's fix it as good as we
823                  * can, and retry */
824
825                 u = *ucred;
826                 u.pid = getpid();
827                 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
828
829                 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
830                         return;
831
832                 if (errno == EAGAIN)
833                         return;
834         }
835
836         log_debug("Failed to forward syslog message: %m");
837 }
838
839 static void forward_syslog_raw(Server *s, int priority, const char *buffer, struct ucred *ucred, struct timeval *tv) {
840         struct iovec iovec;
841
842         assert(s);
843         assert(buffer);
844
845         if (LOG_PRI(priority) > s->max_level_syslog)
846                 return;
847
848         IOVEC_SET_STRING(iovec, buffer);
849         forward_syslog_iovec(s, &iovec, 1, ucred, tv);
850 }
851
852 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
853         struct iovec iovec[5];
854         char header_priority[6], header_time[64], header_pid[16];
855         int n = 0;
856         time_t t;
857         struct tm *tm;
858         char *ident_buf = NULL;
859
860         assert(s);
861         assert(priority >= 0);
862         assert(priority <= 999);
863         assert(message);
864
865         if (LOG_PRI(priority) > s->max_level_syslog)
866                 return;
867
868         /* First: priority field */
869         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
870         char_array_0(header_priority);
871         IOVEC_SET_STRING(iovec[n++], header_priority);
872
873         /* Second: timestamp */
874         t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
875         tm = localtime(&t);
876         if (!tm)
877                 return;
878         if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
879                 return;
880         IOVEC_SET_STRING(iovec[n++], header_time);
881
882         /* Third: identifier and PID */
883         if (ucred) {
884                 if (!identifier) {
885                         get_process_comm(ucred->pid, &ident_buf);
886                         identifier = ident_buf;
887                 }
888
889                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
890                 char_array_0(header_pid);
891
892                 if (identifier)
893                         IOVEC_SET_STRING(iovec[n++], identifier);
894
895                 IOVEC_SET_STRING(iovec[n++], header_pid);
896         } else if (identifier) {
897                 IOVEC_SET_STRING(iovec[n++], identifier);
898                 IOVEC_SET_STRING(iovec[n++], ": ");
899         }
900
901         /* Fourth: message */
902         IOVEC_SET_STRING(iovec[n++], message);
903
904         forward_syslog_iovec(s, iovec, n, ucred, tv);
905
906         free(ident_buf);
907 }
908
/* Makes sure a syslog priority value carries a facility.
 *
 * If the facility bits are all zero (which is how the kernel facility
 * is encoded) the level is kept and the facility is replaced by
 * LOG_USER; any other value is returned unmodified. */
static int fixup_priority(int priority) {
        return (priority & LOG_FACMASK) != 0
                ? priority
                : ((priority & LOG_PRIMASK) | LOG_USER);
}
916
917 static void forward_kmsg(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
918         struct iovec iovec[5];
919         char header_priority[6], header_pid[16];
920         int n = 0;
921         char *ident_buf = NULL;
922         int fd;
923
924         assert(s);
925         assert(priority >= 0);
926         assert(priority <= 999);
927         assert(message);
928
929         if (LOG_PRI(priority) > s->max_level_kmsg)
930                 return;
931
932         /* Never allow messages with kernel facility to be written to
933          * kmsg, regardless where the data comes from. */
934         priority = fixup_priority(priority);
935
936         /* First: priority field */
937         snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
938         char_array_0(header_priority);
939         IOVEC_SET_STRING(iovec[n++], header_priority);
940
941         /* Second: identifier and PID */
942         if (ucred) {
943                 if (!identifier) {
944                         get_process_comm(ucred->pid, &ident_buf);
945                         identifier = ident_buf;
946                 }
947
948                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
949                 char_array_0(header_pid);
950
951                 if (identifier)
952                         IOVEC_SET_STRING(iovec[n++], identifier);
953
954                 IOVEC_SET_STRING(iovec[n++], header_pid);
955         } else if (identifier) {
956                 IOVEC_SET_STRING(iovec[n++], identifier);
957                 IOVEC_SET_STRING(iovec[n++], ": ");
958         }
959
960         /* Fourth: message */
961         IOVEC_SET_STRING(iovec[n++], message);
962         IOVEC_SET_STRING(iovec[n++], "\n");
963
964         fd = open("/dev/kmsg", O_WRONLY|O_NOCTTY|O_CLOEXEC);
965         if (fd < 0) {
966                 log_debug("Failed to open /dev/kmsg for logging: %s", strerror(errno));
967                 goto finish;
968         }
969
970         if (writev(fd, iovec, n) < 0)
971                 log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
972
973         close_nointr_nofail(fd);
974
975 finish:
976         free(ident_buf);
977 }
978
979 static void forward_console(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
980         struct iovec iovec[4];
981         char header_pid[16];
982         int n = 0, fd;
983         char *ident_buf = NULL;
984         const char *tty;
985
986         assert(s);
987         assert(message);
988
989         if (LOG_PRI(priority) > s->max_level_console)
990                 return;
991
992         /* First: identifier and PID */
993         if (ucred) {
994                 if (!identifier) {
995                         get_process_comm(ucred->pid, &ident_buf);
996                         identifier = ident_buf;
997                 }
998
999                 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
1000                 char_array_0(header_pid);
1001
1002                 if (identifier)
1003                         IOVEC_SET_STRING(iovec[n++], identifier);
1004
1005                 IOVEC_SET_STRING(iovec[n++], header_pid);
1006         } else if (identifier) {
1007                 IOVEC_SET_STRING(iovec[n++], identifier);
1008                 IOVEC_SET_STRING(iovec[n++], ": ");
1009         }
1010
1011         /* Third: message */
1012         IOVEC_SET_STRING(iovec[n++], message);
1013         IOVEC_SET_STRING(iovec[n++], "\n");
1014
1015         tty = s->tty_path ? s->tty_path : "/dev/console";
1016
1017         fd = open_terminal(tty, O_WRONLY|O_NOCTTY|O_CLOEXEC);
1018         if (fd < 0) {
1019                 log_debug("Failed to open %s for logging: %s", tty, strerror(errno));
1020                 goto finish;
1021         }
1022
1023         if (writev(fd, iovec, n) < 0)
1024                 log_debug("Failed to write to %s for logging: %s", tty, strerror(errno));
1025
1026         close_nointr_nofail(fd);
1027
1028 finish:
1029         free(ident_buf);
1030 }
1031
1032 static void read_identifier(const char **buf, char **identifier, char **pid) {
1033         const char *p;
1034         char *t;
1035         size_t l, e;
1036
1037         assert(buf);
1038         assert(identifier);
1039         assert(pid);
1040
1041         p = *buf;
1042
1043         p += strspn(p, WHITESPACE);
1044         l = strcspn(p, WHITESPACE);
1045
1046         if (l <= 0 ||
1047             p[l-1] != ':')
1048                 return;
1049
1050         e = l;
1051         l--;
1052
1053         if (p[l-1] == ']') {
1054                 size_t k = l-1;
1055
1056                 for (;;) {
1057
1058                         if (p[k] == '[') {
1059                                 t = strndup(p+k+1, l-k-2);
1060                                 if (t)
1061                                         *pid = t;
1062
1063                                 l = k;
1064                                 break;
1065                         }
1066
1067                         if (k == 0)
1068                                 break;
1069
1070                         k--;
1071                 }
1072         }
1073
1074         t = strndup(p, l);
1075         if (t)
1076                 *identifier = t;
1077
1078         *buf = p + e;
1079         *buf += strspn(*buf, WHITESPACE);
1080 }
1081
1082 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv, const char *label, size_t label_len) {
1083         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
1084         struct iovec iovec[N_IOVEC_META_FIELDS + 6];
1085         unsigned n = 0;
1086         int priority = LOG_USER | LOG_INFO;
1087         char *identifier = NULL, *pid = NULL;
1088         const char *orig;
1089
1090         assert(s);
1091         assert(buf);
1092
1093         orig = buf;
1094         parse_syslog_priority((char**) &buf, &priority);
1095
1096         if (s->forward_to_syslog)
1097                 forward_syslog_raw(s, priority, orig, ucred, tv);
1098
1099         skip_syslog_date((char**) &buf);
1100         read_identifier(&buf, &identifier, &pid);
1101
1102         if (s->forward_to_kmsg)
1103                 forward_kmsg(s, priority, identifier, buf, ucred);
1104
1105         if (s->forward_to_console)
1106                 forward_console(s, priority, identifier, buf, ucred);
1107
1108         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
1109
1110         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1111                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1112
1113         if (priority & LOG_FACMASK)
1114                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1115                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1116
1117         if (identifier) {
1118                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1119                 if (syslog_identifier)
1120                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1121         }
1122
1123         if (pid) {
1124                 syslog_pid = strappend("SYSLOG_PID=", pid);
1125                 if (syslog_pid)
1126                         IOVEC_SET_STRING(iovec[n++], syslog_pid);
1127         }
1128
1129         message = strappend("MESSAGE=", buf);
1130         if (message)
1131                 IOVEC_SET_STRING(iovec[n++], message);
1132
1133         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, label, label_len, NULL, priority);
1134
1135         free(message);
1136         free(identifier);
1137         free(pid);
1138         free(syslog_priority);
1139         free(syslog_facility);
1140         free(syslog_identifier);
1141 }
1142
/* Checks whether the l bytes at p form a field name that clients are
 * allowed to set.
 *
 * This roughly follows the POSIX syntax recommendations for
 * environment variable names, with a few extra restrictions:
 *
 *   http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
 *
 * A name is accepted if it is 1..64 characters long, consists only of
 * A-Z, 0-9 and '_', and starts with neither a digit nor an underscore
 * (names with a leading underscore are protected). */
static bool valid_user_field(const char *p, size_t l) {
        size_t i;

        /* Neither empty nor overly long names */
        if (l == 0 || l > 64)
                return false;

        /* A leading underscore marks a protected field, and a leading
         * digit is not permitted either */
        if (p[0] == '_' || (p[0] >= '0' && p[0] <= '9'))
                return false;

        for (i = 0; i < l; i++) {
                char c = p[i];
                bool upper = c >= 'A' && c <= 'Z';
                bool digit = c >= '0' && c <= '9';

                if (!upper && !digit && c != '_')
                        return false;
        }

        return true;
}
1177
1178 static void process_native_message(
1179                 Server *s,
1180                 const void *buffer, size_t buffer_size,
1181                 struct ucred *ucred,
1182                 struct timeval *tv,
1183                 const char *label, size_t label_len) {
1184
1185         struct iovec *iovec = NULL;
1186         unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1187         const char *p;
1188         size_t remaining;
1189         int priority = LOG_INFO;
1190         char *identifier = NULL, *message = NULL;
1191
1192         assert(s);
1193         assert(buffer || buffer_size == 0);
1194
1195         p = buffer;
1196         remaining = buffer_size;
1197
1198         while (remaining > 0) {
1199                 const char *e, *q;
1200
1201                 e = memchr(p, '\n', remaining);
1202
1203                 if (!e) {
1204                         /* Trailing noise, let's ignore it, and flush what we collected */
1205                         log_debug("Received message with trailing noise, ignoring.");
1206                         break;
1207                 }
1208
1209                 if (e == p) {
1210                         /* Entry separator */
1211                         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
1212                         n = 0;
1213                         priority = LOG_INFO;
1214
1215                         p++;
1216                         remaining--;
1217                         continue;
1218                 }
1219
1220                 if (*p == '.' || *p == '#') {
1221                         /* Ignore control commands for now, and
1222                          * comments too. */
1223                         remaining -= (e - p) + 1;
1224                         p = e + 1;
1225                         continue;
1226                 }
1227
1228                 /* A property follows */
1229
1230                 if (n+N_IOVEC_META_FIELDS >= m) {
1231                         struct iovec *c;
1232                         unsigned u;
1233
1234                         u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1235                         c = realloc(iovec, u * sizeof(struct iovec));
1236                         if (!c) {
1237                                 log_error("Out of memory");
1238                                 break;
1239                         }
1240
1241                         iovec = c;
1242                         m = u;
1243                 }
1244
1245                 q = memchr(p, '=', e - p);
1246                 if (q) {
1247                         if (valid_user_field(p, q - p)) {
1248                                 size_t l;
1249
1250                                 l = e - p;
1251
1252                                 /* If the field name starts with an
1253                                  * underscore, skip the variable,
1254                                  * since that indidates a trusted
1255                                  * field */
1256                                 iovec[n].iov_base = (char*) p;
1257                                 iovec[n].iov_len = l;
1258                                 n++;
1259
1260                                 /* We need to determine the priority
1261                                  * of this entry for the rate limiting
1262                                  * logic */
1263                                 if (l == 10 &&
1264                                     memcmp(p, "PRIORITY=", 9) == 0 &&
1265                                     p[9] >= '0' && p[9] <= '9')
1266                                         priority = (priority & LOG_FACMASK) | (p[9] - '0');
1267
1268                                 else if (l == 17 &&
1269                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1270                                          p[16] >= '0' && p[16] <= '9')
1271                                         priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1272
1273                                 else if (l == 18 &&
1274                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1275                                          p[16] >= '0' && p[16] <= '9' &&
1276                                          p[17] >= '0' && p[17] <= '9')
1277                                         priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1278
1279                                 else if (l >= 19 &&
1280                                          memcmp(p, "SYSLOG_IDENTIFIER=", 18) == 0) {
1281                                         char *t;
1282
1283                                         t = strndup(p + 18, l - 18);
1284                                         if (t) {
1285                                                 free(identifier);
1286                                                 identifier = t;
1287                                         }
1288                                 } else if (l >= 8 &&
1289                                            memcmp(p, "MESSAGE=", 8) == 0) {
1290                                         char *t;
1291
1292                                         t = strndup(p + 8, l - 8);
1293                                         if (t) {
1294                                                 free(message);
1295                                                 message = t;
1296                                         }
1297                                 }
1298                         }
1299
1300                         remaining -= (e - p) + 1;
1301                         p = e + 1;
1302                         continue;
1303                 } else {
1304                         le64_t l_le;
1305                         uint64_t l;
1306                         char *k;
1307
1308                         if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1309                                 log_debug("Failed to parse message, ignoring.");
1310                                 break;
1311                         }
1312
1313                         memcpy(&l_le, e + 1, sizeof(uint64_t));
1314                         l = le64toh(l_le);
1315
1316                         if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1317                             e[1+sizeof(uint64_t)+l] != '\n') {
1318                                 log_debug("Failed to parse message, ignoring.");
1319                                 break;
1320                         }
1321
1322                         k = malloc((e - p) + 1 + l);
1323                         if (!k) {
1324                                 log_error("Out of memory");
1325                                 break;
1326                         }
1327
1328                         memcpy(k, p, e - p);
1329                         k[e - p] = '=';
1330                         memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1331
1332                         if (valid_user_field(p, e - p)) {
1333                                 iovec[n].iov_base = k;
1334                                 iovec[n].iov_len = (e - p) + 1 + l;
1335                                 n++;
1336                         } else
1337                                 free(k);
1338
1339                         remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1340                         p = e + 1 + sizeof(uint64_t) + l + 1;
1341                 }
1342         }
1343
1344         if (n <= 0)
1345                 goto finish;
1346
1347         tn = n++;
1348         IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1349
1350         if (message) {
1351                 if (s->forward_to_syslog)
1352                         forward_syslog(s, priority, identifier, message, ucred, tv);
1353
1354                 if (s->forward_to_kmsg)
1355                         forward_kmsg(s, priority, identifier, message, ucred);
1356
1357                 if (s->forward_to_console)
1358                         forward_console(s, priority, identifier, message, ucred);
1359         }
1360
1361         dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
1362
1363 finish:
1364         for (j = 0; j < n; j++)  {
1365                 if (j == tn)
1366                         continue;
1367
1368                 if (iovec[j].iov_base < buffer ||
1369                     (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1370                         free(iovec[j].iov_base);
1371         }
1372
1373         free(iovec);
1374         free(identifier);
1375         free(message);
1376 }
1377
1378 static void process_native_file(
1379                 Server *s,
1380                 int fd,
1381                 struct ucred *ucred,
1382                 struct timeval *tv,
1383                 const char *label, size_t label_len) {
1384
1385         struct stat st;
1386         void *p;
1387         ssize_t n;
1388
1389         assert(s);
1390         assert(fd >= 0);
1391
1392         /* Data is in the passed file, since it didn't fit in a
1393          * datagram. We can't map the file here, since clients might
1394          * then truncate it and trigger a SIGBUS for us. So let's
1395          * stupidly read it */
1396
1397         if (fstat(fd, &st) < 0) {
1398                 log_error("Failed to stat passed file, ignoring: %m");
1399                 return;
1400         }
1401
1402         if (!S_ISREG(st.st_mode)) {
1403                 log_error("File passed is not regular. Ignoring.");
1404                 return;
1405         }
1406
1407         if (st.st_size <= 0)
1408                 return;
1409
1410         if (st.st_size > ENTRY_SIZE_MAX) {
1411                 log_error("File passed too large. Ignoring.");
1412                 return;
1413         }
1414
1415         p = malloc(st.st_size);
1416         if (!p) {
1417                 log_error("Out of memory");
1418                 return;
1419         }
1420
1421         n = pread(fd, p, st.st_size, 0);
1422         if (n < 0)
1423                 log_error("Failed to read file, ignoring: %s", strerror(-n));
1424         else if (n > 0)
1425                 process_native_message(s, p, n, ucred, tv, label, label_len);
1426
1427         free(p);
1428 }
1429
1430 static int stdout_stream_log(StdoutStream *s, const char *p) {
1431         struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1432         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1433         unsigned n = 0;
1434         int priority;
1435         char *label = NULL;
1436         size_t label_len = 0;
1437
1438         assert(s);
1439         assert(p);
1440
1441         if (isempty(p))
1442                 return 0;
1443
1444         priority = s->priority;
1445
1446         if (s->level_prefix)
1447                 parse_syslog_priority((char**) &p, &priority);
1448
1449         if (s->forward_to_syslog || s->server->forward_to_syslog)
1450                 forward_syslog(s->server, fixup_priority(priority), s->identifier, p, &s->ucred, NULL);
1451
1452         if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1453                 forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1454
1455         if (s->forward_to_console || s->server->forward_to_console)
1456                 forward_console(s->server, priority, s->identifier, p, &s->ucred);
1457
1458         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1459
1460         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1461                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1462
1463         if (priority & LOG_FACMASK)
1464                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1465                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1466
1467         if (s->identifier) {
1468                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1469                 if (syslog_identifier)
1470                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1471         }
1472
1473         message = strappend("MESSAGE=", p);
1474         if (message)
1475                 IOVEC_SET_STRING(iovec[n++], message);
1476
1477 #ifdef HAVE_SELINUX
1478         if (s->security_context) {
1479                 label = (char*) s->security_context;
1480                 label_len = strlen((char*) s->security_context);
1481         }
1482 #endif
1483
1484         dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, label, label_len, s->unit_id, priority);
1485
1486         free(message);
1487         free(syslog_priority);
1488         free(syslog_facility);
1489         free(syslog_identifier);
1490
1491         return 0;
1492 }
1493
1494 static int stdout_stream_line(StdoutStream *s, char *p) {
1495         int r;
1496
1497         assert(s);
1498         assert(p);
1499
1500         p = strstrip(p);
1501
1502         switch (s->state) {
1503
1504         case STDOUT_STREAM_IDENTIFIER:
1505                 if (isempty(p))
1506                         s->identifier = NULL;
1507                 else  {
1508                         s->identifier = strdup(p);
1509                         if (!s->identifier) {
1510                                 log_error("Out of memory");
1511                                 return -ENOMEM;
1512                         }
1513                 }
1514
1515                 s->state = STDOUT_STREAM_UNIT_ID;
1516                 return 0;
1517
1518         case STDOUT_STREAM_UNIT_ID:
1519                 if (s->ucred.uid == 0) {
1520                         if (isempty(p))
1521                                 s->unit_id = NULL;
1522                         else  {
1523                                 s->unit_id = strdup(p);
1524                                 if (!s->unit_id) {
1525                                         log_error("Out of memory");
1526                                         return -ENOMEM;
1527                                 }
1528                         }
1529                 }
1530
1531                 s->state = STDOUT_STREAM_PRIORITY;
1532                 return 0;
1533
1534         case STDOUT_STREAM_PRIORITY:
1535                 r = safe_atoi(p, &s->priority);
1536                 if (r < 0 || s->priority <= 0 || s->priority >= 999) {
1537                         log_warning("Failed to parse log priority line.");
1538                         return -EINVAL;
1539                 }
1540
1541                 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1542                 return 0;
1543
1544         case STDOUT_STREAM_LEVEL_PREFIX:
1545                 r = parse_boolean(p);
1546                 if (r < 0) {
1547                         log_warning("Failed to parse level prefix line.");
1548                         return -EINVAL;
1549                 }
1550
1551                 s->level_prefix = !!r;
1552                 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1553                 return 0;
1554
1555         case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1556                 r = parse_boolean(p);
1557                 if (r < 0) {
1558                         log_warning("Failed to parse forward to syslog line.");
1559                         return -EINVAL;
1560                 }
1561
1562                 s->forward_to_syslog = !!r;
1563                 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1564                 return 0;
1565
1566         case STDOUT_STREAM_FORWARD_TO_KMSG:
1567                 r = parse_boolean(p);
1568                 if (r < 0) {
1569                         log_warning("Failed to parse copy to kmsg line.");
1570                         return -EINVAL;
1571                 }
1572
1573                 s->forward_to_kmsg = !!r;
1574                 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1575                 return 0;
1576
1577         case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1578                 r = parse_boolean(p);
1579                 if (r < 0) {
1580                         log_warning("Failed to parse copy to console line.");
1581                         return -EINVAL;
1582                 }
1583
1584                 s->forward_to_console = !!r;
1585                 s->state = STDOUT_STREAM_RUNNING;
1586                 return 0;
1587
1588         case STDOUT_STREAM_RUNNING:
1589                 return stdout_stream_log(s, p);
1590         }
1591
1592         assert_not_reached("Unknown stream state");
1593 }
1594
1595 static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
1596         char *p;
1597         size_t remaining;
1598         int r;
1599
1600         assert(s);
1601
1602         p = s->buffer;
1603         remaining = s->length;
1604         for (;;) {
1605                 char *end;
1606                 size_t skip;
1607
1608                 end = memchr(p, '\n', remaining);
1609                 if (end)
1610                         skip = end - p + 1;
1611                 else if (remaining >= sizeof(s->buffer) - 1) {
1612                         end = p + sizeof(s->buffer) - 1;
1613                         skip = remaining;
1614                 } else
1615                         break;
1616
1617                 *end = 0;
1618
1619                 r = stdout_stream_line(s, p);
1620                 if (r < 0)
1621                         return r;
1622
1623                 remaining -= skip;
1624                 p += skip;
1625         }
1626
1627         if (force_flush && remaining > 0) {
1628                 p[remaining] = 0;
1629                 r = stdout_stream_line(s, p);
1630                 if (r < 0)
1631                         return r;
1632
1633                 p += remaining;
1634                 remaining = 0;
1635         }
1636
1637         if (p > s->buffer) {
1638                 memmove(s->buffer, p, remaining);
1639                 s->length = remaining;
1640         }
1641
1642         return 0;
1643 }
1644
1645 static int stdout_stream_process(StdoutStream *s) {
1646         ssize_t l;
1647         int r;
1648
1649         assert(s);
1650
1651         l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1652         if (l < 0) {
1653
1654                 if (errno == EAGAIN)
1655                         return 0;
1656
1657                 log_warning("Failed to read from stream: %m");
1658                 return -errno;
1659         }
1660
1661         if (l == 0) {
1662                 r = stdout_stream_scan(s, true);
1663                 if (r < 0)
1664                         return r;
1665
1666                 return 0;
1667         }
1668
1669         s->length += l;
1670         r = stdout_stream_scan(s, false);
1671         if (r < 0)
1672                 return r;
1673
1674         return 1;
1675
1676 }
1677
1678 static void stdout_stream_free(StdoutStream *s) {
1679         assert(s);
1680
1681         if (s->server) {
1682                 assert(s->server->n_stdout_streams > 0);
1683                 s->server->n_stdout_streams --;
1684                 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
1685         }
1686
1687         if (s->fd >= 0) {
1688                 if (s->server)
1689                         epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1690
1691                 close_nointr_nofail(s->fd);
1692         }
1693
1694 #ifdef HAVE_SELINUX
1695         if (s->security_context)
1696                 freecon(s->security_context);
1697 #endif
1698
1699         free(s->identifier);
1700         free(s);
1701 }
1702
1703 static int stdout_stream_new(Server *s) {
1704         StdoutStream *stream;
1705         int fd, r;
1706         socklen_t len;
1707         struct epoll_event ev;
1708
1709         assert(s);
1710
1711         fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1712         if (fd < 0) {
1713                 if (errno == EAGAIN)
1714                         return 0;
1715
1716                 log_error("Failed to accept stdout connection: %m");
1717                 return -errno;
1718         }
1719
1720         if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1721                 log_warning("Too many stdout streams, refusing connection.");
1722                 close_nointr_nofail(fd);
1723                 return 0;
1724         }
1725
1726         stream = new0(StdoutStream, 1);
1727         if (!stream) {
1728                 log_error("Out of memory.");
1729                 close_nointr_nofail(fd);
1730                 return -ENOMEM;
1731         }
1732
1733         stream->fd = fd;
1734
1735         len = sizeof(stream->ucred);
1736         if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1737                 log_error("Failed to determine peer credentials: %m");
1738                 r = -errno;
1739                 goto fail;
1740         }
1741
1742 #ifdef HAVE_SELINUX
1743         if (getpeercon(fd, &stream->security_context) < 0 && errno != ENOPROTOOPT)
1744                 log_error("Failed to determine peer security context: %m");
1745 #endif
1746
1747         if (shutdown(fd, SHUT_WR) < 0) {
1748                 log_error("Failed to shutdown writing side of socket: %m");
1749                 r = -errno;
1750                 goto fail;
1751         }
1752
1753         zero(ev);
1754         ev.data.ptr = stream;
1755         ev.events = EPOLLIN;
1756         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1757                 log_error("Failed to add stream to event loop: %m");
1758                 r = -errno;
1759                 goto fail;
1760         }
1761
1762         stream->server = s;
1763         LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1764         s->n_stdout_streams ++;
1765
1766         return 0;
1767
1768 fail:
1769         stdout_stream_free(stream);
1770         return r;
1771 }
1772
1773 static int parse_kernel_timestamp(char **_p, usec_t *t) {
1774         usec_t r;
1775         int k, i;
1776         char *p;
1777
1778         assert(_p);
1779         assert(*_p);
1780         assert(t);
1781
1782         p = *_p;
1783
1784         if (strlen(p) < 14 || p[0] != '[' || p[13] != ']' || p[6] != '.')
1785                 return 0;
1786
1787         r = 0;
1788
1789         for (i = 1; i <= 5; i++) {
1790                 r *= 10;
1791
1792                 if (p[i] == ' ')
1793                         continue;
1794
1795                 k = undecchar(p[i]);
1796                 if (k < 0)
1797                         return 0;
1798
1799                 r += k;
1800         }
1801
1802         for (i = 7; i <= 12; i++) {
1803                 r *= 10;
1804
1805                 k = undecchar(p[i]);
1806                 if (k < 0)
1807                         return 0;
1808
1809                 r += k;
1810         }
1811
1812         *t = r;
1813         *_p += 14;
1814         *_p += strspn(*_p, WHITESPACE);
1815
1816         return 1;
1817 }
1818
/* Returns true if the textual PID in 'pid' parses successfully and equals
 * the PID of the calling process, false otherwise. */
static bool is_us(const char *pid) {
        pid_t parsed;

        assert(pid);

        return parse_pid(pid, &parsed) >= 0 && parsed == getpid();
}
1829
1830 static void proc_kmsg_line(Server *s, const char *p) {
1831         struct iovec iovec[N_IOVEC_META_FIELDS + 7];
1832         char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
1833         int priority = LOG_KERN | LOG_INFO;
1834         unsigned n = 0;
1835         usec_t usec;
1836         char *identifier = NULL, *pid = NULL;
1837
1838         assert(s);
1839         assert(p);
1840
1841         if (isempty(p))
1842                 return;
1843
1844         parse_syslog_priority((char **) &p, &priority);
1845
1846         if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
1847                 return;
1848
1849         if (parse_kernel_timestamp((char **) &p, &usec) > 0) {
1850                 if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
1851                              (unsigned long long) usec) >= 0)
1852                         IOVEC_SET_STRING(iovec[n++], source_time);
1853         }
1854
1855         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
1856
1857         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1858                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1859
1860         if ((priority & LOG_FACMASK) == LOG_KERN) {
1861
1862                 if (s->forward_to_syslog)
1863                         forward_syslog(s, priority, "kernel", p, NULL, NULL);
1864
1865                 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
1866         } else {
1867                 read_identifier(&p, &identifier, &pid);
1868
1869                 /* Avoid any messages we generated ourselves via
1870                  * log_info() and friends. */
1871                 if (pid && is_us(pid))
1872                         goto finish;
1873
1874                 if (s->forward_to_syslog)
1875                         forward_syslog(s, priority, identifier, p, NULL, NULL);
1876
1877                 if (identifier) {
1878                         syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1879                         if (syslog_identifier)
1880                                 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1881                 }
1882
1883                 if (pid) {
1884                         syslog_pid = strappend("SYSLOG_PID=", pid);
1885                         if (syslog_pid)
1886                                 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1887                 }
1888
1889                 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1890                         IOVEC_SET_STRING(iovec[n++], syslog_facility);
1891         }
1892
1893         message = strappend("MESSAGE=", p);
1894         if (message)
1895                 IOVEC_SET_STRING(iovec[n++], message);
1896
1897         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, NULL, priority);
1898
1899 finish:
1900         free(message);
1901         free(syslog_priority);
1902         free(syslog_identifier);
1903         free(syslog_pid);
1904         free(syslog_facility);
1905         free(source_time);
1906         free(identifier);
1907         free(pid);
1908 }
1909
1910 static void proc_kmsg_scan(Server *s) {
1911         char *p;
1912         size_t remaining;
1913
1914         assert(s);
1915
1916         p = s->proc_kmsg_buffer;
1917         remaining = s->proc_kmsg_length;
1918         for (;;) {
1919                 char *end;
1920                 size_t skip;
1921
1922                 end = memchr(p, '\n', remaining);
1923                 if (end)
1924                         skip = end - p + 1;
1925                 else if (remaining >= sizeof(s->proc_kmsg_buffer) - 1) {
1926                         end = p + sizeof(s->proc_kmsg_buffer) - 1;
1927                         skip = remaining;
1928                 } else
1929                         break;
1930
1931                 *end = 0;
1932
1933                 proc_kmsg_line(s, p);
1934
1935                 remaining -= skip;
1936                 p += skip;
1937         }
1938
1939         if (p > s->proc_kmsg_buffer) {
1940                 memmove(s->proc_kmsg_buffer, p, remaining);
1941                 s->proc_kmsg_length = remaining;
1942         }
1943 }
1944
1945 static int system_journal_open(Server *s) {
1946         int r;
1947         char *fn;
1948         sd_id128_t machine;
1949         char ids[33];
1950
1951         r = sd_id128_get_machine(&machine);
1952         if (r < 0)
1953                 return r;
1954
1955         sd_id128_to_string(machine, ids);
1956
1957         if (!s->system_journal) {
1958
1959                 /* First try to create the machine path, but not the prefix */
1960                 fn = strappend("/var/log/journal/", ids);
1961                 if (!fn)
1962                         return -ENOMEM;
1963                 (void) mkdir(fn, 0755);
1964                 free(fn);
1965
1966                 /* The create the system journal file */
1967                 fn = join("/var/log/journal/", ids, "/system.journal", NULL);
1968                 if (!fn)
1969                         return -ENOMEM;
1970
1971                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, NULL, &s->system_journal);
1972                 free(fn);
1973
1974                 if (r >= 0) {
1975                         journal_default_metrics(&s->system_metrics, s->system_journal->fd);
1976
1977                         s->system_journal->metrics = s->system_metrics;
1978                         s->system_journal->compress = s->compress;
1979
1980                         server_fix_perms(s, s->system_journal, 0);
1981                 } else if (r < 0) {
1982
1983                         if (r != -ENOENT && r != -EROFS)
1984                                 log_warning("Failed to open system journal: %s", strerror(-r));
1985
1986                         r = 0;
1987                 }
1988         }
1989
1990         if (!s->runtime_journal) {
1991
1992                 fn = join("/run/log/journal/", ids, "/system.journal", NULL);
1993                 if (!fn)
1994                         return -ENOMEM;
1995
1996                 if (s->system_journal) {
1997
1998                         /* Try to open the runtime journal, but only
1999                          * if it already exists, so that we can flush
2000                          * it into the system journal */
2001
2002                         r = journal_file_open(fn, O_RDWR, 0640, NULL, &s->runtime_journal);
2003                         free(fn);
2004
2005                         if (r < 0) {
2006                                 if (r != -ENOENT)
2007                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
2008
2009                                 r = 0;
2010                         }
2011
2012                 } else {
2013
2014                         /* OK, we really need the runtime journal, so create
2015                          * it if necessary. */
2016
2017                         (void) mkdir_parents_label(fn, 0755);
2018                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, NULL, &s->runtime_journal);
2019                         free(fn);
2020
2021                         if (r < 0) {
2022                                 log_error("Failed to open runtime journal: %s", strerror(-r));
2023                                 return r;
2024                         }
2025                 }
2026
2027                 if (s->runtime_journal) {
2028                         journal_default_metrics(&s->runtime_metrics, s->runtime_journal->fd);
2029
2030                         s->runtime_journal->metrics = s->runtime_metrics;
2031                         s->runtime_journal->compress = s->compress;
2032
2033                         server_fix_perms(s, s->runtime_journal, 0);
2034                 }
2035         }
2036
2037         return r;
2038 }
2039
2040 static int server_flush_to_var(Server *s) {
2041         char path[] = "/run/log/journal/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
2042         Object *o = NULL;
2043         int r;
2044         sd_id128_t machine;
2045         sd_journal *j;
2046         usec_t ts;
2047
2048         assert(s);
2049
2050         if (!s->runtime_journal)
2051                 return 0;
2052
2053         ts = now(CLOCK_MONOTONIC);
2054         if (s->var_available_timestamp + RECHECK_VAR_AVAILABLE_USEC > ts)
2055                 return 0;
2056
2057         s->var_available_timestamp = ts;
2058
2059         system_journal_open(s);
2060
2061         if (!s->system_journal)
2062                 return 0;
2063
2064         log_info("Flushing to /var...");
2065
2066         r = sd_id128_get_machine(&machine);
2067         if (r < 0) {
2068                 log_error("Failed to get machine id: %s", strerror(-r));
2069                 return r;
2070         }
2071
2072         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
2073         if (r < 0) {
2074                 log_error("Failed to read runtime journal: %s", strerror(-r));
2075                 return r;
2076         }
2077
2078         SD_JOURNAL_FOREACH(j) {
2079                 JournalFile *f;
2080
2081                 f = j->current_file;
2082                 assert(f && f->current_offset > 0);
2083
2084                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2085                 if (r < 0) {
2086                         log_error("Can't read entry: %s", strerror(-r));
2087                         goto finish;
2088                 }
2089
2090                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2091                 if (r == -E2BIG) {
2092                         log_info("Allocation limit reached.");
2093
2094                         journal_file_post_change(s->system_journal);
2095                         server_rotate(s);
2096                         server_vacuum(s);
2097
2098                         r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2099                 }
2100
2101                 if (r < 0) {
2102                         log_error("Can't write entry: %s", strerror(-r));
2103                         goto finish;
2104                 }
2105         }
2106
2107 finish:
2108         journal_file_post_change(s->system_journal);
2109
2110         journal_file_close(s->runtime_journal);
2111         s->runtime_journal = NULL;
2112
2113         if (r >= 0) {
2114                 sd_id128_to_string(machine, path + 17);
2115                 rm_rf(path, false, true, false);
2116         }
2117
2118         return r;
2119 }
2120
2121 static int server_read_proc_kmsg(Server *s) {
2122         ssize_t l;
2123         assert(s);
2124         assert(s->proc_kmsg_fd >= 0);
2125
2126         l = read(s->proc_kmsg_fd, s->proc_kmsg_buffer + s->proc_kmsg_length, sizeof(s->proc_kmsg_buffer) - 1 - s->proc_kmsg_length);
2127         if (l < 0) {
2128
2129                 if (errno == EAGAIN || errno == EINTR)
2130                         return 0;
2131
2132                 log_error("Failed to read from kernel: %m");
2133                 return -errno;
2134         }
2135
2136         s->proc_kmsg_length += l;
2137
2138         proc_kmsg_scan(s);
2139         return 1;
2140 }
2141
2142 static int server_flush_proc_kmsg(Server *s) {
2143         int r;
2144
2145         assert(s);
2146
2147         if (s->proc_kmsg_fd < 0)
2148                 return 0;
2149
2150         log_info("Flushing /proc/kmsg...");
2151
2152         for (;;) {
2153                 r = server_read_proc_kmsg(s);
2154                 if (r < 0)
2155                         return r;
2156
2157                 if (r == 0)
2158                         break;
2159         }
2160
2161         return 0;
2162 }
2163
2164 static int process_event(Server *s, struct epoll_event *ev) {
2165         assert(s);
2166         assert(ev);
2167
2168         if (ev->data.fd == s->signal_fd) {
2169                 struct signalfd_siginfo sfsi;
2170                 ssize_t n;
2171
2172                 if (ev->events != EPOLLIN) {
2173                         log_info("Got invalid event from epoll.");
2174                         return -EIO;
2175                 }
2176
2177                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
2178                 if (n != sizeof(sfsi)) {
2179
2180                         if (n >= 0)
2181                                 return -EIO;
2182
2183                         if (errno == EINTR || errno == EAGAIN)
2184                                 return 1;
2185
2186                         return -errno;
2187                 }
2188
2189                 if (sfsi.ssi_signo == SIGUSR1) {
2190                         server_flush_to_var(s);
2191                         return 1;
2192                 }
2193
2194                 if (sfsi.ssi_signo == SIGUSR2) {
2195                         server_rotate(s);
2196                         server_vacuum(s);
2197                         return 1;
2198                 }
2199
2200                 log_debug("Received SIG%s", signal_to_string(sfsi.ssi_signo));
2201                 return 0;
2202
2203         } else if (ev->data.fd == s->proc_kmsg_fd) {
2204                 int r;
2205
2206                 if (ev->events != EPOLLIN) {
2207                         log_info("Got invalid event from epoll.");
2208                         return -EIO;
2209                 }
2210
2211                 r = server_read_proc_kmsg(s);
2212                 if (r < 0)
2213                         return r;
2214
2215                 return 1;
2216
2217         } else if (ev->data.fd == s->native_fd ||
2218                    ev->data.fd == s->syslog_fd) {
2219
2220                 if (ev->events != EPOLLIN) {
2221                         log_info("Got invalid event from epoll.");
2222                         return -EIO;
2223                 }
2224
2225                 for (;;) {
2226                         struct msghdr msghdr;
2227                         struct iovec iovec;
2228                         struct ucred *ucred = NULL;
2229                         struct timeval *tv = NULL;
2230                         struct cmsghdr *cmsg;
2231                         char *label = NULL;
2232                         size_t label_len = 0;
2233                         union {
2234                                 struct cmsghdr cmsghdr;
2235
2236                                 /* We use NAME_MAX space for the
2237                                  * SELinux label here. The kernel
2238                                  * currently enforces no limit, but
2239                                  * according to suggestions from the
2240                                  * SELinux people this will change and
2241                                  * it will probably be identical to
2242                                  * NAME_MAX. For now we use that, but
2243                                  * this should be updated one day when
2244                                  * the final limit is known.*/
2245                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
2246                                             CMSG_SPACE(sizeof(struct timeval)) +
2247                                             CMSG_SPACE(sizeof(int)) + /* fd */
2248                                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
2249                         } control;
2250                         ssize_t n;
2251                         int v;
2252                         int *fds = NULL;
2253                         unsigned n_fds = 0;
2254
2255                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
2256                                 log_error("SIOCINQ failed: %m");
2257                                 return -errno;
2258                         }
2259
2260                         if (s->buffer_size < (size_t) v) {
2261                                 void *b;
2262                                 size_t l;
2263
2264                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
2265                                 b = realloc(s->buffer, l+1);
2266
2267                                 if (!b) {
2268                                         log_error("Couldn't increase buffer.");
2269                                         return -ENOMEM;
2270                                 }
2271
2272                                 s->buffer_size = l;
2273                                 s->buffer = b;
2274                         }
2275
2276                         zero(iovec);
2277                         iovec.iov_base = s->buffer;
2278                         iovec.iov_len = s->buffer_size;
2279
2280                         zero(control);
2281                         zero(msghdr);
2282                         msghdr.msg_iov = &iovec;
2283                         msghdr.msg_iovlen = 1;
2284                         msghdr.msg_control = &control;
2285                         msghdr.msg_controllen = sizeof(control);
2286
2287                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
2288                         if (n < 0) {
2289
2290                                 if (errno == EINTR || errno == EAGAIN)
2291                                         return 1;
2292
2293                                 log_error("recvmsg() failed: %m");
2294                                 return -errno;
2295                         }
2296
2297                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
2298
2299                                 if (cmsg->cmsg_level == SOL_SOCKET &&
2300                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
2301                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
2302                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
2303                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2304                                          cmsg->cmsg_type == SCM_SECURITY) {
2305                                         label = (char*) CMSG_DATA(cmsg);
2306                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
2307                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
2308                                          cmsg->cmsg_type == SO_TIMESTAMP &&
2309                                          cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
2310                                         tv = (struct timeval*) CMSG_DATA(cmsg);
2311                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
2312                                          cmsg->cmsg_type == SCM_RIGHTS) {
2313                                         fds = (int*) CMSG_DATA(cmsg);
2314                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
2315                                 }
2316                         }
2317
2318                         if (ev->data.fd == s->syslog_fd) {
2319                                 char *e;
2320
2321                                 if (n > 0 && n_fds == 0) {
2322                                         e = memchr(s->buffer, '\n', n);
2323                                         if (e)
2324                                                 *e = 0;
2325                                         else
2326                                                 s->buffer[n] = 0;
2327
2328                                         process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
2329                                 } else if (n_fds > 0)
2330                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
2331
2332                         } else {
2333                                 if (n > 0 && n_fds == 0)
2334                                         process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
2335                                 else if (n == 0 && n_fds == 1)
2336                                         process_native_file(s, fds[0], ucred, tv, label, label_len);
2337                                 else if (n_fds > 0)
2338                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
2339                         }
2340
2341                         close_many(fds, n_fds);
2342                 }
2343
2344                 return 1;
2345
2346         } else if (ev->data.fd == s->stdout_fd) {
2347
2348                 if (ev->events != EPOLLIN) {
2349                         log_info("Got invalid event from epoll.");
2350                         return -EIO;
2351                 }
2352
2353                 stdout_stream_new(s);
2354                 return 1;
2355
2356         } else {
2357                 StdoutStream *stream;
2358
2359                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
2360                         log_info("Got invalid event from epoll.");
2361                         return -EIO;
2362                 }
2363
2364                 /* If it is none of the well-known fds, it must be an
2365                  * stdout stream fd. Note that this is a bit ugly here
2366                  * (since we rely that none of the well-known fds
2367                  * could be interpreted as pointer), but nonetheless
2368                  * safe, since the well-known fds would never get an
2369                  * fd > 4096, i.e. beyond the first memory page */
2370
2371                 stream = ev->data.ptr;
2372
2373                 if (stdout_stream_process(stream) <= 0)
2374                         stdout_stream_free(stream);
2375
2376                 return 1;
2377         }
2378
2379         log_error("Unknown event.");
2380         return 0;
2381 }
2382
2383 static int open_syslog_socket(Server *s) {
2384         union sockaddr_union sa;
2385         int one, r;
2386         struct epoll_event ev;
2387
2388         assert(s);
2389
2390         if (s->syslog_fd < 0) {
2391
2392                 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2393                 if (s->syslog_fd < 0) {
2394                         log_error("socket() failed: %m");
2395                         return -errno;
2396                 }
2397
2398                 zero(sa);
2399                 sa.un.sun_family = AF_UNIX;
2400                 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
2401
2402                 unlink(sa.un.sun_path);
2403
2404                 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2405                 if (r < 0) {
2406                         log_error("bind() failed: %m");
2407                         return -errno;
2408                 }
2409
2410                 chmod(sa.un.sun_path, 0666);
2411         } else
2412                 fd_nonblock(s->syslog_fd, 1);
2413
2414         one = 1;
2415         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2416         if (r < 0) {
2417                 log_error("SO_PASSCRED failed: %m");
2418                 return -errno;
2419         }
2420
2421 #ifdef HAVE_SELINUX
2422         one = 1;
2423         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2424         if (r < 0)
2425                 log_warning("SO_PASSSEC failed: %m");
2426 #endif
2427
2428         one = 1;
2429         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2430         if (r < 0) {
2431                 log_error("SO_TIMESTAMP failed: %m");
2432                 return -errno;
2433         }
2434
2435         zero(ev);
2436         ev.events = EPOLLIN;
2437         ev.data.fd = s->syslog_fd;
2438         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2439                 log_error("Failed to add syslog server fd to epoll object: %m");
2440                 return -errno;
2441         }
2442
2443         return 0;
2444 }
2445
2446 static int open_native_socket(Server*s) {
2447         union sockaddr_union sa;
2448         int one, r;
2449         struct epoll_event ev;
2450
2451         assert(s);
2452
2453         if (s->native_fd < 0) {
2454
2455                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2456                 if (s->native_fd < 0) {
2457                         log_error("socket() failed: %m");
2458                         return -errno;
2459                 }
2460
2461                 zero(sa);
2462                 sa.un.sun_family = AF_UNIX;
2463                 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
2464
2465                 unlink(sa.un.sun_path);
2466
2467                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2468                 if (r < 0) {
2469                         log_error("bind() failed: %m");
2470                         return -errno;
2471                 }
2472
2473                 chmod(sa.un.sun_path, 0666);
2474         } else
2475                 fd_nonblock(s->native_fd, 1);
2476
2477         one = 1;
2478         r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2479         if (r < 0) {
2480                 log_error("SO_PASSCRED failed: %m");
2481                 return -errno;
2482         }
2483
2484 #ifdef HAVE_SELINUX
2485         one = 1;
2486         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2487         if (r < 0)
2488                 log_warning("SO_PASSSEC failed: %m");
2489 #endif
2490
2491         one = 1;
2492         r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2493         if (r < 0) {
2494                 log_error("SO_TIMESTAMP failed: %m");
2495                 return -errno;
2496         }
2497
2498         zero(ev);
2499         ev.events = EPOLLIN;
2500         ev.data.fd = s->native_fd;
2501         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2502                 log_error("Failed to add native server fd to epoll object: %m");
2503                 return -errno;
2504         }
2505
2506         return 0;
2507 }
2508
2509 static int open_stdout_socket(Server *s) {
2510         union sockaddr_union sa;
2511         int r;
2512         struct epoll_event ev;
2513
2514         assert(s);
2515
2516         if (s->stdout_fd < 0) {
2517
2518                 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2519                 if (s->stdout_fd < 0) {
2520                         log_error("socket() failed: %m");
2521                         return -errno;
2522                 }
2523
2524                 zero(sa);
2525                 sa.un.sun_family = AF_UNIX;
2526                 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
2527
2528                 unlink(sa.un.sun_path);
2529
2530                 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2531                 if (r < 0) {
2532                         log_error("bind() failed: %m");
2533                         return -errno;
2534                 }
2535
2536                 chmod(sa.un.sun_path, 0666);
2537
2538                 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2539                         log_error("liste() failed: %m");
2540                         return -errno;
2541                 }
2542         } else
2543                 fd_nonblock(s->stdout_fd, 1);
2544
2545         zero(ev);
2546         ev.events = EPOLLIN;
2547         ev.data.fd = s->stdout_fd;
2548         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2549                 log_error("Failed to add stdout server fd to epoll object: %m");
2550                 return -errno;
2551         }
2552
2553         return 0;
2554 }
2555
2556 static int open_proc_kmsg(Server *s) {
2557         struct epoll_event ev;
2558
2559         assert(s);
2560
2561         if (!s->import_proc_kmsg)
2562                 return 0;
2563
2564         s->proc_kmsg_fd = open("/proc/kmsg", O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2565         if (s->proc_kmsg_fd < 0) {
2566                 log_warning("Failed to open /proc/kmsg, ignoring: %m");
2567                 return 0;
2568         }
2569
2570         zero(ev);
2571         ev.events = EPOLLIN;
2572         ev.data.fd = s->proc_kmsg_fd;
2573         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->proc_kmsg_fd, &ev) < 0) {
2574                 log_error("Failed to add /proc/kmsg fd to epoll object: %m");
2575                 return -errno;
2576         }
2577
2578         return 0;
2579 }
2580
2581 static int open_signalfd(Server *s) {
2582         sigset_t mask;
2583         struct epoll_event ev;
2584
2585         assert(s);
2586
2587         assert_se(sigemptyset(&mask) == 0);
2588         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
2589         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2590
2591         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2592         if (s->signal_fd < 0) {
2593                 log_error("signalfd(): %m");
2594                 return -errno;
2595         }
2596
2597         zero(ev);
2598         ev.events = EPOLLIN;
2599         ev.data.fd = s->signal_fd;
2600
2601         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2602                 log_error("epoll_ctl(): %m");
2603                 return -errno;
2604         }
2605
2606         return 0;
2607 }
2608
2609 static int server_parse_proc_cmdline(Server *s) {
2610         char *line, *w, *state;
2611         int r;
2612         size_t l;
2613
2614         if (detect_container(NULL) > 0)
2615                 return 0;
2616
2617         r = read_one_line_file("/proc/cmdline", &line);
2618         if (r < 0) {
2619                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2620                 return 0;
2621         }
2622
2623         FOREACH_WORD_QUOTED(w, l, line, state) {
2624                 char *word;
2625
2626                 word = strndup(w, l);
2627                 if (!word) {
2628                         r = -ENOMEM;
2629                         goto finish;
2630                 }
2631
2632                 if (startswith(word, "systemd_journald.forward_to_syslog=")) {
2633                         r = parse_boolean(word + 35);
2634                         if (r < 0)
2635                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2636                         else
2637                                 s->forward_to_syslog = r;
2638                 } else if (startswith(word, "systemd_journald.forward_to_kmsg=")) {
2639                         r = parse_boolean(word + 33);
2640                         if (r < 0)
2641                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2642                         else
2643                                 s->forward_to_kmsg = r;
2644                 } else if (startswith(word, "systemd_journald.forward_to_console=")) {
2645                         r = parse_boolean(word + 36);
2646                         if (r < 0)
2647                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2648                         else
2649                                 s->forward_to_console = r;
2650                 }
2651
2652                 free(word);
2653         }
2654
2655         r = 0;
2656
2657 finish:
2658         free(line);
2659         return r;
2660 }
2661
2662 static int server_parse_config_file(Server *s) {
2663         FILE *f;
2664         const char *fn;
2665         int r;
2666
2667         assert(s);
2668
2669         fn = "/etc/systemd/journald.conf";
2670         f = fopen(fn, "re");
2671         if (!f) {
2672                 if (errno == ENOENT)
2673                         return 0;
2674
2675                 log_warning("Failed to open configuration file %s: %m", fn);
2676                 return -errno;
2677         }
2678
2679         r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2680         if (r < 0)
2681                 log_warning("Failed to parse configuration file: %s", strerror(-r));
2682
2683         fclose(f);
2684
2685         return r;
2686 }
2687
2688 static int server_init(Server *s) {
2689         int n, r, fd;
2690
2691         assert(s);
2692
2693         zero(*s);
2694         s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->proc_kmsg_fd = -1;
2695         s->compress = true;
2696
2697         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
2698         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
2699
2700         s->forward_to_syslog = true;
2701         s->import_proc_kmsg = true;
2702
2703         s->max_level_store = LOG_DEBUG;
2704         s->max_level_syslog = LOG_DEBUG;
2705         s->max_level_kmsg = LOG_NOTICE;
2706         s->max_level_console = LOG_INFO;
2707
2708         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
2709         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
2710
2711         server_parse_config_file(s);
2712         server_parse_proc_cmdline(s);
2713
2714         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
2715         if (!s->user_journals) {
2716                 log_error("Out of memory.");
2717                 return -ENOMEM;
2718         }
2719
2720         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2721         if (s->epoll_fd < 0) {
2722                 log_error("Failed to create epoll object: %m");
2723                 return -errno;
2724         }
2725
2726         n = sd_listen_fds(true);
2727         if (n < 0) {
2728                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
2729                 return n;
2730         }
2731
2732         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2733
2734                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
2735
2736                         if (s->native_fd >= 0) {
2737                                 log_error("Too many native sockets passed.");
2738                                 return -EINVAL;
2739                         }
2740
2741                         s->native_fd = fd;
2742
2743                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
2744
2745                         if (s->stdout_fd >= 0) {
2746                                 log_error("Too many stdout sockets passed.");
2747                                 return -EINVAL;
2748                         }
2749
2750                         s->stdout_fd = fd;
2751
2752                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
2753
2754                         if (s->syslog_fd >= 0) {
2755                                 log_error("Too many /dev/log sockets passed.");
2756                                 return -EINVAL;
2757                         }
2758
2759                         s->syslog_fd = fd;
2760
2761                 } else {
2762                         log_error("Unknown socket passed.");
2763                         return -EINVAL;
2764                 }
2765         }
2766
2767         r = open_syslog_socket(s);
2768         if (r < 0)
2769                 return r;
2770
2771         r = open_native_socket(s);
2772         if (r < 0)
2773                 return r;
2774
2775         r = open_stdout_socket(s);
2776         if (r < 0)
2777                 return r;
2778
2779         r = open_proc_kmsg(s);
2780         if (r < 0)
2781                 return r;
2782
2783         r = open_signalfd(s);
2784         if (r < 0)
2785                 return r;
2786
2787         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2788         if (!s->rate_limit)
2789                 return -ENOMEM;
2790
2791         r = system_journal_open(s);
2792         if (r < 0)
2793                 return r;
2794
2795         return 0;
2796 }
2797
2798 static void server_done(Server *s) {
2799         JournalFile *f;
2800         assert(s);
2801
2802         while (s->stdout_streams)
2803                 stdout_stream_free(s->stdout_streams);
2804
2805         if (s->system_journal)
2806                 journal_file_close(s->system_journal);
2807
2808         if (s->runtime_journal)
2809                 journal_file_close(s->runtime_journal);
2810
2811         while ((f = hashmap_steal_first(s->user_journals)))
2812                 journal_file_close(f);
2813
2814         hashmap_free(s->user_journals);
2815
2816         if (s->epoll_fd >= 0)
2817                 close_nointr_nofail(s->epoll_fd);
2818
2819         if (s->signal_fd >= 0)
2820                 close_nointr_nofail(s->signal_fd);
2821
2822         if (s->syslog_fd >= 0)
2823                 close_nointr_nofail(s->syslog_fd);
2824
2825         if (s->native_fd >= 0)
2826                 close_nointr_nofail(s->native_fd);
2827
2828         if (s->stdout_fd >= 0)
2829                 close_nointr_nofail(s->stdout_fd);
2830
2831         if (s->proc_kmsg_fd >= 0)
2832                 close_nointr_nofail(s->proc_kmsg_fd);
2833
2834         if (s->rate_limit)
2835                 journal_rate_limit_free(s->rate_limit);
2836
2837         free(s->buffer);
2838         free(s->tty_path);
2839 }
2840
2841 int main(int argc, char *argv[]) {
2842         Server server;
2843         int r;
2844
2845         /* if (getppid() != 1) { */
2846         /*         log_error("This program should be invoked by init only."); */
2847         /*         return EXIT_FAILURE; */
2848         /* } */
2849
2850         if (argc > 1) {
2851                 log_error("This program does not take arguments.");
2852                 return EXIT_FAILURE;
2853         }
2854
2855         log_set_target(LOG_TARGET_SAFE);
2856         log_set_facility(LOG_SYSLOG);
2857         log_parse_environment();
2858         log_open();
2859
2860         umask(0022);
2861
2862         r = server_init(&server);
2863         if (r < 0)
2864                 goto finish;
2865
2866         server_vacuum(&server);
2867         server_flush_to_var(&server);
2868         server_flush_proc_kmsg(&server);
2869
2870         log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
2871         driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
2872
2873         sd_notify(false,
2874                   "READY=1\n"
2875                   "STATUS=Processing requests...");
2876
2877         for (;;) {
2878                 struct epoll_event event;
2879
2880                 r = epoll_wait(server.epoll_fd, &event, 1, -1);
2881                 if (r < 0) {
2882
2883                         if (errno == EINTR)
2884                                 continue;
2885
2886                         log_error("epoll_wait() failed: %m");
2887                         r = -errno;
2888                         goto finish;
2889                 } else if (r == 0)
2890                         break;
2891
2892                 r = process_event(&server, &event);
2893                 if (r < 0)
2894                         goto finish;
2895                 else if (r == 0)
2896                         break;
2897         }
2898
2899         log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
2900         driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
2901
2902 finish:
2903         sd_notify(false,
2904                   "STATUS=Shutting down...");
2905
2906         server_done(&server);
2907
2908         return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
2909 }