chiark / gitweb /
5836119bede27ddf963370ac11a04a8368550801
[elogind.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
32 #include <sys/mman.h>
33
34 #include <libudev.h>
35 #include <systemd/sd-journal.h>
36 #include <systemd/sd-messages.h>
37 #include <systemd/sd-daemon.h>
38
39 #ifdef HAVE_LOGIND
40 #include <systemd/sd-login.h>
41 #endif
42
43 #include "mkdir.h"
44 #include "hashmap.h"
45 #include "journal-file.h"
46 #include "socket-util.h"
47 #include "cgroup-util.h"
48 #include "list.h"
49 #include "virt.h"
50 #include "missing.h"
51 #include "conf-parser.h"
52 #include "journal-rate-limit.h"
53 #include "journal-internal.h"
54 #include "journal-vacuum.h"
55 #include "journal-authenticate.h"
56 #include "journald.h"
57 #include "journald-kmsg.h"
58 #include "journald-syslog.h"
59 #include "journald-stream.h"
60 #include "journald-console.h"
61
62 #ifdef HAVE_ACL
63 #include <sys/acl.h>
64 #include <acl/libacl.h>
65 #include "acl-util.h"
66 #endif
67
68 #ifdef HAVE_SELINUX
69 #include <selinux/selinux.h>
70 #endif
71
72 #define USER_JOURNALS_MAX 1024
73
74 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
75 #define DEFAULT_RATE_LIMIT_BURST 200
76
77 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
78
79 #define ENTRY_SIZE_MAX (1024*1024*32)
80
81 static const char* const storage_table[] = {
82         [STORAGE_AUTO] = "auto",
83         [STORAGE_VOLATILE] = "volatile",
84         [STORAGE_PERSISTENT] = "persistent",
85         [STORAGE_NONE] = "none"
86 };
87
88 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
89 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
90
91 static uint64_t available_space(Server *s) {
92         char ids[33], *p;
93         const char *f;
94         sd_id128_t machine;
95         struct statvfs ss;
96         uint64_t sum = 0, avail = 0, ss_avail = 0;
97         int r;
98         DIR *d;
99         usec_t ts;
100         JournalMetrics *m;
101
102         ts = now(CLOCK_MONOTONIC);
103
104         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
105                 return s->cached_available_space;
106
107         r = sd_id128_get_machine(&machine);
108         if (r < 0)
109                 return 0;
110
111         if (s->system_journal) {
112                 f = "/var/log/journal/";
113                 m = &s->system_metrics;
114         } else {
115                 f = "/run/log/journal/";
116                 m = &s->runtime_metrics;
117         }
118
119         assert(m);
120
121         p = strappend(f, sd_id128_to_string(machine, ids));
122         if (!p)
123                 return 0;
124
125         d = opendir(p);
126         free(p);
127
128         if (!d)
129                 return 0;
130
131         if (fstatvfs(dirfd(d), &ss) < 0)
132                 goto finish;
133
134         for (;;) {
135                 struct stat st;
136                 struct dirent buf, *de;
137
138                 r = readdir_r(d, &buf, &de);
139                 if (r != 0)
140                         break;
141
142                 if (!de)
143                         break;
144
145                 if (!endswith(de->d_name, ".journal") &&
146                     !endswith(de->d_name, ".journal~"))
147                         continue;
148
149                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
150                         continue;
151
152                 if (!S_ISREG(st.st_mode))
153                         continue;
154
155                 sum += (uint64_t) st.st_blocks * 512UL;
156         }
157
158         avail = sum >= m->max_use ? 0 : m->max_use - sum;
159
160         ss_avail = ss.f_bsize * ss.f_bavail;
161
162         ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
163
164         if (ss_avail < avail)
165                 avail = ss_avail;
166
167         s->cached_available_space = avail;
168         s->cached_available_space_timestamp = ts;
169
170 finish:
171         closedir(d);
172
173         return avail;
174 }
175
176 static void server_read_file_gid(Server *s) {
177         const char *adm = "adm";
178         int r;
179
180         assert(s);
181
182         if (s->file_gid_valid)
183                 return;
184
185         r = get_group_creds(&adm, &s->file_gid);
186         if (r < 0)
187                 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
188
189         /* if we couldn't read the gid, then it will be 0, but that's
190          * fine and we shouldn't try to resolve the group again, so
191          * let's just pretend it worked right-away. */
192         s->file_gid_valid = true;
193 }
194
195 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
196         int r;
197 #ifdef HAVE_ACL
198         acl_t acl;
199         acl_entry_t entry;
200         acl_permset_t permset;
201 #endif
202
203         assert(f);
204
205         server_read_file_gid(s);
206
207         r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
208         if (r < 0)
209                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
210
211 #ifdef HAVE_ACL
212         if (uid <= 0)
213                 return;
214
215         acl = acl_get_fd(f->fd);
216         if (!acl) {
217                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
218                 return;
219         }
220
221         r = acl_find_uid(acl, uid, &entry);
222         if (r <= 0) {
223
224                 if (acl_create_entry(&acl, &entry) < 0 ||
225                     acl_set_tag_type(entry, ACL_USER) < 0 ||
226                     acl_set_qualifier(entry, &uid) < 0) {
227                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
228                         goto finish;
229                 }
230         }
231
232         if (acl_get_permset(entry, &permset) < 0 ||
233             acl_add_perm(permset, ACL_READ) < 0 ||
234             acl_calc_mask(&acl) < 0) {
235                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
236                 goto finish;
237         }
238
239         if (acl_set_fd(f->fd, acl) < 0)
240                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
241
242 finish:
243         acl_free(acl);
244 #endif
245 }
246
247 static JournalFile* find_journal(Server *s, uid_t uid) {
248         char *p;
249         int r;
250         JournalFile *f;
251         sd_id128_t machine;
252
253         assert(s);
254
255         /* We split up user logs only on /var, not on /run. If the
256          * runtime file is open, we write to it exclusively, in order
257          * to guarantee proper order as soon as we flush /run to
258          * /var and close the runtime file. */
259
260         if (s->runtime_journal)
261                 return s->runtime_journal;
262
263         if (uid <= 0)
264                 return s->system_journal;
265
266         r = sd_id128_get_machine(&machine);
267         if (r < 0)
268                 return s->system_journal;
269
270         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
271         if (f)
272                 return f;
273
274         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
275                      SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
276                 return s->system_journal;
277
278         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
279                 /* Too many open? Then let's close one */
280                 f = hashmap_steal_first(s->user_journals);
281                 assert(f);
282                 journal_file_close(f);
283         }
284
285         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
286         free(p);
287
288         if (r < 0)
289                 return s->system_journal;
290
291         server_fix_perms(s, f, uid);
292
293         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
294         if (r < 0) {
295                 journal_file_close(f);
296                 return s->system_journal;
297         }
298
299         return f;
300 }
301
302 static void server_rotate(Server *s) {
303         JournalFile *f;
304         void *k;
305         Iterator i;
306         int r;
307
308         log_info("Rotating...");
309
310         if (s->runtime_journal) {
311                 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
312                 if (r < 0)
313                         if (s->runtime_journal)
314                                 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
315                         else
316                                 log_error("Failed to create new runtime journal: %s", strerror(-r));
317                 else
318                         server_fix_perms(s, s->runtime_journal, 0);
319         }
320
321         if (s->system_journal) {
322                 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
323                 if (r < 0)
324                         if (s->system_journal)
325                                 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
326                         else
327                                 log_error("Failed to create new system journal: %s", strerror(-r));
328
329                 else
330                         server_fix_perms(s, s->system_journal, 0);
331         }
332
333         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
334                 r = journal_file_rotate(&f, s->compress, s->seal);
335                 if (r < 0)
336                         if (f->path)
337                                 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
338                         else
339                                 log_error("Failed to create user journal: %s", strerror(-r));
340                 else {
341                         hashmap_replace(s->user_journals, k, f);
342                         server_fix_perms(s, s->system_journal, PTR_TO_UINT32(k));
343                 }
344         }
345 }
346
347 static void server_vacuum(Server *s) {
348         char *p;
349         char ids[33];
350         sd_id128_t machine;
351         int r;
352
353         log_info("Vacuuming...");
354
355         r = sd_id128_get_machine(&machine);
356         if (r < 0) {
357                 log_error("Failed to get machine ID: %s", strerror(-r));
358                 return;
359         }
360
361         sd_id128_to_string(machine, ids);
362
363         if (s->system_journal) {
364                 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
365                         log_oom();
366                         return;
367                 }
368
369                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
370                 if (r < 0 && r != -ENOENT)
371                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
372                 free(p);
373         }
374
375         if (s->runtime_journal) {
376                 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
377                         log_oom();
378                         return;
379                 }
380
381                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
382                 if (r < 0 && r != -ENOENT)
383                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
384                 free(p);
385         }
386
387         s->cached_available_space_timestamp = 0;
388 }
389
390 static char *shortened_cgroup_path(pid_t pid) {
391         int r;
392         char *process_path, *init_path, *path;
393
394         assert(pid > 0);
395
396         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
397         if (r < 0)
398                 return NULL;
399
400         r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
401         if (r < 0) {
402                 free(process_path);
403                 return NULL;
404         }
405
406         if (endswith(init_path, "/system"))
407                 init_path[strlen(init_path) - 7] = 0;
408         else if (streq(init_path, "/"))
409                 init_path[0] = 0;
410
411         if (startswith(process_path, init_path)) {
412                 char *p;
413
414                 p = strdup(process_path + strlen(init_path));
415                 if (!p) {
416                         free(process_path);
417                         free(init_path);
418                         return NULL;
419                 }
420                 path = p;
421         } else {
422                 path = process_path;
423                 process_path = NULL;
424         }
425
426         free(process_path);
427         free(init_path);
428
429         return path;
430 }
431
432 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
433         JournalFile *f;
434         bool vacuumed = false;
435         int r;
436
437         assert(s);
438         assert(iovec);
439         assert(n > 0);
440
441         f = find_journal(s, uid);
442         if (!f)
443                 return;
444
445         if (journal_file_rotate_suggested(f)) {
446                 log_info("Journal header limits reached or header out-of-date, rotating.");
447                 server_rotate(s);
448                 server_vacuum(s);
449                 vacuumed = true;
450
451                 f = find_journal(s, uid);
452                 if (!f)
453                         return;
454         }
455
456         for (;;) {
457                 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
458                 if (r >= 0)
459                         return;
460
461                 if (vacuumed ||
462                     (r != -E2BIG && /* hit limit */
463                      r != -EFBIG && /* hit fs limit */
464                      r != -EDQUOT && /* quota hit */
465                      r != -ENOSPC && /* disk full */
466                      r != -EBADMSG && /* corrupted */
467                      r != -ENODATA && /* truncated */
468                      r != -EHOSTDOWN && /* other machine */
469                      r != -EPROTONOSUPPORT && /* unsupported feature */
470                      r != -EBUSY && /* unclean shutdown */
471                      r != -ESHUTDOWN /* already archived */)) {
472                         log_error("Failed to write entry, ignoring: %s", strerror(-r));
473                         return;
474                 }
475
476                 if (r == -E2BIG || r == -EFBIG || r == EDQUOT || r == ENOSPC)
477                         log_info("Allocation limit reached, rotating.");
478                 else if (r == -EHOSTDOWN)
479                         log_info("Journal file from other machine, rotating.");
480                 else if (r == -EBUSY)
481                         log_info("Unlcean shutdown, rotating.");
482                 else
483                         log_warning("Journal file corrupted, rotating.");
484
485                 server_rotate(s);
486                 server_vacuum(s);
487                 vacuumed = true;
488
489                 f = find_journal(s, uid);
490                 if (!f)
491                         return;
492
493                 log_info("Retrying write.");
494         }
495 }
496
497 static void dispatch_message_real(
498                 Server *s,
499                 struct iovec *iovec, unsigned n, unsigned m,
500                 struct ucred *ucred,
501                 struct timeval *tv,
502                 const char *label, size_t label_len,
503                 const char *unit_id) {
504
505         char *pid = NULL, *uid = NULL, *gid = NULL,
506                 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
507                 *comm = NULL, *cmdline = NULL, *hostname = NULL,
508                 *audit_session = NULL, *audit_loginuid = NULL,
509                 *exe = NULL, *cgroup = NULL, *session = NULL,
510                 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
511
512         char idbuf[33];
513         sd_id128_t id;
514         int r;
515         char *t;
516         uid_t loginuid = 0, realuid = 0;
517
518         assert(s);
519         assert(iovec);
520         assert(n > 0);
521         assert(n + N_IOVEC_META_FIELDS <= m);
522
523         if (ucred) {
524                 uint32_t audit;
525 #ifdef HAVE_LOGIND
526                 uid_t owner;
527 #endif
528
529                 realuid = ucred->uid;
530
531                 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
532                         IOVEC_SET_STRING(iovec[n++], pid);
533
534                 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
535                         IOVEC_SET_STRING(iovec[n++], uid);
536
537                 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
538                         IOVEC_SET_STRING(iovec[n++], gid);
539
540                 r = get_process_comm(ucred->pid, &t);
541                 if (r >= 0) {
542                         comm = strappend("_COMM=", t);
543                         free(t);
544
545                         if (comm)
546                                 IOVEC_SET_STRING(iovec[n++], comm);
547                 }
548
549                 r = get_process_exe(ucred->pid, &t);
550                 if (r >= 0) {
551                         exe = strappend("_EXE=", t);
552                         free(t);
553
554                         if (exe)
555                                 IOVEC_SET_STRING(iovec[n++], exe);
556                 }
557
558                 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
559                 if (r >= 0) {
560                         cmdline = strappend("_CMDLINE=", t);
561                         free(t);
562
563                         if (cmdline)
564                                 IOVEC_SET_STRING(iovec[n++], cmdline);
565                 }
566
567                 r = audit_session_from_pid(ucred->pid, &audit);
568                 if (r >= 0)
569                         if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
570                                 IOVEC_SET_STRING(iovec[n++], audit_session);
571
572                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
573                 if (r >= 0)
574                         if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
575                                 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
576
577                 t = shortened_cgroup_path(ucred->pid);
578                 if (t) {
579                         cgroup = strappend("_SYSTEMD_CGROUP=", t);
580                         free(t);
581
582                         if (cgroup)
583                                 IOVEC_SET_STRING(iovec[n++], cgroup);
584                 }
585
586 #ifdef HAVE_LOGIND
587                 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
588                         session = strappend("_SYSTEMD_SESSION=", t);
589                         free(t);
590
591                         if (session)
592                                 IOVEC_SET_STRING(iovec[n++], session);
593                 }
594
595                 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
596                         if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
597                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
598 #endif
599
600                 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
601                         unit = strappend("_SYSTEMD_UNIT=", t);
602                         free(t);
603                 } else if (unit_id)
604                         unit = strappend("_SYSTEMD_UNIT=", unit_id);
605
606                 if (unit)
607                         IOVEC_SET_STRING(iovec[n++], unit);
608
609 #ifdef HAVE_SELINUX
610                 if (label) {
611                         selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
612                         if (selinux_context) {
613                                 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
614                                 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
615                                 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
616                                 IOVEC_SET_STRING(iovec[n++], selinux_context);
617                         }
618                 } else {
619                         security_context_t con;
620
621                         if (getpidcon(ucred->pid, &con) >= 0) {
622                                 selinux_context = strappend("_SELINUX_CONTEXT=", con);
623                                 if (selinux_context)
624                                         IOVEC_SET_STRING(iovec[n++], selinux_context);
625
626                                 freecon(con);
627                         }
628                 }
629 #endif
630         }
631
632         if (tv) {
633                 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
634                              (unsigned long long) timeval_load(tv)) >= 0)
635                         IOVEC_SET_STRING(iovec[n++], source_time);
636         }
637
638         /* Note that strictly speaking storing the boot id here is
639          * redundant since the entry includes this in-line
640          * anyway. However, we need this indexed, too. */
641         r = sd_id128_get_boot(&id);
642         if (r >= 0)
643                 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
644                         IOVEC_SET_STRING(iovec[n++], boot_id);
645
646         r = sd_id128_get_machine(&id);
647         if (r >= 0)
648                 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
649                         IOVEC_SET_STRING(iovec[n++], machine_id);
650
651         t = gethostname_malloc();
652         if (t) {
653                 hostname = strappend("_HOSTNAME=", t);
654                 free(t);
655                 if (hostname)
656                         IOVEC_SET_STRING(iovec[n++], hostname);
657         }
658
659         assert(n <= m);
660
661         write_to_journal(s, realuid == 0 ? 0 : loginuid, iovec, n);
662
663         free(pid);
664         free(uid);
665         free(gid);
666         free(comm);
667         free(exe);
668         free(cmdline);
669         free(source_time);
670         free(boot_id);
671         free(machine_id);
672         free(hostname);
673         free(audit_session);
674         free(audit_loginuid);
675         free(cgroup);
676         free(session);
677         free(owner_uid);
678         free(unit);
679         free(selinux_context);
680 }
681
682 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
683         char mid[11 + 32 + 1];
684         char buffer[16 + LINE_MAX + 1];
685         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
686         int n = 0;
687         va_list ap;
688         struct ucred ucred;
689
690         assert(s);
691         assert(format);
692
693         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
694         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
695
696         memcpy(buffer, "MESSAGE=", 8);
697         va_start(ap, format);
698         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
699         va_end(ap);
700         char_array_0(buffer);
701         IOVEC_SET_STRING(iovec[n++], buffer);
702
703         snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
704         char_array_0(mid);
705         IOVEC_SET_STRING(iovec[n++], mid);
706
707         zero(ucred);
708         ucred.pid = getpid();
709         ucred.uid = getuid();
710         ucred.gid = getgid();
711
712         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
713 }
714
715 void server_dispatch_message(
716                 Server *s,
717                 struct iovec *iovec, unsigned n, unsigned m,
718                 struct ucred *ucred,
719                 struct timeval *tv,
720                 const char *label, size_t label_len,
721                 const char *unit_id,
722                 int priority) {
723
724         int rl;
725         char *path = NULL, *c;
726
727         assert(s);
728         assert(iovec || n == 0);
729
730         if (n == 0)
731                 return;
732
733         if (LOG_PRI(priority) > s->max_level_store)
734                 return;
735
736         if (!ucred)
737                 goto finish;
738
739         path = shortened_cgroup_path(ucred->pid);
740         if (!path)
741                 goto finish;
742
743         /* example: /user/lennart/3/foobar
744          *          /system/dbus.service/foobar
745          *
746          * So let's cut of everything past the third /, since that is
747          * wher user directories start */
748
749         c = strchr(path, '/');
750         if (c) {
751                 c = strchr(c+1, '/');
752                 if (c) {
753                         c = strchr(c+1, '/');
754                         if (c)
755                                 *c = 0;
756                 }
757         }
758
759         rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
760
761         if (rl == 0) {
762                 free(path);
763                 return;
764         }
765
766         /* Write a suppression message if we suppressed something */
767         if (rl > 1)
768                 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
769
770         free(path);
771
772 finish:
773         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
774 }
775
/* Checks whether the l bytes at p are acceptable as the name of a
 * field supplied by a client.
 *
 * We kinda enforce POSIX syntax recommendations for environment
 * variables here, but make a couple of additional requirements:
 *
 *   - the name is 1 to 64 characters long
 *   - it does not start with an underscore (those names are protected)
 *   - it does not start with a digit
 *   - it consists of A-Z, 0-9 and '_' only
 *
 * http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html */
static bool valid_user_field(const char *p, size_t l) {
        size_t i;

        if (l == 0 || l > 64)
                return false;

        if (p[0] == '_')
                return false;

        if (p[0] >= '0' && p[0] <= '9')
                return false;

        for (i = 0; i < l; i++) {
                char ch = p[i];
                bool is_upper = ch >= 'A' && ch <= 'Z';
                bool is_digit = ch >= '0' && ch <= '9';

                if (!is_upper && !is_digit && ch != '_')
                        return false;
        }

        return true;
}
810
811 static void process_native_message(
812                 Server *s,
813                 const void *buffer, size_t buffer_size,
814                 struct ucred *ucred,
815                 struct timeval *tv,
816                 const char *label, size_t label_len) {
817
818         struct iovec *iovec = NULL;
819         unsigned n = 0, m = 0, j, tn = (unsigned) -1;
820         const char *p;
821         size_t remaining;
822         int priority = LOG_INFO;
823         char *identifier = NULL, *message = NULL;
824
825         assert(s);
826         assert(buffer || buffer_size == 0);
827
828         p = buffer;
829         remaining = buffer_size;
830
831         while (remaining > 0) {
832                 const char *e, *q;
833
834                 e = memchr(p, '\n', remaining);
835
836                 if (!e) {
837                         /* Trailing noise, let's ignore it, and flush what we collected */
838                         log_debug("Received message with trailing noise, ignoring.");
839                         break;
840                 }
841
842                 if (e == p) {
843                         /* Entry separator */
844                         server_dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
845                         n = 0;
846                         priority = LOG_INFO;
847
848                         p++;
849                         remaining--;
850                         continue;
851                 }
852
853                 if (*p == '.' || *p == '#') {
854                         /* Ignore control commands for now, and
855                          * comments too. */
856                         remaining -= (e - p) + 1;
857                         p = e + 1;
858                         continue;
859                 }
860
861                 /* A property follows */
862
863                 if (n+N_IOVEC_META_FIELDS >= m) {
864                         struct iovec *c;
865                         unsigned u;
866
867                         u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
868                         c = realloc(iovec, u * sizeof(struct iovec));
869                         if (!c) {
870                                 log_oom();
871                                 break;
872                         }
873
874                         iovec = c;
875                         m = u;
876                 }
877
878                 q = memchr(p, '=', e - p);
879                 if (q) {
880                         if (valid_user_field(p, q - p)) {
881                                 size_t l;
882
883                                 l = e - p;
884
885                                 /* If the field name starts with an
886                                  * underscore, skip the variable,
887                                  * since that indidates a trusted
888                                  * field */
889                                 iovec[n].iov_base = (char*) p;
890                                 iovec[n].iov_len = l;
891                                 n++;
892
893                                 /* We need to determine the priority
894                                  * of this entry for the rate limiting
895                                  * logic */
896                                 if (l == 10 &&
897                                     memcmp(p, "PRIORITY=", 9) == 0 &&
898                                     p[9] >= '0' && p[9] <= '9')
899                                         priority = (priority & LOG_FACMASK) | (p[9] - '0');
900
901                                 else if (l == 17 &&
902                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
903                                          p[16] >= '0' && p[16] <= '9')
904                                         priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
905
906                                 else if (l == 18 &&
907                                          memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
908                                          p[16] >= '0' && p[16] <= '9' &&
909                                          p[17] >= '0' && p[17] <= '9')
910                                         priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
911
912                                 else if (l >= 19 &&
913                                          memcmp(p, "SYSLOG_IDENTIFIER=", 18) == 0) {
914                                         char *t;
915
916                                         t = strndup(p + 18, l - 18);
917                                         if (t) {
918                                                 free(identifier);
919                                                 identifier = t;
920                                         }
921                                 } else if (l >= 8 &&
922                                            memcmp(p, "MESSAGE=", 8) == 0) {
923                                         char *t;
924
925                                         t = strndup(p + 8, l - 8);
926                                         if (t) {
927                                                 free(message);
928                                                 message = t;
929                                         }
930                                 }
931                         }
932
933                         remaining -= (e - p) + 1;
934                         p = e + 1;
935                         continue;
936                 } else {
937                         le64_t l_le;
938                         uint64_t l;
939                         char *k;
940
941                         if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
942                                 log_debug("Failed to parse message, ignoring.");
943                                 break;
944                         }
945
946                         memcpy(&l_le, e + 1, sizeof(uint64_t));
947                         l = le64toh(l_le);
948
949                         if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
950                             e[1+sizeof(uint64_t)+l] != '\n') {
951                                 log_debug("Failed to parse message, ignoring.");
952                                 break;
953                         }
954
955                         k = malloc((e - p) + 1 + l);
956                         if (!k) {
957                                 log_oom();
958                                 break;
959                         }
960
961                         memcpy(k, p, e - p);
962                         k[e - p] = '=';
963                         memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
964
965                         if (valid_user_field(p, e - p)) {
966                                 iovec[n].iov_base = k;
967                                 iovec[n].iov_len = (e - p) + 1 + l;
968                                 n++;
969                         } else
970                                 free(k);
971
972                         remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
973                         p = e + 1 + sizeof(uint64_t) + l + 1;
974                 }
975         }
976
977         if (n <= 0)
978                 goto finish;
979
980         tn = n++;
981         IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
982
983         if (message) {
984                 if (s->forward_to_syslog)
985                         server_forward_syslog(s, priority, identifier, message, ucred, tv);
986
987                 if (s->forward_to_kmsg)
988                         server_forward_kmsg(s, priority, identifier, message, ucred);
989
990                 if (s->forward_to_console)
991                         server_forward_console(s, priority, identifier, message, ucred);
992         }
993
994         server_dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
995
996 finish:
997         for (j = 0; j < n; j++)  {
998                 if (j == tn)
999                         continue;
1000
1001                 if (iovec[j].iov_base < buffer ||
1002                     (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1003                         free(iovec[j].iov_base);
1004         }
1005
1006         free(iovec);
1007         free(identifier);
1008         free(message);
1009 }
1010
1011 static void process_native_file(
1012                 Server *s,
1013                 int fd,
1014                 struct ucred *ucred,
1015                 struct timeval *tv,
1016                 const char *label, size_t label_len) {
1017
1018         struct stat st;
1019         void *p;
1020         ssize_t n;
1021
1022         assert(s);
1023         assert(fd >= 0);
1024
1025         /* Data is in the passed file, since it didn't fit in a
1026          * datagram. We can't map the file here, since clients might
1027          * then truncate it and trigger a SIGBUS for us. So let's
1028          * stupidly read it */
1029
1030         if (fstat(fd, &st) < 0) {
1031                 log_error("Failed to stat passed file, ignoring: %m");
1032                 return;
1033         }
1034
1035         if (!S_ISREG(st.st_mode)) {
1036                 log_error("File passed is not regular. Ignoring.");
1037                 return;
1038         }
1039
1040         if (st.st_size <= 0)
1041                 return;
1042
1043         if (st.st_size > ENTRY_SIZE_MAX) {
1044                 log_error("File passed too large. Ignoring.");
1045                 return;
1046         }
1047
1048         p = malloc(st.st_size);
1049         if (!p) {
1050                 log_oom();
1051                 return;
1052         }
1053
1054         n = pread(fd, p, st.st_size, 0);
1055         if (n < 0)
1056                 log_error("Failed to read file, ignoring: %s", strerror(-n));
1057         else if (n > 0)
1058                 process_native_message(s, p, n, ucred, tv, label, label_len);
1059
1060         free(p);
1061 }
1062
1063 static int system_journal_open(Server *s) {
1064         int r;
1065         char *fn;
1066         sd_id128_t machine;
1067         char ids[33];
1068
1069         r = sd_id128_get_machine(&machine);
1070         if (r < 0)
1071                 return r;
1072
1073         sd_id128_to_string(machine, ids);
1074
1075         if (!s->system_journal &&
1076             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
1077             access("/run/systemd/journal/flushed", F_OK) >= 0) {
1078
1079                 /* If in auto mode: first try to create the machine
1080                  * path, but not the prefix.
1081                  *
1082                  * If in persistent mode: create /var/log/journal and
1083                  * the machine path */
1084
1085                 if (s->storage == STORAGE_PERSISTENT)
1086                         (void) mkdir("/var/log/journal/", 0755);
1087
1088                 fn = strappend("/var/log/journal/", ids);
1089                 if (!fn)
1090                         return -ENOMEM;
1091
1092                 (void) mkdir(fn, 0755);
1093                 free(fn);
1094
1095                 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
1096                 if (!fn)
1097                         return -ENOMEM;
1098
1099                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
1100                 free(fn);
1101
1102                 if (r >= 0)
1103                         server_fix_perms(s, s->system_journal, 0);
1104                 else if (r < 0) {
1105
1106                         if (r != -ENOENT && r != -EROFS)
1107                                 log_warning("Failed to open system journal: %s", strerror(-r));
1108
1109                         r = 0;
1110                 }
1111         }
1112
1113         if (!s->runtime_journal &&
1114             (s->storage != STORAGE_NONE)) {
1115
1116                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
1117                 if (!fn)
1118                         return -ENOMEM;
1119
1120                 if (s->system_journal) {
1121
1122                         /* Try to open the runtime journal, but only
1123                          * if it already exists, so that we can flush
1124                          * it into the system journal */
1125
1126                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1127                         free(fn);
1128
1129                         if (r < 0) {
1130                                 if (r != -ENOENT)
1131                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
1132
1133                                 r = 0;
1134                         }
1135
1136                 } else {
1137
1138                         /* OK, we really need the runtime journal, so create
1139                          * it if necessary. */
1140
1141                         (void) mkdir_parents(fn, 0755);
1142                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1143                         free(fn);
1144
1145                         if (r < 0) {
1146                                 log_error("Failed to open runtime journal: %s", strerror(-r));
1147                                 return r;
1148                         }
1149                 }
1150
1151                 if (s->runtime_journal)
1152                         server_fix_perms(s, s->runtime_journal, 0);
1153         }
1154
1155         return r;
1156 }
1157
1158 static int server_flush_to_var(Server *s) {
1159         Object *o = NULL;
1160         int r;
1161         sd_id128_t machine;
1162         sd_journal *j;
1163
1164         assert(s);
1165
1166         if (s->storage != STORAGE_AUTO &&
1167             s->storage != STORAGE_PERSISTENT)
1168                 return 0;
1169
1170         if (!s->runtime_journal)
1171                 return 0;
1172
1173         system_journal_open(s);
1174
1175         if (!s->system_journal)
1176                 return 0;
1177
1178         log_info("Flushing to /var...");
1179
1180         r = sd_id128_get_machine(&machine);
1181         if (r < 0) {
1182                 log_error("Failed to get machine id: %s", strerror(-r));
1183                 return r;
1184         }
1185
1186         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1187         if (r < 0) {
1188                 log_error("Failed to read runtime journal: %s", strerror(-r));
1189                 return r;
1190         }
1191
1192         SD_JOURNAL_FOREACH(j) {
1193                 JournalFile *f;
1194
1195                 f = j->current_file;
1196                 assert(f && f->current_offset > 0);
1197
1198                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1199                 if (r < 0) {
1200                         log_error("Can't read entry: %s", strerror(-r));
1201                         goto finish;
1202                 }
1203
1204                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1205                 if (r == -E2BIG) {
1206                         log_info("Allocation limit reached.");
1207
1208                         journal_file_post_change(s->system_journal);
1209                         server_rotate(s);
1210                         server_vacuum(s);
1211
1212                         r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1213                 }
1214
1215                 if (r < 0) {
1216                         log_error("Can't write entry: %s", strerror(-r));
1217                         goto finish;
1218                 }
1219         }
1220
1221 finish:
1222         journal_file_post_change(s->system_journal);
1223
1224         journal_file_close(s->runtime_journal);
1225         s->runtime_journal = NULL;
1226
1227         if (r >= 0)
1228                 rm_rf("/run/log/journal", false, true, false);
1229
1230         return r;
1231 }
1232
/* Dispatches a single epoll event to the matching handler.
 *
 * The event is identified by comparing ev->data.fd against the
 * server's well-known descriptors (signal fd, /dev/kmsg, native and
 * syslog datagram sockets, stdout listening socket); anything else is
 * treated as an stdout stream whose pointer is stored in ev->data.ptr.
 *
 * Returns 1 when the event was handled, 0 when a signal other than
 * SIGUSR1/SIGUSR2 was received, and a negative errno-style value on
 * error (-EIO for unexpected epoll event masks or short reads).
 * NOTE(review): the caller is not visible here; presumably 0 makes the
 * main loop terminate -- confirm against the caller. */
static int process_event(Server *s, struct epoll_event *ev) {
        assert(s);
        assert(ev);

        if (ev->data.fd == s->signal_fd) {
                struct signalfd_siginfo sfsi;
                ssize_t n;

                if (ev->events != EPOLLIN) {
                        log_info("Got invalid event from epoll.");
                        return -EIO;
                }

                n = read(s->signal_fd, &sfsi, sizeof(sfsi));
                if (n != sizeof(sfsi)) {

                        /* Short read: we did not get a full siginfo structure */
                        if (n >= 0)
                                return -EIO;

                        /* Transient condition, simply try again on the next wakeup */
                        if (errno == EINTR || errno == EAGAIN)
                                return 1;

                        return -errno;
                }

                log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));

                /* SIGUSR1: create the "flushed" flag file and move the
                 * runtime journal over to /var */
                if (sfsi.ssi_signo == SIGUSR1) {
                        touch("/run/systemd/journal/flushed");
                        server_flush_to_var(s);
                        return 1;
                }

                /* SIGUSR2: rotate and vacuum the journal files */
                if (sfsi.ssi_signo == SIGUSR2) {
                        server_rotate(s);
                        server_vacuum(s);
                        return 1;
                }

                /* Any other signal delivered via the signal fd
                 * (open_signalfd() also adds SIGINT and SIGTERM to the
                 * mask) */
                return 0;

        } else if (ev->data.fd == s->dev_kmsg_fd) {
                int r;

                if (ev->events != EPOLLIN) {
                        log_info("Got invalid event from epoll.");
                        return -EIO;
                }

                r = server_read_dev_kmsg(s);
                if (r < 0)
                        return r;

                return 1;

        } else if (ev->data.fd == s->native_fd ||
                   ev->data.fd == s->syslog_fd) {

                if (ev->events != EPOLLIN) {
                        log_info("Got invalid event from epoll.");
                        return -EIO;
                }

                /* Read datagrams until recvmsg() reports EINTR/EAGAIN.
                 * The loop is only ever left via return. */
                for (;;) {
                        struct msghdr msghdr;
                        struct iovec iovec;
                        struct ucred *ucred = NULL;
                        struct timeval *tv = NULL;
                        struct cmsghdr *cmsg;
                        char *label = NULL;
                        size_t label_len = 0;
                        union {
                                struct cmsghdr cmsghdr;

                                /* We use NAME_MAX space for the
                                 * SELinux label here. The kernel
                                 * currently enforces no limit, but
                                 * according to suggestions from the
                                 * SELinux people this will change and
                                 * it will probably be identical to
                                 * NAME_MAX. For now we use that, but
                                 * this should be updated one day when
                                 * the final limit is known.*/
                                uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
                                            CMSG_SPACE(sizeof(struct timeval)) +
                                            CMSG_SPACE(sizeof(int)) + /* fd */
                                            CMSG_SPACE(NAME_MAX)]; /* selinux label */
                        } control;
                        ssize_t n;
                        int v;
                        int *fds = NULL;
                        unsigned n_fds = 0;

                        /* Ask the kernel how much data is queued, so
                         * that the receive buffer can be sized
                         * accordingly */
                        if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
                                log_error("SIOCINQ failed: %m");
                                return -errno;
                        }

                        if (s->buffer_size < (size_t) v) {
                                void *b;
                                size_t l;

                                /* Grow to at least double the old
                                 * size. One extra byte is allocated
                                 * beyond buffer_size so that a
                                 * terminating NUL always fits behind
                                 * the received data. */
                                l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
                                b = realloc(s->buffer, l+1);

                                if (!b) {
                                        log_error("Couldn't increase buffer.");
                                        return -ENOMEM;
                                }

                                s->buffer_size = l;
                                s->buffer = b;
                        }

                        zero(iovec);
                        iovec.iov_base = s->buffer;
                        iovec.iov_len = s->buffer_size;

                        zero(control);
                        zero(msghdr);
                        msghdr.msg_iov = &iovec;
                        msghdr.msg_iovlen = 1;
                        msghdr.msg_control = &control;
                        msghdr.msg_controllen = sizeof(control);

                        n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
                        if (n < 0) {

                                /* Nothing (more) to read right now */
                                if (errno == EINTR || errno == EAGAIN)
                                        return 1;

                                log_error("recvmsg() failed: %m");
                                return -errno;
                        }

                        /* Pick up the ancillary data: sender
                         * credentials, security label, receive
                         * timestamp and passed file descriptors. The
                         * pointers refer into the local control
                         * buffer. */
                        for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {

                                if (cmsg->cmsg_level == SOL_SOCKET &&
                                    cmsg->cmsg_type == SCM_CREDENTIALS &&
                                    cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
                                        ucred = (struct ucred*) CMSG_DATA(cmsg);
                                else if (cmsg->cmsg_level == SOL_SOCKET &&
                                         cmsg->cmsg_type == SCM_SECURITY) {
                                        label = (char*) CMSG_DATA(cmsg);
                                        label_len = cmsg->cmsg_len - CMSG_LEN(0);
                                } else if (cmsg->cmsg_level == SOL_SOCKET &&
                                         cmsg->cmsg_type == SO_TIMESTAMP &&
                                         cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
                                        tv = (struct timeval*) CMSG_DATA(cmsg);
                                else if (cmsg->cmsg_level == SOL_SOCKET &&
                                         cmsg->cmsg_type == SCM_RIGHTS) {
                                        fds = (int*) CMSG_DATA(cmsg);
                                        n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
                                }
                        }

                        if (ev->data.fd == s->syslog_fd) {
                                char *e;

                                if (n > 0 && n_fds == 0) {
                                        /* Only the first line of the
                                         * datagram is processed: cut
                                         * at the first newline, or
                                         * NUL-terminate after the
                                         * received data. */
                                        e = memchr(s->buffer, '\n', n);
                                        if (e)
                                                *e = 0;
                                        else
                                                s->buffer[n] = 0;

                                        server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
                                } else if (n_fds > 0)
                                        log_warning("Got file descriptors via syslog socket. Ignoring.");

                        } else {
                                /* Native protocol: either the payload
                                 * is inline in the datagram, or the
                                 * datagram is empty and carries exactly
                                 * one file descriptor with the
                                 * payload. */
                                if (n > 0 && n_fds == 0)
                                        process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
                                else if (n == 0 && n_fds == 1)
                                        process_native_file(s, fds[0], ucred, tv, label, label_len);
                                else if (n_fds > 0)
                                        log_warning("Got too many file descriptors via native socket. Ignoring.");
                        }

                        /* Received descriptors are always closed here,
                         * whether they were used or not */
                        close_many(fds, n_fds);
                }

                return 1;

        } else if (ev->data.fd == s->stdout_fd) {

                if (ev->events != EPOLLIN) {
                        log_info("Got invalid event from epoll.");
                        return -EIO;
                }

                /* Event on the stdout listening socket: set up a new stream */
                stdout_stream_new(s);
                return 1;

        } else {
                StdoutStream *stream;

                /* Only EPOLLIN and EPOLLHUP are acceptable here, any
                 * other bit is considered invalid */
                if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
                        log_info("Got invalid event from epoll.");
                        return -EIO;
                }

                /* If it is none of the well-known fds, it must be an
                 * stdout stream fd. Note that this is a bit ugly here
                 * (since we rely that none of the well-known fds
                 * could be interpreted as pointer), but nonetheless
                 * safe, since the well-known fds would never get an
                 * fd > 4096, i.e. beyond the first memory page */

                stream = ev->data.ptr;

                /* A non-positive result releases the stream */
                if (stdout_stream_process(stream) <= 0)
                        stdout_stream_free(stream);

                return 1;
        }

        /* NOTE(review): every branch of the chain above returns, so
         * this looks unreachable. */
        log_error("Unknown event.");
        return 0;
}
1453
1454
1455 static int open_native_socket(Server*s) {
1456         union sockaddr_union sa;
1457         int one, r;
1458         struct epoll_event ev;
1459
1460         assert(s);
1461
1462         if (s->native_fd < 0) {
1463
1464                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1465                 if (s->native_fd < 0) {
1466                         log_error("socket() failed: %m");
1467                         return -errno;
1468                 }
1469
1470                 zero(sa);
1471                 sa.un.sun_family = AF_UNIX;
1472                 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
1473
1474                 unlink(sa.un.sun_path);
1475
1476                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
1477                 if (r < 0) {
1478                         log_error("bind() failed: %m");
1479                         return -errno;
1480                 }
1481
1482                 chmod(sa.un.sun_path, 0666);
1483         } else
1484                 fd_nonblock(s->native_fd, 1);
1485
1486         one = 1;
1487         r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
1488         if (r < 0) {
1489                 log_error("SO_PASSCRED failed: %m");
1490                 return -errno;
1491         }
1492
1493 #ifdef HAVE_SELINUX
1494         one = 1;
1495         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
1496         if (r < 0)
1497                 log_warning("SO_PASSSEC failed: %m");
1498 #endif
1499
1500         one = 1;
1501         r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
1502         if (r < 0) {
1503                 log_error("SO_TIMESTAMP failed: %m");
1504                 return -errno;
1505         }
1506
1507         zero(ev);
1508         ev.events = EPOLLIN;
1509         ev.data.fd = s->native_fd;
1510         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
1511                 log_error("Failed to add native server fd to epoll object: %m");
1512                 return -errno;
1513         }
1514
1515         return 0;
1516 }
1517
1518
1519 static int open_signalfd(Server *s) {
1520         sigset_t mask;
1521         struct epoll_event ev;
1522
1523         assert(s);
1524
1525         assert_se(sigemptyset(&mask) == 0);
1526         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1527         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1528
1529         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1530         if (s->signal_fd < 0) {
1531                 log_error("signalfd(): %m");
1532                 return -errno;
1533         }
1534
1535         zero(ev);
1536         ev.events = EPOLLIN;
1537         ev.data.fd = s->signal_fd;
1538
1539         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1540                 log_error("epoll_ctl(): %m");
1541                 return -errno;
1542         }
1543
1544         return 0;
1545 }
1546
1547 static int server_parse_proc_cmdline(Server *s) {
1548         char *line, *w, *state;
1549         int r;
1550         size_t l;
1551
1552         if (detect_container(NULL) > 0)
1553                 return 0;
1554
1555         r = read_one_line_file("/proc/cmdline", &line);
1556         if (r < 0) {
1557                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1558                 return 0;
1559         }
1560
1561         FOREACH_WORD_QUOTED(w, l, line, state) {
1562                 char *word;
1563
1564                 word = strndup(w, l);
1565                 if (!word) {
1566                         r = -ENOMEM;
1567                         goto finish;
1568                 }
1569
1570                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1571                         r = parse_boolean(word + 35);
1572                         if (r < 0)
1573                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1574                         else
1575                                 s->forward_to_syslog = r;
1576                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1577                         r = parse_boolean(word + 33);
1578                         if (r < 0)
1579                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1580                         else
1581                                 s->forward_to_kmsg = r;
1582                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1583                         r = parse_boolean(word + 36);
1584                         if (r < 0)
1585                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1586                         else
1587                                 s->forward_to_console = r;
1588                 } else if (startswith(word, "systemd.journald"))
1589                         log_warning("Invalid systemd.journald parameter. Ignoring.");
1590
1591                 free(word);
1592         }
1593
1594         r = 0;
1595
1596 finish:
1597         free(line);
1598         return r;
1599 }
1600
1601 static int server_parse_config_file(Server *s) {
1602         FILE *f;
1603         const char *fn;
1604         int r;
1605
1606         assert(s);
1607
1608         fn = "/etc/systemd/journald.conf";
1609         f = fopen(fn, "re");
1610         if (!f) {
1611                 if (errno == ENOENT)
1612                         return 0;
1613
1614                 log_warning("Failed to open configuration file %s: %m", fn);
1615                 return -errno;
1616         }
1617
1618         r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
1619         if (r < 0)
1620                 log_warning("Failed to parse configuration file: %s", strerror(-r));
1621
1622         fclose(f);
1623
1624         return r;
1625 }
1626
1627 static int server_init(Server *s) {
1628         int n, r, fd;
1629
1630         assert(s);
1631
1632         zero(*s);
1633         s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1634         s->compress = true;
1635         s->seal = true;
1636
1637         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1638         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1639
1640         s->forward_to_syslog = true;
1641
1642         s->max_level_store = LOG_DEBUG;
1643         s->max_level_syslog = LOG_DEBUG;
1644         s->max_level_kmsg = LOG_NOTICE;
1645         s->max_level_console = LOG_INFO;
1646
1647         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1648         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1649
1650         server_parse_config_file(s);
1651         server_parse_proc_cmdline(s);
1652
1653         mkdir_p("/run/systemd/journal", 0755);
1654
1655         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1656         if (!s->user_journals)
1657                 return log_oom();
1658
1659         s->mmap = mmap_cache_new();
1660         if (!s->mmap)
1661                 return log_oom();
1662
1663         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1664         if (s->epoll_fd < 0) {
1665                 log_error("Failed to create epoll object: %m");
1666                 return -errno;
1667         }
1668
1669         n = sd_listen_fds(true);
1670         if (n < 0) {
1671                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1672                 return n;
1673         }
1674
1675         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1676
1677                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1678
1679                         if (s->native_fd >= 0) {
1680                                 log_error("Too many native sockets passed.");
1681                                 return -EINVAL;
1682                         }
1683
1684                         s->native_fd = fd;
1685
1686                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1687
1688                         if (s->stdout_fd >= 0) {
1689                                 log_error("Too many stdout sockets passed.");
1690                                 return -EINVAL;
1691                         }
1692
1693                         s->stdout_fd = fd;
1694
1695                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1696
1697                         if (s->syslog_fd >= 0) {
1698                                 log_error("Too many /dev/log sockets passed.");
1699                                 return -EINVAL;
1700                         }
1701
1702                         s->syslog_fd = fd;
1703
1704                 } else {
1705                         log_error("Unknown socket passed.");
1706                         return -EINVAL;
1707                 }
1708         }
1709
1710         r = server_open_syslog_socket(s);
1711         if (r < 0)
1712                 return r;
1713
1714         r = open_native_socket(s);
1715         if (r < 0)
1716                 return r;
1717
1718         r = server_open_stdout_socket(s);
1719         if (r < 0)
1720                 return r;
1721
1722         r = server_open_dev_kmsg(s);
1723         if (r < 0)
1724                 return r;
1725
1726         r = server_open_kernel_seqnum(s);
1727         if (r < 0)
1728                 return r;
1729
1730         r = open_signalfd(s);
1731         if (r < 0)
1732                 return r;
1733
1734         s->udev = udev_new();
1735         if (!s->udev)
1736                 return -ENOMEM;
1737
1738         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1739         if (!s->rate_limit)
1740                 return -ENOMEM;
1741
1742         r = system_journal_open(s);
1743         if (r < 0)
1744                 return r;
1745
1746         return 0;
1747 }
1748
1749 static void maybe_append_tags(Server *s) {
1750 #ifdef HAVE_GCRYPT
1751         JournalFile *f;
1752         Iterator i;
1753         usec_t n;
1754
1755         n = now(CLOCK_REALTIME);
1756
1757         if (s->system_journal)
1758                 journal_file_maybe_append_tag(s->system_journal, n);
1759
1760         HASHMAP_FOREACH(f, s->user_journals, i)
1761                 journal_file_maybe_append_tag(f, n);
1762 #endif
1763 }
1764
1765 static void server_done(Server *s) {
1766         JournalFile *f;
1767         assert(s);
1768
1769         while (s->stdout_streams)
1770                 stdout_stream_free(s->stdout_streams);
1771
1772         if (s->system_journal)
1773                 journal_file_close(s->system_journal);
1774
1775         if (s->runtime_journal)
1776                 journal_file_close(s->runtime_journal);
1777
1778         while ((f = hashmap_steal_first(s->user_journals)))
1779                 journal_file_close(f);
1780
1781         hashmap_free(s->user_journals);
1782
1783         if (s->epoll_fd >= 0)
1784                 close_nointr_nofail(s->epoll_fd);
1785
1786         if (s->signal_fd >= 0)
1787                 close_nointr_nofail(s->signal_fd);
1788
1789         if (s->syslog_fd >= 0)
1790                 close_nointr_nofail(s->syslog_fd);
1791
1792         if (s->native_fd >= 0)
1793                 close_nointr_nofail(s->native_fd);
1794
1795         if (s->stdout_fd >= 0)
1796                 close_nointr_nofail(s->stdout_fd);
1797
1798         if (s->dev_kmsg_fd >= 0)
1799                 close_nointr_nofail(s->dev_kmsg_fd);
1800
1801         if (s->rate_limit)
1802                 journal_rate_limit_free(s->rate_limit);
1803
1804         if (s->kernel_seqnum)
1805                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1806
1807         free(s->buffer);
1808         free(s->tty_path);
1809
1810         if (s->mmap)
1811                 mmap_cache_unref(s->mmap);
1812
1813         if (s->udev)
1814                 udev_unref(s->udev);
1815 }
1816
1817 int main(int argc, char *argv[]) {
1818         Server server;
1819         int r;
1820
1821         /* if (getppid() != 1) { */
1822         /*         log_error("This program should be invoked by init only."); */
1823         /*         return EXIT_FAILURE; */
1824         /* } */
1825
1826         if (argc > 1) {
1827                 log_error("This program does not take arguments.");
1828                 return EXIT_FAILURE;
1829         }
1830
1831         log_set_target(LOG_TARGET_SAFE);
1832         log_set_facility(LOG_SYSLOG);
1833         log_set_max_level(LOG_DEBUG);
1834         log_parse_environment();
1835         log_open();
1836
1837         umask(0022);
1838
1839         r = server_init(&server);
1840         if (r < 0)
1841                 goto finish;
1842
1843         server_vacuum(&server);
1844         server_flush_to_var(&server);
1845         server_flush_dev_kmsg(&server);
1846
1847         log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
1848         server_driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
1849
1850         sd_notify(false,
1851                   "READY=1\n"
1852                   "STATUS=Processing requests...");
1853
1854         for (;;) {
1855                 struct epoll_event event;
1856                 int t;
1857
1858 #ifdef HAVE_GCRYPT
1859                 usec_t u;
1860
1861                 if (server.system_journal &&
1862                     journal_file_next_evolve_usec(server.system_journal, &u)) {
1863                         usec_t n;
1864
1865                         n = now(CLOCK_REALTIME);
1866
1867                         if (n >= u)
1868                                 t = 0;
1869                         else
1870                                 t = (int) ((u - n + USEC_PER_MSEC - 1) / USEC_PER_MSEC);
1871                 } else
1872 #endif
1873                         t = -1;
1874
1875                 r = epoll_wait(server.epoll_fd, &event, 1, t);
1876                 if (r < 0) {
1877
1878                         if (errno == EINTR)
1879                                 continue;
1880
1881                         log_error("epoll_wait() failed: %m");
1882                         r = -errno;
1883                         goto finish;
1884                 }
1885
1886                 if (r > 0) {
1887                         r = process_event(&server, &event);
1888                         if (r < 0)
1889                                 goto finish;
1890                         else if (r == 0)
1891                                 break;
1892                 }
1893
1894                 maybe_append_tags(&server);
1895         }
1896
1897         log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
1898         server_driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
1899
1900 finish:
1901         sd_notify(false,
1902                   "STATUS=Shutting down...");
1903
1904         server_done(&server);
1905
1906         return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
1907 }