chiark / gitweb /
ba211b3724f99fe8518fe430472efc60419ea1c9
[elogind.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
26 #include <sys/mman.h>
27 #include <sys/timerfd.h>
28
29 #include <libudev.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
33
34 #include "fileio.h"
35 #include "mkdir.h"
36 #include "hashmap.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
40 #include "list.h"
41 #include "virt.h"
42 #include "missing.h"
43 #include "conf-parser.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-server.h"
48 #include "journald-rate-limit.h"
49 #include "journald-kmsg.h"
50 #include "journald-syslog.h"
51 #include "journald-stream.h"
52 #include "journald-console.h"
53 #include "journald-native.h"
54
55 #ifdef HAVE_ACL
56 #include <sys/acl.h>
57 #include <acl/libacl.h>
58 #include "acl-util.h"
59 #endif
60
61 #ifdef HAVE_SELINUX
62 #include <selinux/selinux.h>
63 #endif
64
/* Upper bound on the number of per-user journal files kept open at the same
 * time; find_journal() closes already-open ones until the count drops below
 * this before opening another. */
#define USER_JOURNALS_MAX 1024

/* Default tunables. NOTE(review): their consumers are outside this excerpt;
 * presumably they seed the server configuration -- confirm at the use sites. */
#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000

/* How long available_space() may keep returning its cached result before it
 * scans the journal directory again. */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
72
/* Textual names of the Storage enum values, indexed by the enum constant. */
static const char* const storage_table[] = {
        [STORAGE_AUTO] = "auto",
        [STORAGE_VOLATILE] = "volatile",
        [STORAGE_PERSISTENT] = "persistent",
        [STORAGE_NONE] = "none"
};

/* NOTE(review): both macros are defined elsewhere; presumably they generate
 * the string<->enum lookup helpers for the table above and the
 * config_parse_storage() callback -- confirm against their definitions. */
DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
82
/* Textual names of the SplitMode enum values, indexed by the enum constant. */
static const char* const split_mode_table[] = {
        [SPLIT_NONE] = "none",
        [SPLIT_UID] = "uid",
        [SPLIT_LOGIN] = "login"
};

/* NOTE(review): both macros are defined elsewhere; presumably they generate
 * the string<->enum lookup helpers for the table above and the
 * config_parse_split_mode() callback -- confirm against their definitions. */
DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
91
92 static uint64_t available_space(Server *s, bool verbose) {
93         char ids[33];
94         _cleanup_free_ char *p = NULL;
95         sd_id128_t machine;
96         struct statvfs ss;
97         uint64_t sum = 0, ss_avail = 0, avail = 0;
98         int r;
99         _cleanup_closedir_ DIR *d = NULL;
100         usec_t ts;
101         const char *f;
102         JournalMetrics *m;
103
104         ts = now(CLOCK_MONOTONIC);
105
106         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
107             && !verbose)
108                 return s->cached_available_space;
109
110         r = sd_id128_get_machine(&machine);
111         if (r < 0)
112                 return 0;
113
114         if (s->system_journal) {
115                 f = "/var/log/journal/";
116                 m = &s->system_metrics;
117         } else {
118                 f = "/run/log/journal/";
119                 m = &s->runtime_metrics;
120         }
121
122         assert(m);
123
124         p = strappend(f, sd_id128_to_string(machine, ids));
125         if (!p)
126                 return 0;
127
128         d = opendir(p);
129         if (!d)
130                 return 0;
131
132         if (fstatvfs(dirfd(d), &ss) < 0)
133                 return 0;
134
135         for (;;) {
136                 struct stat st;
137                 struct dirent *de;
138                 union dirent_storage buf;
139
140                 r = readdir_r(d, &buf.de, &de);
141                 if (r != 0)
142                         break;
143
144                 if (!de)
145                         break;
146
147                 if (!endswith(de->d_name, ".journal") &&
148                     !endswith(de->d_name, ".journal~"))
149                         continue;
150
151                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
152                         continue;
153
154                 if (!S_ISREG(st.st_mode))
155                         continue;
156
157                 sum += (uint64_t) st.st_blocks * 512UL;
158         }
159
160         ss_avail = ss.f_bsize * ss.f_bavail;
161         avail = ss_avail > m->keep_free ? ss_avail - m->keep_free : 0;
162
163         s->cached_available_space = MIN(m->max_use, avail) > sum ? MIN(m->max_use, avail) - sum : 0;
164         s->cached_available_space_timestamp = ts;
165
166         if (verbose) {
167                 char    fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
168                         fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
169
170                 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
171                                       "%s journal is using %s (max %s, leaving %s of free %s, current limit %s).",
172                                       s->system_journal ? "Permanent" : "Runtime",
173                                       format_bytes(fb1, sizeof(fb1), sum),
174                                       format_bytes(fb2, sizeof(fb2), m->max_use),
175                                       format_bytes(fb3, sizeof(fb3), m->keep_free),
176                                       format_bytes(fb4, sizeof(fb4), ss_avail),
177                                       format_bytes(fb5, sizeof(fb5), MIN(m->max_use, avail)));
178         }
179
180         return s->cached_available_space;
181 }
182
183 static void server_read_file_gid(Server *s) {
184         const char *g = "systemd-journal";
185         int r;
186
187         assert(s);
188
189         if (s->file_gid_valid)
190                 return;
191
192         r = get_group_creds(&g, &s->file_gid);
193         if (r < 0)
194                 log_warning("Failed to resolve '%s' group: %s", g, strerror(-r));
195
196         /* if we couldn't read the gid, then it will be 0, but that's
197          * fine and we shouldn't try to resolve the group again, so
198          * let's just pretend it worked right-away. */
199         s->file_gid_valid = true;
200 }
201
202 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
203         int r;
204 #ifdef HAVE_ACL
205         acl_t acl;
206         acl_entry_t entry;
207         acl_permset_t permset;
208 #endif
209
210         assert(f);
211
212         server_read_file_gid(s);
213
214         r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
215         if (r < 0)
216                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
217
218 #ifdef HAVE_ACL
219         if (uid <= 0)
220                 return;
221
222         acl = acl_get_fd(f->fd);
223         if (!acl) {
224                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
225                 return;
226         }
227
228         r = acl_find_uid(acl, uid, &entry);
229         if (r <= 0) {
230
231                 if (acl_create_entry(&acl, &entry) < 0 ||
232                     acl_set_tag_type(entry, ACL_USER) < 0 ||
233                     acl_set_qualifier(entry, &uid) < 0) {
234                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
235                         goto finish;
236                 }
237         }
238
239         /* We do not recalculate the mask unconditionally here,
240          * so that the fchmod() mask above stays intact. */
241         if (acl_get_permset(entry, &permset) < 0 ||
242             acl_add_perm(permset, ACL_READ) < 0 ||
243             calc_acl_mask_if_needed(&acl) < 0) {
244                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
245                 goto finish;
246         }
247
248         if (acl_set_fd(f->fd, acl) < 0)
249                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
250
251 finish:
252         acl_free(acl);
253 #endif
254 }
255
256 static JournalFile* find_journal(Server *s, uid_t uid) {
257         _cleanup_free_ char *p = NULL;
258         int r;
259         JournalFile *f;
260         sd_id128_t machine;
261
262         assert(s);
263
264         /* We split up user logs only on /var, not on /run. If the
265          * runtime file is open, we write to it exclusively, in order
266          * to guarantee proper order as soon as we flush /run to
267          * /var and close the runtime file. */
268
269         if (s->runtime_journal)
270                 return s->runtime_journal;
271
272         if (uid <= 0)
273                 return s->system_journal;
274
275         r = sd_id128_get_machine(&machine);
276         if (r < 0)
277                 return s->system_journal;
278
279         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
280         if (f)
281                 return f;
282
283         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
284                      SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
285                 return s->system_journal;
286
287         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
288                 /* Too many open? Then let's close one */
289                 f = hashmap_steal_first(s->user_journals);
290                 assert(f);
291                 journal_file_close(f);
292         }
293
294         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
295         if (r < 0)
296                 return s->system_journal;
297
298         server_fix_perms(s, f, uid);
299
300         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
301         if (r < 0) {
302                 journal_file_close(f);
303                 return s->system_journal;
304         }
305
306         return f;
307 }
308
309 void server_rotate(Server *s) {
310         JournalFile *f;
311         void *k;
312         Iterator i;
313         int r;
314
315         log_debug("Rotating...");
316
317         if (s->runtime_journal) {
318                 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
319                 if (r < 0)
320                         if (s->runtime_journal)
321                                 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
322                         else
323                                 log_error("Failed to create new runtime journal: %s", strerror(-r));
324                 else
325                         server_fix_perms(s, s->runtime_journal, 0);
326         }
327
328         if (s->system_journal) {
329                 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
330                 if (r < 0)
331                         if (s->system_journal)
332                                 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
333                         else
334                                 log_error("Failed to create new system journal: %s", strerror(-r));
335
336                 else
337                         server_fix_perms(s, s->system_journal, 0);
338         }
339
340         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
341                 r = journal_file_rotate(&f, s->compress, s->seal);
342                 if (r < 0)
343                         if (f)
344                                 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
345                         else
346                                 log_error("Failed to create user journal: %s", strerror(-r));
347                 else {
348                         hashmap_replace(s->user_journals, k, f);
349                         server_fix_perms(s, f, PTR_TO_UINT32(k));
350                 }
351         }
352 }
353
354 void server_sync(Server *s) {
355         static const struct itimerspec sync_timer_disable = {};
356         JournalFile *f;
357         void *k;
358         Iterator i;
359         int r;
360
361         if (s->system_journal) {
362                 r = journal_file_set_offline(s->system_journal);
363                 if (r < 0)
364                         log_error("Failed to sync system journal: %s", strerror(-r));
365         }
366
367         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
368                 r = journal_file_set_offline(f);
369                 if (r < 0)
370                         log_error("Failed to sync user journal: %s", strerror(-r));
371         }
372
373         r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
374         if (r < 0)
375                 log_error("Failed to disable max timer: %m");
376
377         s->sync_scheduled = false;
378 }
379
380 void server_vacuum(Server *s) {
381         char ids[33];
382         sd_id128_t machine;
383         int r;
384
385         log_debug("Vacuuming...");
386
387         s->oldest_file_usec = 0;
388
389         r = sd_id128_get_machine(&machine);
390         if (r < 0) {
391                 log_error("Failed to get machine ID: %s", strerror(-r));
392                 return;
393         }
394
395         sd_id128_to_string(machine, ids);
396
397         if (s->system_journal) {
398                 char *p = strappenda("/var/log/journal/", ids);
399
400                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
401                 if (r < 0 && r != -ENOENT)
402                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
403         }
404
405         if (s->runtime_journal) {
406                 char *p = strappenda("/run/log/journal/", ids);
407
408                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
409                 if (r < 0 && r != -ENOENT)
410                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
411         }
412
413         s->cached_available_space_timestamp = 0;
414 }
415
416 bool shall_try_append_again(JournalFile *f, int r) {
417
418         /* -E2BIG            Hit configured limit
419            -EFBIG            Hit fs limit
420            -EDQUOT           Quota limit hit
421            -ENOSPC           Disk full
422            -EHOSTDOWN        Other machine
423            -EBUSY            Unclean shutdown
424            -EPROTONOSUPPORT  Unsupported feature
425            -EBADMSG          Corrupted
426            -ENODATA          Truncated
427            -ESHUTDOWN        Already archived */
428
429         if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
430                 log_debug("%s: Allocation limit reached, rotating.", f->path);
431         else if (r == -EHOSTDOWN)
432                 log_info("%s: Journal file from other machine, rotating.", f->path);
433         else if (r == -EBUSY)
434                 log_info("%s: Unclean shutdown, rotating.", f->path);
435         else if (r == -EPROTONOSUPPORT)
436                 log_info("%s: Unsupported feature, rotating.", f->path);
437         else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
438                 log_warning("%s: Journal file corrupted, rotating.", f->path);
439         else
440                 return false;
441
442         return true;
443 }
444
445 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
446         JournalFile *f;
447         bool vacuumed = false;
448         int r;
449
450         assert(s);
451         assert(iovec);
452         assert(n > 0);
453
454         f = find_journal(s, uid);
455         if (!f)
456                 return;
457
458         if (journal_file_rotate_suggested(f, s->max_file_usec)) {
459                 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
460                 server_rotate(s);
461                 server_vacuum(s);
462                 vacuumed = true;
463
464                 f = find_journal(s, uid);
465                 if (!f)
466                         return;
467         }
468
469         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
470         if (r >= 0) {
471                 server_schedule_sync(s, priority);
472                 return;
473         }
474
475         if (vacuumed || !shall_try_append_again(f, r)) {
476                 size_t size = 0;
477                 unsigned i;
478                 for (i = 0; i < n; i++)
479                         size += iovec[i].iov_len;
480
481                 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
482                 return;
483         }
484
485         server_rotate(s);
486         server_vacuum(s);
487
488         f = find_journal(s, uid);
489         if (!f)
490                 return;
491
492         log_debug("Retrying write.");
493         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
494         if (r < 0) {
495                 size_t size = 0;
496                 unsigned i;
497                 for (i = 0; i < n; i++)
498                         size += iovec[i].iov_len;
499
500                 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
501         } else
502                 server_schedule_sync(s, priority);
503 }
504
/* Enriches a log entry with metadata fields collected here and hands the
 * result to write_to_journal().
 *
 * iovec holds n caller-supplied fields and has room for m entries in total;
 * the fields generated here are appended in place, starting at index n.
 *
 *   ucred        if non-NULL, credentials of the sending process; enables
 *                the _PID/_UID/_GID, process, audit, cgroup and SELinux fields
 *   tv           if non-NULL, stored as _SOURCE_REALTIME_TIMESTAMP
 *   label,
 *   label_len    (HAVE_SELINUX only) if label is non-NULL it is stored as
 *                _SELINUX_CONTEXT, otherwise the context is queried with
 *                getpidcon()
 *   unit_id      fallback for _SYSTEMD_UNIT / _SYSTEMD_USER_UNIT when the
 *                sender's cgroup path does not yield one
 *   priority     passed through to write_to_journal()
 *   object_pid   if non-zero, OBJECT_* fields describing that process are
 *                added as well
 *
 * Every lookup is best-effort: when a helper fails, its field is simply
 * left out. */
static void dispatch_message_real(
                Server *s,
                struct iovec *iovec, unsigned n, unsigned m,
                struct ucred *ucred,
                struct timeval *tv,
                const char *label, size_t label_len,
                const char *unit_id,
                int priority,
                pid_t object_pid) {

        /* Fixed-size buffers for the numeric and ID fields. They must live
         * until write_to_journal() returns since the iovec entries only
         * point at them. */
        char    pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
                uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
                gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
                owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
                source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
                boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
                machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=",
                o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
                o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
                o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
        uid_t object_uid;
        gid_t object_gid;
        char *x;
        sd_id128_t id;
        int r;
        char *t, *c;
        uid_t realuid = 0, owner = 0, journal_uid;
        bool owner_valid = false;
#ifdef HAVE_AUDIT
        char    audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
                audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
                o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
                o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];

        uint32_t audit;
        uid_t loginuid;
#endif

        assert(s);
        assert(iovec);
        assert(n > 0);
        /* The caller must have reserved room for every field added below. */
        assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);

        if (ucred) {
                realuid = ucred->uid;

                sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
                IOVEC_SET_STRING(iovec[n++], pid);

                sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
                IOVEC_SET_STRING(iovec[n++], uid);

                sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
                IOVEC_SET_STRING(iovec[n++], gid);

                /* Pattern used throughout: the helper returns a string in t,
                 * strappenda() builds "FIELD=" + t, and t is freed right
                 * away. x stays in use afterwards, so strappenda() must
                 * return storage of its own (NOTE(review): presumably
                 * alloca()-based like the explicit alloca() in the SELinux
                 * branch below -- confirm). */
                r = get_process_comm(ucred->pid, &t);
                if (r >= 0) {
                        x = strappenda("_COMM=", t);
                        free(t);
                        IOVEC_SET_STRING(iovec[n++], x);
                }

                r = get_process_exe(ucred->pid, &t);
                if (r >= 0) {
                        x = strappenda("_EXE=", t);
                        free(t);
                        IOVEC_SET_STRING(iovec[n++], x);
                }

                r = get_process_cmdline(ucred->pid, 0, false, &t);
                if (r >= 0) {
                        x = strappenda("_CMDLINE=", t);
                        free(t);
                        IOVEC_SET_STRING(iovec[n++], x);
                }

                r = get_process_capeff(ucred->pid, &t);
                if (r >= 0) {
                        x = strappenda("_CAP_EFFECTIVE=", t);
                        free(t);
                        IOVEC_SET_STRING(iovec[n++], x);
                }

#ifdef HAVE_AUDIT
                r = audit_session_from_pid(ucred->pid, &audit);
                if (r >= 0) {
                        sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
                        IOVEC_SET_STRING(iovec[n++], audit_session);
                }

                r = audit_loginuid_from_pid(ucred->pid, &loginuid);
                if (r >= 0) {
                        sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
                        IOVEC_SET_STRING(iovec[n++], audit_loginuid);
                }
#endif

                /* Everything derived from the sender's cgroup path. */
                r = cg_pid_get_path_shifted(ucred->pid, NULL, &c);
                if (r >= 0) {
                        /* Stays NULL unless a session could be determined;
                         * used below to choose which unit field unit_id
                         * falls back to. */
                        char *session = NULL;

                        x = strappenda("_SYSTEMD_CGROUP=", c);
                        IOVEC_SET_STRING(iovec[n++], x);

                        r = cg_path_get_session(c, &t);
                        if (r >= 0) {
                                session = strappenda("_SYSTEMD_SESSION=", t);
                                free(t);
                                IOVEC_SET_STRING(iovec[n++], session);
                        }

                        if (cg_path_get_owner_uid(c, &owner) >= 0) {
                                owner_valid = true;

                                sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
                                IOVEC_SET_STRING(iovec[n++], owner_uid);
                        }

                        /* Without a session, unit_id is recorded as the
                         * system unit ... */
                        if (cg_path_get_unit(c, &t) >= 0) {
                                x = strappenda("_SYSTEMD_UNIT=", t);
                                free(t);
                                IOVEC_SET_STRING(iovec[n++], x);
                        } else if (unit_id && !session) {
                                x = strappenda("_SYSTEMD_UNIT=", unit_id);
                                IOVEC_SET_STRING(iovec[n++], x);
                        }

                        /* ... with a session, as the user unit. */
                        if (cg_path_get_user_unit(c, &t) >= 0) {
                                x = strappenda("_SYSTEMD_USER_UNIT=", t);
                                free(t);
                                IOVEC_SET_STRING(iovec[n++], x);
                        } else if (unit_id && session) {
                                x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
                                IOVEC_SET_STRING(iovec[n++], x);
                        }

                        free(c);
                }

#ifdef HAVE_SELINUX
                if (label) {
                        /* label comes with an explicit length, so it is
                         * copied by length and NUL-terminated by hand;
                         * sizeof() of the prefix accounts for that NUL. */
                        x = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);

                        *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
                        IOVEC_SET_STRING(iovec[n++], x);
                } else {
                        security_context_t con;

                        if (getpidcon(ucred->pid, &con) >= 0) {
                                x = strappenda("_SELINUX_CONTEXT=", con);

                                freecon(con);
                                IOVEC_SET_STRING(iovec[n++], x);
                        }
                }
#endif
        }
        assert(n <= m);

        /* Same set of lookups again, this time for the process the message
         * is about, stored under OBJECT_* names. */
        if (object_pid) {
                r = get_process_uid(object_pid, &object_uid);
                if (r >= 0) {
                        sprintf(o_uid, "OBJECT_UID=%lu", (unsigned long) object_uid);
                        IOVEC_SET_STRING(iovec[n++], o_uid);
                }

                r = get_process_gid(object_pid, &object_gid);
                if (r >= 0) {
                        sprintf(o_gid, "OBJECT_GID=%lu", (unsigned long) object_gid);
                        IOVEC_SET_STRING(iovec[n++], o_gid);
                }

                r = get_process_comm(object_pid, &t);
                if (r >= 0) {
                        x = strappenda("OBJECT_COMM=", t);
                        free(t);
                        IOVEC_SET_STRING(iovec[n++], x);
                }

                r = get_process_exe(object_pid, &t);
                if (r >= 0) {
                        x = strappenda("OBJECT_EXE=", t);
                        free(t);
                        IOVEC_SET_STRING(iovec[n++], x);
                }

                r = get_process_cmdline(object_pid, 0, false, &t);
                if (r >= 0) {
                        x = strappenda("OBJECT_CMDLINE=", t);
                        free(t);
                        IOVEC_SET_STRING(iovec[n++], x);
                }

#ifdef HAVE_AUDIT
                r = audit_session_from_pid(object_pid, &audit);
                if (r >= 0) {
                        sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%lu", (unsigned long) audit);
                        IOVEC_SET_STRING(iovec[n++], o_audit_session);
                }

                r = audit_loginuid_from_pid(object_pid, &loginuid);
                if (r >= 0) {
                        sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
                        IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
                }
#endif

                r = cg_pid_get_path_shifted(object_pid, NULL, &c);
                if (r >= 0) {
                        x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
                        IOVEC_SET_STRING(iovec[n++], x);

                        r = cg_path_get_session(c, &t);
                        if (r >= 0) {
                                x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
                                free(t);
                                IOVEC_SET_STRING(iovec[n++], x);
                        }

                        /* NOTE(review): this writes into 'owner', the same
                         * variable the SPLIT_LOGIN decision at the end of
                         * this function reads (owner_valid is left as set
                         * for the sender). On success the object's owner UID
                         * therefore replaces the sender's -- confirm whether
                         * that is intended. */
                        if (cg_path_get_owner_uid(c, &owner) >= 0) {
                                sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
                                IOVEC_SET_STRING(iovec[n++], o_owner_uid);
                        }

                        if (cg_path_get_unit(c, &t) >= 0) {
                                x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
                                free(t);
                                IOVEC_SET_STRING(iovec[n++], x);
                        }

                        if (cg_path_get_user_unit(c, &t) >= 0) {
                                x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
                                free(t);
                                IOVEC_SET_STRING(iovec[n++], x);
                        }

                        free(c);
                }
        }
        assert(n <= m);

        if (tv) {
                sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
                IOVEC_SET_STRING(iovec[n++], source_time);
        }

        /* Note that strictly speaking storing the boot id here is
         * redundant since the entry includes this in-line
         * anyway. However, we need this indexed, too. */
        r = sd_id128_get_boot(&id);
        if (r >= 0) {
                /* The ID string is written right behind the prefix the
                 * buffer was initialised with. */
                sd_id128_to_string(id, boot_id + strlen("_BOOT_ID="));
                IOVEC_SET_STRING(iovec[n++], boot_id);
        }

        r = sd_id128_get_machine(&id);
        if (r >= 0) {
                sd_id128_to_string(id, machine_id + strlen("_MACHINE_ID="));
                IOVEC_SET_STRING(iovec[n++], machine_id);
        }

        t = gethostname_malloc();
        if (t) {
                x = strappenda("_HOSTNAME=", t);
                free(t);
                IOVEC_SET_STRING(iovec[n++], x);
        }

        assert(n <= m);

        /* Pick the journal the entry goes to; uid 0 selects the system
         * journal in find_journal(). */
        if (s->split_mode == SPLIT_UID && realuid > 0)
                /* Split up strictly by any UID */
                journal_uid = realuid;
        else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
                /* Split up by login UIDs, this avoids creation of
                 * individual journals for system UIDs.  We do this
                 * only if the realuid is not root, in order not to
                 * accidentally leak privileged information to the
                 * user that is logged by a privileged process that is
                 * part of an unprivileged session.*/
                journal_uid = owner;
        else
                journal_uid = 0;

        write_to_journal(s, journal_uid, iovec, n, priority);
}
791
792 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
793         char mid[11 + 32 + 1];
794         char buffer[16 + LINE_MAX + 1];
795         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
796         int n = 0;
797         va_list ap;
798         struct ucred ucred = {};
799
800         assert(s);
801         assert(format);
802
803         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
804         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
805
806         memcpy(buffer, "MESSAGE=", 8);
807         va_start(ap, format);
808         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
809         va_end(ap);
810         char_array_0(buffer);
811         IOVEC_SET_STRING(iovec[n++], buffer);
812
813         if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
814                 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
815                 char_array_0(mid);
816                 IOVEC_SET_STRING(iovec[n++], mid);
817         }
818
819         ucred.pid = getpid();
820         ucred.uid = getuid();
821         ucred.gid = getgid();
822
823         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
824 }
825
826 void server_dispatch_message(
827                 Server *s,
828                 struct iovec *iovec, unsigned n, unsigned m,
829                 struct ucred *ucred,
830                 struct timeval *tv,
831                 const char *label, size_t label_len,
832                 const char *unit_id,
833                 int priority,
834                 pid_t object_pid) {
835
836         int rl, r;
837         _cleanup_free_ char *path = NULL;
838         char *c;
839
840         assert(s);
841         assert(iovec || n == 0);
842
843         if (n == 0)
844                 return;
845
846         if (LOG_PRI(priority) > s->max_level_store)
847                 return;
848
849         /* Stop early in case the information will not be stored
850          * in a journal. */
851         if (s->storage == STORAGE_NONE)
852                 return;
853
854         if (!ucred)
855                 goto finish;
856
857         r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
858         if (r < 0)
859                 goto finish;
860
861         /* example: /user/lennart/3/foobar
862          *          /system/dbus.service/foobar
863          *
864          * So let's cut of everything past the third /, since that is
865          * where user directories start */
866
867         c = strchr(path, '/');
868         if (c) {
869                 c = strchr(c+1, '/');
870                 if (c) {
871                         c = strchr(c+1, '/');
872                         if (c)
873                                 *c = 0;
874                 }
875         }
876
877         rl = journal_rate_limit_test(s->rate_limit, path,
878                                      priority & LOG_PRIMASK, available_space(s, false));
879
880         if (rl == 0)
881                 return;
882
883         /* Write a suppression message if we suppressed something */
884         if (rl > 1)
885                 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
886                                       "Suppressed %u messages from %s", rl - 1, path);
887
888 finish:
889         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
890 }
891
892
893 static int system_journal_open(Server *s) {
894         int r;
895         char *fn;
896         sd_id128_t machine;
897         char ids[33];
898
899         r = sd_id128_get_machine(&machine);
900         if (r < 0) {
901                 log_error("Failed to get machine id: %s", strerror(-r));
902                 return r;
903         }
904
905         sd_id128_to_string(machine, ids);
906
907         if (!s->system_journal &&
908             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
909             access("/run/systemd/journal/flushed", F_OK) >= 0) {
910
911                 /* If in auto mode: first try to create the machine
912                  * path, but not the prefix.
913                  *
914                  * If in persistent mode: create /var/log/journal and
915                  * the machine path */
916
917                 if (s->storage == STORAGE_PERSISTENT)
918                         (void) mkdir("/var/log/journal/", 0755);
919
920                 fn = strappenda("/var/log/journal/", ids);
921                 (void) mkdir(fn, 0755);
922
923                 fn = strappenda(fn, "/system.journal");
924                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
925
926                 if (r >= 0)
927                         server_fix_perms(s, s->system_journal, 0);
928                 else if (r < 0) {
929                         if (r != -ENOENT && r != -EROFS)
930                                 log_warning("Failed to open system journal: %s", strerror(-r));
931
932                         r = 0;
933                 }
934         }
935
936         if (!s->runtime_journal &&
937             (s->storage != STORAGE_NONE)) {
938
939                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
940                 if (!fn)
941                         return -ENOMEM;
942
943                 if (s->system_journal) {
944
945                         /* Try to open the runtime journal, but only
946                          * if it already exists, so that we can flush
947                          * it into the system journal */
948
949                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
950                         free(fn);
951
952                         if (r < 0) {
953                                 if (r != -ENOENT)
954                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
955
956                                 r = 0;
957                         }
958
959                 } else {
960
961                         /* OK, we really need the runtime journal, so create
962                          * it if necessary. */
963
964                         (void) mkdir_parents(fn, 0755);
965                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
966                         free(fn);
967
968                         if (r < 0) {
969                                 log_error("Failed to open runtime journal: %s", strerror(-r));
970                                 return r;
971                         }
972                 }
973
974                 if (s->runtime_journal)
975                         server_fix_perms(s, s->runtime_journal, 0);
976         }
977
978         available_space(s, true);
979
980         return r;
981 }
982
983 int server_flush_to_var(Server *s) {
984         int r;
985         sd_id128_t machine;
986         sd_journal *j = NULL;
987
988         assert(s);
989
990         if (s->storage != STORAGE_AUTO &&
991             s->storage != STORAGE_PERSISTENT)
992                 return 0;
993
994         if (!s->runtime_journal)
995                 return 0;
996
997         system_journal_open(s);
998
999         if (!s->system_journal)
1000                 return 0;
1001
1002         log_debug("Flushing to /var...");
1003
1004         r = sd_id128_get_machine(&machine);
1005         if (r < 0)
1006                 return r;
1007
1008         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1009         if (r < 0) {
1010                 log_error("Failed to read runtime journal: %s", strerror(-r));
1011                 return r;
1012         }
1013
1014         sd_journal_set_data_threshold(j, 0);
1015
1016         SD_JOURNAL_FOREACH(j) {
1017                 Object *o = NULL;
1018                 JournalFile *f;
1019
1020                 f = j->current_file;
1021                 assert(f && f->current_offset > 0);
1022
1023                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1024                 if (r < 0) {
1025                         log_error("Can't read entry: %s", strerror(-r));
1026                         goto finish;
1027                 }
1028
1029                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1030                 if (r >= 0)
1031                         continue;
1032
1033                 if (!shall_try_append_again(s->system_journal, r)) {
1034                         log_error("Can't write entry: %s", strerror(-r));
1035                         goto finish;
1036                 }
1037
1038                 server_rotate(s);
1039                 server_vacuum(s);
1040
1041                 if (!s->system_journal) {
1042                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1043                         r = -EIO;
1044                         goto finish;
1045                 }
1046
1047                 log_debug("Retrying write.");
1048                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1049                 if (r < 0) {
1050                         log_error("Can't write entry: %s", strerror(-r));
1051                         goto finish;
1052                 }
1053         }
1054
1055 finish:
1056         journal_file_post_change(s->system_journal);
1057
1058         journal_file_close(s->runtime_journal);
1059         s->runtime_journal = NULL;
1060
1061         if (r >= 0)
1062                 rm_rf("/run/log/journal", false, true, false);
1063
1064         sd_journal_close(j);
1065
1066         return r;
1067 }
1068
1069 int process_event(Server *s, struct epoll_event *ev) {
1070         assert(s);
1071         assert(ev);
1072
1073         if (ev->data.fd == s->signal_fd) {
1074                 struct signalfd_siginfo sfsi;
1075                 ssize_t n;
1076
1077                 if (ev->events != EPOLLIN) {
1078                         log_error("Got invalid event from epoll.");
1079                         return -EIO;
1080                 }
1081
1082                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1083                 if (n != sizeof(sfsi)) {
1084
1085                         if (n >= 0)
1086                                 return -EIO;
1087
1088                         if (errno == EINTR || errno == EAGAIN)
1089                                 return 1;
1090
1091                         return -errno;
1092                 }
1093
1094                 if (sfsi.ssi_signo == SIGUSR1) {
1095                         log_info("Received request to flush runtime journal from PID %"PRIu32,
1096                                  sfsi.ssi_pid);
1097                         touch("/run/systemd/journal/flushed");
1098                         server_flush_to_var(s);
1099                         server_sync(s);
1100                         return 1;
1101                 }
1102
1103                 if (sfsi.ssi_signo == SIGUSR2) {
1104                         log_info("Received request to rotate journal from PID %"PRIu32,
1105                                  sfsi.ssi_pid);
1106                         server_rotate(s);
1107                         server_vacuum(s);
1108                         return 1;
1109                 }
1110
1111                 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1112
1113                 return 0;
1114
1115         } else if (ev->data.fd == s->sync_timer_fd) {
1116                 int r;
1117                 uint64_t t;
1118
1119                 log_debug("Got sync request from epoll.");
1120
1121                 r = read(ev->data.fd, (void *)&t, sizeof(t));
1122                 if (r < 0)
1123                         return 0;
1124
1125                 server_sync(s);
1126                 return 1;
1127
1128         } else if (ev->data.fd == s->dev_kmsg_fd) {
1129                 int r;
1130
1131                 if (ev->events != EPOLLIN) {
1132                         log_error("Got invalid event from epoll.");
1133                         return -EIO;
1134                 }
1135
1136                 r = server_read_dev_kmsg(s);
1137                 if (r < 0)
1138                         return r;
1139
1140                 return 1;
1141
1142         } else if (ev->data.fd == s->native_fd ||
1143                    ev->data.fd == s->syslog_fd) {
1144
1145                 if (ev->events != EPOLLIN) {
1146                         log_error("Got invalid event from epoll.");
1147                         return -EIO;
1148                 }
1149
1150                 for (;;) {
1151                         struct msghdr msghdr;
1152                         struct iovec iovec;
1153                         struct ucred *ucred = NULL;
1154                         struct timeval *tv = NULL;
1155                         struct cmsghdr *cmsg;
1156                         char *label = NULL;
1157                         size_t label_len = 0;
1158                         union {
1159                                 struct cmsghdr cmsghdr;
1160
1161                                 /* We use NAME_MAX space for the
1162                                  * SELinux label here. The kernel
1163                                  * currently enforces no limit, but
1164                                  * according to suggestions from the
1165                                  * SELinux people this will change and
1166                                  * it will probably be identical to
1167                                  * NAME_MAX. For now we use that, but
1168                                  * this should be updated one day when
1169                                  * the final limit is known.*/
1170                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1171                                             CMSG_SPACE(sizeof(struct timeval)) +
1172                                             CMSG_SPACE(sizeof(int)) + /* fd */
1173                                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
1174                         } control;
1175                         ssize_t n;
1176                         int v;
1177                         int *fds = NULL;
1178                         unsigned n_fds = 0;
1179
1180                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1181                                 log_error("SIOCINQ failed: %m");
1182                                 return -errno;
1183                         }
1184
1185                         if (s->buffer_size < (size_t) v) {
1186                                 void *b;
1187                                 size_t l;
1188
1189                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1190                                 b = realloc(s->buffer, l+1);
1191
1192                                 if (!b) {
1193                                         log_error("Couldn't increase buffer.");
1194                                         return -ENOMEM;
1195                                 }
1196
1197                                 s->buffer_size = l;
1198                                 s->buffer = b;
1199                         }
1200
1201                         zero(iovec);
1202                         iovec.iov_base = s->buffer;
1203                         iovec.iov_len = s->buffer_size;
1204
1205                         zero(control);
1206                         zero(msghdr);
1207                         msghdr.msg_iov = &iovec;
1208                         msghdr.msg_iovlen = 1;
1209                         msghdr.msg_control = &control;
1210                         msghdr.msg_controllen = sizeof(control);
1211
1212                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1213                         if (n < 0) {
1214
1215                                 if (errno == EINTR || errno == EAGAIN)
1216                                         return 1;
1217
1218                                 log_error("recvmsg() failed: %m");
1219                                 return -errno;
1220                         }
1221
1222                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1223
1224                                 if (cmsg->cmsg_level == SOL_SOCKET &&
1225                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
1226                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1227                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
1228                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1229                                          cmsg->cmsg_type == SCM_SECURITY) {
1230                                         label = (char*) CMSG_DATA(cmsg);
1231                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
1232                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1233                                            cmsg->cmsg_type == SO_TIMESTAMP &&
1234                                            cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1235                                         tv = (struct timeval*) CMSG_DATA(cmsg);
1236                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1237                                          cmsg->cmsg_type == SCM_RIGHTS) {
1238                                         fds = (int*) CMSG_DATA(cmsg);
1239                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1240                                 }
1241                         }
1242
1243                         if (ev->data.fd == s->syslog_fd) {
1244                                 if (n > 0 && n_fds == 0) {
1245                                         s->buffer[n] = 0;
1246                                         server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1247                                 } else if (n_fds > 0)
1248                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
1249
1250                         } else {
1251                                 if (n > 0 && n_fds == 0)
1252                                         server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1253                                 else if (n == 0 && n_fds == 1)
1254                                         server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1255                                 else if (n_fds > 0)
1256                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
1257                         }
1258
1259                         close_many(fds, n_fds);
1260                 }
1261
1262                 return 1;
1263
1264         } else if (ev->data.fd == s->stdout_fd) {
1265
1266                 if (ev->events != EPOLLIN) {
1267                         log_error("Got invalid event from epoll.");
1268                         return -EIO;
1269                 }
1270
1271                 stdout_stream_new(s);
1272                 return 1;
1273
1274         } else {
1275                 StdoutStream *stream;
1276
1277                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1278                         log_error("Got invalid event from epoll.");
1279                         return -EIO;
1280                 }
1281
1282                 /* If it is none of the well-known fds, it must be an
1283                  * stdout stream fd. Note that this is a bit ugly here
1284                  * (since we rely that none of the well-known fds
1285                  * could be interpreted as pointer), but nonetheless
1286                  * safe, since the well-known fds would never get an
1287                  * fd > 4096, i.e. beyond the first memory page */
1288
1289                 stream = ev->data.ptr;
1290
1291                 if (stdout_stream_process(stream) <= 0)
1292                         stdout_stream_free(stream);
1293
1294                 return 1;
1295         }
1296
1297         log_error("Unknown event.");
1298         return 0;
1299 }
1300
1301 static int open_signalfd(Server *s) {
1302         sigset_t mask;
1303         struct epoll_event ev;
1304
1305         assert(s);
1306
1307         assert_se(sigemptyset(&mask) == 0);
1308         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1309         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1310
1311         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1312         if (s->signal_fd < 0) {
1313                 log_error("signalfd(): %m");
1314                 return -errno;
1315         }
1316
1317         zero(ev);
1318         ev.events = EPOLLIN;
1319         ev.data.fd = s->signal_fd;
1320
1321         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1322                 log_error("epoll_ctl(): %m");
1323                 return -errno;
1324         }
1325
1326         return 0;
1327 }
1328
1329 static int server_parse_proc_cmdline(Server *s) {
1330         _cleanup_free_ char *line = NULL;
1331         char *w, *state;
1332         int r;
1333         size_t l;
1334
1335         if (detect_container(NULL) > 0)
1336                 return 0;
1337
1338         r = read_one_line_file("/proc/cmdline", &line);
1339         if (r < 0) {
1340                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1341                 return 0;
1342         }
1343
1344         FOREACH_WORD_QUOTED(w, l, line, state) {
1345                 _cleanup_free_ char *word;
1346
1347                 word = strndup(w, l);
1348                 if (!word)
1349                         return -ENOMEM;
1350
1351                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1352                         r = parse_boolean(word + 35);
1353                         if (r < 0)
1354                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1355                         else
1356                                 s->forward_to_syslog = r;
1357                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1358                         r = parse_boolean(word + 33);
1359                         if (r < 0)
1360                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1361                         else
1362                                 s->forward_to_kmsg = r;
1363                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1364                         r = parse_boolean(word + 36);
1365                         if (r < 0)
1366                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1367                         else
1368                                 s->forward_to_console = r;
1369                 } else if (startswith(word, "systemd.journald"))
1370                         log_warning("Invalid systemd.journald parameter. Ignoring.");
1371         }
1372
1373         return 0;
1374 }
1375
1376 static int server_parse_config_file(Server *s) {
1377         static const char fn[] = "/etc/systemd/journald.conf";
1378         _cleanup_fclose_ FILE *f = NULL;
1379         int r;
1380
1381         assert(s);
1382
1383         f = fopen(fn, "re");
1384         if (!f) {
1385                 if (errno == ENOENT)
1386                         return 0;
1387
1388                 log_warning("Failed to open configuration file %s: %m", fn);
1389                 return -errno;
1390         }
1391
1392         r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1393                          (void*) journald_gperf_lookup, false, false, s);
1394         if (r < 0)
1395                 log_warning("Failed to parse configuration file: %s", strerror(-r));
1396
1397         return r;
1398 }
1399
1400 static int server_open_sync_timer(Server *s) {
1401         int r;
1402         struct epoll_event ev;
1403
1404         assert(s);
1405
1406         s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1407         if (s->sync_timer_fd < 0)
1408                 return -errno;
1409
1410         zero(ev);
1411         ev.events = EPOLLIN;
1412         ev.data.fd = s->sync_timer_fd;
1413
1414         r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1415         if (r < 0) {
1416                 log_error("Failed to add idle timer fd to epoll object: %m");
1417                 return -errno;
1418         }
1419
1420         return 0;
1421 }
1422
1423 int server_schedule_sync(Server *s, int priority) {
1424         int r;
1425
1426         assert(s);
1427
1428         if (priority <= LOG_CRIT) {
1429                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1430                 server_sync(s);
1431                 return 0;
1432         }
1433
1434         if (s->sync_scheduled)
1435                 return 0;
1436
1437         if (s->sync_interval_usec) {
1438                 struct itimerspec sync_timer_enable = {};
1439
1440                 timespec_store(&sync_timer_enable.it_value, s->sync_interval_usec);
1441
1442                 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1443                 if (r < 0)
1444                         return -errno;
1445         }
1446
1447         s->sync_scheduled = true;
1448
1449         return 0;
1450 }
1451
1452 int server_init(Server *s) {
1453         int n, r, fd;
1454
1455         assert(s);
1456
1457         zero(*s);
1458         s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1459                 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1460         s->compress = true;
1461         s->seal = true;
1462
1463         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1464         s->sync_scheduled = false;
1465
1466         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1467         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1468
1469         s->forward_to_syslog = true;
1470
1471         s->max_level_store = LOG_DEBUG;
1472         s->max_level_syslog = LOG_DEBUG;
1473         s->max_level_kmsg = LOG_NOTICE;
1474         s->max_level_console = LOG_INFO;
1475
1476         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1477         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1478
1479         server_parse_config_file(s);
1480         server_parse_proc_cmdline(s);
1481         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1482                 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1483                           (long long unsigned) s->rate_limit_interval,
1484                           s->rate_limit_burst);
1485                 s->rate_limit_interval = s->rate_limit_burst = 0;
1486         }
1487
1488         mkdir_p("/run/systemd/journal", 0755);
1489
1490         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1491         if (!s->user_journals)
1492                 return log_oom();
1493
1494         s->mmap = mmap_cache_new();
1495         if (!s->mmap)
1496                 return log_oom();
1497
1498         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1499         if (s->epoll_fd < 0) {
1500                 log_error("Failed to create epoll object: %m");
1501                 return -errno;
1502         }
1503
1504         n = sd_listen_fds(true);
1505         if (n < 0) {
1506                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1507                 return n;
1508         }
1509
1510         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1511
1512                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1513
1514                         if (s->native_fd >= 0) {
1515                                 log_error("Too many native sockets passed.");
1516                                 return -EINVAL;
1517                         }
1518
1519                         s->native_fd = fd;
1520
1521                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1522
1523                         if (s->stdout_fd >= 0) {
1524                                 log_error("Too many stdout sockets passed.");
1525                                 return -EINVAL;
1526                         }
1527
1528                         s->stdout_fd = fd;
1529
1530                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1531
1532                         if (s->syslog_fd >= 0) {
1533                                 log_error("Too many /dev/log sockets passed.");
1534                                 return -EINVAL;
1535                         }
1536
1537                         s->syslog_fd = fd;
1538
1539                 } else {
1540                         log_error("Unknown socket passed.");
1541                         return -EINVAL;
1542                 }
1543         }
1544
1545         r = server_open_syslog_socket(s);
1546         if (r < 0)
1547                 return r;
1548
1549         r = server_open_native_socket(s);
1550         if (r < 0)
1551                 return r;
1552
1553         r = server_open_stdout_socket(s);
1554         if (r < 0)
1555                 return r;
1556
1557         r = server_open_dev_kmsg(s);
1558         if (r < 0)
1559                 return r;
1560
1561         r = server_open_kernel_seqnum(s);
1562         if (r < 0)
1563                 return r;
1564
1565         r = server_open_sync_timer(s);
1566         if (r < 0)
1567                 return r;
1568
1569         r = open_signalfd(s);
1570         if (r < 0)
1571                 return r;
1572
1573         s->udev = udev_new();
1574         if (!s->udev)
1575                 return -ENOMEM;
1576
1577         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1578                                                s->rate_limit_burst);
1579         if (!s->rate_limit)
1580                 return -ENOMEM;
1581
1582         r = system_journal_open(s);
1583         if (r < 0)
1584                 return r;
1585
1586         return 0;
1587 }
1588
1589 void server_maybe_append_tags(Server *s) {
1590 #ifdef HAVE_GCRYPT
1591         JournalFile *f;
1592         Iterator i;
1593         usec_t n;
1594
1595         n = now(CLOCK_REALTIME);
1596
1597         if (s->system_journal)
1598                 journal_file_maybe_append_tag(s->system_journal, n);
1599
1600         HASHMAP_FOREACH(f, s->user_journals, i)
1601                 journal_file_maybe_append_tag(f, n);
1602 #endif
1603 }
1604
1605 void server_done(Server *s) {
1606         JournalFile *f;
1607         assert(s);
1608
1609         while (s->stdout_streams)
1610                 stdout_stream_free(s->stdout_streams);
1611
1612         if (s->system_journal)
1613                 journal_file_close(s->system_journal);
1614
1615         if (s->runtime_journal)
1616                 journal_file_close(s->runtime_journal);
1617
1618         while ((f = hashmap_steal_first(s->user_journals)))
1619                 journal_file_close(f);
1620
1621         hashmap_free(s->user_journals);
1622
1623         if (s->epoll_fd >= 0)
1624                 close_nointr_nofail(s->epoll_fd);
1625
1626         if (s->signal_fd >= 0)
1627                 close_nointr_nofail(s->signal_fd);
1628
1629         if (s->syslog_fd >= 0)
1630                 close_nointr_nofail(s->syslog_fd);
1631
1632         if (s->native_fd >= 0)
1633                 close_nointr_nofail(s->native_fd);
1634
1635         if (s->stdout_fd >= 0)
1636                 close_nointr_nofail(s->stdout_fd);
1637
1638         if (s->dev_kmsg_fd >= 0)
1639                 close_nointr_nofail(s->dev_kmsg_fd);
1640
1641         if (s->sync_timer_fd >= 0)
1642                 close_nointr_nofail(s->sync_timer_fd);
1643
1644         if (s->rate_limit)
1645                 journal_rate_limit_free(s->rate_limit);
1646
1647         if (s->kernel_seqnum)
1648                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1649
1650         free(s->buffer);
1651         free(s->tty_path);
1652
1653         if (s->mmap)
1654                 mmap_cache_unref(s->mmap);
1655
1656         if (s->udev)
1657                 udev_unref(s->udev);
1658 }