chiark / gitweb /
Update TODO
[elogind.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
26 #include <sys/mman.h>
27 #include <sys/timerfd.h>
28
29 #include <libudev.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
33
34 #include "fileio.h"
35 #include "mkdir.h"
36 #include "hashmap.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
40 #include "list.h"
41 #include "virt.h"
42 #include "missing.h"
43 #include "conf-parser.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-server.h"
48 #include "journald-rate-limit.h"
49 #include "journald-kmsg.h"
50 #include "journald-syslog.h"
51 #include "journald-stream.h"
52 #include "journald-console.h"
53 #include "journald-native.h"
54
55 #ifdef HAVE_ACL
56 #include <sys/acl.h>
57 #include <acl/libacl.h>
58 #include "acl-util.h"
59 #endif
60
61 #ifdef HAVE_SELINUX
62 #include <selinux/selinux.h>
63 #endif
64
/* Upper bound on the number of per-user journal files kept open at once;
 * find_journal() closes entries until the count drops below this. */
#define USER_JOURNALS_MAX               1024

/* Built-in defaults for sync and rate limiting.
 * NOTE(review): presumably applied during server setup outside this
 * excerpt -- confirm against the initialization code. */
#define DEFAULT_SYNC_INTERVAL_USEC      (5 * USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL     (30 * USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST        1000

/* Maximum age of the value cached by available_space() before the
 * journal directory is scanned again. */
#define RECHECK_AVAILABLE_SPACE_USEC    (30 * USEC_PER_SEC)
72
73 static const char* const storage_table[] = {
74         [STORAGE_AUTO] = "auto",
75         [STORAGE_VOLATILE] = "volatile",
76         [STORAGE_PERSISTENT] = "persistent",
77         [STORAGE_NONE] = "none"
78 };
79
80 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
81 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
82
83 static const char* const split_mode_table[] = {
84         [SPLIT_NONE] = "none",
85         [SPLIT_UID] = "uid",
86         [SPLIT_LOGIN] = "login"
87 };
88
89 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
90 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
91
92 static uint64_t available_space(Server *s, bool verbose) {
93         char ids[33];
94         _cleanup_free_ char *p = NULL;
95         sd_id128_t machine;
96         struct statvfs ss;
97         uint64_t sum = 0, ss_avail = 0, avail = 0;
98         int r;
99         _cleanup_closedir_ DIR *d = NULL;
100         usec_t ts;
101         const char *f;
102         JournalMetrics *m;
103
104         ts = now(CLOCK_MONOTONIC);
105
106         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
107             && !verbose)
108                 return s->cached_available_space;
109
110         r = sd_id128_get_machine(&machine);
111         if (r < 0)
112                 return 0;
113
114         if (s->system_journal) {
115                 f = "/var/log/journal/";
116                 m = &s->system_metrics;
117         } else {
118                 f = "/run/log/journal/";
119                 m = &s->runtime_metrics;
120         }
121
122         assert(m);
123
124         p = strappend(f, sd_id128_to_string(machine, ids));
125         if (!p)
126                 return 0;
127
128         d = opendir(p);
129         if (!d)
130                 return 0;
131
132         if (fstatvfs(dirfd(d), &ss) < 0)
133                 return 0;
134
135         for (;;) {
136                 struct stat st;
137                 struct dirent *de;
138                 union dirent_storage buf;
139
140                 r = readdir_r(d, &buf.de, &de);
141                 if (r != 0)
142                         break;
143
144                 if (!de)
145                         break;
146
147                 if (!endswith(de->d_name, ".journal") &&
148                     !endswith(de->d_name, ".journal~"))
149                         continue;
150
151                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
152                         continue;
153
154                 if (!S_ISREG(st.st_mode))
155                         continue;
156
157                 sum += (uint64_t) st.st_blocks * 512UL;
158         }
159
160         ss_avail = ss.f_bsize * ss.f_bavail;
161         avail = ss_avail > m->keep_free ? ss_avail - m->keep_free : 0;
162
163         s->cached_available_space = MIN(m->max_use, avail) > sum ? MIN(m->max_use, avail) - sum : 0;
164         s->cached_available_space_timestamp = ts;
165
166         if (verbose) {
167                 char    fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
168                         fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
169
170                 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
171                                       "%s journal is using %s (max %s, leaving %s of free %s, current limit %s).",
172                                       s->system_journal ? "Permanent" : "Runtime",
173                                       format_bytes(fb1, sizeof(fb1), sum),
174                                       format_bytes(fb2, sizeof(fb2), m->max_use),
175                                       format_bytes(fb3, sizeof(fb3), m->keep_free),
176                                       format_bytes(fb4, sizeof(fb4), ss_avail),
177                                       format_bytes(fb5, sizeof(fb5), MIN(m->max_use, avail)));
178         }
179
180         return s->cached_available_space;
181 }
182
183 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
184         int r;
185 #ifdef HAVE_ACL
186         acl_t acl;
187         acl_entry_t entry;
188         acl_permset_t permset;
189 #endif
190
191         assert(f);
192
193         r = fchmod(f->fd, 0640);
194         if (r < 0)
195                 log_warning("Failed to fix access mode on %s, ignoring: %s", f->path, strerror(-r));
196
197 #ifdef HAVE_ACL
198         if (uid <= 0)
199                 return;
200
201         acl = acl_get_fd(f->fd);
202         if (!acl) {
203                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
204                 return;
205         }
206
207         r = acl_find_uid(acl, uid, &entry);
208         if (r <= 0) {
209
210                 if (acl_create_entry(&acl, &entry) < 0 ||
211                     acl_set_tag_type(entry, ACL_USER) < 0 ||
212                     acl_set_qualifier(entry, &uid) < 0) {
213                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
214                         goto finish;
215                 }
216         }
217
218         /* We do not recalculate the mask unconditionally here,
219          * so that the fchmod() mask above stays intact. */
220         if (acl_get_permset(entry, &permset) < 0 ||
221             acl_add_perm(permset, ACL_READ) < 0 ||
222             calc_acl_mask_if_needed(&acl) < 0) {
223                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
224                 goto finish;
225         }
226
227         if (acl_set_fd(f->fd, acl) < 0)
228                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
229
230 finish:
231         acl_free(acl);
232 #endif
233 }
234
235 static JournalFile* find_journal(Server *s, uid_t uid) {
236         _cleanup_free_ char *p = NULL;
237         int r;
238         JournalFile *f;
239         sd_id128_t machine;
240
241         assert(s);
242
243         /* We split up user logs only on /var, not on /run. If the
244          * runtime file is open, we write to it exclusively, in order
245          * to guarantee proper order as soon as we flush /run to
246          * /var and close the runtime file. */
247
248         if (s->runtime_journal)
249                 return s->runtime_journal;
250
251         if (uid <= 0)
252                 return s->system_journal;
253
254         r = sd_id128_get_machine(&machine);
255         if (r < 0)
256                 return s->system_journal;
257
258         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
259         if (f)
260                 return f;
261
262         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
263                      SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
264                 return s->system_journal;
265
266         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
267                 /* Too many open? Then let's close one */
268                 f = hashmap_steal_first(s->user_journals);
269                 assert(f);
270                 journal_file_close(f);
271         }
272
273         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
274         if (r < 0)
275                 return s->system_journal;
276
277         server_fix_perms(s, f, uid);
278
279         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
280         if (r < 0) {
281                 journal_file_close(f);
282                 return s->system_journal;
283         }
284
285         return f;
286 }
287
288 void server_rotate(Server *s) {
289         JournalFile *f;
290         void *k;
291         Iterator i;
292         int r;
293
294         log_debug("Rotating...");
295
296         if (s->runtime_journal) {
297                 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
298                 if (r < 0)
299                         if (s->runtime_journal)
300                                 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
301                         else
302                                 log_error("Failed to create new runtime journal: %s", strerror(-r));
303                 else
304                         server_fix_perms(s, s->runtime_journal, 0);
305         }
306
307         if (s->system_journal) {
308                 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
309                 if (r < 0)
310                         if (s->system_journal)
311                                 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
312                         else
313                                 log_error("Failed to create new system journal: %s", strerror(-r));
314
315                 else
316                         server_fix_perms(s, s->system_journal, 0);
317         }
318
319         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
320                 r = journal_file_rotate(&f, s->compress, s->seal);
321                 if (r < 0)
322                         if (f)
323                                 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
324                         else
325                                 log_error("Failed to create user journal: %s", strerror(-r));
326                 else {
327                         hashmap_replace(s->user_journals, k, f);
328                         server_fix_perms(s, f, PTR_TO_UINT32(k));
329                 }
330         }
331 }
332
333 void server_sync(Server *s) {
334         static const struct itimerspec sync_timer_disable = {};
335         JournalFile *f;
336         void *k;
337         Iterator i;
338         int r;
339
340         if (s->system_journal) {
341                 r = journal_file_set_offline(s->system_journal);
342                 if (r < 0)
343                         log_error("Failed to sync system journal: %s", strerror(-r));
344         }
345
346         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
347                 r = journal_file_set_offline(f);
348                 if (r < 0)
349                         log_error("Failed to sync user journal: %s", strerror(-r));
350         }
351
352         r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
353         if (r < 0)
354                 log_error("Failed to disable max timer: %m");
355
356         s->sync_scheduled = false;
357 }
358
359 void server_vacuum(Server *s) {
360         char ids[33];
361         sd_id128_t machine;
362         int r;
363
364         log_debug("Vacuuming...");
365
366         s->oldest_file_usec = 0;
367
368         r = sd_id128_get_machine(&machine);
369         if (r < 0) {
370                 log_error("Failed to get machine ID: %s", strerror(-r));
371                 return;
372         }
373
374         sd_id128_to_string(machine, ids);
375
376         if (s->system_journal) {
377                 char *p = strappenda("/var/log/journal/", ids);
378
379                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
380                 if (r < 0 && r != -ENOENT)
381                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
382         }
383
384         if (s->runtime_journal) {
385                 char *p = strappenda("/run/log/journal/", ids);
386
387                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
388                 if (r < 0 && r != -ENOENT)
389                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
390         }
391
392         s->cached_available_space_timestamp = 0;
393 }
394
395 bool shall_try_append_again(JournalFile *f, int r) {
396
397         /* -E2BIG            Hit configured limit
398            -EFBIG            Hit fs limit
399            -EDQUOT           Quota limit hit
400            -ENOSPC           Disk full
401            -EHOSTDOWN        Other machine
402            -EBUSY            Unclean shutdown
403            -EPROTONOSUPPORT  Unsupported feature
404            -EBADMSG          Corrupted
405            -ENODATA          Truncated
406            -ESHUTDOWN        Already archived */
407
408         if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
409                 log_debug("%s: Allocation limit reached, rotating.", f->path);
410         else if (r == -EHOSTDOWN)
411                 log_info("%s: Journal file from other machine, rotating.", f->path);
412         else if (r == -EBUSY)
413                 log_info("%s: Unclean shutdown, rotating.", f->path);
414         else if (r == -EPROTONOSUPPORT)
415                 log_info("%s: Unsupported feature, rotating.", f->path);
416         else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
417                 log_warning("%s: Journal file corrupted, rotating.", f->path);
418         else
419                 return false;
420
421         return true;
422 }
423
424 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
425         JournalFile *f;
426         bool vacuumed = false;
427         int r;
428
429         assert(s);
430         assert(iovec);
431         assert(n > 0);
432
433         f = find_journal(s, uid);
434         if (!f)
435                 return;
436
437         if (journal_file_rotate_suggested(f, s->max_file_usec)) {
438                 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
439                 server_rotate(s);
440                 server_vacuum(s);
441                 vacuumed = true;
442
443                 f = find_journal(s, uid);
444                 if (!f)
445                         return;
446         }
447
448         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
449         if (r >= 0) {
450                 server_schedule_sync(s, priority);
451                 return;
452         }
453
454         if (vacuumed || !shall_try_append_again(f, r)) {
455                 size_t size = 0;
456                 unsigned i;
457                 for (i = 0; i < n; i++)
458                         size += iovec[i].iov_len;
459
460                 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
461                 return;
462         }
463
464         server_rotate(s);
465         server_vacuum(s);
466
467         f = find_journal(s, uid);
468         if (!f)
469                 return;
470
471         log_debug("Retrying write.");
472         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
473         if (r < 0) {
474                 size_t size = 0;
475                 unsigned i;
476                 for (i = 0; i < n; i++)
477                         size += iovec[i].iov_len;
478
479                 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
480         } else
481                 server_schedule_sync(s, priority);
482 }
483
484 static void dispatch_message_real(
485                 Server *s,
486                 struct iovec *iovec, unsigned n, unsigned m,
487                 struct ucred *ucred,
488                 struct timeval *tv,
489                 const char *label, size_t label_len,
490                 const char *unit_id,
491                 int priority,
492                 pid_t object_pid) {
493
494         char    pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
495                 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
496                 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
497                 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
498                 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
499                 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
500                 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=",
501                 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
502                 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
503                 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
504         uid_t object_uid;
505         gid_t object_gid;
506         char *x;
507         sd_id128_t id;
508         int r;
509         char *t, *c;
510         uid_t realuid = 0, owner = 0, journal_uid;
511         bool owner_valid = false;
512 #ifdef HAVE_AUDIT
513         char    audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
514                 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
515                 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
516                 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
517
518         uint32_t audit;
519         uid_t loginuid;
520 #endif
521
522         assert(s);
523         assert(iovec);
524         assert(n > 0);
525         assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
526
527         if (ucred) {
528                 realuid = ucred->uid;
529
530                 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
531                 IOVEC_SET_STRING(iovec[n++], pid);
532
533                 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
534                 IOVEC_SET_STRING(iovec[n++], uid);
535
536                 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
537                 IOVEC_SET_STRING(iovec[n++], gid);
538
539                 r = get_process_comm(ucred->pid, &t);
540                 if (r >= 0) {
541                         x = strappenda("_COMM=", t);
542                         free(t);
543                         IOVEC_SET_STRING(iovec[n++], x);
544                 }
545
546                 r = get_process_exe(ucred->pid, &t);
547                 if (r >= 0) {
548                         x = strappenda("_EXE=", t);
549                         free(t);
550                         IOVEC_SET_STRING(iovec[n++], x);
551                 }
552
553                 r = get_process_cmdline(ucred->pid, 0, false, &t);
554                 if (r >= 0) {
555                         x = strappenda("_CMDLINE=", t);
556                         free(t);
557                         IOVEC_SET_STRING(iovec[n++], x);
558                 }
559
560                 r = get_process_capeff(ucred->pid, &t);
561                 if (r >= 0) {
562                         x = strappenda("_CAP_EFFECTIVE=", t);
563                         free(t);
564                         IOVEC_SET_STRING(iovec[n++], x);
565                 }
566
567 #ifdef HAVE_AUDIT
568                 r = audit_session_from_pid(ucred->pid, &audit);
569                 if (r >= 0) {
570                         sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
571                         IOVEC_SET_STRING(iovec[n++], audit_session);
572                 }
573
574                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
575                 if (r >= 0) {
576                         sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
577                         IOVEC_SET_STRING(iovec[n++], audit_loginuid);
578                 }
579 #endif
580
581                 r = cg_pid_get_path_shifted(ucred->pid, NULL, &c);
582                 if (r >= 0) {
583                         char *session = NULL;
584
585                         x = strappenda("_SYSTEMD_CGROUP=", c);
586                         IOVEC_SET_STRING(iovec[n++], x);
587
588                         r = cg_path_get_session(c, &t);
589                         if (r >= 0) {
590                                 session = strappenda("_SYSTEMD_SESSION=", t);
591                                 free(t);
592                                 IOVEC_SET_STRING(iovec[n++], session);
593                         }
594
595                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
596                                 owner_valid = true;
597
598                                 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
599                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
600                         }
601
602                         if (cg_path_get_unit(c, &t) >= 0) {
603                                 x = strappenda("_SYSTEMD_UNIT=", t);
604                                 free(t);
605                                 IOVEC_SET_STRING(iovec[n++], x);
606                         } else if (unit_id && !session) {
607                                 x = strappenda("_SYSTEMD_UNIT=", unit_id);
608                                 IOVEC_SET_STRING(iovec[n++], x);
609                         }
610
611                         if (cg_path_get_user_unit(c, &t) >= 0) {
612                                 x = strappenda("_SYSTEMD_USER_UNIT=", t);
613                                 free(t);
614                                 IOVEC_SET_STRING(iovec[n++], x);
615                         } else if (unit_id && session) {
616                                 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
617                                 IOVEC_SET_STRING(iovec[n++], x);
618                         }
619
620                         if (cg_path_get_slice(c, &t) >= 0) {
621                                 x = strappenda("_SYSTEMD_SLICE=", t);
622                                 free(t);
623                                 IOVEC_SET_STRING(iovec[n++], x);
624                         }
625
626                         free(c);
627                 }
628
629 #ifdef HAVE_SELINUX
630                 if (label) {
631                         x = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);
632
633                         *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
634                         IOVEC_SET_STRING(iovec[n++], x);
635                 } else {
636                         security_context_t con;
637
638                         if (getpidcon(ucred->pid, &con) >= 0) {
639                                 x = strappenda("_SELINUX_CONTEXT=", con);
640
641                                 freecon(con);
642                                 IOVEC_SET_STRING(iovec[n++], x);
643                         }
644                 }
645 #endif
646         }
647         assert(n <= m);
648
649         if (object_pid) {
650                 r = get_process_uid(object_pid, &object_uid);
651                 if (r >= 0) {
652                         sprintf(o_uid, "OBJECT_UID=%lu", (unsigned long) object_uid);
653                         IOVEC_SET_STRING(iovec[n++], o_uid);
654                 }
655
656                 r = get_process_gid(object_pid, &object_gid);
657                 if (r >= 0) {
658                         sprintf(o_gid, "OBJECT_GID=%lu", (unsigned long) object_gid);
659                         IOVEC_SET_STRING(iovec[n++], o_gid);
660                 }
661
662                 r = get_process_comm(object_pid, &t);
663                 if (r >= 0) {
664                         x = strappenda("OBJECT_COMM=", t);
665                         free(t);
666                         IOVEC_SET_STRING(iovec[n++], x);
667                 }
668
669                 r = get_process_exe(object_pid, &t);
670                 if (r >= 0) {
671                         x = strappenda("OBJECT_EXE=", t);
672                         free(t);
673                         IOVEC_SET_STRING(iovec[n++], x);
674                 }
675
676                 r = get_process_cmdline(object_pid, 0, false, &t);
677                 if (r >= 0) {
678                         x = strappenda("OBJECT_CMDLINE=", t);
679                         free(t);
680                         IOVEC_SET_STRING(iovec[n++], x);
681                 }
682
683 #ifdef HAVE_AUDIT
684                 r = audit_session_from_pid(object_pid, &audit);
685                 if (r >= 0) {
686                         sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%lu", (unsigned long) audit);
687                         IOVEC_SET_STRING(iovec[n++], o_audit_session);
688                 }
689
690                 r = audit_loginuid_from_pid(object_pid, &loginuid);
691                 if (r >= 0) {
692                         sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
693                         IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
694                 }
695 #endif
696
697                 r = cg_pid_get_path_shifted(object_pid, NULL, &c);
698                 if (r >= 0) {
699                         x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
700                         IOVEC_SET_STRING(iovec[n++], x);
701
702                         r = cg_path_get_session(c, &t);
703                         if (r >= 0) {
704                                 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
705                                 free(t);
706                                 IOVEC_SET_STRING(iovec[n++], x);
707                         }
708
709                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
710                                 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
711                                 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
712                         }
713
714                         if (cg_path_get_unit(c, &t) >= 0) {
715                                 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
716                                 free(t);
717                                 IOVEC_SET_STRING(iovec[n++], x);
718                         }
719
720                         if (cg_path_get_user_unit(c, &t) >= 0) {
721                                 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
722                                 free(t);
723                                 IOVEC_SET_STRING(iovec[n++], x);
724                         }
725
726                         free(c);
727                 }
728         }
729         assert(n <= m);
730
731         if (tv) {
732                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
733                 IOVEC_SET_STRING(iovec[n++], source_time);
734         }
735
736         /* Note that strictly speaking storing the boot id here is
737          * redundant since the entry includes this in-line
738          * anyway. However, we need this indexed, too. */
739         r = sd_id128_get_boot(&id);
740         if (r >= 0) {
741                 sd_id128_to_string(id, boot_id + strlen("_BOOT_ID="));
742                 IOVEC_SET_STRING(iovec[n++], boot_id);
743         }
744
745         r = sd_id128_get_machine(&id);
746         if (r >= 0) {
747                 sd_id128_to_string(id, machine_id + strlen("_MACHINE_ID="));
748                 IOVEC_SET_STRING(iovec[n++], machine_id);
749         }
750
751         t = gethostname_malloc();
752         if (t) {
753                 x = strappenda("_HOSTNAME=", t);
754                 free(t);
755                 IOVEC_SET_STRING(iovec[n++], x);
756         }
757
758         assert(n <= m);
759
760         if (s->split_mode == SPLIT_UID && realuid > 0)
761                 /* Split up strictly by any UID */
762                 journal_uid = realuid;
763         else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
764                 /* Split up by login UIDs, this avoids creation of
765                  * individual journals for system UIDs.  We do this
766                  * only if the realuid is not root, in order not to
767                  * accidentally leak privileged information to the
768                  * user that is logged by a privileged process that is
769                  * part of an unprivileged session.*/
770                 journal_uid = owner;
771         else
772                 journal_uid = 0;
773
774         write_to_journal(s, journal_uid, iovec, n, priority);
775 }
776
777 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
778         char mid[11 + 32 + 1];
779         char buffer[16 + LINE_MAX + 1];
780         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
781         int n = 0;
782         va_list ap;
783         struct ucred ucred = {};
784
785         assert(s);
786         assert(format);
787
788         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
789         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
790
791         memcpy(buffer, "MESSAGE=", 8);
792         va_start(ap, format);
793         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
794         va_end(ap);
795         char_array_0(buffer);
796         IOVEC_SET_STRING(iovec[n++], buffer);
797
798         if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
799                 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
800                 char_array_0(mid);
801                 IOVEC_SET_STRING(iovec[n++], mid);
802         }
803
804         ucred.pid = getpid();
805         ucred.uid = getuid();
806         ucred.gid = getgid();
807
808         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
809 }
810
811 void server_dispatch_message(
812                 Server *s,
813                 struct iovec *iovec, unsigned n, unsigned m,
814                 struct ucred *ucred,
815                 struct timeval *tv,
816                 const char *label, size_t label_len,
817                 const char *unit_id,
818                 int priority,
819                 pid_t object_pid) {
820
821         int rl, r;
822         _cleanup_free_ char *path = NULL;
823         char *c;
824
825         assert(s);
826         assert(iovec || n == 0);
827
828         if (n == 0)
829                 return;
830
831         if (LOG_PRI(priority) > s->max_level_store)
832                 return;
833
834         /* Stop early in case the information will not be stored
835          * in a journal. */
836         if (s->storage == STORAGE_NONE)
837                 return;
838
839         if (!ucred)
840                 goto finish;
841
842         r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
843         if (r < 0)
844                 goto finish;
845
846         /* example: /user/lennart/3/foobar
847          *          /system/dbus.service/foobar
848          *
849          * So let's cut of everything past the third /, since that is
850          * where user directories start */
851
852         c = strchr(path, '/');
853         if (c) {
854                 c = strchr(c+1, '/');
855                 if (c) {
856                         c = strchr(c+1, '/');
857                         if (c)
858                                 *c = 0;
859                 }
860         }
861
862         rl = journal_rate_limit_test(s->rate_limit, path,
863                                      priority & LOG_PRIMASK, available_space(s, false));
864
865         if (rl == 0)
866                 return;
867
868         /* Write a suppression message if we suppressed something */
869         if (rl > 1)
870                 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
871                                       "Suppressed %u messages from %s", rl - 1, path);
872
873 finish:
874         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
875 }
876
877
878 static int system_journal_open(Server *s) {
879         int r;
880         char *fn;
881         sd_id128_t machine;
882         char ids[33];
883
884         r = sd_id128_get_machine(&machine);
885         if (r < 0) {
886                 log_error("Failed to get machine id: %s", strerror(-r));
887                 return r;
888         }
889
890         sd_id128_to_string(machine, ids);
891
892         if (!s->system_journal &&
893             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
894             access("/run/systemd/journal/flushed", F_OK) >= 0) {
895
896                 /* If in auto mode: first try to create the machine
897                  * path, but not the prefix.
898                  *
899                  * If in persistent mode: create /var/log/journal and
900                  * the machine path */
901
902                 if (s->storage == STORAGE_PERSISTENT)
903                         (void) mkdir("/var/log/journal/", 0755);
904
905                 fn = strappenda("/var/log/journal/", ids);
906                 (void) mkdir(fn, 0755);
907
908                 fn = strappenda(fn, "/system.journal");
909                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
910
911                 if (r >= 0)
912                         server_fix_perms(s, s->system_journal, 0);
913                 else if (r < 0) {
914                         if (r != -ENOENT && r != -EROFS)
915                                 log_warning("Failed to open system journal: %s", strerror(-r));
916
917                         r = 0;
918                 }
919         }
920
921         if (!s->runtime_journal &&
922             (s->storage != STORAGE_NONE)) {
923
924                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
925                 if (!fn)
926                         return -ENOMEM;
927
928                 if (s->system_journal) {
929
930                         /* Try to open the runtime journal, but only
931                          * if it already exists, so that we can flush
932                          * it into the system journal */
933
934                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
935                         free(fn);
936
937                         if (r < 0) {
938                                 if (r != -ENOENT)
939                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
940
941                                 r = 0;
942                         }
943
944                 } else {
945
946                         /* OK, we really need the runtime journal, so create
947                          * it if necessary. */
948
949                         (void) mkdir_parents(fn, 0755);
950                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
951                         free(fn);
952
953                         if (r < 0) {
954                                 log_error("Failed to open runtime journal: %s", strerror(-r));
955                                 return r;
956                         }
957                 }
958
959                 if (s->runtime_journal)
960                         server_fix_perms(s, s->runtime_journal, 0);
961         }
962
963         available_space(s, true);
964
965         return r;
966 }
967
968 int server_flush_to_var(Server *s) {
969         int r;
970         sd_id128_t machine;
971         sd_journal *j = NULL;
972
973         assert(s);
974
975         if (s->storage != STORAGE_AUTO &&
976             s->storage != STORAGE_PERSISTENT)
977                 return 0;
978
979         if (!s->runtime_journal)
980                 return 0;
981
982         system_journal_open(s);
983
984         if (!s->system_journal)
985                 return 0;
986
987         log_debug("Flushing to /var...");
988
989         r = sd_id128_get_machine(&machine);
990         if (r < 0)
991                 return r;
992
993         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
994         if (r < 0) {
995                 log_error("Failed to read runtime journal: %s", strerror(-r));
996                 return r;
997         }
998
999         sd_journal_set_data_threshold(j, 0);
1000
1001         SD_JOURNAL_FOREACH(j) {
1002                 Object *o = NULL;
1003                 JournalFile *f;
1004
1005                 f = j->current_file;
1006                 assert(f && f->current_offset > 0);
1007
1008                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1009                 if (r < 0) {
1010                         log_error("Can't read entry: %s", strerror(-r));
1011                         goto finish;
1012                 }
1013
1014                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1015                 if (r >= 0)
1016                         continue;
1017
1018                 if (!shall_try_append_again(s->system_journal, r)) {
1019                         log_error("Can't write entry: %s", strerror(-r));
1020                         goto finish;
1021                 }
1022
1023                 server_rotate(s);
1024                 server_vacuum(s);
1025
1026                 if (!s->system_journal) {
1027                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1028                         r = -EIO;
1029                         goto finish;
1030                 }
1031
1032                 log_debug("Retrying write.");
1033                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1034                 if (r < 0) {
1035                         log_error("Can't write entry: %s", strerror(-r));
1036                         goto finish;
1037                 }
1038         }
1039
1040 finish:
1041         journal_file_post_change(s->system_journal);
1042
1043         journal_file_close(s->runtime_journal);
1044         s->runtime_journal = NULL;
1045
1046         if (r >= 0)
1047                 rm_rf("/run/log/journal", false, true, false);
1048
1049         sd_journal_close(j);
1050
1051         return r;
1052 }
1053
1054 int process_event(Server *s, struct epoll_event *ev) {
1055         assert(s);
1056         assert(ev);
1057
1058         if (ev->data.fd == s->signal_fd) {
1059                 struct signalfd_siginfo sfsi;
1060                 ssize_t n;
1061
1062                 if (ev->events != EPOLLIN) {
1063                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1064                                   "signal fd", ev->events);
1065                         return -EIO;
1066                 }
1067
1068                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1069                 if (n != sizeof(sfsi)) {
1070
1071                         if (n >= 0)
1072                                 return -EIO;
1073
1074                         if (errno == EINTR || errno == EAGAIN)
1075                                 return 1;
1076
1077                         return -errno;
1078                 }
1079
1080                 if (sfsi.ssi_signo == SIGUSR1) {
1081                         log_info("Received request to flush runtime journal from PID %"PRIu32,
1082                                  sfsi.ssi_pid);
1083                         touch("/run/systemd/journal/flushed");
1084                         server_flush_to_var(s);
1085                         server_sync(s);
1086                         return 1;
1087                 }
1088
1089                 if (sfsi.ssi_signo == SIGUSR2) {
1090                         log_info("Received request to rotate journal from PID %"PRIu32,
1091                                  sfsi.ssi_pid);
1092                         server_rotate(s);
1093                         server_vacuum(s);
1094                         return 1;
1095                 }
1096
1097                 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1098
1099                 return 0;
1100
1101         } else if (ev->data.fd == s->sync_timer_fd) {
1102                 int r;
1103                 uint64_t t;
1104
1105                 log_debug("Got sync request from epoll.");
1106
1107                 r = read(ev->data.fd, (void *)&t, sizeof(t));
1108                 if (r < 0)
1109                         return 0;
1110
1111                 server_sync(s);
1112                 return 1;
1113
1114         } else if (ev->data.fd == s->dev_kmsg_fd) {
1115                 int r;
1116
1117                 if (ev->events & EPOLLERR)
1118                         log_warning("/dev/kmsg buffer overrun, some messages lost.");
1119
1120                 if (!(ev->events & EPOLLIN)) {
1121                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1122                                   "/dev/kmsg", ev->events);
1123                         return -EIO;
1124                 }
1125
1126                 r = server_read_dev_kmsg(s);
1127                 if (r < 0)
1128                         return r;
1129
1130                 return 1;
1131
1132         } else if (ev->data.fd == s->native_fd ||
1133                    ev->data.fd == s->syslog_fd) {
1134
1135                 if (ev->events != EPOLLIN) {
1136                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1137                                   ev->data.fd == s->native_fd ? "native fd" : "syslog fd",
1138                                   ev->events);
1139                         return -EIO;
1140                 }
1141
1142                 for (;;) {
1143                         struct msghdr msghdr;
1144                         struct iovec iovec;
1145                         struct ucred *ucred = NULL;
1146                         struct timeval *tv = NULL;
1147                         struct cmsghdr *cmsg;
1148                         char *label = NULL;
1149                         size_t label_len = 0;
1150                         union {
1151                                 struct cmsghdr cmsghdr;
1152
1153                                 /* We use NAME_MAX space for the
1154                                  * SELinux label here. The kernel
1155                                  * currently enforces no limit, but
1156                                  * according to suggestions from the
1157                                  * SELinux people this will change and
1158                                  * it will probably be identical to
1159                                  * NAME_MAX. For now we use that, but
1160                                  * this should be updated one day when
1161                                  * the final limit is known.*/
1162                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1163                                             CMSG_SPACE(sizeof(struct timeval)) +
1164                                             CMSG_SPACE(sizeof(int)) + /* fd */
1165                                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
1166                         } control;
1167                         ssize_t n;
1168                         int v;
1169                         int *fds = NULL;
1170                         unsigned n_fds = 0;
1171
1172                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1173                                 log_error("SIOCINQ failed: %m");
1174                                 return -errno;
1175                         }
1176
1177                         if (s->buffer_size < (size_t) v) {
1178                                 void *b;
1179                                 size_t l;
1180
1181                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1182                                 b = realloc(s->buffer, l+1);
1183
1184                                 if (!b) {
1185                                         log_error("Couldn't increase buffer.");
1186                                         return -ENOMEM;
1187                                 }
1188
1189                                 s->buffer_size = l;
1190                                 s->buffer = b;
1191                         }
1192
1193                         zero(iovec);
1194                         iovec.iov_base = s->buffer;
1195                         iovec.iov_len = s->buffer_size;
1196
1197                         zero(control);
1198                         zero(msghdr);
1199                         msghdr.msg_iov = &iovec;
1200                         msghdr.msg_iovlen = 1;
1201                         msghdr.msg_control = &control;
1202                         msghdr.msg_controllen = sizeof(control);
1203
1204                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1205                         if (n < 0) {
1206
1207                                 if (errno == EINTR || errno == EAGAIN)
1208                                         return 1;
1209
1210                                 log_error("recvmsg() failed: %m");
1211                                 return -errno;
1212                         }
1213
1214                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1215
1216                                 if (cmsg->cmsg_level == SOL_SOCKET &&
1217                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
1218                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1219                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
1220                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1221                                          cmsg->cmsg_type == SCM_SECURITY) {
1222                                         label = (char*) CMSG_DATA(cmsg);
1223                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
1224                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1225                                            cmsg->cmsg_type == SO_TIMESTAMP &&
1226                                            cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1227                                         tv = (struct timeval*) CMSG_DATA(cmsg);
1228                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1229                                          cmsg->cmsg_type == SCM_RIGHTS) {
1230                                         fds = (int*) CMSG_DATA(cmsg);
1231                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1232                                 }
1233                         }
1234
1235                         if (ev->data.fd == s->syslog_fd) {
1236                                 if (n > 0 && n_fds == 0) {
1237                                         s->buffer[n] = 0;
1238                                         server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1239                                 } else if (n_fds > 0)
1240                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
1241
1242                         } else {
1243                                 if (n > 0 && n_fds == 0)
1244                                         server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1245                                 else if (n == 0 && n_fds == 1)
1246                                         server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1247                                 else if (n_fds > 0)
1248                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
1249                         }
1250
1251                         close_many(fds, n_fds);
1252                 }
1253
1254                 return 1;
1255
1256         } else if (ev->data.fd == s->stdout_fd) {
1257
1258                 if (ev->events != EPOLLIN) {
1259                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1260                                   "stdout fd", ev->events);
1261                         return -EIO;
1262                 }
1263
1264                 stdout_stream_new(s);
1265                 return 1;
1266
1267         } else {
1268                 StdoutStream *stream;
1269
1270                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1271                         log_error("Got invalid event from epoll for %s: %"PRIx32,
1272                                   "stdout stream", ev->events);
1273                         log_error("Got invalid event from epoll.");
1274                         return -EIO;
1275                 }
1276
1277                 /* If it is none of the well-known fds, it must be an
1278                  * stdout stream fd. Note that this is a bit ugly here
1279                  * (since we rely that none of the well-known fds
1280                  * could be interpreted as pointer), but nonetheless
1281                  * safe, since the well-known fds would never get an
1282                  * fd > 4096, i.e. beyond the first memory page */
1283
1284                 stream = ev->data.ptr;
1285
1286                 if (stdout_stream_process(stream) <= 0)
1287                         stdout_stream_free(stream);
1288
1289                 return 1;
1290         }
1291
1292         log_error("Unknown event.");
1293         return 0;
1294 }
1295
1296 static int open_signalfd(Server *s) {
1297         sigset_t mask;
1298         struct epoll_event ev;
1299
1300         assert(s);
1301
1302         assert_se(sigemptyset(&mask) == 0);
1303         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1304         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1305
1306         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1307         if (s->signal_fd < 0) {
1308                 log_error("signalfd(): %m");
1309                 return -errno;
1310         }
1311
1312         zero(ev);
1313         ev.events = EPOLLIN;
1314         ev.data.fd = s->signal_fd;
1315
1316         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1317                 log_error("epoll_ctl(): %m");
1318                 return -errno;
1319         }
1320
1321         return 0;
1322 }
1323
1324 static int server_parse_proc_cmdline(Server *s) {
1325         _cleanup_free_ char *line = NULL;
1326         char *w, *state;
1327         int r;
1328         size_t l;
1329
1330         if (detect_container(NULL) > 0)
1331                 return 0;
1332
1333         r = read_one_line_file("/proc/cmdline", &line);
1334         if (r < 0) {
1335                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1336                 return 0;
1337         }
1338
1339         FOREACH_WORD_QUOTED(w, l, line, state) {
1340                 _cleanup_free_ char *word;
1341
1342                 word = strndup(w, l);
1343                 if (!word)
1344                         return -ENOMEM;
1345
1346                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1347                         r = parse_boolean(word + 35);
1348                         if (r < 0)
1349                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1350                         else
1351                                 s->forward_to_syslog = r;
1352                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1353                         r = parse_boolean(word + 33);
1354                         if (r < 0)
1355                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1356                         else
1357                                 s->forward_to_kmsg = r;
1358                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1359                         r = parse_boolean(word + 36);
1360                         if (r < 0)
1361                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1362                         else
1363                                 s->forward_to_console = r;
1364                 } else if (startswith(word, "systemd.journald"))
1365                         log_warning("Invalid systemd.journald parameter. Ignoring.");
1366         }
1367
1368         return 0;
1369 }
1370
1371 static int server_parse_config_file(Server *s) {
1372         static const char fn[] = "/etc/systemd/journald.conf";
1373         _cleanup_fclose_ FILE *f = NULL;
1374         int r;
1375
1376         assert(s);
1377
1378         f = fopen(fn, "re");
1379         if (!f) {
1380                 if (errno == ENOENT)
1381                         return 0;
1382
1383                 log_warning("Failed to open configuration file %s: %m", fn);
1384                 return -errno;
1385         }
1386
1387         r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1388                          (void*) journald_gperf_lookup, false, false, s);
1389         if (r < 0)
1390                 log_warning("Failed to parse configuration file: %s", strerror(-r));
1391
1392         return r;
1393 }
1394
1395 static int server_open_sync_timer(Server *s) {
1396         int r;
1397         struct epoll_event ev;
1398
1399         assert(s);
1400
1401         s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1402         if (s->sync_timer_fd < 0)
1403                 return -errno;
1404
1405         zero(ev);
1406         ev.events = EPOLLIN;
1407         ev.data.fd = s->sync_timer_fd;
1408
1409         r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1410         if (r < 0) {
1411                 log_error("Failed to add idle timer fd to epoll object: %m");
1412                 return -errno;
1413         }
1414
1415         return 0;
1416 }
1417
1418 int server_schedule_sync(Server *s, int priority) {
1419         int r;
1420
1421         assert(s);
1422
1423         if (priority <= LOG_CRIT) {
1424                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1425                 server_sync(s);
1426                 return 0;
1427         }
1428
1429         if (s->sync_scheduled)
1430                 return 0;
1431
1432         if (s->sync_interval_usec) {
1433                 struct itimerspec sync_timer_enable = {};
1434
1435                 timespec_store(&sync_timer_enable.it_value, s->sync_interval_usec);
1436
1437                 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1438                 if (r < 0)
1439                         return -errno;
1440         }
1441
1442         s->sync_scheduled = true;
1443
1444         return 0;
1445 }
1446
1447 int server_init(Server *s) {
1448         int n, r, fd;
1449
1450         assert(s);
1451
1452         zero(*s);
1453         s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1454                 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1455         s->compress = true;
1456         s->seal = true;
1457
1458         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1459         s->sync_scheduled = false;
1460
1461         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1462         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1463
1464         s->forward_to_syslog = true;
1465
1466         s->max_level_store = LOG_DEBUG;
1467         s->max_level_syslog = LOG_DEBUG;
1468         s->max_level_kmsg = LOG_NOTICE;
1469         s->max_level_console = LOG_INFO;
1470
1471         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1472         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1473
1474         server_parse_config_file(s);
1475         server_parse_proc_cmdline(s);
1476         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1477                 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1478                           (long long unsigned) s->rate_limit_interval,
1479                           s->rate_limit_burst);
1480                 s->rate_limit_interval = s->rate_limit_burst = 0;
1481         }
1482
1483         mkdir_p("/run/systemd/journal", 0755);
1484
1485         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1486         if (!s->user_journals)
1487                 return log_oom();
1488
1489         s->mmap = mmap_cache_new();
1490         if (!s->mmap)
1491                 return log_oom();
1492
1493         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1494         if (s->epoll_fd < 0) {
1495                 log_error("Failed to create epoll object: %m");
1496                 return -errno;
1497         }
1498
1499         n = sd_listen_fds(true);
1500         if (n < 0) {
1501                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1502                 return n;
1503         }
1504
1505         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1506
1507                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1508
1509                         if (s->native_fd >= 0) {
1510                                 log_error("Too many native sockets passed.");
1511                                 return -EINVAL;
1512                         }
1513
1514                         s->native_fd = fd;
1515
1516                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1517
1518                         if (s->stdout_fd >= 0) {
1519                                 log_error("Too many stdout sockets passed.");
1520                                 return -EINVAL;
1521                         }
1522
1523                         s->stdout_fd = fd;
1524
1525                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1526
1527                         if (s->syslog_fd >= 0) {
1528                                 log_error("Too many /dev/log sockets passed.");
1529                                 return -EINVAL;
1530                         }
1531
1532                         s->syslog_fd = fd;
1533
1534                 } else {
1535                         log_error("Unknown socket passed.");
1536                         return -EINVAL;
1537                 }
1538         }
1539
1540         r = server_open_syslog_socket(s);
1541         if (r < 0)
1542                 return r;
1543
1544         r = server_open_native_socket(s);
1545         if (r < 0)
1546                 return r;
1547
1548         r = server_open_stdout_socket(s);
1549         if (r < 0)
1550                 return r;
1551
1552         r = server_open_dev_kmsg(s);
1553         if (r < 0)
1554                 return r;
1555
1556         r = server_open_kernel_seqnum(s);
1557         if (r < 0)
1558                 return r;
1559
1560         r = server_open_sync_timer(s);
1561         if (r < 0)
1562                 return r;
1563
1564         r = open_signalfd(s);
1565         if (r < 0)
1566                 return r;
1567
1568         s->udev = udev_new();
1569         if (!s->udev)
1570                 return -ENOMEM;
1571
1572         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1573                                                s->rate_limit_burst);
1574         if (!s->rate_limit)
1575                 return -ENOMEM;
1576
1577         r = system_journal_open(s);
1578         if (r < 0)
1579                 return r;
1580
1581         return 0;
1582 }
1583
1584 void server_maybe_append_tags(Server *s) {
1585 #ifdef HAVE_GCRYPT
1586         JournalFile *f;
1587         Iterator i;
1588         usec_t n;
1589
1590         n = now(CLOCK_REALTIME);
1591
1592         if (s->system_journal)
1593                 journal_file_maybe_append_tag(s->system_journal, n);
1594
1595         HASHMAP_FOREACH(f, s->user_journals, i)
1596                 journal_file_maybe_append_tag(f, n);
1597 #endif
1598 }
1599
1600 void server_done(Server *s) {
1601         JournalFile *f;
1602         assert(s);
1603
1604         while (s->stdout_streams)
1605                 stdout_stream_free(s->stdout_streams);
1606
1607         if (s->system_journal)
1608                 journal_file_close(s->system_journal);
1609
1610         if (s->runtime_journal)
1611                 journal_file_close(s->runtime_journal);
1612
1613         while ((f = hashmap_steal_first(s->user_journals)))
1614                 journal_file_close(f);
1615
1616         hashmap_free(s->user_journals);
1617
1618         if (s->epoll_fd >= 0)
1619                 close_nointr_nofail(s->epoll_fd);
1620
1621         if (s->signal_fd >= 0)
1622                 close_nointr_nofail(s->signal_fd);
1623
1624         if (s->syslog_fd >= 0)
1625                 close_nointr_nofail(s->syslog_fd);
1626
1627         if (s->native_fd >= 0)
1628                 close_nointr_nofail(s->native_fd);
1629
1630         if (s->stdout_fd >= 0)
1631                 close_nointr_nofail(s->stdout_fd);
1632
1633         if (s->dev_kmsg_fd >= 0)
1634                 close_nointr_nofail(s->dev_kmsg_fd);
1635
1636         if (s->sync_timer_fd >= 0)
1637                 close_nointr_nofail(s->sync_timer_fd);
1638
1639         if (s->rate_limit)
1640                 journal_rate_limit_free(s->rate_limit);
1641
1642         if (s->kernel_seqnum)
1643                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1644
1645         free(s->buffer);
1646         free(s->tty_path);
1647
1648         if (s->mmap)
1649                 mmap_cache_unref(s->mmap);
1650
1651         if (s->udev)
1652                 udev_unref(s->udev);
1653 }