chiark / gitweb /
journald: log the slice of a process along with each message in _SYSTEMD_SLICE=
[elogind.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
26 #include <sys/mman.h>
27 #include <sys/timerfd.h>
28
29 #include <libudev.h>
30 #include <systemd/sd-journal.h>
31 #include <systemd/sd-messages.h>
32 #include <systemd/sd-daemon.h>
33
34 #include "fileio.h"
35 #include "mkdir.h"
36 #include "hashmap.h"
37 #include "journal-file.h"
38 #include "socket-util.h"
39 #include "cgroup-util.h"
40 #include "list.h"
41 #include "virt.h"
42 #include "missing.h"
43 #include "conf-parser.h"
44 #include "journal-internal.h"
45 #include "journal-vacuum.h"
46 #include "journal-authenticate.h"
47 #include "journald-server.h"
48 #include "journald-rate-limit.h"
49 #include "journald-kmsg.h"
50 #include "journald-syslog.h"
51 #include "journald-stream.h"
52 #include "journald-console.h"
53 #include "journald-native.h"
54
55 #ifdef HAVE_ACL
56 #include <sys/acl.h>
57 #include <acl/libacl.h>
58 #include "acl-util.h"
59 #endif
60
61 #ifdef HAVE_SELINUX
62 #include <selinux/selinux.h>
63 #endif
64
/* Upper bound on the number of per-user journal files kept open at once;
 * find_journal() closes already-open ones until the count is below this
 * before opening another. */
#define USER_JOURNALS_MAX 1024

/* Default sync interval and rate-limit settings.
 * NOTE(review): not referenced in the visible part of this file; presumably
 * applied when the server is initialized -- confirm. */
#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000

/* How long available_space() keeps returning its cached result before it
 * re-scans the journal directory (verbose calls always re-scan). */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
72
/* Textual names of the Storage enum values, indexed by the enum value. */
static const char* const storage_table[] = {
        [STORAGE_AUTO] = "auto",
        [STORAGE_VOLATILE] = "volatile",
        [STORAGE_PERSISTENT] = "persistent",
        [STORAGE_NONE] = "none"
};

/* NOTE(review): presumably generates the string <-> Storage conversion
 * helpers backed by storage_table -- confirm against the macro definition. */
DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
/* Defines config_parse_storage(); NOTE(review): the last argument looks like
 * the message reported when a value cannot be parsed -- confirm. */
DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
82
/* Textual names of the SplitMode enum values, indexed by the enum value. */
static const char* const split_mode_table[] = {
        [SPLIT_NONE] = "none",
        [SPLIT_UID] = "uid",
        [SPLIT_LOGIN] = "login"
};

/* NOTE(review): presumably generates the string <-> SplitMode conversion
 * helpers backed by split_mode_table -- confirm against the macro definition. */
DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
/* Defines config_parse_split_mode(); NOTE(review): the last argument looks
 * like the message reported when a value cannot be parsed -- confirm. */
DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
91
92 static uint64_t available_space(Server *s, bool verbose) {
93         char ids[33];
94         _cleanup_free_ char *p = NULL;
95         sd_id128_t machine;
96         struct statvfs ss;
97         uint64_t sum = 0, ss_avail = 0, avail = 0;
98         int r;
99         _cleanup_closedir_ DIR *d = NULL;
100         usec_t ts;
101         const char *f;
102         JournalMetrics *m;
103
104         ts = now(CLOCK_MONOTONIC);
105
106         if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
107             && !verbose)
108                 return s->cached_available_space;
109
110         r = sd_id128_get_machine(&machine);
111         if (r < 0)
112                 return 0;
113
114         if (s->system_journal) {
115                 f = "/var/log/journal/";
116                 m = &s->system_metrics;
117         } else {
118                 f = "/run/log/journal/";
119                 m = &s->runtime_metrics;
120         }
121
122         assert(m);
123
124         p = strappend(f, sd_id128_to_string(machine, ids));
125         if (!p)
126                 return 0;
127
128         d = opendir(p);
129         if (!d)
130                 return 0;
131
132         if (fstatvfs(dirfd(d), &ss) < 0)
133                 return 0;
134
135         for (;;) {
136                 struct stat st;
137                 struct dirent *de;
138                 union dirent_storage buf;
139
140                 r = readdir_r(d, &buf.de, &de);
141                 if (r != 0)
142                         break;
143
144                 if (!de)
145                         break;
146
147                 if (!endswith(de->d_name, ".journal") &&
148                     !endswith(de->d_name, ".journal~"))
149                         continue;
150
151                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
152                         continue;
153
154                 if (!S_ISREG(st.st_mode))
155                         continue;
156
157                 sum += (uint64_t) st.st_blocks * 512UL;
158         }
159
160         ss_avail = ss.f_bsize * ss.f_bavail;
161         avail = ss_avail > m->keep_free ? ss_avail - m->keep_free : 0;
162
163         s->cached_available_space = MIN(m->max_use, avail) > sum ? MIN(m->max_use, avail) - sum : 0;
164         s->cached_available_space_timestamp = ts;
165
166         if (verbose) {
167                 char    fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
168                         fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
169
170                 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
171                                       "%s journal is using %s (max %s, leaving %s of free %s, current limit %s).",
172                                       s->system_journal ? "Permanent" : "Runtime",
173                                       format_bytes(fb1, sizeof(fb1), sum),
174                                       format_bytes(fb2, sizeof(fb2), m->max_use),
175                                       format_bytes(fb3, sizeof(fb3), m->keep_free),
176                                       format_bytes(fb4, sizeof(fb4), ss_avail),
177                                       format_bytes(fb5, sizeof(fb5), MIN(m->max_use, avail)));
178         }
179
180         return s->cached_available_space;
181 }
182
183 static void server_read_file_gid(Server *s) {
184         const char *g = "systemd-journal";
185         int r;
186
187         assert(s);
188
189         if (s->file_gid_valid)
190                 return;
191
192         r = get_group_creds(&g, &s->file_gid);
193         if (r < 0)
194                 log_warning("Failed to resolve '%s' group: %s", g, strerror(-r));
195
196         /* if we couldn't read the gid, then it will be 0, but that's
197          * fine and we shouldn't try to resolve the group again, so
198          * let's just pretend it worked right-away. */
199         s->file_gid_valid = true;
200 }
201
202 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
203         int r;
204 #ifdef HAVE_ACL
205         acl_t acl;
206         acl_entry_t entry;
207         acl_permset_t permset;
208 #endif
209
210         assert(f);
211
212         server_read_file_gid(s);
213
214         r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
215         if (r < 0)
216                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
217
218 #ifdef HAVE_ACL
219         if (uid <= 0)
220                 return;
221
222         acl = acl_get_fd(f->fd);
223         if (!acl) {
224                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
225                 return;
226         }
227
228         r = acl_find_uid(acl, uid, &entry);
229         if (r <= 0) {
230
231                 if (acl_create_entry(&acl, &entry) < 0 ||
232                     acl_set_tag_type(entry, ACL_USER) < 0 ||
233                     acl_set_qualifier(entry, &uid) < 0) {
234                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
235                         goto finish;
236                 }
237         }
238
239         /* We do not recalculate the mask unconditionally here,
240          * so that the fchmod() mask above stays intact. */
241         if (acl_get_permset(entry, &permset) < 0 ||
242             acl_add_perm(permset, ACL_READ) < 0 ||
243             calc_acl_mask_if_needed(&acl) < 0) {
244                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
245                 goto finish;
246         }
247
248         if (acl_set_fd(f->fd, acl) < 0)
249                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
250
251 finish:
252         acl_free(acl);
253 #endif
254 }
255
256 static JournalFile* find_journal(Server *s, uid_t uid) {
257         _cleanup_free_ char *p = NULL;
258         int r;
259         JournalFile *f;
260         sd_id128_t machine;
261
262         assert(s);
263
264         /* We split up user logs only on /var, not on /run. If the
265          * runtime file is open, we write to it exclusively, in order
266          * to guarantee proper order as soon as we flush /run to
267          * /var and close the runtime file. */
268
269         if (s->runtime_journal)
270                 return s->runtime_journal;
271
272         if (uid <= 0)
273                 return s->system_journal;
274
275         r = sd_id128_get_machine(&machine);
276         if (r < 0)
277                 return s->system_journal;
278
279         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
280         if (f)
281                 return f;
282
283         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
284                      SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
285                 return s->system_journal;
286
287         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
288                 /* Too many open? Then let's close one */
289                 f = hashmap_steal_first(s->user_journals);
290                 assert(f);
291                 journal_file_close(f);
292         }
293
294         r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
295         if (r < 0)
296                 return s->system_journal;
297
298         server_fix_perms(s, f, uid);
299
300         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
301         if (r < 0) {
302                 journal_file_close(f);
303                 return s->system_journal;
304         }
305
306         return f;
307 }
308
309 void server_rotate(Server *s) {
310         JournalFile *f;
311         void *k;
312         Iterator i;
313         int r;
314
315         log_debug("Rotating...");
316
317         if (s->runtime_journal) {
318                 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
319                 if (r < 0)
320                         if (s->runtime_journal)
321                                 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
322                         else
323                                 log_error("Failed to create new runtime journal: %s", strerror(-r));
324                 else
325                         server_fix_perms(s, s->runtime_journal, 0);
326         }
327
328         if (s->system_journal) {
329                 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
330                 if (r < 0)
331                         if (s->system_journal)
332                                 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
333                         else
334                                 log_error("Failed to create new system journal: %s", strerror(-r));
335
336                 else
337                         server_fix_perms(s, s->system_journal, 0);
338         }
339
340         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
341                 r = journal_file_rotate(&f, s->compress, s->seal);
342                 if (r < 0)
343                         if (f)
344                                 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
345                         else
346                                 log_error("Failed to create user journal: %s", strerror(-r));
347                 else {
348                         hashmap_replace(s->user_journals, k, f);
349                         server_fix_perms(s, f, PTR_TO_UINT32(k));
350                 }
351         }
352 }
353
354 void server_sync(Server *s) {
355         static const struct itimerspec sync_timer_disable = {};
356         JournalFile *f;
357         void *k;
358         Iterator i;
359         int r;
360
361         if (s->system_journal) {
362                 r = journal_file_set_offline(s->system_journal);
363                 if (r < 0)
364                         log_error("Failed to sync system journal: %s", strerror(-r));
365         }
366
367         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
368                 r = journal_file_set_offline(f);
369                 if (r < 0)
370                         log_error("Failed to sync user journal: %s", strerror(-r));
371         }
372
373         r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
374         if (r < 0)
375                 log_error("Failed to disable max timer: %m");
376
377         s->sync_scheduled = false;
378 }
379
380 void server_vacuum(Server *s) {
381         char ids[33];
382         sd_id128_t machine;
383         int r;
384
385         log_debug("Vacuuming...");
386
387         s->oldest_file_usec = 0;
388
389         r = sd_id128_get_machine(&machine);
390         if (r < 0) {
391                 log_error("Failed to get machine ID: %s", strerror(-r));
392                 return;
393         }
394
395         sd_id128_to_string(machine, ids);
396
397         if (s->system_journal) {
398                 char *p = strappenda("/var/log/journal/", ids);
399
400                 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
401                 if (r < 0 && r != -ENOENT)
402                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
403         }
404
405         if (s->runtime_journal) {
406                 char *p = strappenda("/run/log/journal/", ids);
407
408                 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
409                 if (r < 0 && r != -ENOENT)
410                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
411         }
412
413         s->cached_available_space_timestamp = 0;
414 }
415
416 bool shall_try_append_again(JournalFile *f, int r) {
417
418         /* -E2BIG            Hit configured limit
419            -EFBIG            Hit fs limit
420            -EDQUOT           Quota limit hit
421            -ENOSPC           Disk full
422            -EHOSTDOWN        Other machine
423            -EBUSY            Unclean shutdown
424            -EPROTONOSUPPORT  Unsupported feature
425            -EBADMSG          Corrupted
426            -ENODATA          Truncated
427            -ESHUTDOWN        Already archived */
428
429         if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
430                 log_debug("%s: Allocation limit reached, rotating.", f->path);
431         else if (r == -EHOSTDOWN)
432                 log_info("%s: Journal file from other machine, rotating.", f->path);
433         else if (r == -EBUSY)
434                 log_info("%s: Unclean shutdown, rotating.", f->path);
435         else if (r == -EPROTONOSUPPORT)
436                 log_info("%s: Unsupported feature, rotating.", f->path);
437         else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
438                 log_warning("%s: Journal file corrupted, rotating.", f->path);
439         else
440                 return false;
441
442         return true;
443 }
444
445 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
446         JournalFile *f;
447         bool vacuumed = false;
448         int r;
449
450         assert(s);
451         assert(iovec);
452         assert(n > 0);
453
454         f = find_journal(s, uid);
455         if (!f)
456                 return;
457
458         if (journal_file_rotate_suggested(f, s->max_file_usec)) {
459                 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
460                 server_rotate(s);
461                 server_vacuum(s);
462                 vacuumed = true;
463
464                 f = find_journal(s, uid);
465                 if (!f)
466                         return;
467         }
468
469         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
470         if (r >= 0) {
471                 server_schedule_sync(s, priority);
472                 return;
473         }
474
475         if (vacuumed || !shall_try_append_again(f, r)) {
476                 size_t size = 0;
477                 unsigned i;
478                 for (i = 0; i < n; i++)
479                         size += iovec[i].iov_len;
480
481                 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
482                 return;
483         }
484
485         server_rotate(s);
486         server_vacuum(s);
487
488         f = find_journal(s, uid);
489         if (!f)
490                 return;
491
492         log_debug("Retrying write.");
493         r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
494         if (r < 0) {
495                 size_t size = 0;
496                 unsigned i;
497                 for (i = 0; i < n; i++)
498                         size += iovec[i].iov_len;
499
500                 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
501         } else
502                 server_schedule_sync(s, priority);
503 }
504
505 static void dispatch_message_real(
506                 Server *s,
507                 struct iovec *iovec, unsigned n, unsigned m,
508                 struct ucred *ucred,
509                 struct timeval *tv,
510                 const char *label, size_t label_len,
511                 const char *unit_id,
512                 int priority,
513                 pid_t object_pid) {
514
515         char    pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
516                 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
517                 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
518                 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
519                 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
520                 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
521                 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=",
522                 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
523                 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
524                 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
525         uid_t object_uid;
526         gid_t object_gid;
527         char *x;
528         sd_id128_t id;
529         int r;
530         char *t, *c;
531         uid_t realuid = 0, owner = 0, journal_uid;
532         bool owner_valid = false;
533 #ifdef HAVE_AUDIT
534         char    audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
535                 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
536                 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
537                 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
538
539         uint32_t audit;
540         uid_t loginuid;
541 #endif
542
543         assert(s);
544         assert(iovec);
545         assert(n > 0);
546         assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
547
548         if (ucred) {
549                 realuid = ucred->uid;
550
551                 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
552                 IOVEC_SET_STRING(iovec[n++], pid);
553
554                 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
555                 IOVEC_SET_STRING(iovec[n++], uid);
556
557                 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
558                 IOVEC_SET_STRING(iovec[n++], gid);
559
560                 r = get_process_comm(ucred->pid, &t);
561                 if (r >= 0) {
562                         x = strappenda("_COMM=", t);
563                         free(t);
564                         IOVEC_SET_STRING(iovec[n++], x);
565                 }
566
567                 r = get_process_exe(ucred->pid, &t);
568                 if (r >= 0) {
569                         x = strappenda("_EXE=", t);
570                         free(t);
571                         IOVEC_SET_STRING(iovec[n++], x);
572                 }
573
574                 r = get_process_cmdline(ucred->pid, 0, false, &t);
575                 if (r >= 0) {
576                         x = strappenda("_CMDLINE=", t);
577                         free(t);
578                         IOVEC_SET_STRING(iovec[n++], x);
579                 }
580
581                 r = get_process_capeff(ucred->pid, &t);
582                 if (r >= 0) {
583                         x = strappenda("_CAP_EFFECTIVE=", t);
584                         free(t);
585                         IOVEC_SET_STRING(iovec[n++], x);
586                 }
587
588 #ifdef HAVE_AUDIT
589                 r = audit_session_from_pid(ucred->pid, &audit);
590                 if (r >= 0) {
591                         sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
592                         IOVEC_SET_STRING(iovec[n++], audit_session);
593                 }
594
595                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
596                 if (r >= 0) {
597                         sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
598                         IOVEC_SET_STRING(iovec[n++], audit_loginuid);
599                 }
600 #endif
601
602                 r = cg_pid_get_path_shifted(ucred->pid, NULL, &c);
603                 if (r >= 0) {
604                         char *session = NULL;
605
606                         x = strappenda("_SYSTEMD_CGROUP=", c);
607                         IOVEC_SET_STRING(iovec[n++], x);
608
609                         r = cg_path_get_session(c, &t);
610                         if (r >= 0) {
611                                 session = strappenda("_SYSTEMD_SESSION=", t);
612                                 free(t);
613                                 IOVEC_SET_STRING(iovec[n++], session);
614                         }
615
616                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
617                                 owner_valid = true;
618
619                                 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
620                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
621                         }
622
623                         if (cg_path_get_unit(c, &t) >= 0) {
624                                 x = strappenda("_SYSTEMD_UNIT=", t);
625                                 free(t);
626                                 IOVEC_SET_STRING(iovec[n++], x);
627                         } else if (unit_id && !session) {
628                                 x = strappenda("_SYSTEMD_UNIT=", unit_id);
629                                 IOVEC_SET_STRING(iovec[n++], x);
630                         }
631
632                         if (cg_path_get_user_unit(c, &t) >= 0) {
633                                 x = strappenda("_SYSTEMD_USER_UNIT=", t);
634                                 free(t);
635                                 IOVEC_SET_STRING(iovec[n++], x);
636                         } else if (unit_id && session) {
637                                 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
638                                 IOVEC_SET_STRING(iovec[n++], x);
639                         }
640
641                         if (cg_path_get_slice(c, &t) >= 0) {
642                                 x = strappenda("_SYSTEMD_SLICE=", t);
643                                 free(t);
644                                 IOVEC_SET_STRING(iovec[n++], x);
645                         }
646
647                         free(c);
648                 }
649
650 #ifdef HAVE_SELINUX
651                 if (label) {
652                         x = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);
653
654                         *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
655                         IOVEC_SET_STRING(iovec[n++], x);
656                 } else {
657                         security_context_t con;
658
659                         if (getpidcon(ucred->pid, &con) >= 0) {
660                                 x = strappenda("_SELINUX_CONTEXT=", con);
661
662                                 freecon(con);
663                                 IOVEC_SET_STRING(iovec[n++], x);
664                         }
665                 }
666 #endif
667         }
668         assert(n <= m);
669
670         if (object_pid) {
671                 r = get_process_uid(object_pid, &object_uid);
672                 if (r >= 0) {
673                         sprintf(o_uid, "OBJECT_UID=%lu", (unsigned long) object_uid);
674                         IOVEC_SET_STRING(iovec[n++], o_uid);
675                 }
676
677                 r = get_process_gid(object_pid, &object_gid);
678                 if (r >= 0) {
679                         sprintf(o_gid, "OBJECT_GID=%lu", (unsigned long) object_gid);
680                         IOVEC_SET_STRING(iovec[n++], o_gid);
681                 }
682
683                 r = get_process_comm(object_pid, &t);
684                 if (r >= 0) {
685                         x = strappenda("OBJECT_COMM=", t);
686                         free(t);
687                         IOVEC_SET_STRING(iovec[n++], x);
688                 }
689
690                 r = get_process_exe(object_pid, &t);
691                 if (r >= 0) {
692                         x = strappenda("OBJECT_EXE=", t);
693                         free(t);
694                         IOVEC_SET_STRING(iovec[n++], x);
695                 }
696
697                 r = get_process_cmdline(object_pid, 0, false, &t);
698                 if (r >= 0) {
699                         x = strappenda("OBJECT_CMDLINE=", t);
700                         free(t);
701                         IOVEC_SET_STRING(iovec[n++], x);
702                 }
703
704 #ifdef HAVE_AUDIT
705                 r = audit_session_from_pid(object_pid, &audit);
706                 if (r >= 0) {
707                         sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%lu", (unsigned long) audit);
708                         IOVEC_SET_STRING(iovec[n++], o_audit_session);
709                 }
710
711                 r = audit_loginuid_from_pid(object_pid, &loginuid);
712                 if (r >= 0) {
713                         sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
714                         IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
715                 }
716 #endif
717
718                 r = cg_pid_get_path_shifted(object_pid, NULL, &c);
719                 if (r >= 0) {
720                         x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
721                         IOVEC_SET_STRING(iovec[n++], x);
722
723                         r = cg_path_get_session(c, &t);
724                         if (r >= 0) {
725                                 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
726                                 free(t);
727                                 IOVEC_SET_STRING(iovec[n++], x);
728                         }
729
730                         if (cg_path_get_owner_uid(c, &owner) >= 0) {
731                                 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
732                                 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
733                         }
734
735                         if (cg_path_get_unit(c, &t) >= 0) {
736                                 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
737                                 free(t);
738                                 IOVEC_SET_STRING(iovec[n++], x);
739                         }
740
741                         if (cg_path_get_user_unit(c, &t) >= 0) {
742                                 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
743                                 free(t);
744                                 IOVEC_SET_STRING(iovec[n++], x);
745                         }
746
747                         free(c);
748                 }
749         }
750         assert(n <= m);
751
752         if (tv) {
753                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
754                 IOVEC_SET_STRING(iovec[n++], source_time);
755         }
756
757         /* Note that strictly speaking storing the boot id here is
758          * redundant since the entry includes this in-line
759          * anyway. However, we need this indexed, too. */
760         r = sd_id128_get_boot(&id);
761         if (r >= 0) {
762                 sd_id128_to_string(id, boot_id + strlen("_BOOT_ID="));
763                 IOVEC_SET_STRING(iovec[n++], boot_id);
764         }
765
766         r = sd_id128_get_machine(&id);
767         if (r >= 0) {
768                 sd_id128_to_string(id, machine_id + strlen("_MACHINE_ID="));
769                 IOVEC_SET_STRING(iovec[n++], machine_id);
770         }
771
772         t = gethostname_malloc();
773         if (t) {
774                 x = strappenda("_HOSTNAME=", t);
775                 free(t);
776                 IOVEC_SET_STRING(iovec[n++], x);
777         }
778
779         assert(n <= m);
780
781         if (s->split_mode == SPLIT_UID && realuid > 0)
782                 /* Split up strictly by any UID */
783                 journal_uid = realuid;
784         else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
785                 /* Split up by login UIDs, this avoids creation of
786                  * individual journals for system UIDs.  We do this
787                  * only if the realuid is not root, in order not to
788                  * accidentally leak privileged information to the
789                  * user that is logged by a privileged process that is
790                  * part of an unprivileged session.*/
791                 journal_uid = owner;
792         else
793                 journal_uid = 0;
794
795         write_to_journal(s, journal_uid, iovec, n, priority);
796 }
797
798 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
799         char mid[11 + 32 + 1];
800         char buffer[16 + LINE_MAX + 1];
801         struct iovec iovec[N_IOVEC_META_FIELDS + 4];
802         int n = 0;
803         va_list ap;
804         struct ucred ucred = {};
805
806         assert(s);
807         assert(format);
808
809         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
810         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
811
812         memcpy(buffer, "MESSAGE=", 8);
813         va_start(ap, format);
814         vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
815         va_end(ap);
816         char_array_0(buffer);
817         IOVEC_SET_STRING(iovec[n++], buffer);
818
819         if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
820                 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
821                 char_array_0(mid);
822                 IOVEC_SET_STRING(iovec[n++], mid);
823         }
824
825         ucred.pid = getpid();
826         ucred.uid = getuid();
827         ucred.gid = getgid();
828
829         dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
830 }
831
832 void server_dispatch_message(
833                 Server *s,
834                 struct iovec *iovec, unsigned n, unsigned m,
835                 struct ucred *ucred,
836                 struct timeval *tv,
837                 const char *label, size_t label_len,
838                 const char *unit_id,
839                 int priority,
840                 pid_t object_pid) {
841
842         int rl, r;
843         _cleanup_free_ char *path = NULL;
844         char *c;
845
846         assert(s);
847         assert(iovec || n == 0);
848
849         if (n == 0)
850                 return;
851
852         if (LOG_PRI(priority) > s->max_level_store)
853                 return;
854
855         /* Stop early in case the information will not be stored
856          * in a journal. */
857         if (s->storage == STORAGE_NONE)
858                 return;
859
860         if (!ucred)
861                 goto finish;
862
863         r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
864         if (r < 0)
865                 goto finish;
866
867         /* example: /user/lennart/3/foobar
868          *          /system/dbus.service/foobar
869          *
870          * So let's cut of everything past the third /, since that is
871          * where user directories start */
872
873         c = strchr(path, '/');
874         if (c) {
875                 c = strchr(c+1, '/');
876                 if (c) {
877                         c = strchr(c+1, '/');
878                         if (c)
879                                 *c = 0;
880                 }
881         }
882
883         rl = journal_rate_limit_test(s->rate_limit, path,
884                                      priority & LOG_PRIMASK, available_space(s, false));
885
886         if (rl == 0)
887                 return;
888
889         /* Write a suppression message if we suppressed something */
890         if (rl > 1)
891                 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
892                                       "Suppressed %u messages from %s", rl - 1, path);
893
894 finish:
895         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
896 }
897
898
899 static int system_journal_open(Server *s) {
900         int r;
901         char *fn;
902         sd_id128_t machine;
903         char ids[33];
904
905         r = sd_id128_get_machine(&machine);
906         if (r < 0) {
907                 log_error("Failed to get machine id: %s", strerror(-r));
908                 return r;
909         }
910
911         sd_id128_to_string(machine, ids);
912
913         if (!s->system_journal &&
914             (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
915             access("/run/systemd/journal/flushed", F_OK) >= 0) {
916
917                 /* If in auto mode: first try to create the machine
918                  * path, but not the prefix.
919                  *
920                  * If in persistent mode: create /var/log/journal and
921                  * the machine path */
922
923                 if (s->storage == STORAGE_PERSISTENT)
924                         (void) mkdir("/var/log/journal/", 0755);
925
926                 fn = strappenda("/var/log/journal/", ids);
927                 (void) mkdir(fn, 0755);
928
929                 fn = strappenda(fn, "/system.journal");
930                 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
931
932                 if (r >= 0)
933                         server_fix_perms(s, s->system_journal, 0);
934                 else if (r < 0) {
935                         if (r != -ENOENT && r != -EROFS)
936                                 log_warning("Failed to open system journal: %s", strerror(-r));
937
938                         r = 0;
939                 }
940         }
941
942         if (!s->runtime_journal &&
943             (s->storage != STORAGE_NONE)) {
944
945                 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
946                 if (!fn)
947                         return -ENOMEM;
948
949                 if (s->system_journal) {
950
951                         /* Try to open the runtime journal, but only
952                          * if it already exists, so that we can flush
953                          * it into the system journal */
954
955                         r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
956                         free(fn);
957
958                         if (r < 0) {
959                                 if (r != -ENOENT)
960                                         log_warning("Failed to open runtime journal: %s", strerror(-r));
961
962                                 r = 0;
963                         }
964
965                 } else {
966
967                         /* OK, we really need the runtime journal, so create
968                          * it if necessary. */
969
970                         (void) mkdir_parents(fn, 0755);
971                         r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
972                         free(fn);
973
974                         if (r < 0) {
975                                 log_error("Failed to open runtime journal: %s", strerror(-r));
976                                 return r;
977                         }
978                 }
979
980                 if (s->runtime_journal)
981                         server_fix_perms(s, s->runtime_journal, 0);
982         }
983
984         available_space(s, true);
985
986         return r;
987 }
988
989 int server_flush_to_var(Server *s) {
990         int r;
991         sd_id128_t machine;
992         sd_journal *j = NULL;
993
994         assert(s);
995
996         if (s->storage != STORAGE_AUTO &&
997             s->storage != STORAGE_PERSISTENT)
998                 return 0;
999
1000         if (!s->runtime_journal)
1001                 return 0;
1002
1003         system_journal_open(s);
1004
1005         if (!s->system_journal)
1006                 return 0;
1007
1008         log_debug("Flushing to /var...");
1009
1010         r = sd_id128_get_machine(&machine);
1011         if (r < 0)
1012                 return r;
1013
1014         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1015         if (r < 0) {
1016                 log_error("Failed to read runtime journal: %s", strerror(-r));
1017                 return r;
1018         }
1019
1020         sd_journal_set_data_threshold(j, 0);
1021
1022         SD_JOURNAL_FOREACH(j) {
1023                 Object *o = NULL;
1024                 JournalFile *f;
1025
1026                 f = j->current_file;
1027                 assert(f && f->current_offset > 0);
1028
1029                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1030                 if (r < 0) {
1031                         log_error("Can't read entry: %s", strerror(-r));
1032                         goto finish;
1033                 }
1034
1035                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1036                 if (r >= 0)
1037                         continue;
1038
1039                 if (!shall_try_append_again(s->system_journal, r)) {
1040                         log_error("Can't write entry: %s", strerror(-r));
1041                         goto finish;
1042                 }
1043
1044                 server_rotate(s);
1045                 server_vacuum(s);
1046
1047                 if (!s->system_journal) {
1048                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1049                         r = -EIO;
1050                         goto finish;
1051                 }
1052
1053                 log_debug("Retrying write.");
1054                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1055                 if (r < 0) {
1056                         log_error("Can't write entry: %s", strerror(-r));
1057                         goto finish;
1058                 }
1059         }
1060
1061 finish:
1062         journal_file_post_change(s->system_journal);
1063
1064         journal_file_close(s->runtime_journal);
1065         s->runtime_journal = NULL;
1066
1067         if (r >= 0)
1068                 rm_rf("/run/log/journal", false, true, false);
1069
1070         sd_journal_close(j);
1071
1072         return r;
1073 }
1074
1075 int process_event(Server *s, struct epoll_event *ev) {
1076         assert(s);
1077         assert(ev);
1078
1079         if (ev->data.fd == s->signal_fd) {
1080                 struct signalfd_siginfo sfsi;
1081                 ssize_t n;
1082
1083                 if (ev->events != EPOLLIN) {
1084                         log_error("Got invalid event from epoll.");
1085                         return -EIO;
1086                 }
1087
1088                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1089                 if (n != sizeof(sfsi)) {
1090
1091                         if (n >= 0)
1092                                 return -EIO;
1093
1094                         if (errno == EINTR || errno == EAGAIN)
1095                                 return 1;
1096
1097                         return -errno;
1098                 }
1099
1100                 if (sfsi.ssi_signo == SIGUSR1) {
1101                         log_info("Received request to flush runtime journal from PID %"PRIu32,
1102                                  sfsi.ssi_pid);
1103                         touch("/run/systemd/journal/flushed");
1104                         server_flush_to_var(s);
1105                         server_sync(s);
1106                         return 1;
1107                 }
1108
1109                 if (sfsi.ssi_signo == SIGUSR2) {
1110                         log_info("Received request to rotate journal from PID %"PRIu32,
1111                                  sfsi.ssi_pid);
1112                         server_rotate(s);
1113                         server_vacuum(s);
1114                         return 1;
1115                 }
1116
1117                 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1118
1119                 return 0;
1120
1121         } else if (ev->data.fd == s->sync_timer_fd) {
1122                 int r;
1123                 uint64_t t;
1124
1125                 log_debug("Got sync request from epoll.");
1126
1127                 r = read(ev->data.fd, (void *)&t, sizeof(t));
1128                 if (r < 0)
1129                         return 0;
1130
1131                 server_sync(s);
1132                 return 1;
1133
1134         } else if (ev->data.fd == s->dev_kmsg_fd) {
1135                 int r;
1136
1137                 if (ev->events != EPOLLIN) {
1138                         log_error("Got invalid event from epoll.");
1139                         return -EIO;
1140                 }
1141
1142                 r = server_read_dev_kmsg(s);
1143                 if (r < 0)
1144                         return r;
1145
1146                 return 1;
1147
1148         } else if (ev->data.fd == s->native_fd ||
1149                    ev->data.fd == s->syslog_fd) {
1150
1151                 if (ev->events != EPOLLIN) {
1152                         log_error("Got invalid event from epoll.");
1153                         return -EIO;
1154                 }
1155
1156                 for (;;) {
1157                         struct msghdr msghdr;
1158                         struct iovec iovec;
1159                         struct ucred *ucred = NULL;
1160                         struct timeval *tv = NULL;
1161                         struct cmsghdr *cmsg;
1162                         char *label = NULL;
1163                         size_t label_len = 0;
1164                         union {
1165                                 struct cmsghdr cmsghdr;
1166
1167                                 /* We use NAME_MAX space for the
1168                                  * SELinux label here. The kernel
1169                                  * currently enforces no limit, but
1170                                  * according to suggestions from the
1171                                  * SELinux people this will change and
1172                                  * it will probably be identical to
1173                                  * NAME_MAX. For now we use that, but
1174                                  * this should be updated one day when
1175                                  * the final limit is known.*/
1176                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1177                                             CMSG_SPACE(sizeof(struct timeval)) +
1178                                             CMSG_SPACE(sizeof(int)) + /* fd */
1179                                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
1180                         } control;
1181                         ssize_t n;
1182                         int v;
1183                         int *fds = NULL;
1184                         unsigned n_fds = 0;
1185
1186                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1187                                 log_error("SIOCINQ failed: %m");
1188                                 return -errno;
1189                         }
1190
1191                         if (s->buffer_size < (size_t) v) {
1192                                 void *b;
1193                                 size_t l;
1194
1195                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1196                                 b = realloc(s->buffer, l+1);
1197
1198                                 if (!b) {
1199                                         log_error("Couldn't increase buffer.");
1200                                         return -ENOMEM;
1201                                 }
1202
1203                                 s->buffer_size = l;
1204                                 s->buffer = b;
1205                         }
1206
1207                         zero(iovec);
1208                         iovec.iov_base = s->buffer;
1209                         iovec.iov_len = s->buffer_size;
1210
1211                         zero(control);
1212                         zero(msghdr);
1213                         msghdr.msg_iov = &iovec;
1214                         msghdr.msg_iovlen = 1;
1215                         msghdr.msg_control = &control;
1216                         msghdr.msg_controllen = sizeof(control);
1217
1218                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1219                         if (n < 0) {
1220
1221                                 if (errno == EINTR || errno == EAGAIN)
1222                                         return 1;
1223
1224                                 log_error("recvmsg() failed: %m");
1225                                 return -errno;
1226                         }
1227
1228                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1229
1230                                 if (cmsg->cmsg_level == SOL_SOCKET &&
1231                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
1232                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1233                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
1234                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1235                                          cmsg->cmsg_type == SCM_SECURITY) {
1236                                         label = (char*) CMSG_DATA(cmsg);
1237                                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
1238                                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1239                                            cmsg->cmsg_type == SO_TIMESTAMP &&
1240                                            cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1241                                         tv = (struct timeval*) CMSG_DATA(cmsg);
1242                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1243                                          cmsg->cmsg_type == SCM_RIGHTS) {
1244                                         fds = (int*) CMSG_DATA(cmsg);
1245                                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1246                                 }
1247                         }
1248
1249                         if (ev->data.fd == s->syslog_fd) {
1250                                 if (n > 0 && n_fds == 0) {
1251                                         s->buffer[n] = 0;
1252                                         server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1253                                 } else if (n_fds > 0)
1254                                         log_warning("Got file descriptors via syslog socket. Ignoring.");
1255
1256                         } else {
1257                                 if (n > 0 && n_fds == 0)
1258                                         server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1259                                 else if (n == 0 && n_fds == 1)
1260                                         server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1261                                 else if (n_fds > 0)
1262                                         log_warning("Got too many file descriptors via native socket. Ignoring.");
1263                         }
1264
1265                         close_many(fds, n_fds);
1266                 }
1267
1268                 return 1;
1269
1270         } else if (ev->data.fd == s->stdout_fd) {
1271
1272                 if (ev->events != EPOLLIN) {
1273                         log_error("Got invalid event from epoll.");
1274                         return -EIO;
1275                 }
1276
1277                 stdout_stream_new(s);
1278                 return 1;
1279
1280         } else {
1281                 StdoutStream *stream;
1282
1283                 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1284                         log_error("Got invalid event from epoll.");
1285                         return -EIO;
1286                 }
1287
1288                 /* If it is none of the well-known fds, it must be an
1289                  * stdout stream fd. Note that this is a bit ugly here
1290                  * (since we rely that none of the well-known fds
1291                  * could be interpreted as pointer), but nonetheless
1292                  * safe, since the well-known fds would never get an
1293                  * fd > 4096, i.e. beyond the first memory page */
1294
1295                 stream = ev->data.ptr;
1296
1297                 if (stdout_stream_process(stream) <= 0)
1298                         stdout_stream_free(stream);
1299
1300                 return 1;
1301         }
1302
1303         log_error("Unknown event.");
1304         return 0;
1305 }
1306
1307 static int open_signalfd(Server *s) {
1308         sigset_t mask;
1309         struct epoll_event ev;
1310
1311         assert(s);
1312
1313         assert_se(sigemptyset(&mask) == 0);
1314         sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1315         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1316
1317         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1318         if (s->signal_fd < 0) {
1319                 log_error("signalfd(): %m");
1320                 return -errno;
1321         }
1322
1323         zero(ev);
1324         ev.events = EPOLLIN;
1325         ev.data.fd = s->signal_fd;
1326
1327         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1328                 log_error("epoll_ctl(): %m");
1329                 return -errno;
1330         }
1331
1332         return 0;
1333 }
1334
1335 static int server_parse_proc_cmdline(Server *s) {
1336         _cleanup_free_ char *line = NULL;
1337         char *w, *state;
1338         int r;
1339         size_t l;
1340
1341         if (detect_container(NULL) > 0)
1342                 return 0;
1343
1344         r = read_one_line_file("/proc/cmdline", &line);
1345         if (r < 0) {
1346                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1347                 return 0;
1348         }
1349
1350         FOREACH_WORD_QUOTED(w, l, line, state) {
1351                 _cleanup_free_ char *word;
1352
1353                 word = strndup(w, l);
1354                 if (!word)
1355                         return -ENOMEM;
1356
1357                 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1358                         r = parse_boolean(word + 35);
1359                         if (r < 0)
1360                                 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1361                         else
1362                                 s->forward_to_syslog = r;
1363                 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1364                         r = parse_boolean(word + 33);
1365                         if (r < 0)
1366                                 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1367                         else
1368                                 s->forward_to_kmsg = r;
1369                 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1370                         r = parse_boolean(word + 36);
1371                         if (r < 0)
1372                                 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1373                         else
1374                                 s->forward_to_console = r;
1375                 } else if (startswith(word, "systemd.journald"))
1376                         log_warning("Invalid systemd.journald parameter. Ignoring.");
1377         }
1378
1379         return 0;
1380 }
1381
1382 static int server_parse_config_file(Server *s) {
1383         static const char fn[] = "/etc/systemd/journald.conf";
1384         _cleanup_fclose_ FILE *f = NULL;
1385         int r;
1386
1387         assert(s);
1388
1389         f = fopen(fn, "re");
1390         if (!f) {
1391                 if (errno == ENOENT)
1392                         return 0;
1393
1394                 log_warning("Failed to open configuration file %s: %m", fn);
1395                 return -errno;
1396         }
1397
1398         r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
1399                          (void*) journald_gperf_lookup, false, false, s);
1400         if (r < 0)
1401                 log_warning("Failed to parse configuration file: %s", strerror(-r));
1402
1403         return r;
1404 }
1405
1406 static int server_open_sync_timer(Server *s) {
1407         int r;
1408         struct epoll_event ev;
1409
1410         assert(s);
1411
1412         s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1413         if (s->sync_timer_fd < 0)
1414                 return -errno;
1415
1416         zero(ev);
1417         ev.events = EPOLLIN;
1418         ev.data.fd = s->sync_timer_fd;
1419
1420         r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1421         if (r < 0) {
1422                 log_error("Failed to add idle timer fd to epoll object: %m");
1423                 return -errno;
1424         }
1425
1426         return 0;
1427 }
1428
1429 int server_schedule_sync(Server *s, int priority) {
1430         int r;
1431
1432         assert(s);
1433
1434         if (priority <= LOG_CRIT) {
1435                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1436                 server_sync(s);
1437                 return 0;
1438         }
1439
1440         if (s->sync_scheduled)
1441                 return 0;
1442
1443         if (s->sync_interval_usec) {
1444                 struct itimerspec sync_timer_enable = {};
1445
1446                 timespec_store(&sync_timer_enable.it_value, s->sync_interval_usec);
1447
1448                 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1449                 if (r < 0)
1450                         return -errno;
1451         }
1452
1453         s->sync_scheduled = true;
1454
1455         return 0;
1456 }
1457
1458 int server_init(Server *s) {
1459         int n, r, fd;
1460
1461         assert(s);
1462
1463         zero(*s);
1464         s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
1465                 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1466         s->compress = true;
1467         s->seal = true;
1468
1469         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1470         s->sync_scheduled = false;
1471
1472         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1473         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1474
1475         s->forward_to_syslog = true;
1476
1477         s->max_level_store = LOG_DEBUG;
1478         s->max_level_syslog = LOG_DEBUG;
1479         s->max_level_kmsg = LOG_NOTICE;
1480         s->max_level_console = LOG_INFO;
1481
1482         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1483         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1484
1485         server_parse_config_file(s);
1486         server_parse_proc_cmdline(s);
1487         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1488                 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1489                           (long long unsigned) s->rate_limit_interval,
1490                           s->rate_limit_burst);
1491                 s->rate_limit_interval = s->rate_limit_burst = 0;
1492         }
1493
1494         mkdir_p("/run/systemd/journal", 0755);
1495
1496         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1497         if (!s->user_journals)
1498                 return log_oom();
1499
1500         s->mmap = mmap_cache_new();
1501         if (!s->mmap)
1502                 return log_oom();
1503
1504         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1505         if (s->epoll_fd < 0) {
1506                 log_error("Failed to create epoll object: %m");
1507                 return -errno;
1508         }
1509
1510         n = sd_listen_fds(true);
1511         if (n < 0) {
1512                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1513                 return n;
1514         }
1515
1516         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1517
1518                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1519
1520                         if (s->native_fd >= 0) {
1521                                 log_error("Too many native sockets passed.");
1522                                 return -EINVAL;
1523                         }
1524
1525                         s->native_fd = fd;
1526
1527                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1528
1529                         if (s->stdout_fd >= 0) {
1530                                 log_error("Too many stdout sockets passed.");
1531                                 return -EINVAL;
1532                         }
1533
1534                         s->stdout_fd = fd;
1535
1536                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1537
1538                         if (s->syslog_fd >= 0) {
1539                                 log_error("Too many /dev/log sockets passed.");
1540                                 return -EINVAL;
1541                         }
1542
1543                         s->syslog_fd = fd;
1544
1545                 } else {
1546                         log_error("Unknown socket passed.");
1547                         return -EINVAL;
1548                 }
1549         }
1550
1551         r = server_open_syslog_socket(s);
1552         if (r < 0)
1553                 return r;
1554
1555         r = server_open_native_socket(s);
1556         if (r < 0)
1557                 return r;
1558
1559         r = server_open_stdout_socket(s);
1560         if (r < 0)
1561                 return r;
1562
1563         r = server_open_dev_kmsg(s);
1564         if (r < 0)
1565                 return r;
1566
1567         r = server_open_kernel_seqnum(s);
1568         if (r < 0)
1569                 return r;
1570
1571         r = server_open_sync_timer(s);
1572         if (r < 0)
1573                 return r;
1574
1575         r = open_signalfd(s);
1576         if (r < 0)
1577                 return r;
1578
1579         s->udev = udev_new();
1580         if (!s->udev)
1581                 return -ENOMEM;
1582
1583         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1584                                                s->rate_limit_burst);
1585         if (!s->rate_limit)
1586                 return -ENOMEM;
1587
1588         r = system_journal_open(s);
1589         if (r < 0)
1590                 return r;
1591
1592         return 0;
1593 }
1594
1595 void server_maybe_append_tags(Server *s) {
1596 #ifdef HAVE_GCRYPT
1597         JournalFile *f;
1598         Iterator i;
1599         usec_t n;
1600
1601         n = now(CLOCK_REALTIME);
1602
1603         if (s->system_journal)
1604                 journal_file_maybe_append_tag(s->system_journal, n);
1605
1606         HASHMAP_FOREACH(f, s->user_journals, i)
1607                 journal_file_maybe_append_tag(f, n);
1608 #endif
1609 }
1610
1611 void server_done(Server *s) {
1612         JournalFile *f;
1613         assert(s);
1614
1615         while (s->stdout_streams)
1616                 stdout_stream_free(s->stdout_streams);
1617
1618         if (s->system_journal)
1619                 journal_file_close(s->system_journal);
1620
1621         if (s->runtime_journal)
1622                 journal_file_close(s->runtime_journal);
1623
1624         while ((f = hashmap_steal_first(s->user_journals)))
1625                 journal_file_close(f);
1626
1627         hashmap_free(s->user_journals);
1628
1629         if (s->epoll_fd >= 0)
1630                 close_nointr_nofail(s->epoll_fd);
1631
1632         if (s->signal_fd >= 0)
1633                 close_nointr_nofail(s->signal_fd);
1634
1635         if (s->syslog_fd >= 0)
1636                 close_nointr_nofail(s->syslog_fd);
1637
1638         if (s->native_fd >= 0)
1639                 close_nointr_nofail(s->native_fd);
1640
1641         if (s->stdout_fd >= 0)
1642                 close_nointr_nofail(s->stdout_fd);
1643
1644         if (s->dev_kmsg_fd >= 0)
1645                 close_nointr_nofail(s->dev_kmsg_fd);
1646
1647         if (s->sync_timer_fd >= 0)
1648                 close_nointr_nofail(s->sync_timer_fd);
1649
1650         if (s->rate_limit)
1651                 journal_rate_limit_free(s->rate_limit);
1652
1653         if (s->kernel_seqnum)
1654                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1655
1656         free(s->buffer);
1657         free(s->tty_path);
1658
1659         if (s->mmap)
1660                 mmap_cache_unref(s->mmap);
1661
1662         if (s->udev)
1663                 udev_unref(s->udev);
1664 }