chiark / gitweb /
ca274ee44ad0c35bf7f8aa8ffcc912248b6150e6
[elogind.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 2 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <sys/acl.h>
29 #include <acl/libacl.h>
30 #include <stddef.h>
31 #include <sys/ioctl.h>
32 #include <linux/sockios.h>
33
34 #include "hashmap.h"
35 #include "journal-file.h"
36 #include "sd-daemon.h"
37 #include "socket-util.h"
38 #include "acl-util.h"
39 #include "cgroup-util.h"
40
/* Upper bound on the number of per-user journal files kept open at the same
 * time; find_journal() closes already-open ones before opening another once
 * this many are in the user_journals hashmap. */
#define USER_JOURNALS_MAX 1024

/* Complete state of the journal daemon. Initialized by server_init() and
 * torn down by server_done(). */
typedef struct Server {
        int epoll_fd;   /* epoll instance the three fds below are registered on */
        int signal_fd;  /* signalfd delivering SIGINT/SIGTERM */
        int syslog_fd;  /* datagram socket on which syslog-format messages arrive */
        int native_fd;  /* datagram socket on which native-protocol messages arrive */

        JournalFile *runtime_journal;   /* journal below /run/log/journal, opened when the /var one is unavailable */
        JournalFile *system_journal;    /* journal below /var/log/journal */
        Hashmap *user_journals;         /* uid -> JournalFile* of the split-out per-user journals */

        uint64_t seqnum;        /* handed by pointer to journal_file_append_entry() on every write */

        char *buffer;           /* receive buffer for incoming datagrams, grown on demand */
        size_t buffer_size;     /* usable size of buffer; the allocation is one byte larger */

        JournalMetrics metrics; /* size limits copied into every journal file we open */
        uint64_t max_use;       /* passed to journal_directory_vacuum() together with metrics.keep_free */
        bool compress;          /* copied into every journal file we open */
} Server;
62
63 static void fix_perms(JournalFile *f, uid_t uid) {
64         acl_t acl;
65         acl_entry_t entry;
66         acl_permset_t permset;
67         int r;
68
69         assert(f);
70
71         r = fchmod_and_fchown(f->fd, 0640, 0, 0);
72         if (r < 0)
73                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
74
75         if (uid <= 0)
76                 return;
77
78         acl = acl_get_fd(f->fd);
79         if (!acl) {
80                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
81                 return;
82         }
83
84         r = acl_find_uid(acl, uid, &entry);
85         if (r <= 0) {
86
87                 if (acl_create_entry(&acl, &entry) < 0 ||
88                     acl_set_tag_type(entry, ACL_USER) < 0 ||
89                     acl_set_qualifier(entry, &uid) < 0) {
90                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
91                         goto finish;
92                 }
93         }
94
95         if (acl_get_permset(entry, &permset) < 0 ||
96             acl_add_perm(permset, ACL_READ) < 0 ||
97             acl_calc_mask(&acl) < 0) {
98                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
99                 goto finish;
100         }
101
102         if (acl_set_fd(f->fd, acl) < 0)
103                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
104
105 finish:
106         acl_free(acl);
107 }
108
109 static JournalFile* find_journal(Server *s, uid_t uid) {
110         char *p;
111         int r;
112         JournalFile *f;
113         char ids[33];
114         sd_id128_t machine;
115
116         assert(s);
117
118         /* We split up user logs only on /var, not on /run */
119         if (!s->system_journal)
120                 return s->runtime_journal;
121
122         if (uid <= 0)
123                 return s->system_journal;
124
125         r = sd_id128_get_machine(&machine);
126         if (r < 0)
127                 return s->system_journal;
128
129         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
130         if (f)
131                 return f;
132
133         if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0)
134                 return s->system_journal;
135
136         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
137                 /* Too many open? Then let's close one */
138                 f = hashmap_steal_first(s->user_journals);
139                 assert(f);
140                 journal_file_close(f);
141         }
142
143         r = journal_file_open(p, O_RDWR|O_CREAT, 0640, s->system_journal, &f);
144         free(p);
145
146         if (r < 0)
147                 return s->system_journal;
148
149         fix_perms(f, uid);
150         f->metrics = s->metrics;
151         f->compress = s->compress;
152
153         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
154         if (r < 0) {
155                 journal_file_close(f);
156                 return s->system_journal;
157         }
158
159         return f;
160 }
161
162 static void server_vacuum(Server *s) {
163         Iterator i;
164         void *k;
165         char *p;
166         char ids[33];
167         sd_id128_t machine;
168         int r;
169         JournalFile *f;
170
171         log_info("Rotating...");
172
173         if (s->runtime_journal) {
174                 r = journal_file_rotate(&s->runtime_journal);
175                 if (r < 0)
176                         log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
177         }
178
179         if (s->system_journal) {
180                 r = journal_file_rotate(&s->system_journal);
181                 if (r < 0)
182                         log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
183         }
184
185         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
186                 r = journal_file_rotate(&f);
187                 if (r < 0)
188                         log_error("Failed to rotate %s: %s", f->path, strerror(-r));
189                 else
190                         hashmap_replace(s->user_journals, k, f);
191         }
192
193         log_info("Vacuuming...");
194
195         r = sd_id128_get_machine(&machine);
196         if (r < 0) {
197                 log_error("Failed to get machine ID: %s", strerror(-r));
198                 return;
199         }
200
201         if (asprintf(&p, "/var/log/journal/%s", sd_id128_to_string(machine, ids)) < 0) {
202                 log_error("Out of memory.");
203                 return;
204         }
205
206         r = journal_directory_vacuum(p, s->max_use, s->metrics.keep_free);
207         if (r < 0 && r != -ENOENT)
208                 log_error("Failed to vacuum %s: %s", p, strerror(-r));
209         free(p);
210
211         if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
212                 log_error("Out of memory.");
213                 return;
214         }
215
216         r = journal_directory_vacuum(p, s->max_use, s->metrics.keep_free);
217         if (r < 0 && r != -ENOENT)
218                 log_error("Failed to vacuum %s: %s", p, strerror(-r));
219         free(p);
220 }
221
222 static void dispatch_message(Server *s, struct iovec *iovec, unsigned n, unsigned m, struct ucred *ucred, struct timeval *tv) {
223         char *pid = NULL, *uid = NULL, *gid = NULL,
224                 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
225                 *comm = NULL, *cmdline = NULL, *hostname = NULL,
226                 *audit_session = NULL, *audit_loginuid = NULL,
227                 *exe = NULL, *cgroup = NULL;
228
229         char idbuf[33];
230         sd_id128_t id;
231         int r;
232         char *t;
233         uid_t loginuid = 0, realuid = 0;
234         JournalFile *f;
235         bool vacuumed = false;
236
237         assert(s);
238         assert(iovec || n == 0);
239
240         if (n == 0)
241                 return;
242
243         assert(n + 13 <= m);
244
245         if (ucred) {
246                 uint32_t session;
247                 char *path;
248
249                 realuid = ucred->uid;
250
251                 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
252                         IOVEC_SET_STRING(iovec[n++], pid);
253
254                 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
255                         IOVEC_SET_STRING(iovec[n++], uid);
256
257                 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
258                         IOVEC_SET_STRING(iovec[n++], gid);
259
260                 r = get_process_comm(ucred->pid, &t);
261                 if (r >= 0) {
262                         comm = strappend("_COMM=", t);
263                         if (comm)
264                                 IOVEC_SET_STRING(iovec[n++], comm);
265                         free(t);
266                 }
267
268                 r = get_process_exe(ucred->pid, &t);
269                 if (r >= 0) {
270                         exe = strappend("_EXE=", t);
271                         if (comm)
272                                 IOVEC_SET_STRING(iovec[n++], exe);
273                         free(t);
274                 }
275
276                 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
277                 if (r >= 0) {
278                         cmdline = strappend("_CMDLINE=", t);
279                         if (cmdline)
280                                 IOVEC_SET_STRING(iovec[n++], cmdline);
281                         free(t);
282                 }
283
284                 r = audit_session_from_pid(ucred->pid, &session);
285                 if (r >= 0)
286                         if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) session) >= 0)
287                                 IOVEC_SET_STRING(iovec[n++], audit_session);
288
289                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
290                 if (r >= 0)
291                         if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
292                                 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
293
294                 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, ucred->pid, &path);
295                 if (r >= 0) {
296                         cgroup = strappend("_SYSTEMD_CGROUP=", path);
297                         if (cgroup)
298                                 IOVEC_SET_STRING(iovec[n++], cgroup);
299                         free(path);
300                 }
301         }
302
303         if (tv) {
304                 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
305                              (unsigned long long) timeval_load(tv)) >= 0)
306                         IOVEC_SET_STRING(iovec[n++], source_time);
307         }
308
309         /* Note that strictly speaking storing the boot id here is
310          * redundant since the entry includes this in-line
311          * anyway. However, we need this indexed, too. */
312         r = sd_id128_get_boot(&id);
313         if (r >= 0)
314                 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
315                         IOVEC_SET_STRING(iovec[n++], boot_id);
316
317         r = sd_id128_get_machine(&id);
318         if (r >= 0)
319                 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
320                         IOVEC_SET_STRING(iovec[n++], machine_id);
321
322         t = gethostname_malloc();
323         if (t) {
324                 hostname = strappend("_HOSTNAME=", t);
325                 if (hostname)
326                         IOVEC_SET_STRING(iovec[n++], hostname);
327                 free(t);
328         }
329
330         assert(n <= m);
331
332 retry:
333         f = find_journal(s, realuid == 0 ? 0 : loginuid);
334         if (!f)
335                 log_warning("Dropping message, as we can't find a place to store the data.");
336         else {
337                 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
338
339                 if (r == -E2BIG && !vacuumed) {
340                         log_info("Allocation limit reached.");
341
342                         server_vacuum(s);
343                         vacuumed = true;
344
345                         log_info("Retrying write.");
346                         goto retry;
347                 }
348
349                 if (r < 0)
350                         log_error("Failed to write entry, ignoring: %s", strerror(-r));
351         }
352
353         free(pid);
354         free(uid);
355         free(gid);
356         free(comm);
357         free(exe);
358         free(cmdline);
359         free(source_time);
360         free(boot_id);
361         free(machine_id);
362         free(hostname);
363         free(audit_session);
364         free(audit_loginuid);
365         free(cgroup);
366
367 }
368
369 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv) {
370         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL;
371         struct iovec iovec[16];
372         unsigned n = 0;
373         int priority = LOG_USER | LOG_INFO;
374
375         assert(s);
376         assert(buf);
377
378         parse_syslog_priority((char**) &buf, &priority);
379         skip_syslog_date((char**) &buf);
380
381         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
382                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
383
384         if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
385                 IOVEC_SET_STRING(iovec[n++], syslog_facility);
386
387         message = strappend("MESSAGE=", buf);
388         if (message)
389                 IOVEC_SET_STRING(iovec[n++], message);
390
391         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv);
392
393         free(message);
394         free(syslog_facility);
395         free(syslog_priority);
396 }
397
398 static void process_native_message(Server *s, const void *buffer, size_t buffer_size, struct ucred *ucred, struct timeval *tv) {
399         struct iovec *iovec = NULL;
400         unsigned n = 0, m = 0, j;
401         const char *p;
402         size_t remaining;
403
404         assert(s);
405         assert(buffer || n == 0);
406
407         p = buffer;
408         remaining = buffer_size;
409
410         while (remaining > 0) {
411                 const char *e, *q;
412
413                 e = memchr(p, '\n', remaining);
414
415                 if (!e) {
416                         /* Trailing noise, let's ignore it, and flush what we collected */
417                         log_debug("Received message with trailing noise, ignoring.");
418                         break;
419                 }
420
421                 if (e == p) {
422                         /* Entry separator */
423                         dispatch_message(s, iovec, n, m, ucred, tv);
424                         n = 0;
425
426                         p++;
427                         remaining--;
428                         continue;
429                 }
430
431                 if (*p == '.') {
432                         /* Control command, ignore for now */
433                         remaining -= (e - p) + 1;
434                         p = e + 1;
435                         continue;
436                 }
437
438                 /* A property follows */
439
440                 if (n+13 >= m) {
441                         struct iovec *c;
442                         unsigned u;
443
444                         u = MAX((n+13U) * 2U, 4U);
445                         c = realloc(iovec, u * sizeof(struct iovec));
446                         if (!c) {
447                                 log_error("Out of memory");
448                                 break;
449                         }
450
451                         iovec = c;
452                         m = u;
453                 }
454
455                 q = memchr(p, '=', e - p);
456                 if (q) {
457                         if (p[0] != '_') {
458                                 /* If the field name starts with an
459                                  * underscore, skip the variable,
460                                  * since that indidates a trusted
461                                  * field */
462                                 iovec[n].iov_base = (char*) p;
463                                 iovec[n].iov_len = e - p;
464                                 n++;
465                         }
466
467                         remaining -= (e - p) + 1;
468                         p = e + 1;
469                         continue;
470                 } else {
471                         uint64_t l;
472                         char *k;
473
474                         if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
475                                 log_debug("Failed to parse message, ignoring.");
476                                 break;
477                         }
478
479                         memcpy(&l, e + 1, sizeof(uint64_t));
480                         l = le64toh(l);
481
482                         if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
483                             e[1+sizeof(uint64_t)+l] != '\n') {
484                                 log_debug("Failed to parse message, ignoring.");
485                                 break;
486                         }
487
488                         k = malloc((e - p) + 1 + l);
489                         if (!k) {
490                                 log_error("Out of memory");
491                                 break;
492                         }
493
494                         memcpy(k, p, e - p);
495                         k[e - p] = '=';
496                         memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
497
498                         if (k[0] != '_') {
499                                 iovec[n].iov_base = k;
500                                 iovec[n].iov_len = (e - p) + 1 + l;
501                                 n++;
502                         } else
503                                 free(k);
504
505                         remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
506                         p = e + 1 + sizeof(uint64_t) + l + 1;
507                 }
508         }
509
510         dispatch_message(s, iovec, n, m, ucred, tv);
511
512         for (j = 0; j < n; j++)
513                 if (iovec[j].iov_base < buffer ||
514                     (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
515                         free(iovec[j].iov_base);
516 }
517
518 static int process_event(Server *s, struct epoll_event *ev) {
519         assert(s);
520
521         if (ev->events != EPOLLIN) {
522                 log_info("Got invalid event from epoll.");
523                 return -EIO;
524         }
525
526         if (ev->data.fd == s->signal_fd) {
527                 struct signalfd_siginfo sfsi;
528                 ssize_t n;
529
530                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
531                 if (n != sizeof(sfsi)) {
532
533                         if (n >= 0)
534                                 return -EIO;
535
536                         if (errno == EINTR || errno == EAGAIN)
537                                 return 0;
538
539                         return -errno;
540                 }
541
542                 log_debug("Received SIG%s", signal_to_string(sfsi.ssi_signo));
543                 return 0;
544
545         }
546
547         if (ev->data.fd == s->native_fd ||
548             ev->data.fd == s->syslog_fd) {
549                 for (;;) {
550                         struct msghdr msghdr;
551                         struct iovec iovec;
552                         struct ucred *ucred = NULL;
553                         struct timeval *tv = NULL;
554                         struct cmsghdr *cmsg;
555                         union {
556                                 struct cmsghdr cmsghdr;
557                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
558                                             CMSG_SPACE(sizeof(struct timeval))];
559                         } control;
560                         ssize_t n;
561                         int v;
562
563                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
564                                 log_error("SIOCINQ failed: %m");
565                                 return -errno;
566                         }
567
568                         if (v <= 0)
569                                 return 1;
570
571                         if (s->buffer_size < (size_t) v) {
572                                 void *b;
573                                 size_t l;
574
575                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
576                                 b = realloc(s->buffer, l+1);
577
578                                 if (!b) {
579                                         log_error("Couldn't increase buffer.");
580                                         return -ENOMEM;
581                                 }
582
583                                 s->buffer_size = l;
584                                 s->buffer = b;
585                         }
586
587                         zero(iovec);
588                         iovec.iov_base = s->buffer;
589                         iovec.iov_len = s->buffer_size;
590
591                         zero(control);
592                         zero(msghdr);
593                         msghdr.msg_iov = &iovec;
594                         msghdr.msg_iovlen = 1;
595                         msghdr.msg_control = &control;
596                         msghdr.msg_controllen = sizeof(control);
597
598                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT);
599                         if (n < 0) {
600
601                                 if (errno == EINTR || errno == EAGAIN)
602                                         return 1;
603
604                                 log_error("recvmsg() failed: %m");
605                                 return -errno;
606                         }
607
608                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
609
610                                 if (cmsg->cmsg_level == SOL_SOCKET &&
611                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
612                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
613                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
614                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
615                                          cmsg->cmsg_type == SO_TIMESTAMP &&
616                                          cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
617                                         tv = (struct timeval*) CMSG_DATA(cmsg);
618                         }
619
620                         if (ev->data.fd == s->syslog_fd) {
621                                 char *e;
622
623                                 e = memchr(s->buffer, '\n', n);
624                                 if (e)
625                                         *e = 0;
626                                 else
627                                         s->buffer[n] = 0;
628
629                                 process_syslog_message(s, strstrip(s->buffer), ucred, tv);
630                         } else
631                                 process_native_message(s, s->buffer, n, ucred, tv);
632                 }
633
634                 return 1;
635         }
636
637         log_error("Unknown event.");
638         return 0;
639 }
640
641 static int system_journal_open(Server *s) {
642         int r;
643         char *fn;
644         sd_id128_t machine;
645         char ids[33];
646
647         r = sd_id128_get_machine(&machine);
648         if (r < 0)
649                 return r;
650
651         /* First try to create the machine path, but not the prefix */
652         fn = strappend("/var/log/journal/", sd_id128_to_string(machine, ids));
653         if (!fn)
654                 return -ENOMEM;
655         (void) mkdir(fn, 0755);
656         free(fn);
657
658         /* The create the system journal file */
659         fn = join("/var/log/journal/", ids, "/system.journal", NULL);
660         if (!fn)
661                 return -ENOMEM;
662
663         r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, NULL, &s->system_journal);
664         free(fn);
665
666         if (r >= 0) {
667                 s->system_journal->metrics = s->metrics;
668                 s->system_journal->compress = s->compress;
669
670                 fix_perms(s->system_journal, 0);
671                 return r;
672         }
673
674         if (r < 0 && r != -ENOENT) {
675                 log_error("Failed to open system journal: %s", strerror(-r));
676                 return r;
677         }
678
679         /* /var didn't work, so try /run, but this time we
680          * create the prefix too */
681         fn = join("/run/log/journal/", ids, "/system.journal", NULL);
682         if (!fn)
683                 return -ENOMEM;
684
685         (void) mkdir_parents(fn, 0755);
686         r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, NULL, &s->runtime_journal);
687         free(fn);
688
689         if (r < 0) {
690                 log_error("Failed to open runtime journal: %s", strerror(-r));
691                 return r;
692         }
693
694         s->runtime_journal->metrics = s->metrics;
695         s->runtime_journal->compress = s->compress;
696
697         fix_perms(s->runtime_journal, 0);
698         return r;
699 }
700
701 static int open_syslog_socket(Server *s) {
702         union sockaddr_union sa;
703         int one, r;
704
705         assert(s);
706
707         if (s->syslog_fd < 0) {
708
709                 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0);
710                 if (s->syslog_fd < 0) {
711                         log_error("socket() failed: %m");
712                         return -errno;
713                 }
714
715                 zero(sa);
716                 sa.un.sun_family = AF_UNIX;
717                 strncpy(sa.un.sun_path, "/run/systemd/syslog", sizeof(sa.un.sun_path));
718
719                 unlink(sa.un.sun_path);
720
721                 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
722                 if (r < 0) {
723                         log_error("bind() failed: %m");
724                         return -errno;
725                 }
726
727                 chmod(sa.un.sun_path, 0666);
728         }
729
730         one = 1;
731         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
732         if (r < 0) {
733                 log_error("SO_PASSCRED failed: %m");
734                 return -errno;
735         }
736
737         one = 1;
738         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
739         if (r < 0) {
740                 log_error("SO_TIMESTAMP failed: %m");
741                 return -errno;
742         }
743
744         return 0;
745 }
746
747 static int open_native_socket(Server*s) {
748         union sockaddr_union sa;
749         int one, r;
750
751         assert(s);
752
753         if (s->native_fd < 0) {
754
755                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0);
756                 if (s->native_fd < 0) {
757                         log_error("socket() failed: %m");
758                         return -errno;
759                 }
760
761                 zero(sa);
762                 sa.un.sun_family = AF_UNIX;
763                 strncpy(sa.un.sun_path, "/run/systemd/journal", sizeof(sa.un.sun_path));
764
765                 unlink(sa.un.sun_path);
766
767                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
768                 if (r < 0) {
769                         log_error("bind() failed: %m");
770                         return -errno;
771                 }
772
773                 chmod(sa.un.sun_path, 0666);
774         }
775
776         one = 1;
777         r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
778         if (r < 0) {
779                 log_error("SO_PASSCRED failed: %m");
780                 return -errno;
781         }
782
783         one = 1;
784         r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
785         if (r < 0) {
786                 log_error("SO_TIMESTAMP failed: %m");
787                 return -errno;
788         }
789
790         return 0;
791 }
792
793 static int server_init(Server *s) {
794         int n, r, fd;
795         struct epoll_event ev;
796         sigset_t mask;
797
798         assert(s);
799
800         zero(*s);
801         s->syslog_fd = s->native_fd = s->signal_fd = -1;
802         s->metrics.max_size = DEFAULT_MAX_SIZE;
803         s->metrics.min_size = DEFAULT_MIN_SIZE;
804         s->metrics.keep_free = DEFAULT_KEEP_FREE;
805         s->max_use = DEFAULT_MAX_USE;
806         s->compress = true;
807
808         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
809         if (s->epoll_fd < 0) {
810                 log_error("Failed to create epoll object: %m");
811                 return -errno;
812         }
813
814         n = sd_listen_fds(true);
815         if (n < 0) {
816                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
817                 return n;
818         }
819
820         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
821
822                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
823
824                         if (s->syslog_fd >= 0) {
825                                 log_error("Too many /dev/log sockets passed.");
826                                 return -EINVAL;
827                         }
828
829                         s->syslog_fd = fd;
830
831                 } else if (sd_is_socket(fd, AF_UNIX, SOCK_DGRAM, -1) > 0) {
832
833                         if (s->native_fd >= 0) {
834                                 log_error("Too many native sockets passed.");
835                                 return -EINVAL;
836                         }
837
838                         s->native_fd = fd;
839                 } else {
840                         log_error("Unknown socket passed.");
841                         return -EINVAL;
842                 }
843         }
844
845         r = open_syslog_socket(s);
846         if (r < 0)
847                 return r;
848
849         zero(ev);
850         ev.events = EPOLLIN;
851         ev.data.fd = s->syslog_fd;
852         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
853                 log_error("Failed to add syslog server fd to epoll object: %m");
854                 return -errno;
855         }
856
857         r = open_native_socket(s);
858         if (r < 0)
859                 return r;
860
861         zero(ev);
862         ev.events = EPOLLIN;
863         ev.data.fd = s->native_fd;
864         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
865                 log_error("Failed to add native server fd to epoll object: %m");
866                 return -errno;
867         }
868
869         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
870         if (!s->user_journals) {
871                 log_error("Out of memory.");
872                 return -ENOMEM;
873         }
874
875         r = system_journal_open(s);
876         if (r < 0)
877                 return r;
878
879         assert_se(sigemptyset(&mask) == 0);
880         sigset_add_many(&mask, SIGINT, SIGTERM, -1);
881         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
882
883         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
884         if (s->signal_fd < 0) {
885                 log_error("signalfd(): %m");
886                 return -errno;
887         }
888
889         zero(ev);
890         ev.events = EPOLLIN;
891         ev.data.fd = s->signal_fd;
892
893         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
894                 log_error("epoll_ctl(): %m");
895                 return -errno;
896         }
897
898         return 0;
899 }
900
901 static void server_done(Server *s) {
902         JournalFile *f;
903         assert(s);
904
905         if (s->system_journal)
906                 journal_file_close(s->system_journal);
907
908         if (s->runtime_journal)
909                 journal_file_close(s->runtime_journal);
910
911         while ((f = hashmap_steal_first(s->user_journals)))
912                 journal_file_close(f);
913
914         hashmap_free(s->user_journals);
915
916         if (s->epoll_fd >= 0)
917                 close_nointr_nofail(s->epoll_fd);
918
919         if (s->signal_fd >= 0)
920                 close_nointr_nofail(s->signal_fd);
921
922         if (s->syslog_fd >= 0)
923                 close_nointr_nofail(s->syslog_fd);
924
925         if (s->native_fd >= 0)
926                 close_nointr_nofail(s->native_fd);
927 }
928
929 int main(int argc, char *argv[]) {
930         Server server;
931         int r;
932
933         /* if (getppid() != 1) { */
934         /*         log_error("This program should be invoked by init only."); */
935         /*         return EXIT_FAILURE; */
936         /* } */
937
938         if (argc > 1) {
939                 log_error("This program does not take arguments.");
940                 return EXIT_FAILURE;
941         }
942
943         log_set_target(LOG_TARGET_CONSOLE);
944         log_set_max_level(LOG_DEBUG);
945         log_parse_environment();
946         log_open();
947
948         umask(0022);
949
950         r = server_init(&server);
951         if (r < 0)
952                 goto finish;
953
954         log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
955
956         sd_notify(false,
957                   "READY=1\n"
958                   "STATUS=Processing messages...");
959
960         for (;;) {
961                 struct epoll_event event;
962
963                 r = epoll_wait(server.epoll_fd, &event, 1, -1);
964                 if (r < 0) {
965
966                         if (errno == EINTR)
967                                 continue;
968
969                         log_error("epoll_wait() failed: %m");
970                         r = -errno;
971                         goto finish;
972                 } else if (r == 0)
973                         break;
974
975                 r = process_event(&server, &event);
976                 if (r < 0)
977                         goto finish;
978                 else if (r == 0)
979                         break;
980         }
981
982 finish:
983         sd_notify(false,
984                   "STATUS=Shutting down...");
985
986         server_done(&server);
987
988         return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
989 }