chiark / gitweb /
d35e1c119ab5984604471f1ed08b240ecadc93c7
[elogind.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 2 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <sys/acl.h>
29 #include <acl/libacl.h>
30 #include <stddef.h>
31 #include <sys/ioctl.h>
32 #include <linux/sockios.h>
33
34 #include "hashmap.h"
35 #include "journal-file.h"
36 #include "sd-daemon.h"
37 #include "socket-util.h"
38 #include "acl-util.h"
39 #include "cgroup-util.h"
40
41 #define USER_JOURNALS_MAX 1024
42
43 typedef struct Server {
44         int epoll_fd;
45         int signal_fd;
46         int syslog_fd;
47         int native_fd;
48
49         JournalFile *runtime_journal;
50         JournalFile *system_journal;
51         Hashmap *user_journals;
52
53         uint64_t seqnum;
54
55         char *buffer;
56         size_t buffer_size;
57
58         JournalMetrics metrics;
59         uint64_t max_use;
60         bool compress;
61 } Server;
62
63 static void fix_perms(JournalFile *f, uid_t uid) {
64         acl_t acl;
65         acl_entry_t entry;
66         acl_permset_t permset;
67         int r;
68
69         assert(f);
70
71         r = fchmod_and_fchown(f->fd, 0640, 0, 0);
72         if (r < 0)
73                 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
74
75         if (uid <= 0)
76                 return;
77
78         acl = acl_get_fd(f->fd);
79         if (!acl) {
80                 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
81                 return;
82         }
83
84         r = acl_find_uid(acl, uid, &entry);
85         if (r <= 0) {
86
87                 if (acl_create_entry(&acl, &entry) < 0 ||
88                     acl_set_tag_type(entry, ACL_USER) < 0 ||
89                     acl_set_qualifier(entry, &uid) < 0) {
90                         log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
91                         goto finish;
92                 }
93         }
94
95         if (acl_get_permset(entry, &permset) < 0 ||
96             acl_add_perm(permset, ACL_READ) < 0 ||
97             acl_calc_mask(&acl) < 0) {
98                 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
99                 goto finish;
100         }
101
102         if (acl_set_fd(f->fd, acl) < 0)
103                 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
104
105 finish:
106         acl_free(acl);
107 }
108
109 static JournalFile* find_journal(Server *s, uid_t uid) {
110         char *p;
111         int r;
112         JournalFile *f;
113         char ids[33];
114         sd_id128_t machine;
115
116         assert(s);
117
118         /* We split up user logs only on /var, not on /run */
119         if (!s->system_journal)
120                 return s->runtime_journal;
121
122         if (uid <= 0)
123                 return s->system_journal;
124
125         r = sd_id128_get_machine(&machine);
126         if (r < 0)
127                 return s->system_journal;
128
129         f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
130         if (f)
131                 return f;
132
133         if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0)
134                 return s->system_journal;
135
136         while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
137                 /* Too many open? Then let's close one */
138                 f = hashmap_steal_first(s->user_journals);
139                 assert(f);
140                 journal_file_close(f);
141         }
142
143         r = journal_file_open(p, O_RDWR|O_CREAT, 0640, s->system_journal, &f);
144         free(p);
145
146         if (r < 0)
147                 return s->system_journal;
148
149         fix_perms(f, uid);
150         f->metrics = s->metrics;
151         f->compress = s->compress;
152
153         r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
154         if (r < 0) {
155                 journal_file_close(f);
156                 return s->system_journal;
157         }
158
159         return f;
160 }
161
162 static void server_vacuum(Server *s) {
163         Iterator i;
164         void *k;
165         char *p;
166         char ids[33];
167         sd_id128_t machine;
168         int r;
169         JournalFile *f;
170
171         log_info("Rotating...");
172
173         if (s->runtime_journal) {
174                 r = journal_file_rotate(&s->runtime_journal);
175                 if (r < 0)
176                         log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
177         }
178
179         if (s->system_journal) {
180                 r = journal_file_rotate(&s->system_journal);
181                 if (r < 0)
182                         log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
183         }
184
185         HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
186                 r = journal_file_rotate(&f);
187                 if (r < 0)
188                         log_error("Failed to rotate %s: %s", f->path, strerror(-r));
189                 else
190                         hashmap_replace(s->user_journals, k, f);
191         }
192
193         log_info("Vacuuming...");
194
195         r = sd_id128_get_machine(&machine);
196         if (r < 0) {
197                 log_error("Failed to get machine ID: %s", strerror(-r));
198                 return;
199         }
200
201         if (asprintf(&p, "/var/log/journal/%s", sd_id128_to_string(machine, ids)) < 0) {
202                 log_error("Out of memory.");
203                 return;
204         }
205
206         r = journal_directory_vacuum(p, s->max_use, s->metrics.keep_free);
207         if (r < 0 && r != -ENOENT)
208                 log_error("Failed to vacuum %s: %s", p, strerror(-r));
209         free(p);
210
211         if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
212                 log_error("Out of memory.");
213                 return;
214         }
215
216         r = journal_directory_vacuum(p, s->max_use, s->metrics.keep_free);
217         if (r < 0 && r != -ENOENT)
218                 log_error("Failed to vacuum %s: %s", p, strerror(-r));
219         free(p);
220 }
221
222 static void dispatch_message(Server *s, struct iovec *iovec, unsigned n, unsigned m, struct ucred *ucred, struct timeval *tv) {
223         char *pid = NULL, *uid = NULL, *gid = NULL,
224                 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
225                 *comm = NULL, *cmdline = NULL, *hostname = NULL,
226                 *audit_session = NULL, *audit_loginuid = NULL,
227                 *exe = NULL, *cgroup = NULL;
228
229         char idbuf[33];
230         sd_id128_t id;
231         int r;
232         char *t;
233         uid_t loginuid = 0, realuid = 0;
234         JournalFile *f;
235         bool vacuumed = false;
236
237         assert(s);
238         assert(iovec || n == 0);
239
240         if (n == 0)
241                 return;
242
243         assert(n + 13 <= m);
244
245         if (ucred) {
246                 uint32_t session;
247                 char *path;
248
249                 realuid = ucred->uid;
250
251                 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
252                         IOVEC_SET_STRING(iovec[n++], pid);
253
254                 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
255                         IOVEC_SET_STRING(iovec[n++], uid);
256
257                 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
258                         IOVEC_SET_STRING(iovec[n++], gid);
259
260                 r = get_process_comm(ucred->pid, &t);
261                 if (r >= 0) {
262                         comm = strappend("_COMM=", t);
263                         if (comm)
264                                 IOVEC_SET_STRING(iovec[n++], comm);
265                         free(t);
266                 }
267
268                 r = get_process_exe(ucred->pid, &t);
269                 if (r >= 0) {
270                         exe = strappend("_EXE=", t);
271                         if (comm)
272                                 IOVEC_SET_STRING(iovec[n++], exe);
273                         free(t);
274                 }
275
276                 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
277                 if (r >= 0) {
278                         cmdline = strappend("_CMDLINE=", t);
279                         if (cmdline)
280                                 IOVEC_SET_STRING(iovec[n++], cmdline);
281                         free(t);
282                 }
283
284                 r = audit_session_from_pid(ucred->pid, &session);
285                 if (r >= 0)
286                         if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) session) >= 0)
287                                 IOVEC_SET_STRING(iovec[n++], audit_session);
288
289                 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
290                 if (r >= 0)
291                         if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
292                                 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
293
294                 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, ucred->pid, &path);
295                 if (r >= 0) {
296                         cgroup = strappend("_SYSTEMD_CGROUP=", path);
297                         if (cgroup)
298                                 IOVEC_SET_STRING(iovec[n++], cgroup);
299                         free(path);
300                 }
301         }
302
303         if (tv) {
304                 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
305                              (unsigned long long) timeval_load(tv)) >= 0)
306                         IOVEC_SET_STRING(iovec[n++], source_time);
307         }
308
309         /* Note that strictly speaking storing the boot id here is
310          * redundant since the entry includes this in-line
311          * anyway. However, we need this indexed, too. */
312         r = sd_id128_get_boot(&id);
313         if (r >= 0)
314                 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
315                         IOVEC_SET_STRING(iovec[n++], boot_id);
316
317         r = sd_id128_get_machine(&id);
318         if (r >= 0)
319                 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
320                         IOVEC_SET_STRING(iovec[n++], machine_id);
321
322         t = gethostname_malloc();
323         if (t) {
324                 hostname = strappend("_HOSTNAME=", t);
325                 if (hostname)
326                         IOVEC_SET_STRING(iovec[n++], hostname);
327                 free(t);
328         }
329
330         assert(n <= m);
331
332 retry:
333         f = find_journal(s, realuid == 0 ? 0 : loginuid);
334         if (!f)
335                 log_warning("Dropping message, as we can't find a place to store the data.");
336         else {
337                 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
338
339                 if (r == -E2BIG && !vacuumed) {
340                         log_info("Allocation limit reached.");
341
342                         server_vacuum(s);
343                         vacuumed = true;
344
345                         log_info("Retrying write.");
346                         goto retry;
347                 }
348
349                 if (r < 0)
350                         log_error("Failed to write entry, ignoring: %s", strerror(-r));
351         }
352
353         free(pid);
354         free(uid);
355         free(gid);
356         free(comm);
357         free(exe);
358         free(cmdline);
359         free(source_time);
360         free(boot_id);
361         free(machine_id);
362         free(hostname);
363         free(audit_session);
364         free(audit_loginuid);
365         free(cgroup);
366
367 }
368
369 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv) {
370         char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL;
371         struct iovec iovec[16];
372         unsigned n = 0;
373         int priority = LOG_USER | LOG_INFO;
374
375         assert(s);
376         assert(buf);
377
378         parse_syslog_priority((char**) &buf, &priority);
379         skip_syslog_date((char**) &buf);
380
381         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
382                 IOVEC_SET_STRING(iovec[n++], syslog_priority);
383
384         if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
385                 IOVEC_SET_STRING(iovec[n++], syslog_facility);
386
387         message = strappend("MESSAGE=", buf);
388         if (message)
389                 IOVEC_SET_STRING(iovec[n++], message);
390
391         dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv);
392
393         free(message);
394         free(syslog_facility);
395         free(syslog_priority);
396 }
397
/* Checks whether the l bytes at p form a field name clients may set.
 *
 * We kinda enforce POSIX syntax recommendations for environment variables
 * here, but make a couple of additional requirements:
 *
 *   - 1 to 64 characters long
 *   - does not start with '_' (those names are protected)
 *   - does not start with a digit
 *   - consists only of A-Z, 0-9 and '_'
 *
 * http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html */
static bool valid_user_field(const char *p, size_t l) {
        size_t idx;

        /* Neither empty nor overly long names */
        if (l <= 0 || l > 64)
                return false;

        /* Restrictions that apply to the first character only */
        if (p[0] == '_' || (p[0] >= '0' && p[0] <= '9'))
                return false;

        for (idx = 0; idx < l; idx++) {
                char c = p[idx];
                bool is_upper = c >= 'A' && c <= 'Z';
                bool is_digit = c >= '0' && c <= '9';

                if (!is_upper && !is_digit && c != '_')
                        return false;
        }

        return true;
}
432
433 static void process_native_message(Server *s, const void *buffer, size_t buffer_size, struct ucred *ucred, struct timeval *tv) {
434         struct iovec *iovec = NULL;
435         unsigned n = 0, m = 0, j;
436         const char *p;
437         size_t remaining;
438
439         assert(s);
440         assert(buffer || n == 0);
441
442         p = buffer;
443         remaining = buffer_size;
444
445         while (remaining > 0) {
446                 const char *e, *q;
447
448                 e = memchr(p, '\n', remaining);
449
450                 if (!e) {
451                         /* Trailing noise, let's ignore it, and flush what we collected */
452                         log_debug("Received message with trailing noise, ignoring.");
453                         break;
454                 }
455
456                 if (e == p) {
457                         /* Entry separator */
458                         dispatch_message(s, iovec, n, m, ucred, tv);
459                         n = 0;
460
461                         p++;
462                         remaining--;
463                         continue;
464                 }
465
466                 if (*p == '.' || *p == '#') {
467                         /* Ignore control commands for now, and
468                          * comments too. */
469                         remaining -= (e - p) + 1;
470                         p = e + 1;
471                         continue;
472                 }
473
474                 /* A property follows */
475
476                 if (n+13 >= m) {
477                         struct iovec *c;
478                         unsigned u;
479
480                         u = MAX((n+13U) * 2U, 4U);
481                         c = realloc(iovec, u * sizeof(struct iovec));
482                         if (!c) {
483                                 log_error("Out of memory");
484                                 break;
485                         }
486
487                         iovec = c;
488                         m = u;
489                 }
490
491                 q = memchr(p, '=', e - p);
492                 if (q) {
493                         if (valid_user_field(p, q - p)) {
494                                 /* If the field name starts with an
495                                  * underscore, skip the variable,
496                                  * since that indidates a trusted
497                                  * field */
498                                 iovec[n].iov_base = (char*) p;
499                                 iovec[n].iov_len = e - p;
500                                 n++;
501                         }
502
503                         remaining -= (e - p) + 1;
504                         p = e + 1;
505                         continue;
506                 } else {
507                         uint64_t l;
508                         char *k;
509
510                         if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
511                                 log_debug("Failed to parse message, ignoring.");
512                                 break;
513                         }
514
515                         memcpy(&l, e + 1, sizeof(uint64_t));
516                         l = le64toh(l);
517
518                         if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
519                             e[1+sizeof(uint64_t)+l] != '\n') {
520                                 log_debug("Failed to parse message, ignoring.");
521                                 break;
522                         }
523
524                         k = malloc((e - p) + 1 + l);
525                         if (!k) {
526                                 log_error("Out of memory");
527                                 break;
528                         }
529
530                         memcpy(k, p, e - p);
531                         k[e - p] = '=';
532                         memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
533
534                         if (valid_user_field(p, e - p)) {
535                                 iovec[n].iov_base = k;
536                                 iovec[n].iov_len = (e - p) + 1 + l;
537                                 n++;
538                         } else
539                                 free(k);
540
541                         remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
542                         p = e + 1 + sizeof(uint64_t) + l + 1;
543                 }
544         }
545
546         dispatch_message(s, iovec, n, m, ucred, tv);
547
548         for (j = 0; j < n; j++)
549                 if (iovec[j].iov_base < buffer ||
550                     (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
551                         free(iovec[j].iov_base);
552 }
553
554 static int process_event(Server *s, struct epoll_event *ev) {
555         assert(s);
556
557         if (ev->events != EPOLLIN) {
558                 log_info("Got invalid event from epoll.");
559                 return -EIO;
560         }
561
562         if (ev->data.fd == s->signal_fd) {
563                 struct signalfd_siginfo sfsi;
564                 ssize_t n;
565
566                 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
567                 if (n != sizeof(sfsi)) {
568
569                         if (n >= 0)
570                                 return -EIO;
571
572                         if (errno == EINTR || errno == EAGAIN)
573                                 return 0;
574
575                         return -errno;
576                 }
577
578                 log_debug("Received SIG%s", signal_to_string(sfsi.ssi_signo));
579                 return 0;
580
581         }
582
583         if (ev->data.fd == s->native_fd ||
584             ev->data.fd == s->syslog_fd) {
585                 for (;;) {
586                         struct msghdr msghdr;
587                         struct iovec iovec;
588                         struct ucred *ucred = NULL;
589                         struct timeval *tv = NULL;
590                         struct cmsghdr *cmsg;
591                         union {
592                                 struct cmsghdr cmsghdr;
593                                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
594                                             CMSG_SPACE(sizeof(struct timeval))];
595                         } control;
596                         ssize_t n;
597                         int v;
598
599                         if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
600                                 log_error("SIOCINQ failed: %m");
601                                 return -errno;
602                         }
603
604                         if (v <= 0)
605                                 return 1;
606
607                         if (s->buffer_size < (size_t) v) {
608                                 void *b;
609                                 size_t l;
610
611                                 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
612                                 b = realloc(s->buffer, l+1);
613
614                                 if (!b) {
615                                         log_error("Couldn't increase buffer.");
616                                         return -ENOMEM;
617                                 }
618
619                                 s->buffer_size = l;
620                                 s->buffer = b;
621                         }
622
623                         zero(iovec);
624                         iovec.iov_base = s->buffer;
625                         iovec.iov_len = s->buffer_size;
626
627                         zero(control);
628                         zero(msghdr);
629                         msghdr.msg_iov = &iovec;
630                         msghdr.msg_iovlen = 1;
631                         msghdr.msg_control = &control;
632                         msghdr.msg_controllen = sizeof(control);
633
634                         n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT);
635                         if (n < 0) {
636
637                                 if (errno == EINTR || errno == EAGAIN)
638                                         return 1;
639
640                                 log_error("recvmsg() failed: %m");
641                                 return -errno;
642                         }
643
644                         for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
645
646                                 if (cmsg->cmsg_level == SOL_SOCKET &&
647                                     cmsg->cmsg_type == SCM_CREDENTIALS &&
648                                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
649                                         ucred = (struct ucred*) CMSG_DATA(cmsg);
650                                 else if (cmsg->cmsg_level == SOL_SOCKET &&
651                                          cmsg->cmsg_type == SO_TIMESTAMP &&
652                                          cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
653                                         tv = (struct timeval*) CMSG_DATA(cmsg);
654                         }
655
656                         if (ev->data.fd == s->syslog_fd) {
657                                 char *e;
658
659                                 e = memchr(s->buffer, '\n', n);
660                                 if (e)
661                                         *e = 0;
662                                 else
663                                         s->buffer[n] = 0;
664
665                                 process_syslog_message(s, strstrip(s->buffer), ucred, tv);
666                         } else
667                                 process_native_message(s, s->buffer, n, ucred, tv);
668                 }
669
670                 return 1;
671         }
672
673         log_error("Unknown event.");
674         return 0;
675 }
676
677 static int system_journal_open(Server *s) {
678         int r;
679         char *fn;
680         sd_id128_t machine;
681         char ids[33];
682
683         r = sd_id128_get_machine(&machine);
684         if (r < 0)
685                 return r;
686
687         /* First try to create the machine path, but not the prefix */
688         fn = strappend("/var/log/journal/", sd_id128_to_string(machine, ids));
689         if (!fn)
690                 return -ENOMEM;
691         (void) mkdir(fn, 0755);
692         free(fn);
693
694         /* The create the system journal file */
695         fn = join("/var/log/journal/", ids, "/system.journal", NULL);
696         if (!fn)
697                 return -ENOMEM;
698
699         r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, NULL, &s->system_journal);
700         free(fn);
701
702         if (r >= 0) {
703                 s->system_journal->metrics = s->metrics;
704                 s->system_journal->compress = s->compress;
705
706                 fix_perms(s->system_journal, 0);
707                 return r;
708         }
709
710         if (r < 0 && r != -ENOENT) {
711                 log_error("Failed to open system journal: %s", strerror(-r));
712                 return r;
713         }
714
715         /* /var didn't work, so try /run, but this time we
716          * create the prefix too */
717         fn = join("/run/log/journal/", ids, "/system.journal", NULL);
718         if (!fn)
719                 return -ENOMEM;
720
721         (void) mkdir_parents(fn, 0755);
722         r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, NULL, &s->runtime_journal);
723         free(fn);
724
725         if (r < 0) {
726                 log_error("Failed to open runtime journal: %s", strerror(-r));
727                 return r;
728         }
729
730         s->runtime_journal->metrics = s->metrics;
731         s->runtime_journal->compress = s->compress;
732
733         fix_perms(s->runtime_journal, 0);
734         return r;
735 }
736
737 static int open_syslog_socket(Server *s) {
738         union sockaddr_union sa;
739         int one, r;
740
741         assert(s);
742
743         if (s->syslog_fd < 0) {
744
745                 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0);
746                 if (s->syslog_fd < 0) {
747                         log_error("socket() failed: %m");
748                         return -errno;
749                 }
750
751                 zero(sa);
752                 sa.un.sun_family = AF_UNIX;
753                 strncpy(sa.un.sun_path, "/run/systemd/syslog", sizeof(sa.un.sun_path));
754
755                 unlink(sa.un.sun_path);
756
757                 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
758                 if (r < 0) {
759                         log_error("bind() failed: %m");
760                         return -errno;
761                 }
762
763                 chmod(sa.un.sun_path, 0666);
764         }
765
766         one = 1;
767         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
768         if (r < 0) {
769                 log_error("SO_PASSCRED failed: %m");
770                 return -errno;
771         }
772
773         one = 1;
774         r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
775         if (r < 0) {
776                 log_error("SO_TIMESTAMP failed: %m");
777                 return -errno;
778         }
779
780         return 0;
781 }
782
783 static int open_native_socket(Server*s) {
784         union sockaddr_union sa;
785         int one, r;
786
787         assert(s);
788
789         if (s->native_fd < 0) {
790
791                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0);
792                 if (s->native_fd < 0) {
793                         log_error("socket() failed: %m");
794                         return -errno;
795                 }
796
797                 zero(sa);
798                 sa.un.sun_family = AF_UNIX;
799                 strncpy(sa.un.sun_path, "/run/systemd/journal", sizeof(sa.un.sun_path));
800
801                 unlink(sa.un.sun_path);
802
803                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
804                 if (r < 0) {
805                         log_error("bind() failed: %m");
806                         return -errno;
807                 }
808
809                 chmod(sa.un.sun_path, 0666);
810         }
811
812         one = 1;
813         r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
814         if (r < 0) {
815                 log_error("SO_PASSCRED failed: %m");
816                 return -errno;
817         }
818
819         one = 1;
820         r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
821         if (r < 0) {
822                 log_error("SO_TIMESTAMP failed: %m");
823                 return -errno;
824         }
825
826         return 0;
827 }
828
829 static int server_init(Server *s) {
830         int n, r, fd;
831         struct epoll_event ev;
832         sigset_t mask;
833
834         assert(s);
835
836         zero(*s);
837         s->syslog_fd = s->native_fd = s->signal_fd = -1;
838         s->metrics.max_size = DEFAULT_MAX_SIZE;
839         s->metrics.min_size = DEFAULT_MIN_SIZE;
840         s->metrics.keep_free = DEFAULT_KEEP_FREE;
841         s->max_use = DEFAULT_MAX_USE;
842         s->compress = true;
843
844         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
845         if (s->epoll_fd < 0) {
846                 log_error("Failed to create epoll object: %m");
847                 return -errno;
848         }
849
850         n = sd_listen_fds(true);
851         if (n < 0) {
852                 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
853                 return n;
854         }
855
856         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
857
858                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
859
860                         if (s->syslog_fd >= 0) {
861                                 log_error("Too many /dev/log sockets passed.");
862                                 return -EINVAL;
863                         }
864
865                         s->syslog_fd = fd;
866
867                 } else if (sd_is_socket(fd, AF_UNIX, SOCK_DGRAM, -1) > 0) {
868
869                         if (s->native_fd >= 0) {
870                                 log_error("Too many native sockets passed.");
871                                 return -EINVAL;
872                         }
873
874                         s->native_fd = fd;
875                 } else {
876                         log_error("Unknown socket passed.");
877                         return -EINVAL;
878                 }
879         }
880
881         r = open_syslog_socket(s);
882         if (r < 0)
883                 return r;
884
885         zero(ev);
886         ev.events = EPOLLIN;
887         ev.data.fd = s->syslog_fd;
888         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
889                 log_error("Failed to add syslog server fd to epoll object: %m");
890                 return -errno;
891         }
892
893         r = open_native_socket(s);
894         if (r < 0)
895                 return r;
896
897         zero(ev);
898         ev.events = EPOLLIN;
899         ev.data.fd = s->native_fd;
900         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
901                 log_error("Failed to add native server fd to epoll object: %m");
902                 return -errno;
903         }
904
905         s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
906         if (!s->user_journals) {
907                 log_error("Out of memory.");
908                 return -ENOMEM;
909         }
910
911         r = system_journal_open(s);
912         if (r < 0)
913                 return r;
914
915         assert_se(sigemptyset(&mask) == 0);
916         sigset_add_many(&mask, SIGINT, SIGTERM, -1);
917         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
918
919         s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
920         if (s->signal_fd < 0) {
921                 log_error("signalfd(): %m");
922                 return -errno;
923         }
924
925         zero(ev);
926         ev.events = EPOLLIN;
927         ev.data.fd = s->signal_fd;
928
929         if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
930                 log_error("epoll_ctl(): %m");
931                 return -errno;
932         }
933
934         return 0;
935 }
936
937 static void server_done(Server *s) {
938         JournalFile *f;
939         assert(s);
940
941         if (s->system_journal)
942                 journal_file_close(s->system_journal);
943
944         if (s->runtime_journal)
945                 journal_file_close(s->runtime_journal);
946
947         while ((f = hashmap_steal_first(s->user_journals)))
948                 journal_file_close(f);
949
950         hashmap_free(s->user_journals);
951
952         if (s->epoll_fd >= 0)
953                 close_nointr_nofail(s->epoll_fd);
954
955         if (s->signal_fd >= 0)
956                 close_nointr_nofail(s->signal_fd);
957
958         if (s->syslog_fd >= 0)
959                 close_nointr_nofail(s->syslog_fd);
960
961         if (s->native_fd >= 0)
962                 close_nointr_nofail(s->native_fd);
963 }
964
965 int main(int argc, char *argv[]) {
966         Server server;
967         int r;
968
969         /* if (getppid() != 1) { */
970         /*         log_error("This program should be invoked by init only."); */
971         /*         return EXIT_FAILURE; */
972         /* } */
973
974         if (argc > 1) {
975                 log_error("This program does not take arguments.");
976                 return EXIT_FAILURE;
977         }
978
979         log_set_target(LOG_TARGET_CONSOLE);
980         log_set_max_level(LOG_DEBUG);
981         log_parse_environment();
982         log_open();
983
984         umask(0022);
985
986         r = server_init(&server);
987         if (r < 0)
988                 goto finish;
989
990         log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
991
992         sd_notify(false,
993                   "READY=1\n"
994                   "STATUS=Processing messages...");
995
996         for (;;) {
997                 struct epoll_event event;
998
999                 r = epoll_wait(server.epoll_fd, &event, 1, -1);
1000                 if (r < 0) {
1001
1002                         if (errno == EINTR)
1003                                 continue;
1004
1005                         log_error("epoll_wait() failed: %m");
1006                         r = -errno;
1007                         goto finish;
1008                 } else if (r == 0)
1009                         break;
1010
1011                 r = process_event(&server, &event);
1012                 if (r < 0)
1013                         goto finish;
1014                 else if (r == 0)
1015                         break;
1016         }
1017
1018 finish:
1019         sd_notify(false,
1020                   "STATUS=Shutting down...");
1021
1022         server_done(&server);
1023
1024         return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
1025 }