1 /* $Id: makehistory.c 7468 2005-12-12 03:23:21Z eagle $
3 ** Rebuild history/overview databases.
8 #include "portable/wait.h"
14 #include "inn/buffer.h"
15 #include "inn/history.h"
16 #include "inn/innconf.h"
17 #include "inn/messages.h"
25 static const char usage[] = "\
26 Usage: makehistory [-bOIax] [-f file] [-l count] [-s size] [-T tmpdir]\n\
28 -b delete bad articles from spool\n\
29 -e read entire articles to compute proper byte count\n\
30 -f write history entries to file (default $pathdb/history)\n\
31 -s size size new history database for approximately size entries\n\
32 -a open output history file in append mode\n\
33 -O create overview entries for articles\n\
34 -I do not create overview for articles numbered below lowmark\n\
35 -l count size of overview updates (default 10000)\n\
36 -x don't write history entries\n\
37 -T tmpdir use directory tmpdir for temporary files\n\
38 -F fork when writing overview\n";
42 ** Information about the schema of the news overview files.
44 typedef struct _ARTOVERFIELD {
/* Default number of overview lines accumulated in the temporary overview
   file before it is flushed; main() assigns this to OverTmpSegSize, and the
   usage text documents the same value as the default for -l.  The definition
   deliberately has no trailing semicolon so that the macro expands to a plain
   integer constant usable inside any expression. */
#define DEFAULT_SEGSIZE 10000
56 char *SchemaPath = NULL;
57 char *ActivePath = NULL;
58 char *HistoryPath = NULL;
59 struct history *History;
63 bool Cutofflow = false;
65 int OverTmpSegSize, OverTmpSegCount;
67 char *OverTmpPath = NULL;
74 /* Misc variables needed for the overview creation code. */
75 static char MESSAGEID[] = "Message-ID";
76 static char EXPIRES[] = "Expires";
77 static char DATE[] = "Date";
78 static char XREF[] = "Xref";
79 static ARTOVERFIELD *ARTfields; /* overview fields listed in overview.fmt */
80 static size_t ARTfieldsize;
81 static ARTOVERFIELD *Datep = (ARTOVERFIELD *)NULL;
82 static ARTOVERFIELD *Msgidp = (ARTOVERFIELD *)NULL;
83 static ARTOVERFIELD *Expp = (ARTOVERFIELD *)NULL;
84 static ARTOVERFIELD *Xrefp = (ARTOVERFIELD *)NULL;
85 static ARTOVERFIELD *Missfields; /* header fields not listed in
86 overview.fmt, but ones that we need
88 static size_t Missfieldsize = 0;
90 static void OverAddAllNewsgroups(void);
93 ** Check and parse a date header line. Return the new value or
103 if ((t = parsedate(p, &Now)) == -1)
109 ** Check and parse a Message-ID header line. Return private space.
112 GetMessageID(char *p)
114 static struct buffer buffer = { 0, 0, 0, NULL };
118 if (p[0] != '<' || p[strlen(p) - 1] != '>')
121 /* Copy into re-used memory space, including NUL. */
122 buffer_set(&buffer, p, strlen(p)+1);
127 * The overview temp file is used to accumulate overview lines as articles are
128 * scanned. The format is
129 * (1st) newsgroup name\tToken\toverview data.
130 * When about 10000 lines of this overview data are accumulated, the data
131 * file is sorted and then read back in and the data added to overview.
132 * The sorting/batching helps improve efficiency.
136 * Flush the unwritten OverTempFile data to disk, sort the file, read it
137 * back in, and add it to overview.
141 FlushOverTmpFile(void)
152 time_t arrived, expires;
153 static int first = 1;
155 if (OverTmpFile == NULL)
157 if (fflush(OverTmpFile) == EOF || ferror(OverTmpFile) || fclose(OverTmpFile) == EOF)
158 sysdie("cannot close temporary overview file");
160 if(!first) { /* if previous one is running, wait for it */
163 if((WIFEXITED(status) && WEXITSTATUS(status) != 0)
164 || WIFSIGNALED(status))
170 sysdie("cannot fork");
180 /* init the overview setup. */
181 if (!OVopen(OV_WRITE)) {
182 warn("cannot open overview");
185 if (!OVctl(OVSORT, (void *)&sorttype)) {
186 warn("cannot obtain overview sorting information");
190 if (!OVctl(OVCUTOFFLOW, (void *)&Cutofflow)) {
191 warn("cannot obtain overview cutoff information");
197 /* This is a bit odd, but as long as other users' files can't be deleted
198 out of the temporary directory, it should work. We're using mkstemp to
199 create a file and then passing its name to sort, which will then open
200 it again and overwrite it. */
201 SortedTmpPath = concatpath(TmpDir, "hisTXXXXXX");
202 fd = mkstemp(SortedTmpPath);
204 syswarn("cannot create temporary file");
206 Fork ? _exit(1) : exit(1);
209 snprintf(temp, sizeof(temp), "exec %s -T %s -t'%c' -o %s %s", _PATH_SORT,
210 TmpDir, '\t', SortedTmpPath, OverTmpPath);
212 i = system(temp) >> 8;
214 syswarn("cannot sort temporary overview file (%s exited %d)",
217 Fork ? _exit(1) : exit(1);
220 /* don't need old path anymore. */
225 /* read sorted lines. */
226 if ((qp = QIOopen(SortedTmpPath)) == NULL) {
227 syswarn("cannot open sorted overview file %s", SortedTmpPath);
229 Fork ? _exit(1) : exit(1);
232 for (count = 1; ; ++count) {
235 if (QIOtoolong(qp)) {
236 warn("overview line %d is too long", count);
241 if ((p = strchr(line, '\t')) == NULL
242 || (q = strchr(p+1, '\t')) == NULL
243 || (r = strchr(q+1, '\t')) == NULL) {
244 warn("sorted overview file %s has a bad line at %d",
245 SortedTmpPath, count);
248 /* p+1 now points to start of token, q+1 points to start of overline. */
249 if (sorttype == OVNEWSGROUP) {
253 arrived = (time_t)atol(p);
254 expires = (time_t)atol(q);
256 if ((r = strchr(r, '\t')) == NULL) {
257 warn("sorted overview file %s has a bad line at %d",
258 SortedTmpPath, count);
266 arrived = (time_t)atol(line);
267 expires = (time_t)atol(p);
269 token = TextToToken(q);
270 if (OVadd(token, r, strlen(r), arrived, expires) == OVADDFAILED) {
271 if (OVctl(OVSPACE, (void *)&i) && i == OV_NOSPACE) {
272 warn("no space left for overview");
274 Fork ? _exit(1) : exit(1);
276 warn("cannot write overview data \"%.40s\"", q);
279 /* Check for errors and close. */
281 syswarn("cannot read sorted overview file %s", SortedTmpPath);
283 Fork ? _exit(1) : exit(1);
286 /* unlink sorted tmp file */
287 unlink(SortedTmpPath);
297 * Write a line to the overview temp file.
300 WriteOverLine(TOKEN *token, const char *xrefs, int xrefslen,
301 char *overdata, int overlen, time_t arrived, time_t expires)
304 const char *p, *q, *r;
307 if (sorttype == OVNOSORT) {
309 fprintf(Overchan, "%s %ld %ld ", TokenToText(*token), (long)arrived, (long)expires);
310 if (fwrite(overdata, 1, overlen, Overchan) != (size_t) overlen)
311 sysdie("writing overview failed");
312 fputc('\n', Overchan);
313 } else if (OVadd(*token, overdata, overlen, arrived, expires) == OVADDFAILED) {
314 if (OVctl(OVSPACE, (void *)&i) && i == OV_NOSPACE) {
315 warn("no space left for overview");
319 warn("cannot write overview data for article %s",
320 TokenToText(*token));
324 if (OverTmpPath == NULL) {
325 /* need new temp file, so create it. */
326 OverTmpPath = concatpath(TmpDir, "histXXXXXX");
327 fd = mkstemp(OverTmpPath);
329 sysdie("cannot create temporary file");
330 OverTmpFile = fdopen(fd, "w");
331 if (OverTmpFile == NULL)
332 sysdie("cannot open %s", OverTmpPath);
335 if (sorttype == OVNEWSGROUP) {
336 /* find first ng name in xref. */
337 for (p = xrefs, q=NULL ; p < xrefs+xrefslen ; ++p) {
339 q = p+1; /* found space */
344 warn("bogus Xref data for %s", TokenToText(*token));
345 /* XXX do nuke here? */
349 for (p = q, r=NULL ; p < xrefs+xrefslen ; ++p) {
356 warn("bogus Xref data for %s", TokenToText(*token));
357 /* XXX do nuke here? */
360 /* q points to start of ng name, r points to its end. */
361 assert(sizeof(temp) > r - q + 1);
362 memcpy(temp, q, r - q + 1);
363 temp[r - q + 1] = '\0';
364 fprintf(OverTmpFile, "%s\t%10lu\t%lu\t%s\t", temp,
365 (unsigned long) arrived, (unsigned long) expires,
366 TokenToText(*token));
368 fprintf(OverTmpFile, "%10lu\t%lu\t%s\t", (unsigned long) arrived,
369 (unsigned long) expires,
370 TokenToText(*token));
372 fwrite(overdata, overlen, 1, OverTmpFile);
373 fprintf(OverTmpFile, "\n");
376 if (OverTmpSegSize != 0 && OverTmpSegCount >= OverTmpSegSize) {
383 ** Read the overview schema.
386 ARTreadschema(bool Overview)
393 bool foundxreffull = false;
396 /* Open file, count lines. */
397 if ((F = fopen(SchemaPath, "r")) == NULL)
398 sysdie("cannot open %s", SchemaPath);
399 for (i = 0; fgets(buff, sizeof buff, F) != NULL; i++)
401 fseeko(F, 0, SEEK_SET);
402 ARTfields = xmalloc((i + 1) * sizeof(ARTOVERFIELD));
404 /* Parse each field. */
405 for (fp = ARTfields; fgets(buff, sizeof buff, F) != NULL; ) {
406 /* Ignore blank and comment lines. */
407 if ((p = strchr(buff, '\n')) != NULL)
409 if ((p = strchr(buff, '#')) != NULL)
413 if ((p = strchr(buff, ':')) != NULL) {
415 fp->NeedHeadername = (strcmp(p, "full") == 0);
418 fp->NeedHeadername = false;
419 fp->Headername = xstrdup(buff);
420 fp->HeadernameLength = strlen(buff);
421 fp->Header = (char *)NULL;
422 fp->HasHeader = false;
423 fp->HeaderLength = 0;
424 if (strncasecmp(buff, DATE, strlen(DATE)) == 0)
426 if (strncasecmp(buff, MESSAGEID, strlen(MESSAGEID)) == 0)
428 if (strncasecmp(buff, EXPIRES, strlen(EXPIRES)) == 0)
430 if (strncasecmp(buff, XREF, strlen(XREF)) == 0) {
432 foundxreffull = fp->NeedHeadername;
436 ARTfieldsize = fp - ARTfields;
439 if (Msgidp == (ARTOVERFIELD *)NULL)
441 if (Datep == (ARTOVERFIELD *)NULL)
443 if (Expp == (ARTOVERFIELD *)NULL)
445 if (Overview && (Xrefp == (ARTOVERFIELD *)NULL || !foundxreffull))
446 die("Xref:full must be included in %s", SchemaPath);
447 if (Missfieldsize > 0) {
448 Missfields = xmalloc(Missfieldsize * sizeof(ARTOVERFIELD));
450 if (Msgidp == (ARTOVERFIELD *)NULL) {
451 fp->NeedHeadername = false;
452 fp->Headername = xstrdup(MESSAGEID);
453 fp->HeadernameLength = strlen(MESSAGEID);
454 fp->Header = (char *)NULL;
455 fp->HasHeader = false;
456 fp->HeaderLength = 0;
459 if (Datep == (ARTOVERFIELD *)NULL) {
460 fp->NeedHeadername = false;
461 fp->Headername = xstrdup(DATE);
462 fp->HeadernameLength = strlen(DATE);
463 fp->Header = (char *)NULL;
464 fp->HasHeader = false;
465 fp->HeaderLength = 0;
468 if (Expp == (ARTOVERFIELD *)NULL) {
469 fp->NeedHeadername = false;
470 fp->Headername = xstrdup(EXPIRES);
471 fp->HeadernameLength = strlen(EXPIRES);
472 fp->Header = (char *)NULL;
473 fp->HasHeader = false;
474 fp->HeaderLength = 0;
477 if (Overview && Xrefp == (ARTOVERFIELD *)NULL) {
478 fp->NeedHeadername = false;
479 fp->Headername = xstrdup(XREF);
480 fp->HeadernameLength = strlen(XREF);
481 fp->Header = (char *)NULL;
482 fp->HasHeader = false;
483 fp->HeaderLength = 0;
490 * Handle a single article. This routine's fairly complicated.
493 DoArt(ARTHANDLE *art)
498 static struct buffer buffer = { 0, 0, 0, NULL };
499 static char SEP[] = "\t";
500 static char NUL[] = "\0";
501 static char COLONSPACE[] = ": ";
503 const char *MessageID;
507 char overdata[BIG_BUFFER];
508 char bytes[BIG_BUFFER];
511 /* Set up place to store headers. */
512 for (fp = ARTfields, i = 0; i < ARTfieldsize; i++, fp++) {
513 if (fp->HeaderLength) {
516 fp->HeaderLength = 0;
517 fp->HasHeader = false;
519 if (Missfieldsize > 0) {
520 for (fp = Missfields, i = 0; i < Missfieldsize; i++, fp++) {
521 if (fp->HeaderLength) {
524 fp->HeaderLength = 0;
525 fp->HasHeader = false;
528 for (fp = ARTfields, i = 0; i < ARTfieldsize; i++, fp++) {
529 fp->Header = wire_findheader(art->data, art->len, fp->Headername);
531 /* Someone managed to break their server so that they were appending
532 multiple Xref headers, and INN had a bug where it wouldn't notice
533 this and reject the article. Just in case, see if there are
534 multiple Xref headers and use the last one. */
536 const char *next = fp->Header;
539 while (next != NULL) {
540 next = wire_endheader(fp->Header, art->data + art->len - 1);
544 left = art->len - (next - art->data);
545 next = wire_findheader(next, left, fp->Headername);
551 /* Now, if we have a header, find and record its length. */
552 if (fp->Header != NULL) {
553 fp->HasHeader = true;
554 p = wire_endheader(fp->Header, art->data + art->len - 1);
558 /* The true length of the header is p - fp->Header + 1, but p
559 points to the \n at the end of the header, so subtract 2 to
560 peel off the \r\n (we're guaranteed we're dealing with
561 wire-format articles). */
562 fp->HeaderLength = p - fp->Header - 1;
563 } else if (RetrMode == RETR_ALL
564 && strcmp(fp->Headername, "Bytes") == 0) {
565 snprintf(bytes, sizeof(bytes), "%lu", (unsigned long) art->len);
566 fp->HasHeader = true;
568 fp->HeaderLength = strlen(bytes);
571 if (Missfieldsize > 0) {
572 for (fp = Missfields, i = 0; i < Missfieldsize; i++, fp++) {
573 fp->Header = wire_findheader(art->data, art->len, fp->Headername);
574 if (fp->Header != NULL) {
575 fp->HasHeader = true;
576 p = wire_endheader(fp->Header, art->data + art->len - 1);
579 fp->HeaderLength = p - fp->Header - 1;
583 if (DoOverview && Xrefp->HeaderLength == 0) {
584 if (!SMprobe(SMARTNGNUM, art->token, (void *)&ann)) {
585 Xrefp->Header = NULL;
586 Xrefp->HeaderLength = 0;
588 if (ann.artnum == 0 || ann.groupname == NULL)
590 len = strlen(innconf->pathhost) + 1 + strlen(ann.groupname) + 1
592 if (len > BIG_BUFFER) {
593 Xrefp->Header = NULL;
594 Xrefp->HeaderLength = 0;
596 snprintf(overdata, sizeof(overdata), "%s %s:%lu",
597 innconf->pathhost, ann.groupname, ann.artnum);
598 Xrefp->Header = overdata;
599 Xrefp->HeaderLength = strlen(overdata);
601 if (ann.groupname != NULL)
606 MessageID = (char *)NULL;
607 Arrived = art->arrived;
611 if (!Msgidp->HasHeader) {
612 warn("no Message-ID header in %s", TokenToText(*art->token));
614 SMcancel(*art->token);
618 buffer_set(&buffer, Msgidp->Header, Msgidp->HeaderLength);
619 buffer_append(&buffer, NUL, 1);
620 for (i = 0, q = buffer.data; i < buffer.left; q++, i++)
621 if (*q == '\t' || *q == '\n' || *q == '\r')
623 MessageID = GetMessageID(buffer.data);
624 if (*MessageID == '\0') {
625 warn("no Message-ID header in %s", TokenToText(*art->token));
627 SMcancel(*art->token);
632 * check if msgid is in history if in update mode, or if article is
633 * newer than start time of makehistory.
636 if (!Datep->HasHeader) {
639 buffer_set(&buffer, Datep->Header, Datep->HeaderLength);
640 buffer_append(&buffer, NUL, 1);
641 for (i = 0, q = buffer.data; i < buffer.left; q++, i++)
642 if (*q == '\t' || *q == '\n' || *q == '\r')
644 if ((Posted = GetaDate(buffer.data)) == 0)
648 if (Expp->HasHeader) {
649 buffer_set(&buffer, Expp->Header, Expp->HeaderLength);
650 buffer_append(&buffer, NUL, 1);
651 for (i = 0, q = buffer.data; i < buffer.left; q++, i++)
652 if (*q == '\t' || *q == '\n' || *q == '\r')
654 Expires = GetaDate(buffer.data);
657 if (DoOverview && Xrefp->HeaderLength > 0) {
658 for (fp = ARTfields, j = 0; j < ARTfieldsize; j++, fp++) {
660 buffer_set(&buffer, "", 0);
662 buffer_append(&buffer, SEP, strlen(SEP));
663 if (fp->HeaderLength == 0)
665 if (fp->NeedHeadername) {
666 buffer_append(&buffer, fp->Headername, fp->HeadernameLength);
667 buffer_append(&buffer, COLONSPACE, strlen(COLONSPACE));
670 buffer_resize(&buffer, buffer.left + fp->HeaderLength);
671 end = fp->Header + fp->HeaderLength - 1;
672 for (p = fp->Header, q = &buffer.data[i]; p <= end; p++) {
673 if (*p == '\r' && p < end && p[1] == '\n') {
677 if (*p == '\0' || *p == '\t' || *p == '\n' || *p == '\r')
684 WriteOverLine(art->token, Xrefp->Header, Xrefp->HeaderLength,
685 buffer.data, buffer.left, Arrived, Expires);
691 r = HISwrite(History, MessageID,
692 Arrived, Posted, Expires, art->token);
694 sysdie("cannot write history line");
700 ** Add all groups to overview group.index. --rmt
703 OverAddAllNewsgroups(void)
711 if ((qp = QIOopen(ActivePath)) == NULL)
712 sysdie("cannot open %s", ActivePath);
713 for (count = 1; (line = QIOread(qp)) != NULL; count++) {
714 if ((p = strchr(line, ' ')) == NULL) {
715 warn("bad active line %d: %.40s", count, line);
719 hi = (ARTNUM)atol(p);
720 if ((p = strchr(p, ' ')) == NULL) {
721 warn("bad active line %d: %.40s", count, line);
725 lo = (ARTNUM)atol(p);
726 if ((q = strrchr(p, ' ')) == NULL) {
727 warn("bad active line %d: %.40s", count, line);
730 /* q+1 points to NG flag */
731 if (!OVgroupadd(line, lo, hi, q+1))
732 die("cannot add %s to overview group index", line);
734 /* Test error conditions; QIOtoolong shouldn't happen. */
736 die("active file line %d is too long", count);
738 sysdie("cannot read %s around line %d", ActivePath, count);
744 ** Change to the news user if possible, and if not, die. Used for operations
745 ** that may create new database files, so as not to mess up the ownership.
752 pwd = getpwnam(NEWSUSER);
754 die("can't resolve %s to a UID (account doesn't exist?)", NEWSUSER);
757 if (getuid() != pwd->pw_uid)
758 die("must be run as %s", NEWSUSER);
763 main(int argc, char **argv)
765 ARTHANDLE *art = NULL;
774 /* First thing, set up logging and our identity. */
775 openlog("makehistory", L_OPENLOG_FLAGS | LOG_PID, LOG_INN_PROG);
776 message_program_name = "makehistory";
779 if (!innconf_read(NULL))
781 HistoryPath = concatpath(innconf->pathdb, _PATH_HISTORY);
782 ActivePath = concatpath(innconf->pathdb, _PATH_ACTIVE);
783 TmpDir = innconf->pathtmp;
784 SchemaPath = concatpath(innconf->pathetc, _PATH_SCHEMA);
786 OverTmpSegSize = DEFAULT_SEGSIZE;
793 RetrMode = RETR_HEAD;
795 while ((i = getopt(argc, argv, "aebf:Il:OT:xFs:")) != EOF) {
810 HistoryPath = optarg;
816 OverTmpSegSize = atoi(optarg);
828 npairs = atoi(optarg);
832 fprintf(stderr, "%s", usage);
840 fprintf(stderr, "%s", usage);
844 if ((p = strrchr(HistoryPath, '/')) == NULL) {
845 /* find the default history file directory */
846 HistoryDir = innconf->pathdb;
849 HistoryDir = xstrdup(HistoryPath);
853 if (chdir(HistoryDir) < 0)
854 sysdie("cannot chdir to %s", HistoryDir);
856 /* Change users if necessary. */
859 /* Read in the overview schema */
860 ARTreadschema(DoOverview);
863 /* init the overview setup. */
864 if (!OVopen(OV_WRITE))
865 sysdie("cannot open overview");
866 if (!OVctl(OVSORT, (void *)&sorttype))
867 die("cannot obtain overview sort information");
869 if (!OVctl(OVCUTOFFLOW, (void *)&Cutofflow))
870 die("cannot obtain overview cutoff information");
871 OverAddAllNewsgroups();
873 OverAddAllNewsgroups();
874 if (sorttype == OVNOSORT) {
875 buff = concat(innconf->pathbin, "/", "overchan", NULL);
876 if ((Overchan = popen(buff, "w")) == NULL)
877 sysdie("cannot fork overchan process");
884 /* Init the Storage Manager */
886 if (!SMsetup(SM_RDWR, (void *)&val) || !SMsetup(SM_PREOPEN, (void *)&val))
887 sysdie("cannot set up storage manager");
889 sysdie("cannot initialize storage manager: %s", SMerrorstr);
891 /* Initialise the history manager */
893 int flags = HIS_RDWR | HIS_INCORE;
897 History = HISopen(NULL, innconf->hismethod, flags);
899 sysdie("cannot create history handle");
900 HISctl(History, HISCTLS_NPAIRS, &npairs);
901 if (!HISctl(History, HISCTLS_PATH, HistoryPath))
902 sysdie("cannot open %s", HistoryPath);
905 /* Get the time. Only get it once, which is good enough. */
906 if (GetTimeInfo(&Now) < 0)
907 sysdie("cannot get the time");
910 * Scan the entire spool, nuke any bad arts if needed, and process each
914 while ((art = SMnext(art, RetrMode)) != NULL) {
916 if (NukeBadArts && art->data == NULL && art->token != NULL)
917 SMcancel(*art->token);
924 /* close history file. */
925 if (!HISclose(History))
926 sysdie("cannot close history file");
930 if (sorttype == OVNOSORT && Fork)
931 if (fflush(Overchan) == EOF || ferror(Overchan) || pclose(Overchan) == EOF)
932 sysdie("cannot flush overview data");
933 if (sorttype != OVNOSORT) {