1 /* $Id: expire.c 6775 2004-05-17 06:23:42Z rra $
3 ** Code for overview-driven expiration.
5 ** In order to expire on a per-newsgroup (instead of per-storage-class)
6 ** basis, one has to use overview-driven expiration. This contains all of
7 ** the code to do that. It provides OVgroupbasedexpire, OVhisthasmsgid, and
8 ** OVgroupmatch for the use of various overview methods.
16 #include "inn/innconf.h"
19 #include "ovinterface.h"
23 enum KRP {Keep, Remove, Poison}; /* per-group verdict produced by EXPkeepit() and stored in krps[] */
26 static long EXPprocessed; /* printed as "Article lines processed" in the final report */
27 static long EXPunlinked; /* printed as "Articles dropped" in the final report */
28 static long EXPoverindexdrop; /* printed as "Overview index dropped" in the final report */
30 #define NGH_HASH(Name, p, j) \
31 for (p = Name, j = 0; *p; ) j = (j << 5) + j + *p++
33 #define NGH_BUCKET(j) &NGHtable[j & (NGH_SIZE - 1)]
35 #define OVFMT_UNINIT -2 /* initial value of the header index variables: not looked up yet */
36 #define OVFMT_NODATE -1 /* value Dateindex is preset to before the overview fields are scanned */
37 #define OVFMT_NOXREF -1 /* NOTE(review): presumably the Xref counterpart of OVFMT_NODATE; no use is visible in this excerpt -- confirm */
39 static int Dateindex = OVFMT_UNINIT; /* field index passed to OVERGetHeader() to fetch Date */
40 static int Xrefindex = OVFMT_UNINIT; /* field index passed to OVERGetHeader() to fetch Xref */
41 static int Messageidindex = OVFMT_UNINIT; /* field index passed to OVERGetHeader() to fetch Message-ID */
43 typedef struct _NEWSGROUP {
47 unsigned long Lastpurged;
48 /* These fields are new. */
52 /* X flag => remove entire article when it expires in this group */
56 typedef struct _NGHASH {
62 #define MAGIC_TIME 49710.
64 typedef struct _BADGROUP {
65 struct _BADGROUP *Next;
70 ** Information about the schema of the news overview files.
72 typedef struct _ARTOVERFIELD {
79 static BADGROUP *EXPbadgroups;
81 static NEWSGROUP *Groups;
82 static NEWSGROUP EXPdefault;
83 static NGHASH NGHtable[NGH_SIZE];
86 static enum KRP *krps;
88 static ARTOVERFIELD * ARTfields;
89 static int ARTfieldsize;
90 static bool ReadOverviewfmt = false;
93 /* FIXME: The following variables are shared between this file and ov.c.
94 This should be cleaned up with a better internal interface. */
97 extern FILE * EXPunlinkfile;
98 extern bool OVignoreselfexpire;
99 extern bool OVusepost;
101 extern bool OVearliest;
103 extern int OVnumpatterns;
104 extern char ** OVpatterns;
108 ** Hash a newsgroup and see if we get it.
120 NGH_HASH(Name, p, j);
122 for (c = *Name, ngp = htp->Groups, i = htp->Used; --i >= 0; ngp++)
123 if (c == ngp[0]->Name[0] && strcmp(Name, ngp[0]->Name) == 0)
129 ** Sorting predicate to put newsgroups in rough order of their activity.
132 NGcompare(const void *p1, const void *p2)
134 const NEWSGROUP * const * ng1 = p1;
135 const NEWSGROUP * const * ng2 = p2;
137 return ng1[0]->Last - ng2[0]->Last;
141 ** Split a line at a specified field separator into a vector and return
142 ** the number of fields found, or -1 on error.
145 EXPsplit(char *p, char sep, char **argv, int count)
167 for (i = 1, *argv++ = p; *p; )
170 for (; *p == sep; p++);
182 ** Build the newsgroup structures from the active file.
185 BuildGroups(char *active)
197 /* Count the number of groups. */
198 for (p = active, i = 0; (p = strchr(p, '\n')) != NULL; p++, i++)
201 Groups = xmalloc(i * sizeof(NEWSGROUP));
203 /* Set up the default hash buckets. */
204 NGHbuckets = i / NGH_SIZE;
207 for (i = NGH_SIZE, htp = NGHtable; --i >= 0; htp++) {
208 htp->Size = NGHbuckets;
209 htp->Groups = xmalloc(htp->Size * sizeof(NEWSGROUP *));
213 /* Fill in the array. */
215 for (p = active, ngp = Groups, i = nGroups; --i >= 0; ngp++, p = q + 1) {
217 if ((q = strchr(p, '\n')) == NULL) {
218 fprintf(stderr, "%s: line %d missing newline\n", ACTIVE, lines);
224 if (EXPsplit(p, ' ', fields, ARRAY_SIZE(fields)) != 4) {
225 fprintf(stderr, "%s: line %d wrong number of fields\n", ACTIVE, lines);
228 ngp->Name = fields[0];
229 ngp->Last = atol(fields[1]);
230 ngp->Rest = fields[3];
232 /* Find the right bucket for the group, make sure there is room. */
233 NGH_HASH(ngp->Name, p, j);
235 if (htp->Used >= htp->Size) {
236 htp->Size += NGHbuckets;
237 htp->Groups = xrealloc(htp->Groups, htp->Size * sizeof(NEWSGROUP *));
239 htp->Groups[htp->Used++] = ngp;
242 /* Sort each hash bucket. */
243 for (i = NGH_SIZE, htp = NGHtable; --i >= 0; htp++)
245 qsort(htp->Groups, htp->Used, sizeof htp->Groups[0], NGcompare);
247 /* Ok, now change our use of the Last field. Set them all to maxint. */
248 for (i = NGH_SIZE, htp = NGHtable; --i >= 0; htp++) {
252 for (ngpa = htp->Groups, k = htp->Used; --k >= 0; ngpa++) {
253 ngpa[0]->Last = ~(unsigned long) 0;
254 ngpa[0]->Lastpurged = 0;
260 ** Parse a number field converting it into a "when did this start?".
261 ** This makes the "keep it" tests fast, but inverts the logic of
262 ** just about everything you expect. Print a message and return false
266 EXPgetnum(int line, char *word, time_t *v, const char *name)
272 if (strcasecmp(word, "never") == 0) {
277 /* Check the number. We don't have strtod yet. */
278 for (p = word; ISWHITE(*p); p++)
280 if (*p == '+' || *p == '-')
282 for (SawDot = false; *p; p++)
288 else if (!CTYPE(isdigit, (int)*p))
291 fprintf(stderr, "Line %d, bad `%c' character in %s field\n",
299 *v = OVnow - (time_t)(d * 86400.);
304 ** Set the expiration fields for all groups that match this pattern.
307 EXPmatch(char *p, NEWSGROUP *v, char mod)
316 for (ngp = Groups, i = nGroups; --i >= 0; ngp++)
317 if (negate ? !uwildmat(ngp->Name, p) : uwildmat(ngp->Name, p))
319 || (mod == 'm' && ngp->Rest[0] == NF_FLAG_MODERATED)
320 || (mod == 'u' && ngp->Rest[0] != NF_FLAG_MODERATED)) {
322 ngp->Default = v->Default;
323 ngp->Purge = v->Purge;
324 ngp->Poison = v->Poison;
329 ** Parse the expiration control file. Return true if okay.
345 /* Scan all lines. */
347 patterns = xmalloc(nGroups * sizeof(char *));
349 for (i = 1; fgets(buff, sizeof buff, F) != NULL; i++) {
350 if ((p = strchr(buff, '\n')) == NULL) {
351 fprintf(stderr, "Line %d too long\n", i);
356 p = strchr(buff, '#');
360 p = buff + strlen(buff);
361 while (--p >= buff) {
362 if (isspace((int)*p))
369 if ((j = EXPsplit(buff, ':', fields, ARRAY_SIZE(fields))) == -1) {
370 fprintf(stderr, "Line %d too many fields\n", i);
375 /* Expired-article remember line? */
376 if (strcmp(fields[0], "/remember/") == 0) {
380 /* Regular expiration line -- right number of fields? */
382 fprintf(stderr, "Line %d bad format\n", i);
387 /* Parse the fields. */
388 if (strchr(fields[1], 'M') != NULL)
390 else if (strchr(fields[1], 'U') != NULL)
392 else if (strchr(fields[1], 'A') != NULL)
395 fprintf(stderr, "Line %d bad modflag\n", i);
399 v.Poison = (strchr(fields[1], 'X') != NULL);
400 if (!EXPgetnum(i, fields[2], &v.Keep, "keep")
401 || !EXPgetnum(i, fields[3], &v.Default, "default")
402 || !EXPgetnum(i, fields[4], &v.Purge, "purge")) {
406 /* These were turned into offsets, so the test is the opposite
407 * of what you think it should be. If Purge isn't forever,
408 * make sure it's greater than the other two fields. */
410 /* Some value not forever; make sure other values are in range. */
411 if (v.Keep && v.Keep < v.Purge) {
412 fprintf(stderr, "Line %d keep>purge\n", i);
416 if (v.Default && v.Default < v.Purge) {
417 fprintf(stderr, "Line %d default>purge\n", i);
423 /* Is this the default line? */
424 if (fields[0][0] == '*' && fields[0][1] == '\0' && mod == 'a') {
426 fprintf(stderr, "Line %d duplicate default\n", i);
430 EXPdefault.Keep = v.Keep;
431 EXPdefault.Default = v.Default;
432 EXPdefault.Purge = v.Purge;
433 EXPdefault.Poison = v.Poison;
437 /* Assign to all groups that match the pattern and flags. */
438 if ((j = EXPsplit(fields[0], ',', patterns, nGroups)) == -1) {
439 fprintf(stderr, "Line %d too many patterns\n", i);
443 for (k = 0; k < j; k++)
444 EXPmatch(patterns[k], &v, mod);
452 ** Handle a newsgroup that isn't in the active file.
455 EXPnotfound(char *Entry)
457 static NEWSGROUP Removeit;
460 /* See if we already know about this group. */
461 for (bg = EXPbadgroups; bg; bg = bg->Next)
462 if (strcmp(Entry, bg->Name) == 0)
465 bg = xmalloc(sizeof(BADGROUP));
466 bg->Name = xstrdup(Entry);
467 bg->Next = EXPbadgroups;
470 /* remove it all now. */
471 if (Removeit.Keep == 0) {
472 Removeit.Keep = OVnow;
473 Removeit.Default = OVnow;
474 Removeit.Purge = OVnow;
480 ** Should we keep the specified article?
483 EXPkeepit(char *Entry, time_t when, time_t expires)
486 enum KRP retval = Remove;
488 if ((ngp = NGfind(Entry)) == NULL)
489 ngp = EXPnotfound(Entry);
491 /* Bad posting date? */
492 if (when > OVrealnow + 86400) {
493 /* Yes -- force the article to go right now. */
494 when = expires ? ngp->Purge : ngp->Default;
497 /* If no expiration, make sure it wasn't posted before the default. */
499 if (when >= ngp->Default)
502 /* Make sure it's not posted before the purge cut-off and
503 * that it's not due to expire. */
505 if (when >= ngp->Purge && (expires >= OVnow || when >= ngp->Keep))
508 if (retval == Keep) {
511 return ngp->Poison ? Poison : Remove;
516 ** An article can be removed. Either print a note, or actually remove it.
517 ** Takes in the Xref information so that it can pass this to the storage
518 ** API callback used to generate the list of files to remove.
521 OVEXPremove(TOKEN token, bool deletedgroups, char **xref, int ngroups)
528 if (EXPunlinkfile && xref != NULL) {
529 SMprintfiles(EXPunlinkfile, token, xref, ngroups);
530 if (!ferror(EXPunlinkfile))
532 fprintf(stderr, "Can't write to -z file, %s\n", strerror(errno));
533 fprintf(stderr, "(Will ignore it for rest of run.)\n");
534 fclose(EXPunlinkfile);
535 EXPunlinkfile = NULL;
537 if (!SMcancel(token) && SMerrno != SMERR_NOENT && SMerrno != SMERR_UNINIT)
538 fprintf(stderr, "Can't unlink %s: %s\n", TokenToText(token),
543 ** Read the overview schema.
554 bool foundxref = false;
555 bool foundxreffull = false;
557 /* Open file, count lines. */
558 path = concatpath(innconf->pathetc, _PATH_SCHEMA);
559 F = fopen(path, "r");
562 for (i = 0; fgets(buff, sizeof buff, F) != NULL; i++)
564 fseeko(F, 0, SEEK_SET);
565 ARTfields = xmalloc((i + 1) * sizeof(ARTOVERFIELD));
567 /* Parse each field. */
568 for (fp = ARTfields; fgets(buff, sizeof buff, F) != NULL; ) {
569 /* Ignore blank and comment lines. */
570 if ((p = strchr(buff, '\n')) != NULL)
572 if ((p = strchr(buff, '#')) != NULL)
576 if ((p = strchr(buff, ':')) != NULL) {
578 fp->NeedsHeader = (strcmp(p, "full") == 0);
581 fp->NeedsHeader = false;
582 fp->HasHeader = false;
583 fp->Header = xstrdup(buff);
584 fp->Length = strlen(buff);
585 if (strcasecmp(buff, "Xref") == 0) {
587 foundxreffull = fp->NeedsHeader;
591 ARTfieldsize = fp - ARTfields;
593 if (!foundxref || !foundxreffull) {
594 fprintf(stderr, "'Xref:full' must be included in %s", path);
601 ** Return a field from the overview line or NULL on error. Return a copy
602 ** since we might be re-using the line later.
605 OVERGetHeader(const char *p, int field)
613 fp = &ARTfields[field];
615 /* Skip leading headers. */
616 for (; field-- >= 0 && *p; p++)
617 if ((p = strchr(p, '\t')) == NULL)
625 if (fp->NeedsHeader) { /* find an exact match */
626 while (strncmp(fp->Header, p, fp->Length) != 0) {
627 if ((p = strchr(p, '\t')) == NULL)
634 /* Figure out length; get space. */
635 if ((next = strpbrk(p, "\n\r\t")) != NULL) {
642 buff = xmalloc(buffsize + 1);
644 else if (buffsize < i) {
646 buff = xrealloc(buff, buffsize + 1);
655 ** Read overview.fmt and find index for headers
658 OVfindheaderindex(void)
667 if (innconf->groupbaseexpiry) {
668 ACTIVE = concatpath(innconf->pathdb, _PATH_ACTIVE);
669 if ((active = ReadInFile(ACTIVE, (struct stat *)NULL)) == NULL) {
670 fprintf(stderr, "Can't read %s, %s\n",
671 ACTIVE, strerror(errno));
675 arts = xmalloc(nGroups * sizeof(char *));
676 krps = xmalloc(nGroups * sizeof(enum KRP));
677 path = concatpath(innconf->pathetc, _PATH_EXPIRECTL);
678 F = fopen(path, "r");
680 if (!EXPreadfile(F)) {
682 fprintf(stderr, "Format error in expire.ctl\n");
688 if (Dateindex == OVFMT_UNINIT) {
689 for (Dateindex = OVFMT_NODATE, i = 0; i < ARTfieldsize; i++) {
690 if (strcasecmp(ARTfields[i].Header, "Date") == 0) {
692 } else if (strcasecmp(ARTfields[i].Header, "Xref") == 0) {
694 } else if (strcasecmp(ARTfields[i].Header, "Message-ID") == 0) {
699 ReadOverviewfmt = true;
704 ** Do the work of expiring one line. Assumes article still exists in the
705 ** spool. Returns true if the article should be purged, false otherwise.
708 OVgroupbasedexpire(TOKEN token, const char *group, const char *data,
709 int len UNUSED, time_t arrived, time_t expires)
711 static char *Group = NULL;
722 if (SMprobe(SELFEXPIRE, &token, NULL)) {
723 if (!OVignoreselfexpire)
724 /* this article should be kept */
727 if (!ReadOverviewfmt) {
732 if ((p = OVERGetHeader(data, Dateindex)) == NULL) {
736 if ((when = parsedate(p, NULL)) == -1) {
743 if ((Xref = OVERGetHeader(data, Xrefindex)) == NULL) {
747 Group = concat(group, ":", (char *) 0);
750 if ((Xref = strchr(Xref, ' ')) == NULL) {
754 for (Xref++; *Xref == ' '; Xref++)
757 if ((count = EXPsplit(Xref, ' ', arts, nGroups)) == -1) {
762 /* arts is now an array of strings, each of which is a group name, a
763 colon, and an article number. EXPkeepit wants just pure group names,
764 so replace the colons with nuls (deleting the overview entry if it
765 isn't in the expected form). */
766 for (i = 0; i < count; i++) {
767 p = strchr(arts[i], ':');
770 fprintf(stderr, "Bad entry, \"%s\"\n", arts[i]);
777 /* First check all postings */
782 for (i = 0; i < count; ++i) {
783 if ((krps[i] = EXPkeepit(arts[i], when, expires)) == Poison)
785 if (OVkeep && (krps[i] == Keep))
787 if ((krps[i] == Remove) && strcmp(group, arts[i]) == 0)
789 if ((krps[i] == Keep))
795 if (delete || poisoned || token.type == TOKEN_EMPTY) {
796 /* delete article if this is first entry */
797 if (strcmp(group, arts[0]) == 0) {
798 for (i = 0; i < count; i++)
799 arts[i][strlen(arts[i])] = ':';
800 OVEXPremove(token, false, arts, count);
805 } else { /* not earliest mode */
806 if ((!keeper && delete) || token.type == TOKEN_EMPTY) {
807 /* delete article if purge is set, indicating that it has
808 expired out of every group to which it was posted */
810 for (i = 0; i < count; i++)
811 arts[i][strlen(arts[i])] = ':';
812 OVEXPremove(token, false, arts, count);
819 /* this article should be kept */
824 OVhisthasmsgid(struct history *h, const char *data)
828 if (!ReadOverviewfmt) {
831 if ((p = OVERGetHeader(data, Messageidindex)) == NULL)
833 return HISlookup(h, p, NULL, NULL, NULL, NULL);
837 OVgroupmatch(const char *group)
842 if (OVnumpatterns == 0 || group == NULL)
844 for (i = 0; i < OVnumpatterns; i++) {
845 switch (OVpatterns[i][0]) {
847 if (!wanted && uwildmat(group, &OVpatterns[i][1]))
850 if (uwildmat(group, &OVpatterns[i][1])) {
855 if (uwildmat(group, OVpatterns[i]))
866 BADGROUP *bg, *bgnext;
870 if (EXPprocessed != 0) {
872 printf(" Article lines processed %8ld\n", EXPprocessed);
873 printf(" Articles dropped %8ld\n", EXPunlinked);
874 printf(" Overview index dropped %8ld\n", EXPoverindexdrop);
876 EXPprocessed = EXPunlinked = EXPoverindexdrop = 0;
878 if (innconf->ovgrouppat != NULL) {
879 for (i = 0 ; i < OVnumpatterns ; i++)
883 for (bg = EXPbadgroups; bg; bg = bgnext) {
888 for (fp = ARTfields, i = 0; i < ARTfieldsize ; i++, fp++) {
892 if (ACTIVE != NULL) {
896 if (Groups != NULL) {
900 for (i = 0, htp = NGHtable ; i < NGH_SIZE ; i++, htp++) {
901 if (htp->Groups != NULL) {