1 /* $Id: art.c 7748 2008-04-06 13:49:56Z iulius $
10 #include "inn/innconf.h"
17 typedef struct iovec IOVEC;
21 extern bool DoCancels;
24 #define EXECUTE_BITS (S_IXUSR | S_IXGRP | S_IXOTH)
26 #define EXECUTE_BITS 0111
27 #endif /* defined(S_IXUSR) */
29 /* Characters used in log messages indicating the disposition of messages. */
30 #define ART_ACCEPT '+'
32 #define ART_STRSTR '?'
34 #define ART_REJECT '-'
37 ** used to sort Xref, Bytes and Path pointers
39 typedef struct _HEADERP {
47 ** For speed we build a binary tree of the headers, sorted by their
48 ** name. We also store the header's Name fields in the tree to avoid
49 ** doing an extra indirection.
51 typedef struct _TREE {
53 const ARTHEADER *Header;
58 static TREE *ARTheadertree;
61 ** For doing the overview database, we keep a list of the headers and
62 ** a flag saying if they're written in brief or full format.
64 typedef struct _ARTOVERFIELD {
65 const ARTHEADER *Header;
69 static ARTOVERFIELD *ARTfields;
72 ** General newsgroup we care about, and what we put in the Path line.
74 static char ARTctl[] = "control";
75 static char ARTjnk[] = "junk";
76 static char *ARTpathme;
79 ** Different types of rejected articles.
81 typedef enum {REJECT_DUPLICATE, REJECT_SITE, REJECT_FILTER, REJECT_DISTRIB,
82 REJECT_GROUP, REJECT_UNAPP, REJECT_OTHER} Reject_type;
85 ** Flag array, indexed by character. Character classes for Message-ID's.
87 static char ARTcclass[256];
88 #define CC_MSGID_ATOM 01
89 #define CC_MSGID_NORM 02
90 #define CC_HOSTNAME 04
91 #define ARTnormchar(c) ((ARTcclass[(unsigned char)(c)] & CC_MSGID_NORM) != 0)
92 #define ARTatomchar(c) ((ARTcclass[(unsigned char)(c)] & CC_MSGID_ATOM) != 0)
93 #define ARThostchar(c) ((ARTcclass[(unsigned char)(c)] & CC_HOSTNAME) != 0)
95 #if defined(DO_PERL) || defined(DO_PYTHON)
96 const char *filterPath;
97 #endif /* DO_PERL || DO_PYTHON */
102 ** Trim '\r' from buffer.
105 buffer_trimcr(struct buffer *bp)
110 for (p = q = bp->data ; p < bp->data + bp->left ; p++) {
111 if (*p == '\r' && p+1 < bp->data + bp->left && p[1] == '\n') {
121 ** Mark that the site gets this article.
124 SITEmark(SITE *sp, NEWSGROUP *ngp)
131 if (sp->Funnel != NOSITE) {
132 funnel = &Sites[sp->Funnel];
133 if (funnel->ng == NULL)
144 static char *SCHEMA = NULL;
152 bool foundxref = false;
153 bool foundxreffull = false;
155 if (ARTfields != NULL) {
160 /* Open file, count lines. */
162 SCHEMA = concatpath(innconf->pathetc, _PATH_SCHEMA);
163 if ((F = Fopen(SCHEMA, "r", TEMPORARYOPEN)) == NULL)
165 for (i = 0; fgets(buff, sizeof buff, F) != NULL; i++)
167 fseeko(F, 0, SEEK_SET);
168 ARTfields = xmalloc((i + 1) * sizeof(ARTOVERFIELD));
170 /* Parse each field. */
171 for (ok = true, fp = ARTfields ; fgets(buff, sizeof buff, F) != NULL ;) {
172 /* Ignore blank and comment lines. */
173 if ((p = strchr(buff, '\n')) != NULL)
175 if ((p = strchr(buff, '#')) != NULL)
179 if ((p = strchr(buff, ':')) != NULL) {
181 fp->NeedHeader = (strcmp(p, "full") == 0);
183 fp->NeedHeader = false;
184 if (strcasecmp(buff, "Xref") == 0) {
186 foundxreffull = fp->NeedHeader;
188 for (hp = ARTheaders; hp < ARRAY_END(ARTheaders); hp++) {
189 if (strcasecmp(buff, hp->Name) == 0) {
194 if (hp == ARRAY_END(ARTheaders)) {
195 syslog(L_ERROR, "%s bad_schema unknown header \"%s\"",
205 if (!foundxref || !foundxreffull) {
206 syslog(L_FATAL, "%s 'Xref:full' must be included in %s", LogName, SCHEMA);
214 ** Build a balanced tree for the headers in subscript range [lo..hi).
215 ** This only gets called once, and the tree only has about 37 entries,
216 ** so we don't bother to unroll the recursion.
219 ARTbuildtree(const ARTHEADER **Table, int lo, int hi)
224 mid = lo + (hi - lo) / 2;
225 tp = xmalloc(sizeof(TREE));
226 tp->Header = Table[mid];
227 tp->Name = tp->Header->Name;
231 tp->Before = ARTbuildtree(Table, lo, mid);
235 tp->After = ARTbuildtree(Table, mid + 1, hi);
241 ** Sorting predicate for qsort call in ARTsetup.
244 ARTcompare(const void *p1, const void *p2)
246 return strcasecmp(((const ARTHEADER **)p1)[0]->Name,
247 ((const ARTHEADER **)p2)[0]->Name);
252 ** Setup the article processing.
258 const ARTHEADER ** table;
261 /* Set up the character class tables. These are written a
262 * little strangely to work around a GCC2.0 bug. */
263 memset(ARTcclass, 0, sizeof ARTcclass);
264 p = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
265 while ((i = *p++) != 0) {
266 ARTcclass[i] = CC_HOSTNAME | CC_MSGID_ATOM | CC_MSGID_NORM;
268 p = "!#$%&'*+-/=?^_`{|}~";
269 while ((i = *p++) != 0) {
270 ARTcclass[i] = CC_MSGID_ATOM | CC_MSGID_NORM;
272 p = "\"(),.:;<@[\\]";
273 while ((i = *p++) != 0) {
274 ARTcclass[i] = CC_MSGID_NORM;
277 /* The RFC's don't require it, but we add underscore to the list of valid
278 * hostname characters. */
279 ARTcclass['.'] |= CC_HOSTNAME;
280 ARTcclass['-'] |= CC_HOSTNAME;
281 ARTcclass['_'] |= CC_HOSTNAME;
283 /* Build the header tree. */
284 table = xmalloc(ARRAY_SIZE(ARTheaders) * sizeof(ARTHEADER *));
285 for (i = 0; i < ARRAY_SIZE(ARTheaders); i++)
286 table[i] = &ARTheaders[i];
287 qsort(table, ARRAY_SIZE(ARTheaders), sizeof *table, ARTcompare);
288 ARTheadertree = ARTbuildtree(table, 0, ARRAY_SIZE(ARTheaders));
291 /* Get our Path name, kill trailing !. */
292 ARTpathme = xstrdup(Path.data);
293 ARTpathme[Path.used - 1] = '\0';
295 /* Set up database; ignore errors. */
301 ARTfreetree(TREE *tp)
305 for ( ; tp != NULL; tp = next) {
307 ARTfreetree(tp->Before);
317 if (ARTfields != NULL) {
321 ARTfreetree(ARTheadertree);
325 ** Start a log message about an article.
328 ARTlog(const ARTDATA *data, char code, const char *text)
330 const HDRCONTENT *hc = data->HdrContent;
334 TMRstart(TMR_ARTLOG);
335 /* We could be a bit faster by not dividing Now.usec by 1000,
336 * but who really wants to log at the Microsec level? */
337 Done = code == ART_ACCEPT || code == ART_JUNK;
339 i = fprintf(Log, "%.15s.%03d %c %s %s %s%s",
340 ctime(&Now.time) + 4, (int)(Now.usec / 1000), code, data->Feedsite,
341 HDR_FOUND(HDR__MESSAGE_ID) ? HDR(HDR__MESSAGE_ID) : "(null)",
342 text, Done ? "" : "\n");
344 i = fprintf(Log, "%.15s.%03d %c %s %s%s",
345 ctime(&Now.time) + 4, (int)(Now.usec / 1000), code, data->Feedsite,
346 HDR_FOUND(HDR__MESSAGE_ID) ? HDR(HDR__MESSAGE_ID) : "(null)",
348 if (i == EOF || (Done && !BufferedLogs && fflush(Log)) || ferror(Log)) {
350 syslog(L_ERROR, "%s cant write log_start %m", LogName);
351 IOError("logging article", i);
358 ** Parse a Path line, splitting it up into NULL-terminated array of strings.
361 ARTparsepath(const char *p, int size, LISTBUFFER *list)
367 SetupListBuffer(size, list);
369 /* loop over text and copy */
370 for (i = 0, q = list->Data, hp = list->List ; *p ; p++, *q++ = '\0') {
371 /* skip leading separators. */
372 for (; *p && !ARThostchar(*p) && ISWHITE(*p) ; p++)
377 if (list->ListLength <= i) {
378 list->ListLength += DEFAULTNGBOXSIZE;
379 list->List = xrealloc(list->List, list->ListLength * sizeof(char *));
382 /* mark the start of the host, move to the end of it while copying */
383 for (*hp++ = q, i++ ; *p && ARThostchar(*p) && !ISWHITE(*p) ;)
389 if (i == list->ListLength) {
390 list->ListLength += DEFAULTNGBOXSIZE;
391 list->List = xrealloc(list->List, list->ListLength * sizeof(char *));
399 ** Sorting pointer where header starts
402 ARTheaderpcmp(const void *p1, const void *p2)
404 return (((const HEADERP *)p1)->p - ((const HEADERP *)p2)->p);
407 /* Write an article using the storage api. Put it together in memory and
408 call out to the api. */
410 ARTstore(CHANNEL *cp)
412 struct buffer *Article = &cp->In;
413 ARTDATA *data = &cp->Data;
414 HDRCONTENT *hc = data->HdrContent;
417 int i, j, iovcnt = 0;
420 struct buffer *headers = &data->Headers;
421 struct iovec iov[ARTIOVCNT];
424 /* find Path, Bytes and Xref to be prepended/dropped/replaced */
426 /* assumes Path header is required header */
427 hp[i].p = HDR(HDR__PATH);
428 hp[i++].index = HDR__PATH;
429 if (HDR_FOUND(HDR__XREF)) {
430 hp[i].p = HDR(HDR__XREF);
431 hp[i++].index = HDR__XREF;
433 if (HDR_FOUND(HDR__BYTES)) {
434 hp[i].p = HDR(HDR__BYTES);
435 hp[i++].index = HDR__BYTES;
437 /* get the order of header appearance */
438 qsort(hp, i, sizeof(HEADERP), ARTheaderpcmp);
439 /* p always points where the next data should be written from */
440 for (p = Article->data + cp->Start, j = 0 ; j < i ; j++) {
441 switch (hp[j].index) {
443 if (!data->Hassamepath || data->AddAlias || Pathcluster.used) {
444 /* write heading data */
445 iov[iovcnt].iov_base = (char *) p;
446 iov[iovcnt++].iov_len = HDR(HDR__PATH) - p;
447 arth.len += HDR(HDR__PATH) - p;
448 /* append clusterpath */
449 if (Pathcluster.used) {
450 iov[iovcnt].iov_base = Pathcluster.data;
451 iov[iovcnt++].iov_len = Pathcluster.used;
452 arth.len += Pathcluster.used;
454 /* now append new one */
455 iov[iovcnt].iov_base = Path.data;
456 iov[iovcnt++].iov_len = Path.used;
457 arth.len += Path.used;
458 if (data->AddAlias) {
459 iov[iovcnt].iov_base = Pathalias.data;
460 iov[iovcnt++].iov_len = Pathalias.used;
461 arth.len += Pathalias.used;
465 if (data->Hassamecluster)
466 p += Pathcluster.used;
470 if (!innconf->xrefslave) {
471 /* write heading data */
472 iov[iovcnt].iov_base = (char *) p;
473 iov[iovcnt++].iov_len = HDR(HDR__XREF) - p;
474 arth.len += HDR(HDR__XREF) - p;
475 /* replace with new one */
476 iov[iovcnt].iov_base = data->Xref;
477 iov[iovcnt++].iov_len = data->XrefLength - 2;
478 arth.len += data->XrefLength - 2;
480 /* this points to where the trailing "\r\n" of the original Xref header is */
481 p = HDR(HDR__XREF) + HDR_LEN(HDR__XREF);
485 /* ditch the whole Bytes header */
486 /* write heading data */
487 iov[iovcnt].iov_base = (char *) p;
488 iov[iovcnt++].iov_len = data->BytesHeader - p;
489 arth.len += data->BytesHeader - p;
491 /* need to skip trailing "\r\n" of Bytes header */
492 p = HDR(HDR__BYTES) + HDR_LEN(HDR__BYTES) + 2;
495 result.type = TOKEN_EMPTY;
499 /* in case Xref is not included in the original article */
500 if (!HDR_FOUND(HDR__XREF)) {
501 /* write heading data */
502 iov[iovcnt].iov_base = (char *) p;
503 iov[iovcnt++].iov_len = Article->data + (data->Body - 2) - p;
504 arth.len += Article->data + (data->Body - 2) - p;
505 /* Xref needs to be inserted */
506 iov[iovcnt].iov_base = (char *) "Xref: ";
507 iov[iovcnt++].iov_len = sizeof("Xref: ") - 1;
508 arth.len += sizeof("Xref: ") - 1;
509 iov[iovcnt].iov_base = data->Xref;
510 iov[iovcnt++].iov_len = data->XrefLength;
511 arth.len += data->XrefLength;
512 p = Article->data + (data->Body - 2);
514 /* write rest of data */
515 iov[iovcnt].iov_base = (char *) p;
516 iov[iovcnt++].iov_len = Article->data + cp->Next - p;
517 arth.len += Article->data + cp->Next - p;
519 /* revert trailing '\0\n' to '\r\n' of all system header */
520 for (i = 0 ; i < MAX_ARTHEADER ; i++) {
526 arth.iovcnt = iovcnt;
527 arth.arrived = (time_t)0;
528 arth.token = (TOKEN *)NULL;
529 arth.expires = data->Expires;
530 if (innconf->storeonxref) {
531 arth.groups = data->Replic;
532 arth.groupslen = data->ReplicLength;
534 arth.groups = HDR(HDR__NEWSGROUPS);
535 arth.groupslen = HDR_LEN(HDR__NEWSGROUPS);
538 SMerrno = SMERR_NOERROR;
539 result = SMstore(arth);
540 if (result.type == TOKEN_EMPTY) {
541 if (SMerrno == SMERR_NOMATCH)
542 ThrottleNoMatchError();
543 else if (SMerrno != SMERR_NOERROR)
544 IOError("SMstore", SMerrno);
548 /* calculate stored size */
549 for (data->BytesValue = i = 0 ; i < iovcnt ; i++) {
550 if (NeedHeaders && (i + 1 == iovcnt)) {
551 /* body begins at last iov */
552 headersize = data->BytesValue +
553 Article->data + data->Body - (char *) iov[i].iov_base;
555 data->BytesValue += iov[i].iov_len;
557 /* "\r\n" is counted as 1 byte. trailing ".\r\n" and body delimiter are also
559 data->BytesValue -= (data->HeaderLines + data->Lines + 4);
560 /* Figure out how much space we'll need and get it. */
561 snprintf(data->Bytes, sizeof(data->Bytes), "Bytes: %ld\r\n",
563 /* does not include strlen("Bytes: \r\n") */
564 data->BytesLength = strlen(data->Bytes) - 9;
570 buffer_resize(headers, headersize);
571 buffer_set(headers, data->Bytes, strlen(data->Bytes));
572 for (i = 0 ; i < iovcnt ; i++) {
574 buffer_append(headers, iov[i].iov_base,
575 Article->data + data->Body - (char *) iov[i].iov_base);
577 buffer_append(headers, iov[i].iov_base, iov[i].iov_len);
579 buffer_trimcr(headers);
585 ** Parse a header that starts at header. size includes trailing "\r\n"
588 ARTparseheader(CHANNEL *cp, int size)
590 ARTDATA *data = &cp->Data;
591 char *header = cp->In.data + data->CurHeader;
592 HDRCONTENT *hc = cp->Data.HdrContent;
598 /* Find first colon */
599 if ((colon = memchr(header, ':', size)) == NULL || !ISWHITE(colon[1])) {
600 if ((p = memchr(header, '\r', size)) != NULL)
602 snprintf(cp->Error, sizeof(cp->Error),
603 "%d No colon-space in \"%s\" header",
604 NNTP_REJECTIT_VAL, MaxLength(header, header));
610 /* See if this is a system header. A fairly tightly-coded binary search. */
611 c = CTYPE(islower, *header) ? toupper(*header) : *header;
612 for (*colon = '\0', tp = ARTheadertree; tp; ) {
613 if ((i = c - tp->Name[0]) == 0 && (i = strcasecmp(header, tp->Name)) == 0)
623 /* Not a system header, make sure we have <word><colon><space>. */
624 for (p = colon; --p > header; ) {
628 snprintf(cp->Error, sizeof(cp->Error),
629 "%d Space before colon in \"%s\" header",
630 NNTP_REJECTIT_VAL, MaxLength(header, header));
639 /* remember to ditch if it's Bytes: */
641 cp->Data.BytesHeader = header;
643 if (hc->Length != 0) {
647 for (p = colon + 1 ; (p < header + size - 2) &&
648 (ISWHITE(*p) || *p == '\r' || *p == '\n'); p++);
649 if (p < header + size - 2) {
651 /* HDR_LEN() does not include trailing "\r\n" */
652 hc->Length = header + size - 2 - p;
654 snprintf(cp->Error, sizeof(cp->Error),
655 "%d Body of header is all blanks in \"%s\" header",
656 NNTP_REJECTIT_VAL, MaxLength(hp->Name, hp->Name));
663 ** Check Message-ID format based on RFC 822 grammar, except that (as per
664 ** RFC 1036) whitespace, non-printing, and '>' characters are excluded.
665 ** Based on code by Paul Eggert posted to news.software.b on 22-Nov-90
666 ** in <#*tyo2'~n@twinsun.com>, with additional email discussion.
670 ARTidok(const char *MessageID)
675 /* Check the length of the message ID. */
676 if (MessageID == NULL || strlen(MessageID) > NNTP_MSGID_MAXLEN)
679 /* Scan local-part: "< atom|quoted [ . atom|quoted]" */
685 while (ARTatomchar(*++p))
709 /* Scan domain part: "@ atom|domain [ . atom|domain] > \0" */
714 while (ARTatomchar(*++p))
740 return *p == '>' && *++p == '\0';
744 ** Clean up the data fields where article information is stored.
745 ** This must be called before article processing.
748 ARTprepare(CHANNEL *cp)
750 ARTDATA *data = &cp->Data;
751 HDRCONTENT *hc = data->HdrContent;
754 for (i = 0 ; i < MAX_ARTHEADER ; i++, hc++) {
758 data->Lines = data->HeaderLines = data->CRwithoutLF = data->LFwithoutCR = 0;
759 data->CurHeader = data->LastTerminator = data->LastCR = cp->Start - 1;
760 data->LastCRLF = data->Body = cp->Start - 1;
761 data->BytesHeader = NULL;
762 data->Feedsite = "?";
767 ** Clean up an article. This is mainly copying in-place, stripping bad
768 ** headers. Also fill in the article data block with what we can find.
769 ** Return NULL if the article is okay, or a string describing the error.
770 ** Parse headers and end of article
771 ** This is called by NCproc().
774 ARTparse(CHANNEL *cp)
776 struct buffer *bp = &cp->In;
777 ARTDATA *data = &cp->Data;
778 long i, limit, fudge, size;
781 HDRCONTENT *hc = data->HdrContent;
783 /* Read through the buffer to find header, body and end of article */
784 /* this routine is designed to avoid referring to the data for as long as
785 possible, for performance reasons, so the code may look redundant at a glance */
788 if (cp->State == CSgetheader) {
789 /* header processing */
790 for (; i < limit ;) {
791 if (data->LastCRLF + 1 == i) {
792 /* beginning of the line */
793 switch (bp->data[i]) {
795 data->LastTerminator = i;
796 data->NullHeader = false;
800 data->NullHeader = false;
804 data->NullHeader = false;
808 /* header is folded. NullHeader is untouched */
811 snprintf(cp->Error, sizeof(cp->Error), "%d Null Header",
813 data->NullHeader = true;
816 if (data->CurHeader >= cp->Start) {
817 /* parse previous header */
818 if (!data->NullHeader && (*cp->Error == '\0'))
819 /* skip if already got an error */
820 ARTparseheader(cp, i - data->CurHeader);
823 data->NullHeader = false;
828 for (; i < limit ;) {
829 /* rest of the line */
830 switch (bp->data[i]) {
832 snprintf(cp->Error, sizeof(cp->Error), "%d Null Header",
834 data->NullHeader = true;
837 if (data->LastCR >= cp->Start)
842 if (data->LastCR + 1 == i) {
844 data->LastCR = cp->Start - 1;
845 if (data->LastTerminator + 2 == i) {
846 /* terminated still in header */
847 if (cp->Start + 3 == i) {
848 snprintf(cp->Error, sizeof(cp->Error), "%d Empty article",
850 cp->State = CSnoarticle;
852 snprintf(cp->Error, sizeof(cp->Error), "%d No body",
854 cp->State = CSgotarticle;
859 if (data->LastCRLF + MAXHEADERSIZE < i)
860 snprintf(cp->Error, sizeof(cp->Error),
861 "%d Too long line in header %ld bytes",
862 NNTP_REJECTIT_VAL, i - data->LastCRLF);
863 else if (data->LastCRLF + 2 == i) {
865 /* parse previous header */
866 if (data->CurHeader >= cp->Start) {
867 if (!data->NullHeader && (*cp->Error == '\0'))
868 /* skip if already got an error */
869 ARTparseheader(cp, i - 1 - data->CurHeader);
871 snprintf(cp->Error, sizeof(cp->Error), "%d No header",
874 data->LastCRLF = i++;
876 cp->State = CSgetbody;
880 data->LastCRLF = i++;
881 goto endofheaderline;
896 /* body processing, or eating huge article */
897 for (; i < limit ;) {
898 if (data->LastCRLF + 1 == i) {
899 /* beginning of the line */
900 switch (bp->data[i]) {
902 data->LastTerminator = i;
915 for (; i < limit ;) {
916 /* rest of the line */
917 switch (bp->data[i]) {
919 if (data->LastCR >= cp->Start)
924 if (data->LastCR + 1 == i) {
926 data->LastCR = cp->Start - 1;
927 if (data->LastTerminator + 2 == i) {
928 /* found end of article */
929 if (cp->State == CSeatarticle) {
930 cp->State = CSgotlargearticle;
932 snprintf(cp->Error, sizeof(cp->Error),
933 "%d Article of %ld bytes exceeds local limit of %ld bytes",
934 NNTP_REJECTIT_VAL, (unsigned long) i - cp->Start,
935 innconf->maxartsize);
937 cp->State = CSgotarticle;
940 if (*cp->Error != '\0' && HDR_FOUND(HDR__MESSAGE_ID)) {
941 HDR_PARSE_START(HDR__MESSAGE_ID);
942 if (HDR_FOUND(HDR__PATH)) {
943 /* to record path into news log */
944 HDR_PARSE_START(HDR__PATH);
945 hopcount = ARTparsepath(HDR(HDR__PATH), HDR_LEN(HDR__PATH),
947 HDR_PARSE_END(HDR__PATH);
949 hops = data->Path.List;
950 if (innconf->logipaddr) {
951 data->Feedsite = RChostname(cp);
952 if (data->Feedsite == NULL)
953 data->Feedsite = CHANname(cp);
954 if (strcmp("0.0.0.0", data->Feedsite) == 0 ||
955 data->Feedsite[0] == '\0')
957 hops && hops[0] ? hops[0] : CHANname(cp);
960 hops && hops[0] ? hops[0] : CHANname(cp);
964 ARTlog(data, ART_REJECT, cp->Error);
965 HDR_PARSE_END(HDR__MESSAGE_ID);
967 if (cp->State == CSgotlargearticle)
971 #if 0 /* this may be examined in the future */
972 if (data->LastCRLF + MAXHEADERSIZE < i)
973 snprintf(cp->Error, sizeof(cp->Error),
974 "%d Too long line in body %d bytes",
975 NNTP_REJECTIT_VAL, i);
978 data->LastCRLF = i++;
994 size = i - cp->Start;
995 fudge = data->HeaderLines + data->Lines + 4;
996 if (innconf->maxartsize > 0)
997 if (size > fudge && size - fudge > innconf->maxartsize)
998 cp->State = CSeatarticle;
1004 ** Clean up an article. This is mainly copying in-place, stripping bad
1005 ** headers. Also fill in the article data block with what we can find.
1006 ** Return true if the article has no error, or false which means the error.
1009 ARTclean(ARTDATA *data, char *buff)
1011 HDRCONTENT *hc = data->HdrContent;
1012 const ARTHEADER *hp = ARTheaders;
1017 TMRstart(TMR_ARTCLEAN);
1018 data->Arrived = Now.time;
1021 /* replace trailing '\r\n' with '\0\n' of all system header to be handled
1022 easily by str*() functions */
1023 for (i = 0 ; i < MAX_ARTHEADER ; i++) {
1028 /* Make sure all the headers we need are there */
1029 for (i = 0; i < MAX_ARTHEADER ; i++) {
1030 if (hp[i].Type == HTreq) {
1033 if (hc[i].Length < 0) {
1034 sprintf(buff, "%d Duplicate \"%s\" header", NNTP_REJECTIT_VAL,
1037 sprintf(buff, "%d Missing \"%s\" header", NNTP_REJECTIT_VAL,
1040 TMRstop(TMR_ARTCLEAN);
1045 /* assumes Message-ID header is required header */
1046 if (!ARTidok(HDR(HDR__MESSAGE_ID))) {
1047 HDR_LEN(HDR__MESSAGE_ID) = 0;
1048 sprintf(buff, "%d Bad \"Message-ID\" header", NNTP_REJECTIT_VAL);
1049 TMRstop(TMR_ARTCLEAN);
1053 if (innconf->linecountfuzz && HDR_FOUND(HDR__LINES)) {
1054 p = HDR(HDR__LINES);
1056 if ((delta = i - atoi(p)) != 0 && abs(delta) > innconf->linecountfuzz) {
1057 sprintf(buff, "%d Linecount %s != %d +- %ld", NNTP_REJECTIT_VAL,
1058 MaxLength(p, p), i, innconf->linecountfuzz);
1059 TMRstop(TMR_ARTCLEAN);
1064 /* Is article too old? */
1065 /* assumes Date header is required header */
1067 if ((data->Posted = parsedate(p, &Now)) == -1) {
1068 sprintf(buff, "%d Bad \"Date\" header -- \"%s\"", NNTP_REJECTIT_VAL,
1070 TMRstop(TMR_ARTCLEAN);
1073 if (innconf->artcutoff) {
1074 long cutoff = innconf->artcutoff * 24 * 60 * 60;
1076 if (data->Posted < Now.time - cutoff) {
1077 sprintf(buff, "%d Too old -- \"%s\"", NNTP_REJECTIT_VAL,
1079 TMRstop(TMR_ARTCLEAN);
1083 if (data->Posted > Now.time + DATE_FUZZ) {
1084 sprintf(buff, "%d Article posted in the future -- \"%s\"",
1085 NNTP_REJECTIT_VAL, MaxLength(p, p));
1086 TMRstop(TMR_ARTCLEAN);
1089 if (HDR_FOUND(HDR__EXPIRES)) {
1090 p = HDR(HDR__EXPIRES);
1091 data->Expires = parsedate(p, &Now);
1094 /* Colon or whitespace in the Newsgroups header? */
1095 /* assumes Newsgroups header is required header */
1096 if ((data->Groupcount =
1097 NGsplit(HDR(HDR__NEWSGROUPS), HDR_LEN(HDR__NEWSGROUPS),
1098 &data->Newsgroups)) == 0) {
1099 TMRstop(TMR_ARTCLEAN);
1100 sprintf(buff, "%d Unwanted character in \"Newsgroups\" header",
1105 /* Fill in other Data fields. */
1106 if (HDR_FOUND(HDR__SENDER))
1107 data->Poster = HDR(HDR__SENDER);
1109 data->Poster = HDR(HDR__FROM);
1110 if (HDR_FOUND(HDR__REPLY_TO))
1111 data->Replyto = HDR(HDR__REPLY_TO);
1113 data->Replyto = HDR(HDR__FROM);
1115 TMRstop(TMR_ARTCLEAN);
1120 ** We are going to reject an article, record the reason and
1124 ARTreject(Reject_type code, CHANNEL *cp, struct buffer *article UNUSED)
1126 /* Remember why the article was rejected (for the status file) */
1129 case REJECT_DUPLICATE:
1131 cp->DuplicateSize += cp->Next - cp->Start;
1139 case REJECT_DISTRIB:
1152 /* should never be here */
1153 syslog(L_NOTICE, "%s unknown reject type received by ARTreject()",
1161 ** Verify if a cancel message is valid. If the user posting the cancel
1162 ** matches the user who posted the article, return the list of filenames
1163 ** otherwise return NULL.
1166 ARTcancelverify(const ARTDATA *data, const char *MessageID, TOKEN *token)
1175 if (!HISlookup(History, MessageID, NULL, NULL, NULL, token))
1177 if ((art = SMretrieve(*token, RETR_HEAD)) == NULL)
1179 local = wire_findheader(art->data, art->len, "Sender");
1180 if (local == NULL) {
1181 local = wire_findheader(art->data, art->len, "From");
1182 if (local == NULL) {
1187 for (p = local; p < art->data + art->len; p++) {
1188 if (*p == '\r' || *p == '\n')
1191 if (p == art->data + art->len) {
1195 q = xmalloc(p - local + 1);
1196 memcpy(q, local, p - local);
1198 q[p - local] = '\0';
1201 /* Compare canonical forms. */
1202 q1 = xstrdup(data->Poster);
1203 HeaderCleanFrom(q1);
1204 if (strcmp(q, q1) != 0) {
1206 sprintf(buff, "\"%.50s\" wants to cancel %s by \"%.50s\"",
1207 q1, MaxLength(MessageID, MessageID), q);
1208 ARTlog(data, ART_REJECT, buff);
1219 ** Process a cancel message.
1222 ARTcancel(const ARTDATA *data, const char *MessageID, const bool Trusted)
1224 char buff[SMBUF+16];
1228 TMRstart(TMR_ARTCNCL);
1229 if (!DoCancels && !Trusted) {
1230 TMRstop(TMR_ARTCNCL);
1234 if (!ARTidok(MessageID)) {
1235 syslog(L_NOTICE, "%s bad cancel Message-ID %s", data->Feedsite,
1236 MaxLength(MessageID, MessageID));
1237 TMRstop(TMR_ARTCNCL);
1241 if (!HIScheck(History, MessageID)) {
1242 /* Article hasn't arrived here, so write a fake entry using
1243 * most of the information from the cancel message. */
1244 if (innconf->verifycancels && !Trusted) {
1245 TMRstop(TMR_ARTCNCL);
1248 InndHisRemember(MessageID);
1249 snprintf(buff, sizeof(buff), "Cancelling %s",
1250 MaxLength(MessageID, MessageID));
1251 ARTlog(data, ART_CANC, buff);
1252 TMRstop(TMR_ARTCNCL);
1255 if (Trusted || !innconf->verifycancels)
1256 r = HISlookup(History, MessageID, NULL, NULL, NULL, &token);
1258 r = ARTcancelverify(data, MessageID, &token);
1260 TMRstop(TMR_ARTCNCL);
1264 /* Get stored message and zap them. */
1265 if (!SMcancel(token) && SMerrno != SMERR_NOENT && SMerrno != SMERR_UNINIT)
1266 syslog(L_ERROR, "%s cant cancel %s (SMerrno %d)", LogName,
1267 TokenToText(token), SMerrno);
1268 if (innconf->immediatecancel && !SMflushcacheddata(SM_CANCELEDART))
1269 syslog(L_ERROR, "%s cant cancel cached %s", LogName, TokenToText(token));
1270 snprintf(buff, sizeof(buff), "Cancelling %s",
1271 MaxLength(MessageID, MessageID));
1272 ARTlog(data, ART_CANC, buff);
1273 TMRstop(TMR_ARTCNCL);
1277 ** Process a control message. Cancels are handled here, but any others
1278 ** are passed out to an external program in a specific directory that
1279 ** has the same name as the first word of the control message.
1282 ARTcontrol(ARTDATA *data, char *Control, CHANNEL *cp UNUSED)
1286 /* See if it's a cancel message. */
1288 if (c == 'c' && strncmp(Control, "cancel", 6) == 0) {
1289 for (p = &Control[6]; ISWHITE(*p); p++)
1291 if (*p && ARTidok(p))
1292 ARTcancel(data, p, false);
1298 ** Parse a Distribution line, splitting it up into NULL-terminated array of
1302 ARTparsedist(const char *p, int size, LISTBUFFER *list)
1308 SetupListBuffer(size, list);
1310 /* loop over text and copy */
1311 for (i = 0, q = list->Data, dp = list->List ; *p ; p++, *q++ = '\0') {
1312 /* skip leading separators. */
1313 for (; *p && ((*p == ',') || ISWHITE(*p)) ; p++)
1318 if (list->ListLength <= i) {
1319 list->ListLength += DEFAULTNGBOXSIZE;
1320 list->List = xrealloc(list->List, list->ListLength * sizeof(char *));
1321 dp = &list->List[i];
1323 /* mark the start of the distribution, move to the end of it while copying */
1324 for (*dp++ = q, i++ ; *p && (*p != ',') && !ISWHITE(*p) ;)
1330 if (i == list->ListLength) {
1331 list->ListLength += DEFAULTNGBOXSIZE;
1332 list->List = xrealloc(list->List, list->ListLength * sizeof(char *));
1333 dp = &list->List[i];
1340 ** A somewhat similar routine, except that this handles negated entries
1341 ** in the list and is used to check the distribution sub-field.
1344 DISTwanted(char **list, char *p)
1350 for (sawbang = false, c = *p; (q = *list) != NULL; list++) {
1353 if (c == *++q && strcmp(p, q) == 0)
1355 } else if (c == *q && strcmp(p, q) == 0)
1359 /* If we saw any !foo's and didn't match, then assume they are all negated
1360 distributions and return true, else return false. */
1365 ** See if any of the distributions in the article are wanted by the site.
1368 DISTwantany(char **site, char **article)
1370 for ( ; *article; article++)
1371 if (DISTwanted(site, *article))
1377 ** Send the current article to all sites that would get it if the
1378 ** group were created.
1381 ARTsendthegroup(char *name)
1387 for (ngp = NGfind(ARTctl), sp = Sites, i = nSites; --i >= 0; sp++) {
1388 if (sp->Name != NULL && SITEwantsgroup(sp, name)) {
1395 ** Check if site doesn't want this group even if it's crossposted
1396 ** to a wanted group.
1399 ARTpoisongroup(char *name)
1404 for (sp = Sites, i = nSites; --i >= 0; sp++) {
1405 if (sp->Name != NULL && (sp->PoisonEntry || ME.PoisonEntry) &&
1406 SITEpoisongroup(sp, name))
1412 ** Assign article numbers to the article and create the Xref line.
1413 ** If we end up not being able to write the article, we'll get "holes"
1414 ** in the directory and active file.
1417 ARTassignnumbers(ARTDATA *data)
1420 int i, len, linelen, buflen;
1423 if (data->XrefBufLength == 0) {
1424 data->XrefBufLength = MAXHEADERSIZE * 2 + 1;
1425 data->Xref = xmalloc(data->XrefBufLength);
1426 strncpy(data->Xref, Path.data, Path.used - 1);
1428 len = Path.used - 1;
1429 p = q = data->Xref + len;
1430 for (linelen = i = 0; (ngp = GroupPointers[i]) != NULL; i++) {
1431 /* If already went to this group (i.e., multiple groups are aliased
1432 * into it), then skip it. */
1433 if (ngp->PostCount > 0)
1436 /* Bump the number. */
1439 if (!FormatLong(ngp->LastString, (long)ngp->Last, ngp->Lastwidth)) {
1440 syslog(L_ERROR, "%s cant update_active %s", LogName, ngp->Name);
1443 ngp->Filenum = ngp->Last;
1444 /* len ' ' "news_groupname" ':' "#" "\r\n" */
1445 if (len + 1 + ngp->NameLength + 1 + 10 + 2 > data->XrefBufLength) {
1446 data->XrefBufLength += MAXHEADERSIZE;
1447 data->Xref = xrealloc(data->Xref, data->XrefBufLength);
1448 p = data->Xref + len;
1450 if (linelen + 1 + ngp->NameLength + 1 + 10 > MAXHEADERSIZE) {
1452 sprintf(p, "\r\n %s:%lu", ngp->Name, ngp->Filenum);
1454 linelen = buflen - 2;
1456 sprintf(p, " %s:%lu", ngp->Name, ngp->Filenum);
1463 /* p[0] is replaced with '\r' to be wireformatted when stored. p[1] needs to
1467 /* data->XrefLength includes trailing "\r\n" */
1468 data->XrefLength = len + 2;
1469 data->Replic = q + 1;
1470 data->ReplicLength = len - (q + 1 - data->Xref);
1474 ** Parse the data from the xref header and assign the numbers.
1475 ** This involves replacing the GroupPointers entries.
1478 ARTxrefslave(ARTDATA *data)
1480 char *p, *q, *name, *next, c = 0;
1483 bool nogroup = true;
1484 HDRCONTENT *hc = data->HdrContent;
1486 if (!HDR_FOUND(HDR__XREF))
1488 /* skip server name */
1489 if ((p = strpbrk(HDR(HDR__XREF), " \t\r\n")) == NULL)
1491 /* in case Xref is folded */
1492 while (*++p == ' ' || *p == '\t' || *p == '\r' || *p == '\n');
1496 data->ReplicLength = HDR_LEN(HDR__XREF) - (p - HDR(HDR__XREF));
1497 for (i = 0; (*p != '\0') && (p < HDR(HDR__XREF) + HDR_LEN(HDR__XREF)) ; p = next) {
1498 /* Mark end of this entry and where next one starts. */
1500 if ((q = next = strpbrk(p, " \t\r\n")) != NULL) {
1503 while (*++next == ' ' || *next == '\t' || *next == '\r' || *next == '\n');
1509 /* Split into news.group:# */
1510 if ((p = strchr(p, ':')) == NULL) {
1511 syslog(L_ERROR, "%s bad_format %s", LogName, name);
1517 if ((ngp = NGfind(name)) == NULL) {
1518 syslog(L_ERROR, "%s bad_newsgroup %s", LogName, name);
1525 ngp->Filenum = atol(p + 1);
1529 /* Update active file if we got a new high-water mark. */
1530 if (ngp->Last < ngp->Filenum) {
1531 ngp->Last = ngp->Filenum;
1532 if (!FormatLong(ngp->LastString, (long)ngp->Last, ngp->Lastwidth)) {
1533 syslog(L_ERROR, "%s cant update_active %s", LogName, ngp->Name);
1537 /* Mark that this group gets the article. */
1539 GroupPointers[i++] = ngp;
1542 GroupPointers[i] = NULL;
1549 ** Return true if a list of strings has a specific one. This is a
1550 ** generic routine, but is used for seeing if a host is in the Path line.
1553 ListHas(const char **list, const char *p)
1558 for (c = *p; (q = *list) != NULL; list++)
1559 if (strcasecmp(p, q) == 0)
1565 ** Even though we have already calculated the Message-ID MD5sum,
1566 ** we have to do it again since unfortunately HashMessageID()
1567 ** lowercases the Message-ID first. We also need to remain
1568 ** compatible with Diablo's hashfeed.
1572 HashFeedMD5(char *MessageID, unsigned int offset)
1574 static char LastMessageID[128];
1575 static char *LastMessageIDPtr;
1576 static struct md5_context context;
1582 /* Some light caching. */
1583 if (MessageID != LastMessageIDPtr ||
1584 strcmp(MessageID, LastMessageID) != 0) {
1586 md5_update(&context, (unsigned char *)MessageID, strlen(MessageID));
1587 md5_final(&context);
1588 LastMessageIDPtr = MessageID;
1589 strncpy(LastMessageID, MessageID, sizeof(LastMessageID) - 1);
1590 LastMessageID[sizeof(LastMessageID) - 1] = 0;
1593 memcpy(&ret, &context.digest[12 - offset], 4);
1599 ** Old-style Diablo (< 5.1) quickhash.
1603 HashFeedQH(char *MessageID, unsigned int *tmp)
1608 if (*tmp != (unsigned int)-1)
1611 p = (unsigned char *)MessageID;
1615 *tmp = (unsigned int)n;
1621 ** Return true if an element of the HASHFEEDLIST matches
1622 ** the hash of the Message-ID.
1625 HashFeedMatch(HASHFEEDLIST *hf, char *MessageID)
1627 unsigned int qh = (unsigned int)-1;
1631 if (hf->type == HASHFEED_MD5)
1632 h = HashFeedMD5(MessageID, hf->offset);
1633 else if (hf->type == HASHFEED_QH)
1634 h = HashFeedQH(MessageID, &qh);
1637 if ((h % hf->mod + 1) >= hf->begin &&
1638 (h % hf->mod + 1) <= hf->end)
1647 ** Propagate an article to the sites that have "expressed an interest."
1650 ARTpropagate(ARTDATA *data, const char **hops, int hopcount, char **list,
1651 bool ControlStore, bool OverviewCreated)
1653 HDRCONTENT *hc = data->HdrContent;
1655 int i, j, Groupcount, Followcount, Crosscount;
1660 /* Work out which sites should really get it. */
1661 Groupcount = data->Groupcount;
1662 Followcount = data->Followcount;
1663 Crosscount = Groupcount + Followcount * Followcount;
1664 for (sp = Sites, i = nSites; --i >= 0; sp++) {
1665 if ((sp->IgnoreControl && ControlStore) ||
1666 (sp->NeedOverviewCreation && !OverviewCreated))
1668 if (sp->Seenit || !sp->Sendit)
1672 if (sp->Originator) {
1673 if (!HDR_FOUND(HDR__XTRACE)) {
1674 if (!sp->FeedwithoutOriginator)
1677 if ((p = strchr(HDR(HDR__XTRACE), ' ')) != NULL) {
1679 for (j = 0, sendit = false; (q = sp->Originator[j]) != NULL; j++) {
1681 if (uwildmat(HDR(HDR__XTRACE), &q[1])) {
1687 if (uwildmat(HDR(HDR__XTRACE), q))
1699 if (sp->Master != NOSITE && Sites[sp->Master].Seenit)
1702 if (sp->MaxSize && data->BytesValue > sp->MaxSize)
1703 /* Too big for the site. */
1706 if (sp->MinSize && data->BytesValue < sp->MinSize)
1707 /* Too small for the site. */
1710 if ((sp->Hops && hopcount > sp->Hops)
1711 || (!sp->IgnorePath && ListHas(hops, sp->Name))
1712 || (sp->Groupcount && Groupcount > sp->Groupcount)
1713 || (sp->Followcount && Followcount > sp->Followcount)
1714 || (sp->Crosscount && Crosscount > sp->Crosscount))
1715 /* Site already saw the article; path too long; or too much
1719 if (sp->HashFeedList &&
1720 !HashFeedMatch(sp->HashFeedList, HDR(HDR__MESSAGE_ID)))
1721 /* hashfeed doesn't match */
1724 if (list && *list != NULL && sp->Distributions &&
1725 !DISTwantany(sp->Distributions, list))
1726 /* Not in the site's desired list of distributions. */
1728 if (sp->DistRequired && (list == NULL || *list == NULL))
1729 /* Site requires Distribution header and there isn't one. */
1732 if (sp->Exclusions) {
1733 for (j = 0; (p = sp->Exclusions[j]) != NULL; j++)
1734 if (ListHas(hops, p))
1737 /* A host in the site's exclusion list was in the Path. */
1741 /* Write that the site is getting it, and flag to send it. */
1742 if (innconf->logsitename) {
1743 if (fprintf(Log, " %s", sp->Name) == EOF || ferror(Log)) {
1745 syslog(L_ERROR, "%s cant write log_site %m", LogName);
1746 IOError("logging site", j);
1752 if (sp->Master != NOSITE)
1753 Sites[sp->Master].Seenit = true;
1755 if (putc('\n', Log) == EOF
1756 || (!BufferedLogs && fflush(Log))
1758 syslog(L_ERROR, "%s cant write log_end %m", LogName);
1762 /* Handle funnel sites. */
1763 for (sp = Sites, i = nSites; --i >= 0; sp++) {
1764 if (sp->Sendit && sp->Funnel != NOSITE) {
1766 funnel = &Sites[sp->Funnel];
1767 funnel->Sendit = true;
1768 if (funnel->FNLwantsnames) {
1769 bp = &funnel->FNLnames;
1770 p = &bp->data[bp->used];
1775 bp->used += strlcpy(p, sp->Name, bp->size - bp->used);
1782 ** Build up the overview data.
1785 ARTmakeoverview(CHANNEL *cp)
1787 ARTDATA *data = &cp->Data;
1788 HDRCONTENT *hc = data->HdrContent;
1789 static char SEP[] = "\t";
1790 static char COLONSPACE[] = ": ";
1791 struct buffer *overview = &data->Overview;
1793 const ARTHEADER *hp;
1796 char *key_old_value = NULL;
1797 int key_old_length = 0;
1799 if (ARTfields == NULL) {
1805 buffer_resize(overview, MAXHEADERSIZE);
1806 buffer_set(overview, "", 0);
1808 /* Write the data, a field at a time. */
1809 for (fp = ARTfields; fp->Header; fp++) {
1810 if (fp != ARTfields)
1811 buffer_append(overview, SEP, strlen(SEP));
1813 j = hp - ARTheaders;
1815 /* If requested, generate keywords from the body of the article and patch
1816 them into the apparent value of the Keywords header so that they make
1817 it into overview. */
1818 if (DO_KEYWORDS && innconf->keywords) {
1819 /* Ensure that there are Keywords: to shovel. */
1820 if (hp == &ARTheaders[HDR__KEYWORDS]) {
1821 key_old_value = HDR(HDR__KEYWORDS);
1822 key_old_length = HDR_LEN(HDR__KEYWORDS);
1823 KEYgenerate(&hc[HDR__KEYWORDS], cp->In.data + data->Body,
1824 key_old_value, key_old_length);
1830 p = data->Bytes + 7; /* skip "Bytes: " */
1831 len = data->BytesLength;
1834 if (innconf->xrefslave) {
1839 len = data->XrefLength - 2;
1849 if (fp->NeedHeader) {
1850 buffer_append(overview, hp->Name, hp->Size);
1851 buffer_append(overview, COLONSPACE, strlen(COLONSPACE));
1853 if (overview->used + overview->left + len > overview->size)
1854 buffer_resize(overview, overview->size + len);
1855 for (i = 0, q = overview->data + overview->left; i < len; p++, i++) {
1856 if (*p == '\r' && i < len - 1 && p[1] == '\n') {
1861 if (*p == '\0' || *p == '\t' || *p == '\n' || *p == '\r')
1868 /* Patch the old keywords back in. */
1869 if (DO_KEYWORDS && innconf->keywords) {
1870 if (key_old_value) {
1872 free(hc->Value); /* malloc'd within */
1873 hc->Value = key_old_value;
1874 hc->Length = key_old_length;
1875 key_old_value = NULL;
1882 ** This routine is the heart of it all. Take a full article, parse it,
1883 ** file or reject it, feed it to the other sites. Return the NNTP
1884 ** message to send back.
1887 ARTpost(CHANNEL *cp)
1889 char *p, **groups, ControlWord[SMBUF], **hops, *controlgroup;
1890 int i, j, *isp, hopcount, oerrno, canpost;
1891 NEWSGROUP *ngp, **ngptr;
1893 ARTDATA *data = &cp->Data;
1894 HDRCONTENT *hc = data->HdrContent;
1895 bool Approved, Accepted, LikeNewgroup, ToGroup, GroupMissing;
1896 bool NoHistoryUpdate, artclean;
1897 bool ControlStore = false;
1898 bool NonExist = false;
1899 bool OverviewCreated = false;
1900 bool IsControl = false;
1901 bool Filtered = false;
1902 struct buffer *article;
1906 #if defined(DO_PERL) || defined(DO_PYTHON)
1908 #endif /* defined(DO_PERL) || defined(DO_PYTHON) */
1911 /* Preliminary clean-ups. */
1913 artclean = ARTclean(data, cp->Error);
1915 /* If we don't have Path or Message-ID, we can't continue. */
1916 if (!artclean && (!HDR_FOUND(HDR__PATH) || !HDR_FOUND(HDR__MESSAGE_ID)))
1918 hopcount = ARTparsepath(HDR(HDR__PATH), HDR_LEN(HDR__PATH), &data->Path);
1919 if (hopcount == 0) {
1920 snprintf(cp->Error, sizeof(cp->Error), "%d illegal path element",
1924 hops = data->Path.List;
1926 if (innconf->logipaddr) {
1927 data->Feedsite = RChostname(cp);
1928 if (data->Feedsite == NULL)
1929 data->Feedsite = CHANname(cp);
1930 if (strcmp("0.0.0.0", data->Feedsite) == 0 || data->Feedsite[0] == '\0')
1931 data->Feedsite = hops && hops[0] ? hops[0] : CHANname(cp);
1933 data->Feedsite = hops && hops[0] ? hops[0] : CHANname(cp);
1935 data->FeedsiteLength = strlen(data->Feedsite);
1937 hash = HashMessageID(HDR(HDR__MESSAGE_ID));
1939 if (HIScheck(History, HDR(HDR__MESSAGE_ID))) {
1940 snprintf(cp->Error, sizeof(cp->Error), "%d Duplicate", NNTP_REJECTIT_VAL);
1941 ARTlog(data, ART_REJECT, cp->Error);
1942 ARTreject(REJECT_DUPLICATE, cp, article);
1946 ARTlog(data, ART_REJECT, cp->Error);
1947 if (innconf->remembertrash && (Mode == OMrunning) &&
1948 !InndHisRemember(HDR(HDR__MESSAGE_ID)))
1949 syslog(L_ERROR, "%s cant write history %s %m", LogName,
1950 HDR(HDR__MESSAGE_ID));
1951 ARTreject(REJECT_OTHER, cp, article);
1955 i = strlen(hops[0]);
1956 if (i == Path.used - 1 &&
1957 strncmp(Path.data, hops[0], Path.used - 1) == 0)
1958 data->Hassamepath = true;
1960 data->Hassamepath = false;
1961 if (Pathcluster.data != NULL &&
1962 i == Pathcluster.used - 1 &&
1963 strncmp(Pathcluster.data, hops[0], Pathcluster.used - 1) == 0)
1964 data->Hassamecluster = true;
1966 data->Hassamecluster = false;
1967 if (Pathalias.data != NULL &&
1968 !ListHas((const char **)hops, (const char *)innconf->pathalias))
1969 data->AddAlias = true;
1971 data->AddAlias = false;
1973 /* And now check the path for unwanted sites -- Andy */
1974 for(j = 0 ; ME.Exclusions && ME.Exclusions[j] ; j++) {
1975 if (ListHas((const char **)hops, (const char *)ME.Exclusions[j])) {
1976 snprintf(cp->Error, sizeof(cp->Error), "%d Unwanted site %s in path",
1977 NNTP_REJECTIT_VAL, MaxLength(ME.Exclusions[j], ME.Exclusions[j]));
1978 ARTlog(data, ART_REJECT, cp->Error);
1979 if (innconf->remembertrash && (Mode == OMrunning) &&
1980 !InndHisRemember(HDR(HDR__MESSAGE_ID)))
1981 syslog(L_ERROR, "%s cant write history %s %m", LogName,
1982 HDR(HDR__MESSAGE_ID));
1983 ARTreject(REJECT_SITE, cp, article);
1988 #if defined(DO_PERL) || defined(DO_PYTHON)
1989 filterPath = HDR(HDR__PATH);
1990 #endif /* DO_PERL || DO_PYTHON */
1992 #if defined(DO_PYTHON)
1993 TMRstart(TMR_PYTHON);
1994 filterrc = PYartfilter(data, article->data + data->Body,
1995 cp->Next - data->Body, data->Lines);
1996 TMRstop(TMR_PYTHON);
1997 if (filterrc != NULL) {
1998 if (innconf->dontrejectfiltered) {
2001 snprintf(cp->Error, sizeof(cp->Error), "%d %.200s", NNTP_REJECTIT_VAL,
2003 syslog(L_NOTICE, "rejecting[python] %s %s", HDR(HDR__MESSAGE_ID),
2005 ARTlog(data, ART_REJECT, cp->Error);
2006 if (innconf->remembertrash && (Mode == OMrunning) &&
2007 !InndHisRemember(HDR(HDR__MESSAGE_ID)))
2008 syslog(L_ERROR, "%s cant write history %s %m", LogName,
2009 HDR(HDR__MESSAGE_ID));
2010 ARTreject(REJECT_FILTER, cp, article);
2014 #endif /* DO_PYTHON */
2016 /* I suppose some masochist will run with Python and Perl together */
2018 #if defined(DO_PERL)
2020 filterrc = PLartfilter(data, article->data + data->Body,
2021 cp->Next - data->Body, data->Lines);
2024 if (innconf->dontrejectfiltered) {
2027 snprintf(cp->Error, sizeof(cp->Error), "%d %.200s", NNTP_REJECTIT_VAL,
2029 syslog(L_NOTICE, "rejecting[perl] %s %s", HDR(HDR__MESSAGE_ID),
2031 ARTlog(data, ART_REJECT, cp->Error);
2032 if (innconf->remembertrash && (Mode == OMrunning) &&
2033 !InndHisRemember(HDR(HDR__MESSAGE_ID)))
2034 syslog(L_ERROR, "%s cant write history %s %m", LogName,
2035 HDR(HDR__MESSAGE_ID));
2036 ARTreject(REJECT_FILTER, cp, article);
2040 #endif /* DO_PERL */
2042 /* I suppose some masochist will run with both TCL and Perl together */
2045 if (TCLFilterActive) {
2047 const ARTHEADER *hp;
2049 /* make info available to Tcl */
2051 TCLCurrArticle = article;
2053 Tcl_UnsetVar(TCLInterpreter, "Body", TCL_GLOBAL_ONLY);
2054 Tcl_UnsetVar(TCLInterpreter, "Headers", TCL_GLOBAL_ONLY);
2055 for (i = 0 ; i < MAX_ARTHEADER ; i++, hc++) {
2057 hp = &ARTheaders[i];
2058 Tcl_SetVar2(TCLInterpreter, "Headers", (char *) hp->Name, HDR(i),
2062 Tcl_SetVar(TCLInterpreter, "Body", article->data + data->Body,
2066 code = Tcl_Eval(TCLInterpreter, "filter_news");
2067 Tcl_UnsetVar(TCLInterpreter, "Body", TCL_GLOBAL_ONLY);
2068 Tcl_UnsetVar(TCLInterpreter, "Headers", TCL_GLOBAL_ONLY);
2069 if (code == TCL_OK) {
2070 if (strcmp(TCLInterpreter->result, "accept") != 0) {
2071 if (innconf->dontrejectfiltered) {
2074 snprintf(cp->Error, sizeof(cp->Error), "%d %.200s",
2075 NNTP_REJECTIT_VAL, TCLInterpreter->result);
2076 syslog(L_NOTICE, "rejecting[tcl] %s %s", HDR(HDR__MESSAGE_ID),
2078 ARTlog(data, ART_REJECT, cp->Error);
2079 if (innconf->remembertrash && (Mode == OMrunning) &&
2080 !InndHisRemember(HDR(HDR__MESSAGE_ID)))
2081 syslog(L_ERROR, "%s cant write history %s %m",
2082 LogName, HDR(HDR__MESSAGE_ID));
2083 ARTreject(REJECT_FILTER, cp, article);
2088 /* the filter failed: complain and then turn off filtering */
2089 syslog(L_ERROR, "TCL proc filter_news failed: %s",
2090 TCLInterpreter->result);
2094 #endif /* defined(DO_TCL) */
2096 /* If we limit what distributions we get, see if we want this one. */
2097 if (HDR_FOUND(HDR__DISTRIBUTION)) {
2098 if (HDR(HDR__DISTRIBUTION)[0] == ',') {
2099 snprintf(cp->Error, sizeof(cp->Error), "%d bogus distribution \"%s\"",
2101 MaxLength(HDR(HDR__DISTRIBUTION), HDR(HDR__DISTRIBUTION)));
2102 ARTlog(data, ART_REJECT, cp->Error);
2103 if (innconf->remembertrash && Mode == OMrunning &&
2104 !InndHisRemember(HDR(HDR__MESSAGE_ID)))
2105 syslog(L_ERROR, "%s cant write history %s %m", LogName,
2106 HDR(HDR__MESSAGE_ID));
2107 ARTreject(REJECT_DISTRIB, cp, article);
2110 ARTparsedist(HDR(HDR__DISTRIBUTION), HDR_LEN(HDR__DISTRIBUTION),
2111 &data->Distribution);
2112 if (ME.Distributions &&
2113 !DISTwantany(ME.Distributions, data->Distribution.List)) {
2114 snprintf(cp->Error, sizeof(cp->Error),
2115 "%d Unwanted distribution \"%s\"", NNTP_REJECTIT_VAL,
2116 MaxLength(data->Distribution.List[0],
2117 data->Distribution.List[0]));
2118 ARTlog(data, ART_REJECT, cp->Error);
2119 if (innconf->remembertrash && (Mode == OMrunning) &&
2120 !InndHisRemember(HDR(HDR__MESSAGE_ID)))
2121 syslog(L_ERROR, "%s cant write history %s %m",
2122 LogName, HDR(HDR__MESSAGE_ID));
2123 ARTreject(REJECT_DISTRIB, cp, article);
2128 ARTparsedist("", 0, &data->Distribution);
2131 for (i = nSites, sp = Sites; --i >= 0; sp++) {
2135 sp->FNLnames.used = 0;
2139 if (HDR_FOUND(HDR__FOLLOWUPTO)) {
2140 for (i = 0, p = HDR(HDR__FOLLOWUPTO) ; (p = strchr(p, ',')) != NULL ;
2143 data->Followcount = i;
2145 if (data->Followcount == 0)
2146 data->Followcount = data->Groupcount;
2148 groups = data->Newsgroups.List;
2149 /* Parse the Control header. */
2150 LikeNewgroup = false;
2151 if (HDR_FOUND(HDR__CONTROL)) {
2154 /* Nip off the first word into lowercase. */
2155 strlcpy(ControlWord, HDR(HDR__CONTROL), sizeof(ControlWord));
2156 for (p = ControlWord; *p && !ISWHITE(*p); p++)
2157 if (CTYPE(isupper, *p))
2160 LikeNewgroup = (strcmp(ControlWord, "newgroup") == 0
2161 || strcmp(ControlWord, "rmgroup") == 0);
2163 if (innconf->ignorenewsgroups && LikeNewgroup) {
2164 for (p++; *p && ISWHITE(*p); p++);
2166 for (p++; *p; p++) {
2173 for (p++; *p; p++) {
2179 groupbuff[1] = NULL;
2181 data->Groupcount = 2;
2182 if (data->Followcount == 0)
2183 data->Followcount = data->Groupcount;
2186 LikeNewgroup = (LikeNewgroup || strcmp(ControlWord, "checkgroups") == 0);
2188 /* Control messages to "foo.ctl" are treated as if they were
2189 * posted to "foo". I should probably apologize for all the
2190 * side-effects in the if. */
2191 for (i = 0; (p = groups[i++]) != NULL; )
2192 if ((j = strlen(p) - 4) > 0 && *(p += j) == '.'
2193 && p[1] == 'c' && p[2] == 't' && p[3] == 'l')
2197 /* Loop over the newsgroups, see which ones we want, and get the
2198 * total space needed for the Xref line. At the end of this section
2199 * of code, j will have the needed length, the appropriate site
2200 * entries will have their Sendit and ng fields set, and GroupPointers
2201 * will have pointers to the relevant newsgroups. */
2202 ToGroup = NoHistoryUpdate = false;
2203 Approved = HDR_FOUND(HDR__APPROVED);
2204 ngptr = GroupPointers;
2205 for (GroupMissing = Accepted = false; (p = *groups) != NULL; groups++) {
2206 if ((ngp = NGfind(p)) == NULL) {
2207 GroupMissing = true;
2208 if (LikeNewgroup && Approved) {
2209 /* Checkgroups/newgroup/rmgroup being sent to a group that doesn't
2210 * exist. Assume it is being sent to the group being created or
2211 * removed (or to the admin group to which the checkgroups is posted),
2212 * and send it to all sites that would or would have had the group
2213 * if it were created. */
2214 ARTsendthegroup(*groups);
2218 ARTpoisongroup(*groups);
2220 if (innconf->mergetogroups) {
2221 /* Try to collapse all "to" newsgroups. */
2222 if (*p != 't' || *++p != 'o' || *++p != '.' || *++p == '\0')
2226 if ((sp = SITEfind(p)) != NULL) {
2235 /* Ignore this group? */
2236 if (ngp->Rest[0] == NF_FLAG_IGNORE) {
2237 /* See if any of this group's sites considers this group poison. */
2238 for (isp = ngp->Poison, i = ngp->nPoison; --i >= 0; isp++)
2240 Sites[*isp].Poison = true;
2244 /* Basic validity check. */
2245 if (ngp->Rest[0] == NF_FLAG_MODERATED && !Approved) {
2246 snprintf(cp->Error, sizeof(cp->Error), "%d Unapproved for \"%s\"",
2247 NNTP_REJECTIT_VAL, MaxLength(ngp->Name, ngp->Name));
2248 ARTlog(data, ART_REJECT, cp->Error);
2249 if (innconf->remembertrash && (Mode == OMrunning) &&
2250 !InndHisRemember(HDR(HDR__MESSAGE_ID)))
2251 syslog(L_ERROR, "%s cant write history %s %m", LogName,
2252 HDR(HDR__MESSAGE_ID));
2253 ARTreject(REJECT_UNAPP, cp, article);
2257 /* See if any of this group's sites considers this group poison. */
2258 for (isp = ngp->Poison, i = ngp->nPoison; --i >= 0; isp++)
2260 Sites[*isp].Poison = true;
2262 /* Check if we accept articles in this group from this peer, after
2263 poisoning. This means that articles that we accept from them will
2264 be handled correctly if they're crossposted. */
2265 canpost = RCcanpost(cp, p);
2266 if (!canpost) { /* At least one group cannot be fed by this peer.
2267 If we later reject the post as unwanted group,
2268 don't remember it. If we accept, do remember */
2269 NoHistoryUpdate = true;
2271 } else if (canpost < 0) {
2272 snprintf(cp->Error, sizeof(cp->Error),
2273 "%d Won't accept posts in \"%s\"", NNTP_REJECTIT_VAL,
2275 ARTlog(data, ART_REJECT, cp->Error);
2276 ARTreject(REJECT_GROUP, cp, article);
2280 /* Valid group, feed it to that group's sites. */
2282 for (isp = ngp->Sites, i = ngp->nSites; --i >= 0; isp++) {
2290 /* If it's excluded, don't file it. */
2291 if (ngp->Rest[0] == NF_FLAG_EXCLUDED)
2294 /* Expand aliases, mark the article as getting filed in the group. */
2295 if (ngp->Alias != NULL)
2301 /* Loop over sites to find Poisons/ControlOnly and undo Sendit flags. */
2302 for (i = nSites, sp = Sites; --i >= 0; sp++) {
2303 if (sp->Poison || (sp->ControlOnly && !IsControl)
2304 || (sp->DontWantNonExist && NonExist))
2308 /* Control messages not filed in "to" get filed only in control.name
2310 if (IsControl && Accepted && !ToGroup) {
2311 ControlStore = true;
2312 controlgroup = concat("control.", ControlWord, (char *) 0);
2313 if ((ngp = NGfind(controlgroup)) == NULL)
2314 ngp = NGfind(ARTctl);
2317 ngptr = GroupPointers;
2319 for (isp = ngp->Sites, i = ngp->nSites; --i >= 0; isp++) {
2321 /* Checkgroups/newgroup/rmgroup posted to local.example
2322 * will still be sent with the newsfeeds patterns
2323 * "*,!local.*" and "*,@local.*". So as not to propagate
2324 * them, "!control,!control.*" should be added. */
2331 /* If !Accepted, then none of the article's newsgroups exist in our
2332 * active file. Proper action is to drop the article on the floor.
2333 * If ngptr == GroupPointers, then all the new article's newsgroups are
2334 * "j" entries in the active file. In that case, we have to file it
2335 * under junk so that downstream feeds can get it. */
2336 if (!Accepted || ngptr == GroupPointers) {
2338 if (NoHistoryUpdate) {
2339 snprintf(cp->Error, sizeof(cp->Error), "%d Can't post to \"%s\"",
2340 NNTP_REJECTIT_VAL, MaxLength(data->Newsgroups.List[0],
2341 data->Newsgroups.List[0]));
2343 snprintf(cp->Error, sizeof(cp->Error),
2344 "%d Unwanted newsgroup \"%s\"", NNTP_REJECTIT_VAL,
2345 MaxLength(data->Newsgroups.List[0],
2346 data->Newsgroups.List[0]));
2348 ARTlog(data, ART_REJECT, cp->Error);
2349 if (!innconf->wanttrash) {
2350 if (innconf->remembertrash && (Mode == OMrunning) &&
2351 !NoHistoryUpdate && !InndHisRemember(HDR(HDR__MESSAGE_ID)))
2352 syslog(L_ERROR, "%s cant write history %s %m",
2353 LogName, HDR(HDR__MESSAGE_ID));
2354 ARTreject(REJECT_GROUP, cp, article);
2357 /* if !GroupMissing, then all the groups the article was posted
2358 * to have a flag of "x" in our active file, and therefore
2359 * we should throw the article away: if you have set
2360 * innconf->remembertrash true, then you want all trash except that
2361 * which you explicitly excluded in your active file. */
2362 if (!GroupMissing) {
2363 if (innconf->remembertrash && (Mode == OMrunning) &&
2364 !NoHistoryUpdate && !InndHisRemember(HDR(HDR__MESSAGE_ID)))
2365 syslog(L_ERROR, "%s cant write history %s %m",
2366 LogName, HDR(HDR__MESSAGE_ID));
2367 ARTreject(REJECT_GROUP, cp, article);
2372 ngp = NGfind(ARTjnk);
2376 /* Junk can be fed to other sites. */
2377 for (isp = ngp->Sites, i = ngp->nSites; --i >= 0; isp++) {
2380 if (!sp->Poison && !(sp->ControlOnly && !IsControl))
2387 if (innconf->xrefslave) {
2388 if (ARTxrefslave(data) == false) {
2389 if (HDR_FOUND(HDR__XREF)) {
2390 snprintf(cp->Error, sizeof(cp->Error),
2391 "%d Xref header \"%s\" invalid in xrefslave mode",
2393 MaxLength(HDR(HDR__XREF), HDR(HDR__XREF)));
2395 snprintf(cp->Error, sizeof(cp->Error),
2396 "%d Xref header required in xrefslave mode",
2399 ARTlog(data, ART_REJECT, cp->Error);
2400 ARTreject(REJECT_OTHER, cp, article);
2404 ARTassignnumbers(data);
2407 /* Now we can file it. */
2408 if (++ICDactivedirty >= innconf->icdsynccount) {
2412 TMRstart(TMR_ARTWRITE);
2413 for (i = 0; (ngp = GroupPointers[i]) != NULL; i++)
2416 token = ARTstore(cp);
2417 /* change trailing '\r\n' to '\0\n' of all system headers */
2418 for (i = 0 ; i < MAX_ARTHEADER ; i++) {
2422 if (token.type == TOKEN_EMPTY) {
2423 syslog(L_ERROR, "%s cant store article: %s", LogName, SMerrorstr);
2424 snprintf(cp->Error, sizeof(cp->Error), "%d cant store article",
2426 ARTlog(data, ART_REJECT, cp->Error);
2427 if ((Mode == OMrunning) && !InndHisRemember(HDR(HDR__MESSAGE_ID)))
2428 syslog(L_ERROR, "%s cant write history %s %m", LogName,
2429 HDR(HDR__MESSAGE_ID));
2430 ARTreject(REJECT_OTHER, cp, article);
2431 TMRstop(TMR_ARTWRITE);
2434 TMRstop(TMR_ARTWRITE);
2435 if ((innconf->enableoverview && !innconf->useoverchan) || NeedOverview) {
2436 TMRstart(TMR_OVERV);
2437 ARTmakeoverview(cp);
2438 if (innconf->enableoverview && !innconf->useoverchan) {
2439 if ((result = OVadd(token, data->Overview.data, data->Overview.left,
2440 data->Arrived, data->Expires)) == OVADDFAILED) {
2441 if (OVctl(OVSPACE, (void *)&i) && i == OV_NOSPACE)
2442 IOError("creating overview", ENOSPC);
2444 IOError("creating overview", 0);
2445 syslog(L_ERROR, "%s cant store overview for %s", LogName,
2446 TokenToText(token));
2447 OverviewCreated = false;
2449 if (result == OVADDCOMPLETED)
2450 OverviewCreated = true;
2452 OverviewCreated = false;
2457 strlcpy(data->TokenText, TokenToText(token), sizeof(data->TokenText));
2459 /* Update history if we didn't get too many I/O errors above. */
2460 if ((Mode != OMrunning) ||
2461 !InndHisWrite(HDR(HDR__MESSAGE_ID), data->Arrived, data->Posted,
2462 data->Expires, &token)) {
2464 syslog(L_ERROR, "%s cant write history %s %m", LogName,
2465 HDR(HDR__MESSAGE_ID));
2466 snprintf(cp->Error, sizeof(cp->Error), "%d cant write history, %s",
2467 NNTP_RESENDIT_VAL, strerror(errno));
2468 ARTlog(data, ART_REJECT, cp->Error);
2469 ARTreject(REJECT_OTHER, cp, article);
2473 if (NeedStoredGroup)
2474 data->StoredGroupLength = strlen(data->Newsgroups.List[0]);
2476 /* Start logging, then propagate the article. */
2477 if (data->CRwithoutLF > 0 || data->LFwithoutCR > 0) {
2478 if (data->CRwithoutLF > 0 && data->LFwithoutCR == 0)
2479 snprintf(cp->Error, sizeof(cp->Error),
2480 "%d article includes CR without LF(%d)",
2481 NNTP_REJECTIT_VAL, data->CRwithoutLF);
2482 else if (data->CRwithoutLF == 0 && data->LFwithoutCR > 0)
2483 snprintf(cp->Error, sizeof(cp->Error),
2484 "%d article includes LF without CR(%d)",
2485 NNTP_REJECTIT_VAL, data->LFwithoutCR);
2487 snprintf(cp->Error, sizeof(cp->Error),
2488 "%d article includes CR without LF(%d) and LF withtout CR(%d)",
2489 NNTP_REJECTIT_VAL, data->CRwithoutLF, data->LFwithoutCR);
2490 ARTlog(data, ART_STRSTR, cp->Error);
2492 ARTlog(data, Accepted ? ART_ACCEPT : ART_JUNK, (char *)NULL);
2493 if ((innconf->nntplinklog) &&
2494 (fprintf(Log, " (%s)", data->TokenText) == EOF || ferror(Log))) {
2496 syslog(L_ERROR, "%s cant write log_nntplink %m", LogName);
2497 IOError("logging nntplink", oerrno);
2500 /* Calculate Max Article Time */
2501 i = Now.time - cp->ArtBeg;
2506 cp->Size += data->BytesValue;
2507 if (innconf->logartsize) {
2508 if (fprintf(Log, " %ld", data->BytesValue) == EOF || ferror (Log)) {
2510 syslog(L_ERROR, "%s cant write artsize %m", LogName);
2511 IOError("logging artsize", oerrno);
2516 ARTpropagate(data, (const char **)hops, hopcount, data->Distribution.List,
2517 ControlStore, OverviewCreated);
2519 /* Now that it's been written, process the control message. This has
2520 * a small window, if we get a new article before the newgroup message
2521 * has been processed. We could pause ourselves here, but it doesn't
2522 * seem to be worth it. */
2525 ARTcontrol(data, HDR(HDR__CONTROL), cp);
2527 if (DoCancels && HDR_FOUND(HDR__SUPERSEDES)) {
2528 if (ARTidok(HDR(HDR__SUPERSEDES)))
2529 ARTcancel(data, HDR(HDR__SUPERSEDES), false);
2533 /* And finally, send to everyone who should get it */
2534 for (sp = Sites, i = nSites; --i >= 0; sp++) {
2536 if (!Filtered || !sp->DropFiltered) {
2537 TMRstart(TMR_SITESEND);
2539 TMRstop(TMR_SITESEND);