1 /*$Id: idxthread.c,v 1.35 1999/11/22 01:47:45 lindberg Exp $*/
2 /*$Name: ezmlm-idx-040 $*/
4 /* idxthread.c contains routines that build, from the ezmlm-idx subject */
5 /* index, a structure of unique subjects as well as a table of messages with */
6 /* pointers to the subject. This leads to information on message threads */
7 /* arranged chronologically within the thread, and with the threads */
8 /* arranged chronologically by the first message within the range. */
9 /* idx_mkthreads() will arrange the author list in a similar manner. This */
10 /* saves some space, and takes a little extra time. It's needed when */
11 /* generating an author index. */
13 #include <sys/types.h>
25 #include "readwrite.h"
/* File-scope scratch state shared by the threading routines below. */
30 static stralloc line = {0}; /* primary input; also reused to build the index file name */
31 static stralloc authline = {0}; /* second line of primary input */
32 static stralloc dummyind = {0}; /* placeholder subject/author string, set up by init_dummy() */
34 static substdio ssindex; /* reader bound to the open index file via substdio_fdbuf() */
35 static char indexbuf[1024]; /* buffer handed to substdio_fdbuf() for ssindex */
37 static char strnum[FMT_ULONG]; /* scratch for fmt_ulong() when formatting the index number */
40 /* if no data, these may be the entire table, so */
41 /* need to be static */
42 static subentry sdummy; /* idx_mkthreads() starts its subject search from this node */
43 static authentry adummy;
/* Out-of-memory path: hands fatal and ERR_NOMEM to strerr_die2x() with */
/* code 111. Called in this file when an alloc or stralloc call fails. */
46 static void die_nomem(fatal)
49 strerr_die2x(111,fatal,ERR_NOMEM);
54 /* NOTE: These do NOT prevent double locking */
/* lockup() opens the file "lock" with open_append(), keeps the descriptor */
/* in fdlock and requests an exclusive lock on it with lock_ex(). Problems */
/* are reported through strerr_die2sys() with code 111; fatal is forwarded */
/* to that call unchanged. */
55 static void lockup(fatal)
58 fdlock = open_append("lock");
/* presumably reached only when open_append() failed - the guarding test */
/* is not part of this view; TODO confirm */
60 strerr_die2sys(111,fatal,ERR_OPEN_LOCK);
61 if (lock_ex(fdlock) == -1) {
63 strerr_die2sys(111,fatal,ERR_OBTAIN_LOCK);
72 static void newsub(psubt,subject,sublen,msg,fatal)
73 /* Initializes subentry pointed to by psubt, adds a '\0' to subject, */
74 /* allocates space and copies in subject, and puts a pointer to it in */
/* psubt->sub. The new entry has no children (higher/lower are 0), */
/* records msg as firstmsg, starts msginthread at 1 and stores sublen. */
/* NOTE(review): the alloc() below reserves exactly sublen bytes, so no */
/* room for an extra '\0' is visible here - confirm the claim above. */
82 register char *cpfrom, *cpto;
83 register unsigned int cpno;
85 psubt->higher = (subentry *) 0;
86 psubt->lower = (subentry *) 0;
87 psubt->firstmsg = msg;
89 psubt->msginthread = 1;
90 if (!(psubt->sub = alloc ((sublen) * sizeof(char))))
/* byte-by-byte copy; cpno counts down the bytes still to copy */
95 while (cpno--) *(cpto++) = *(cpfrom++);
96 psubt->sublen = sublen;
99 static void newauth(pautht,author,authlen,msg,fatal)
100 /* Allocates space for author of length authlen+1 adding a terminal '\0' */
101 /* and puts the pointer in pautht->auth. Analog to newsub(). */
/* The new entry has no children (higher/lower are 0), records msg as */
/* firstmsg and stores authlen. */
/* NOTE(review): the alloc() below reserves authlen bytes, not authlen+1 */
/* as the description above says - confirm which one is intended. */
102 authentry *pautht; /* entry for current message */
103 char *author; /* pointer to author string (not sz!) */
104 unsigned int authlen; /* length of author */
106 char *fatal; /* sz */
109 register char *cpfrom, *cpto;
110 register unsigned int cpno;
/* NOTE(review): these casts name subentry * although pautht is an */
/* authentry *; idx_mkthreads() clears the same fields with */
/* (authentry *) 0. */
112 pautht->higher = (subentry *) 0;
113 pautht->lower = (subentry *) 0;
114 pautht->firstmsg = msg;
115 if (!(pautht->auth = alloc ((authlen) * sizeof(char))))
/* byte-by-byte copy; cpno counts down the bytes still to copy */
120 while (cpno--) *(cpto++) = *(cpfrom++);
121 pautht->authlen = authlen;
/* Prepares the placeholder string dummyind: makes sure it can hold */
/* HASHLEN + 1 bytes, loops over its first HASHLEN positions, sets the */
/* length to HASHLEN and then appends " " with stralloc_append(). Any */
/* stralloc failure goes to die_nomem(fatal). */
124 static void init_dummy(fatal)
129 if (!stralloc_ready(&dummyind,HASHLEN + 1)) die_nomem(fatal);
130 for (i = 0; i< HASHLEN; i++)
132 dummyind.len = HASHLEN;
133 if (!stralloc_append(&dummyind," ")) die_nomem(fatal);
136 void idx_mkthreads(pmsgtable,psubtable,pauthtable,pdatetable,
137 msg_from,msg_to,msg_latest,locked,fatal)
138 /* Threads messages msg_from -> msg_to into pmsgtable & psubtable. When */
139 /* reading the latest index file (containing msg_latest) it locks the */
140 /* directory, unless it is already locked (as in digest creation). */
141 /* msgtable has the subject number 1.. (0 if there is no subject match, */
142 /* which should happen only if the subject index is corrupt.) */
144 /* 19971107 Changed to deal with index files that are missing, or have */
145 /* missing entries, not necessarily reflecting missing archive files. */
146 /* This all to make ezmlm-get more robust to get maximal info out of */
147 /* corrupted archives. */
/* Storage handed back through the table pointers, as allocated below: */
/* *pmsgtable has one msgentry per message in the range, *psubtable and */
/* *pauthtable have room for one entry per message plus one, and */
/* *pdatetable starts with DATENO entries and is enlarged with alloc_re() */
/* in DATENO-entry steps. The subject and author tables end with an entry */
/* whose string pointer is 0; the date table ends with an entry for */
/* msg_to + 1. */
149 msgentry **pmsgtable; /* table of message<->subject */
150 subentry **psubtable; /* subject no, len, str char * */
151 authentry **pauthtable; /* author no, len, str char * */
152 dateentry **pdatetable; /* message per date */
153 unsigned long msg_from; /* first message in range */
154 unsigned long msg_to; /* last message in range */
155 unsigned long msg_latest; /* latest message in archive (for locking) */
156 int locked; /* if already locked */
157 char *fatal; /* Program-specific */
160 unsigned long idxlatest; /* need to lock for this (last) index file */
161 unsigned long msg; /* current msg number */
162 unsigned long endmsg; /* max msg in this idx file */
163 unsigned long tmpmsg; /* index entry's msg number */
164 unsigned long idx; /* current index file no */
165 unsigned long idxto; /* index containing end of range */
166 unsigned long ulmrange; /* total # of messages in range */
167 char *subject; /* subject on line */
168 unsigned int sublen; /* length of subject */
170 unsigned int authlen;
171 unsigned int pos,posa;
172 unsigned long submax; /* max subject num in subtable */
173 subentry *psubnext; /* points to next entry in subtable */
174 subentry *psubt; /* points to entry in subtable */
175 authentry *pauthnext; /* points to next entry in authtable */
176 authentry *pautht; /* points to entry in authtable */
177 int fd; /* index file handle */
178 int flagmissingindex; /* current index file is missing */
179 int flagauth; /* read index entry has author info */
180 int hasauth; /* current msg's entry has author info */
184 unsigned int datepos,datemax;
185 unsigned int datetablesize,datetableunit;
186 unsigned int lastdate = 0;
187 unsigned int thisdate;
188 register msgentry *x, *y;
190 /* a few unnecessary sanity checks */
191 if (msg_to > msg_latest)
193 if (msg_to < msg_from)
194 strerr_die2x(100,fatal,"Program error: bad range in idx_mkthreads");
195 ulmrange = msg_to - msg_from + 1;
196 if (!(*pmsgtable = (msgentry *) alloc(ulmrange * sizeof(msgentry))))
199 x = y + ulmrange; /* clear */
205 /* max entries - acceptable waste for now */
206 if (!(*psubtable = (subentry *) alloc((ulmrange+1) * sizeof(subentry))))
209 if (!(*pauthtable = (authentry *) alloc((ulmrange+1) * sizeof(authentry))))
211 datetableunit = DATENO * sizeof(dateentry);
212 datetablesize = datetableunit;
213 if (!(*pdatetable = (dateentry *) alloc(datetablesize)))
216 datemax = DATENO - 2; /* entry 0 and end marker */
/* index file number idx covers messages 100*idx .. 100*idx + 99 */
219 idxlatest = msg_latest / 100;
220 idxto = msg_to / 100;
222 psubnext = *psubtable; /* dummy node to get tree going. Basically, */
223 psubt = &sdummy; /* assure that subject > psubt-sub and that */
224 init_dummy(fatal); /* below ok unless HASHLEN > 40 */
226 psubt->sublen = 40; /* there is something to hold psubt->higher */
227 psubt->higher = (subentry *) 0;
228 psubt->lower = (subentry *) 0;
229 pauthnext = *pauthtable;
231 pautht->auth = psubt->sub;
232 pautht->authlen = psubt->sublen;
233 pautht->higher = (authentry *) 0;
234 pautht->lower = (authentry *) 0;
235 for (idx = msg_from / 100; idx <= idxto; idx++) {
236 /* make index file name */
237 if (!stralloc_copys(&line,"archive/")) die_nomem(fatal);
238 if (!stralloc_catb(&line,strnum,fmt_ulong(strnum,idx))) die_nomem(fatal);
239 if (!stralloc_cats(&line,"/index")) die_nomem(fatal);
240 if (!stralloc_0(&line)) die_nomem(fatal);
241 if (!locked && idx == idxlatest)
243 flagmissingindex = 0;
244 fd = open_read(line.s);
246 if (errno == error_noent) { /* this means the index is not here */
247 /* but the list is supposedly indexed */
248 flagmissingindex = 1;
250 strerr_die4sys(111,fatal,ERR_OPEN,line.s,": ");
252 substdio_fdbuf(&ssindex,read,fd,indexbuf,sizeof(indexbuf));
254 msg = 100L * idx; /* current msg# */
255 endmsg = msg + 99L; /* max msg in this index */
256 if (!msg) msg = 1L; /* for start to make msg > tmpmsg */
257 tmpmsg = 0L; /* msg number of read index line */
258 if (endmsg > msg_to) /* skip non-asked for subjects */
260 for (; msg <= endmsg; msg++) {
/* fetch another index entry only once msg has passed the last one read */
261 if (!flagmissingindex && (msg > tmpmsg)) {
263 if (getln(&ssindex,&line,&match,'\n') == -1)
264 strerr_die3sys(111,fatal,ERR_READ,"index: ");
266 flagmissingindex = 1;
268 pos = scan_ulong(line.s,&tmpmsg);
/* a ':' right after the message number: a second line is read into authline */
269 if (line.s[pos++] == ':') {
270 if (getln(&ssindex,&authline,&match,'\n') == -1)
271 strerr_die3sys(111,fatal,ERR_READ,"index: ");
273 flagmissingindex = 1;
281 if (msg < msg_from) /* Nothing before start of range */
284 subject = line.s + pos;
285 sublen = line.len - pos;
286 if (sublen <= HASHLEN)
287 strerr_die2x(100,fatal,ERR_BAD_INDEX);
290 subject = dummyind.s;
291 sublen = dummyind.len;
294 for(;;) { /* search among already known subjects */
/* only the first HASHLEN bytes take part in the comparison */
295 res = str_diffn(psubt->sub,subject,HASHLEN);
298 psubt = psubt->higher;
300 newsub(psubnext,subject,sublen,msg,fatal);
301 psubt->higher = psubnext;
306 } else if (res > 0) {
308 psubt = psubt->lower;
310 newsub(psubnext,subject,sublen,msg,fatal);
311 psubt->lower = psubnext;
317 psubt->lastmsg = msg;
318 (psubt->msginthread)++; /* one more message in thread */
322 /* first subnum =1 (=0 is empty for thread) */
323 pmsgt = *pmsgtable + msg - msg_from;
324 pmsgt->subnum = (unsigned int) (psubt - *psubtable + 1);
325 pmsgt->date = lastdate;
328 while (authline.s[pos] && authline.s[pos] != ' ') pos++;
329 if (authline.s[++pos]) {
330 thisdate = date2yyyymm(authline.s + pos);
331 if (thisdate) pmsgt->date = thisdate;
/* a later date than any seen so far opens a new date table entry */
332 if (pmsgt->date > lastdate) {
333 lastdate = pmsgt->date;
334 if (datepos >= datemax) { /* more space */
336 if (!(*pdatetable = (dateentry *) alloc_re(*pdatetable,
337 datetablesize,datetablesize+datetableunit)))
340 (*pdatetable)[datepos].msg = msg; /* first msg this mo */
341 (*pdatetable)[datepos].date = lastdate;
344 posa = byte_chr(authline.s,authline.len,';');
/* NOTE(review): the second test indexes with pos although the ';' */
/* offset is in posa; idx_mkthread() tests the byte right after the */
/* ';' here - confirm whether posa+1 was intended. */
345 if (authline.len > posa + HASHLEN + 1 && authline.s[pos+1] != ' ') {
346 /* old: "; auth", new: ";hash auth" */
347 auth = authline.s + posa + 1;
348 authlen = authline.len - posa - 1;
351 authlen = dummyind.len;
354 /* All right! Same procedure, but for author */
355 for (;;) { /* search among already known authors */
356 res = str_diffn(pautht->auth,auth,HASHLEN);
359 pautht = pautht->higher;
361 newauth(pauthnext,auth,authlen,msg,fatal);
362 pautht->higher = pauthnext;
367 } else if (res > 0) {
369 pautht = pautht->lower;
371 newauth(pauthnext,auth,authlen,msg,fatal);
372 pautht->lower = pauthnext;
380 } /* link from message to this author */
381 pmsgt->authnum = (unsigned int) (pautht - *pauthtable + 1);
382 pautht = *pauthtable;
385 psubt = *psubtable; /* setup psubt. Done here rather than before */
386 /* the for loop, so that we can start off */
387 /* the dummy node. */
391 if (!locked && idx == idxlatest)
392 unlock(); /* 'locked' refers to locked before calling */
394 psubnext->sub = (char *) 0; /* end of table marker */
395 pauthnext->auth = (char *) 0; /* end of table marker */
/* closing date entry: one past the last message and the last date */
396 (*pdatetable)[datepos].msg = msg_to + 1;
397 (*pdatetable)[datepos].date = lastdate + 1;
401 void idx_mkthread(pmsgtable,psubtable,pauthtable,msg_from,msg_to,msg_master,
402 msg_latest,locked,fatal)
403 /* Works like idx_mkthreads, except that it finds the subject for message */
404 /* msg_master, then identifies messages in the range that have the same */
405 /* subject. msgtable entries with subject 0 do not match, with '1' do match.*/
/* Two passes are made: the index file holding msg_master is read first to */
/* pick up the master subject, then every index file from msg_from / 100 */
/* through msg_to / 100 is read and entries are compared against that */
/* subject on their first HASHLEN bytes. *psubtable is allocated with */
/* room for two entries, *pauthtable with one per message plus one. */
407 msgentry **pmsgtable; /* pointer to table of message<->subject */
408 subentry **psubtable; /* ptr to tbl of subject no, len, str char * */
409 authentry **pauthtable;
410 unsigned long msg_from; /* first message in range */
411 unsigned long msg_to; /* last message in range */
412 unsigned long msg_latest; /* latest message in archive (for locking) */
413 unsigned long msg_master; /* master message for single thread, else 0*/
414 int locked; /* if already locked */
415 char *fatal; /* Program-specific */
418 unsigned long idxlatest; /* need to lock for this (last) index file */
419 unsigned long idxto; /* index for last msg in range */
420 unsigned long idx; /* current index file no */
421 unsigned long msg; /* index entry's msg number */
422 unsigned long ulmrange; /* total # of messages in range */
423 subentry *psubt; /* points to last entry in subtable */
424 int ffound; /* msg subject was found in subtable */
425 int flagauth; /* there is author info */
426 int firstfound = 1; /* = 1 until first message in thread found */
427 int res; /* comparison result */
429 unsigned int authlen;
430 authentry *pauthnext; /* points to next entry in authtable */
431 authentry *pautht; /* points to entry in authtable */
433 int fd; /* index file handle */
436 register msgentry *x,*y;
/* NOTE(review): ulmrange is unsigned long, so "<= 0" can only be true */
/* for 0; msg_to < msg_from wraps to a large value instead of tripping */
/* this check. The message text also names idx_mkthreads rather than */
/* this function. */
438 if ((ulmrange = msg_to - msg_from +1) <= 0)
439 strerr_die2x(100,fatal,"Program error: bad range in idx_mkthreads");
440 if (!(*pmsgtable = (msgentry *) alloc(ulmrange * sizeof(msgentry))))
450 if (!(*psubtable = (subentry *) alloc(2 * sizeof(subentry))))
453 if (!(*pauthtable = (authentry *) alloc((ulmrange + 1) * sizeof(authentry))))
456 pauthnext = *pauthtable;
460 pautht->authlen = 21;
461 pautht->higher = (authentry *) 0;
462 pautht->lower = (authentry *) 0;
463 idxlatest = msg_latest / 100;
464 idxto = msg_to / 100;
465 idx = msg_master / 100; /* index for master subject */
467 /* Get master subject */
468 if (!stralloc_copys(&line,"archive/")) die_nomem(fatal);
469 if (!stralloc_catb(&line,strnum,fmt_ulong(strnum,idx))) die_nomem(fatal);
470 if (!stralloc_cats(&line,"/index")) die_nomem(fatal);
471 if (!stralloc_0(&line)) die_nomem(fatal);
473 if (!locked && idx == idxlatest)
475 fd = open_read(line.s);
478 if (errno != error_noent)
479 strerr_die4sys(111,fatal,ERR_OPEN,line.s,": ");
481 strerr_die2x(111,fatal,ERR_NOINDEX); /* temp - admin can fix! */
483 substdio_fdbuf(&ssindex,read,fd,indexbuf,sizeof(indexbuf));
/* NOTE(review): read failures in this first pass are reported with */
/* ERR_OPEN, while the second pass below uses ERR_READ - confirm. */
485 if (getln(&ssindex,&line,&match,'\n') == -1)
486 strerr_die3sys(111,fatal,ERR_OPEN,"index: ");
489 pos=scan_ulong(line.s,&msg);
490 if (line.s[pos++] == ':') { /* marker for author info */
495 if (msg == msg_master) {
496 newsub(psubt,line.s+pos,line.len-pos,msg,fatal);
497 /* need to update msg later! */
501 if (flagauth) { /* skip author line */
502 if (getln(&ssindex,&line,&match,'\n') == -1)
503 strerr_die3sys(111,fatal,ERR_OPEN,"index: ");
510 if (!locked && idx == idxlatest)
513 strerr_die2x(100,fatal,ERR_NOINDEX);
/* second pass: walk every index file that overlaps the requested range */
514 for (idx = msg_from / 100; idx <= idxto; idx++) {
515 /* make index file name */
516 if (!stralloc_copys(&line,"archive/")) die_nomem(fatal);
517 if (!stralloc_catb(&line,strnum,fmt_ulong(strnum,idx))) die_nomem(fatal);
518 if (!stralloc_cats(&line,"/index")) die_nomem(fatal);
519 if (!stralloc_0(&line)) die_nomem(fatal);
520 if (!locked && idx == idxlatest)
522 fd = open_read(line.s);
524 if (errno != error_noent)
525 strerr_die4sys(111,fatal,ERR_OPEN,line.s,": ");
527 substdio_fdbuf(&ssindex,read,fd,indexbuf,sizeof(indexbuf));
529 if (getln(&ssindex,&line,&match,'\n') == -1)
530 strerr_die3sys(111,fatal,ERR_READ,"index: ");
533 pos=scan_ulong(line.s,&msg);
534 if (line.s[pos++] == ':') {
537 if (getln(&ssindex,&authline,&match,'\n') == -1)
538 strerr_die3sys(111,fatal,ERR_READ,"index: ");
543 if (msg < msg_from) /* Nothing before start of range */
545 if (msg > msg_to) /* Don't do anything after range */
/* str_diffn() returning 0 means the first HASHLEN bytes agree with the master subject */
547 if (!str_diffn(psubt->sub,line.s+pos,HASHLEN)) {
548 pmsgt = *pmsgtable + msg - msg_from;
549 if (firstfound) { /* update to first message with this subj */
550 psubt->firstmsg = msg;
553 psubt->lastmsg = msg;
557 pmsgt->date = date2yyyymm(authline.s + 1);
558 pos = byte_chr(authline.s,authline.len,';');
559 if (authline.len > pos + HASHLEN + 1 && authline.s[pos+1] != ' ') {
560 /* old: "; auth", new: ";hash auth" */
561 auth = authline.s + pos + 1;
562 authlen = authline.len - pos - 1;
565 authlen = dummyind.len;
567 for (;;) { /* search among already known authors */
568 res = str_diffn(pautht->auth,auth,HASHLEN);
571 pautht = pautht->higher;
573 newauth(pauthnext,auth,authlen,msg,fatal);
574 pautht->higher = pauthnext;
579 } else if (res > 0) {
581 pautht = pautht->lower;
583 newauth(pauthnext,auth,authlen,msg,fatal);
584 pautht->lower = pauthnext;
592 } /* link from message to this author */
593 pmsgt->authnum = (unsigned int) (pautht - *pauthtable + 1);
594 pautht = *pauthtable;
601 if (!locked && idx == idxlatest)
605 psubt->sub = (char *) 0; /* end of table marker */
606 pauthnext->auth = (char *) 0; /* end of table marker */
609 void idx_mklist(pmsgtable,psubtable,pauthtable,msg_from,msg_to,fatal)
610 /* Like mkthreads, except that it works without a subject index. The result */
611 /* is just a dummy subject and a sequential list of messages. This to allow */
612 /* use of the same routines when creating digest from lists that have no */
613 /* subject index (for whatever reason). */
/* The subject entry is created from dummyind with firstmsg = msg_from and */
/* lastmsg = msg_to. *pauthtable gets a single entry whose auth pointer is */
/* 0. No index file is opened in the code visible here. */
614 msgentry **pmsgtable; /* pointer to table of message<->subject */
615 subentry **psubtable; /* ptr to tbl of subject no, len, str char * */
616 authentry **pauthtable;
617 unsigned long msg_from; /* first message in range */
618 unsigned long msg_to; /* last message in range */
619 char *fatal; /* Program-specific */
621 unsigned long ulmrange;
622 register msgentry *x,*y;
/* NOTE(review): ulmrange is unsigned long, so "<= 0" can only be true */
/* for 0; msg_to < msg_from wraps instead of tripping this check. The */
/* message text names idx_mkthreads rather than this function. */
626 if ((ulmrange = msg_to - msg_from +1) <= 0)
627 strerr_die2x(111,fatal,"bad range in idx_mkthreads :");
629 if (!(*pmsgtable = (msgentry *) alloc(ulmrange * sizeof(msgentry))))
640 if (!(*psubtable = (subentry *) alloc(2 * sizeof(subentry))))
643 newsub(psubt,dummyind.s,dummyind.len,msg_from,fatal);
644 psubt->lastmsg = msg_to;
646 psubt->sub = (char *) 0;
647 if (!(*pauthtable = (authentry *) alloc(sizeof(authentry))))
648 die_nomem(fatal); /* nodata. Avoid dangling ptr. */
649 pautht = *pauthtable;
650 pautht->auth = 0; /* tells app that there are no author data */
651 pautht->higher = (authentry *) 0;
652 pautht->lower = (authentry *) 0;
655 void idx_destroythread(msgtable,subtable,authtable)
656 /* Frees space allocated by idxthread routines. This is needed only if */
657 /* one does several threadings in one program run. Otherwise, exit() */
658 /* should free all allocated memory, which will be faster. */
/* The per-entry strings (sub / auth) are released first, then the three */
/* tables themselves. The author walk stops at the entry whose auth */
/* pointer is 0. */
659 msgentry *msgtable; subentry *subtable; authentry *authtable;
664 psubt = subtable; /* free subjects */
666 alloc_free(psubt->sub);
670 pautht = authtable; /* free authors */
671 while(pautht->auth) {
672 alloc_free(pautht->auth);
676 alloc_free(subtable); /* free subtable */
677 alloc_free(authtable); /* free authtable */
678 alloc_free(msgtable); /* free msgtable */
/* NOTE(review): the three parameters are passed by value, so zeroing */
/* them here changes only the local copies; the caller's pointers */
/* still hold the freed addresses. */
679 subtable = (subentry *) 0; /* kill pointers */
680 authtable = (authentry *) 0;
681 msgtable = (msgentry *) 0;