tsl.c

   1 /* -*-C-*-
   2 *******************************************************************************
   3 *
   4 * File:         tsl.c
   5 * RCS:          $Header: /home/matthew/cvs/bible-kjv-4.10/tsl.c,v 2.10 2005/01/22 17:49:03 matthew Exp $
   6 * Description:  Text Storage Library
   7 * Author:       Chip Chapin, Hewlett Packard Company
   8 * Created:      Thu Aug 24 15:37:16 1989
   9 * Modified:     Mon Apr 26 11:16:05 1993 (Chip Chapin) chip@hpclbis
  10 * Language:     C
  11 * Package:      Bible Retrieval System
  12 * Status:       Experimental (Do Not Distribute)
  13 *
  14 *******************************************************************************
  15 *
  16 * Revisions:
  17 *
  18 * Fri Apr 23 10:14:37 1993 (Chip Chapin) chip@hpclbis
  19 *  Revised tsl_init for new portable data file headers.
  20 *  Rewrite shift-and-or expressions to account for Ultrix C oddities.
  21 * Thu Apr 22 11:38:47 1993 (Chip Chapin) chip@hpclbis
  22 *  Fix incorrect cast to cf_index.
  23 * Thu Mar  4 12:11:24 1993 (Chip Chapin) chip@hpclbis
  24 *  Added SEEK_SET definition for non-SysV systems.
  25 * Wed Jan  6 10:05:43 1993 (Chip Chapin) chip@hpclbis
  26 *  Added compression support for concordance lists.
  27 * Mon Jan  4 11:21:50 1993 (Chip Chapin) chip@hpclbis
  28 *  Added range parameters to tsl_scan_concordance.
  29 * Thu Dec 24 11:32:22 1992 (Chip Chapin) chip@hpclbis
  30 *  Tweak a couple of things to eliminate compile warnings.
  31 * Mon Dec 21 13:40:54 1992 (Chip Chapin) chip@hpclbis
  32 *  Made findfile a separate function in util.c
  33 *******************************************************************************
  34 * $Log: tsl.c,v $
  35 * Revision 2.10  2005/01/22 17:49:03  matthew
  36 * remove spurious definition of malloc and strtok
  37 *
  38 * Revision 2.9  2005/01/22 16:54:13  matthew
  39 * another cast to make things explicit
  40 *
  41 * Revision 2.8  2005/01/22 00:25:08  matthew
  42 * these are not const
  43 *
  44 * Revision 2.7  2005/01/21 19:47:07  matthew
  45 * include cmp.h which prototypes the compression routines we call
  46 *
  47 * Revision 2.6  2003/07/26 11:57:02  matthew
  48 * tidy up of code (__attribute__ ((noreturn)), remove unused variables, etc
  49 *
  50 * Revision 2.5  2003/07/26 11:44:55  matthew
  51 * add function prototypes to the tsl header file (and add some const modifiers)
  52 *
  53 * Revision 2.4  2003/07/26 09:25:05  matthew
  54 * Move tsl_error declaration to tsl.h
  55 *
  56 * Revision 2.3  2003/07/26 09:18:06  matthew
  57 * use starg.h-style variable arguments. Also use gccs format checking
  58 *
  59 * Revision 2.2  2003/01/08 19:10:24  matthew
  60 * correct the .win assignments to be 0 not null, since they are ints not pointers
  61 *
  62 * Revision 2.1  2003/01/08 15:50:53  matthew
  63 * applied debian patch
  64 *
  65 * Revision 2.0  2003/01/08 15:29:52  matthew
  66 * versions collected from the net
  67 *
  68  * Revision 1.9  93/04/26  11:18:14  11:18:14  chip (Chip Chapin)
  69  * Release 4.00
  70  * Public release of portable datafile version.
  71  *
  72  * Revision 1.8  93/04/23  13:08:02  13:08:02  chip (Chip Chapin)
  73  * PORTABILITY RELEASE
  74  * This version supports portable data files, usable on machines with
  75  * differing native byte-orders.
  76  * Also, this version compiles and runs on non-HPUX systems.  It has been
  77  * tested on SunOS 4.? and ULTRIX 4.?, using SPARC and DEC 3100 hardware
  78  * respectively.  Note that the data file format has rolled again.
  79  *
  80  * Revision 1.7  93/01/07  12:17:29  12:17:29  chip (Chip Chapin)
  81  * Release 3.01: Greatly improved compression of concordance data file.
  82  *
  83  * Revision 1.6  93/01/04  16:20:59  16:20:59  chip (Chip Chapin)
  84  * Release 2.1, implements ?in and ?or commands.
  85  *
  86  * Revision 1.5  92/12/24  11:37:10  11:37:10  chip (Chip Chapin)
  87  * Minor release 2.04a, fixes certain compile warnings.
  88  *
  89  * Revision 1.4  92/12/22  11:28:57  11:28:57  chip (Chip Chapin)
  90  * Minor release 2.01 -- fix a couple of bugs.
  91  *
  92  * Revision 1.3  92/12/21  20:00:49  20:00:49  chip (Chip Chapin)
  93  * Release 2.0.  This release adds the concordance, and some small fixes.
  94  *
  95  * Revision 1.2  89/09/14  20:33:52  20:33:52  chip (Chip Chapin)
  96  * Release 1-2.  Supports -f and -l options for formatting the output.
  97  * Updates primarily brl.c, bible.c, and bible.1.
  98  *
  99  * Revision 1.1  89/09/05  17:49:19  17:49:19  chip (Chip Chapin)
 100  * Initial revision
 101  *
 102 *
 103 */
 104
 105 /*----------------------------------------------------------------------
 106 |   NAME:
 107 |       tsl.c
 108 |
 109 |   PURPOSE:
 110 |       This file implements the library of routines that are
 111 |       dependent on the storage structure of the text database.
 112 |
 113 |   FUNCTIONS:
 114 |       tsl_gettext
 115 |               Return text for a particular range of lines.
 116 |       tsl_printtext
 117 |               Write text to stdout instead of returning a buffer.
 118 |       tsl_scan_concordance
 119 |               Return list of references for a particular target.
 120 |       tsl_init
 121 |               Initialize.
 122 |       tsl_close
 123 |               Wrap up.
 124 |
 125 |   HISTORY:
 126 |       890824 cc Extracted storage-dependent functions from brl.c
 127 |
 128 \*----------------------------------------------------------------------*/
 129
 130 #include <stdio.h>
 131 #include <stdarg.h>
 132 #include <stdlib.h>
 133 #include <string.h>
 134 /* #include <search.h> */
 135 #include "cmp.h"
 136 #include "tsl.h"
 137
 138
 139 #  ifndef SEEK_SET
 140 #    define SEEK_SET 0
 141 #  endif /* SEEK_SET */
 142
 143 #define FALSE   (0)
 144 #define TRUE    (1)
 145
 146 #ifdef __GNUC__
 147 static char rcs_ident[] __attribute__ ((unused)) ="@(#)$Header: /home/matthew/cvs/bible-kjv-4.10/tsl.c,v 2.10 2005/01/22 17:49:03 matthew Exp $";
 148 #else
 149 static char rcs_ident[]="@(#)$Header: /home/matthew/cvs/bible-kjv-4.10/tsl.c,v 2.10 2005/01/22 17:49:03 matthew Exp $";
 150 #endif
 151
  152 FILE *tfp;                      /* Text data file pointer */
  153 FILE *cfp;                      /* Concordance data file pointer; NULL when
                                         no usable concordance was opened */
  154 struct tsl_conc_fileheader cfh; /* Concordance file header */
  155 char *cf_words;                 /* Concordance string (word) buffer:
                                         consecutive NUL-terminated words, the
                                         last one being the sentinel "~" */
  156 short int *cf_index;            /* Concordance index, one entry per word:
                                         a negative entry is a negated single
                                         reference, any other entry is the byte
                                         length of the word's reference list */
  157 int tsl_wsize;                  /* Window size (bytes) */
  158 int tsl_wnum;                   /* Number of windows */
  159 file_ptr_t *tsl_wtable=NULL;    /* Table of window offsets in data file;
                                         tsl_wnum+1 entries, so entry [w+1] marks
                                         where compressed window w ends */
  160
  161 /* buffer structures.
  162    We maintain a doubly linked list of uncompressed text buffers, sorted in
  163    LRU order.  The most recently used buffer sits right after the head
         sentinel; the buffer right before the tail sentinel is the one that
         gets recycled when the buffer limit is reached.
  164    */
  165 struct buffer_rec {
  166     struct buffer_rec   *prev, *next;   /* doubly linked list */
  167     int         win;            /* number of associated window */
  168     char        *bufferp;       /* the buffer (tsl_wsize bytes of
                                         uncompressed text) */
  169 };
  170
  171 struct buffer_rec tsl_firstbuffer;      /* Head sentinel of the LRU list (a
                                                 list node, not a pointer, and it
                                                 never owns a text buffer). */
  172 struct buffer_rec tsl_lastbuffer;       /* Tail sentinel of the LRU list. */
  173 struct buffer_rec **tsl_wbuffers=NULL;  /* table for associating a window with
  174                                            a particular buffer.  Indexed by
  175                                            window number, the table yields a
  176                                            pointer to a buffer_rec, or NULL
                                                 when that window is not currently
                                                 held in memory. */
  177
  178 char *tsl_cmpbuffer=NULL;       /* Global buffer for compressed text */
  179 int tsl_numbuffs;               /* Count how many buffers active */
  180 int tsl_maxbuffs;               /* Maximum number of buffers we're allowed
                                         (tsl_maxbuffusage / tsl_wsize, but never
                                         less than 1) */
  181 int tsl_maxbuffusage=0x100000;  /* Max buffer mem usage (bytes); replaced by
                                         tsl_init when a positive memlimit is
                                         given */
 182
 183
 184 void tsl_error(const int fatal, const char *format, ...)
 185 /*----------------------------------------------------------------------
 186 |   NAME:
 187 |       tsl_error
 188 |
 189 |   ALGORITHM:
 190 |       Report an error specific to the TSL library.
 191 |
 192 |       fatal    TRUE if the error should cause an exit.
 193 |       va_alist Variable argument list for printing the error
 194 |                report.
 195 |
 196 |   HISTORY:
 197 |       890904 cc Created.
 198 |
 199 \*----------------------------------------------------------------------*/
 200 {
 201     va_list ap;
 202
 203     va_start(ap, format);
 204
 205     vfprintf(stderr, format, ap);
 206     putc('\n', stderr);
 207
 208     va_end(ap);
 209     if (fatal) exit(-1);
 210 } /* tsl_error */
 211
 212
 213
 214 int tsl_scan_concordance(const char *target, ref_t *sbuf, ref_t range_start,
 215                          ref_t range_end )
 216 /*----------------------------------------------------------------------
 217 |   NAME:
 218 |       tsl_scan_concordance
 219 |
 220 |   ALGORITHM:
 221 |       Read concordance data file, searching for a specific
 222 |       target word.   Return list of references, optionally
 223 |       limited to those within a specific range.
 224 |
 225 |         target        Target string to search for
 226 |         sbuf          Output buffer for references
 227 |         range_start   Starting reference.  If zero, then return
 228 |                       all references.
 229 |         range_end     Ending reference.
 230 |
 231 |   RETURN VALUE: number of matches found
 232 |
 233 |   HISTORY:
 234 |       921217 cc Initial creation.
 235 |       921221 cc Revised to use new concordance file format.
 236 |       930104 cc Added range_start, range_end params.
 237 |
 238 \*----------------------------------------------------------------------*/
 239 {
 240     char *word;
 241     int indx, n;
 242     int i, j, rsize;
 243     ref_t ref, curbase;
 244     unsigned char mapbyte;
 245     file_ptr_t inx_start;
 246     unsigned char tbuf[SELECTSZ*sizeof(ref_t)];
 247
 248     if (cfp == NULL) {
 249         tsl_error( FALSE, "(No concordance data file)" );
 250         return(0);
 251     }
 252
 253     /* Search through concordance string buffer */
 254     /* Note that the last string is guaranteed to be "~" */
 255     word = cf_words;
 256     indx = 0;
 257     inx_start = 0;              /* Keep track of index into ref data pool */
 258                                 /* See makeconcfile.c */
 259     while ((n=strcmp(word, target)) < 0) {
 260         /* This isn't it, but keep looking... */
 261         while (*word++) /* advance to next word */
 262             ;
 263         rsize = cf_index[indx];
 264         inx_start += rsize < 0 ? 0 : rsize;
 265         indx++; /* indx is count of words */
 266     }
 267     if (n > 0) {
 268         /* This isn't it, and we've passed where it would be */
 269         return(0);
 270     }
 271
 272     /* Found it! */
 273     rsize = cf_index[indx];
 274     if (rsize < 0) {
 275         /* Special case 1: singletons
 276            If a word has only a single ref, then the ref is
 277            negated and put in the word's index entry.
 278          */
 279         *sbuf = -rsize;
 280         if (!range_start || (range_start <= *sbuf && *sbuf <= range_end))
 281             return( 1 );
 282         else
 283             return( 0 );
 284     }
 285
 286     /* Read all the refs for this word */
 287     /* Where do they end? */
 288     fseek( cfp, (int)(inx_start + univ2int(cfh.data_ptr)), SEEK_SET );
 289     fread( tbuf, 1, rsize, cfp );
 290
 291     /* Process the ref list.
 292        Expand compressed references.
 293        If a range has been specified, exclude refs that are out of it.
 294      */
 295     for (n=i=0; i < rsize; i++) {
 296         if (tbuf[i] > 0x7f) {
 297             /* This begins a compressed entry */
 298             /* First get the base ref, which has been negated */
 299             ref = (tbuf[i] << 8);       /* Do this in two parts. */
 300             ref |= tbuf[++i];           /* Ultrix C goofs otherwise. */
 301             curbase = ref = -ref;
 302             if (range_start) {
 303                 if (ref > range_end) break;
 304                 if (range_start > ref) {
 305                     ;
 306                 } else {
 307                     sbuf[n++] = ref;
 308                 }
 309             } else {
 310                 sbuf[n++] = ref;
 311             }
 312
 313             mapbyte = tbuf[++i];
 314             /* Two zero bytes means end of map */
 315             while (mapbyte | tbuf[i+1]) {
 316                 /* Extract refs from mapbyte */
 317                 for (j=0; j<8; j++) {
 318                     if (mapbyte & (0x01 <<j)) {
 319                         ref = curbase +j +1;
 320                         if (range_start) {
 321                             if (ref > range_end) return( n );
 322                             if (range_start > ref) continue;
 323                         }
 324                         sbuf[n++] = ref;
 325                     }
 326                 } /* for each bit */
 327                 mapbyte = tbuf[++i];
 328                 curbase += 8;
 329             } /* while */
 330             i++; /* Advance to second zero byte */
 331
 332         } else {
 333             /* This is a simple reference.
 334                References are two-byte integers, in hi,lo byte order.
 335              */
 336             ref = (tbuf[i] << 8);       /* Do this in two parts. */
 337             ref |= tbuf[++i];           /* Ultrix C goofs otherwise. */
 338             if (range_start) {
 339                 if (ref > range_end) break;
 340                 if (range_start > ref) continue;
 341             }
 342             sbuf[n++] = ref;
 343         }
 344     }
 345
 346     /* Return count of entries */
 347     return( n );
 348 } /* tsl_scan_concordance */
 349
 350
 351
 352 int tsl_gettext(const int vn, const int vc, char *vb, const int vbsize )
 353 /*----------------------------------------------------------------------
 354 |   NAME:
 355 |       tsl_gettext
 356 |
 357 |   ALGORITHM:
 358 |       Stuff buffer "vb" with text of line number "vn" and the
 359 |       "vc-1" following lines, but no more than "vbsize" (buffer
 360 |       size) characters.
 361 |
 362 |       Returns the size (characters) of the text, *not* counting
 363 |       the terminating null.
 364 |
 365 |   HISTORY:
 366 |       890114 cc Initial implementation using simple plain text file.
 367 |       890829 cc Updated to think about buffer size limits.
 368 |
 369 \*----------------------------------------------------------------------*/
 370 {
 371     int vstart, vsize;
 372
 373     vstart = line_locator[vn];
 374     vsize = line_locator[ vn+vc ] - vstart;
 375     if (vsize >= vbsize)
 376         vsize = vbsize-1;       /* Leave room for trailing null */
 377     return tsl_textread( vstart, vsize, vb );
 378 } /* tsl_gettext */
 379
 380
 381
 382 int tsl_printtext(const int vn, const int vc)
 383 /*----------------------------------------------------------------------
 384 |   NAME:
 385 |       tsl_printtext
 386 |
 387 |   ALGORITHM:
 388 |       Write text of line number "vn" and the "vc-1" following
 389 |       lines to stdout.
 390 |
 391 |       Returns the number of characters written.
 392 |
 393 |   HISTORY:
 394 |       890902 cc Creation from tsl_gettext.
 395 |
 396 \*----------------------------------------------------------------------*/
 397 {
 398     int vstart, vsize;
 399
 400     vstart = line_locator[vn];
 401     vsize = line_locator[ vn+vc ] - vstart;
 402
 403     return tsl_textread( vstart, vsize, NULL );
 404 } /* tsl_printtext */
 405
 406
 407
 408 int tsl_textread(int start, const int vsize, char *vb)
 409 /*----------------------------------------------------------------------
 410 |   NAME:
 411 |       tsl_textread
 412 |
 413 |   ALGORITHM:
 414 |       Get text starting at absolute byte location "start", and
 415 |       continuing for "vsize" bytes.  If "vb" is NULL, then write
 416 |       the text to stdout, otherwise put it into the buffer
 417 |       pointed to by "vb" and append a null (\0).
 418 |
 419 |       Returns the size (characters) of the text, *not* counting
 420 |       the terminating null.
 421 |
 422 |   HISTORY:
 423 |       890824 cc Rewritten to handle windowed compressed files.
 424 |       890829 cc Added all the buffer handling -- used to throw
 425 |               them away each time.
 426 |       890902 cc Added stdout option.
 427 |       890904 cc Iterate on multiple windows, instead of recursing.
 428 |
 429 \*----------------------------------------------------------------------*/
 430 {
 431 #ifdef PLAINTEXT
 432     /* here's the version that works with a plain text file */
 433     if (fseek( tfp, (int)start, 0 ) == EOF) {
 434         tsl_error( TRUE, "Cannot seek" );
 435     }
 436     else {
 437         if (fread( vb, 1, vsize, tfp ) != vsize) {
 438             tsl_error( TRUE, "Short read" );
 439         }
 440         vb[vsize] = '\0';
 441     }
 442 #else
 443     /* here's the version that works with a windowed compressed file */
 444     /* We use the starting byte in the original file ("start"), and
 445        the window size ("tsl_wsize") to determine which window we need.
 446        Using "tsl_wtable", we know where the compressed window starts in the
 447        file, and how big it is.  Read the compressed window and uncompress it.
 448        Now we can locate the start of the text and return it.
 449        */
 450     int window;                 /* current window number */
 451     int bstart;                 /* starting byte relative to beginning
 452                                            of uncompressed window */
 453     file_ptr_t wstart;                  /* starting position of compressed
 454                                            window within the data file */
 455     int cmpwsize;                       /* size of compressed window within the
 456                                            data file */
 457     int bytes_remaining;                /* Number of bytes yet to be done */
 458     int size;                           /* bytes needed from current window */
 459     char *uncbuf;                       /* buffer for uncompressed data */
 460     struct buffer_rec *brp;             /* current buffer rec */
 461     char *cp, *ep;                      /* Handy pointers */
 462
 463     bytes_remaining = vsize;
 464     while (bytes_remaining > 0) {
 465         window = start / tsl_wsize;     /* which window? */
 466         if (window >= tsl_wnum)         /* Yikes!  Off the end! */
 467             tsl_error( TRUE, "Window %d out of range 0-%d", window, tsl_wnum );
 468         bstart = start % tsl_wsize;     /* where in [uncompressed] window? */
 469         if (bstart+bytes_remaining > tsl_wsize)
 470             /* Request crosses boundary into next window */
 471             size = tsl_wsize-bstart;
 472         else
 473             /* Request can be completed in current window */
 474             size = bytes_remaining;
 475         start += size;
 476
 477         /* Notes on buffer handling ...
 478            Three main cases are recognized:
 479            1) The buffer for this window is already present.
 480            2) It's not present and we can allocate a new buffer.
 481            3) It's not present and we reuse an existing buffer.
 482            */
 483
 484         if (tsl_wbuffers[window] != NULL) {
 485             /* Buffer is already present */
 486             brp = tsl_wbuffers[window];
 487             uncbuf = brp->bufferp;
 488
 489             /* Unlink the buffer from the list.
 490                We completely unlink it so the code for putting it back in
 491                can be the same regardless of whether or not this is a new buffer.
 492                */
 493             brp->prev->next = brp->next;
 494             brp->next->prev = brp->prev;
 495
 496         } else {
 497             wstart   = tsl_wtable[window];      /* window start in file */
 498             cmpwsize = tsl_wtable[window+1]
 499                 - wstart;                       /*Size of compressed window*/
 500             if (fseek( tfp, (int)wstart, 0 ) == EOF) {
 501                 tsl_error( TRUE, "Bad seek" );
 502             }
 503             if (cmpwsize > tsl_wsize) {
 504                 /* This should never happen */
 505                 tsl_error( TRUE, "Compressed window bigger than window size!");
 506             }
 507             if ((int)fread( tsl_cmpbuffer, 1, cmpwsize, tfp ) != cmpwsize) {
 508                 tsl_error( TRUE, "Short read" );
 509             }
 510
 511             /* Need a new buffer */
 512             if ( tsl_numbuffs >= tsl_maxbuffs ) {
 513                 /* We're at the limit -- need to recycle one
 514                    Grab the buffer at the end of the LRU list.
 515                    */
 516                 brp = tsl_lastbuffer.prev;      /* there it is */
 517                 brp->prev->next = brp->next;    /* unlink it */
 518                 brp->next->prev = brp->prev;
 519                 uncbuf = brp->bufferp;
 520                 tsl_wbuffers[brp->win] = NULL;  /* former owner loses */
 521             } else {
 522                 /* allocate a new buffer */
 523                 tsl_wbuffers[window] = brp =
 524                     (struct buffer_rec *)malloc( sizeof(struct buffer_rec) );
 525                 if (brp == NULL)
 526                     tsl_error( TRUE, "Bad malloc" );
 527                 brp->bufferp = uncbuf = malloc( tsl_wsize );
 528                 if (uncbuf == NULL)
 529                     tsl_error( TRUE, "Bad malloc" );
 530                 tsl_numbuffs++;
 531             } /* new buffer */
 532             tsl_wbuffers[window] = brp;
 533             brp->win = window;
 534
 535             if (cmp_decompress( (unsigned char*)tsl_cmpbuffer, (unsigned char*)uncbuf, cmpwsize ) != tsl_wsize) {
 536                 /* Last window is probably small.  Just ignore its size */
 537                 if (window != (tsl_wnum-1)) {
 538                     free(uncbuf);
 539                     tsl_error( TRUE, "Bad decompression, result is wrong size" );
 540                 }
 541             }
 542         } /* else we read and decompressed the window */
 543
 544         /* Insert this buffer at head of list */
 545         brp->next = tsl_firstbuffer.next;
 546         brp->next->prev = brp;
 547         tsl_firstbuffer.next = brp;
 548         brp->prev = &tsl_firstbuffer;
 549
 550         /* If we've gotten this far, we have a nice decompressed buffer to use */
 551         cp = &uncbuf[bstart];           /* starting address */
 552         if (vb == NULL) {
 553             ep = cp+size;
 554             while (cp != ep) putchar( *cp++ );
 555         } else {
 556             memcpy( vb, cp, size );
 557             vb += size;
 558         }
 559         bytes_remaining -= size;
 560     } /* while */
 561
 562     if (vb != NULL) *vb = '\0';
 563 #endif
 564
 565     return vsize - bytes_remaining;
 566 } /* tsl_textread */
 567
 568
 569
 570 void tsl_init(char *dfname,char *path, const int memlimit)
 571 /*----------------------------------------------------------------------
 572 |   NAME:
 573 |       tsl_init
 574 |
 575 |   ALGORITHM:
 576 |       Initialize the TSL library.
 577 |
 578 |       dfname          Name of data file.
 579 |       path            Search path to use for file.
 580 |       memlimit        Limit (in Kbytes) on buffer space to use.
 581 |
 582 |   HISTORY:
 583 |       890825 cc Rewrite for compressed windowed files.
 584 |       890830 cc Added memlimit.
 585 |       890904 cc Implemented search path.
 586 |       921221 cc Moved path search into separate function findfile().
 587 |                 Added initialization for concordance.
 588 |       930423 cc Revised data file headers using Univ_Int.
 589 |
 590 \*----------------------------------------------------------------------*/
 591 {
 592     struct tsl_fileheader fh;
 593     int i;
 594     Short_Univ_Int *sup;
 595     Univ_Int *up;
 596     int tablesize;
 597 #define STRSZ 255
 598     char cfname[STRSZ+1]; /*allow space for trailing NULL*/
 599
 600     if (memlimit > 0)
 601         tsl_maxbuffusage = memlimit<<10;        /* times 1024 */
 602
 603     /* Open main data file */
 604     if ((tfp = findfile(dfname, path)) == NULL) {
 605         tsl_error( TRUE, "Cannot open data file %s", dfname );
 606     }
 607
 608     /* What do we have here?  Let's check out the file header... */
 609     if (!fread( &fh, sizeof(fh), 1, tfp )) {
 610         tsl_error( TRUE, "Cannot read data file %s", dfname );
 611     }
 612     if ((fh.magic[0] != TSL_MAGIC1) || (fh.magic[1] != TSL_MAGIC2))
 613         tsl_error( TRUE, "Cannot use data file %s: Bad magic number",
 614                    dfname );
 615     if ((fh.version[0] != TSL_FVERSION1) || (fh.version[1] != TSL_FVERSION2))
 616         tsl_error( TRUE, "Cannot use data file %s: Wrong version",
 617                    dfname );
 618
 619     tsl_wsize = univ2int( fh.wsize );
 620     tsl_wnum  = univ2int( fh.wnum );
 621
 622     /* Grab the window table */
 623     tablesize = sizeof(int)*(tsl_wnum+1);       /* +1 for ending entry */
 624     if ((tsl_wtable = (file_ptr_t *)malloc( tablesize )) == NULL)
 625         tsl_error( TRUE, "Bad malloc" );
 626     if (!fread( tsl_wtable, tablesize, 1, tfp )) {
 627         tsl_error( TRUE, "Error reading data file %s", dfname );
 628     }
 629     /* Convert Univ_Ints in window table to regular ints */
 630     up = (Univ_Int *) tsl_wtable;
 631     for (i=0; i<=tsl_wnum; i++) {
 632         tsl_wtable[i] = (file_ptr_t) univ2int( *up++ );
 633     }
 634
 635     /* Create buffer table (parallel array to window table) */
 636     if ((tsl_wbuffers =
 637          (struct buffer_rec **)malloc(sizeof(*tsl_wbuffers)*tsl_wnum )) == NULL)
 638         tsl_error( TRUE, "Bad malloc" );
 639     for (i=0; i< tsl_wnum; i++) tsl_wbuffers[i] = NULL;
 640
 641     tsl_numbuffs = 0;           /* active buffers of uncompressed text */
 642     tsl_maxbuffs = tsl_maxbuffusage / tsl_wsize;
 643     if (tsl_maxbuffs < 1) tsl_maxbuffs = 1;
 644     tsl_firstbuffer.next = &tsl_lastbuffer;
 645     tsl_firstbuffer.prev = NULL;
 646     tsl_firstbuffer.win  = 0;
 647     tsl_firstbuffer.bufferp = NULL;
 648     tsl_lastbuffer.prev = &tsl_firstbuffer;
 649     tsl_lastbuffer.next = NULL;
 650     tsl_lastbuffer.win  = 0;
 651     tsl_lastbuffer.bufferp = NULL;
 652
 653     /* Global buffer for compressed text.  Much bigger than needed. :-) */
 654     if ((tsl_cmpbuffer = malloc( tsl_wsize )) == NULL)
 655         tsl_error( TRUE, "Bad malloc" );
 656
 657     cmp_init();         /* Initialize decompression */
 658
 659     /* OK, now let's see if there's a matching concordance file */
 660     strncpy( cfname, dfname, STRSZ );
 661     strncat( cfname, ".conc", STRSZ );
 662     if ((cfp = findfile(cfname, path)) == NULL) {
 663         tsl_error( FALSE, "(No concordance file '%s' found)", cfname );
 664     } else {
 665         /* Got a file.  Now read the header. */
 666         if (!fread( &cfh, sizeof(cfh), 1, cfp )) {
 667             tsl_error( FALSE,
 668                       "Warning: Error reading concordance '%s'", cfname );
 669             cfp = NULL;
 670         } else {
 671             if ((cfh.magic[0] != TSL_CONCMAGIC1) ||
 672                 (cfh.magic[1] != TSL_CONCMAGIC2))
 673                 tsl_error( TRUE,
 674                           "Cannot use concordanc file '%s': Bad magic number",
 675                           cfname );
 676             if ((cfh.version[0] != TSL_CONCFVERSION1) ||
 677                 (cfh.version[1] != TSL_CONCFVERSION2))
 678                 tsl_error( TRUE,
 679                           "Cannot use concordance file %s: Wrong version",
 680                           cfname );
 681
 682             /* Allocate & initialize buffer for strings (all words) */
 683             i=univ2int(cfh.index_ptr) - univ2int(cfh.word_ptr);
 684             cf_words = malloc( i );
 685             fread( cf_words, 1, i, cfp );
 686
 687             /* Allocate & initialize buffer for index */
 688             i=univ2int(cfh.data_ptr) - univ2int(cfh.index_ptr);
 689             cf_index = (short int *) malloc( i );
 690             fread( cf_index, 1, i, cfp );
 691             /* Convert from Short_Univ_Int to short int */
 692             sup = (Short_Univ_Int *) cf_index;
 693             for (i=0; i<=univ2int(cfh.word_cnt); i++) {
 694                 cf_index[i] = (short int) shortuniv2int( *sup++ );
 695             }
 696         }
 697     }
 698 } /* tsl_init */
 699
 700
 701 void tsl_close(void)
 702 /*----------------------------------------------------------------------
 703 |   NAME:
 704 |       tsl_close
 705 |
 706 |   ALGORITHM:
 707 |       Tidy up before leaving the TSL library.
 708 |
 709 |   HISTORY:
 710 |
 711 \*----------------------------------------------------------------------*/
 712 {
 713     struct buffer_rec *bufp, *nbufp;
 714
 715     fclose( tfp);
 716     fclose( cfp);
 717
 718     /* Free all kinds of buffers and tables */
 719     bufp=tsl_firstbuffer.next;
 720     while (bufp != &tsl_lastbuffer) {
 721         nbufp = bufp->next;
 722         if (bufp->bufferp != NULL)
 723             free(bufp->bufferp);                /* free the buffer */
 724         free(bufp);                             /* free the buffer rec */
 725         bufp = nbufp;                           /* on to next buffer rec */
 726     }
 727     if (tsl_wtable != NULL) free(tsl_wtable);
 728     if (tsl_cmpbuffer != NULL) free(tsl_cmpbuffer);
 729 } /* tsl_close */