2 ********************************************************************************
5 * RCS: $Header: /home/matthew/cvs/bible-kjv-4.10/compresslib.c,v 2.9 2005/01/23 11:27:00 matthew Exp $
6 * Description: Embedded "decompress" routine.
7 * Author: Chip Chapin, Hewlett Packard Company
8 * Created: Sat May 27 14:53:55 1989
9 * Modified: Thu Sep 14 12:37:36 1989 (Chip Chapin) chip@hpcllcc
11 * Package: Bible Retrieval System
12 * Status: Experimental (Do Not Distribute)
14 * $Log: compresslib.c,v $
15 * Revision 2.9 2005/01/23 11:27:00 matthew
16 * include more standard header files
18 * Revision 2.8 2005/01/22 18:47:35 matthew
19 * return int rather than char (which was getting over-run)
21 * Revision 2.7 2005/01/22 17:56:40 matthew
22 * finish prototyping all functions
24 * Revision 2.6 2005/01/21 19:47:35 matthew
25 * farm some stuff out to cmp.h
27 * Revision 2.5 2003/07/26 13:16:37 matthew
28 * now compiles with -Wall
30 * Revision 2.4 2003/07/26 12:08:02 matthew
31 * declare return types of functions
33 * Revision 2.3 2003/07/26 12:01:05 matthew
34 * remove nested commentings
36 * Revision 2.2 2003/01/16 14:24:50 matthew
37 * correct use of #endif since GCC3.2 is now more pedantic about things
39 * Revision 2.1 2003/01/08 15:50:53 matthew
40 * applied debian patch
42 * Revision 2.0 2003/01/08 15:29:52 matthew
43 * versions collected from the net
45 * Revision 1.2 89/09/14 20:34:00 20:34:00 chip (Chip Chapin)
46 * Release 1-2. Supports -f and -l options for formatting the output.
47 * Updates primarily brl.c, bible.c, and bible.1.
49 * Revision 1.1 89/09/05 17:49:30 17:49:30 chip (Chip Chapin)
53 ********************************************************************************
56 /*----------------------------------------------------------------------
61 | Provides a programmatic interface to Lempel-Ziv-Welch
62 | uncompression. Code is entirely swiped from the
63 | "compress" utility, the authors of which are:
64 | Spencer W. Thomas, Jim McKie, Steve Davies, Ken
65 | Turkowski, James A. Woods, and Joe Orost.
68 | cmp_init Initialize the library.
69 | cmp_decompress Decompress a buffer.
74 \*----------------------------------------------------------------------*/
81 #include <sys/types.h>
83 #define min(a,b) ((a>b) ? b : a)
86 * machine variants which require cc -Dmachine: pdp11, z8000, pcxt
90 * Set USERMEM to the maximum amount of physical user memory available
91 * in bytes. USERMEM is used to determine the maximum BITS that can be used
94 * SACREDMEM is the amount of physical memory saved for others; compress
102 # define USERMEM 450000 /* default user memory */
105 #ifdef interdata /* (Perkin-Elmer) */
106 #define SIGNED_COMPARE_SLOW /* signed compare is slower than unsigned */
110 # define BITS 12 /* max bits/code for 16-bit machine */
111 # define NO_UCHAR /* also if "unsigned char" functions as signed char */
113 #endif /* pdp11 */ /* don't forget to compile with -i */
117 # undef vax /* weird preprocessor */
127 # if USERMEM >= (433484+SACREDMEM)
130 # if USERMEM >= (229600+SACREDMEM)
133 # if USERMEM >= (127536+SACREDMEM)
136 # if USERMEM >= (73464+SACREDMEM)
147 #ifdef PBITS /* Preferred BITS for this memory size */
154 # define HSIZE 69001 /* 95% occupancy */
157 # define HSIZE 35023 /* 94% occupancy */
160 # define HSIZE 18013 /* 91% occupancy */
163 # define HSIZE 9001 /* 91% occupancy */
166 # define HSIZE 5003 /* 80% occupancy */
169 #ifdef M_XENIX /* Stupid compiler can't handle arrays with */
170 # if BITS == 16 /* more than 65535 bytes - so we fake it */
173 # if BITS > 13 /* Code only handles BITS = 12, 13, or 16 */
180 * a code_int must be able to hold 2**BITS values of type int, and also -1
182 typedef int code_int;
184 #ifdef SIGNED_COMPARE_SLOW
185 typedef unsigned int count_int;
186 typedef unsigned short int count_short;
188 typedef int count_int;
191 char_type magic_header[] = { "\037\235" }; /* 1F 9D */
193 /* Defines for third byte of header */
194 #define BIT_MASK 0x1f
195 #define BLOCK_MASK 0x80
196 /* Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is
197 a fourth header byte (for expansion).
199 #define INIT_BITS 9 /* initial number of bits/code */
202 static char rcs_ident[] __attribute__ ((unused)) ="@(#)$Header: /home/matthew/cvs/bible-kjv-4.10/compresslib.c,v 2.9 2005/01/23 11:27:00 matthew Exp $";
204 static char rcs_ident[]="@(#)$Header: /home/matthew/cvs/bible-kjv-4.10/compresslib.c,v 2.9 2005/01/23 11:27:00 matthew Exp $";
207 #define ARGVAL() (*++(*argv) || (--argc && *++argv))
209 int n_bits; /* number of bits/code */
210 int maxbits = BITS; /* user settable max # bits/code */
211 code_int maxcode; /* maximum code, given n_bits */
212 code_int maxmaxcode = 1 << BITS; /* should NEVER generate this code */
213 #ifdef COMPATIBLE /* But wrong! */
214 # define MAXCODE(n_bits) (1 << (n_bits) - 1)
216 # define MAXCODE(n_bits) ((1 << (n_bits)) - 1)
217 #endif /* COMPATIBLE */
220 count_int htab0[8192];
221 count_int htab1[8192];
222 count_int htab2[8192];
223 count_int htab3[8192];
224 count_int htab4[8192];
225 count_int htab5[8192];
226 count_int htab6[8192];
227 count_int htab7[8192];
228 count_int htab8[HSIZE-65536];
229 count_int * htab[9] = {
230 htab0, htab1, htab2, htab3, htab4, htab5, htab6, htab7, htab8 };
232 #define htabof(i) (htab[(i) >> 13][(i) & 0x1fff])
233 unsigned short code0tab[16384];
234 unsigned short code1tab[16384];
235 unsigned short code2tab[16384];
236 unsigned short code3tab[16384];
237 unsigned short code4tab[16384];
238 unsigned short * codetab[5] = {
239 code0tab, code1tab, code2tab, code3tab, code4tab };
241 #define codetabof(i) (codetab[(i) >> 14][(i) & 0x3fff])
243 #else /* Normal machine */
244 count_int htab [HSIZE];
245 unsigned short codetab [HSIZE];
246 #define htabof(i) htab[i]
247 #define codetabof(i) codetab[i]
248 #endif /* XENIX_16 */
249 code_int hsize = HSIZE; /* for dynamic table sizing */
252 char_type lmask[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00};
253 char_type rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
258 * To save much memory, we overlay the table used by compress() with those
259 * used by decompress(). The tab_prefix table is the same size and type
260 * as the codetab. The tab_suffix table needs 2**BITS characters. We
261 * get this from the beginning of htab. The output stack uses the rest
262 * of htab, and contains characters. There is plenty of room for any
263 * possible stack (stack used to be 8000 characters).
266 #define tab_prefixof(i) codetabof(i)
268 # define tab_suffixof(i) ((char_type *)htab[(i)>>15])[(i) & 0x7fff]
269 # define de_stack ((char_type *)(htab2))
270 #else /* Normal machine */
271 # define tab_suffixof(i) ((char_type *)(htab))[i]
272 # define de_stack ((char_type *)&tab_suffixof(1<<BITS))
273 #endif /* XENIX_16 */
275 code_int free_ent = 0; /* first unused entry */
278 * block compression parameters -- after all codes are used up,
279 * and compression rate changes, start over.
281 int block_compress = BLOCK_MASK;
285 * the next two codes should not be changed lightly, as they must not
286 * lie within the contiguous general code space.
288 #define FIRST 257 /* first free entry */
289 #define CLEAR 256 /* table clear output code */
291 static code_int getcode(char_type **inbp,char_type *eofp);
294 /*----------------------------------------------------------------------
299 | Initialize state required by the decompress
300 | routines. This replaces the "main" of the original
304 | Stripped from "main", 890825 cc
306 \*----------------------------------------------------------------------*/
308 /* Initialize global variables.
309 In the original compress, these are just initialized statically.
310 We're doing it dynamically so that the package can be reinitialized.
312 maxbits = BITS; /* user settable max # bits/code */
313 maxmaxcode = 1 << BITS; /* should NEVER generate this code */
314 hsize = HSIZE; /* for dynamic table sizing */
315 free_ent = 0; /* first unused entry */
317 /* Argument Processing
318 * All flags are optional.
320 * -V => print Version; debug verbose
323 * -f => force overwrite of output file
324 * -n => no header: useful to uncompress old files
325 * -b maxbits => maxbits. If -b is specified, then maxbits MUST be
327 * -c => cat all output to stdout
328 * -C => generate output compatible with compress 2.0.
330 * (890527) -w window-size => sync the output with "windows" of fixed
331 * size in the input file. If you then keep track of where each
332 * compressed window starts in the output file, you can
333 * start decompressing at a window boundary instead of having
334 * decompress the entire file.
336 * if a string is left, must be an input filename.
344 /* -d do_decomp = 1; */
347 /* -n nomagic = 1; */
348 /* -C block_compress = 0; */
349 /* -b maxbits = atoi(*argv); */
350 /* -c zcat_flg = 1; */
352 /* -w piecesize = atoi(*argv); */
354 if(maxbits < INIT_BITS) maxbits = INIT_BITS;
355 if (maxbits > BITS) maxbits = BITS;
356 maxmaxcode = 1 << maxbits;
361 /* Does NOT update input buffer pointer */
362 int cmp_checkheader(char_type *inb)
364 if ((*inb++ != (magic_header[0] & 0xFF))
365 || (*inb++ != (magic_header[1] & 0xFF))) {
366 fprintf(stderr, "not in compressed format\n");
369 maxbits = *inb++; /* set -b from file */
370 block_compress = maxbits & BLOCK_MASK;
372 maxmaxcode = 1 << maxbits;
375 "compressed with %d bits, can only handle %d bits\n",
385 int cmp_decompress(char_type *inb, char_type *outb, int insize )
386 /*----------------------------------------------------------------------
391 | Decompress from buffer to buffer. This routine adapts to
392 | the codes in the file building the "string" table
393 | on-the-fly; requiring no table to be stored in the
394 | compressed file. The tables used herein are shared with
395 | those of the compress() routine. See the definitions
398 | We assume that the input buffer points to the start of a
399 | block of compressed text, past any magic number or other
402 | inb Pointer to input buffer.
403 | outb Pointer to output buffer.
404 | insize Size (bytes) of input data.
406 | Note that *no* checking is performed regarding the size of
407 | the output data. The calling program is responsible for
408 | providing a large enough output buffer.
410 | Returns the number of bytes of decompressed text.
413 | 890825 cc Derived from the original "decompress", which
414 | worked from stdin to stdout.
416 \*----------------------------------------------------------------------*/
418 register char_type *stackp;
419 register int finchar;
420 register code_int code, oldcode, incode;
421 char_type *eofchar, *outbstart;
424 * As above, initialize the first 256 entries in the table.
426 maxcode = MAXCODE(n_bits = INIT_BITS);
427 for ( code = 255; code >= 0; code-- ) {
428 tab_prefixof(code) = 0;
429 tab_suffixof(code) = (char_type)code;
431 free_ent = ((block_compress) ? FIRST : 256 );
433 outbstart = outb; /* Remember where outb started */
434 eofchar = inb + insize; /* Point just past last char in input buffer */
436 finchar = oldcode = getcode(&inb, eofchar);
437 if(oldcode == -1) /* EOF already? */
438 return 0; /* Get out of here */
439 *outb++ = (char)finchar; /* first code must be 8 bits = char */
440 /* putchar( (char)finchar );*/ /* first code must be 8 bits = char */
443 while ( (code = getcode(&inb, eofchar)) > -1 ) {
445 if ( (code == CLEAR) && block_compress ) {
446 for ( code = 255; code >= 0; code-- )
447 tab_prefixof(code) = 0;
449 free_ent = FIRST - 1;
450 if ( (code = getcode (&inb, eofchar)) == -1 )/*O, untimely death!*/
455 * Special case for KwKwK string.
457 if ( code >= free_ent ) {
463 * Generate output characters in reverse order
465 #ifdef SIGNED_COMPARE_SLOW
466 while ( ((unsigned int)code) >= ((unsigned int)256) )
468 while ( code >= 256 )
471 *stackp++ = tab_suffixof(code);
472 code = tab_prefixof(code);
474 *stackp++ = finchar = tab_suffixof(code);
477 * And put them out in forward order
480 *outb++ = *--stackp; /* putchar ( *--stackp ); */
481 while ( stackp > de_stack );
484 * Generate the new entry.
486 if ( (code=free_ent) < maxmaxcode ) {
487 tab_prefixof(code) = (unsigned short)oldcode;
488 tab_suffixof(code) = finchar;
492 * Remember previous code.
497 return(int)(outb - outbstart);
502 static code_int getcode(char_type **inbp,char_type *eofp)
503 /*----------------------------------------------------------------------
508 | Read one code from the input buffer. If EOF, return -1.
510 | inbp Pointer to the buffer pointer.
511 | eofp Pointer just past last char in input buffer.
513 | code or -1 is returned.
516 | 890825 cc Derived from the original getcode(), which read
519 \*----------------------------------------------------------------------*/
522 * On the VAX, it is important to have the register declarations
523 * in exactly the order given, or the asm will break.
525 * This is also true for the 68020 asm code.
527 register code_int code;
528 static int offset = 0, size = 0;
529 static char_type buf[BITS];
530 register int r_off, bits;
531 register char_type *bp = buf;
533 if ( clear_flg > 0 || offset >= size || free_ent > maxcode ) {
535 * If the next entry will be too big for the current code
536 * size, then we must increase the size. This implies reading
537 * a new buffer full, too.
539 if ( free_ent > maxcode ) {
541 if ( n_bits == maxbits )
542 maxcode = maxmaxcode; /* won't get any bigger now */
544 maxcode = MAXCODE(n_bits);
546 if ( clear_flg > 0) {
547 maxcode = MAXCODE (n_bits = INIT_BITS);
551 /* 890825 cc Replaced fread with buffer read:
552 size = fread( buf, 1, n_bits, stdin );
555 if (*inbp + size > eofp)
559 memcpy(buf, *inbp, size);
563 /* Round size down to integral number of codes */
564 size = (size << 3) - (n_bits - 1);
569 asm( "extzv r10,r9,(r8),r11" );
570 #else /* not a vax */
573 * MC68020 DEPENDENT!!
574 * This code mimics the "#ifndef vax" code below, because the
575 * 68020 'bfextu' instruction is *not* the same as the vax 'extzv'.
578 /* register code_int code; */ /* d7 */
579 /* register int r_off = offset, bits= n_bits; */ /* d6,d5 */
580 register tmp; /* d4 */
584 /* Get first part (low order bits) */
585 code = *bp++; code >>= r_off;
586 tmp = 8; tmp -= r_off;
588 r_off = tmp; /* now, offset into code word */
589 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
591 tmp = *bp++; tmp <<= r_off; code |= tmp;
595 /* high order bits. */
596 tmp = 8; tmp -= bits;
597 asm(" bfextu (%a5){%d4:%d5},%d4");
601 #else /* not 68020 */
603 * Get to the first byte.
607 /* Get first part (low order bits) */
609 code = ((*bp++ >> r_off) & rmask[8 - r_off]) & 0xff;
611 code = (*bp++ >> r_off);
612 #endif /* NO_UCHAR */
614 r_off = 8 - r_off; /* now, offset into code word */
615 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
618 code |= (*bp++ & 0xff) << r_off;
620 code |= *bp++ << r_off;
621 #endif /* NO_UCHAR */
625 /* high order bits. */
626 code |= (*bp & rmask[bits]) << r_off;