2 ********************************************************************************
5 * RCS: $Header: /home/matthew/cvs/bible-kjv-4.10/squish.c,v 2.18 2005/01/23 11:27:00 matthew Exp $
6 * Description: Modified "compress" supports windowing on input text.
7 * Author: Chip Chapin, Hewlett Packard Company
8 * Created: Sat May 27 14:53:55 1989
9 * Modified: Wed Dec 23 14:07:10 1992 (Chip Chapin) chip@hpclbis
11 * Package: Bible Retrieval System
12 * Status: Experimental (Do Not Distribute)
15 * Revision 2.18 2005/01/23 11:27:00 matthew
16 * include more standard header files
18 * Revision 2.17 2005/01/22 18:06:33 matthew
19 * no need to shadow the definition of rindex and malloc...
21 * Revision 2.16 2005/01/22 18:06:02 matthew
22 * declare bgnd_flag properly
24 * Revision 2.15 2005/01/22 17:15:24 matthew
25 * and another local shadowing a global
27 * Revision 2.14 2005/01/22 17:12:33 matthew
28 * and another local variable shadowing a global
30 * Revision 2.13 2005/01/22 17:10:42 matthew
31 * rename variable to avoid shadowing a global variable
33 * Revision 2.12 2005/01/22 16:22:09 matthew
34 * add parentheses to make code clearer
36 * Revision 2.11 2005/01/22 16:19:47 matthew
37 * include string.h and unistd.h
39 * Revision 2.10 2005/01/22 16:17:45 matthew
40 * remove extraneous function declaration
42 * Revision 2.9 2005/01/22 16:16:09 matthew
43 * remove rindex(), since everyone has it
45 * Revision 2.8 2005/01/22 16:15:40 matthew
46 * remove #defines to squish.h
48 * Revision 2.7 2005/01/22 15:59:32 matthew
51 * Revision 2.6 2005/01/22 00:35:25 matthew
52 * sort out comments within comments
54 * Revision 2.5 2003/07/26 09:35:26 matthew
55 * correct format arg to fprintf for checkpoint function
57 * Revision 2.4 2003/02/01 02:38:01 matthew
58 * include stdlib.h for malloc
60 * Revision 2.3 2003/01/16 14:24:50 matthew
61 * correct use of #endif since GCC3.2 is now more pedantic about things
63 * Revision 2.2 2003/01/09 13:07:25 matthew
64 * fix arg2 of signal() so it doesnt get called [thinko-repair]
66 * Revision 2.1 2003/01/08 19:16:04 matthew
67 * correct arg 2 of signal()
69 * Revision 2.0 2003/01/08 15:29:52 matthew
70 * versions collected from the net
72 * Revision 1.3 92/12/23 14:10:49 14:10:49 chip (Chip Chapin)
73 * Release 2.03: minor tweaks and bug fixes.
75 * Revision 1.2 89/09/14 20:34:07 20:34:07 chip (Chip Chapin)
76 * Release 1-2. Supports -f and -l options for formatting the output.
77 * Updates primarily brl.c, bible.c, and bible.1.
79 * Revision 1.1 89/09/05 17:49:40 17:49:40 chip (Chip Chapin)
83 ********************************************************************************
87 * Wed Dec 23 14:05:27 1992 (Chip Chapin) chip@hpclbis
88 * Tweak to eliminate compile warnings.
89 * 890529 cc Successfully derived from compress 4.0
90 *******************************************************************************
96 Replied: Thu, 12 Jan 89 13:06:46 PST
97 Replied: Perry Scott <perry@hpfcls>
98 Return-Path: perry@hpfcls
99 Received: from hpfcls.HP.COM (hpscott) by hpcllcc; Thu, 12 Jan 89 12:04:12 pst
100 Return-Path: <perry@hpfcls>
101 Received: by hpfcls.HP.COM; Thu, 12 Jan 89 13:03:46 mst
102 Date: Thu, 12 Jan 89 13:03:46 mst
103 From: Perry Scott <perry@hpfcls>
104 Full-Name: Perry Scott
105 Message-Id: <8901122003.AA24709@hpfcls.HP.COM>
107 Subject: Re: Do you still have compress?
109 The 68020 bfins instruction is ass-backwards from the VAX equivalent, so
110 I could only get marginal speed-up (10-20%) from assembly. If you go
111 with the 68020 version, speed-up is around 50%. Here is the code:
112 Look for "68020". This may be different than the current compress in
113 shared command source.
122 * Compress - data compression program
/*
 * min(a, b) - evaluate to the smaller of a and b.
 *
 * Every use of an argument, and the expansion as a whole, is parenthesized
 * so that operands containing lower-precedence operators (for example
 * `x & mask`) are compared and returned exactly as the caller wrote them.
 * The selected argument is still evaluated twice, so do not pass
 * expressions with side effects.
 */
#define min(a,b) (((a) > (b)) ? (b) : (a))
127 * compress.c - File compression ala IEEE Computer, June 1984.
129 * Authors: Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas)
130 * Jim McKie (decvax!mcvax!jim)
131 * Steve Davies (decvax!vax135!petsd!peora!srd)
132 * Ken Turkowski (decvax!decwrl!turtlevax!ken)
133 * James A. Woods (decvax!ihnp4!ames!jaw)
134 * Joe Orost (decvax!vax135!petsd!joe)
137 * Revision 4.0 85/07/30 12:50:00 joe
138 * Removed ferror() calls in output routine on every output except first.
139 * Prepared for release to the world.
141 * Revision 3.6 85/07/04 01:22:21 joe
142 * Remove much wasted storage by overlaying hash table with the tables
143 * used by decompress: tab_suffix[1<<BITS], stack[8000]. Updated USERMEM
144 * computations. Fixed dump_tab() DEBUG routine.
146 * Revision 3.5 85/06/30 20:47:21 jaw
147 * Change hash function to use exclusive-or. Rip out hash cache. These
148 * speedups render the megamemory version defunct, for now. Make decoder
149 * stack global. Parts of the RCS trunks 2.7, 2.6, and 2.1 no longer apply.
151 * Revision 3.4 85/06/27 12:00:00 ken
152 * Get rid of all floating-point calculations by doing all compression ratio
153 * calculations in fixed point.
155 * Revision 3.3 85/06/24 21:53:24 joe
156 * Incorporate portability suggestion for M_XENIX. Got rid of text on #else
157 * and #endif lines. Cleaned up #ifdefs for vax and interdata.
159 * Revision 3.2 85/06/06 21:53:24 jaw
160 * Incorporate portability suggestions for Z8000, IBM PC/XT from mailing list.
161 * Default to "quiet" output (no compression statistics).
163 * Revision 3.1 85/05/12 18:56:13 jaw
164 * Integrate decompress() stack speedups (from early pointer mods by McKie).
165 * Repair multi-file USERMEM gaffe. Unify 'force' flags to mimic semantics
166 * of SVR2 'pack'. Streamline block-compress table clear logic. Increase
167 * output byte count by magic number size.
169 * Revision 3.0 84/11/27 11:50:00 petsd!joe
170 * Set HSIZE depending on BITS. Set BITS depending on USERMEM. Unrolled
171 * loops in clear routines. Added "-C" flag for 2.0 compatibility. Used
172 * unsigned compares on Perkin-Elmer. Fixed foreground check.
174 * Revision 2.7 84/11/16 19:35:39 ames!jaw
175 * Cache common hash codes based on input statistics; this improves
176 * performance for low-density raster images. Pass on #ifdef bundle
179 * Revision 2.6 84/11/05 19:18:21 ames!jaw
180 * Vary size of hash tables to reduce time for small files.
181 * Tune PDP-11 hash function.
183 * Revision 2.5 84/10/30 20:15:14 ames!jaw
184 * Junk chaining; replace with the simpler (and, on the VAX, faster)
185 * double hashing, discussed within. Make block compression standard.
187 * Revision 2.4 84/10/16 11:11:11 ames!jaw
188 * Introduce adaptive reset for block compression, to boost the rate
189 * another several percent. (See mailing list notes.)
191 * Revision 2.3 84/09/22 22:00:00 petsd!joe
192 * Implemented "-B" block compress. Implemented REVERSE sorting of tab_next.
193 * Bug fix for last bits. Changed fwrite to putchar loop everywhere.
195 * Revision 2.2 84/09/18 14:12:21 ames!jaw
196 * Fold in news changes, small machine typedef from thomas,
197 * #ifdef interdata from joe.
199 * Revision 2.1 84/09/10 12:34:56 ames!jaw
200 * Configured fast table lookup for 32-bit machines.
201 * This cuts user time in half for b <= FBITS, and is useful for news batching
202 * from VAX to PDP sites. Also sped up decompress() [fwrite->putc] and
203 * added signal catcher [plus beef in writeerr()] to delete effluvia.
205 * Revision 2.0 84/08/28 22:00:00 petsd!joe
206 * Add check for foreground before prompting user. Insert maxbits into
207 * compressed file. Force file being uncompressed to end with ".Z".
208 * Added "-c" flag and "zcat". Prepared for release.
210 * Revision 1.10 84/08/24 18:28:00 turtlevax!ken
211 * Will only compress regular files (no directories), added a magic number
212 * header (plus an undocumented -n flag to handle old files without headers),
213 * added -f flag to force overwriting of possibly existing destination file,
214 * otherwise the user is prompted for a response. Will tack on a .Z to a
215 * filename if it doesn't have one when decompressing. Will only replace
216 * file if it was compressed.
218 * Revision 1.9 84/08/16 17:28:00 turtlevax!ken
219 * Removed scanargs(), getopt(), added .Z extension and unlimited number of
220 * filenames to compress. Flags may be clustered (-Ddvb12) or separated
221 * (-D -d -v -b 12), or combination thereof. Modes and other status is
222 * copied with copystat(). -O bug for 4.2 seems to have disappeared with
225 * Revision 1.8 84/08/09 23:15:00 joe
226 * Made it compatible with vax version, installed jim's fixes/enhancements
228 * Revision 1.6 84/08/01 22:08:00 joe
229 * Sped up algorithm significantly by sorting the compress chain.
231 * Revision 1.5 84/07/13 13:11:00 srd
232 * Added C version of vax asm routines. Changed structure to arrays to
233 * save much memory. Do unsigned compares where possible (faster on
236 * Revision 1.4 84/07/05 03:11:11 thomas
237 * Clean up the code a little and lint it. (Lint complains about all
238 * the regs used in the asm, but I'm not going to "fix" this.)
240 * Revision 1.3 84/07/05 02:06:54 thomas
243 * Revision 1.2 84/07/05 00:27:27 thomas
244 * Add variable bit length output.
247 static char rcs_ident[]="@(#)$Header: /home/matthew/cvs/bible-kjv-4.10/squish.c,v 2.18 2005/01/23 11:27:00 matthew Exp $";
253 #include <sys/types.h>
254 #include <sys/stat.h>
/*
 * ARGVAL() - step to the value that belongs to the current option flag.
 *
 * First advances the character pointer held in argv[0] of the caller's
 * cursor; if a character follows the flag inside the same argument, the
 * remainder of that argument is the value.  Otherwise argc is decremented
 * and, if any arguments remain, argv is advanced to the next argument.
 * The expression is non-zero when a value is available through argv and
 * zero when the arguments are exhausted.
 *
 * Uses and modifies the argc and argv variables of the enclosing scope.
 */
261 #define ARGVAL() (*++(*argv) || (--argc && *++argv))
263 int n_bits; /* number of bits/code */
264 int maxbits = BITS; /* user settable max # bits/code */
265 code_int maxcode; /* maximum code, given n_bits */
266 code_int maxmaxcode = 1 << BITS; /* should NEVER generate this code */
267 #ifdef COMPATIBLE /* But wrong! */
268 # define MAXCODE(n_bits) (1 << (n_bits) - 1)
270 # define MAXCODE(n_bits) ((1 << (n_bits)) - 1)
271 #endif /* COMPATIBLE */
274 count_int htab0[8192];
275 count_int htab1[8192];
276 count_int htab2[8192];
277 count_int htab3[8192];
278 count_int htab4[8192];
279 count_int htab5[8192];
280 count_int htab6[8192];
281 count_int htab7[8192];
282 count_int htab8[HSIZE-65536];
283 count_int * htab[9] = {
284 htab0, htab1, htab2, htab3, htab4, htab5, htab6, htab7, htab8 };
286 #define htabof(i) (htab[(i) >> 13][(i) & 0x1fff])
287 unsigned short code0tab[16384];
288 unsigned short code1tab[16384];
289 unsigned short code2tab[16384];
290 unsigned short code3tab[16384];
291 unsigned short code4tab[16384];
292 unsigned short * codetab[5] = {
293 code0tab, code1tab, code2tab, code3tab, code4tab };
295 #define codetabof(i) (codetab[(i) >> 14][(i) & 0x3fff])
297 #else /* Normal machine */
298 count_int htab [HSIZE];
299 unsigned short codetab [HSIZE];
300 #define htabof(i) htab[i]
301 #define codetabof(i) codetab[i]
302 #endif /* XENIX_16 */
303 code_int hsize = HSIZE; /* for dynamic table sizing */
307 * To save much memory, we overlay the table used by compress() with those
308 * used by decompress(). The tab_prefix table is the same size and type
309 * as the codetab. The tab_suffix table needs 2**BITS characters. We
310 * get this from the beginning of htab. The output stack uses the rest
311 * of htab, and contains characters. There is plenty of room for any
312 * possible stack (stack used to be 8000 characters).
315 #define tab_prefixof(i) codetabof(i)
317 # define tab_suffixof(i) ((char_type *)htab[(i)>>15])[(i) & 0x7fff]
318 # define de_stack ((char_type *)(htab2))
319 #else /* Normal machine */
320 # define tab_suffixof(i) ((char_type *)(htab))[i]
321 # define de_stack ((char_type *)&tab_suffixof(1<<BITS))
322 #endif /* XENIX_16 */
324 code_int free_ent = 0; /* first unused entry */
327 int nomagic = 0; /* Use a 3-byte magic number header, unless old file */
328 int zcat_flg = 0; /* Write output on stdout, suppress messages */
329 int quiet = 1; /* don't tell me about compression */
331 /* Added this stuff for piece-wise compression, 890529 */
332 int piecesize = 0; /* default == NOT piece-wise */
333 unsigned int mycheckpoint;
334 int cpcnt = 0; /* count number of pieces */
336 int w_count; /* Order of this window */
337 int w_out_loc; /* Location in compressed file where window starts */
338 struct w_info *w_next; /* pointer to next w_info record */
339 } *firstwin, *curwin;
343 * block compression parameters -- after all codes are used up,
344 * and compression rate changes, start over.
346 int block_compress = BLOCK_MASK;
349 #define CHECK_GAP 10000 /* ratio check interval */
350 count_int checkpoint = CHECK_GAP;
352 * the next two codes should not be changed lightly, as they must not
353 * lie within the contiguous general code space.
355 #define FIRST 257 /* first free entry */
356 #define CLEAR 256 /* table clear output code */
364 void (*bgnd_flag)(int);
369 /* These are used by compress (at least) */
371 long int in_count = 1; /* length of input */
372 long int bytes_out; /* length of compressed output */
373 long int out_count = 0; /* # of codes output (for debugging) */
378 fprintf(stderr,"Usage: squish [-dDVfc][-b maxbits][-w windowsize][file ...]\n");
382 fprintf(stderr,"Usage: squish [-dfvcV] [-b maxbits] [-w windowsize] [file ...]\n");
387 /*****************************************************************
390 * Algorithm from "A Technique for High Performance Data Compression",
391 * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19.
393 * Usage: compress [-dfvc] [-b bits] [file ...]
395 * -d: If given, decompression is done instead.
397 * -c: Write output on stdout, don't remove original.
399 * -b: Parameter limits the max number of bits/code.
401 * -f: Forces output file to be generated, even if one already
402 * exists, and even if no space is saved by compressing.
403 * If -f is not used, the user will be prompted if stdin is
404 * a tty, otherwise, the output file will not be overwritten.
406 * -v: Write compression statistics
408 * file ...: Files to be compressed. If none specified, stdin
411 * file.Z: Compressed form of file with same mode, owner, and utimes
412 * or stdout (if stdin used as input)
415 * Modified Lempel-Ziv method (LZW). Basically finds common
416 * substrings and replaces them with a variable size code. This is
417 * deterministic, and can be done on the fly. Thus, the decompression
418 * procedure needs no input table, but tracks the way the table was built.
421 int main(int argc,char **argv)
423 int overwrite = 0; /* Do not overwrite unless given -f flag */
425 char **filelist, **fileptr;
429 if ( (bgnd_flag = signal ( SIGINT, SIG_IGN )) != SIG_IGN ) {
430 signal ( SIGINT, (void *) onintr );
431 signal ( SIGSEGV, (void *) oops );
435 nomagic = 1; /* Original didn't have a magic number */
436 #endif /* COMPATIBLE */
438 filelist = fileptr = (char **)(malloc(argc * sizeof(*argv)));
441 if((cp = rindex(argv[0], '/')) != 0) {
446 if(strcmp(cp, "unsquish") == 0) {
448 } else if(strcmp(cp, "zcat") == 0) {
454 /* 4.2BSD dependent - take it out if not */
455 setlinebuf( stderr );
458 /* Argument Processing
459 * All flags are optional.
461 * -V => print Version; debug verbose
464 * -f => force overwrite of output file
465 * -n => no header: useful to uncompress old files
466 * -b maxbits => maxbits. If -b is specified, then maxbits MUST be
468 * -c => cat all output to stdout
469 * -C => generate output compatible with compress 2.0.
471 * (890527) -w window-size => sync the output with "windows" of fixed
472 * size in the input file. If you then keep track of where each
473 * compressed window starts in the output file, you can
474 * start decompressing at a window boundary instead of having
475 * to decompress the entire file.
477 * if a string is left, must be an input filename.
479 for (argc--, argv++; argc > 0; argc--, argv++) {
480 if (**argv == '-') { /* A flag argument */
481 while (*++(*argv)) { /* Process all flags in this arg */
515 fprintf(stderr, "Missing maxbits\n");
519 maxbits = atoi(*argv);
529 fprintf(stderr, "Missing window-size\n");
533 piecesize = atoi(*argv);
536 fprintf(stderr, "Unknown flag: '%c'; ", **argv);
542 else { /* Input file name */
543 *fileptr++ = *argv; /* Build input file list */
545 /* process nextarg; */
550 if(maxbits < INIT_BITS) maxbits = INIT_BITS;
551 if (maxbits > BITS) maxbits = BITS;
552 maxmaxcode = 1 << maxbits;
554 if (*filelist != NULL) {
555 for (fileptr = filelist; *fileptr; fileptr++) {
557 if (do_decomp != 0) { /* DECOMPRESSION */
558 /* Check for .Z suffix */
559 if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") != 0) {
560 /* No .Z: tack one on */
561 strcpy(tempname, *fileptr);
562 strcat(tempname, ".Z");
565 /* Open input file */
566 if ((freopen(*fileptr, "r", stdin)) == NULL) {
567 perror(*fileptr); continue;
569 /* Check the magic number */
571 if ((getchar() != (magic_header[0] & 0xFF))
572 || (getchar() != (magic_header[1] & 0xFF))) {
573 fprintf(stderr, "%s: not in compressed format\n",
577 maxbits = getchar(); /* set -b from file */
578 block_compress = maxbits & BLOCK_MASK;
580 maxmaxcode = 1 << maxbits;
583 "%s: compressed with %d bits, can only handle %d bits\n",
584 *fileptr, maxbits, BITS);
588 /* Generate output filename */
589 strcpy(ofname, *fileptr);
590 ofname[strlen(*fileptr) - 2] = '\0'; /* Strip off .Z */
591 } else { /* COMPRESSION */
592 if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") == 0) {
593 fprintf(stderr, "%s: already has .Z suffix -- no change\n",
597 /* Open input file */
598 if ((freopen(*fileptr, "r", stdin)) == NULL) {
599 perror(*fileptr); continue;
602 if (piecesize == 0) {
603 stat ( *fileptr, &statbuf );
604 fsize = (long) statbuf.st_size;
606 /* 890527 -- use the "piece-size" instead */
610 * tune hash table size for small files -- ad hoc,
611 * but the sizes match earlier #defines, which
612 * serve as upper bounds on the number of output codes.
615 if ( fsize < (1 << 12) )
616 hsize = min ( 5003, HSIZE );
617 else if ( fsize < (1 << 13) )
618 hsize = min ( 9001, HSIZE );
619 else if ( fsize < (1 << 14) )
620 hsize = min ( 18013, HSIZE );
621 else if ( fsize < (1 << 15) )
622 hsize = min ( 35023, HSIZE );
623 else if ( fsize < 47000 )
624 hsize = min ( 50021, HSIZE );
626 /* Generate output filename */
627 strcpy(ofname, *fileptr);
628 #ifndef BSD4_2 /* Short filenames */
629 if ((cp=rindex(ofname,'/')) != NULL) cp++;
631 if (strlen(cp) > 12) {
632 fprintf(stderr,"%s: filename too long to tack on .Z\n",cp);
635 #endif /* BSD4_2 Long filenames allowed */
636 strcat(ofname, ".Z");
638 /* Check for overwrite of existing file */
639 if (overwrite == 0 && zcat_flg == 0) {
640 if (stat(ofname, &statbuf) == 0) {
643 fprintf(stderr, "%s already exists;", ofname);
645 fprintf(stderr, " do you wish to overwrite %s (y or n)? ",
648 read(2, response, 2);
649 while (response[1] != '\n') {
650 if (read(2, response+1, 1) < 0) { /* Ack! */
651 perror("stderr"); break;
655 if (response[0] != 'y') {
656 fprintf(stderr, "\tnot overwritten\n");
661 if(zcat_flg == 0) { /* Open output file */
662 if (freopen(ofname, "w", stdout) == NULL) {
667 fprintf(stderr, "%s: ", *fileptr);
670 /* Actually do the compression/decompression */
677 else if (debug == 0) decompress();
679 if (verbose) dump_tab();
683 } else { /* Standard input */
684 if (do_decomp == 0) {
687 if(verbose) dump_tab();
692 /* Check the magic number */
694 if ((getchar()!=(magic_header[0] & 0xFF))
695 || (getchar()!=(magic_header[1] & 0xFF))) {
696 fprintf(stderr, "stdin: not in compressed format\n");
699 maxbits = getchar(); /* set -b from file */
700 block_compress = maxbits & BLOCK_MASK;
702 maxmaxcode = 1 << maxbits;
703 fsize = 100000; /* assume stdin large for USERMEM */
706 "stdin: compressed with %d bits, can only handle %d bits\n",
714 if (debug == 0) decompress();
716 if (verbose) dump_tab();
726 * compress stdin to stdout
728 * Algorithm: use open addressing double hashing (no chaining) on the
729 * prefix code / next character combination. We do a variant of Knuth's
730 * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
731 * secondary probe. Here, the modular division first probe gives way
732 * to a faster exclusive-or manipulation. Also do block compression with
733 * an adaptive reset, whereby the code table is cleared when the compression
734 * ratio decreases, but after the table fills. The variable-length output
735 * codes are re-sized at this point, and a special CLEAR code is generated
736 * for the decompressor. Late addition: construct the table according to
737 * file size for noticeable speed improvement on small files. Please direct
738 * questions about this implementation to ames!jaw.
741 void compress(void) {
743 register code_int i = 0;
745 register code_int ent;
747 register code_int disp;
748 #else /* Normal machine */
751 register code_int hsize_reg;
756 putchar(magic_header[0]); putchar(magic_header[1]);
757 putchar((char)(maxbits | block_compress));
761 #endif /* COMPATIBLE */
764 bytes_out = 3; /* includes 3-byte header mojo */
769 checkpoint = CHECK_GAP;
770 maxcode = MAXCODE(n_bits = INIT_BITS);
771 free_ent = ((block_compress) ? FIRST : 256 );
773 /* Added 890529 for piecewise stuff */
775 mycheckpoint = piecesize;
776 curwin = firstwin = (struct w_info *) malloc( sizeof(struct w_info) );
778 curwin->w_out_loc = bytes_out;
779 curwin->w_next = NULL;
782 else mycheckpoint = 0x7fffffff;
787 for ( fcode = (long) hsize; fcode < 65536L; fcode *= 2L )
789 hshift = 8 - hshift; /* set hash code range bound */
792 cl_hash( (count_int) hsize_reg); /* clear hash table */
794 #ifdef SIGNED_COMPARE_SLOW
795 while ( (c = getchar()) != (unsigned) EOF ) {
797 while ( (c = getchar()) != EOF ) {
801 /* added 890529, fixed 890828 */
802 if (in_count > (long)mycheckpoint) {
803 /* Force a checkpoint: Flush everything and put out a CLEAR
804 before processing this char. This is the heart of the
805 piecewise processing stuff.
807 mycheckpoint += piecesize;
808 output ( (code_int) ent );
814 fprintf( stderr, "Checkpoint %d (%ld) at %ld\n",
815 cpcnt, in_count, bytes_out );
816 /* Also keep track in global data structure */
817 curwin->w_next = (struct w_info *) malloc( sizeof(struct w_info) );
818 curwin = curwin->w_next;
819 curwin->w_count = cpcnt++;
820 curwin->w_out_loc = bytes_out;
821 curwin->w_next = NULL;
826 fcode = (long) (((long) c << maxbits) + ent);
827 i = ((c << hshift) ^ ent); /* xor hashing */
829 if ( htabof (i) == fcode ) {
832 } else if ( (long)htabof (i) < 0 ) /* empty slot */
834 disp = hsize_reg - i; /* secondary hash (after G. Knott) */
838 if ( (i -= disp) < 0 )
841 if ( htabof (i) == fcode ) {
845 if ( (long)htabof (i) > 0 )
848 output ( (code_int) ent );
852 #ifdef SIGNED_COMPARE_SLOW
853 if ( (unsigned) free_ent < (unsigned) maxmaxcode) {
855 if ( free_ent < maxmaxcode) {
857 codetabof (i) = free_ent++; /* code -> hashtable */
860 else if ( (count_int)in_count >= checkpoint && block_compress )
864 * Put out the final code.
866 output( (code_int)ent );
868 output( (code_int)-1 );
870 if (piecesize != 0) {
873 /* Write out stats */
874 statf = fopen( "squish.stats", "w" );
875 fwrite( &piecesize, sizeof(int), 1, statf ); /* size of window */
876 fwrite( &cpcnt, sizeof(int), 1, statf ); /* number of windows */
879 /* location of compressed window in output file */
880 fwrite( &(curwin->w_out_loc), sizeof(int), 1, statf );
881 curwin = curwin->w_next;
882 } while ( curwin != NULL );
883 /* 890828 terminating entry so can easily find size of last window */
884 fwrite( &bytes_out, sizeof(int), 1, statf );
889 * Print out stats on stderr
891 if(zcat_flg == 0 && !quiet) {
894 "%ld chars in, %ld codes (%ld bytes) out, compression factor: ",
895 in_count, out_count, bytes_out );
896 prratio( stderr, in_count, bytes_out );
897 fprintf( stderr, "\n");
898 fprintf( stderr, "\tCompression as in compact: " );
899 prratio( stderr, in_count-bytes_out, in_count );
900 fprintf( stderr, "\n");
901 fprintf( stderr, "\tLargest code (of last block) was %d (%d bits)\n",
902 free_ent - 1, n_bits );
904 fprintf( stderr, "Compression: " );
905 prratio( stderr, in_count-bytes_out, in_count );
908 if(bytes_out > in_count) /* exit(2) if no savings */
913 /*****************************************************************
916 * Output the given code.
918 * code: A n_bits-bit integer. If == -1, then EOF. This assumes
919 * that n_bits <= (long)wordsize - 1.
921 * Outputs code to the file.
923 * Chars are 8 bits long.
925 * Maintain a BITS character long buffer (so that 8 codes will
926 * fit in it exactly). Use the VAX insv instruction to insert each
927 * code in turn. When the buffer fills up, empty it and start over.
930 static char buf[BITS];
/*
 * Byte masks indexed by a bit count n in the range 0..8:
 *   lmask[n] has the low n bits clear and the remaining high bits set;
 *   rmask[n] has the low n bits set and the remaining high bits clear.
 * The bit-packing code uses them to splice a shifted code into a partly
 * filled byte (rmask keeps the bits already there, lmask admits the new
 * ones) and, when reading, rmask isolates the final high-order bits.
 */
933 char_type lmask[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00};
934 char_type rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
937 void output(code_int code)
944 * On the VAX, it is important to have the register declarations
945 * in exactly the order given, or the asm will break.
947 * This is also true for the 68020 asm code.
949 register int r_off = offset, bits= n_bits;
950 register char * bp = buf;
954 fprintf( stderr, "%5d%c", code,
955 (col+=6) >= 74 ? (col = 0, '\n') : ' ' );
959 /* VAX DEPENDENT!! Implementation on other machines is below.
961 * Translation: Insert BITS bits from the argument starting at
962 * offset bits from the beginning of buf.
964 0; /* Work around for pcc -O bug with asm and if stmt */
965 asm( "insv 4(ap),r11,r10,(r9)" );
966 #else /* not a vax */
969 * MC68020 DEPENDENT!!
970 * This code mimics the "#ifndef vax" code below, because the
971 * 68020 'bfins' instruction is *not* the same as the vax 'insv'.
974 /* register int r_off = offset, bits= n_bits; d7,d6 */
975 register width; /* d5 */
976 register r_code; /* d4 */
979 width = r_off; width &= 7; width -= 8;
980 r_off &= ~0x07; /* previous byte boundary */
982 asm(" bfins %d4,(%a5){%d7:%d5}");
987 asm (" bfins %d4,(%a5){%d7:&8}");
991 asm(" bfins %d4,(%a5){%d7:&8}");
993 #else /* not 68020 */
995 * byte/bit numbering on the VAX is simulated by the following code
998 * Get to the first byte.
1003 * Since code is always >= 8 bits, only need to mask the first
1006 *bp = (*bp & rmask[r_off]) | ((code << r_off) & lmask[r_off]);
1008 bits -= (8 - r_off);
1010 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
1019 #endif /* MC68020 */
1022 if ( offset == (n_bits << 3) ) {
1033 * If the next entry is going to be too big for the code size,
1034 * then increase it, if possible.
1036 if ( free_ent > maxcode || (clear_flg > 0))
1039 * Write the whole buffer, because the input side won't
1040 * discover the size increase until after it has read it.
1043 if((int)fwrite( buf, 1, n_bits, stdout ) != n_bits)
1045 bytes_out += n_bits;
1050 maxcode = MAXCODE (n_bits = INIT_BITS);
1055 if ( n_bits == maxbits )
1056 maxcode = maxmaxcode;
1058 maxcode = MAXCODE(n_bits);
1062 fprintf( stderr, "\nChange to %d bits\n", n_bits );
1069 * At EOF, write the rest of the buffer.
1072 fwrite( buf, 1, (offset + 7) / 8, stdout );
1073 bytes_out += (offset + 7) / 8;
1078 fprintf( stderr, "\n" );
1080 if( ferror( stdout ) )
1086 * Decompress stdin to stdout. This routine adapts to the codes in the
1087 * file building the "string" table on-the-fly; requiring no table to
1088 * be stored in the compressed file. The tables used herein are shared
1089 * with those of the compress() routine. See the definitions above.
1092 void decompress(void) {
1093 register char_type *stackp;
1094 register int finchar;
1095 register code_int code, oldcode, incode;
1098 * As above, initialize the first 256 entries in the table.
1100 maxcode = MAXCODE(n_bits = INIT_BITS);
1101 for ( code = 255; code >= 0; code-- ) {
1102 tab_prefixof(code) = 0;
1103 tab_suffixof(code) = (char_type)code;
1105 free_ent = ((block_compress) ? FIRST : 256 );
1107 finchar = oldcode = getcode();
1108 if(oldcode == -1) /* EOF already? */
1109 return; /* Get out of here */
1110 putchar( (char)finchar ); /* first code must be 8 bits = char */
1111 if(ferror(stdout)) /* Crash if can't write */
1115 while ( (code = getcode()) > -1 ) {
1117 if ( (code == CLEAR) && block_compress ) {
1118 for ( code = 255; code >= 0; code-- )
1119 tab_prefixof(code) = 0;
1121 free_ent = FIRST - 1;
1122 if ( (code = getcode ()) == -1 ) /* O, untimely death! */
1127 * Special case for KwKwK string.
1129 if ( code >= free_ent ) {
1130 *stackp++ = finchar;
1135 * Generate output characters in reverse order
1137 #ifdef SIGNED_COMPARE_SLOW
1138 while ( ((unsigned long)code) >= ((unsigned long)256) ) {
1140 while ( code >= 256 ) {
1142 *stackp++ = tab_suffixof(code);
1143 code = tab_prefixof(code);
1145 *stackp++ = finchar = tab_suffixof(code);
1148 * And put them out in forward order
1151 putchar ( *--stackp );
1152 while ( stackp > de_stack );
1155 * Generate the new entry.
1157 if ( (code=free_ent) < maxmaxcode ) {
1158 tab_prefixof(code) = (unsigned short)oldcode;
1159 tab_suffixof(code) = finchar;
1163 * Remember previous code.
1172 /*****************************************************************
1175 * Read one code from the standard input. If EOF, return -1.
1179 * code or -1 is returned.
1182 code_int getcode(void) {
1184 * On the VAX, it is important to have the register declarations
1185 * in exactly the order given, or the asm will break.
1187 * This is also true for the 68020 asm code.
1189 register code_int code;
1190 static int boffset = 0, size = 0;
1191 static char_type lbuf[BITS];
1192 register int r_off, bits;
1193 register char_type *bp = lbuf;
1195 if ( clear_flg > 0 || boffset >= size || free_ent > maxcode ) {
1197 * If the next entry will be too big for the current code
1198 * size, then we must increase the size. This implies reading
1199 * a new buffer full, too.
1201 if ( free_ent > maxcode ) {
1203 if ( n_bits == maxbits )
1204 maxcode = maxmaxcode; /* won't get any bigger now */
1206 maxcode = MAXCODE(n_bits);
1208 if ( clear_flg > 0) {
1209 maxcode = MAXCODE (n_bits = INIT_BITS);
1212 size = fread( lbuf, 1, n_bits, stdin );
1214 return -1; /* end of file */
1216 /* Round size down to integral number of codes */
1217 size = (size << 3) - (n_bits - 1);
1222 asm( "extzv r10,r9,(r8),r11" );
1223 #else /* not a vax */
1226 * MC68020 DEPENDENT!!
1227 * This code mimics the "#ifndef vax" code below, because the
1228 * 68020 'bfextu' instruction is *not* the same as the vax 'extzv'.
1231 /* register code_int code; d7 */
1232 /* register int r_off = boffset, bits= n_bits; d6,d5 */
1233 register tmp; /* d4 */
1237 /* Get first part (low order bits) */
1238 code = *bp++; code >>= r_off;
1239 tmp = 8; tmp -= r_off;
1241 r_off = tmp; /* now, offset into code word */
1242 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
1244 tmp = *bp++; tmp <<= r_off; code |= tmp;
1248 /* high order bits. */
1249 tmp = 8; tmp -= bits;
1250 asm(" bfextu (%a5){%d4:%d5},%d4");
1254 #else /* not 68020 */
1256 * Get to the first byte.
1260 /* Get first part (low order bits) */
1262 code = ((*bp++ >> r_off) & rmask[8 - r_off]) & 0xff;
1264 code = (*bp++ >> r_off);
1265 #endif /* NO_UCHAR */
1266 bits -= (8 - r_off);
1267 r_off = 8 - r_off; /* now, offset into code word */
1268 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
1271 code |= (*bp++ & 0xff) << r_off;
1273 code |= *bp++ << r_off;
1274 #endif /* NO_UCHAR */
1278 /* high order bits. */
1279 code |= (*bp & rmask[bits]) << r_off;
1280 #endif /* MC68020 */
1291 * Just print out codes from input file. For debugging.
1296 bits = n_bits = INIT_BITS;
1297 maxcode = MAXCODE(n_bits);
1298 free_ent = ((block_compress) ? FIRST : 256 );
1299 while ( ( code = getcode() ) >= 0 ) {
1300 if ( (code == CLEAR) && block_compress ) {
1301 free_ent = FIRST - 1;
1304 else if ( free_ent < maxmaxcode )
1306 if ( bits != n_bits ) {
1307 fprintf(stderr, "\nChange to %d bits\n", n_bits );
1311 fprintf(stderr, "%5d%c", code, (col+=6) >= 74 ? (col = 0, '\n') : ' ' );
1313 putc( '\n', stderr );
1317 code_int sorttab[1<<BITS]; /* sorted pointers into htab */
/*
 * dump_tab: write the string table to stderr, one entry per line.
 *
 * When do_decomp == 0 the entries are read from htab, visited in code order
 * through sorttab.  Otherwise, and only when debug is clear, they are read
 * through tab_prefixof()/tab_suffixof().  The text of each entry is built
 * back to front in de_stack by in_stack() and then written with one fwrite().
 */
1319 dump_tab() /* dump string table */
1321 register int i, first;
/* de_stack is filled downwards from STACK_SIZE; stack_top is the lowest index in use. */
1323 #define STACK_SIZE 15000
1324 int stack_top = STACK_SIZE;
1327 if(do_decomp == 0) { /* compressing */
1328 register int flag = 1;
/* Map each code to its htab slot.  Slots with a negative htabof() value are
 * skipped (presumably they are empty -- TODO confirm). */
1330 for(i=0; i<hsize; i++) { /* build sort pointers */
1331 if((long)htabof(i) >= 0) {
1332 sorttab[codetabof(i)] = i;
/* Codes below `first` are not listed. */
1335 first = block_compress ? FIRST : 256;
1336 for(i = first; i < free_ent; i++) {
1337 fprintf(stderr, "%5d: \"", i);
/* Pushed first so that the closing quote and newline come out last. */
1338 de_stack[--stack_top] = '\n';
1339 de_stack[--stack_top] = '"';
/* An htab value is split here into a character (bits above maxbits) and a
 * code (low maxbits bits) that indexes sorttab for the next step of the chain. */
1340 stack_top = in_stack((htabof(sorttab[i])>>maxbits)&0xff,
1342 for(ent=htabof(sorttab[i]) & ((1<<maxbits)-1);
1344 ent=htabof(sorttab[ent]) & ((1<<maxbits)-1)) {
1345 stack_top = in_stack(htabof(sorttab[ent]) >> maxbits,
1348 stack_top = in_stack(ent, stack_top);
/* Emit everything pushed for this entry, then empty the stack. */
1349 fwrite( &de_stack[stack_top], 1, STACK_SIZE-stack_top, stderr);
1350 stack_top = STACK_SIZE;
1352 } else if(!debug) { /* decompressing */
1354 for ( i = 0; i < free_ent; i++ ) {
1356 c = tab_suffixof(ent);
/* Entry header: code, prefix code, and the suffix shown either as a quoted
 * character or as a three-digit octal escape. */
1357 if ( isascii(c) && isprint(c) )
1358 fprintf( stderr, "%5d: %5d/'%c' \"",
1359 ent, tab_prefixof(ent), c );
1361 fprintf( stderr, "%5d: %5d/\\%03o \"",
1362 ent, tab_prefixof(ent), c );
/* Pushed first so that the closing quote and newline come out last. */
1363 de_stack[--stack_top] = '\n';
1364 de_stack[--stack_top] = '"';
/* Follow the prefix chain, pushing one suffix per step; the walk ends after a
 * code below FIRST has been pushed.
 * NOTE(review): ent is compared with and assigned NULL although it is used as
 * a code; presumably 0 is meant -- confirm against ent's declared type. */
1365 for ( ; ent != NULL;
1366 ent = (ent >= FIRST ? tab_prefixof(ent) : NULL) ) {
1367 stack_top = in_stack(tab_suffixof(ent), stack_top);
/* Emit everything pushed for this entry, then empty the stack. */
1369 fwrite( &de_stack[stack_top], 1, STACK_SIZE - stack_top, stderr );
1370 stack_top = STACK_SIZE;
/*
 * in_stack: push a printable rendering of character c onto de_stack.
 *
 * c          character value to render
 * stack_top  current lowest used index of de_stack; it is pre-decremented
 *            for every byte stored
 *
 * Callers assign the result back to their own stack_top, so the updated
 * index is presumably the return value -- TODO confirm.
 * Because the stack grows downwards, bytes are pushed in reverse of the
 * order in which they will be printed.
 */
1376 in_stack(c, stack_top)
1377 register c, stack_top;
/* Printable ASCII other than backslash, and the space character, are stored unchanged. */
1379 if ( (isascii(c) && isprint(c) && c != '\\') || c == ' ' ) {
1380 de_stack[--stack_top] = c;
/* Characters with a conventional escape push the escape letter. */
1383 case '\n': de_stack[--stack_top] = 'n'; break;
1384 case '\t': de_stack[--stack_top] = 't'; break;
1385 case '\b': de_stack[--stack_top] = 'b'; break;
1386 case '\f': de_stack[--stack_top] = 'f'; break;
1387 case '\r': de_stack[--stack_top] = 'r'; break;
1388 case '\\': de_stack[--stack_top] = '\\'; break;
/* Three octal digits, least significant pushed first. */
1390 de_stack[--stack_top] = '0' + c % 8;
1391 de_stack[--stack_top] = '0' + (c / 8) % 8;
1392 de_stack[--stack_top] = '0' + c / 64;
/* Leading backslash of the escape (presumably shared by all escaped forms -- TODO confirm). */
1395 de_stack[--stack_top] = '\\';
1409 * This routine returns 1 if we are running in the foreground and stderr
1412 int foreground(void)
/* bgnd_flag is tested first; stderr (descriptor 2) is checked with isatty()
 * only when that flag is clear. */
1414 if(bgnd_flag) { /* background? */
1416 } else { /* foreground */
1417 if(isatty(2)) { /* and stderr is a tty */
/*
 * oops: report corrupt input.
 * The message is written to stderr only when do_decomp == 1.
 */
1431 void oops (void) /* wild pointer -- assume bad input */
1433 if ( do_decomp == 1 )
1434 fprintf ( stderr, "uncompress: corrupt input\n" );
/*
 * cl_block: checkpoint handling for block compression.
 *
 * Schedules the next checkpoint and computes `rat`, the ratio
 * in_count / bytes_out as a fixed-point value with 8 fractional bits.
 */
1440 void cl_block (void) /* table clear for block compress */
1442 register long int rat;
/* The next checkpoint is CHECK_GAP input counts from now. */
1444 checkpoint = in_count + CHECK_GAP;
/* Diagnostic line on stderr: input count and the ratio printed by prratio(). */
1447 fprintf ( stderr, "count: %ld, ratio: ", in_count );
1448 prratio ( stderr, in_count, bytes_out );
1449 fprintf ( stderr, "\n");
/* For a large in_count, (in_count << 8) would overflow, so the divisor is
 * scaled down by 8 bits instead of scaling the dividend up. */
1453 if(in_count > 0x007fffff) { /* shift will overflow */
1454 rat = bytes_out >> 8;
1455 if(rat == 0) { /* Don't divide by zero */
1458 rat = in_count / rat;
/* NOTE(review): no zero check on bytes_out is visible for this division -- confirm it cannot be 0 here. */
1461 rat = (in_count << 8) / bytes_out; /* 8 fractional bits */
/*
 * do_clearblock: clear the compression tables and signal it in the output.
 *
 * Steps, in order: dump_tab() to show the string table, cl_hash() over
 * hsize entries to reset the hash table, output(CLEAR) to emit the clear
 * code, then a diagnostic line on stderr with the current byte counters.
 */
1470 void do_clearblock(void)
1475 dump_tab(); /* dump string table */
1477 cl_hash ( (count_int) hsize );
1480 output ( (code_int) CLEAR );
/* The counters are printed with %ld elsewhere in this file and passed to
 * prratio() as long int, so "%d" was the wrong conversion.  The casts keep
 * the call correct whatever the counters' declared width is. */
1483 fprintf ( stderr, "clear at output byte %ld, input byte %ld\n",
1484 (long) bytes_out, (long) in_count );
/*
 * cl_hash: reset the code table.
 *
 * lhsize  presumably the number of htab entries to reset -- TODO confirm
 *         against callers.
 *
 * On normal machines htab is walked through a single pointer that starts
 * at htab + lhsize.  With XENIX_16 defined, htab is indexed as htab[j][i]
 * and handled in pieces of 8192 entries.
 */
1489 void cl_hash(count_int lhsize) /* reset code table */
1491 #ifndef XENIX_16 /* Normal machine */
1492 register count_int *htab_p = htab+lhsize;
1495 register long k = lhsize;
1496 register count_int *htab_p;
/* m1 holds -1, presumably the value written into every cleared entry -- TODO confirm. */
1499 register long m1 = -1;
/* k counts the entries still to do; each pass of this loop covers one 8192-entry piece. */
1502 for(j=0; j<=8 && k>=0; j++,k-=8192) {
1507 htab_p = &(htab[j][i]);
/* Main loop consumes 16 entries per iteration. */
1513 do { /* might use Sys V memset(3) here */
1531 } while ((i -= 16) >= 0);
/* i is negative when the loop above exits; adding 16 back gives the 0..15 leftover entries. */
1536 for ( i += 16; i > 0; i-- )
/*
 * prratio: print num/den as a percentage with two decimal places.
 *
 * stream  output stream
 * num     numerator
 * den     denominator
 *
 * q is the ratio scaled by 10000 (hundredths of a percent); it is printed
 * as q/100, a dot, and q%100 zero-padded to two digits, followed by '%'.
 */
1540 void prratio(FILE *stream,long int num,long int den)
1542 register int q; /* Doesn't need to be long */
/* 10000 * num would exceed 2147483647 for num above this limit, so the
 * divisor is scaled down instead.
 * NOTE(review): den / 10000L is 0 whenever |den| < 10000, which makes the
 * next line divide by zero; den == 0 has the same effect on the other path. */
1544 if(num > 214748L) { /* 2147483647/10000 */
1545 q = num / (den / 10000L);
1547 q = 10000L * num / den; /* Long calculations, though */
1553 fprintf(stream, "%d.%02d%%", q / 100, q % 100);
1558 fprintf(stderr, "%s\n", rcs_ident);
1559 fprintf(stderr, "Options: ");
1561 fprintf(stderr, "vax, ");
1564 fprintf(stderr, "MC68020, ");
1567 fprintf(stderr, "NO_UCHAR, ");
1569 #ifdef SIGNED_COMPARE_SLOW
1570 fprintf(stderr, "SIGNED_COMPARE_SLOW, ");
1573 fprintf(stderr, "XENIX_16, ");
1576 fprintf(stderr, "COMPATIBLE, ");
1579 fprintf(stderr, "DEBUG, ");
1582 fprintf(stderr, "BSD4_2, ");
1584 fprintf(stderr, "BITS = %d\n", BITS);