 /*  *		l z c m p 2 . c   *9  * Actually do compression.  Terminology (and algorithm):   *E  * Assume the input string is "abcd", we have just processed "ab" andpF  * read 'c'.  At this point, a "prefix code" will be assigned to "ab".E  * Search in the prefix:character memory (either the "fast memory" or*D  * the hash-code table) for the code followed by this character.  IfA  * found, assign the code found to the "prefix code" and read therA  * next character.  If not found, output the current prefix code,s@  * generate a new prefix code and store "old_prefix:char" in the-  * table with "new_prefix" as its definition.   *  * Naming conventions:-  *   code	a variable containing a prefix coden0  *   c or char	a variable containing a character  *F  * There are three tables that are searched (dependent on compile-time&  * and execution time considerations):C  *   fast	Direct table-lookup -- requires a huge amount of physical*)  *		(non-paged) memory, but is very fast. "  *   hash	Hash-coded table-lookup.A  *   cache	A "look-ahead" cache for the hash table that optimizes"A  *		searching for the most frequent character.  This considerably*;  *		speeds up processing for raster-images (for example) at   *		a modest amount of memory.I  * Structures are used to hold the actual tables to simplify organizationa  * of the program.  *  * Subroutines:	B  *    compress()	performs data compression on an input datastream.C  *    init_compress()	called by the output routine to clear tables.   */    #include	"lz.h"=   /*  * General variables6  * Cleared by init_compress on a "hard initialization"0  * outputcode() in lzcmp3.c refers to next_code.  */   * long int	in_count;		/* Length of input		*/7 long int	out_count;		/* Bytes written to output file	*/ ; static flag	first_clear = TRUE;	/* Don't zero first time	*/ , code_int	next_code;		/* Next output code		*/G static count_int checkpoint = CHECK_GAP; /* When to test ratio again	*/ 4 static long	ratio = 0;		/* Ratio for last segment	*/   /*E  * These global parameters are set by mainline code.  Unchanged here.   */ 5 extern short	maxbits;		/* Settable max # bits/code	*/ ? extern short	block_compress;		/* For old-style compatibility	*/ = extern code_int	maxmaxcode;		/* Actual maximum output code	*/ 2 extern long	tot_incount;		/* Total input count		*/4 extern long	tot_outcount;		/* Total output count		*/5 extern code_int	hsize;			/* Actual hash table size	*/    #ifdef XENIX_16  static count_int htab0[8192];  static count_int htab1[8192];  static count_int htab2[8192]; static count_int htab3[8192];  static count_int htab4[8192];  static count_int htab5[8192];  static count_int htab6[8192];  static count_int htab7[8192]; & static count_int htab8[HSIZE - 65536];  static count_int *hashtab[9] = {A     htab0, htab1, htab2, htab3, htab4, htab5, htab6, htab7, htab8  };   static U_short code0[16384]; static U_short code1[16384]; static U_short code2[16384]; static U_short code3[16384];$ static U_short code4[HSIZE - 65536]; static U_short *codetab[5] = {%     code0, code1, code3, code3, code4  }   @ #define HASH(i)		(hashtab[((unsigned) (i)) >> 13][(i) & 0x1FFF])@ #define CODE(i)		(codetab[((unsigned) (i)) >> 14][(i) & 0x3FFF])   #else  count_int	hashtab[HSIZE];  U_short		codetab[HSIZE];   #define HASH(i)		hashtab[i]  #define CODE(i)		codetab[i]  #endif   /*  * compress a datastream  *M  * Algorithm:  on large machines, for maxbits <= FBITS, use fast direct table L  * lookup on the prefix code / next character combination.  For smaller codeO  * size, use open addressing modular division double hashing (no chaining), ala M  * Knuth vol. 3, sec. 6.4 Algorithm D, along with G. Knott's relatively-prime M  * secondary probe.  Do block compression with an adaptive reset, whereby the L  * code table is cleared when the compression ratio decreases, but after theM  * table fills.  The variable-length output codes are re-sized at this point,rJ  * and a special LZ_CLEAR code is generated for the decompressor.  For theG  * megamemory version, the sparse array is cleared indirectly through aiG  * "shadow" output code history.  Late additions: for the hashing code,tN  * construct the table according to file size for noticeable speed improvementH  * on small files.  Also detect and cache codes associated with the mostO  * common character to bypass hash calculation on these codes (a characteristiceM  * of highly-compressable raster images).  Please direct questions about thisd  * implementation to ames!jaw.  */[   compress(in)+ STREAM		*in;		/* Input stream structure		*/p /*F  * Compress driver.  Global fsize is the size of the entire datastream?  * (from LZ_STX or LZ_SOH to the terminating LZ_ETX).  You must K  * force a reinitialization -- by calling outputcode() with a new header --eD  * if size is changed.  If the "newer" output format is chosen (withA  * data streams delimited by LZ_SOH/LZ_STX, init_compress will be F  * called automatically.  Otherwise, you must call init_compress(TRUE)0  * before calling compress() for the first time.  */  { 1 	register long		hash_code;	/* What we look for	*// 	register code_int	i;		/* Index into vectors	*/ + 	register int		c;		/* Current input char	*/ . 	register code_int	code;		/* Substring code	*/5 	register int		displacement;	/* For secondary hash	*/ 6 	register code_int	hsize_reg;	/* Size of hash table	*/, 	register int		hshift;		/* For xor hasher	*/   	if ((code = GET(in)) == EOF)< 	    return; 	in_count++; 	hsize_reg = hsize;l 	/*  	 * Set hash code range bound  	 */ 	hshift = 0;D 	for (hash_code = (long) hsize; hash_code < 65536L; hash_code <<= 1) 	    hshift++; 	hshift = 8 - hshift; * 	while ((c = GET(in)) != (unsigned) EOF) { 	    in_count++;7 	    hash_code = (long) (((long) c << maxbits) + code);e2 	    i = (c << hshift) ^ code;		/* XOR hashing		*/< 	    if (HASH(i) == hash_code) {		/* Found at first slot?	*/ 		code = CODE(i);r 		continue;  	    }3 	    else if ((long) HASH(i) < 0)	/* empty slot		*/= 		goto nomatch;o7 	    displacement = hsize_reg - i;	/* secondary hash	*/	 	    if (i == 0) 		displacement = 1;m probe:5 	    if ((i -= displacement) < 0)	/* Wrap around?		*/, 		i += hsize_reg;a< 	    if (HASH(i) == hash_code) {		/* Found in hash table?	*/- 		code = CODE(i);			/* Set new prefix code	*/l( 		continue;			/* Read next input char	*/ 	    }; 	    else if ((long) HASH(i) > 0)	/* If slot is occupied	*/r) 		goto probe;			/* Look somewhere else	*/h nomatch: 	    /*g= 	     * Output the current prefix and designate a new prefix.r< 	     * If the input character was the "hog", save it in the> 	     * look-ahead cache table.  Then, save in the hash table. 	     */< 	    outputcode((code_int) code);	/* No match, put prefix	*/ #if SIGNED_COMPARE_SLOW 8 	    if ((unsigned) next_code < (unsigned) maxmaxcode) { #elseo" 	    if (next_code < maxmaxcode) { #endif1 		CODE(i) = next_code++;		/* code -> hashtable	*/i 		HASH(i) = hash_code; 	    } 	    else if (block_compress, 		  && (count_int) in_count >= checkpoint) {
 		clear(); 	    }+ 	    code = c;				/* Start new substring	*/v 	} 	/*v# 	 * At EOF, put out the final code.o 	 */ 	outputcode((code_int) code);  }m A clear()( /*<  * Check the compression ratio to see whether it is going up:  * or staying the same.  If it is going down, the internal8  * statistics of the file have changed, so clear out our9  * tables and start over.  Inform the decompressor of the )  * change by sending out a LZ_CLEAR code.e  */  {d 	register long int	rat;y  # 	checkpoint = in_count + CHECK_GAP; 	 #if DEBUGe 	if (verbose > 2) {a9 	    divout("at clear() test",  in_count, out_count, "");d= 	    fprintf(stderr, ", ratio at entry: %ld.%02ld, gap %d\n",e7 		rat / 256L, ((rat & 255L) * 100L) / 256L, CHECK_GAP);; 	} #endif8 	if (in_count > 0x007FFFFL) {		/* Shift will overflow	*/ 	    rat = out_count >> 8; 	    if (rat == 0) 		rat = 0x7FFFFFFFL; 	    else {  		rat = in_count / rat;e 	    } 	} 	else {l' 	    rat = (in_count << 8) / out_count;x 	} 	if (rat > ratio)s 	    ratio = rat;  	else {l	 #if DEBUG= 	    if (verbose > 0) {t= 		fprintf(stderr, "Resetting compression, in %ld, out %ld\n",o 		    in_count, out_count);a2 		fprintf(stderr, "Old ratio: %ld == (%ld.%02ld)",; 		    ratio, ratio / 256L, ((ratio & 255L) * 100L) / 256L);b> 		fprintf(stderr, ", test ratio: %ld = (%ld.%02ld), gap %d\n",@ 		    rat, rat / 256L, ((rat & 255L) * 100L) / 256L, CHECK_GAP); 	    } #endif? 	    outputcode((code_int) LZ_CLEAR);	/* Calls init_compress	*/  	} }s e init_compress(full_init)4 flag		full_init;	/* TRUE for full initialization		*/ /*>  * Clear the tables.  Called by outputcode() on LZ_SOH, LZ_STX5  * (full_init TRUE) or on LZ_CLEAR (full_init FALSE). +  * init_compress() is not called on LZ_EOR.u  */0 {* #ifdef XENIX_16o 	register count_int	*hp; 	register int		n;x 	register int		j;c 	register code_int	k;r   	k = hsize;e  	for (j = 0; k > 0; k -= 8192) { 	    i = (k < 8192) ? k : 8192;s 	    hp = hashtab[j++];	 	    n = i >> 4; 	    switch (i & 15) { 	    case 15:	*hp++ = -1;	 	    case 14:	*hp++ = -1;S 	    case 13:	*hp++ = -1;	 	    case 12:	*hp++ = -1;  	    case 11:	*hp++ = -1;  	    case 10:	*hp++ = -1;  	    case  9:	*hp++ = -1;  	    case  8:	*hp++ = -1;  	    case  7:	*hp++ = -1;  	    case  6:	*hp++ = -1;  	    case  5:	*hp++ = -1;  	    case  4:	*hp++ = -1;  	    case  3:	*hp++ = -1;  	    case  2:	*hp++ = -1;  	    case  1:	*hp++ = -1;  	    } 	    while (--n >= 0) { 1 		*hp++ = -1; *hp++ = -1; *hp++ = -1; *hp++ = -1; 1 		*hp++ = -1; *hp++ = -1; *hp++ = -1; *hp++ = -1; 1 		*hp++ = -1; *hp++ = -1; *hp++ = -1; *hp++ = -1; 1 		*hp++ = -1; *hp++ = -1; *hp++ = -1; *hp++ = -1;m 	   }n 	} #elseo 	register count_int	*hp; 	register code_int	n;i   	hp = &hashtab[0];' 	n = hsize >> 4;			/* divide by 16			*/t 	switch (hsize & 15) { 	case 15:	*hp++ = -1;o 	case 14:	*hp++ = -1;e 	case 13:	*hp++ = -1;y 	case 12:	*hp++ = -1;r 	case 11:	*hp++ = -1;e 	case 10:	*hp++ = -1;l 	case  9:	*hp++ = -1;. 	case  8:	*hp++ = -1;t 	case  7:	*hp++ = -1;r 	case  6:	*hp++ = -1;r 	case  5:	*hp++ = -1;f 	case  4:	*hp++ = -1;c 	case  3:	*hp++ = -1;* 	case  2:	*hp++ = -1;c 	case  1:	*hp++ = -1;i 	} 	while (--n >= 0) {w4 	    *hp++ = -1; *hp++ = -1; *hp++ = -1; *hp++ = -1;4 	    *hp++ = -1; *hp++ = -1; *hp++ = -1; *hp++ = -1;4 	    *hp++ = -1; *hp++ = -1; *hp++ = -1; *hp++ = -1;4 	    *hp++ = -1; *hp++ = -1; *hp++ = -1; *hp++ = -1; 	} #endif 	if (full_init) {e 	    tot_incount += in_count;  	    tot_outcount += out_count;r 	    in_count = 0; 	    out_count = 0;n 	    ratio = 0;  	} 	first_clear = FALSE;s 	next_code = firstcode;. }*