static char *sccsid = "@(#)sq.c         1.8u (UCF) 83/09/02";
/*
 *   sq.c - CP/M compatible file squeezer utility
 *
 *   compile as follows:
 *   cc [-DVAX] -O sq.c -o sq
 *       (define VAX only if running on VAX)
 */

#include <stdio.h>
/* #include <signal.h> */
 #define VAX 1    #define TRUE 1 #define FALSE 0  #define ERROR (-1)F #define PATHLEN 312     /* Number of characters allowed in pathname */ #define ALTNAME "sq.out"  + /* Definitions and external declarations */   6 #define RECOGNIZE 0xFF76        /* unlikely pattern */  0 /* *** Stuff for first translation module *** */   #define DLE 0x90    1 /* *** Stuff for second translation module *** */   3 #define SPEOF 256       /* special endfile token */ 7 #define NUMVALS 257     /* 256 data values plus SPEOF*/     1 #ifdef VAX   /*  we only want 16 bit integers  */    typedef short INT;  typedef unsigned short UNSIGNED;  0 #else   /*  PDP-11 and other 16-bit machines  */   typedef int INT; typedef unsigned UNSIGNED;   #endif    + /* Definitions and external declarations */   4 INT Usestd;     /* Use stdout for squeezed output */& UNSIGNED crc;   /* error check code */  0 /* *** Stuff for first translation module *** */  9 INT likect;     /*count of consecutive identical chars */  INT lastchar, newchar; unsigned char state;   /* states */  9 #define NOHIST  0       /*don't consider previous input*/ ; #define SENTCHAR 1      /*lastchar set, no lookahead yet */ @ #define SENDNEWC 2      /*newchar set, previous sequence done */C #define SENDCNT 3       /*newchar set, DLE sent, send count next */   1 /* *** Stuff for second translation module *** */   @ #define NOCHILD -1      /* indicates end of path through tree */B #define NUMNODES (NUMVALS + NUMVALS - 1)        /* nbr of nodes */  F #define MAXCOUNT (UNSIGNED) 65535       /* biggest UNSIGNED integer */  9 /* The following array of structures are the nodes of the @  * binary trees. 
The first NUMVALS nodes becomethe leaves of the>  * final tree and represent the values of the data bytes being*  * encoded and the special endfile, SPEOF.C  * The remaining nodes become the internal nodes of the final tree.   */    struct  nd {;         UNSIGNED weight;        /* number of appearances */ B         INT tdepth;             /* length on longest path in tre*/;         INT lchild, rchild;     /* indexes to next level */  } node[NUMNODES];   5 INT dctreehd;   /*index to head node of final tree */    /* This is the encoding table:.  * The bit strings have first bit in  low bit.=  * Note that counts were scaled so code fits UNSIGNED integer   */   < INT codelen[NUMVALS];           /* number of bits in code */A UNSIGNED code[NUMVALS];         /* code itself, right adjusted */ : UNSIGNED tcode;                 /* temporary code value */    ( /* Variables used by encoding process */  3 INT curin;      /* Value currently being encoded */ : INT cbitsrem;   /* Number of code string bits remaining */G UNSIGNED ccode; /* Current code shifted so next code bit is at right */   = /* This program compresses a file without losing information. 8  * The usq.com program is required to unsqueeze the file  * before it can be used.   *!  * Typical compression rates are: &  *      .COM    6%      (Don't bother).  *      .ASM    33%     (using full ASCII set)?  *      .DIC    46%     (using only uppercase and a few others) 3  * Squeezing a really big file takes a few minutes.   *
  * Useage:  *      sq file ...   *  *D  * The squeezed file name is formed by changing the second from last:  * letter of the file type to Q. If there is no file type,:  * the squeezed file type is QQQ. If the name exists it is  * overwritten!   *>  * The transformations compress strings of identical bytes and?  * then encode each resulting byte value and EOF as bit strings =  * having lengths in inverse proportion to their frequency of ?  * occurrence in the intermediate input stream. The latter uses =  * the Huffman algorithm. Decoding information is included in <  * the squeezed file, so squeezing short files or files withA  * uniformly distributed byte values will actually increase size.   */    /* CHANGE HISTORY:/  * 1.5u **nix version - means output to stdout. >  *  (stdin not allowed becuase sq needs to rewind input, which  *  won't work with pipes.) 7  * Filename generation changed to suit **nix and stdio. >  * 1.6u machine independent output for file compatibility with>  *      original CP/M version SQ, when running on machine with2  *      IBM byte ordering such as Z8000 and 68000.B  * 1.7u machine independence was still lacking for 32-bit machines?  *      like the VAX-11/780, so a typedef was added.  No action <  *      need be taken if running on a 16-bit machine, but if=  *      running on a VAX, define VAX either on the cc line or 8  *      in the program preamble.   Ben Goldfarb 12/13/82;  * 1.8u Modified to run under CI-86 compiler for the IBM PC $  *      Robert J. Beilstein 09/02/83  */   ! #define VERSION "1.7u   12-13-82"   J INT inbackground = 0;  /* change to 1 to suppress informative messages */   & INT buildenc(), gethuff(), getc_crc();   main(argc, argv)	 INT argc; 
 char *argv[];  {          register INT i,c;  /*  -         if (signal(SIGINT, SIG_IGN)==SIG_IGN)                  inbackground++;          else(                 signal(SIGINT, SIG_DFL);.         signal(SIGHUP, SIG_IGN);           */   -         /* Process the parameters in order */ #         for(i = 1; i < argc; ++i) { +                 if(strcmp(argv[i], "-")==0) &                         Usestd = TRUE;	         } #         for(i = 1; i < argc; ++i) { +                 if(strcmp(argv[i], "-")!=0) &                         obey(argv[i]);	         }            if(argc < 2) {                 fprintf(stderr,"File squeezer version %s by\n\tRichard Greenlaw\n\t251 Colony Ct.\n\tGahanna, Ohio 43230\n", VERSION);@                 fprintf(stderr, "Usage: sq [-] pathname ...\n");C                 fprintf(stderr, "\t- squeezed output to stdout\n");                  exit(1);	         }          exit(0); }  obey(p)  unsigned char *p;  {          unsigned char *w,*q;H         unsigned char outfile[PATHLEN+2];        /* output file spec. 
*/  *         /* First build output file name */           strcpy(outfile, p); .         /* Find and change output file type */F         for(w=q = outfile; *q != '\0'; ++q)     /* skip leading /'s */                 if( *q == '/')                          w = q+1;#         for(q = w; *q != '\0'; ++q)                  if(*q == '.') ,                         if(*(q + 1) == '\0')G                                 *q = '\0';      /* kill trailing dot */                          else0                                 switch(*(q+2)) {)                                 case 'q': )                                 case 'Q': b                                         fprintf(stderr, "sq: %s ignored ( already squeezed?)", p);/                                         return; *                                 case '\0':6                                         *(q+3) = '\0';7                                         /* fall thru */ (                                 default:7                                         *(q + 2) = 'Q'; 3                                         goto named; !                                 }          /* No file type */          strcat(outfile, ".QQQ"); named:         if(strlen(w)>14)M                 strcpy(outfile, ALTNAME);       /* check for too long name */          squeeze(p, outfile); }    squeeze(infile, outfile)  unsigned char *infile, *outfile; {          register INT i, c;2         FILE *inbuff, *outbuff; /* file buffers */           if (!inbackground)A                 fprintf(stderr, "\n%s -> %s: ", infile, outfile);   2         if((inbuff=fopen(infile, "rb")) == NULL) {?                 fprintf(stderr, "sq: can't open %s\n", infile);                  return; 	         }          if(Usestd)                 outbuff=stdout; 9         else if((outbuff=fopen(outfile, "wb")) == NULL) { B                 fprintf(stderr, "sq: can't create %s\n", outfile);                 fclose(inbuff);                  return; 	         }  1         
/* First pass - get properties of file */ 1         crc = 0;        /* initialize checksum */         if (!inbackground)/                 fprintf(stderr, "analyzing, ");          init_ncr();T         init_huff(inbuff);1         /* rewind(inbuff); */ fseek(inbuff,0L,0);   9         /* Write output file header with decoding info */ "         wrt_head(outbuff, infile);  +         /* Second pass - encode the file */         if (!inbackground)/                 fprintf(stderr, "squeezing, ");   -         init_ncr();     /* For second pass */   ;         /* Translate the input file into the output file */ +         while((c = gethuff(inbuff)) != EOF) B                 if(putc(c, outbuff) == ERROR && ferror(outbuff)) {=                         fprintf(stderr, "sq: write error\n"); &                         goto closeall;                 }          if (!inbackground),                 fprintf(stderr, " done.\n");	 closeall:          fclose(inbuff); 	 closeout:          fflush(outbuff);         fclose(outbuff); }     6 /* First translation - encoding of repeated characters5  * The code is byte for byte pass through except thate7  * DLE is encoded as DLE, zero and repeated byte values 3  * are encoded as value, DLE, count for count >= 3.   */   ( init_ncr()      /*initialize getcnr() */ {F         state = NOHIST;A }R   INTe getcnr(iob)n
 FILE *iob; {e         switch(state) {r         case NOHIST:)                 /* No relevant history */i!                 state = SENTCHAR; 0                 return lastchar = getc_crc(iob);         case SENTCHAR:5                 /* Lastchar is set, need lookahead */b"                 switch(lastchar) {                 case DLE:('                         state = NOHIST; H                         return 0;       /* indicates DLE was the data */                 case EOF:n#                         return EOF;h                 default:h                         for(likect = 1; (newchar = getc_crc(iob)) == lastchar && likect < 255; ++likect)!                                 ;i(                         switch(likect) {                         case 1:f:                                 return lastchar = newchar;                         case 2:e7                                 /* just pass through */m1                                 state = SENDNEWC;;0                                 return lastchar;                          default:0                                 state = SENDCNT;+                                 return DLE;e                         }                  }          case SENDNEWC:=                 /* Previous sequence complete, newchar set */ !                 state = SENTCHAR; *                 return lastchar = newchar;         case SENDCNT: >                 /* Sent DLE for repeat sequence, send count */!                 state = SENDNEWC;b                 return likect;         default:8                 fprintf(stderr,"sq: Bug - bad state\n");                 exit(1);                  /* NOTREACHED */	         }  }     N /******** Second translation - bytes to variable length bit strings *********/    ; /* This translation uses the Huffman algorithm to develop a 8  * binary tree representing the decoding information for:  * a variable length bit string code for each input value.7  * Each string's length is in inverse proportion to its 7  
 * frequency of appearance in the incoming data stream.
 * The encoding table is derived from the decoding table.
 *
 * The range of valid values into the Huffman algorithm are
 * the values of a byte stored in an integer plus the special
 * endfile value chosen to be an adjacent value. Overall, 0-SPEOF.
 *
 * The "node" array of structures contains the nodes of the
 * binary tree. The first NUMVALS nodes are the leaves of the
 * tree and represent the values of the data bytes being
 * encoded and the special endfile, SPEOF.
 * The remaining nodes become the internal nodes of the tree.
 *
 * In the original design it was believed that
 * a Huffman code would fit in the same number of
 * bits that will hold the sum of all the counts.
 * That was disproven by a user's file and was a rare but
 * infamous bug. This version attempts to choose among equally
 * weighted subtrees according to their maximum depths to avoid
 * unnecessarily long codes. In case that is not sufficient
 * to guarantee codes <= 16 bits long, we initially scale
 * the counts so the total fits in an unsigned integer, but
 * if codes longer than 16 bits are generated the counts are
 * rescaled to a lower ceiling and code generation is retried.
 */

/* Initialize the Huffman translation. This requires reading
 * the input file through any preceding translation functions
 * to get the frequency distribution of the various values.
 */

 init_huff(ib)r	 FILE *ib;  {L         register INT c, i;G         INT btlist[NUMVALS];    /* list of intermediate binary trees */r6         INT listlen;            /* length of btlist */@         UNSIGNED *wp;           /* simplifies weight counting */7         UNSIGNED ceiling;       /* limit for scaling */N  =         /* Initialize tree nodes to no weight, no children */          init_tree();  *         /* Build frequency info in tree */         do {                 c = getcnr(ib);                  if(c == EOF)"                         c = SPEOF;8                 if(*(wp = &node[c].weight) !=  MAXCOUNT)                          ++(*wp);	         }D         while(c != SPEOF);           ceiling = MAXCOUNT;   5         do {    /* Keep trying to scale and encode */ '                 if(ceiling != MAXCOUNT) C                         fprintf(stderr, "sq: *** rescaling ***, ");                  scale(ceiling); 8                 ceiling /= 2;   /* in case we rescale */  @                 /* Build list of single node binary trees havingS                                  * leaves for the input values with non-zero counts #                                  */n6                 for(i = listlen = 0; i < NUMVALS; ++i)1                         if(node[i].weight != 0) {O3                                 node[i].tdepth = 0;r6                                 btlist[listlen++] = i;                         }   C                 /* Arrange list of trees into a heap with the entry V                                  * indexing the node with the least weight at the top.#                                  */u&                 heap(btlist, listlen);  I                 /* Convert the list of trees to a single decoding tree */ *                 bld_tree(btlist, listlen);  3                 /* Initialize the encoding table */e                 init_enc();   /                 /* Try to build encoding table. 
F                                  * Fail if any code is > 16 bits long.#                                  */+	         }s.         while(buildenc(0, dctreehd) == ERROR);  +         /* Initialize encoding variables */(/         cbitsrem = 0;   /*force initial read */s0         curin = 0;      /*anything but endfile*/ }   9 /* The count of number of occurrances of each input value 7  * have already been prevented from exceeding MAXCOUNT.a:  * Now we must scale them so that their sum doesn't exceed5  * ceiling and yet no non-zero count can become zero.a5  * This scaling prevents errors in the weights of the*;  * interior nodes of the Huffman tree and also ensures that :  * the codes will fit in an unsigned integer. Rescaling is.  * used if necessary to limit the code length.  */b   scale(ceil)o1 UNSIGNED ceil;  /* upper limit on total weight */  {          register INT i,c;d         INT ovflw, divisor;i         UNSIGNED w, sum;3         unsigned char increased;         /* flag */            do {<                 for(i = sum = ovflw = 0; i < NUMVALS; ++i) {9                         if(node[i].weight > (ceil - sum))r(                                 ++ovflw;.                         sum += node[i].weight;                 }   $                 divisor = ovflw + 1;  8                 /* Ensure no non-zero values are lost */"                 increased = FALSE;.                 for(i = 0; i < NUMVALS; ++i) {+                         w = node[i].weight;a4                         if (w < divisor && w != 0) {V                                 /* Don't fail to provide a code if it's used at all */9                                 node[i].weight = divisor; 1                                 increased = TRUE;                          }                  }f	         }          while(increased);f  /         /* Scaling factor choosen, now scale */          if(divisor > 1)n,                 for(i = 0; i < NUMVALS; ++i)2                         node[i].weight /= 
divisor; }   ; /* heap() and adjust() maintain a list of binary trees as au9  * heap with the top indexing the binary tree on the listk;  * which has the least weight or, in case of equal weights,*9  * least depth in its longest path. The depth part is not :  * strictly necessary, but tends to avoid long codes which  * might provoke rescaling.-  */a   heap(list, length) INT list[], length;i {d         register INT i;   .         for(i = (length - 2) / 2; i >= 0; --i),                 adjust(list, i, length - 1); }/  , /* Make a heap from a heap with a new top */   adjust(list, top, bottom)n INT list[], top, bottom; {          register INT k, temp;t  7         k = 2 * top + 1;        /* left child of top */;D         temp = list[top];       /* remember root node of top tree */         if( k <= bottom) {A                 if( k < bottom && cmptrees(list[k], list[k + 1]))                          ++k;  I                 /* k indexes "smaller" child (in heap of trees) of top */*P                 /* now make top index "smaller" of old top and smallest child */-                 if(cmptrees(temp, list[k])) {f,                         list[top] = list[k];'                         list[k] = temp;I:                         /* Make the changed list a heap */>                         adjust(list, k, bottom); /*recursive*/                 }A	         }c }t  = /* Compare two trees, if a > b return true, else return falsea.  * note comparison rules in previous comments.  */P   cmptrees(a, b)) INT a, b;       /* root nodes of trees */  {a+         if(node[a].weight > node[b].weight)N                 return TRUE;,         if(node[a].weight == node[b].weight)3                 if(node[a].tdepth > node[b].tdepth)c$                         return TRUE;         return FALSE;E }   7 /* HUFFMAN ALGORITHM: develops the single element trees :  * into a single binary tree by forming subtrees rooted inC  * interior nodes having weights equal to the sum of weights of 
allS;  * their descendents and having depth counts indicating them   * depth of their longest paths.  *@  * When all trees have been formed into a single tree satisfying=  * the heap property (on weight, with depth as a tie breaker)s@  * then the binary code assigned to a leaf (value to be encoded)/  * is then the series of left (0) and right (1)c+  * paths leading from the root to the leaf. 6  * Note that trees are removed from the heaped list by3  * moving the last element over the top element andi  * reheaping the shorter list.  */i   bld_tree(list, len)  INT list[];b INT len; {bD         register INT freenode;          /* next free node in tree */?         register struct nd *frnp;       /* free node pointer */)J         INT lch, rch;           /* temporaries for left, right children */         INT i;  >         /* Initialize index to next available (non-leaf) node.K                  * Lower numbered nodes correspond to leaves (data values).t                  */l         freenode = NUMVALS;i           while(len > 1) {>                 /* Take from list two btrees with least weightO                                  * and build an interior node pointing to them.c9                                  * This forms a new tree.e#                                  */fA                 lch = list[0];  /* This one will be left child */a  D                 /* delete top (least) tree from the list of trees */&                 list[0] = list[--len];)                 adjust(list, 0, len - 1);r  F                 /* Take new top (least) tree. Reuse list slot later */B                 rch = list[0];  /* This one will be right child */  ?                 /* Form new tree from the two least trees usingtU                                  * a free node as root. 
Put the new tree in the list.n#                                  */tG                 frnp = &node[freenode]; /* address of next free node */*@                 list[0] = freenode++;   /* put at top for now */#                 frnp->lchild = lch;-#                 frnp->rchild = rch;/C                 frnp->weight = node[lch].weight + node[rch].weight;nO                 frnp->tdepth = 1 + maxchar(node[lch].tdepth, node[rch].tdepth);t:                 /* reheap list  to get least tree at top*/)                 adjust(list, 0, len - 1); 	         }l7         dctreehd = list[0];     /*head of final tree */  }    /* ???????????? */
 maxchar(a, b)  {          return a > b ? a : b;e } 6 /* Initialize all nodes to single element binary trees  * with zero weight and depth.  */,   init_tree()c {          register INT i;   '         for(i = 0; i < NUMNODES; ++i) {n#                 node[i].weight = 0; #                 node[i].tdepth = 0;l)                 node[i].lchild = NOCHILD;;)                 node[i].rchild = NOCHILD; 	         }  }   
 init_enc() {r         register INT i;   '         /* Initialize encoding table */ &         for(i = 0; i < NUMVALS; ++i) {                 codelen[i] = 0;;	         }  }   < /* Recursive routine to walk the indicated subtree and level<  * and maintain the current path code in bstree. When a leaf:  * is found the entire code string and length are put into7  * the encoding table entry for the leaf's data value .2  *'  * Returns ERROR if codes are too long.I  */   + INT             /* returns ERROR or NULL */p buildenc(level, root) 7 INT level;/* level of tree being examined, from zero */n: INT root; /* root of subtree is also data value if leaf */ {          register INT l, r;           l = node[root].lchild;         r = node[root].rchild;  +         if( l == NOCHILD && r == NOCHILD) { <                 /* Leaf. Previous path determines bit stringN                                  * code of length level (bits 0 to level - 1).E                                  * Ensures unused code bits are zero.f#                                  */ &                 codelen[root] = level;F                 code[root] = tcode & (((UNSIGNED)~0) >> (16 - level));3                 return (level > 16) ? 
ERROR : NULL;0	         }k         else {#                 if( l != NOCHILD) { @                         /* Clear path bit and continue deeper *//                         tcode &= ~(1 << level); ,                         /* NOTE RECURSION */;                         if(buildenc(level + 1, l) == ERROR)z-                                 return ERROR;                  } "                 if(r != NOCHILD) {>                         /* Set path bit and continue deeper */,                         tcode |= 1 << level;,                         /* NOTE RECURSION */;                         if(buildenc(level + 1, r) == ERROR) -                                 return ERROR;d                 } 	         } <         return NULL;    /* if we got here we're ok so far */ }"  1 /* Write out the header of the compressed file */    wrt_head(ob, infile)	 FILE *ob;c@ unsigned char *infile;   /* input file name (w/ or w/o drive) */ {z         register INT l,r;d         INT i, k;fE         INT numnodes;           /* nbr of nodes in simplified tree */*  >         putwe(RECOGNIZE, ob);   /* identifies as compressed */C         putwe(crc, ob);         /* unsigned sum of original data */f  5         /* Record the original file name w/o drive */(          if(*(infile + 1) == ':')0                 infile += 2;    /* skip drive */           do {#                 putce(*infile, ob); 	         }(#         while(*(infile++) != '\0');L    B         /* Write out a simplified decoding tree. Only the interiorB                  * nodes are written. When a child is a leaf index?                  * (representing a data value) it is recoded as G                  * -(index + 1) to distinguish it from interior indexes I                  * which are recoded as positive indexes in the new tree. G                  * Note that this tree will be empty for an empty file.                   */o  D         numnodes = dctreehd < NUMVALS ? 
0 : dctreehd - (NUMVALS -1);         putwe(numnodes, ob);  :         for(k = 0, i = dctreehd; k < numnodes; ++k, --i) {#                 l = node[i].lchild; #                 r = node[i].rchild;c:                 l = l < NUMVALS ? -(l + 1) : dctreehd - l;:                 r = r < NUMVALS ? -(r + 1) : dctreehd - r;0                 putwe(l, ob);   /* left child */1                 putwe(r, ob);   /* right child */t	         }s }w  E /* Get an encoded byte or EOF. Reads from specified stream AS NEEDED.   *<  * There are two unsynchronized bit-byte relationships here.9  * The input stream bytes are converted to bit strings of 6  * various lengths via the static variables named c...8  * These bit strings are concatenated without padding to8  * become the stream of encoded result bytes, which this;  * function returns one at a time. The EOF (end of file) is*:  * converted to SPEOF for convenience and encoded like any6  * other input value. True EOF is returned after that.  *5  * The original gethuff() called a seperate function, 9  * getbit(), but that more readable version was too slow. 
  */N  9 INT             /*  Returns byte values except for EOF */  gethuff(ib) 	 FILE *ib;c { /         INT rbyte;      /* Result byte value */ -         INT need, take; /* numbers of bits */            rbyte = 0;5         need = 8;       /* build one byte per call */   /         /* Loop to build a byte of encoded data <                  * Initialization forces read the first time                  */    loop:          if(cbitsrem >= need) {6                 /* Current code fullfills our needs */                 if(need == 0)s%                         return rbyte; '                 /* Take what we need */ -                 rbyte |= ccode << (8 - need); (                 /* And leave the rest */                 ccode >>= need; !                 cbitsrem -= need; $                 return rbyte & 0xff;	         }h  ,         /* We need more than current code */         if(cbitsrem > 0) {(                 /* Take what there is */-                 rbyte |= ccode << (8 - need); !                 need -= cbitsrem; 	         } 1         /* No more bits in current code string */          if(curin == SPEOF) {=                 /* The end of file token has been encoded. IfwV                                  * result byte has data return it and do EOF next time#                                  */                  cbitsrem = 0;s  7                 /*NOTE: +0 is to fight compiler bug? */ 5                 return (need == 8) ? 
EOF : rbyte + 0; 	         }p           /* Get an input byte */"'         if((curin = getcnr(ib)) == EOF) =                 curin = SPEOF;  /* convenient for encoding */s  %         /* Get the new byte's code */i         ccode = code[curin];"         cbitsrem = codelen[curin];           goto loop; }     1 /* Get next byte from file and update checksum */r   INTe getc_crc(ib)	 FILE *ib;e {          register INT c;i           c = getc(ib);          if(c != EOF).                 crc += c;       /* checksum */         return c;d }   + /* Output functions with error reporting */   
 putce(c, iob)a INT c;
 FILE *iob; {f2         if(putc(c, iob) == ERROR && ferror(iob)) {5                 fprintf(stderr, "sq: write error\n");                  exit(1);	         }P }.   /*@  * machine independent put-word that writes low order byte first;  *  (compatible with CP/M original) regardless of host cpu.*  */ 
 putwe(w, iob)h INT w;
 FILE *iob; {t         putc(w, iob);d         putc(w>>8, iob);         if (ferror(iob)) {5                 fprintf(stderr, "sq: write error\n");r                 exit(1);	         }* }H