 /*  *			D I F F  */    /*)BUILD	$(TKBOPTIONS) = { 			TASK	= ...DIF 		}C */   #ifdef	DOCUMENTATION' title	diff	Differential File Comparison # index		Differential File Comparison    synopsis   	diff [option] file1 file2   description   > 	Diff compares two files, showing what must be changed to make? 	them identical. Either file1 or file2 (but not both) may refers> 	to directories.  If that is the case, a file in the directory@ 	whose name is the same as the other file argument will be used.A 	The standard input may be used for one of the files by replacinge@ 	the argument by "-".  Except for the standard input, both files 	must be on disk devices.e 	.s*	 	Options:	 	.lm +8c8 	.s.i -8;-b	Remove trailing whitespace (blanks and tabs)@ 	and compress all other strings of whitespace to a single blank.7 	.s.i -8;-c	Print some context -- matching lines before < 	and after the non-match section.  Mark non-matched sections
 	with "|".1 	.s.i -8;-i	Ignore lower/upper case distinctions. 8 	.s.i -8;-e	Output is in an "editor script" format which) 	is compatible with the Unix 'ed' editor. 	 	.s.lm -8 B 	All information needed to compare the files is maintained in mainE 	memory. This means that very large files (or fairly large files with ? 	many differences) will cause the program to abort with an "out;< 	of space" message.  Main memory requirements (in words) are 	approximately:= 	.s)) 		2 * (length of file1 + length of file2)= 	.br 		+ 3 * (number of changes)f 	.sr> 	(Where "length" is the number of lines of data in each file.) 	.s H 	The algorithm reads each file twice, once to build hash tables and onceG 	to check for fortuitous matches (two lines that are in fact different, D 	but which have the same hash value).  CPU time requirements includeA 	sorting the hash tables and randomly searching memory tables fore? 	equivalence classes. For example, on a time-shared VAX-11/780,OA 	comparing two 1000 line files required about 30 seconds (elapsed A 	clock time) and about 10,000 bytes of working storage.  About 90r/ 	per-cent of the time was taken up by file I/O.f   diagnosticsy   	.lm +8t  	.s.i -8;Warning, bad option 'x' 	.s  	The option is ignored.* 	.s.i -8;Usage ... 	.so$ 	Two input files were not specified.; 	.s.i -8;Can't open input file "filename".  Can't continue.  	.s.i -8;Out of spaceo 	.s,= 	The program ran out of memory while comparing the two files.w0 	.s.i -8;Can't read line nnn at xxx in file[A/B] 	.s*? 	This indicates an I/O error when seeking to the specific line.] 	It should not happen./ 	.s.i -8;Spurious match, output is not optimal./ 	.snD 	Two lines that were different yielded the same hash value.  This isE 	harmless except that the difference output is not the minimum set ofdH 	differences between the two files.  
For example, instead of the output:
	.s
		lines 1 to 5 were changed to ...
	.s
	the program will print
	.s
		lines 1 to 3 were changed to ...
	.br
		lines 4 to 5 were changed to ...
	.s
	The program uses a CRC16 hash code.  The likelihood of this error is
 	quite small.r 	.lm -8    author  B 	The diff algorithm was developed by J. W. Hunt and M. D. McIlroy,2 	using a central algorithm defined by H. S. Stone. 	It was published in:e 	.s.lm +4.nf! 	Hunt, J. W., and McIlroy, M. D., / 	An Algorithm for Differential File Comparison,)( 	Computing Science Technical Report #41,* 	Bell Laboratories, Murray Hill, NJ  07974 	.s.lm -4.f    bugs  = 	On RSX and DECUS C on VMS systems, diff may fail if the bothr: 	files are not "variable-length, implied carriage control"8 	format.  The scopy program can be used to convert files" 	to this format if problems arise.  8 	When compiled under VAX C, diff handles STREAM_LF files? 	properly (in addition to the canonical variable-length implied < 	carriage control files).  Other variations should work, but 	have not been tested.  B 	When compiled under VAX C, diff is quite slow for unknown reasonsE 	which ought to be investigated.  On the other hand, it has access to  	effectively unlimited memory.  @ 	Output in a form suitable for ed - the -e option - seems rather? 	pointless; the analogue on DEC systems is SLP (SUMSLP on VMS).nC 	It would be simple to provide SLP-compatible output.  The question A 	is, why bother - since the various DEC file comparison utilities  	already produce it.   #endif   /*I  * Diff maintains all information needed to compare the two files in main H  * memory.  This means that very large files (or fairly large files withK  * many differences) will cause the program to abort with an "out of space"cA  * error.  Main memory requirements (in words) are approximately:c  *D  *	2 * (length of file1 + length of file2) + (3 * number of changes)  *J  * The diff algorithm reads each file twice (once to build hash tables andM  * a second time to check for fortuitous matches), then reads the differenceskG  * by seeking randomly within the files.  
CPU time requirements include}H  * sorting the two hash vectors and randomly searching memory tables forB  * equivalence classes.  For example, running in Vax compatibilityC  * mode, two 1000 line files with a fair number of differences took K  * about 25 seconds (elapsed wall clock time) for processing.  Most of this I  * time was spent in the file read routines.  This test required slightly 6  * more than 6000 words of memory for internal tables.  *D  * The diff algorithm was developed by J. W. Hunt and M. D. McIlroy,C  * using a central algorithm defined by H. S. Stone.  The algorithmi  * was described in:  *#  *	Hunt, J. W., and McIlroy, M. D.,B1  *	An Algorithm for Differential File Comparison,;*  *	Computing Science Technical Report #41,,  *	Bell Laboratories, Murray Hill, NJ  07974  *	rE  * The following description is summarized from that document.  WhilenI  * it has been slightly modified to correspond to the program source, thee&  * algorithm is essentially identical.  *?  * 1.	Read the input files, building two vectors containing the ;  *	line number (serial) and hash value (hash) of each line. <  *	Data for fileA will be in a vector pointed to by fileA[],:  *	while data for fileB will be pointed to by fileB[]. The=  *	lengths (number of lines) of the files will be representede>  *	by lenA and lenB respectively.  [This is slightly different!  *	from the published algorithm.]t  *:  * 2.	Note initial and final sequences that have identical;  *	hash values to shorten subsequent processing.  Note thatt:  *	the "jackpot" phase (step 9.) will examine all lines in<  *	the file.  Next, sort each file using hash as the primary'  *	key and serial as the secondary key.   *A  * 3.	Construct an array of equivalence classes (member[1..lenB]) =  *	where each element contains the line number in fileB and ad?  *	flag which is True if the indicated line is the first memberr>  *	of an equivalence class.  
(An equivalence class is a set of@  *	lines which all hash to the same value.  The lines themselves"  *	are not necessarily identical.)  *J  * 4.	Construct an array, class[1..lenA], where each element, I, is set to=  *	the index of a line, J, in fileB if member[J] is the first;C  * 	element in an equivalence class and the hash code of line[I] in	D  *	fileA is the same as the hash code of line[J] in fileB.  Class[I])  *	is set to zero if no such line exists.w  *C  *	If non-zero, class[I] now points in member[] to the beginning ofk>  *	the class of lines in fileB equivalent to line[I] in fileA.  *I  * The next two steps implement the longest common subsequence algorithm.u  *D  * 5.	Structure CANDIDATE { a, b, previous }, where a and b are line?  * 	numbers and previous a reference to a candidate, will store	+  *	candidate lists as they are constructed.   *E  *	Vector clist[] stores references to candidates.  It is dimensionedf  *	(0..min(lenA, lenB) + 1)h  *
 *	Initialize
 *		clist[0] = CANDIDATE {   0,   0, -1 };
 *		clist[1] = CANDIDATE { A+1, B+1, -1 };
  *		ktop = 1;p  *>  *	clist[1] is a fence beyond the last usefully filled elementB  *	and -1 is an out-of-range clist index. Ktop is the index of the?  *	fence.  Note, because of the memory allocation used, clist[]h>  *	is actually composed of two vectors, clist[] containing theA  *	candidate reference, and klist[] containing pointers to clist.t  *  * 6.	For (A = 1 to lenA) {e9  *		I = class[A];	-- the index in member[]:  beginning of1/  *				-- the class of lines in fileB equivalents  *				-- to this line in fileA.   *		if (I is non-zero) {.  *			Merge each member into the candidate list  *			as discussed below.  *		}s  *L  * Unravel the chain of candidates, getting a vector of common subsequences:  *1  * 7.	Set all elements of match[0..lenA] to zero.t  *K  * 8.	clist[ktop-1] points to the subsequence chain head.  For each elementhC  *	in the chain, let A and B be the line number entries.  Then, set   *  *		match[A] = B;h  *B  *	The non-zero elements of match[]  now pick out a longest common@  *	subsequence chain, possibly including spurious matches due to>  *	hash coincidences.  The pairings between the two files are:  *  *	if (match[A] is non-zero) {3  *		line A in fileA matches line match[A] in fileB;u  *	}  *@  * Now, read each line of fileA and fileB to check for jackpots:  *  * 9.	for (A = 1 to lenA) {r  *		if (match[A] is nonzero) {6  *			if (fileA[A] is not identical to fileB[match[A]])&  *				match[A] = 0;	-- Hash congruence  *		}   *	}  *M  * Ignoring "squish" complications, the merge step may be defined as follows:o  *	  *	Entry: $  *		clist[]		Candidate pointer array(  *		ktop		Fence beyond last filled index  *		A		Current index in fileAa  *		member[]	Equivalence vector 1  *		I		The index in member[] of the first elementT/  *				  of the class of lines in fileB that are '  *				  equivalent to line[A] in fileA.i  *;  * 1.	
Let clist[R] be "an r-candidate" and C a reference tohA  *	the last candidate found, which will always be an r-candidate.tA  *	clist[R] will be updated with this reference once the previousn6  *	value of clist[R] is no longer needed.  Initialize:  *
  *		R = 0;  *		C = clist[0];i  *&  * 2.	Do steps 3 through 6 repeatedly:  *.  *   3.	set B to the line number in member[I];<  *	search clist[R..ktop] for an element, clist[S], such that  *(  *		clist[S-1].b < B and clist[S].b >= B  *;  *	Note that clist[] is ordered on clist[].b so that binary <  *	search will work.  The search algorithm used requires the'  *	two "fence" entries described above..  *7  *	If such an element is found, perform steps 4. and 5.e  *2  *	4. Extend the longest common subsequence chain:  *  *		If (clist[S].b > B) {i  *			clist[R] = C;  *			R = S;k'  *			C = candidate(A, B, clist[S - 1]);t  *		}.  *:  *	5. Extend the number of subsequences, moving the fence:  *  *		If (S == ktop) {"  *			clist[ktop + 1] = clist[ktop]  *			ktop = ktop + 1;    *			break out of step 2's loop;  *		}s  *  *   6.	I = I + 1;9  *	if (member[I] is the first element in another class) {.  *		break out of step 2's loop;o  *	}	  *	else {p  *		continue at step 2.   *	}  *  * 7.	clist[R] = C;.  *	exit merge subroutine.r  *7  * To illustrate vector contents, here is a sample run:   *
 * File A:
 *	line 1
 *	line 2
 *	line 3
 *	line 4
 *	line 5 gets deleted
 *	line 6
 *
  * File B:	  *	line 1D  *	line 1.5 inserted	  *	line 2r  *	line 3 changedS	  *	line 4I	  *	line 6s  *A  * (For clarity, the "squish" step is omitted from the following)h  *G  * On entry to equiv() (after readin and sorting), the file[] vector is,C  * as follows (the first entry in each pair is the line number, the @  * second is the hash value.  Entries are sorted on hash value):  *  * FileA[] (1..lines in fileA):d  *   line   hash=  *	3 042400  6 043300  5 050026  1 102201  2 102701  4 103501   * FileB[] (1..lines in fileB): =  *	6 043300  2 045600  1 102201  3 102701  5 103501  4 147166e  *  *$  * After Equiv has processed file[]:  *  * FileA[] (1..lines in fileA):)  *   line valuen  *	3 0	6 1	5 0	1 3	2 4	4 5  * Member[] (0..lines in fileB)u  *	0	-6	-2	-1	-3	-5	-4  *  *$  * After unsort() has unwound fileB:  *!  * Class[] (1 .. lines in fileA):a  *	3	 4	0	5	0	1f  *;  * Within unravel(), match is built in the following order:   *  *	match[6] := 6  *	match[4] := 5  *	match[2] := 3  *	match[1] := 1  *!  * Match[] (0 .. lines in fileA):   *  *	 0	1	3	0	5	0	6l  *  * Output is as follows:  *  *	1a2  *	> line 1.5 inserted  *	3c4  *	< line 3t  *	---  *	> line 3 changed   *	5d5  *	< line 5 gets deleted  *  *  */    #include <stdio.h> #include <ctype.h>
 #ifdef vms #include		<ssdef.h>l #include		<stsdef.h>1 #define	IO_SUCCESS	(SS$_NORMAL | STS$M_INHIB_MSG)h #define	IO_ERROR	SS$_ABORT #endif /*H  * Note: IO_SUCCESS and IO_ERROR are defined in the Decus C stdio.h file  */2 #ifndef	IO_SUCCESS #define	IO_SUCCESS	0 #endif #ifndef	IO_ERROR #define	IO_ERROR	1 #endif #define	EOS		0 #define	TEMPFILE	"diff.tmp"a #define	TRUE		1i #define	FALSE		0   #ifdef	pdp11 #define	short	inti #endif   extern long	ftell();   typedef struct candidate { 	int	b;			/* Line in fileB		*/ 	int	a;			/* Line in fileA		*/& 	int	link;			/* Previous candidate		*/ } CANDIDATE;   typedef struct line { - 	unsigned short	hash;		/* Hash value etc.		*/l$ 	short		serial;		/* Line number			*/ } LINE;a  / LINE	*file[2];			/* Hash/line for total file	*/s #define	fileA	file[0]* #define	fileB	file[1]f  . LINE	*sfile[2];			/* Hash/line after prefix	*/ #define	sfileA	sfile[0]f #define	sfileB	sfile[1]e  . int	len[2];				/* Actual lines in each file	*/ #define	lenA	len[0]l #define	lenB	len[1]r  & int	slen[2];			/* Squished lengths		*/ #define	slenA	slen[0]r #define	slenB	slen[1]J  - int	prefix;				/* Identical lines at start	*/i+ int	suffix;				/* Identical lenes at end	*/i  < FILE	*infd[2] = { NULL, NULL };	/* Input file identifiers	*/0 FILE	*tempfd;			/* Temp for input redirection	*/   /*:  * The following vectors overlay the area defined by fileA  */t  * int		*class;			/* Unsorted line numbers	*/. int		*klist;			/* Index of element in clist	*/5 CANDIDATE	*clist;			/* Storage pool for candidates	*/ 4 int		clength	= 0;		/* Number of active candidates	*/  ) int		*match;			/* Longest subsequence		*/o. long		*oldseek;		/* Seek position in file A	*/   /*:  * The following vectors overlay the area defined by fileB  */h  0 int		*member;		/* Concatenated equiv. 
classes	*/. long		*newseek;		/* Seek position in file B	*/0 char		*textb;			/* Input from file2 for check	*/   /*  * Global variables   */i  / int		eflag	= FALSE;	/* Edit script requested	*/e1 int		bflag	= FALSE;	/* Blank supress requested	*/)+ int		cflag	= FALSE;	/* Context printout		*/t/ int		iflag	= FALSE;	/* Ignore case requested	*/y- char		text[257];		/* Input line from file1	*/o #ifndef	vmsr0 char		*free_space;		/* For storage allocation	*/ #endif1 extern char	*myalloc();		/* Storage allocator		*/ 1 extern char	*compact();		/* Storage compactor		*/f   #ifdef	DEBUG #define	TIMING #endif
 #ifdef	TIMINGn extern long	time();  extern char	*$$mend; long		totaltime; long		sectiontime; char		*mstart; #endif   main(argc, argv)
 int		argc;
 char		**argv;  /*  * Diff main program  */e {  	register int	i; 	register char	*ap;,  
 #ifdef	TIMINGe  	sectiontime = time(&totaltime); #endif
 #ifdef vms" 	argc = getredirection(argc,argv); #endif= 	while (argc > 1 && *(ap = argv[1]) == '-' && *++ap != EOS) {* 		while (*ap != EOS) { 			switch (tolower(*ap++)) { 			case 'b':
 					bflag++;n 					break;t   			case 'c':
 					cflag++;o 					break;l   			case 'e':
 					eflag++;e 					break;e   			case 'i':
 					iflag++;o 					break;s   			default:r 					fprintf(stderr,# 						"Warning, bad option '%c'\n",h 						ap[-1]); 					break;b 			} 		}v	 		argc--;r	 		argv++;a 	}   	if (argc != 3)*. 		error("Usage: diff [-options] file1 file2"); 	if (cflag && eflag) { 		fprintf(stderr,i; 			"Warning, -c and -e are incompatible, -c supressed.\n");  		cflag = FALSE; 	} 	argv++; 	for (i = 0; i <= 1; i++) {*/ 		if (argv[i][0] == '-' && argv[i][1] == EOS) {p 			infd[i] = stdin;1/ 			if ((tempfd = fopen(TEMPFILE, "w")) == NULL)e 				cant(TEMPFILE, "work", 1); 		}t 		else {! 			infd[i] = fopen(argv[i], "r");N 		}b 	}* 	if (infd[0] == NULL && infd[1] == NULL) { 		cant(argv[0], "input", 0); 		cant(argv[1], "input", 1); 	} 	else if (infd[1] == NULL)  		opendir(1, &argv[1], infd[0]); 	else if (infd[0] == NULL)  		opendir(0, &argv[0], infd[1]); #ifndef	vmse 	free_space = malloc(1); #endif 	/*a% 	 * Read input, building hash tables.	 	 */
 	input(0);
 	input(1);
 	squish(); #ifdef	DEBUG 	printf("before sort\n");h 	for (i = 1; i <= slenA; i++)s# 		printf("sfileA[%d] = %6d %06o\n",e( 			i, sfileA[i].serial, sfileA[i].hash); 	for (i = 1; i <= slenB; i++)	# 		printf("sfileB[%d] = %6d %06o\n",a( 			i, sfileB[i].serial, sfileB[i].hash); #endif 	sort(sfileA, slenA);r 	sort(sfileB, slenB); 
 #ifdef	TIMINGN 	ptime("input"); #endif #ifdef	DEBUG 	printf("after sort\n"); 	for (i = 1; i <= slenA; i++) # 		printf("sfileA[%d] = %6d %06o\n", ( 			i, sfileA[i].serial, sfileB[i].hash); 	for (i = 1; i <= slenB; i++)t# 		printf("sfileB[%d] = %6d %06o\n",s( 			i, sfileB[i].serial, sfileB[i].hash); #endif 	/*. 	 * Build equivalence classes. 	 */ 	member = (int *)fileB; 	 	equiv(); D 	member = (int *)compact((char *)member, (slenB + 2) * sizeof (int), 			"squeezing member vector"); 	/* 2 	 * Reorder equivalence classes into array class[] 	 */ 	class = (int *)fileA;
 	unsort();B 	class = (int *)compact((char *)class, (slenA + 2) * sizeof (int), 			"compacting class vector");
 #ifdef	TIMINGu 	ptime("equiv/unsort");h #endif 	/*c  	 * Find longest subsequences 	 */= 	klist = (int *)myalloc((slenA + 2) * sizeof (int), "klist"); ; 	clist = (CANDIDATE *)myalloc(sizeof (CANDIDATE), "clist");  	i = subseq(); 	free((char *)member); 	free((char *)class);A< 	match = (int *)myalloc((lenA + 2) * sizeof (int), "match"); 	unravel(klist[i]);l
 	free(clist);[
 	free(klist);c
 #ifdef	TIMINGs 	ptime("subsequence/unravel"); #endif 	/*s7 	 * Check for fortuitous matches and output differencesl 	 */? 	oldseek = myalloc((lenA + 2) * sizeof (* oldseek), "oldseek");	? 	newseek = myalloc((lenB + 2) * sizeof (* newseek), "newseek");*, 	textb = myalloc(sizeof text, "textbuffer"); 	if (check(argv[0], argv[1]))m= 		fprintf(stderr, "Spurious match, output is not optimal\n"); 
 #ifdef	TIMINGi 	ptime("check"); #endif
 	output();
 #ifdef	TIMINGd 	ptime("output");r; 	printf("%ld seconds required\n", sectiontime - totaltime);n #endif 	if (tempfd != NULL) { 		fclose(tempfd);i 		delete(TEMPFILE);p 	} }n * input(which)- int		which;			/* 0 or 1 to redefine infd[]	*/	 /*%  * Read the file, building hash tablee  */t {u 	register LINE		*lentry; 	register int		linect; 	register short		hashval;c 	FILE			*fd; 	unsigned short		hash();   	linect = 0;4 	lentry = (LINE *)myalloc(sizeof(LINE) * 3, "line"); 	fd = infd[which]; 	while (!getline(fd, text)) {r* 		lentry = (LINE *)compact((char *)lentry,! 			(++linect + 3) * sizeof(LINE)," 			"extending line vector");# 		lentry[linect].hash = hash(text);f 	} 	/* D 	 * If input was from stdin ("-" command), finish off the temp file. 	 */ 	if (fd == stdin) {i 		fclose(tempfd);;. 		tempfd = infd[which] = fopen(TEMPFILE, "r"); 	} 	len[which] = linect;  	file[which] = lentry; }u e squish() /*K  * Look for initial and trailing sequences that have identical hash values.o8  * Don't bother building them into the candidate vector.  */* {6 	register int	i; 	register LINE	*ap;i 	register LINE	*bp;r 	int		j; 	int		k;   	/*fC 	 * prefix -> first line (from start) that doesn't hash identically* 	 */' 	i = 0; ap = &fileA[1]; bp = &fileB[1];e7 	while (i < lenA && i < lenB && ap->hash == bp->hash) {e 		i++; ap++; bp++; 	} 	prefix = i; 	/* B  	 * suffix -> first line (from end) that doesn't hash identically 	 */ 	j = lenA - i; 	k = lenB - i; 	ap = &fileA[lenA];* 	bp = &fileB[lenB];l 	i = 0;*1 	while (i < j && i < k && ap->hash == bp->hash) {  		i++; ap--; bp--; 	} 	suffix = i; 	/*r 	 * Tuck the counts away 	 */ 	for (k = 0; k <= 1; k++) {, 		sfile[k] = file[k] + prefix;) 		j = slen[k] = len[k] - prefix - suffix;s  7 		for (i = 0, ap = sfile[k]; i <= slen[k]; i++, ap++) {: 			ap->serial = i; 		}s 	} }e d sort(vector, vecsize)3$ LINE		*vector;		/* What to sort			*/& int		vecsize;		/* How many to sort		*/ /*  * 
Sort hash entries  */0 {0 	register int	j; 	register LINE	*aim; 	register LINE	*ai;E 	int		mid;	e 	int		k; 	LINE		work;  # 	for (j = 1; j <= vecsize; j *= 2);a 	mid = (j - 1);5 	while ((mid /= 2) != 0) { 		k = vecsize - mid; 		for (j = 1; j <= k; j++) {2 			for (ai = &vector[j]; ai > vector; ai -= mid) { 				aim = &ai[mid];e 				if (aim < ai)	 					break;	/* ?? Why ??		*/ 				if (aim->hash > ai->hash ||d 						aim->hash == ai->hash && 						aim->serial > ai->serial)* 					break;1 				work.hash = ai->hash;s 				ai->hash = aim->hash;	 				aim->hash = work.hash; 				work.serial = ai->serial;n 				ai->serial = aim->serial;n 				aim->serial = work.serial; 			} 		}  	} }n   equiv()t /*!  * Build equivalence class vector   */u {< 	register LINE	*ap;  	register union {l 		LINE	*bp;s
 		int	*mp; 	} r;O 	register int	j;
 	LINE		*atop;B   #ifdef	DEBUG 	printf("equiv entry\n");  	for (j = 1; j <= slenA; j++)E# 		printf("sfileA[%d] = %6d %06o\n",.) 				j, sfileA[j].serial, sfileA[j].hash);O 	for (j = 1; j <= slenB; j++)E# 		printf("sfileB[%d] = %6d %06o\n",f) 				j, sfileB[j].serial, sfileB[j].hash);d #endif 	j = 1;d 	ap = &sfileA[1];i 	r.bp = &sfileB[1];h 	atop = &sfileA[slenA];n# 	while (ap <= atop && j <= slenB) {i 		if (ap->hash < r.bp->hash) { 			ap->hash = 0; 			ap++; 		}A$ 		else if (ap->hash == r.bp->hash) { 			ap->hash = j; 			ap++; 		}t 		else {
 			r.bp++; 			j++;a 		}/ 	} 	while (ap <= atop) {o 		ap->hash = 0;i 		ap++;	 	} 	sfileB[slenB + 1].hash = 0; #ifdef	DEBUG 	printf("equiv exit\n"); 	for (j = 1; j <= slenA; j++)	# 		printf("sfileA[%d] = %6d %06o\n",/) 				j, sfileA[j].serial, sfileA[j].hash);0 	for (j = 1; j <= slenB; j++) # 		printf("sfileB[%d] = %6d %06o\n",h) 				j, sfileB[j].serial, sfileB[j].hash);B #endif 	ap = &sfileB[0];/ 	atop = &sfileB[slenB];d 	r.mp = &member[0];  	while (++ap <= atop) { 	 		r.mp++;	 		*r.mp = -(ap->serial);" 		while (ap[1].hash == ap->hash) { 			ap++;
 			r.mp++; 			*r.mp = ap->serial; 		}} 	} 	r.mp[1] = -1; #ifdef	DEBUG 	for (j = 0; j <= slenB; j++) , 		printf("member[%d] = %d\n", j, member[j]); #endif }e e unsort() /*  * Build class vectors  *// {n 	register int	*temp; 	register int	*tp; 	register union {  		LINE	*ap;D
 		int	*cp; 	} u;S
 	LINE		*evec;  	int		*eclass; #ifdef	DEBUG 	int		i; #endif  D 	temp = (int *)myalloc((slenA + 1) * sizeof(int), "unsort scratch"); 	u.ap = &sfileA[1];p 	evec = &sfileA[slenA];* 	while (u.ap <= evec) {s #ifdef	DEBUG: 		printf("temp[%2d] := %06o\n", u.ap->serial, u.ap->hash); #endif" 		temp[u.ap->serial] = u.ap->hash;	 		u.ap++;n 	} 	/*/. 	 * Copy into class vector and free work space 	 */ 	u.cp = &class[1]; 	eclass = &class[slenA]; 	tp = &temp[1];c 	while (u.cp <= eclass)b 		*u.cp++ = *tp++; 	free((char *) temp);) #ifdef	DEBUG 	printf("unsort exit\n");	 	for (i = 1; i <= slenA; i++)n9 		printf("class[%d] = %d %06o\n", i, class[i], class[i]);m #endif }  n subseq() /*7  * Generate maximum common subsequence chain in clist[]e  */a {m	 	int			a;/ 	register int		ktop; 	register int		b;m 	register int		s; 	 	int			r;*	 	int			i;f 	int			cand;   	klist[0] = newcand(0, 0, -1);. 	klist[1] = newcand(slenA + 1, slenB + 1, -1);" 	ktop = 1;				/* -> guard entry	*/ 	for (a = 1; a <= slenA; a++) {, 		/*+ 		 * For each non-zero element in fileA ...n 		 */m 		if ((i = class[a]) == 0) 			continue;* 		cand = klist[0];		/* No candidate now	*/% 		r = 0;				/* Current r-candidate	*/a 		do { #ifdef	DEBUG7 			printf("a = %d, i = %d, b = %d\n", a, i, member[i]);= #endif 			/*=! 			 * Perform the merge algorithm	 			 */ 			if ((b = member[i]) < 0)b 				b = -b;g #ifdef	DEBUG' 			printf("search(%d, %d, %d) -> %d\n",	% 					r, ktop, b, search(r, ktop, b));e #endif' 			if ((s = search(r, ktop, b)) != 0) {	  				if (clist[klist[s]].b > b) { 					klist[r] = cand;W 					r = s;p& 					cand = newcand(a, b, klist[s-1]); #ifdef	DEBUG* 					dumpklist(ktop, "klist[s-1]->b > b"); #endif 				}( 				if (s >= ktop) {# 					klist[ktop + 1] = klist[ktop];) 					ktop++; #ifdef	DEBUG 					klist[r] = cand;i 					dumpklist(ktop, "extend");  #endif 					break;  				}; 			} 		} while (member[++i] > 0); 		klist[r] = cand; 	} #ifdef	DEBUG' 	
printf("last entry = %d\n", ktop - 1);p #endif+ 	return(ktop - 1);			/* Last entry found	*/L }" k int; newcand(a, b, pred)i int		a;		/* Line in fileA			*/ int		b;		/* Line in fileB			*/6 int		pred;		/* Link to predecessor, index in cand[]	*/ {	 	register CANDIDATE	*new;B   	clength++;d, 	clist = (CANDIDATE *)compact((char *)clist,  			clength * sizeof (CANDIDATE), 			"extending clist"); 	new = &clist[clength - 1];  	new->a = a; 	new->b = b; 	new->link = pred; 	return(clength - 1);	 }  l search(low, high, b) register int	low; 
 int		high; int		b;  /*D  * Search klist[low..top] (inclusive) for b.  If klist[low]->b >= b,>  * return zero.  Else return s such that klist[s-1]->b < b andA  * klist[s]->b >= b.  Note that the algorithm presupposes the twom6  * preset "fence" elements, (0, 0) and (slenA, slenB).  */" {  	register int		temp; 	register int		mid;a   	if (clist[klist[low]].b >= b) 		return(0);) 	while ((mid = (low + high) / 2) > low) {s' 		if ((temp = clist[klist[mid]].b) > b)c 			high = mid; 		else if (temp < b)
 			low = mid;n 		else { 			return(mid);* 		}  	} 	return(mid + 1);  }  i
 unravel(k) register int	k;2 {  	register int		i;t 	register CANDIDATE	*cp; 	int			first_trailer;D 	int			difference;   	first_trailer = lenA - suffix;; 	difference = lenB - lenA; #ifdef	DEBUG0 	printf("first trailer = %d, difference = %d\n", 		first_trailer, difference);[ #endif 	for (i = 0; i <= lenA; i++) { 		match[i] = (i <= prefix) ? i) 			: (i > first_trailer) ? i + differenced 			: 0;i 	} #ifdef	DEBUG 	printf("unravel\n");A #endif 	while (k != -1) { 		cp = &clist[k];e #ifdef	DEBUG 		if (k < 0 || k >= clength)" 			error("Illegal link -> %d", k);> 		printf("match[%d] := %d\n", cp->a + prefix, cp->b + prefix); #endif) 		match[cp->a + prefix] = cp->b + prefix;n 		k = cp->link;N 	} }i " check(fileAname, fileBname)  char		*fileAname;i char		*fileBname;i /*I  * Check for hash matches (jackpots) and collect random access indices to=  * the two files.t  */) { . 	register int	a;		/* Current line in file A	*/. 	register int	b;		/* Current line in file B	*/ 	int		jackpot;   /*G  * The VAX C ftell() returns the address of the CURRENT record, not thelJ  * next one (as in DECUS C or, effectively, other C's).  Hence, the values?  * are "off by one" in the array.  OFFSET compensates for this."  */f
 #ifdef vms #define OFFSET (-1)e #elsex #define OFFSET 0 #endif   	b = 1;h 	rewind(infd[0]);+ 	rewind(infd[1]);f /*8  * See above; these would be over-written on VMS anyway.  */h #ifndef vms  	oldseek[0] = ftell(infd[0]);i 	newseek[0] = ftell(infd[1]);  #endif  
 	jackpot = 0;  #ifdef	DEBUG 	printf("match vector\n"); 	for (a = 0; a <= lenA; a++)* 		printf("match[%d] = %d\n", a, match[a]); #endif 	for (a = 1; a <= lenA; a++) { 		if (match[a] == 0) { 			getline(infd[0], text);& 			oldseek[a+OFFSET] = ftell(infd[0]);
 			continue;	  		}t 		while (b < match[a]) { 			getline(infd[1], textb);s& 			newseek[b+OFFSET] = ftell(infd[1]); 			b++;i 		}; 		getline(infd[0], text);s 		getline(infd[1], textb); 		if (!streq(text, textb)) {( 			fprintf(stderr,	"Spurious match:\n");- 			fprintf(stderr, "line %d in %s, \"%s\"\n",> 				a, fileAname, text);- 			fprintf(stderr, "line %d in %s, \"%s\"\n",t 				b, fileBname, textb);' 			match[a] = 0;
 			jackpot++;n 		};% 		oldseek[a+OFFSET] = ftell(infd[0]); % 		newseek[b+OFFSET] = ftell(infd[1]);( 		b++; 	} 	for (; b <= lenB; b++) {  		getline(infd[1], textb);% 		newseek[b+OFFSET] = ftell(infd[1]);  	} /*E  * The logical converse to the code up above, for NON-VMS systems, to H  * store away an fseek() pointer at the beginning of the file.  For VMS,  * we need one at EOF...  */s
 #ifdef vms/ 	getline(infd[0],text);			/* Will hit EOF...	*/v  	oldseek[lenA] = ftell(infd[0]);0 	getline(infd[1],textb);			/* Will hit EOF...	*/  	newseek[lenB] = ftell(infd[1]); #endif   	return(jackpot);r }s   output() {i 	register int	astart;L 	register int	aend; 
 	int		bstart;z 	register int	bend;(   	rewind(infd[0]);d 	rewind(infd[1]);  	match[0] = 0; 	match[lenA+1] = lenB + 1; 	if (!eflag) { 		/* 		 * Normal printout 		 */)7 		for (astart = 1; astart <= lenA; astart = aend + 1) {	 			/*h/ 			 * New subsequence, skip over matching stuff	 			 */ 			while (astart <= lenA2 				  && match[astart] == (match[astart - 1] + 1))
 				astart++;  			/*>2 			 * Found a difference, setup range and print it 			 */" 			bstart = match[astart - 1] + 1; 			aend = astart - 1;-. 			while (aend < lenA && match[aend + 1] == 0) 				aend++;  			bend = match[aend + 1] - 1; 			match[aend] = bend;& 			change(astart, aend, bstart, bend); 		}  	} 	else {} 		/*= 		 * Edit script output -- differences are output "backwards"v/ 		 * for the benefit of a line-oriented editor.r 		 */f3 		for (aend = lenA; aend >= 1; aend = astart - 1) {i 			while (aend >= 1(- 				  && match[aend] == (match[aend + 1] - 1)= 				  && match[aend] != 0) 				aend--;s 			bend = match[aend + 1] - 1; 			astart = aend + 1; / 			while (astart > 1 && match[astart - 1] == 0)w
 				astart--;p" 			bstart = match[astart - 1] + 1; 			match[astart] = bstart;& 			change(astart, aend, bstart, bend); 		}r 	} 	if (lenA == 0)h 		change(1, 0, 1, lenB); }l {" change(astart, aend, bstart, bend) int		astart;
 int		aend; int		bstart;
 int		bend; /*L  * Output a change entry: fileA[astart..aend] changed to fileB[bstart..bend]  */< {l 	/*j% 	 * This catches a "dummy" last entry" 	 */$ 	if (astart > aend && bstart > bend)	 		return;  	range(astart, aend);  	putchar((astart > aend) ? 'a'" 			: (bstart > bend) ? 'd' : 'c'); 	if (!eflag) 		range(bstart, bend); 	putchar('\n');e 	if (!eflag) {4 		fetch(oldseek, astart, aend, lenA, infd[0], "< ");' 		if (astart <= aend && bstart <= bend)] 			printf("---\n");  	}C 	fetch(newseek, bstart, bend, lenB, infd[1], (!eflag) ? "> " : "");i 	if (eflag && bstart <= bend)l 		printf(".\n"); }" b range(from, to),
 int		from; int		to; /*  * Print a range  */i {c' 	printf("%d", (to > from) ? from : to);m 	if (to >= from) 		printf(",%d", to); }  I, fetch(seekvec, start, end, trueend, fd, pfx) long		*seekvec;i register int	start;  register int	end;i
 int		trueend;l
 FILE		*fd; char		*pfx;u /*  * Print the appropriate text]  */e {  	register int	i; 	int		first; 	int		last;    	first = last = FALSE;
 	if (cflag) {\ 		if (start > end)
 			return; 		if (start > 1) { 			start--;a 			first++;. 		}n 		if (end < trueend-1) {	 			end++;r
 			last++; 		}a 	}- 	if (fseek(fd, seekvec[start - 1], 0) != 0) {A: 		printf("?Can't read line %d at %08lx (hex) in file%c\n", 			start, seekvec[start - 1],   			(fd == infd[0]) ? 'A' : 'B'); 	} 	else {(" 		for (i = start; i <= end; i++) {/ 			if (fgetss(text, sizeof text, fd) == NULL) { * 				printf("** Unexpected end of file\n");
 				break; 			} #ifdef DEBUG' 			printf("%5d: %s%s\n", i, pfx, text);e #elsek 			if (cflag) {n& 				if (first || (last && i == end)) { 					putchar(' '); 					first = FALSE;e 				}, 				else	putchar('|'); 			} 			else	printf("%s", pfx); 			printf("%s\n", text); #endif 		}r 	}	  }a   getline(fd, buffer) 
 FILE		*fd; char		*buffer; /*L  * Input routine, read one line to buffer[], return TRUE on eof, else FALSE.J  * The terminating newline is always removed.  If "-b" was given, trailingE  * whitespace (blanks and tabs) are removed and strings of blanks and=G  * tabs are replaced by a single blank.  Getline() does all hacking for   * redirected input files.  */i {f 	register char	*top; 	register char	*fromp; 	register char	c;,  / 	if (fgetss(buffer, sizeof text, fd) == NULL) {r 		*buffer = EOS; 		return(TRUE);[ 	} 	if (fd == stdin)	 		fputss(buffer, tempfd);= 	if (bflag || iflag) { 		top = buffer;] 		fromp = buffer;	! 		while ((c = *fromp++) != EOS) { * 			if (bflag && (c == ' ' || c == '\t')) { 				c = ' ';+ 				while (*fromp == ' ' || *fromp == '\t')D
 					fromp++;t 			}
 			if (iflag)m 				c = tolower(c);) 			*top++ = c; 		}k 		if (bflag && top[-1] == ' ')	 			top--;0
 		*top = EOS;= 	} 	return(FALSE);B }  i" static unsigned short crc16a[] = {$ 	0000000,	0140301,	0140601,	0000500,$ 	0141401,	0001700,	0001200,	0141101,$ 	0143001,	0003300,	0003600,	0143501,% 	0002400,	0142701,	0142201,	0002100,	/ };" static unsigned short crc16b[] = {$ 	0000000,	0146001,	0154001,	0012000,$ 	0170001,	0036000,	0024000,	0162001,$ 	0120001,	0066000,	0074000,	0132001,$ 	0050000,	0116001,	0104001,	0043000, };   unsigned short hash(buffer) char		*buffer; /*,  * Return the CRC16 hash code for the buffer;  * Algorithm from Stu Wecker (Digital memo 130-959-002-00).l  */  {	 	register unsigned short	crc;  	register char		*tp; 	register short 		temp;w  	 	crc = 0;*! 	for (tp = buffer; *tp != EOS;) {h1 		temp = *tp++ ^ crc;	/* XOR crc with new char	*/] 		crc = (crc >> 8) 			^ crc16a[(temp & 0017)]  			^ crc16b[(temp & 0360) >> 4]; 	} #ifdef	DEBUG_ALL4 	printf("%06o: %s\n", (crc == 0) ? 1 : crc, buffer); #endif 	return((crc == 0) ? 1 : crc); }		f ; opendir(which, arg, okfd)a/ int		which;		/* Which file to open (0 or 1)		*/k4 char		**arg;		/* File name argument, &argv[which]	*/- FILE		*okfd;		/* File name (already open)		*/r {s 	register char		*tp; 	register int		c;p 	register char		*newname;    	fgetname(okfd, text); 	/*c 	 * Skip over device name  	 */. 	for (tp = text; (c = *tp) && c != ':'; tp++);
 	if (c)	tp++;d 	else	tp = text; 	/* ' 	 * Skip over [UIC] or [PPN] if presentm 	 */  	if (*tp == '[' || *tp == '(') {. 		while ((c = *tp++) && c != ']' && c != ')'); 		if (c == 0) {t2 			fprintf(stderr, "?Bug: bad file name \"%s\"\n", 					text);  			tp--; 		}m 	} 	strcpy(text, tp); 	/*) 	 * Don't include version  	 */. 	for (tp = text; (c = *tp) && c != ';'; tp++);	 	*tp = 0;t 	/*g7 	 * Now, text has the file name, tp - text, its length,e9 	 * and *arg the (possible) directory name.  Create a newe 	 * file name for opening. 	 */> 	if ((newname = malloc(tp - text + strlen(*arg) + 1)) == NULL)! 		error("Out of space at start"); # 	concat(newname, *arg, text, NULL);i1 	if ((infd[which] = fopen(newname, "r")) == NULL) % 		cant(*arg, "constructed input", 1);r 	else) 		*arg = newname;( }= ) char * myalloc(amount, why) int		amount; char		*why;  /*  * Allocate or crash.   */- {d 	register char	*pointer;  3 	if ((pointer = malloc((unsigned) amount)) == NULL)	 		noroom(why); 	return (pointer); }n   char *! compact(pointer, new_amount, why)m char		*pointer;h int		new_amount; char		*why;a /*)  * Reallocate pointer, compacting storagea  */l {t 	register	*new_pointer;= 	extern char	*realloc();   	/*rB 	 * This routine is heavily dependent on C storage allocator hacks 	 */ #ifndef	vms & 	free(pointer);			/* Do not change		*/1 	free(free_space);		/* The order of this code.	
*/ 9 	free_space = malloc(1);		/* This code doesn't work on	*/  #endif% 	if ((new_pointer =		/* Vax-11 C			*/n4 			realloc(pointer, (unsigned) new_amount)) == NULL) 		noroom(why); #ifdef	DEBUG 	if (new_pointer != pointer) {. 		fprintf(stderr, "moved from %06o to %06o\n", 			pointer, new_pointer);s 	} /*	rdump(new_pointer, why);y */ #endif 	return (new_pointer); }    noroom(why)i char		*why;  {t7 	fprintf(stderr, "?DIFF-F-out of room when %s\n", why);p 	exit(IO_ERROR); }"   #ifdef	DEBUG rdump(pointer, why)r int		*pointer; char		*why;a /*  * Dump memory block  */a {  	int	*last;  	int	count;]   	last = (int **)pointer[-1];5 	fprintf(stderr, "dump %s of %06o -> %06o, %d words",u' 			why, pointer, last, last - pointer);g# 	last = (int *)(((int) last) & ~1);b+ 	for (count = 0; pointer < last; ++count) {	 		if ((count & 07) == 0) {& 			fprintf(stderr, "\n%06o", pointer); 		}t& 		fprintf(stderr, "\t%06o", *pointer); 		pointer++; 	} 	fprintf(stderr, "\n");% }\ #endif   cant(filename, what, fatalflag)n char		*filename; char		*what; int		fatalflag;l /*  * Can't open file message  */	 {c@ 	fprintf(stderr, "Can't open %s file \"%s\"\n", what, filename); 	if (fatalflag)( 		error("Can't continue"); }  = #ifdef	DEBUG dump(d_linep, d_len, d_which)	 LINE	*d_linep; {  	register int i; 	 ? 	printf("Dump of file%c, %d elements\n", "AB"[d_which], d_len);t# 	printf("linep @ %06o\n", d_linep);i 	for (i = 0; i <= d_len; i++) {. 		printf("%3d  %6d  %06o\n", i,.( 				d_linep[i].serial, d_linep[i].hash); 	} }*   dumpklist(kmax, why)	 int	kmax;]
 char	*why; /*
  * Dump klistf  */t {b 	register int		i;F 	register CANDIDATE	*cp; 	register int		count;   A 	printf("\nklist[0..%d] %s, clength = %d\n", kmax, why, clength);r 	for (i = 0; i <= kmax; i++) { 		cp = &clist[klist[i]];! 		printf("%2d %2d", i, klist[i]);1. 		if (cp >= &clist[0] && cp < &clist[clength])9 			printf(" (%2d %2d -> %2d)\n", cp->a, cp->b, cp->link);  		else if (klist[i] == -1) 			printf(" End of chain\n"); * 		else	printf(" illegal klist element\n"); 	} 	for (i = 0; i <= kmax; i++) {
 		count = -1;t7 		for (cp = klist[i]; cp > &clist[0]; cp = &cp->link) {	 			if (++count >= 6) { 				printf("\n    ");  				count = 0; 			}" 			printf(" (%2d: %2d,%2d -> %d)",& 				cp-clist, cp->a, cp->b, cp->link); 		}m 		printf("\n");  	} 	printf("*\n");b }  #endif d
#ifdef	TIMING
ptime(why)
char		*why;
/*
 * Report how many seconds have passed since sectiontime was last
 * set, labelled with why, and restart the interval from now.
 */
{
	long	now;
	long	elapsed;

	now = time(NULL);
	elapsed = now - sectiontime;
	sectiontime = now;
	printf("%ld seconds for %s\n", elapsed, why);
}
#endif