 /* **++ **  FACILITY:  DIFF.C  ** **  MODULE DESCRIPTION:  **> **      This module compares two arrays of lines (representing; **	files) and reports the sequences of consecutive matching ; **	lines between them using the "recursive longest matching  **	sequence" algorithm.    ** **  AUTHORS:9 **	Steppe, Tom. "File Comparison Algorithms." Dr. Dobb's  # **	Journal, #131 (Sep 1987): 54-60.  ** **  CREATION DATE:  2-21-1991  ** ** **  MODIFICATION HISTORY:  ** **      {@tbs@}... **-- */     /* ** **  INCLUDE FILES  ** */
 #ifdef vax11c  #include stdio #include ctype #include string  #include stdlib  #include rms #include descrip #include "diff.h"  #else  #include <stdio.h> #include <ctype.h> #include <string.h>  #include <malloc.h>  #include "diff.h"  #endif     /* ** **  MACRO DEFINITIONS  ** */   #define TRUE	1 #define FALSE	0   # /**  Maximum and Minimum macro  **/ + #define max(x, y)	(((x) > (y)) ? (x) : (y)) + #define min(x, y)	(((x) < (y)) ? (x) : (y))   8 /** Value to indicate identical strings with strcmp  **/ #define ALIKE	0   8 /**  Mask for number of bits in hash code (12 bits)  **/# #define MASK	(unsigned int) 0x0FFFF   & /** Number of possible hash codes  **/ #define HASHSIZ	(MASK+1)   /* ** **  GLOBAL DECLARATIONS  ** **/   7 int diff_sec,   		    /**  Number of diff sections  **/ 6     diff_line;    		    /**  Number of diff lines  **/  D int diff_beg1,    		    /**  First line# for diff sec in file 1  **/D     diff_beg2;    		    /**  First line# for diff sec in file 2  **/   /* **, **  INTERNAL FUNCTION PROTOTYPE DECLARATIONS ** **/   : static BOOLEAN get_inf (char *, TXTINF *, int, FILEINF *); static HASH calc_hash (TXTINF); D static void fnd_seq (FILEINF *, int, int, FILEINF *, int, int, int);6 static BOOLEAN chk_hashes (LINEINF *, LINEINF *, int);1 static int cnt_matches (TXTINF *, TXTINF *, int); : static void rpt_seq (FILEINF *, int, FILEINF *, int, int);A static void rpt_seq_1 (FILEINF *, int, int, FILEINF *, int, int);        /* **++ **  FUNCTIONAL DESCRIPTION:  **> **      compare() compares two arrays of lines and reports the7 **	sequences of consecutive matching lines.  The zeroth 9 **	element of each array is unused so that the index into 8 **	the array is identical to the associated line number. ** **  RETURN VALUE:  **% **      TRUE if comparison succeeded.  **	FALSE if not enough memory. ** **-- */0 BOOLEAN compare (r1, a1, n1, r2, a2, n2, lngval)$ char *r1;	/**  File name for #1  **/2 TXTINF *a1;	/**  Array of lines of text in #1  **/E int n1;		/**  Number of lines in a1 (does not count 0th element)  **/ $ char *r2;	/**  File name for #2  **/2 TXTINF *a2;	/**  Array of lines of text in #2  **/E int n2;		/**  Number of lines in a2 (does not count 0th element)  **/ ) int lngval;	/**  "Long enough" value  **/  { +     FILEINF f1,		/**  File info for #1  **/ $ 	    f2;		/**  File info for #2  **/'     BOOLEAN rtn;	/**  Return value  **/        /**  Initialize  **/     diff_sec = diff_line = 0;      diff_beg1 = diff_beg2 = 0;  5     /**  Gather info for each file, then compare  **/ G     if (rtn = (get_inf (r1, a1, n1, &f1) && get_inf (r2, a2, n2, &f2)))      { 1         fnd_seq (&f1, 1, n1, &f2, 1, n2, lngval);      }   /     /**  Output remaining difference lines  **/ .     if (!(diff_beg1 == n1 && diff_beg2 == n2))     { >     	rpt_seq_1 (&f1, n1, n1-diff_beg1, &f2, n2, n2-diff_beg2);
 	diff_line++;      }      #     /**  Output summary report  **/ N     fprintf (stdout, "\nNumber of difference sections found: %d\n", diff_sec);L     fprintf (stdout, "Number of difference records found: %d\n", diff_line);  +     /**  Return dynamic memory to pool  **/ &     free (f1.line);	free (f1.hashtbl);&     free (f2.line);	free (f2.hashtbl);       return rtn;  }        /* **++ **  FUNCTIONAL DESCRIPTION:  **? **      get_inf() calculates hash codes and builds a hash table  ** ** **  RETURN VALUE:  **$ **      TRUE if get_inf() succeeded. **	FALSE if not enough memory. ** **-- */# static BOOLEAN get_inf (r, a, n, f)   char *r;	    /**  File name  **// TXTINF *a;	    /**  Array of lines of text  **/ + int  n;		    /**  Number of lines in a  **/ # FILEINF *f;	    /**  File info  **/  { 4     unsigned int size;		/**  Size of hash table  **/&     register int i;		/**  Counter  **/3     TBLENTRY *entry;		/**  Entry in hash table  **/   3     f->filename = r;	/**  Assign the file name  **/ 3     f->txt = a;		/**  Assign the array of text  **/      2     /**  Allocate and initialize a hash table  **/'     size = HASHSIZ * sizeof (TBLENTRY); 0     if (f->hashtbl = (TBLENTRY *) malloc (size))     { 1         memset ((char *) f->hashtbl, '\0', size);      }      else     {          return (FALSE);      }      if (n <= 0) !     {	/**  There is no lines  **/          f->line = NULL;      }      else     { 3         /**  Allocate an array of line structs  **/          if ((f->line =  9 		(LINEINF *) malloc ((n+1) * sizeof (LINEINF))) == NULL) 	         {              return (FALSE); 	         } "         for (i = 1;  i <= n;  i++),         {   /**  Loop through the lines  **/M             f->line[i].hash = calc_hash (f->txt[i]);	/**  Calculate hash  **/ A 	    entry = f->hashtbl + f->line[i].hash;	/**  Locate entry  **/   F 	    /**  Update the linked list of lines iwth the same hash code  **/$ 	    f->line[entry->last].nxtln = i; 	    f->line[i].nxtln = 0;  C 	    /** Update the first and last line info in the hash table  **/ !             if (entry->frst == 0) 
             {                   entry->frst = i;
             }              entry->last = i;	         }      }      return (TRUE); }        /* **++ **  FUNCTIONAL DESCRIPTION:  **> **      calc_hash() calculates a hash code for a line of text. ** **  RETURN VALUE:  ** **      a hash code value. ** **-- */ static HASH calc_hash (buf) ! TXTINF buf;	/** Line of text  **/  { 6     register unsigned int chksum;   /**  Checksum  **/+     register int	  i;	    /**  Counter  **/ %     char		  *s;	    /**  Pointer  **/ /     HASH		  hash;	    /**  Hash code value  **/   @     /**  Build up a checksum of the characters in the text   **/P     for (chksum = 0, s = buf.content, i = 0;  i < buf.len;  chksum ^= *s++, i++)     { 	         ;      }   P     /**  Combine the 7-bit checksum and as much of the length as is possible **/?     hash = ((chksum & 0x7F) | ((s - buf.content) << 7)) & MASK;           return (hash); }        /* **++ **  FUNCTIONAL DESCRIPTION:  **C **      fnd_seq() finds a "good sequence" of lines within the given @ **	starting and ending line numbers.  fnd_seq() then recursively< **	finds "good sequences" in the sections of lines above the  **	"good sequence" and below it. ** **  RETURN VALUE:  ** **      none ** **-- */< static void fnd_seq (f1, beg1, end1, f2, beg2, end2, lngval)+ FILEINF	    *f1;	/**  File info for #1  **/ 5 int	    beg1;	/**  First line # to compare in #1  **/ 4 int	    end1;	/**  Last line # to compare in #1  **/2 FILEINF     *f2;        /**  File info for #2  **/? int         beg2;       /**  First line # to compare in #2  **/ > int         end2;       /**  Last line # to compare in #2  **/- int	    lngval;	/**  "Long enough" value  **/  { 1     LINEINF *line1,	/**  Line info ptr in #1  **/ * 	    *line2;	/**  Line info ptr in #2  **//     register int limit;	/**  Looping limit  **/ ,     int	    ln1,	/**  Line number in #1  **/% 	    ln2;	/**  Line number in #2  **/ 2     register int ln;	/**  Working line number  **/,     BOOLEAN go;		/**  Continue to loop?  **/-     int most,		/**  Longest possible seq  **/ - 	most1,		/**  Longest possible due to #1  **/ - 	most2;		/**  Longest possible due to #2  **/ -     int cnt,		/**  Length of longest seq  **/ . 	oldcnt,		/**  Length of prev longest seq  **/( 	n,		/**  Length of cur longest seq  **/) 	m1,		/**  Line of longest seq in #1  **/ ;         m2;             /**  Line of longest seq in #2  **/        /**  Initialize  **/     go = TRUE;     line1 = f1->line;      line2 = f2->line;      .     /**  Initialize longest sequence info  **/-     cnt = 0;		/**  Length of longest seq  **/ 8     m1 = beg1 - 1;	/**  Line # of longest seq in #1  **/=     m2 = beg2 - 1;      /**  Line # of longest seq in #2  **/ 5     oldcnt = 0;		/**  Length of prev longest seq  **/           /*?     **  Calculate max possible number of consecutive lines that (     **	can match (based on line # range)     **/      most1 = end1 - beg1 + 1;     most2 = end2 - beg2 + 1;          /*0     **	Scan lines looking for a "good sequence".<     **	Compare lines in the following order of line numbers:     **     **			    (1, 1) "     **		    (1, 2), (2, 1), (2, 2)1     **	    (1, 3), (2, 3), (3, 1), (3, 2), (3, 3)      **			      ...     **/ 6     for (ln1 = beg1, ln2 = beg2;  TRUE;  ln1++, ln2++)     {          if (ln2 <= end2 - cnt) 	    /* 6 	    **	There are enough lines left in #2 such that it. 	    **	is possible to find a longer sequence. 	    **/ 	{             /*, 	    **  Determine the limit in #1 that both1 	    **	enforces the order scheme and still makes . 	    **	it possible to find a longer sequence. 	    **/' 	    limit = min (ln1 - 1, end1 - cnt);  	     4 	    /**  Calculate first potential match in #1  **/:             for (ln = f1->hashtbl[line2[ln2].hash].frst;  - 		    ln && ln < beg1;  ln = line1[ln].nxtln) 
             {                  ; 
             }  	     + 	    /**  Loop through the lines in #1  **/ =             for (;  ln && ln <= limit;  ln = line1[ln].nxtln) 
             { 8                 if (line1[ln].hash == line2[ln2].hash &&2 		    line1[ln+cnt].hash == line2[ln2+cnt].hash &&8 		    !(ln-m1 == ln2-m2 && ln < m1+cnt && m1 != beg1-1))                 {   /*; 		    **  A candidate for a longer sequence has been found  < 		    **  The current lines match, the currents + cnt match,9 		    **	and this sequence is not a subset of the longest  		    **  sequence so far.	 		    **/   =                     /**  Calculate most possible matches  **/ 3                     most = min (end1-ln+1, most2);   		     		    /*< 		    **	First compare hash codes.  If the number of matches: 		    **	exceeds the longest sequence so far, then compare 		    **	the actual text. 	 		    **/ ?                     if (chk_hashes(line1+ln, line2+ln2, cnt) && ; 			(n = cnt_matches (f1->txt+ln, f2->txt+ln2, most)) > cnt) ?                     {	/**  This is the longest seq. so far  **/ 9                         /**  Update longest seq info  **/  			oldcnt = cnt; 			cnt = n;  			m1 = ln;  			m2 = ln2;  0 			/**  If it's long enough, end the search  **/*                         if (cnt >= lngval)                         { "                             break;                         }  			 ) 			/** Update limit, using new count  **/ % 			limit = min (ln1 - 1, end1 - cnt);                      }                  } 
             } 2 	    /**  If it's long enough, end the search  **/             if (cnt >= lngval)
             {                  break;
             } 
 	    most2--; 	         }          else	         { <             go = FALSE;	    /**  EOF of this is reached  **/	         }  	 0 	/**  Repeat the process for the other file  **/         if (ln1 <= end1 - cnt)	         { *             limit = min (ln2, end2 - cnt);:             for (ln = f2->hashtbl[line1[ln1].hash].frst;  - 		    ln && ln < beg2;  ln = line2[ln].nxtln) 
             {                  ; 
             } =             for (;  ln && ln <= limit;  ln = line2[ln].nxtln) 
             { 8                 if (line1[ln1].hash == line2[ln].hash &&2 		    line1[ln1+cnt].hash == line2[ln+cnt].hash &&9 		    !(ln1-m1 == ln-m2 && ln1 < m1+cnt && m2 != beg2-1))                  { 6                     most = min (end2 - ln + 1, most1);D                     if (chk_hashes (line1 + ln1, line2 + ln, cnt) &&# 			(n = cnt_matches (f1->txt + ln1, $ 					    f2->txt + ln, most)) > cnt)                     { %                         oldcnt = cnt;  			cnt = n;  			m1 = ln1; 			m2 = ln;  			 *                         if (cnt >= lngval)                         { "                             break;                         } ! 			limit = min (ln2, end2 - cnt);                      }                  } 
             }              if (cnt >= lngval)
             {                  break;
             } 
 	    most1--; 	         }          else if (!go) 	         { 8             break;	/**  EOF of this file is reached  **/	         }      }           /*C     **  If the longest seq is shorter than the "long engoug" value, >     **	the "long enough" value can be adjusted for the rest of     **	the comparison process.     **/      if (cnt < lngval)      {          lngval = cnt;      }      B     if (cnt >= 1)	/**  Longest seq exceeds min necessary size  **/     { 3         if (m1 != beg1 && m2 != beg2 && oldcnt > 0)  	    /* < 	    **	There is still something worgh comparing previous to 	    **	the seq. 	    **/	         { ?             /**  Use knowledge of the previous longest seq  **/ : 	    fnd_seq (f1, beg1, m1 - 1, f2, beg2, m2 - 1, oldcnt);	         }  	  	/**  Report the seq  **/  	rpt_seq (f1, m1, f2, m2, cnt);  	 9         if (m1 + cnt - 1 != end1 && m2 + cnt - 1 != end2) H 	    /**  There is still someghing worth comparing subseq to the seq **/	         { E             fnd_seq (f1, m1 + cnt, end1, f2, m2 + cnt, end2, lngval); 	         }      }  }        /* **++ **  FUNCTIONAL DESCRIPTION:  **< **      chk_hashes() determines whether this seq of matching: **	hash codes is longer than cnt.  It knows that the first- **	pair of hash codes is guaranteed to match.  ** **  RETURN VALUE:  **, **      TRUE if this seq is longer than cnt., **	FALSE if this seq is not longer than cnt. ** **-- */- static BOOLEAN chk_hashes (line1, line2, cnt) 1 LINEINF	    *line1,	    /**  Line info for #1 **/ * 	    *line2;	    /**  Line info for #2 **/8 register int cnt;	    /**  Counter to try to exceed  **/ { @     register int n;	    /**  Counter of consecutive matches  **/  F     for (n = 1;  n <= cnt && (++line1)->hash == (++line2)->hash;  n++)     { 	         ;      }      return (n > cnt);  }        /* **++ **  FUNCTIONAL DESCRIPTION:  **@ **      cnt_matches() counts the numbner of consecutive matching **	lines of text.  ** **  RETURN VALUE:  **- **      Number of consecutive matching lines.  ** **-- */% static int cnt_matches (s1, s2, most) / TXTINF *s1,		/**  Starting line in file #1  **/ /        *s2;		/**  Starting line in file #2  **/ 9 register int most;	/**  Most matching lines possible  **/  { :     register int n;	/**  Count of consecutive matches  **/  +     /**  Count the consecutive matches  **/ B     for (n = 0;  n < most && txtcmp (*s1++, *s2++) == ALIKE;  n++)     { 	         ;      }      return (n);  }    int txtcmp (s, t) 	 TXTINF s, 	        t;  {      register int i; 
     int c;       if (s.len > t.len)     {      	return 1;     } else if (s.len < t.len)      {  	return -1;      } else       {  	for (i = 0;  i < s.len;  i++) 	{) 	    if ((c = s.content[i]-t.content[i]))  	    { 	    	 return (c > 0? 1 : -1); 	    } 	}     } 
     return 0;  }      /* **++ **  FUNCTIONAL DESCRIPTION:  **1 **      rpt_seq() reports a matching seq of lines  ** **  RETURN VALUE:  ** **      None ** **-- */) static void rpt_seq (f1, m1, f2, m2, cnt) + FILEINF	    *f1;	/**  File info for #1  **/ 5 int	    m1;		/**  Location of matching seq in #1  **/ 2 FILEINF     *f2;        /**  File info for #2  **/5 int	    m2;		/**  Location of matching seq in #2  **/ 6 int	    cnt;	/**  Number of lines in matching seq  **/ {      if (!(m1 == 1 && m2 == 1))     { & 	rpt_seq_1 (f1, m1, cnt, f2, m2, cnt);     }        diff_beg1 = m1+cnt-1;      diff_beg2 = m2+cnt-1;  }        /* **++ **  FUNCTIONAL DESCRIPTION:  **4 **      rpt_seq_1() ouputs diff report for a section ** **  RETURN VALUE:  ** **      None ** **-- */2 static void rpt_seq_1 (f1, m1, cnt1, f2, m2, cnt2)+ FILEINF	    *f1;	/**  File info for #1  **/ 5 int	    m1;		/**  Location of matching seq in #1  **/ = int	    cnt1;	/**  Number of lines in matching seq in #1  **/*2 FILEINF     *f2;        /**  File info for #2  **/5 int	    m2;		/**  Location of matching seq in #2  **/f= int	    cnt2;	/**  Number of lines in matching seq in #2  **/n {u     struct FAB fab;n     struct RAB rab;        unsigned long retcode;$     static char linebuf[MAXCHARS+1];.     $DESCRIPTOR (linebuf_descriptor, linebuf);"     char *filename = "SYS$OUTPUT";
     int i;     int len;       /*     **	Initialize FAB      **/        fab = cc$rms_fab;      fab.fab$l_fna = filename; %     fab.fab$b_fns = strlen(filename);           /*%     **	    Initialize Source File RABi     **/r       rab = cc$rms_rab;i     rab.rab$l_fab = &fab;p       /*     **	Report difference     **/.  .     if (sys$open (&fab, 0, 0) != RMS$_NORMAL)      {a? 	fprintf(stderr, "Insufficient privilege for write operation");I 	return;     }        rab.rab$b_rac = RAB$C_SEQ;     rab.rab$l_rbf = linebuf;  =     if ((retcode = sys$connect (&rab, 0, 0)) != RMS$_NORMAL) i     {	 	sys$close(&fab, 0, 0); A 	fprintf(stderr, "Insufficient privilege for connect operation");n 	return;     }   )     strcpy (linebuf, "************\r\n");s$     rab.rab$w_rsz = strlen(linebuf);9     if ((retcode = sys$put (&rab, 0, 0)) != RMS$_NORMAL) A     {M 	sys$close(&fab, 0, 0);B? 	fprintf(stderr, "Insufficient privilege for write operation");f 	return;     }*  3     sprintf (linebuf, "File %s\r\n", f1->filename);*$     rab.rab$w_rsz = strlen(linebuf);9     if ((retcode = sys$put (&rab, 0, 0)) != RMS$_NORMAL)       {F 	sys$close(&fab, 0, 0);n? 	fprintf(stderr, "Insufficient privilege for write operation");O 	return;     }t    )     for (i = diff_beg1+1;  i <= m1;  i++)L     {;" 	sprintf (linebuf, "%6.6d   ", i); 	len = strlen (linebuf);: 	memcpy (linebuf+len, f1->txt[i].content, f1->txt[i].len);0 	memcpy (linebuf+len+f1->txt[i].len, "\r\n", 2);& 	rab.rab$w_rsz = f1->txt[i].len+len+2;6 	if ((retcode = sys$put (&rab, 0, 0)) != RMS$_NORMAL)  	{ 	    sys$close(&fab, 0, 0);IC 	    fprintf(stderr, "Insufficient privilege for write operation");N 	    return; 	}     }   #     strcpy (linebuf, "******\r\n");n$     rab.rab$w_rsz = strlen(linebuf);9     if ((retcode = sys$put (&rab, 0, 0)) != RMS$_NORMAL) a     {  	sys$close(&fab, 0, 0);i? 	fprintf(stderr, "Insufficient privilege for write operation");* 	return;     }E  3     sprintf (linebuf, "File %s\r\n", f2->filename);E$     rab.rab$w_rsz = strlen(linebuf);9     if ((retcode = sys$put (&rab, 0, 0)) != RMS$_NORMAL) 	     {l 	sys$close(&fab, 0, 0);F? 	fprintf(stderr, "Insufficient privilege for write operation");r 	return;     }d  )     for (i = diff_beg2+1;  i <= m2;  i++)*     { " 	sprintf (linebuf, "%6.6d   ", i); 	len = strlen (linebuf);: 	memcpy (linebuf+len, f2->txt[i].content, f2->txt[i].len);0 	memcpy (linebuf+len+f2->txt[i].len, "\r\n", 2);& 	rab.rab$w_rsz = f2->txt[i].len+len+2;6 	if ((retcode = sys$put (&rab, 0, 0)) != RMS$_NORMAL)  	{ 	    sys$close(&fab, 0, 0);nC 	    fprintf(stderr, "Insufficient privilege for write operation");0 	    return; 	}     }b  )     strcpy (linebuf, "************\r\n");e$     rab.rab$w_rsz = strlen(linebuf);9     if ((retcode = sys$put (&rab, 0, 0)) != RMS$_NORMAL)       { ? 	fprintf(stderr, "Insufficient privilege for write operation");*     }u       diff_sec++;n6     diff_line += max (m1-diff_beg1-1, m2-diff_beg2-1);       sys$close (&fab, 0, 0);( },