   /*
 *			P e r f e c t   H a s h
 *
 *	Program to search for Minimal Perfect Hash Functions for
 *	use in lexical analyzers. C.D. Havener Jan 26 1982
 *	GenRad Inc. 37 Great Road, Bolton Mass. 01740
 *	Based on paper "Minimal Perfect Hash Functions Made Simple"
 *	by Richard J. Cichelli - Comm. of ACM Jan 1980 pp 17-19
 *
 *	Synopsis: The hash function is h = assoc value of 1st
 *	letter + length of keyword + assoc value of last letter
 *
 *	This program finds the associated values of the letters
 *	given a list of keywords, 1 per line. It works most of
 *	the time for up to about 40 keywords but certain
 *	pathological cases exist. A semi-perfect hash is usually
 *	found by the program. The user can then tighten the
 *	default limits for max associated char value or the
 *	table limit using the -v and -t options. Sometimes the
 *	presort heuristics actually make the search process
 *	much more difficult. The user can try his luck at manual
 *	sorting using the -n option. Since the hash function
 *	produces such a limited range of numbers it can only work
 *	for up to about 40 keywords. If a language needs say 80
 *	keywords just split them up into two tables and let the
 *	lexical analyzer look in first one then the other, this
 *	will still be much faster than any other keyword lookup.
 *
 *	This program has run successfully on a VAX 11/780 under
 *	4.1BSD and VMS (using Vax-11 C and Decus C).
  */s   /*)BUILD) 	$(MP) = 1			# uses macros with arguments & 	$(STACK)	= 10000		# lots of recursion 	$(TKBOPTIONS)	= { 	    STACK	= 1024t 	    TASK	= ...PRF 	} */   #ifdef	DOCUMENTATION* title	perfect		Find Perfect Hash Functions* index	perfect		Find perfect hash functions   synopsis   	perfect [-options]h   descriptionh  9 	perfect reads a list of keywords from the standard inputp4 	and computes a "perfect hash" function for the set.# 	The following options are defined:g 	.lm +8 # 	.s.i -8;-d######Enable debug code.M< 	.s.i -8;-n######Disable pre-sort of keywords.  (See below).= 	.s.i -8;-t#<n>##Limit the maximum table size to <n> entries.T> 	.s.i -8;-v#<n>##Limit the maximum associated character value.= 	.s.i -8;-k#<n>##Use only the first <n> keywords in the list.t8 	.s.i -8;-a#<n>##Give a status report every <n> seconds. 	(Unix and Vax-11C/VMS only).o= 	.s.i -8;-o#file#Write a sample keyword lookup routine to then 	indicated file.	 	.s.lm -8o8 	The program prints a running commentary on the standard 	output.   Author  + 	Charlie Havener, GenRad Inc.  Bolton Mass.S  / 	(Modified by Martin Minow, Dec, Maynard Mass.)e  
 Discussion  5 	This technical note describes an implementation of ah= 	pragmatic algorithm for finding perfect or semi-perfect hashsA 	functions for lists of keywords. The resulting hash function cano> 	be used to speed up lexical analyzers used in translators and? 	compilers. The algorithm was described by R.J. Cichelli in Theo@ 	January 1980 issue of Communications of the ACM under the titleB 	"Minimal Perfect Hash Functions made Simple." The article did notB 	include a computer language description of the algorithm and someB 	important implementation details were unclear. It is assumed that? 	the user will know what to do with the output of this program.s? 	The -o option may be used to produce a lexical analyser kernel = 	from the tables built by this program. Another reference forl> 	those wishing to pursue the topic further is "More On MinimalA 	Perfect Hash tables" by Curtis Cook and R. Oldehoeft, a Colorado # 	State University Technical Report.c  < 	The program takes a list of keywords and sorts them in such7 	a way that the search time for a hash function will bec> 	reasonable. Once sorted a recursive trial and error procedure: 	hunts for a hash function satisfying user supplied bounds: 	of table size and associated character value limits.  The 	built-in hash function is  * 	    hash = assoc value of first character 		+ keyword length! 		+ assoc value of last characterr  < 	It is critically important to select a good ordering of theA 	keywords before searching begins. I ran up 100 hours of VAX timem? 	searching for a hash function with an unordered list, and gavec= 	up. Once the sort heuristics were debugged it found the hashe< 	function in minutes. Typically it will find the function inB 	minutes or you may as well give up. A status reporting feature is? 	built into the program on the UNIX system that lets you follow = 	the progress of the search depth. If it has trouble, you cans= 	tell just which word it can't get past, and take appropriatedA 	action. 
If it has trouble you can attempt to alter its choice ofi= 	pre-ordering by moving troublesome words to the front of theO? 	list. In a sequel to his paper, Cichelli stated that sometimeso> 	the sort heuristics makes the search longer. There is also noB 	guarantee that a hash function can be found. The program does the> 	obvious precheck that no two keywords have the same first and> 	last letter and length in common (e.g. BAK KAB). Nonetheless,< 	as pointed out by Jaeschke and Osterberg in an overly harshC 	criticism, there are many pathological sets of keywords that fail.d@ 	(On Cichelli's minimal perfect hash functions method. Comm. ACM? 	Dec. 1980, pp 728-729) The algorithm also only works for up to B 	about 40 keywords due to the limited range of numbers the formula? 	can generate. If you have, say 80 keywords, just make two hashe; 	tables and look in each one. Here are some examples of thet 	program's output: 	.s.nf+ 	Perfect hash function finder, CDH Ver. 2.9J& 	Start time = Mon Oct 17 20:40:40 1983" 	28 keywords, 19 distinct letters.& 	The associated char value limit is 19 	The table size limit is 100 	The search ordering is./ 	 else double continue case float struct sizeofo2 	 static short extern typedef default register for3 	 char while entry int if return do unsigned switchI 	 union goto auto long break 	.sa( 	Success! 
Associated Char Values Follow:2 	 a =19, b = 4, c = 1, d = 0, e = 0, f = 3, g =17,2 	 h =14, i =17, k =19, l = 6, n = 8, o = 0, r =11,$ 	 s = 6, t = 0, u =16, w =13, y =14,$ 	Hash min = 2, max = 30, spread = 29, 	 do 2, else 4, case 5, double 6, default 7,7 	 float 8, continue 9, typedef 10, short 11, struct 12,l3 	 static 13, extern 14, sizeof 15, char 16, for 17,c6 	 while 18, entry 19, int 20, goto 21, if 22, auto 23,7 	 unsigned 24, return 25, switch 26, long 27, break 28,t 	 union 29, register 30, 	.ss# 	Total search() invocations = 15913," 	 Started Mon Oct 17 20:40:40 1983" 	Finished Mon Oct 17 20:40:42 1983 	.s.nf+ 	Perfect hash function finder, CDH Ver. 2.9 & 	Start time = Mon Oct 17 20:41:11 1983" 	39 keywords, 19 distinct letters.& 	The associated char value limit is 19 	The table size limit is 100 	The search ordering is44 	 TEXT RESET TRUE REWRITE READLN SQRT SQR EOLN TRUNC3 	 PUT EXP PAGE CHR CHAR COS SUCC READ ROUND DISPOSEr3 	 PRED SIN OUTPUT ORD INPUT INTEGER GET MAXINT REALi3 	 WRITE EOF FALSE NEW WRITELN LN ARCTAN ABS BOOLEAN 
 	 PACK UNPACKc 	.s2( 	Success! Associated Char Values Follow:2 	 A =18, B =11, C =16, D =18, E = 3, F = 3, G = 0,2 	 I = 1, K =19, L = 9, M =18, N =19, O = 8, P = 9,$ 	 R = 0, S =14, T = 0, U =13, W =19,$ 	Hash min = 3, max = 45, spread = 43* 	 GET 3, TEXT 4, RESET 5, INPUT 6, TRUE 7,1 	 INTEGER 8, EOF 9, REWRITE 10, FALSE 11, PUT 12, 7 	 REAL 13, OUTPUT 14, EXP 15, PAGE 16, SQR 17, SQRT 18,I/ 	 CHR 19, CHAR 20, TRUNC 21, READ 22, ROUND 23,A6 	 MAXINT 24, READLN 25, EOLN 26, WRITE 27, DISPOSE 28,3 	 ORD 29, LN 30, PRED 31, PACK 32, COS 33, SUCC 34,C0 	 ABS 35, SIN 36, BOOLEAN 37, UNPACK 38, NEW 41, 	 ARCTAN 43, WRITELN 45, 	.sK$ 	Total search() invocations = 149292" 	 Started Mon Oct 17 20:41:11 1983" 	Finished Mon Oct 17 20:41:35 1983 	.s 2.f : 	Usually, the first time you run perf, just let everything 	default. The second time,< 	use the -t option to limit the table size to the first hash# 	value plus the number of keywords.2  8 	On Unix and Vax/VMS (Vax-11 C), The program will accept< 	SIGTERM signals (CTRL/C on VMS) for an update status reportB 	since it may take quite a while to find the hash function values.   Sample keyword tablesa  2 	The following tables are known to work correctly.5 	The first defines the keywords for the C programming 2 	language; the second for a toy computer language.  . 	int char float double struct union long short- 	unsigned auto extern register typedef static . 	goto return sizeof break continue if else for# 	do while switch case default entryw  ( 	GET TEXT RESET OUTPUT MAXINT INPUT TRUE. 	INTEGER EOF REWRITE FALSE CHR CHAR TRUNC REAL- 	SQR SQRT WRITE PUT ORD READ ROUND READLN EXP.. 	PAGE EOLN COS SUCC DISPOSE NEW ABS LN BOOLEAN$ 	WRITELN SIN PACK UNPACK ARCTAN PRED   #endif s #include	<stdio.h> #include	<ctype.h>   #define	EOS	'\0' #define	FALSE	0r #define	TRUE	1 #define	VOID	int   #ifdef	unix  #define	UNIX	1 #define	DOALARM	1e #endif   #ifndef	vms 
 #define	VMS	0z #elsea
 #define	VMS	1l #define	DOALARM	1w #endif   #ifndef	UNIX #define	UNIX	0 #endif   #ifndef	DOALARME #define	DOALARM	0I #endif   #if	UNIX #define	IO_ERROR	1 #endif   #if	VMSR #include	<ssdef.h> #define	IO_ERROR	SS$_ABORT /*0  * This creates text files in vanilla RMS on VMS  */e extern FILE	*fdopen();@ #define	CREATE(f, m) fdopen(creat(f, 0, "rat=cr", "rfm=var"), m) #elseR #define	CREATE	fopen #endif   #if	DOALARMn #include	<signal.h>L extern int	status(); #endif  2 #define	MAXKEYS		100		/* Maximum number of keys	*/5 #define MAXCHARS	0377		/* Maximum number of char's	*/ - #define UNDEF		-1		/* the undefined value		*/    typedef struct keyword {%     int		len;		/* Keyword length			*/.,     char	last;		/* Last byte of keyword			*/'     char	word[1];	/* Keyword text				*/*
 } KEYWORD;   /*4  * Define some frequently used operations as macros:3  *	hash(p)		returns the hash value for this keyworde8  *	used(n)		TRUE if this hash value is in use or illegal/  *	defined(c)	TRUE if this character is definedm  */e  > #define hash(p)		(value[p->word[0]] + p->len + value[p->last])> #define	used(n)		((n > tablesize || n < 0) ? TRUE : mapped[n]) #define	defined(c)	(c != UNDEF)   4 char	cval[MAXCHARS];		/* All possible characters		*/6 short	cused[MAXCHARS];	/* count of often char used		*/? short	order[MAXKEYS];		/* ordering of key words by subscript	*/oC short	neworder[MAXKEYS];	/* the new supposedly improved ordering	*/	? short	hashval[MAXKEYS];	/* current hashvalue of the key word	*/i> short	value[MAXCHARS];	/* associated value of the character	*/? int	mapped[MAXKEYS];	/* track which table entries are in use	*/e7 char	name[50];		/* bigger than any keyword should be	*/      KEYWORD		*keywds[MAXKEYS];   extern long	time();r extern char	*ctime();[ extern char	*malloc(); extern int	aredefined(); extern char	*strchr();   int	debug		= 0;y1 int	nkeys		= sizeof(keywds) / (sizeof (KEYWORD)); 4 int	tablesize	= sizeof(keywds) / (sizeof (KEYWORD)); short	trys		= 0; int	nletters	= 0;o short	kilotrys	= 0;A8 int	atime		= 10 * 60;	/* default alarm status 10 min.	*/ char	*klimit		= NULL;h char	*textp		= NULL; long	bigcount	= 0; short	depth		= 0;h5 short	k_now		= 0;		/* value of k for status report	*/E/ int	nosort		= FALSE;	/* -n sets nosort TRUE		*/)4 long	start, stop;			/* the start and finish times	*/ short	vlimit		= 0; short	keylimit	= 0;y short	tlimit		= 0;0 char	*output		= NULL;		/* Output file if set		*/  1 char	letters[37];			/* string of defined chars	*/;9 char	*letend = letters;		/* -> free space in letters[]	*/  0 main(argc,argv)m
 int		argc; char		*argv[]; {t 	getoptions(argc, argv); 	start = time(NULL); #if	DOALARMe 	signal(SIGALRM,status);8 	signal(SIGTERM,status);		/* status on kill -TERM pid	*/+ 	alarm(atime);			/* status every N secs		*/  #endif	 	setup();t
 	dosort();$ 	printf("The search ordering is\n");
 	prntorder();h 	if (search(0)) {h #if	DOALARM  	    alarm(0); #endif; 	    printf("\nSuccess! Associated Char Values Follow:\n");; 	    prntvals(); 	    prnthash(); 	    printf("\n"); 	    if (output != NULL)
 		dooutput();h 	} 	else {  #if	DOALARMn 	    alarm(0); #endif@ 	    printf("\nFailed to find char values for hash function\n"); 	}; 	printf("Total search invocations = %ld, max depth = %d\n",  	    bigcount, depth); 	stop = time(NULL); & 	printf(" Started %s", ctime(&start));% 	printf("Finished %s", ctime(&stop));) }h i setup()  {  	register KEYWORD	*kp; 	register int		c;s 	register int		i;a 	int			len;n  * 	for (i = 0; (scanf("%s", name)) != EOF &&' 		name[0] != '\0' ; name[0] = '\0',i++)  		{u #ifdef cptrace- 		printf("input data = %s, i = %d\n",name,i);  #endif 		if (i >= MAXKEYS)h 			{/ 			printf("Too many keys, %d max.\n", MAXKEYS);a 			exit(IO_ERROR); 			};d 		len = strlen(name);g2 		kp = (KEYWORD *) malloc(sizeof (KEYWORD) + len); 		if (kp == NULL)r 			{/ 			printf("Out of room allocating keywords\n");e 			exit(IO_ERROR); 			};R 		keywds[i] = kp;n 		kp->len = len; 		kp->last = name[len - 1];r 		strcpy(&kp->word[0], name);  		};  ( 	nkeys = (keylimit == 0) ? i : keylimit;    	for (i = 0; i < MAXKEYS; i++) { 	    hashval[i] = UNDEF; 	    order[i] = i; 	    mapped[i] = FALSE;	 	}  ! 	for (i = 0; i < MAXCHARS; i++) {Y 	    cval[i] = 0;R 	    value[i] = UNDEF; 	}   	if (!precheck()) {R0 	    printf("Perfect hash search terminated\n"); 	    exit(IO_ERROR); 	}   	for (i = 0; i < nkeys; i++) {< 	    c = keywds[i]->word[0];	/* Get first char of keyword	*// 	    cval[c] = c;		/* Remember this one used	*/p6 	    if (cused[c] == 0)		/* If it's the first time,	*/* 		++nletters;		/* Count unique letters		*/4 	    ++cused[c];			/* count how often letter used	*/8 	    c = keywds[i]->last;	/* Get last char of keyword	*// 	    cval[c] = c;		/* And do the same for it	*/; 	    if (cused[c] == 0) 
 		++nletters;( 	    ++cused[c]; 	}  . 	tablesize = (tlimit == 0 ? MAXKEYS : tlimit);8 	printf("Perfect hash function finder, CDH Ver. 2.9\n");* 	printf("Start time = %s", ctime(&start));. 	printf("%d keywords, %d distinct letters.\n", 	    nkeys, nletters);- 	nletters = (vlimit > 0) ? vlimit : nletters;q= 	printf("The associated char value limit is %d\n", nletters);e3 	printf("The table size limit is %d\n", tablesize);e   	/*/: 	 * You should make tablesize at least nkeys + 1 since the9 	 * first value is usually 1 or 2 even if both assoc chart9 	 * values are zero since the keyword length is included!e 	 */ }u i dosort() {H 	register KEYWORD	*kp; 	register int		i, j; 	int			k, m; 	int			newvalues;s   	if (nosort) {, 	    printf("No presorting of keywords.\n"); 	    return; 	}   	/*l< 	 * first order by sum of frequencies of occurrences of each 	 * keys 1st and last letter 	 */   	for (i = 0; i < nkeys; i++) { 	    kp = keywds[i];5 	    order[i] = cused[kp->word[0]] + cused[kp->last];u 	} 	for (m = 0; m < nkeys; m++) {* 	    for (k = -1, i = 0; i < nkeys; i++) { 		if (order[i] > k) {e 		    k = order[i];t, 		    j = i;		/* remember keywd subscript	*/ 		}; 	    } 	    order[j] = 0; 	    neworder[m] = j;{ 	} 	for (i=0; i < nkeys; i++) 	    order[i] = neworder[i]; 	if (debug > 2) {s& 	    printf("After first ordering\n"); 	    prntorder();e 	}   	/*l8 	 * The second ordering follows, keywds whose values are5 	 * defined by keywds earlier in the order are placeds8 	 * immediately after they are defined. 
This causes hash7 	 * value conflicts to occur as early during the search  	 * as possible. 	 */   	letend = letters; 	letters[0] = EOS;( 	merge(order[0]);			/* prime the pump	*/ 	neworder[0] = order[0]; 	order[0] = UNDEF; 	for (i = 1; i < nkeys;) {) 	    for (newvalues = TRUE; newvalues;) {A 		newvalues = FALSE; 		for (k = 0; k < nkeys; k++) {l 		    if (order[k] == UNDEF) 			continue;! 		    if (aredefined(order[k])) {d 			neworder[i++] = order[k]; 			order[k] = UNDEF; 			continue; 		    }  		}c 		for (k = 0; k < nkeys; k++) {o 		    if (order[k] != UNDEF) { 			neworder[i++] = order[k]; 			merge(order[k]);l 			order[k] = UNDEF; 			newvalues = TRUE;	 			break;	 		    }r 		}] 	    } 	} 	for (i = 0; i < nkeys; i++) 	    order[i] = neworder[i]; 	if (precheck() == FALSE) { < 	    printf("OOPS - call a Guru, the presort botched it\n"); 	    prntorder();  	    exit(IO_ERROR); 	} }f r /*=  * merge - adds keywd letters to the string of those defined. C  * This could be speeded up, but it's not a critical-path function.o  */    VOID merge(n) int		n;  {n 	register KEYWORD	*kp;   	kp = keywds[n]; 	if (debug > 2) ) 	    printf("merging in %s\n", kp->word);	, 	if (strchr(letters, kp->word[0]) == NULL) { 	    *letend++ = kp->word[0];d 	    *letend = EOS;( 	}) 	if (strchr(letters, kp->last) != NULL) {, 	    *letend++ = kp->last; 	    *letend = EOS;  	} }i   /*;  * aredefined - see if 1st & last char of keywd are defineds  */    inte
 aredefined(n)T int		n;  {s 	register KEYWORD	*kp;   	kp = keywds[n];) 	if (strchr(letters, kp->word[0]) != NULLn' 	 && strchr(letters, kp->last) != NULL)n 	    return (TRUE);  	else return (FALSE);  }n k /*:  * precheck - all keywds length,1st and last char disjoint  */d   int-
 precheck() {  	int			pretest;  	register KEYWORD	*ip, *jp;l 	short			i, j; 	short			m, k;
 	char			a, b;    	pretest = TRUE; 	for (m = 0; m < nkeys; m++) { 	    i = order[m]; 	    ip = keywds[i]; 	    a = ip->word[0];n 	    b = ip->last;( 	    for (k = m + 1; k < nkeys-1; k++) { 		j = order[k];e 		jp = keywds[j];  		if (ip->len == jp->len* 		 && ((a == jp->word[0] && b == jp->last). 		  || (a == jp->last && b == jp->word[0]))) { 		    pretest = FALSE;- 		    printf("Precheck fails on %s and %s\n",e 			ip->word, jp->word);  		}g 	    } 	} 	return (pretest); }i ; /*9  * prntorder - printout the current order of the keywords   */<   VOID prntorder()i {o 	register int	i, j;e  % 	for (i = 0, j = 0; i < nkeys; i++) {=- 	    if ((j + keywds[order[i]]->len) >= 60) {+ 		printf("\n");[ 		j = 0; 	    }+ 	    printf(" %s", keywds[order[i]]->word);=$ 	    j += keywds[order[i]]->len + 1; 	} 	printf("\n"); }  > /*5  * prntvals - prints out the letter associated valuesk  */s  
 prntvals() {e 	register int	i, j;r  ( 	for (i = 0, j = 0; i < MAXCHARS; i++) { 	    if (cval[i]) {d 		printf("%s %c =%2d,", $ 		    ((++j % 10) == 0) ? "\n" : "", 		    cval[i], value[i]);  	    } 	} 	printf("\n"); }0   /*7  * prnthash - prints out the hash values for the keywds   */	   VOID
 prnthash() {; 	register int		i, j; 	register KEYWORD	*kp; 	int			swap; 	int			hmin; 	int			hmax; 	int			spread;    
 	swap = TRUE;* 	hmin = MAXKEYS;
 	hmax = 0; 	spread = 0; 	for (i = 0; i < nkeys; i++) {& 	    j = hashval[i] = hash(keywds[i]);" 	    hmin = (hmin < j) ? hmin : j;" 	    hmax = (hmax > j) ? hmax : j; 	    order[i] = i; 	}0 	while (swap) {		/* plain vanilla bubble sort */ 	    swap = FALSE;$ 	    for (i = 0; i < nkeys-1; i++) {0 		if (hashval[order[i+1]] < hashval[order[i]]) { 		    swap = TRUE; 		    j = order[i];  		    order[i] = order[i+1]; 		    order[i+1] = j;i 		}h 	    } 	}1 	printf("Hash min = %d, max = %d, spread = %d\n",a" 	    hmin, hmax, hmax - hmin + 1);& 	for (i=0, j=0; i < nkeys; i++, j++) { 	    kp = keywds[order[i]]; # 	    if (j  + (kp->len + 5) > 60) {h 		printf("\n");  		j = 0; 	    }  	    printf(" %s %d,", kp->word, 		hash(keywds[order[i]])); 	    j += (kp->len + 5); 	} 	printf("\n"); }+ { /*8  * search - calls itself recursively to find char values  */U   int 	 search(k)[ register int	k;[ {= 	register KEYWORD	*p;e 	register int		j; 	 	int			m;  	short			v1, v2, num;  	short			sub1, sub2, subn; 	int			thesame;h   	thesame = FALSE;r 	bigcount++;/ 	k_now = k;			/* global for status reporting	*/ 1 	if (k >= nkeys)			/* hey - we may be all done	*/n 	    return (TRUE);}3 	if (k > depth)			/* global for status reporting	*/e2 	    depth = k;			/* keep track of search depth	*/ 	m = order[k]; 	p = keywds[m]; 6 	sub1 = p->word[0];		/* sub1 = first letter in word	*/3 	sub2 = p->last;			/* sub2 = last letter in word	*/e 	if (sub1 == sub2) 	    thesame = TRUE; 	v1 = value[sub1]; 	v2 = value[sub2];" 	if (defined(v1) && defined(v2)) {0 	    num = hash(p);		/* Both letters defined		*/ 	    if (used(num)) 2 		return (FALSE);		/* this hash value is in use	*/ 	    else {T& 		hashval[m] = num;	/* install it			*/ 		mapped[num] = TRUE;  		if (search(k + 1)) 		    return (TRUE); 		else { 		    hashval[m] = UNDEF;  		    mapped[num] = FALSE; 		    return (FALSE);r 		}  	    } 	} 	else if (defined(v1)) {& 	    for (j = 0; j <= nletters; j++) {	 	
	v2 = j;T 		num = v1 + p->len + v2;v 		if (!used(num)) {  		    hashval[m] = num;2 		    mapped[num] = TRUE;/ 		    value[sub2] = v2;/ 		    subn = sub2; 		    if (search(k + 1)) 			return (TRUE);i
 		    else 			remove(m, sub2);[ 		}n 	    } 	    return (FALSE); 	} 	else if (defined(v2)) {& 	    for (j = 0; j <= nletters; j++) {	 		v1 = j;h 		num = v1 + p->len + v2;p 		if (!used(num)) {  		    hashval[m] = num;  		    mapped[num] =TRUE; 		    value[sub1] = v1;  		    subn = sub1; 		    if (search(k + 1)) 			return (TRUE); 
 		    else 			remove(m, sub1);a 		}= 	    } 	    return (FALSE); 	}! 	else {				/* neither defined		*/u& 	    for (j = 0; j <= nletters; j++) { 		if (thesame) { 		    v1 = v2 = j; 		    num = v1 + p->len + v2;  		    if (!used(num)) {s 			hashval[m] = num; 			mapped[num] = TRUE;. 			value[sub1] = v1;	/* same as value[sub2]	*/ 			subn = sub1;e 			if (search(k + 1))a 			    return (TRUE);p 			elseT 			    remove(m, subn);  		    }  		}n 		else { 		    value[sub1] = j;. 		    if (search(k))		/* if never TRUE thru	*/- 			return (TRUE);		/* for loop, then FALSE */ 
 		    else 			value[sub1] = UNDEF;  		}r 	    } 	    return (FALSE); 	} }( s /*4  * remove - backup by deleting keywds hash value etc  */    VOID remove(m, subn)h register short	m;	 register short	subn; {v 	if (debug > 6)/? 	    printf("removing %s, subn = %d\n", keywds[m]->word, subn);a 	mapped[hashval[m]] = FALSE; 	hashval[m] = UNDEF; 	value[subn] = UNDEF;n }	 e /*>  * dooutput writes parser tables to the indicated output file.  *//   char	*function[] = { 	"", 	"int",F 	"keyword(text)",e 	"register char\t*text;",  	"/*",C 	" * Look for keyword (string of alpha) in the perfect hash table",e7 	" * Return the index (L_xxx value) or 0 if not found",h 	" */",r 	"{",m 	"\tregister char\t*tp;",v 	"\tregister int\thash;",i 	"",( 	"\tif (*text < FIRST || *text > LAST)", 	"\t    return (0);",a) 	"\tfor (tp = text; isalpha(*tp); tp++)",e 	"\t    ;",F 	"\thash = (tp - text);", & 	"\tif (*--tp < FIRST || *tp > LAST)", 	"\t    return (0);",rB 	"\thash += (px_assoc - FIRST)[*text] + (px_assoc - FIRST)[*tp];",! 	"\tif (px_table[hash] == NULL)",  	"\t    return (0);",a> 	"\tif (strncmp(text, px_table[hash], (tp - text + 1)) != 0)", 	"\t    return (0);",h 	"\treturn(hash);"," 	"}",t 	"", 	NULL, }; t
 dooutput() {a 	FILE		*fd;  	register char	**funp; 	register int	i; 	int		first, last, hval;  * 	if ((fd = CREATE(output, "w")) == NULL) { 	    perror(output); 	    return; 	}% 	fprintf(fd, "#include <stdio.h>\n");"% 	fprintf(fd, "#include <ctype.h>\n");s 	for (i = 0; i < nkeys; i++) {< 	    fprintf(fd, "#define\tL_%s\t", keywds[order[i]]->word);$ 	    if (keywds[order[i]]->len < 14) 		putc('\t', fd); $ 	    if (keywds[order[i]]->len <  6) 		putc('\t', fd);h1 	    fprintf(fd, "%d\n",	hash(keywds[order[i]]));  	}. 	for (i = MAXCHARS; --i >= 0 && cval[i] == 0;) 	    ;
 	last = i;( 	for (i = 0; i <= last && cval[i] == 0;)	 	    i++;L 	first = i;r- 	fprintf(fd, "#define FIRST\t'%c'\n", first);i+ 	fprintf(fd, "#define LAST\t'%c'\n", last);e- 	fprintf(fd, "static char px_assoc[] = {\n");{ 	while (i <= last) {5 	    fprintf(fd, "\t%d,\t/* '%c' */\n", value[i], i);d	 	    i++;- 	} 	fprintf(fd, "};\n");). 	fprintf(fd, "static char *px_table[] = {\n");
 	last = 0; 	for (i = 0; i < nkeys; i++) {# 	    hval = hash(keywds[order[i]]);A 	    while (last < hval) {0 		fprintf(fd, "\tNULL,\t\t\t/*%3d\t*/\n", last);	 		last++;  	    }8 	    fprintf(fd, "\t\"%s\",\t", keywds[order[i]]->word);$ 	    if (keywds[order[i]]->len < 13) 		putc('\t', fd);)$ 	    if (keywds[order[i]]->len <  5) 		putc('\t', fd);e& 	    fprintf(fd, "/*%3d\t*/\n", hval); 	    last = hval + 1;] 	} 	fprintf(fd, "};\n");r- 	for (funp = function; *funp != NULL; funp++)p  	    fprintf(fd, "%s\n", *funp); 	fclose(fd); }e   #if	DOALARMh /*9  * status - on signal this reports the current statisticsp  */(   VOID status() {3 	fprintf(stderr,H 	    "\nSTATUS:  key \"%s\" (%d), search calls = %ld, max depth = %d\n",2 	    keywds[k_now]->word, k_now, bigcount, depth); 	fflush(stderr); 	signal(SIGTERM,status); 	signal(SIGALRM,status); 	alarm(atime); }3 #endif v /*  *			G E T O P T I O N S  *>  * Generalized command line argument processor.  The following!  * types of arguments are parsed:u3  *	flags		The associated int global is incremented:u  *			-f	f-flag set to 1s+  *			-f123	f-flag set to 123 (no separator)s'  *			-fg	f-flag and g-flag incremented. 3  *	values		A value must be present.  The associatedn#  *			int global receives the value:b  *			-v123	value set to 123e  *			-v 123	value set to 1230  *	arguments	The associated global (a char *) is  *			set to the next argument:!  *			-f foo	argument set to "foo"l  */r   #define	FLAG	0 #define	VALUE	1*
 #define	ARG	2e #define	ERROR	3    typedef struct argstruct { 	char	opt;		/* Option byte			*/-" 	char	type;		/* FLAG/VALUE/ARG		*/. 	char	*name;		/* What to set if option seen	*/, 	char	*what;		/* String for error message	*/ } ARGSTRUCT;   static ARGSTRUCT arginfo[] = {( { 'd',	FLAG,	(char *)&debug,		"debug" },9 { 'a',	VALUE,	(char *)&atime,		"alarm time for status" },a4 { 't',	VALUE,	(char *)&tlimit,	"table size limit" },: { 'v',	VALUE,	(char *)&vlimit,	"associated value limit" },3 { 'k',	VALUE,	(char *)&keylimit,	"keyword limit" }, 1 { 'n',	FLAG,	(char *)&nosort,	"no sort wanted" },o4 { 'o',	ARG,	(char *)&output,	"parser output file" }, { EOS,	ERROR,	NULL,			NULL },i };   static char *argtype[] = {( 	"flag", "takes value", "takes argument" }; G static getoptions(argc, argv)
 int		argc;
 char		**argv;  /*  * Process arg's  */a {e 	register char		*ap; 	register int		c;L 	register ARGSTRUCT	*sp;	 	int			i;t 	int			helpneeded;   	getredirection(argc, argv); 	helpneeded = FALSE; 	for (i = 1; i < argc; i++) {r0 	    if ((ap = argv[i]) != NULL && *ap == '-') { 		argv[i] = NULL;o# 		for (ap++; (c = *ap++) != EOS;) {u 		    if (isupper(c))R 			c = tolower(c); 		    sp = arginfo;g, 		    while (sp->opt != EOS && sp->opt != c) 			sp++; 		    switch (sp->type) {g% 		    case FLAG:			/* Set the flag	*/* 			if (!isdigit(*ap)) {  			    (*((int *)sp->name))++;
 			    break;r 			} 		    case VALUE:			/* -x123	*/t 		        if (isdigit(*ap)) {o% 			    *((int *)sp->name) = atoi(ap);f 			    *ap = EOS;; 			}' 			else if (*ap == EOS && ++i < argc) {p* 			    *((int *)sp->name) = atoi(argv[i]); 			    argv[i] = NULL; 			}	 			else {e 			    fprintf(stderr, 				"Bad option '%c%s' (%s)",  				c, ap, sp->what);&& 			    fprintf(stderr, ", ignored\n"); 			    helpneeded++; 			}	 			break;    		    case ARG:			/* -x foo	*/ 			if (++i < argc) {' 			    *((char **) sp->name) = argv[i];  			    argv[i] = NULL; 			}	 			else {f 			    fprintf(stderr,$ 				"Argument needed for '%c' (%s)", 				c, sp->what);	& 			    fprintf(stderr, ", ignored\n"); 			    helpneeded++; 			}	 			break;a   		    case ERROR:[ 			fprintf(stderr,, 			    "Unknown option '%c', ignored\n", c); 			helpneeded++;	 			break;, 		    }) 		}	 	    } 	} 	if (helpneeded > 0) {/ 	    for (sp = arginfo; sp->opt != EOS; sp++) { & 		fprintf(stderr, "'%c' -- %s (%s)\n",, 		    sp->opt, sp->what, argtype[sp->type]); 	    } 	} }[ = /*<  * getredirection() is intended to aid in porting C programs7  * to VMS (Vax-11 C) which does not support '>' and '<'s7  * I/O redirection.  With suitable modification, it maye1  * useful for other portability problems as well.   */n   #include	<stdio.h>   getredirection(argc, argv)
 int		argc;
 char		**argv;	 /*=  * Process vms redirection arg's.  Exit if any error is seen.pD  * If getredirection() processes an argument, argv[i], it is changed  * to NULL.,  *>  * Warning: do not try to simplify the code for vms.  The codeA  * presupposes that getredirection() is called before any data isd(  * read from stdin or written to stdout.  *  * Normal usage is as follows:  *  *	main(argc, argv)t
  *	int		argc;   *	char		*argv[];   *	{  *		register int		i;  *		int			nargs;  *4  *		getredirection(argc, argv);	** setup redirection+  *		for (nargs = 0, i = 1; i < argc, i++) {n1  *		    if (argv[i] == NULL)	** skip if processedr&  *			continue;		** by getredirection())  *		    nargs++;			** here is an argument !  *		    ...				** process argv[i]t  *		}r,  *		if (nargs == 0) {		** no arguments given  *		    ...d  *		}r  *	}  */d {.
 #ifdef	vms+ 	register char		*ap;	/* Argument pointer	*/r 	int			i;	/* argv[] index		*/*# 	int			file;	/* File_descriptor 	*/ - 	extern int		errno;	/* Last vms i/o error 	*/a  4 	for (i = 1; i < argc; i++) {	/* Do all arguments	*/0 	    if (*(ap = argv[i]) == '<') {  /* <file		*/* 		if (freopen(++ap, "r", stdin) == NULL) {( 		    perror(ap);		/* Can't find file	*/) 		    exit(errno);	/* Is a fatal error	*/  		}i+ 		goto erase_arg;		/* Ok, erase argument	*/m 	    }3 	    else if (*ap++ == '>') {	/* >file or >>file	*/ ! 		if (*ap == '>') {	/* >>file		*/i 		    /*3 		     * If the file exists, and is writable by us,d6 		     * call freopen to append to the file (using the7 		     * file's current attributes).  Otherwise, createt3 		     * a new file with "vanilla" attributes as if// 		     * the argument was given as ">filename". 3 		     * access(name, 2) is TRUE if we can write ono 		     * the specified file.	 		     */e! 		    if (access(++ap, 2) == 0) {m* 			if (freopen(ap, "a", stdout) == NULL) { 			    perror(ap); 			    exit(errno);* 			} 			else goto erase_arg;*" 		    }			/* If file accessable	*/( 		    else ;		/* Else it's just >file	*/ 		}e 		/*8 		 * On vms, we want to create the file using "standard"5 		 * record attributes.  create(...) creates the filee3 		 * using the caller's default protection mask andm/ 		 * "variable length, implied carriage return"w8 		 * attributes. dup2() associates the file with stdout. 		 */e6 		if ((file = creat(ap, 0, "rat=cr", "rfm=var")) == -1) 		 || dup2(file, fileno(stdout)) == -1) {r* 		    perror(ap);		/* Can't create file	*/) 		    exit(errno);	/* is a fatal error	*/; 		}			/* If '>' creation	*/ 6 erase_arg:	argv[i] = NULL;		/* red. erases argument	*/ 	    }				/* If redirection	*/ 	}				/* For all arguments	*/f #endif #ifdef	decus- 	argc = argv[0];			/* Supress warning msg.	
*/i #endif },   #if	UNIX /*0  * The following is missing on some Unix systems  */    char * strchr(string, c)f register char	*string; register char	c; /*/  * If 'c' is in string, return a pointer to it.   * Else, return NULL.   */e {l 	do {	 	    if (*string == c) 		return (string); 	} while (*string++ != EOS); 	return (NULL);= }L #endif