 /*  *	rxcomp(pattern)  *	char *pattern;n  *C  * Compile the regular expression pattern.  Return TRUE if success.tK  * The compiled pattern is stored in a global character vector 'rx_pattern'd  *G  * If an error occurred, rxcomp() returns FALSE and rx_pattern containsr  * an error message.  *  *B  * Syntax errors print a message and return 0.  Size errors abort.  *  *  * External routines:	&  *	warn			Print warning/error messages  *	fatal			Fatal error exitt  *.  * Regular expressions are defined as follows:  *H  * x	An ordinary character (not mentioned below) matches that character.G  * '\'	The backslash quotes any character.  "\$" matches a dollar-sign.rA  * '^'	A circumflex at the beginning of an expression matches thep  * 	  beginning of a line.C  * '$'	A dollar-sign at the end of an expression matches the end ofs
  * 	  a line.b8  * '.'	A period matches any character except "new-line".H  * ':a'	A colon matches a class of characters described by the followingG  * ':d'	  character.  ":a" matches any alphabetic, ":d" matches digits,xD  * ':n'	  ":n" matches alphanumerics, ": " matches spaces, tabs, and5  * ': '	  other control characters, such as new-line. A  * '*'	An expression followed by an asterisk matches zero or morem=  * 	  occurrances of that expression: "fo*" matches "f", "fo"   * 	  "foo", etc._@  * '+'	An expression followed by a plus sign matches one or more>  * 	  occurrances of that expression: "fo+" matches "fo", etc.@  * '-'	An expression followed by a minus sign optionally matches  * 	  the expression. E  * '[]'	A string enclosed in square brackets matches any character in(@  * 	  that string, but no others.  If the first character in theB  * 	  string is a circumflex, the expression matches any character>  * 	  except "new-line" and the characters in the string.  For=  * 	  example, "[xyz]" matches "xx" and "zyx", while "[^xyz]" @  * 	  matches "abc" but not "axb".  A range of characters may be?  * 	  specified by two characters seperated by "-".  Note that,m;  * 	  [a-z] matches alphabetics, while [z-a] never matches.f  * tD  * The concatenation of regular expressions is a regular expression.  */e   #include	<stdio.h>
 #ifdef vms #include	<ctype.h> #define	FALSE	0o #define	TRUE	1 #endif #include	"t.h"  - #define PMAX	256			/* The longest pattern		*/e   /*B  * Note: the following must be identical in rxgrep() and rxcomp().  */r   #define CHAR	1
 #define BOL	2	
 #define EOL	3t
 #define	ANY	4  #define CLASS	5  #define	NCLASS	6 #define STAR	7 #define	PLUS	8 #define	MINUS	9	 #define	ALPHA	10 #define	DIGIT	11 #define	NALPHA	12e #define	PUNCT	13$ #define	CDIGIT	14		/* Not used				*/ #define RANGE	15 #define	ENDPAT	16'  . static char	*rx_pp;			/* Work area pointer		*/2 char		rx_pattern[PMAX];	/* Pattern output here		*/   char * rxcomp(pattern)+* char		*pattern;	/* Pattern to compile			*/ /*  * Compile the pattern.   */e {'/ 	register char	*s;		/* Source string pointer	*/a0 	register char	*lp;		/* Last pattern pointer		*/* 	register int	c;		/* Current character		*/ 	int		o;		/* Temp				*/n- 	char		*spp;		/* Save beginning of pattern	*/k0 	char		*rx_cclass();	/* Compile class routine	*/  
 	s = pattern;d 	rx_pp = rx_pattern; 	while (c = *s++) {  		/*& 		 * STAR, PLUS and MINUS are special. 		 */S) 		if (c == '*' || c == '+' || c == '-') {t5 			if (rx_pp == rx_pattern || (o=rx_pp[-1]) == BOL ||o 					o == EOL || o == STAR ||* 					o == PLUS || o == MINUS)t0 				return(rx_bad("Illegal occurrance op.", s)); 			rx_store(ENDPAT); 			rx_store(ENDPAT); 			/*t7 			 * Save the pattern's end and move it down one byte.  			 */ 			spp = rx_pp;r 			while (--rx_pp > lp)	 				*rx_pp = rx_pp[-1];	 			*rx_pp = (c == '*') ? STAR :  				(c == '-') ? MINUS : PLUS;* 			rx_pp = spp;		/* Restore pattern end	*/ 			continue; 		}g 		/* 		 * All the rest. 		 */*$ 		lp = rx_pp;			/* Remember start	*/
 		switch(c) {    		case '^':m 			rx_store(BOL);N	 			break;m   		case '$':p 			rx_store(EOL);m	 			break;o   		case '.':  			rx_store(ANY);t	 			break;o   		case '[':  			if ((s = rx_cclass(s)) == 0)e 				return(0);	 			break;e   		case ':':p 			if (*s) {
 				c = *s++;A 				switch(tolower(c)) {  
 				case 'a':) 					rx_store(ALPHA);; 					break;   
 				case 'd':i 					rx_store(DIGIT);r 					break;   
 				case 'n':o 					rx_store(NALPHA); 					break;r  
 				case ' ':* 					rx_store(PUNCT);r 					break;    				default:) 					return(rx_bad("Unknown : type", s));    				} 
 				break; 			}' 			else	return(rx_bad("No : type", s));n   		case '\\':
 			if (*s)
 				c = *s++;   
 		default: 			rx_store(CHAR); 			rx_store(tolower(c)); 		}e 	} 	rx_store(ENDPAT);) 	rx_store(EOS);				/* Terminate string	*/r 	return (TRUE);i }f  
 static char *o rx_cclass(src)! char		*src;		/* Class start				*/  /*  * Compile a class (within [])  */  { * 	register char	*s;		/* Pattern pointer		*/) 	register char	*cp;		/* Pattern start		*/c* 	register int	c;		/* Current character		*/ 	int		o;		/* Temp				*/	  	 	s = src;[ 	o = CLASS;w 	if (*s == '^') {r 		++s;
 		o = NCLASS;s 	}
 	rx_store(o);	 	cp = rx_pp;" 	rx_store(0);				/* Byte count		*/ 	while ((c = *s++) && c!=']') {u+ 		if (c == '\\') {		/* Store quoted char	*/*4 			if ((c = *s++) == '\0')	/* Gotta get something	*/0 				return(rx_bad("Class terminates badly", s)); 			else	rx_store(tolower(c));, 		}U 		else if (c == '-' &&2 				(rx_pp - cp) > 1 && *s != ']' && *s != '\0') {% 			c = rx_pp[-1];		/* Range start		*/)) 			rx_pp[-1] = RANGE;	/* Range signal		*/S% 			rx_store(c);		/* Re-store start	*/_$ 			c = *s++;		/* Get end char and	*/( 			rx_store(tolower(c));	/* Store it		*/ 		}b 		else {0 			rx_store(tolower(c));	/* Store normal char	*/ 		}  	} 	if (c != ']')* 		return(rx_bad("Unterminated class", s)); 	if ((c = (rx_pp - cp)) >= 256)y' 		return(rx_bad("Class too large", s));  	if (c == 0)# 		return(rx_bad("Empty class", s));A	 	*cp = c;c 	return(s);_ };   static rx_store(op) {v  	if (rx_pp >= &rx_pattern[PMAX])+ 		error("?RXCOMP-F-Pattern too complex\n");n 	*rx_pp++ = op;1 }l  
 static int rx_bad(message, stopbyte),% char		*message;	/* Error message			*/p% char		*stopbyte;	/* Pattern end				*/t {c 	concat(rx_pattern,F! 			"Regular expression error \"",  			message,  			",\" stopped at \"",t 			&stopbyte[-1], 	 			"\"");r 	return (FALSE); }[  