 /*  *				c s e t . h   */   
/*)LIBRARY
*/

#ifdef	DOCUMENTATION

title	cset	Header file for character set functions
index		Header file for character set functions

synopsis

	#ifdef vms
	#include "c:cset.h"
	#else
	#include <cset.h>
	#endif

description

	The character set functions provide a set of routines for describing
	and manipulating sets of characters.  The character sets, called
	"csets", created in this way can be manipulated quickly, and require
	relatively little storage.  They are meant to be used as arguments
	to pattern-matching functions like span() (which see).

	For these purposes, a set of functions to create csets, and produce
	the complement (with respect to the set of all 8-bit characters) of
	a set, and the join (union), meet (intersection) and difference of
	two csets is provided; see cset(), cscomp(), csjoin(), csmeet(),
	and csdiff().

	csets can also be used more generally as representations of sets -
	i.e., the name can be read as "C sets".  In this case, the universe is
	the set of numbers 0...(cssize-1), where cssize is a global parameter
	defined in cset.c; it is normally 256 for character work.  The
	functions provided for this kind of application include csmember(),
	which checks membership, and csless() and cswith(), which remove
	elements from and add elements to sets.

	When csets are used in this way, it is important to understand that
	a cset is a data object with an internal structure, and that different
	csets may share internal data - i.e., csets are not normally "atomic"
	objects and care must be taken in manipulating them.  A look at the
	representation of csets should help clarify this point.

	The only object you normally manipulate directly in your code is
	a cset pointer, type (CSET *).  This pointer points to a cset
	header, which contains a mask and a pointer to a table of cssize
	bytes.  A character is in the cset if any of the bits in its mask
	is on in the corresponding table entry.  Csets created by cset()
	always have a one-bit mask; however, csjoin() and friends avoid, if
	possible, using up a bit position, by creating a header with a mask
	containing more than one bit.  Hence, the join of two csets often can
	be represented very cheaply.

	Complements of csets are represented still more efficiently; even
	the header of a cset and its complement are shared.  Only the pointer
	is changed - its bit pattern is complemented.

	A consequence of this representation is that a great deal of data is
	often shared between csets.  When manipulating csets as arbitrary
	sets, it is important to understand that applying csless() or cswith()
	to a cset may cause any related csets to be changed.  Thus, after the
	sequence of calls:

		uvowels = cset("AEIOU");
		lvowels = cset("aeiou");
		vowels  = csjoin(uvowels,lvowels);
		lvowels = cswith(lvowels,'y');

	'y' is probably a member of vowels.  (Only "probably" because it
	is impossible to predict whether uvowels and lvowels happen to get
	the same table; csjoin() cannot use the "cheap" representation if
	they do not.)

	Two methods are available to avoid this problem.  First, cscopy()
	returns a guaranteed-"unique" copy of a cset.  Second, the global
	csunique (in cset.c) can be set, forcing functions such as csjoin()
	to avoid space-saving shortcuts.

internal

	The exact form of the cset header structure was chosen to be identical
	to the character set pointer structure used for the PDP-11 CIS
	instructions.  Any ambitious programmers are encouraged to make use
	of those instructions to produce fast versions of span() etc.

	Note that the use of a complemented pointer to the header to
	represent a complemented cset relies on malloc() always returning
	memory pointers with a 0 in the bottom bit of their representation.
	This is probably true in most implementations.

bugs

author

	Jerry Leichter

#endif

/*
)EDITLEVEL=10
 * Edit history
 * 0.0 12-Jul-82 JSL	Invention
 */

#ifndef _CSET_				/* Don't do this twice		 */
#define _CSET_

/*
 * Cset header.  Client code normally holds only a (CSET *).  A character
 * is in the set when any bit of `mask' is on in that character's entry
 * of `table'.  Several headers may point at the same table, so changing
 * one cset in place can change related csets as well.
 */
typedef struct cset
	{ char	mask;			/* Mask for chars in set	*/
	  char	_fill_;			/* For CIS compatibility	*/
	  char	*table;			/* Character table		*/
	} CSET;

/*
 * Cset construction and manipulation.  Argument lists are deliberately
 * left unspecified (pre-ANSI declarations); the definitions are not in
 * this header.
 */
extern CSET *cset();			/* Make a cset			*/
extern CSET *cset_t();			/* Make a temporary cset	*/
extern CSET *cscopy();			/* Copy a cset			*/
extern CSET *csdiff();			/* Difference of csets		*/
extern CSET *csjoin();			/* Union of csets		*/
extern CSET *csless();			/* Remove element from cset	*/
extern CSET *csmeet();			/* Intersection of csets	*/
extern int   csmember();		/* Test for membership		*/
extern CSET *cswith();			/* Add element to cset		*/
extern CSET *_cscomp();			/* Real, callable complement	*/

/* The character set matching functions */
extern char *any();
extern char *ospan();
extern char *span();
extern char *upto();

/*
 * cscomp is an object-like macro that expands to a cast prefix, so
 * cscomp(cs) becomes (CSET *)~(int)(cs): the complement of a cset is
 * its header pointer with every bit inverted.
 * NOTE(review): the round trip goes through int, so it only preserves
 * the pointer where a pointer fits in an int - confirm against cset.c
 * before building for a target with wider pointers.
 */
#define cscomp (CSET *)~(int)		/* Macro complement		*/

extern int csmask;			/* Mask to apply to chars	*/
extern int cssize;			/* Size of a cset		*/
extern int csunique;			/* Make unique copies of csets	*/

#endif