 /*
 * Kwik index generator.
 */

/*)BUILD	$(FILES)	= { kwik sorts }
		$(STACK)	= 4000
		$(TKBOPTIONS)	= {
			TASK = ...KWI
		}
*/

#ifdef	DOCUMENTATION

title	kwik	Keyword in Context Index
index		Keyword in Context Index

synopsis

	kwik [options] [file ...]

description

	Kwik constructs a keyword in context (kwik) index using the data in
	the named files, writing the resulting index to the standard output.
	The standard input is read if no files are specified;
	kwik may be used as
	a filter.  The following options are defined:
	.lm +16
	.s.i -16;-s		The kwik index normally excludes common
	(stop) words. Specifying the '-s' option empties the stop list,
	thus including the following words:
	.s.nf
		a	by	in	to
		an	for	of	the
		and	from	on	with
		at
	.s.f
	.s.i -16;-r		Make the index in reverse alphabetic order.
	.s.i -16;-t offset	This is used to build index tables.  The input
	is entered in the following format:
	.s
		name<TAB>index text
	.s
	The kwik index will be output with the name in the
	left hand column.  The kwik'ed
	text then follows.  The '-t' option takes a mandatory argument:
	the column at which the first byte of the kwik'ed text
	should be placed.  For example, the index for the Decus library
	documentation was produced by the following command:
	.s.nf
	    kwik -t 16 -w 64 <infile >outfile
	.f
	.s.i -16;-x file__name	The named file contains a user-specified
	exclusion (stopword) list.  The '-x' option may be repeated if
	multiple exclusion lists are needed. Note that the order of the
	'-s' and '-x' options is important:
	.lm +4
	.s.i-4;kwik -x file
	.s
	The file contains an exclusion list, one word per line.  Append
	the contents of the file to the default list.
	.s.i -4;kwik -s -x file
	.s
	Replace the default stoplist by the contents of the named file.
	.s.i -4;kwik -x file -s
	.s
	After reading the exclusion file, the entire stop list
	is erased.  (This is not a useful procedure.)
	.lm -4
	.s.i -16;-w width	The output line width is normally 80
	characters. The '-w' option changes it to a user-specified value.
	.lm -16

diagnostics

	.lm +8
	.s.i -8;Usage ...
	.s
	Illegal options cause an extensive "help" message to be printed.
	.s.i -8;Bad (width | offset) ...
	.s
	An illegal or out-of-range number was given as a parameter to a '-w'
	or '-t' option.
	.s.i -8;Can't open exclude file
	.s.i -8;Illegal exclusion
	.s
	An exclude file text line began with a non-printing character.
	The line is ignored.
	.s.i -8;Out of space in saveexclude
	.s
	The program ran out of main memory when building the stop list.
	.s.i -8;No index for ...
	.s
	Kwik was invoked in '-t' mode.  Unfortunately, the indicated line
	was not in the format "name#<TAB>#text".  The line is ignored.
	.s.i -8;Cannot create temp file
	.s
	The sorts subroutine could not create its file.
	.s.i -8;Out of space
	.s
	The sorts subroutine filled the available disk space.
	.lm -8

author

	David Conroy, Martin Minow

implementation

	Kwik is linked together with sorts.obj.
	.s
	The program must be linked with a large enough
	stack space so that the sorts routine can operate correctly.  The
	following seems to work correctly:
	.s.nf
	    tkb
	    kwik=kwik,sorts,c:c/lb
	    /
	    stack = 2048
	    //
	.f

bugs
	If an index line is too long to fit (in the kwik'ed part),
	the program overlays text on the line, logging the break showing
	where the beginning of the line was.

#endif

#include <stdio.h>
 #ifdef vms #include		<ssdef.h>r #include		<stsdef.h>1 #define	IO_SUCCESS	(SS$_NORMAL | STS$M_INHIB_MSG)u #define	IO_ERROR	SS$_ABORT #endif /*H  * Note: IO_SUCCESS and IO_ERROR are defined in the Decus C stdio.h file  */h #ifndef	IO_SUCCESS #define	IO_SUCCESS	0 #endif #ifndef	IO_ERROR #define	IO_ERROR	1 #endif   #define	FOLD	'\t'r #define	NBUF	128 #define	FALSE	0  #define	TRUE	1
 #define	EOS	0E) extern	int	sort_r;			/* Reverse kwik			*/    static int	width	= 80;, static int	sflag	= 1;		/* Stop list (on)		*/4 static char	*stopfile = "";		/* User's stop list		*// static int	tflag	= 0;		/* Table format flag		*/i2 static int	offset	= 0;		/* Table format offset		*/ static char	inbuf[NBUF]; static char	outbuf[NBUF];  static char	tbuf[NBUF];a   /*B  * This table contains '1' for bytes that may begin indexed words.>  * This may be defined as the regular expression "[0-9A-Za-z]"  */  static char	ok[] = {? 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/*   0.. 15	*/e? 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/*  16.. 31	*/	? 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/*  32.. 47	*/]C 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,	/*  48.. 63 0-9	*/eC 	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/*  64.. 79 A-O	*/iC 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,	/*  80.. 95 P-Z	*/cC 	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/*  96..111 a-o	*/.B 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0	/* 112..127 p-z	*/ };   /*2  * Stop words are stored in a sorted bucket table.  */	   struct stoplist {	 	struct stoplist	*next; 
 	char		*word;l };   /*  * Hack for the Decus compiler  */f  . static struct stoplist st_at	= { NULL,	"at" };2 static struct stoplist st_and	= { &st_at,	"and" };1 static struct stoplist st_an	= { &st_and,	"an" }; . static struct stoplist st_a	= { &st_an,	"a" }; #define astop	st_a  . static struct stoplist st_by	= { NULL,	"by" }; #define bstop	st_by;  2 static struct stoplist st_from	= { NULL,	"from" };4 static struct stoplist st_for	= { &st_from,	"for" }; #define fstop	st_for  . static struct stoplist st_in	= { NULL,	"in" }; #define istop	st_ino  . static struct stoplist st_on	= { NULL,	"on" };0 static struct stoplist st_of	= { &st_on,	"of" }; #define	ostop	st_of   . static struct stoplist st_to	= { NULL,	"to" };2 static struct stoplist st_the	= { &st_to,	"the" }; #define tstop	st_the  2 static struct stoplist st_with	= { NULL,	"with" }; #define wstop	st_withn   #define NSTOP	128-' '   + static struct stoplist *stoplist[NSTOP] = {)> /*  32.. 39 */	NULL,	NULL,	NULL,	NULL,	NULL,	NULL,	NULL,	NULL,> /*  40.. 47 */	NULL,	NULL,	NULL,	NULL,	NULL,	NULL,	NULL,	NULL,> /*  48.. 55 */	NULL,	NULL,	NULL,	NULL,	NULL,	NULL,	NULL,	NULL,> /*  56.. 63 */	NULL,	NULL,	NULL,	NULL,	NULL,	NULL,	NULL,	NULL,> /*  64.. 71 */	NULL,	NULL,	NULL,	NULL,	NULL,	NULL,	NULL,	NULL,> /*  72.. 79 */	NULL,	NULL,	NULL,	NULL,	NULL,	NULL,	NULL,	NULL,> /*  80.. 87 */	NULL,	NULL,	NULL,	NULL,	NULL,	NULL,	NULL,	NULL,> /*  88.. 95 */	NULL,	NULL,	NULL,	NULL,	NULL,	NULL,	NULL,	NULL,D /*  96..103 */	NULL,	&astop,	&bstop,	NULL,	NULL,	NULL,	&fstop,	NULL,B /* 104..111 */	NULL,	&istop,	NULL,	NULL,	NULL,	NULL,	NULL,	&ostop,B /* 112..119 */	NULL,	NULL,	NULL,	NULL,	&tstop,	NULL,	NULL,	&wstop,> /* 120..127 */	NULL,	NULL,	NULL,	NULL,	NULL,	NULL,	NULL,	NULL, };   main(argc, argv)
 char *argv[];u {= 	register char *cp;f 	register c, i;E
 	FILE *fp; 	int nf;  
 #ifdef vms# 	argc = getredirection(argc, argv);  #endif  
 	nf = argc-1;	 	for (i=1; i<argc; ++i) {) 		cp = argv[i];; 		if (*cp == '-') {i 			--nf; 			argv[i] = NULL; 			++cp; 			while ((c = *cp++) != EOS) {/ 				switch (tolower(c)) {   
 				case 'r':  					++sort_r; 					break;n  
 				case 'w':( 					if (++i >= argc)S 						usage();* 					width = getv(argv[i],2,NBUF,"width");
 					--nf; 					argv[i] = NULL; 					break;d  
 				case 's':g 					sflag = 0;V 					break;h  
 				case 't':  					if (++i >= argc)d 						usage();
 					tflag++;c, 					offset = getv(argv[i],1,NBUF,"offset");
 					--nf; 					argv[i] = NULL; 					break;>  
 				case 'x':r 					if (++i >= argc)	 						usage(); 					stopfile = argv[i];
 					--nf; 					argv[i] = NULL; 					break;E   				default:
 					usage();s 				}r 			} 		}i 	}+ 	getexclude();			/* Initialize stoplist		*/e- 	width -= offset;		/* Offset is for index		*/t0 	offset -= 1;			/* One byte follows the index	*/
 	if (nf <= 0)  		rotate(stdin); 	else {b 		for (i=1; i < argc; ++i) { 			if ((cp = argv[i]) == NULL)
 				continue;s0 			if ((fp = freopen(cp, "r", stdin)) == NULL) { 				fprintf(stderr, + 					"kwik: %s: cannot open. Continuing\n",K	 					cp); 
 				continue;. 			} 			rotate(fp); 		}C 	} 	sorta(NULL);	 	unrotate(); }o   usage()  {n6 	fprintf(stderr, "-KWIK-parameter error, usage is\n");3 	fprintf(stderr, "\tkwik\t[-r]\t\tReverse kwik\n");a8 	fprintf(stderr, "\t\t[-s]\t\tUse standard stoplist\n");8 	fprintf(stderr, "\t\t[-w width]\tOutput line width\n");= 	fprintf(stderr, "\t\t[-t offset]\tInput in index format\n"); F 	fprintf(stderr, "\t\t[-x exlude]\tExclude these words from index\n");< 	fprintf(stderr, "\t\t[file ...]\tFiles to be processed\n");! 	error("?KWIK-E-Cannot proceed");  }i   getv(arg, min, max, who)$ char	*arg;		/* What to convert				*/* int	min;		/* Minimun acceptable value			*/* int	max;		/* Maximum acceptable value			*/' char	*who;		/* For error printout				*/r {  	register int	result;s   	result = atoi(arg);$ 	if (result < min || result > max) {: 		error("?KWIK-E-bad %s %d, minimum = %d, maximum = %d\n", 			who, result, min, max); 	} 	return(result); }f   getexclude() /*  * Store words to excludes  */e {t
 	FILE	*fp;" 	register struct stoplist **stopp;   	/*i. 	 * If -s wasn't given, erase current stoplist 	 */ 	if (!sflag) {3 		for (stopp = stoplist; stopp < &stoplist[NSTOP];)s 			*stopp++ = NULL;o 	} 	if (*stopfile == EOS)	 		return; ) 	if ((fp = fopen(stopfile, "r")) == NULL)f> 		error("?KWIK-E-can't open exclude file \"%s\"\n", stopfile);2 	while (fgetss(inbuf, sizeof inbuf, fp) != NULL) { 		saveexclude(inbuf);i 	} 	fclose(fp); }t   saveexclude(what) # char		*what;		/* What to save				*/  {m 	register char			*p; 	register struct stoplist	*stp;  	struct stoplist			**stopp;l 	register int			c; 	struct stoplist			*newstop;   	/*s 	 * Force line to lowercase  	 */! 	for (p = what; (c = *p) != EOS;)e 		*p++ = tolower(c);  	stopp = &stoplist[*what - ' '];4 	if (stopp < &stoplist || stopp >= &stoplist[NSTOP]), 		error("Illegal exclusion \"%s\"\n", what);9 	if ((newstop = malloc(sizeof (struct stoplist))) == NULLd- 			|| (p = malloc(strlen(what) + 1)) == NULL)n' 		error("Out of space in saveexclude");.# 	strcpy((newstop->word = p), what);. 	if ((stp = *stopp) == NULL) { 		*stopp = newstop;e 		newstop->next = NULL;e 	} 	else {.: 		while(strcmp(stp->word, what) <= 0 && stp->next != NULL) 			stp = stp->next;v 		newstop->next = stp->next; 		stp->next = newstop; 	} 	t }e   testexclude(what) - register char	*what;		/* Is it excluded?			*/x /*B  * Return true if <what> is in the exclude table.  Note: <what> is@  * guaranteed to be in lowercase and what[0] is guaranteed to be#  * a reasonable printing character.s  */. {  	register struct stoplist	*stp;  	register int			test;e 	U 	test = *what - ' '; 	if (test < 0 || test >= NSTOP)t3 		error("Bug: illegal testexclude \"%s\"\n", what);C 	stp = stoplist[test]; 	while (stp != NULL) {+ 		if ((test = strcmp(stp->word, what)) > 0)o 			return(FALSE);e 		else if (test == 0)d 			return(TRUE); 		else stp = stp->next;h 	} 	return(FALSE);M }i  
 rotate(fp)	 FILE *fp;n {  	register char	*p; 	register char	*tp;  	register char	*inp;  2 	while (fgetss(inbuf, sizeof inbuf, fp) != NULL) { 		inp = inbuf; 		/*' 		 * If index mode, get the index entrys 		 */k 		if (tflag) {0 			for (tp = tbuf; *inp != EOS && *inp != '\t';) 				*tp++ = *inp++; 
 			*tp = EOS;  			if (*inp == EOS)x 				fprintf(stderr,f% 					"%KWIK-W-no index for \"%s\"\n",m 					inbuf); 			else	inp++;			n 		}  		/*) 		 * Erase junk from the rest of the linew 		 */ ! 		for (p = inp; *p != EOS; p++) {f 			if (*p < ' ')
 				*p = ' ';u 		}s 		/*; 		 * Skip to a word, output it, skip to the end of the wordn 		 */R 		for (p = inp;;) {  			while (!ok[*p] && *p != EOS)R 				p++; 			if (*p == EOS) 
 				break; 			stuff(p, inp);S 			while (*p > ' ')C 				p++; 		}  	} }	   stuff(fold_point, start)/ char		*fold_point;	/* Where to rotate from			*/ 4 char		*start;		/* Start of the text (if indexing)	*/ /*0  * Stuff this entry (assuming it isn't excluded)  */= {; 	register char	*p; 	register char	*bp;) 	register c; 	extern char	*cpystr();s   	p = fold_point;
 	bp = outbuf;g 	/*	< 	 * Get the sort argument, test against the exclusion buffer 	 */ 	while ((c = *p++) > ' ')u 		*bp++ = tolower(c);u 	*bp = EOS;  	if (testexclude(outbuf)) 	 		return;  	*bp++ = FOLD; 	/*r? 	 * Copy the input from the rotate point to the end of the lineh 	 */ 	bp = cpystr(bp, fold_point);* 	*bp++ = FOLD; 	/*=C 	 * Copy the rest of the input (from the start to the rotate point)  	 */ 	p = start;0 	while (p < fold_point)  		*bp++ = *p++;	 	/* ' 	 * If indexing, append the index entry  	 */
 	if (tflag) {1 		*bp++ = FOLD;1 		bp = cpystr(bp, tbuf); 	} 	*bp = EOS;9 	sorta(outbuf);, }   
 unrotate() {  	register char	*in;. 	register char	*out; 	register	c; 	char		*start;
 	char		*rest;8 	char		*bufend;0 	char		*middle;1 	char		*next();1, 	long		counter;	/* For "first fold" debug	*/   	bufend = &outbuf[width];04 	middle = &outbuf[(width - 1) / 2];	/* Fold here		*/5 	for (counter = 0; sorto(inbuf) != NULL; counter++) { 4 		for (in = inbuf; (c = *in++) != EOS && c != FOLD;) 			; #if (1 == 1) 		/*- 		 * This is a crude work-around for a bug in{& 		 * the Decus-C I/O system (I think). 		 */ 2 		if (c == EOS && in == &inbuf[1] && counter == 0)) 			continue;	/* Hack bug in i/o system	*/s #endif 		if (c == EOS) {f 			fprintf(stderr,3 				"No first fold in %d byte record number %ld\n",  				strlen(inbuf), counter);$ 			fprintf(stderr, "%s\"\n", inbuf); 			error("Bug: no first fold");  		}r 		counter++; 		/* 		 * Partition the text line 		 */i) 		start = in;			/* start -> after fold	*/t' 		while ((c = *in) != EOS && c != FOLD)N 			in++; 		if (c == EOS) % 			error("Bug: missing second fold");f+ 		*in++ = EOS;			/* Terminate right side	*/_ 		rest = in;' 		while ((c = *in) != EOS && c != FOLD)  			in++; 		/* 		 * Output the index  		 */c 		if (tflag) { 			if (c == EOS)% 				error("Bug: missing third fold");d	 			else {	* 				*in++ = EOS;	/* Terminate left side	*/! 				printf("%-?s ", offset, in);	N 			} 		}N 		else { 			if (c != EOS) 				error("Bug: extra fold");L 		}L 		/*( 		 * Partition the line.  
At this point:! 		 *	start ->	line after the fold,% 		 *	rest ->		line from start to fold,. 		 * Clear the line and stuff the text into it 		 */L 		out = &outbuf; 		while (out < bufend) 			*out++ = ' '; 		/*= 		 * Copy from "start" to the right half of the output buffer,: 		 * This algorithm was taken from the Lawerence Livermore 		 * tool kit. 		 */N 		out = middle; , 		for (in = start; (c = *in) != EOS; in++) {% 			if (in > start && in[-1] == ' ') {N* 				if (next(1, start, in, out) >= bufend) 					out = outbuf;   			} 			if (out >= bufend)o 				out = outbuf;o 			*out++ = c; 		}L 		/*< 		 * Copy from the end of the text to the middle (backwards) 		 */* 		out = middle;g$ 		for (in = rest; *in != EOS; in++); 		while (--in >= rest) {	 			out--;g 			if (in[1] == ' ') {) 				if (next(-1, rest, in, out) < outbuf)= 					out = bufend - 1; 			} 			if (out < outbuf) 				out = bufend - 1;g 			*out = *in; 		}; 		/* 		 * Delete trailing blanks  		 */t7 		for (out = bufend; *--out == ' ' && *out >= outbuf;);	 		out[1] = EOS;a 		printf("%s\n", outbuf);r 	} }	   char * next(increment, edge, in, out)0 int		increment;	/* Which direction (+1 | -1)		*/( char		*edge;		/* Lower limit for in			*/) register char	*in;		/* From pointer				*/	+ register char	*out;		/* Output pointer			*/g {c 	register int	c;  & 	for (; in >= edge; in += increment) { 		if (*in == ' ' || *in == EOS)>	 			break;e 		out += increment;> 	}
 	return(out);g }; 	 /*<  * getredirection() is intended to aid in porting C programs7  * to VMS (Vax-11 C) which does not support '>' and '<' 7  * I/O redirection.  With suitable modification, it may-1  * useful for other portability problems as well.1  */*   #include	<stdio.h>   int/ getredirection(argc, argv)
 int		argc;
 char		**argv;1 /*=  * Process vms redirection arg's.  Exit if any error is seen. :  * If getredirection() processes an argument, it is erased?  * from the vector.  getredirection() returns a new argc value.)  *>  * Warning: do not try to simplify the code for vms.  The codeA  * presupposes that getredirection() is called before any data is (  * read from stdin or written to stdout.  *  * Normal usage is as follows:  *  *	main(argc, argv)t
  *	int		argc;s  *	char		*argv[];n  *	{&  *		argc = getredirection(argc, argv);  *	}  */n {s
 #ifdef	vms+ 	register char		*ap;	/* Argument pointer	*/r 	int			i;	/* argv[] index		*/u 	int			j;	/* Output index		*/ # 	int			file;	/* File_descriptor 	*/e  : 	for (j = i = 1; i < argc; i++) {   /* Do all arguments	*/ 	    switch (*(ap = argv[i])) {s 	    case '<':			/* <file		*/l* 		if (freopen(++ap, "r", stdin) == NULL) {( 		    perror(ap);		/* Can't find file	*/, 		    exit(IO_ERROR);	/* Is a fatal error	*/ 		}r 		break;  & 	    case '>':			/* >file or >>file	*/# 		if (*++ap == '>') {	/* >>file		*/i 		    /*3 		     * If the file exists, and is writable by us,r6 		     * call freopen to append to the file (using the7 		     * file's current attributes).  Otherwise, create\3 		     * a new file with "vanilla" attributes as ifa/ 		     * the argument was given as ">filename".t3 		     * access(name, 2) is TRUE if we can write onn 		     * the specified file.	 		     */ ! 		    if (access(++ap, 2) == 0) {	( 			if (freopen(ap, "a", stdout) != NULL)' 			    break;	/* Exit case statement	*/l( 			perror(ap);	/* Error, can't append	*/* 			exit(IO_ERROR);	/* After access test	*/" 		    }			/* If file accessable	*/ 		}  		/*8 		 * On vms, we want to create the file using "standard"5 		 * record attributes.  create(...) creates the file-3 		 * using the caller's default protection mask andf/ 		 * "variable length, implied carriage return"t8 		 * attributes. dup2() associates the file with stdout. 		 */e6 		if ((file = creat(ap, 0, "rat=cr", "rfm=var")) == -1) 		 || dup2(file, fileno(stdout)) == -1) {\* 		    perror(ap);		/* Can't create file	*/, 		    exit(IO_ERROR);	/* is a fatal error	*/ 		}			/* If '>' creation	*/e 		break;			/* Exit case test	*/W  
 	    default: ) 		argv[j++] = ap;		/* Not a redirector	*/t 		break;			/* Exit case test	*/t 	    } 	}				/* For all arguments	*/c) 	argv[j] = NULL;			/* Terminate argv[]	*/i$ 	return (j);			/* Return new argc	*/ #else* 	/* 2 	 * Note: argv[] is referenced to fool the Decus C3 	 * syntax analyser, supressing an unneeded warningi 	 * message. 	 */3 	return (argv[0], argc);		/* Just return as seen	*/  #endif }o       char * cpystr(s1, s2) register char *s1, *s2;n /*2  * Copy string s2 to s1.  s1 must be large enough..  * return a pointer to the trailing null in s2  */(   {  	while ((*s1 = *s2++) != '\0')
 	    s1++; 	return(s1); }U