 /*  *			R S G E N  *C  * Build (and test) the database for rsent() which generates random >  * text messages.  From James Gimpel, "Algorithms in SNOBOL4".  */    /*)BUILD	$(PROGRAM) = rsgen ) 		$(FILES) = { rsgen rsent rsout rstest }  */   #ifdef	DOCUMENTATION  . title	rsgen	Random sentence generator compiler) index		Random sentence generator compiler    synopsis 	.s.nf( 	rsgen input_file output_file table_name 	.s.f  description   . 	If all three arguments are given, rsgen reads0 	the input file, compiling the grammar.  It then0 	writes the C language table to the output file,4 	using the third arugment to specify the table name. 	.sm0 	If only an input_file is specified, the file is. 	compiled and the program enters a dialog mode1 	so you can generate sample sentences.  A C tablen# 	can be generated from dialog mode.o   grammar format  3 	The random sentence generator accepts sentences inp5 	a context-free format (also called "Bacus Naur" forme4 	by computer scientists).  In this format, there are; 	"terminal symbols" -- words to be output and "non-terminalt7 	symbols" -- names of grammar rules.  This grammar alsot7 	allows specification of "rule weights" so that certain,* 	rule choices are more likely than others.  9 	"Terminal symbols" are strings of words which are copied - 	to the output device without interpretation.   < 	"Non-terminal symbols" are words enclosed in angle brackets= 	(where the word is found in the dictionary of non-terminals.   ' 	For example, here is a simple grammar:t  $ 	    ; comments have ';' in column 1+ 	    ; but they can't appear within a rule.n 	    <HELLO> = Hello there.	|a 		Hi there.		| 		Hello <NICE> <PERSON>.+ 	    ; note that a rule is continued by '|' 1 	    ; at the end of the line.  
The '|' separates # 	    ; alternative rule expansions.t 	    ;& 	    <NICE> = #4# | nice | <VERY> nice/ 	    ; 4 times out of six, <NICE> does nothing.l0 	    ; 1 time out of six, <NICE> outputs 'nice'.6 	    ; 1 time out of six, <NICE> outputs '<VERY> nice' 	    ;& 	    <VERY> = #4# very | <VERY> <VERY>1 	    ; <VERY> illustrates a "recursive" rule that 6 	    ; may expand to "very" or "very very" or similar.2 	    ; See Gimpel's book for an explanation of how/ 	    ; to prevent these from expanding forever.| 	    ;. 	    <PERSON> = man | woman | child | computer  1 	See the file RSTSET.GRM for an extensive sample.t   diagnostics, 	I5 	Various, self-explanatory, use a sample grammar thatt 	is at times quite insulting.    author  
	Martin Minow

	From an algorithm published by James Gimpel in
	"Algorithms for Snobol IV".

bugs

#endif

#include <stdio.h>
#include <ctype.h>
/*
 * malloc(), calloc(), exit() and atoi() need real declarations: an
 * implicit "int" result truncates pointers on 64-bit machines.
 * NOTE(review): DECUS C is assumed not to supply <stdlib.h> -- confirm.
 * <string.h> is deliberately left out: some C libraries declare
 * rindex() there, which clashes with this file's global "rindex".
 */
#ifndef	decus
#include <stdlib.h>
#endif
/*
 * Per-system exit codes and the call used to create the output file:
 *	NORMAL	- exit status for success
 *	FAILURE	- exit status for failure
 *	CREATE	- open a file for writing (on vms, through creat() with
 *		  the record attributes given below; elsewhere plain fopen)
 */
#ifdef	vms
#include <ssdef.h>
#define	NORMAL	SS$_NORMAL
#define	FAILURE	SS$_ABORT
extern FILE	*fdopen();
#define	CREATE(f, m) fdopen(creat(f, 0, "rat=cr", "rfm=var"), m)
#else
#define	CREATE	fopen
#ifdef	decus
#define	NORMAL	IO_SUCCESS
#define	FAILURE	IO_ERROR
#else
#define	NORMAL	0
#define	FAILURE	2
#endif
#endif
 #ifndef decus / #define	streq(st1, st2)	(strcmp(st1, st2) == 0)  #endif   #define	EOS		'\0's #define	EOL		'\n'  #define	TRUE		1t #define	FALSE		0) #define	NRULE		50		/* Number of rules		*/e' #define	NTERM		400		/* Rule bodies			*/d- #define TEMPMAX		256		/* Temp buffer size		*/;   typedef struct rs_rule {! 	char	*r_name;		/* Rule name			*/ , 	int	r_weightsum;		/* Sum of all weights		*/# 	char	**r_term;		/* Rule terms			*/	
 } RS_RULE;   RS_RULE		rule[NRULE];M static char	*term[NTERM];e  8 extern RS_RULE	rstest[];		/* For nasty error messages	*/ extern int	wr_word();d  $ int	rindex = 0;			/* Rule index			*/$ int	tindex = 0;			/* Term index			*// int	debug = 0;			/* Magic printout hack flag	*/d2 extern long	seed;			/* Magic for debugging only	*/. long	oldseed;			/* More magic for debugging	*/( char	line[133];			/* Input text line		*/1 char	*linep = line;			/* -> current input text	*/	* char	temp[TEMPMAX];			/* Working text			*/ FILE	*infd = NULL;   /*-  * Text to output to define praise subroutineR  */E   char	*header[] = { 	"#include\t\t<stdio.h>",n 	"#ifdef\tM68000", 	"ROM_SECT(_C_D00)",
 	"#endif", 	"typedef struct rs_rule {", 	"\tchar\t*r_name;", 	"\tint\tr_weightsum;",h 	"\tchar\t**r_term;",n 	"} RS_RULE;", 	NULL, }; g main(argc, argv)$ int		argc;			/* Argument counter		*/& char		*argv[];		/* Argument vector		*/ /*/  * Get grammar, then generate random sentences.	  *  *	<grammar>	::=	<rule>r  *			||	<rule> <grammar>  */L {  	char		*outtabname;u 	char		*outfilename;    	time(&seed);			/* Randomize		*/C 	if (argc > 1 && argv[1][0] == '-' && tolower(argv[1][1]) == 'd') {,
 	    debug++;" 	    argc--; 	    argv++; 	} 	outtabname = NULL;t 	outfilename = NULL;   	switch (argc) { 	case 0:$ 	case 1:				/* No arguments given	*/3 	    if ((infd = fopen("rstest.grm", "r")) != NULL)g5 		printf("Reading default grammar \"rstest.grm\"\n");g 	    else {m 		do {% 		    printf("Grammar input file: ");	 		    fflush(stdout);* 		    if (gets(line) == NULL)c 			exit(1);n. 		    if ((infd = fopen(line, "r")) == NULL) { 			perror(line);. 			nastygram("Can't find your grammar file. ",) 			"You're <REALLY> a <STUPID> <FOOL>.");}6 			nastygram("but I'll give you another chance.", ""); 		    }  		} while (infd == NULL);  	    } 	    getgrammar(); 	    process();s 	    break;"  ) 	case 4:			/* All arguments specified		*/a 	    outtabname = argv[3];, 	case 3:			/* Output file name specified		*/ 	    outfilename = argv[2];u( 	case 2:			/* Grammar file specified		*/ 	    if (outfilename == NULL) {( 		outfilename = "temp.c";p= 		fprintf(stderr, "Grammar output to \"%s\"\n", outfilename);	 	    } 	    if (outtabname == NULL) { 		outtabname = "rs_table";8 		fprintf(stderr, "Table name is \"%s\"\n", outtabname); 	    }0 	    if ((infd = fopen(argv[1], "r")) == NULL) { 		perror(argv[1]); 		sprintf(line,e8 		    "Your rule file \"%s\" wasn't found.  ", argv[1]); 		nastygram(line,e8 		    "Try again some other day, you <STUPID> <FOOL>."); 		exit(FAILURE); 	    } 	    getgrammar();) 	    outgrammar(outfilename, outtabname);p
 	    stats();e 	    break;o  	 	default:\3 		nastygram("Only a <STUPID> <FOOL> would specify",N$ 			"too many arguments.  Goodbye."); 		exit(FAILURE); 	} }m   nastygram(why1, why2)e char		*why1; char		*why2; /*(  * Tell the programmer where to get off.  */; {	& 	rsent(why1, rstest, wr_word, stdout);& 	rsent(why2, rstest, wr_word, stdout); 	wr_word(NULL, stdout);a }n   stats()  {,0 	printf("%d rules, %d terms\n", rindex, tindex); }    t	 process()  /*  * Get command and do it  */n {) 	register char	*lp;  	register int	howmany; 	extern int	out();  	 	stats();O= 	printf("<return> for rule names,\n'?' for grammar dump,\n");x2 	printf("'.' <filename> to write grammar in C\n");# 	printf("or (<number>)rulename\n");h 	for (;;) {w 	    printf("* "); 	    fflush(stdout);# 	    if (gets((lp = line)) == NULL), 		break; 	    if (*lp == EOS) 		dumpnames(); 	    else if (streq(lp, "??"))
 		debug++; 	    else if (streq(lp, "?"))  		dumpgrammar(); 	    else if (*lp == '.') {  		for (lp++; *lp == ' '; lp++) 		    ;s 		outgrammar(lp, NULL);  	    } 	    else {) 		if (isdigit(*lp)) {o 		    howmany = atoi(lp);m 		    while (isdigit(*lp)) 			lp++; 		    if (*lp == EOS) {"3 			rsent("Gotta have a rule, you <STUPID> <FOOL>.",w  			    rstest, wr_word, stdout); 			continue; 		    }( 		}  		else { 		    howmany = 1; 		}  		while (--howmany >= 0) {' 		    rsent(lp, rule, wr_word, stdout);+ 		    wr_word(NULL, stdout); 		}  	    } 	} };   outgrammar(filename, tablename)  char		*filename; char		*tablename;	 {g 	register char	**hp; 	register int	i; 	register FILE	*outfd;   	outfd = NULL; 	if (*filename != EOS) {3 	    if ((outfd = CREATE(filename, "w")) == NULL) {r 		perror(filename);l, 		printf("Can't create \"%s\"\n", filename); 	    } 	} 	while (outfd == NULL) {% 	    printf("Output C grammar to? ");	 	    fflush(stdout); 	    if (gets(line) == NULL) 		exit(NORMAL);)/ 	    if ((outfd = CREATE(line, "w")) == NULL) {} 		perror(line);m) 		printf("Can't create \"%s\".\n", line);  	    } 	} 	if (tablename != NULL)r 	    strcpy(line, tablename);s 	else line[0] = EOS; 	while (line[0] == EOS) {e" 	    printf("Rule vector name: "); 	    fflush(stdout); 	    if (gets(line) == NULL) 		exit(NORMAL);r 	};\' 	for (hp = header; *hp != NULL; hp++) {(! 	    fprintf(outfd, "%s\n", *hp);u 	}7 	fprintf(outfd, "static char *term[%d] = {\n", tindex);e 	for (i = 0; i < tindex; i++) {  	    if (term[i] == NULL) {" 		fprintf(outfd, " NULL,\n");m 	    } 	    else {a> 		fprintf(outfd, " \"\\%03o%s\",\n", term[i][0], &term[i][1]); 	    } 	} 	fprintf(outfd, "};\n");: 	fprintf(outfd, "RS_RULE %s[%d] = {\n", line, rindex + 1);  	for (i = 0; i <= rindex; i++) {  	    if (rule[i].r_name == NULL) 		fprintf(outfd, "{ NULL,");	 	    elsee. 		fprintf(outfd, "{ \"%s\",", rule[i].r_name);3 	    fprintf(outfd, "\t%d, ", 
rule[i].r_weightsum);%  	    if (rule[i].r_term == NULL) 		fprintf(outfd, "NULL },\n");	 	    elseU? 		fprintf(outfd, " &term[%d] },\n", rule[i].r_term - &term[0]);p 	} 	fprintf(outfd, "};\n"); 	fclose(outfd);e }i ] dumpnames()} /*  * Dump all rule names  */n {o 	register int		r;d 	register RS_RULE	*rp; 	register int	len;  4 	for (r = 0, rp = &rule[0]; r < rindex; r++, rp++) {( 	    sprintf(line, "<%s> ", rp->r_name); 	    wr_word(line, stdout);u 	} 	wr_word(NULL, stdout);o }d  
 dumpgrammar()i /*  * Dump the entire grammar  */t { 	 	int			r;	 	register RS_RULE	*rp; 	register char		**termp; 	register char		*tp;  4 	for (r = 0, rp = &rule[0]; r < rindex; r++, rp++) {C 	    printf("%3d <%s> [%d] ::=\n", r, rp->r_name, rp->r_weightsum);e9 	    for (termp = rp->r_term; (tp = *termp++) != NULL;) {; 		printf(" #%d# ", *tp++); 		printf(" %s", tp); 		if (*termp != NULL)) 		    printf(" |\n");" 	    } 	    printf("\n"); 	} }l , getgrammar() /*  * Read the grammaro  */  {u 	line[0] = EOS;  	while (!feof(infd)) { 	    getrule();t 	}3 	rule[rindex].r_name = NULL;		/* terminate rules	*/  	rule[rindex].r_weightsum = 0; 	rule[rindex].r_term = NULL; }+  	 getrule()  /*  * Read one rule:]  *,  *	<rule>		::=	<rule_name> '=' <rule_bodies>(  *	<rule_name>	::=	'<' <text_string> '>'   *	<rule_bodies>	::=	<rule_body>%  *			||	<rule_body> '|' <rule_bodies>p  *E  * '|' at the end of the line continues rule_bodies on the next line. *  * else, the rule ends at the end of line.  *(  * ';' outside a rule is a comment line.  */  {t 	register RS_RULE	*rp; 	register int		c;	 	char		*readname();  	int		getbyte();  0 	for (;;) {		/* Loop until a rule is read in		*/, 	    while ((c = peek()) == ' ' || c == EOL) 		getbyte(); 	    if (c == ';') { 		skipeol(); 		continue;< 	    } 	    if (c == EOF) { 		return (FALSE);> 	    }! 	    if (rindex >= (NRULE - 1)) {*' 		error("More than %d rules\n", NRULE);  	    }0 	    rp = &rule[rindex++];		/* rp -> new rule	*/- 	    if ((rp->r_name = readname()) == NULL) {a 		bug("E", "no name for rule");e 		skipeol(); 		rindex--;n 		continue;	 	    } 	    skipwhite();t 	    if (getbyte() != '=') {, 		bug("W", "expecting '=' after rule name"); 	    }  	    rp->r_term = &term[tindex];! 	    rp->r_weightsum = getbody();  	    return (TRUE);  	} }  = int)	 getbody()n /*!  * Read all bodies for this rule:(  *%  *	<rule_body>	::=	(<weight>) <terms>\&  *	<weight>	::=	'#' <digit_string> 
'#',  *	<term>		::=	text string with <rule_name>s  */e {r 	int		wsum;		/* Weight sum			*/,) 	register char	*tp;		/* Text pointer			*/d* 	register int	c;		/* Current character		*/" 	int		value;		/* Working value		*/ 	int		getbyte(); 	char		*stash();  
 	wsum = 0;/ 	while ((c = skipwhite()) != EOL && c != EOF) {w 	    if (tindex >= (NTERM-1)) {r, 		error("More than %d rule terms\n", NTERM); 	    } 	    if (c == '#') { 		/*- 		 * Specific weight:  <name> = #number# body< 		 */	 		getbyte(); 		value = 0;$ 		while (isdigit((c = getbyte()))) { 		    value *= 10; 		    value += (c - '0');* 		}r 	    } 	    else value = 1; 	    wsum += value;  	    tp = &temp[0];a 	    *tp++ = value;u$ 	    while (tp < &temp[TEMPMAX-2]) { 		switch (peek()) {; 		case EOF:; 		case EOL:= 		case '|':  		    goto breakout;  ' 		case ' ':		/* Trash leading blanks	*/( 		    if (tp == &temp[1]) {N
 			getbyte(); 	 			break;= 		    } 
 		default: 		    *tp++ = getbyte(); 		}e 	    }	 breakout:	+ 	    while (tp > &temp[1] && tp[-1] == ' ') $ 		tp--;				/* Trailing blanks too	*/ 	    *tp = EOS; " 	    term[tindex++] = stash(temp); 	    if (skipwhite() == '|') { 		getbyte(); 		if (skipwhite() == EOL)u 		    getbyte(); 	    }	M 	}4 	term[tindex++] = NULL;			/* Terminate rule terms	*/ 	return (wsum);  }  o char *
 readname() /*  * Read a rule <name>l  */* {  	register char	*tp;[ 	register int	c; 	char		*stash(); 	int		getbyte();   	if (getbyte() != '<') {- 	    bug("E", "Name must start with an '<'");e 	    return (NULL);' 	}0 	for (tp = &temp[0]; tp < &temp[TEMPMAX - 2];) {8 	    if ((c = getbyte()) == '>' || c == EOL || c == EOF) 		break; 	    *tp++ = c;; 	} 	*tp = EOS;( 	if (c != '>') {< 	    bug("E", "Bad or long rule name -- must end with '>'");6 	    fprintf(stderr, "name as read = \"%s\"\n", temp); 	    return (NULL);  	} 	return (stash(temp)); }e h /*  * Stuff  */    char *
 myalloc(size) 
 int		size; /*  * Allocate or die  */) {  	register char	*p;  # 	if ((p = calloc(size, 1)) == NULL) 7 	    error("?-Praise-Can't allocate %d bytes\n", size);] 	return (p); }    char *
 stash(string)= char		*string; /*  * Store a string away  */} {* 	register char	*s;  . 	if ((s = malloc(strlen(string) + 1)) == NULL)E 	    error("?-Praise-Can't store %d byte string.\n", strlen(string));e 	return(strcpy(s, string));} }r   /*  * I/O  */;   inth	 getbyte()u /*  * Read one byte  */( {e 	if (*linep == EOS) {A0 	    if (fgets(line, sizeof line, infd) == NULL) 	    return (EOF); 	    linep = line; 	} 	return (*linep++);l }t   intt peek() /*  * Peek at the next byte  */  {s 	register int	c;   	if ((c = getbyte()) != EOF)
 	    linep--;  	return (c); };   int  skipwhite()s /*3  * Skip over whitespace, return "peek()" after skips  */% {y 	register int	c;  ) 	while ((c = peek()) == ' ' || c == '\t')  	    getbyte();  	return (c); }b  	 skipeol()* /*  * Skip to end of line  */i {  	register int	c;  + 	while ((c = getbyte()) != EOL && c != EOF)  	    ; }F   bug(severity, message) char		*severity; char		*message;  {k  5 	fprintf(stderr, "?%s-Praise-%s", severity, message);	 	if (!feof(infd)) {tB 	    fprintf(stderr, " at line:\n%.*s\n", strlen(line) - 1, line);@ 	    fprintf(stderr, "the error is near byte %d", linep - line);, 	    if (linep > line && linep[-1] >= ' ') {) 		fprintf(stderr, ": '%c'\n", linep[-1]);t 	    }	 	    elsep 		fprintf(stderr, "\n"); 	} 	else*+ 	    fprintf(stderr, " at end of input\n");y! 	if (tolower(severity[0]) == 'f')   	    error("can't continue", 0); }s  
 #ifndef	decusm error(format, arg) char		*format;	 int		arg;" /*  * Pull the plug  */  {e 	fprintf(stderr, format, arg); 	exit(FAILURE);* }" #endif