/*
 *			R S G E N
 *
 * Build (and test) the database for rsent() which generates random
 * text messages.  From James Gimpel, "Algorithms in SNOBOL4".
 */

/*)BUILD
	$(PROGRAM)	= rsgen
	$(FILES)	= { rsgen rsent rsout rstest }
*/

#ifdef	DOCUMENTATION

title	rsgen	Random sentence generator compiler
index		Random sentence generator compiler

synopsis
	.s.nf
	rsgen input_file output_file table_name
	.s.f

description

	If all three arguments are given, rsgen reads the input
	file, compiling the grammar.  It then writes the C language
	table to the output file, using the third argument to specify
	the table name.
	.s
	If only an input_file is specified, the file is compiled and
	the program enters a dialog mode so you can generate sample
	sentences.  A C table can be generated from dialog mode.

grammar format

	The random sentence generator accepts sentences in a
	context-free format (also called "Backus-Naur" form by
	computer scientists).  In this format, there are
	"terminal symbols" -- words to be output and
	"non-terminal symbols" -- names of grammar rules.
	This grammar also allows specification of "rule weights"
	so that certain rule choices are more likely than others.

	"Terminal symbols" are strings of words which are copied
	to the output device without interpretation.
	"Non-terminal symbols" are words enclosed in angle brackets
	(where the word is found in the dictionary of non-terminals).

	For example, here is a simple grammar:

	; comments have ';' in column 1
	; but they can't appear within a rule.
	 = Hello there. | Hi there. | Hello .
	; note that a rule is continued by '|'
	; at the end of the line.  The '|' separates
	; alternative rule expansions.
	;
	 = #4# | nice | nice
	; 4 times out of six, does nothing.
	; 1 time out of six, outputs 'nice'.
	; 1 time out of six, outputs ' nice'
	;
	 = #4# very |
	; illustrates a "recursive" rule that
	; may expand to "very" or "very very" or similar.
	; See Gimpel's book for an explanation of how
	; to prevent these from expanding forever.
; = man | woman | child | computer See the file RSTSET.GRM for an extensive sample. diagnostics Various, self-explanatory, use a sample grammar that is at times quite insulting. author Martin Minow From an algorithm published by James Gimpel in "Algorithms for Snobol IV". bugs #endif #include #include #ifdef vms #include #define NORMAL SS$_NORMAL #define FAILURE SS$_ABORT extern FILE *fdopen(); #define CREATE(f, m) fdopen(creat(f, 0, "rat=cr", "rfm=var"), m) #else #define CREATE fopen #ifdef decus #define NORMAL IO_SUCCESS #define FAILURE IO_ERROR #else #define NORMAL 0 #define FAILURE 2 #endif #endif #ifndef decus #define streq(st1, st2) (strcmp(st1, st2) == 0) #endif #define EOS '\0' #define EOL '\n' #define TRUE 1 #define FALSE 0 #define NRULE 50 /* Number of rules */ #define NTERM 400 /* Rule bodies */ #define TEMPMAX 256 /* Temp buffer size */ typedef struct rs_rule { char *r_name; /* Rule name */ int r_weightsum; /* Sum of all weights */ char **r_term; /* Rule terms */ } RS_RULE; RS_RULE rule[NRULE]; static char *term[NTERM]; extern RS_RULE rstest[]; /* For nasty error messages */ extern int wr_word(); int rindex = 0; /* Rule index */ int tindex = 0; /* Term index */ int debug = 0; /* Magic printout hack flag */ extern long seed; /* Magic for debugging only */ long oldseed; /* More magic for debugging */ char line[133]; /* Input text line */ char *linep = line; /* -> current input text */ char temp[TEMPMAX]; /* Working text */ FILE *infd = NULL; /* * Text to output to define praise subroutine */ char *header[] = { "#include\t\t", "#ifdef\tM68000", "ROM_SECT(_C_D00)", "#endif", "typedef struct rs_rule {", "\tchar\t*r_name;", "\tint\tr_weightsum;", "\tchar\t**r_term;", "} RS_RULE;", NULL, }; main(argc, argv) int argc; /* Argument counter */ char *argv[]; /* Argument vector */ /* * Get grammar, then generate random sentences. 
* * ::= * || */ { char *outtabname; char *outfilename; time(&seed); /* Randomize */ if (argc > 1 && argv[1][0] == '-' && tolower(argv[1][1]) == 'd') { debug++; argc--; argv++; } outtabname = NULL; outfilename = NULL; switch (argc) { case 0: case 1: /* No arguments given */ if ((infd = fopen("rstest.grm", "r")) != NULL) printf("Reading default grammar \"rstest.grm\"\n"); else { do { printf("Grammar input file: "); fflush(stdout); if (gets(line) == NULL) exit(1); if ((infd = fopen(line, "r")) == NULL) { perror(line); nastygram("Can't find your grammar file. ", "You're a ."); nastygram("but I'll give you another chance.", ""); } } while (infd == NULL); } getgrammar(); process(); break; case 4: /* All arguments specified */ outtabname = argv[3]; case 3: /* Output file name specified */ outfilename = argv[2]; case 2: /* Grammar file specified */ if (outfilename == NULL) { outfilename = "temp.c"; fprintf(stderr, "Grammar output to \"%s\"\n", outfilename); } if (outtabname == NULL) { outtabname = "rs_table"; fprintf(stderr, "Table name is \"%s\"\n", outtabname); } if ((infd = fopen(argv[1], "r")) == NULL) { perror(argv[1]); sprintf(line, "Your rule file \"%s\" wasn't found. ", argv[1]); nastygram(line, "Try again some other day, you ."); exit(FAILURE); } getgrammar(); outgrammar(outfilename, outtabname); stats(); break; default: nastygram("Only a would specify", "too many arguments. Goodbye."); exit(FAILURE); } } nastygram(why1, why2) char *why1; char *why2; /* * Tell the programmer where to get off. */ { rsent(why1, rstest, wr_word, stdout); rsent(why2, rstest, wr_word, stdout); wr_word(NULL, stdout); } stats() { printf("%d rules, %d terms\n", rindex, tindex); } process() /* * Get command and do it */ { register char *lp; register int howmany; extern int out(); stats(); printf(" for rule names,\n'?' for grammar dump,\n"); printf("'.' 
to write grammar in C\n"); printf("or ()rulename\n"); for (;;) { printf("* "); fflush(stdout); if (gets((lp = line)) == NULL) break; if (*lp == EOS) dumpnames(); else if (streq(lp, "??")) debug++; else if (streq(lp, "?")) dumpgrammar(); else if (*lp == '.') { for (lp++; *lp == ' '; lp++) ; outgrammar(lp, NULL); } else { if (isdigit(*lp)) { howmany = atoi(lp); while (isdigit(*lp)) lp++; if (*lp == EOS) { rsent("Gotta have a rule, you .", rstest, wr_word, stdout); continue; } } else { howmany = 1; } while (--howmany >= 0) { rsent(lp, rule, wr_word, stdout); wr_word(NULL, stdout); } } } } outgrammar(filename, tablename) char *filename; char *tablename; { register char **hp; register int i; register FILE *outfd; outfd = NULL; if (*filename != EOS) { if ((outfd = CREATE(filename, "w")) == NULL) { perror(filename); printf("Can't create \"%s\"\n", filename); } } while (outfd == NULL) { printf("Output C grammar to? "); fflush(stdout); if (gets(line) == NULL) exit(NORMAL); if ((outfd = CREATE(line, "w")) == NULL) { perror(line); printf("Can't create \"%s\".\n", line); } } if (tablename != NULL) strcpy(line, tablename); else line[0] = EOS; while (line[0] == EOS) { printf("Rule vector name: "); fflush(stdout); if (gets(line) == NULL) exit(NORMAL); }; for (hp = header; *hp != NULL; hp++) { fprintf(outfd, "%s\n", *hp); } fprintf(outfd, "static char *term[%d] = {\n", tindex); for (i = 0; i < tindex; i++) { if (term[i] == NULL) { fprintf(outfd, " NULL,\n"); } else { fprintf(outfd, " \"\\%03o%s\",\n", term[i][0], &term[i][1]); } } fprintf(outfd, "};\n"); fprintf(outfd, "RS_RULE %s[%d] = {\n", line, rindex + 1); for (i = 0; i <= rindex; i++) { if (rule[i].r_name == NULL) fprintf(outfd, "{ NULL,"); else fprintf(outfd, "{ \"%s\",", rule[i].r_name); fprintf(outfd, "\t%d, ", rule[i].r_weightsum); if (rule[i].r_term == NULL) fprintf(outfd, "NULL },\n"); else fprintf(outfd, " &term[%d] },\n", rule[i].r_term - &term[0]); } fprintf(outfd, "};\n"); fclose(outfd); } dumpnames() /* * Dump all 
rule names */ { register int r; register RS_RULE *rp; register int len; for (r = 0, rp = &rule[0]; r < rindex; r++, rp++) { sprintf(line, "<%s> ", rp->r_name); wr_word(line, stdout); } wr_word(NULL, stdout); } dumpgrammar() /* * Dump the entire grammar */ { int r; register RS_RULE *rp; register char **termp; register char *tp; for (r = 0, rp = &rule[0]; r < rindex; r++, rp++) { printf("%3d <%s> [%d] ::=\n", r, rp->r_name, rp->r_weightsum); for (termp = rp->r_term; (tp = *termp++) != NULL;) { printf(" #%d# ", *tp++); printf(" %s", tp); if (*termp != NULL) printf(" |\n"); } printf("\n"); } } getgrammar() /* * Read the grammar */ { line[0] = EOS; while (!feof(infd)) { getrule(); } rule[rindex].r_name = NULL; /* terminate rules */ rule[rindex].r_weightsum = 0; rule[rindex].r_term = NULL; } getrule() /* * Read one rule: * * ::= '=' * ::= '<' '>' * ::= * || '|' * * '|' at the end of the line continues rule_bodies on the next line. * else, the rule ends at the end of line. * * ';' outside a rule is a comment line. 
*/ { register RS_RULE *rp; register int c; char *readname(); int getbyte(); for (;;) { /* Loop until a rule is read in */ while ((c = peek()) == ' ' || c == EOL) getbyte(); if (c == ';') { skipeol(); continue; } if (c == EOF) { return (FALSE); } if (rindex >= (NRULE - 1)) { error("More than %d rules\n", NRULE); } rp = &rule[rindex++]; /* rp -> new rule */ if ((rp->r_name = readname()) == NULL) { bug("E", "no name for rule"); skipeol(); rindex--; continue; } skipwhite(); if (getbyte() != '=') { bug("W", "expecting '=' after rule name"); } rp->r_term = &term[tindex]; rp->r_weightsum = getbody(); return (TRUE); } } int getbody() /* * Read all bodies for this rule: * * ::= () * ::= '#' '#' * ::= text string with s */ { int wsum; /* Weight sum */ register char *tp; /* Text pointer */ register int c; /* Current character */ int value; /* Working value */ int getbyte(); char *stash(); wsum = 0; while ((c = skipwhite()) != EOL && c != EOF) { if (tindex >= (NTERM-1)) { error("More than %d rule terms\n", NTERM); } if (c == '#') { /* * Specific weight: = #number# body */ getbyte(); value = 0; while (isdigit((c = getbyte()))) { value *= 10; value += (c - '0'); } } else value = 1; wsum += value; tp = &temp[0]; *tp++ = value; while (tp < &temp[TEMPMAX-2]) { switch (peek()) { case EOF: case EOL: case '|': goto breakout; case ' ': /* Trash leading blanks */ if (tp == &temp[1]) { getbyte(); break; } default: *tp++ = getbyte(); } } breakout: while (tp > &temp[1] && tp[-1] == ' ') tp--; /* Trailing blanks too */ *tp = EOS; term[tindex++] = stash(temp); if (skipwhite() == '|') { getbyte(); if (skipwhite() == EOL) getbyte(); } } term[tindex++] = NULL; /* Terminate rule terms */ return (wsum); } char * readname() /* * Read a rule */ { register char *tp; register int c; char *stash(); int getbyte(); if (getbyte() != '<') { bug("E", "Name must start with an '<'"); return (NULL); } for (tp = &temp[0]; tp < &temp[TEMPMAX - 2];) { if ((c = getbyte()) == '>' || c == EOL || c == EOF) break; 
*tp++ = c; } *tp = EOS; if (c != '>') { bug("E", "Bad or long rule name -- must end with '>'"); fprintf(stderr, "name as read = \"%s\"\n", temp); return (NULL); } return (stash(temp)); } /* * Stuff */ char * myalloc(size) int size; /* * Allocate or die */ { register char *p; if ((p = calloc(size, 1)) == NULL) error("?-Praise-Can't allocate %d bytes\n", size); return (p); } char * stash(string) char *string; /* * Store a string away */ { register char *s; if ((s = malloc(strlen(string) + 1)) == NULL) error("?-Praise-Can't store %d byte string.\n", strlen(string)); return(strcpy(s, string)); } /* * I/O */ int getbyte() /* * Read one byte */ { if (*linep == EOS) { if (fgets(line, sizeof line, infd) == NULL) return (EOF); linep = line; } return (*linep++); } int peek() /* * Peek at the next byte */ { register int c; if ((c = getbyte()) != EOF) linep--; return (c); } int skipwhite() /* * Skip over whitespace, return "peek()" after skip */ { register int c; while ((c = peek()) == ' ' || c == '\t') getbyte(); return (c); } skipeol() /* * Skip to end of line */ { register int c; while ((c = getbyte()) != EOL && c != EOF) ; } bug(severity, message) char *severity; char *message; { fprintf(stderr, "?%s-Praise-%s", severity, message); if (!feof(infd)) { fprintf(stderr, " at line:\n%.*s\n", strlen(line) - 1, line); fprintf(stderr, "the error is near byte %d", linep - line); if (linep > line && linep[-1] >= ' ') { fprintf(stderr, ": '%c'\n", linep[-1]); } else fprintf(stderr, "\n"); } else fprintf(stderr, " at end of input\n"); if (tolower(severity[0]) == 'f') error("can't continue", 0); } #ifndef decus error(format, arg) char *format; int arg; /* * Pull the plug */ { fprintf(stderr, format, arg); exit(FAILURE); } #endif