/*
 *	MP -- Macro and conditional compilation preprocessor for C
 */

/*
 * Designed and written by Robert W. Harper, Jr. in fulfillment of the
 * programming requirements for ICSS-580 Systems Programming taken
 * during Winter quarter 1978/79, with invaluable assistance from
 * Mike Lutz, my project advisor, who helped me with some sticky
 * design issues and originally wrote the expression evaluator.
 *
 * Version 1.10 - 4/18/79
 *
 * Edit log -- document all source changes here (and increment
 *		the edit level!)
 *	First release: 4/4/79 RWH
 *	Fixed conditional compilation bugs and changed #include paths:
 *		4/7/79 RWH
 *	Added support for character constants: 4/10/79 RWH
 *	Fixed bug re extraneous <SOH> in front of #include line itself:
 *		4/10/79 RWH
 *	Fixed incorrect support of character constants: 4/12/79 RWH
 *	Made alpha() and alphanum() macros instead of functions:
 *		4/12/79 RWH
 *	Fixed #ifdef/#ifndef argument length bug: 4/12/79 RWH
 *	Rewrote concat() to make it faster: 4/12/79 RWH
 *	Allowed white space between '#' and directive: 4/13/79 RWH
 *	Modified actual() to propagate actuals through nested calls:
 *		4/13/79 RWH
 *	Added _FILENAME and _LINE: 4/18/79 RWH
 *	Fixed pop_ala() so that it works correctly: 4/18/79 RWH
 *
 *	Added code for decus and vms implementations, also for stdio.
 *		05-Sep-80 MM
 *	More Decus stuff ... provide same predefined symbols as provided
 *	by the Decus compiler's preprocessor.  Also made '-P' flag
 *	default to 'on', suppressing <SOH> stuff, for Decus compiler,
 *	which does not recognize this.  05-Dec-80 RBD
 *
 *			N O T E
 *		It is necessary to compile this preprocessor
 *		on the TARGET operating system for the pre-
 *		defined symbols to be correct.
 *
 *	Changed directory search list to look for 'lb:' on RSX, and
 *	'sy:' on RT-11.  05-Dec-80
 *
 *	Eliminate leading whitespace in macro expansion.  06-Dec-80 RBD
 *
 *	Display filespec string on failing #include file opens, instead
 *	of useless, terse "File not found".  06-Dec-80
 *
 * This implementation of the C macro and conditional compilation
 * preprocessor is designed to bring the currently-distributed version
 * of the C compiler up to the standards defined in "The C Programming
 * Language" by Kernighan and Ritchie (sine qua non).  The only
 * preprocessor feature that remains unimplemented is the "#line"
 * directive.  This version is implemented as an independent program,
 * designed to be forked to by "cc", much like the other passes of the
 * compiler, though it may be used independently of the C compiler for
 * any similar purpose.  The calling syntax is
 *
 *		"mp [-P] input_file output_file"
 *
 * where the "-P" switch causes suppression of the <SOH> convention
 * used by the compiler to flag included files (usually all lines of
 * source stemming from an include file are preceded by a <SOH>
 * character so that the compiler's line numbers agree with the source).
 * NOTE: the '-P' switch is turned on permanently for the Decus version.
 *
 * All routines are documented with introductory comments preceding the
 * definition of the function, describing the techniques used therein.
 * Other terse comments appear within the source code to clarify certain
 * sticky parts.  The overall design of the processor is line-oriented,
 * in that it reads a line, processes as necessary and then writes it
 * out.  Due to lack of clarity in the specification of constant
 * expressions in the aforementioned text, this processor evaluates
 * expressions by first running them through the macro expansion routine,
 * then calling the recursive-descent parser/evaluator.
 *
 * Note that recursion is used in two major areas of the design -- in
 * the expression evaluator and in the macro expansion routine.
 * Inherent in using a recursive solution to a problem is a certain
 * degree of difficulty that arises when an error condition occurs.
* In order to solve this, the processor uses two small assembly-language * routines, envsave() and envreset(), to save the frame pointer prior * to the call of either routine (expand() or expr()); upon encountering * an error, envreset() is called to restore the environment to that of * the top-level caller and force a particular value to be returned. * * The code has been fairly thoroughly tested, but inevitably bugs will * appear and complaints about the design will crop up. If you encounter * any bugs or have any suggestions for enhancements, contact the author * or Mike Lutz at RIT School of Computer Science. */ /* * Include compile-time constant definitions, external references, * and structures. */ #ifdef decus #define stdio #define Digital #endif #ifdef vms #define stdio #define Digital #endif #ifdef stdio #include #else #define NULL (0) #endif #include "mpdefs.h" #include "mpextr.h" /* * Directives */ char *dir_tbl[] { /* Directive name table */ "define", "include", "undef", "if", "ifdef", "ifndef", "else", "endif", "message", "line", NIL }; int dir_type[] { /* Directive mnemonics for 'switch' */ DEF, INCL, UNDEF, IF, IFD, IFN, ELSE, ENDIF, MSG, LINE, }; int err_cnt; /* Error counter */ #ifdef decus int nocomp TRUE; /* No stuff for Decus compiler */ #ifdef rsx char syn_err[] {"RSX version. Syntax: mpx ifile ofile"}; #else #ifdef rt11 char syn_err[] {"RT-11 Version. Syntax: run mp \"ifile ofile\""}; #else char syn_err[] {"Decus compiler assumed. 
Syntax: mp ifile ofile"}; #endif #endif #else int nocomp FALSE; /* No-compilation flag */ char syn_err[] {"Syntax: mp [-P] ifile ofile"}; #endif /* * I/O-related stuff */ #ifdef stdio FILE *curinfil = NULL; /* Pointer to current input file */ FILE *outfil = NULL; /* Pointer to output file */ #else struct buf *curinfil NIL; /* Pointer to file buffer for current */ /* input file */ struct buf *outfil NIL; /* Current output file buffer pointer */ #endif /* * #include */ int inclpdl[INCDEPTH]; struct stack inclstk { INCDEPTH, 0, inclpdl }; int inclvl 0; /* #include nesting level */ int inclflag FALSE; /* Ugly fix for extraneous before */ /* #include line itself */ char *srchlist[] { "", /* This must be first! */ #ifdef Digital /* Next, look on device 'C:', then ... */ "c:", #ifdef rt11 /* On RT-11, system disk is SY: */ "sy:", #else "lb:", /* On RSX, default task/library disk LB:*/ #endif #else "/usr/include/", /* UNIX */ #endif NIL /* This must be last! */ }; /* * Conditional compilation */ int ifpdl[IFDEPTH]; struct stack ifstack { IFDEPTH, 0, ifpdl }; int father TRUE; /* Previous level expansion flag */ int self TRUE; /* Current level expansion flag */ /* * Input line processing related data structures */ char line[LINESZ]; int lineno[INCDEPTH]; struct sym *_line; /* _LINE macro -- definition is current */ /* line number */ char *lp; /* Line pointer */ /* * Macro definition and expansion data structures */ char def_ala[ALASIZE][MAXIDLEN+1]; /* Definition ala */ char *call_ala[ALASIZE]; /* Dynamically-allocatable call ala */ char *callpdl[DEFDEPTH]; struct stack callstk { DEFDEPTH, 0, callpdl }; /* Call stack - used to process nested */ /* macro calls */ struct sym *refpdl[DEFDEPTH]; struct stack refstack { DEFDEPTH, 0, refpdl }; /* Stack for referenced symbols */ int *frame NIL; /* * Main driver */ main(argc, argv) char *argv[]; int argc; { register char *tp1, *tp2; register int i; char name[MAXIDLEN+1]; int tvec[2], dir_kind, filecnt; /* * Initialize line number 
stack */ for (i = 0; i < INCDEPTH; i++) lineno[i] = 0; for (i = 1, filecnt = 0; i < argc; i++) if (*argv[i] == '-') switch (argv[i][1]) { case 'p': case 'P': nocomp = TRUE; continue; default: printerr(syn_err); exit(ERROR); } else if (filecnt > 2) { printerr(syn_err); exit(ERROR); } else tvec[filecnt++] = i; if (filecnt != 2) { printerr(syn_err); exit(ERROR); } #ifdef decus if ((curinfil = fopen(argv[tvec[0]], "r")) == NULL) { #else curinfil = get_mem(sizeof *curinfil); if (fopen(argv[tvec[0]], curinfil) < 0) { #endif printerr("Can't open input file"); exit(ERROR); } #ifdef decus if ((outfil = fopen(argv[tvec[1]], "w")) == NULL) { #else outfil = get_mem(sizeof *outfil); if (fcreat(argv[tvec[1]], outfil) < 0) { #endif printerr("Can't create output file"); exit(ERROR); } sym_init(); /* Initialize symbol table */ i = strlen(argv[tvec[0]]); sym_enter("_FILENAM", 0, tp1 = get_mem(i+3)); *tp1++ = '"'; strcopy(argv[tvec[0]], tp1); *(tp1+i) = '"'; *(tp1+i+1) = EOS; _line = sym_enter("_LINE", 0, tp1 = get_mem(9)); strcopy("\" \"", tp1); #ifdef decus sym_enter("decus",0,""); /* Do predefines like Decus compiler */ sym_enter("nofpu",0,""); sym_enter("pdp11",0,""); #ifdef vax sym_enter("vax",0,""); #endif #ifdef rsx sym_enter("rsx",0,""); #endif #ifdef rt11 sym_enter("rt11",0,""); #endif tp1 = ctime(NULL); #else time(tvec); /* Get encoded time and date */ tp1 = ctime(tvec); /* Convert to text string */ #endif *(tp1+24) = '"'; /* Replace ending '\n' with a '"' */ tp2 = get_mem(27); /* Date-time string is 27 characters */ sym_enter("_DATE", 0, tp2); *tp2++ = '"'; /* Emit initial double quote */ strcopy(tp1, tp2); /* * Main driving loop of the processor */ while (readline() != EOF) { if (line[0] == '#' && line[1] != EOS) { /* * A non-null directive? 
*/ lp = skipblnk(++lp); lp = get_id(lp, name); lp = skipblnk(lp); if ((dir_kind = dir_find(name)) == ERROR) printerr("Illegal directive"); else { if (father && self) non_cond(dir_kind); cond(dir_kind); } } if (line[0] == '#' || !(father && self)) line[0] = EOS; writeline(line); } if (!empty(&ifstack)) /* Check for dangling if's */ printerr("Unterminated #if"); #ifdef stdio fclose(outfil); #else fflush(outfil); /* Flush output file buffer */ close(outfil->fildes); /* Close output file */ #endif exit(err_cnt); } /* end main() */ /* * Process conditional compilation directives. This procedure is * always executed, regardless of whether or not we are flushing due * to a false conditional. The general technique used here is as follows. * The variables 'self' and 'father' contain the truth value of the * current if-else-endif clause and the immediately enclosing clause, * respectively. Whenever an if is encountered, both are stacked, * father &= self (to propagate any false conditionals), and * self = eval(arg). * * Upon encountering an else, self = !self, to reverse the sense of the * current condition. An endif causes the condition stack to be popped * into father and self, restoring our context to the immediately * enclosing level. * * Note: * This is a bad algorithm, as it requires a stack (and thus a maximum * nesting level). A true/false counter arraingement is to be preferred. 
* */

/*
 * cond(dir_kind) -- handle one conditional-compilation directive.
 *
 *	dir_kind	directive mnemonic; only IF, IFD, IFN, ELSE and
 *			ENDIF are acted upon, anything else is ignored.
 *
 * Reads the directive argument through the global 'lp' and updates the
 * globals 'father', 'self' and 'ifstack'.
 */
cond(dir_kind)
int dir_kind;
{
	char ident[MAXIDLEN+1];		/* #ifdef/#ifndef symbol name */

	if (dir_kind == IF || dir_kind == IFD || dir_kind == IFN) {
		/* Save the enclosing context: father first, then self */
		if ((push(father, &ifstack) == ERROR) ||
		    (push(self, &ifstack) == ERROR)) {
			printerr("Maximum #if depth exceeded");
			return;
		}

		/* A false enclosing clause forces this one false too */
		father &= self;
		if (!father) {
			self = FALSE;
			return;
		}

		if (dir_kind == IF) {
			self = (expr(lp) != 0);
			return;
		}

		lp = get_id(lp, ident);
		if (dir_kind == IFD)
			self = (lookup(ident) != NIL);
		else
			self = (lookup(ident) == NIL);
		return;
	}

	if (dir_kind == ELSE) {
		if (*lp != EOS)
			printerr("Extraneous argument");
		if (empty(&ifstack)) {
			printerr("#else without #if");
			return;
		}
		self = !self;		/* Reverse sense of the condition */
		return;
	}

	if (dir_kind == ENDIF) {
		if (*lp != EOS)
			printerr("Extraneous argument");
		if (empty(&ifstack)) {
			printerr("#endif without #if");
			return;
		}
		/* Restore in the reverse of the order pushed */
		self = (int)pop(&ifstack);
		father = (int)pop(&ifstack);
	}
	return;
}

/*
 * Non-conditional directive processing is performed here.  This
 * procedure is executed only if we are not currently flushing.  The
 * message and undef directives are implemented in a very straightforward
 * manner.  The others are a little more complex.  Macro definition is
 * accomplished in four steps.  First the name of the macro being
 * defined is picked up; second formal parameters are processed; third,
 * the actual definition is processed (and index marker substitution is
 * performed), and finally, the symbol is entered into the symbol table.
 * Include processing goes as follows.  First the path name and its
 * delimiter are picked up, then we attempt to find the file by
 * prepending the paths specified in 'srchlist[]' to it.  If the file
 * is found, the current input file is stacked (without closing it) and
 * the included file is set up as the new current input file.
*/ non_cond(dir_kind) int dir_kind; { register int i, t, argcnt; char *defptr, *fid, *tp, delim, name[MAXIDLEN+1]; extern char *get_def(); #ifdef stdio FILE *tbp; #else struct buf *tbp; #endif switch(dir_kind) { case LINE: writeline(line); /* Do nothing */ break; case MSG: printerr(lp); break; case UNDEF: lp = get_id(lp, name); if (sym_del(name) == ERROR) printerr("Symbol not defined"); break; case DEF: lp = get_id(lp, name); if (*name == EOS) { printerr("No symbol given"); break; } if ((argcnt = formal()) == ERROR) { printerr("Illegal argument list"); break; } defptr = get_def(lp, argcnt); sym_enter(name, argcnt, defptr); break; case INCL: if (inclvl == INCDEPTH) { printerr("Maximum #include depth exceeded"); break; } #ifndef stdio tbp = get_mem(sizeof *curinfil); #endif delim = *lp; if (delim != '<' && delim != '"') { printerr("Illegal file specification delimiter"); break; } delim = (delim == '<') ? '>' : '"'; fid = ++lp; while (*lp != delim && *lp != EOS) lp++; if (*lp == EOS) { printerr("Illegal file specification"); break; } *lp = EOS; /* * At this point 'fid' points to the null-terminated * file specification. If the file specification is * enclosed in '"' then the current directory is * first searched for the specified file; if this * fails or if the file spec is delimited by '<' * and '>' then a standard list of paths is searched * (see 'srchlist' array). */ for (i = (delim == '>') ? 1 : 0; srchlist[i]; i++) { tp = concat(srchlist[i], fid); #ifdef stdio if ((tbp = fopen(tp, "r")) != NULL) break; #else if ((t = fopen(tp, tbp)) >= 0) break; #endif } #ifdef stdio if (tbp == NULL) { #else if (t < 0) { #endif printerr("Failed to open 'include' file. Looked for"); fprintf(stderr,"\t'%s' on your default device,\n",fid); fprintf(stderr,"\tand on devices"); for (i=1;;i++) { if (srchlist[i] == NIL) break; fprintf(stderr," '%s'",srchlist[i]); } fputs(". 
Sorry.\n",stderr); free(tbp); break; } if (push(curinfil, &inclstk) == ERROR) screech("#include stack overflow (impossible)"); inclvl++; curinfil = tbp; /* * Inhibit in front of #include line at top level */ if (inclvl == 1) inclflag = TRUE; break; } /* end switch */ return; } /* end non_cond() */ /* * Read a line of input routine -- loads 'line[]' with the next line * from the current input file. Line continuation is handled here -- * any occurrence of backslash-newline is replaced by a blank and the * next line is tacked on to the current one. * * Upon hitting end of file, the include stack is popped to restore * input to the previous level. If the stack is empty, EOF is returned * (indicating end of file at the top level. */ int readline() { register int buf_indx, curch, i; int ctn_cnt; /* Continuation line counter */ buf_indx = 0, ctn_cnt = 0; do { while ((curch = getc(curinfil)) != '\n') { if (curch == EOF) { /* Current file exhausted */ #ifdef stdio fclose(curinfil); #else close(curinfil->fildes); free(curinfil); /* Release I/O node */ #endif if (empty(&inclstk)) { /* * Don't lose xyz */ if (buf_indx > 0) break; else return(EOF); } else { curinfil = #ifdef stdio (FILE *) #else (struct buf *) #endif pop(&inclstk); inclvl--; } continue; } line[buf_indx++] = curch; if (buf_indx >= LINESZ) { printerr("Next line too long, truncated."); while (curch != '\n' && curch != EOF) curch = getc(curinfil); break; } } lineno[inclvl]++; /* Bump source line number */ if (inclvl == 0) charincr(_line->defptr, 6); if (line[buf_indx-1] == '\\') { curch = line[buf_indx-1] = ' '; ctn_cnt++; /* Count a continuation line */ } } while (curch != '\n'); /* Repeat till real end of line */ line[buf_indx] = EOS; lp = line; for (i = 0; i < ctn_cnt; i++) writeline(""); /* Keep compiler in synch */ return(OK); } /* * Write line routine -- calls expand() to process macro calls, * emitting expanded text to the output file. 
If we are processing * a line from an include file, a character is emitted preceding * the line as a flag to the compiler. */ writeline(lineptr) char *lineptr; { if (inclvl > 0 && !inclflag && !nocomp) putc(SOH, outfil); /* Flag a #include line */ inclflag = FALSE; /* Reset #include line flag */ /* * Save our environment for expand() error recovery. * Then, expand the line to the output file. */ frame = envsave(); expand(lineptr, NIL, NIL, 0); putc('\n', outfil); } /* * Find a directive */ dir_find(cp) char *cp; { register int i; for (i = 0; dir_tbl[i] != NIL; i++) if (lexeq(dir_tbl[i], cp)) return(dir_type[i]); return(ERROR); } /* * Set up definition argument list array (def_ala) with the formal * parameters of the macro currently being defined. Returns the * number of formals actually processed, or ERROR if any syntax errors * are encountered. Nested parantheses are handled here to permit * macro calls as formal parameters. */ int formal() { register int cnt; if (*lp != '(') return(0); for (cnt = 0; cnt < ALASIZE; cnt++) { lp = skipblnk(++lp); lp = get_id(lp, def_ala[cnt]); if (def_ala[cnt][0] == EOS) break; lp = skipblnk(lp); if (*lp != ',') break; } if (*lp != ')') return(ERROR); lp++; /* Skip past the ')' */ return(cnt+1); /* Make it origin 1 and return */ } /* * Set up call argument list array (call_ala) with the actual parameters * of the macro currently being expanded. Each entry of the call_ala * is dynamically allocated; the maximum length of any one actual is * determined by the compile-time constant "MAXARGSZ". All occurrences * of formal parameters are replaced by their corresponding actuals at * the next outer level of call, thereby propagating actual parameters * through nested calls. The updated source pointer is returned to the * call unless an error is encountered, in which case "ERROR" is * returned. 
*/

/*
 *	srcp	text following the macro name in the source
 *	argcnt	number of actuals the macro requires; when 0, 'srcp' is
 *		returned untouched
 *
 * The new actuals are collected in a private prototype ala while the
 * caller's call_ala is still needed to substitute index markers; the
 * call_ala is overwritten only once the whole list has been accepted.
 * On an error, therefore, it is the prototype entries built so far that
 * are released -- the call_ala still belongs to the enclosing call.
 */
char *
actual(srcp, argcnt)
char *srcp;
int argcnt;
{
	register char *src;
	register int argno, parenlvl;
	char *dst, *actp;
	char *dstmax;			/* Last usable byte of arg_buf */
	static char arg_buf[MAXARGSZ];
	static char *proto_ala[ALASIZE];	/* Call's prototype ala */

	src = srcp;
	if (argcnt == 0)
		return(src);
	if (*src != '(') {
		printerr("Required argument(s) missing");
		return(ERROR);
	}
	dstmax = arg_buf+MAXARGSZ-1;
	for (argno = 0; argno < argcnt && *src != ')'; argno++) {
		/* Step over the '(' or ',' that introduces this actual */
		dst = arg_buf, src++, parenlvl = 0;
		for (;;) {
			src = skipq(src, &dst, dstmax);
			/* An actual ends at an unnested ',' or ')' */
			if ((*src == ')' || *src == ',') && parenlvl == 0)
				break;
			switch(*src) {
			case '(':
				parenlvl++;
				break;
			case ')':
				parenlvl--;
				break;
			case EOS:
				printerr("Unterminated argument list");
				while (argno > 0)
					free(proto_ala[--argno]);
				return(ERROR);
			}
			if ((*src & 0200) == 0)
				putch(*src++, &dst, dstmax);
			else {
				/* Index marker: copy the outer actual */
				actp = call_ala[*src++ & 0177];
				while (*actp != EOS)
					putch(*actp++, &dst, dstmax);
			}
		}
		if (putch(EOS, &dst, dstmax) == ERROR) {
			printerr("Actual parameter length exceeds maximum");
			arg_buf[0] = EOS;
		}
		proto_ala[argno] = get_mem(dst - arg_buf);
		strcopy(arg_buf, proto_ala[argno]);
	}
	if (*src++ != ')' || argno < argcnt) {
		printerr("Argument count error");
		while (argno > 0)
			free(proto_ala[--argno]);
		return(ERROR);
	}
	for (argno = 0; argno < argcnt; argno++)
		call_ala[argno] = proto_ala[argno];
	return(src);
}

/*
 * Get macro definition routine -- loads a dynamically-allocated buffer
 * with the text of the macro definition.  A pointer to the resulting
 * definition block is passed back to the caller.  A pointer to the
 * start of the definition to be processed is passed in 'srcp'; the
 * number of arguments to the macro is passed in 'numargs'.
 * Index markers representing the index of the formal parameter in the
 * ala are substituted for formals as the definition is copied.  Index
 * markers are encoded as (0200 | ala index).  Note that no buffer
 * limit checking is performed, because in no case will the processed
 * text be larger than the original source.
 *
 * Leading whitespace is discarded.
05-Dec-80 RBD
 */

char *
get_def(srcp, numargs)
char *srcp;
int numargs;
{
	register char *in, *id_end;
	register int formal_no;
	char *out;
	static char defbuf[LINESZ];	/* Definition is assembled here */
	char idbuf[MAXIDLEN+1];

	out = defbuf;
	in = skipblnk(srcp);		/* Junk leading whitespace */

	for (;;) {
		in = skipq(in, &out, defbuf+LINESZ-1);

		if (c_alpha(*in)) {
			/* An identifier: is it one of the formals? */
			id_end = get_id(in, idbuf);
			formal_no = 0;
			while (formal_no < numargs) {
				if (lexeq(idbuf, def_ala[formal_no]))
					break;
				formal_no++;
			}
			if (formal_no < numargs) {
				/* Yes -- emit its index marker instead */
				*out++ = 0200 | formal_no;
				in = id_end;
			}
			/* No -- copy the identifier through as it is */
			while (in < id_end)
				*out++ = *in++;
			continue;
		}

		/* Anything else is copied; the EOS ends the scan */
		if ((*out++ = *in++) == EOS)
			break;
	}

	id_end = get_mem(out - defbuf);
	strcopy(defbuf, id_end);
	return(id_end);
}

/*
 * Macro expansion routine -- scans text pointed to by 'srcp' for macro
 * calls and formal parameters (which are by now represented as index
 * markers (see 'get_def()') ).  If a character is neither a part of a
 * macro call nor an index marker, it is transmitted to the output file
 * or buffer (see 'putch()').  Upon encountering a macro call, the
 * current argument list array is pushed onto the stack, the new actual
 * parameters are loaded, and 'expand()' is called recursively.  On
 * return from this recursive call, the arguments are released and the
 * ala stack is popped.  Upon encountering a formal parameter,
 * 'expand()' is simply called recursively to expand the actual parameter
 * corresponding to the formal.  Note that the buffer length check is
 * performed only upon encountering the end of the current definition;
 * meanwhile excess characters are flushed.  Thus only one check is
 * necessary and only one error is reported.  To prevent cyclical
 * definitions, each symbol table entry has a flag which is set whenever
 * a call on this macro is encountered.  This flag is checked for a zero
 * value before expansion of a macro begins; if it is set, the error
 * return is taken.  This prevents circular definitions from causing
 * problems.
*/ int expand(srcp, dst, dstmax, argcnt) char *srcp, **dst, *dstmax; int argcnt; { register char *src, *p; register struct sym *sym; char idbuf[MAXIDLEN+1]; src = srcp; for (;;) { src = skipq(src, dst, dstmax); if (c_alpha(*src)) { p = get_id(src, idbuf); if ((sym = lookup(idbuf)) == NIL) { while (src < p) putch(*src++, dst, dstmax); } else { src = p; if (sym->ref != 0) exp_err("Illegal recursive expansion", argcnt); else { if (push(sym, &refstack) == ERROR) exp_err("Exceeded def nest", argcnt); else sym->ref = 1; } push_ala(argcnt); if ((src = actual(src, sym->nargs)) == ERROR) exp_err(NIL, 0); expand(sym->defptr, dst, dstmax, sym->nargs); rlse_ala(sym->nargs-1); pop_ala(argcnt); } } else { if ((*src & 0200) != 0) { expand(call_ala[*src++ & 0177], dst, dstmax, argcnt); } else { if (*src == EOS) { if (!empty(&refstack)) (struct sym *) pop(&refstack)->ref = 0; return(OK); } else if (putch(*src++, dst, dstmax) == ERROR) exp_err("expansion length exceeded", argcnt); } } } } /* * Error routine for expand(). If 'msg' is non-NIL, the message pointed * to by 'msg' is printed on the standard error output. The second * argument specifies the number of currently-active entries in the call * ala -- that is, the number of ala entries that must be freed up before * aborting. After the ala has been released, the ala stack is purged to * release the memory used by suspended calls to expand(). */ exp_err(msg, ala_cnt) char *msg; int ala_cnt; { if (msg != NIL) printerr(msg); while (!empty(&refstack)) pop(&refstack)->ref = 0; rlse_ala(ala_cnt-1); /* Release current ala entries */ while (!empty(&callstk)) free(pop(&callstk)); /* Purge stack */ envreset(frame, ERROR); } /* * Routine to push the contents of the call ala onto the stack. */ push_ala(numargs) int numargs; { register int i; for (i=0; i= 0; i--) call_ala[i] = pop(&callstk); return; }