/* * GRESUB - grep subroutines * * Edits: * * 27-Jan-85 lmf Consolidate subroutines with grep, ch and lines * 02-Feb-85 lmf Fix bug from above * 16-Oct-85 lmf Don't change match pointer without match */ #define IDENT "v1.3.d" #include #include "grep.h" #define FAST register ; extern int case_fl ; /* case specific match flag */ extern int debug ; /* Set for debug code */ extern char *pp ; /* General pointer into tmp_buf */ extern char *pat_beg ; /* start of matched pattern */ extern char *pat_end ; /* and its end */ extern char lbuf[LMAX] ; /* input line buffer */ extern char tmp_buf[LMAX] ; /* scratch buffer */ /*++ * CANT - Where's the damn file */ cant(s) char *s ; { fprintf(stderr, "%s: cannot open\n", s) ; } /*++ * COMPILE - Compile the pattern into global pbuf[] */ compile(source, dest) char *source ; /* Pattern to compile */ char *dest ; /* and where to put it */ { FAST char *s ; /* Source string pointer */ FAST char *lp ; /* Last pattern pointer */ FAST int c ; /* Current character */ int o ; /* Temp */ char *spp ; /* Save beginning of pattern */ char *cclass() ; /* Compile class routine */ s = source ; if (debug) printf("Pattern = \"%s\"\n", s) ; pp = tmp_buf ; while (c = *s++) { /* * STAR, PLUS and MINUS are special. */ if (c == '*' || c == '+' || c == '-') { if (pp == tmp_buf || (o = pp[-1]) == BOL || o == EOL || o == STAR || o == PLUS || o == MINUS) badpat("Illegal occurrance op.", source, s) ; store(ENDPAT) ; store(ENDPAT) ; spp = pp ; /* Save pattern end */ while (--pp > lp) /* Move pattern down */ *pp = pp[-1] ; /* one byte */ *pp = (c == '*') ? STAR : (c == '-') ? MINUS : PLUS ; pp = spp ; /* Restore pattern end */ continue ; } /* * All the rest. */ lp = pp ; /* Remember start */ switch(c) { case BOL_CH : if(s-1 == source) store(BOL) ; /* only MAGIC at start of line */ else { store(CHAR) ; store(c) ; } break ; case EOL_CH : if(*s == 0) store(EOL) ; /* only MAGIC at end of line */ else { store(CHAR) ; store(c) ; } break ; case ANY_CH : /* anything goes */ store(ANY) ; break ; case '[': s = cclass(source, s) ; break ; case ':': if (*s) { c = *s++ ; switch(lower(c)) { case 'a': store(ALPHA) ; break ; case 'd': store(DIGIT) ; break ; case 'n': store(NALPHA) ; break ; case ' ': store(PUNCT) ; break ; default: badpat("Unknown : type", source, s) ; } break ; } else badpat("No : type", source, s) ; case '\\': if (*s) c = *s++ ; default: store(CHAR) ; store(lower(c)) ; } } store(ENDPAT) ; /* set end if interest */ store(0) ; /* Terminate string */ cpystr(dest, tmp_buf) ; /* Return string */ if (debug) { for (lp = tmp_buf ; lp <= pp ;) { if ((c = (*lp++ & 0377)) < ' ') printf("<%o>", c) ; else printf("%c", c) ; } printf("\n") ; } } /*++ * CCLASS - Compile a class (within []) */ static char * cclass(source, src) char *source ; /* Pattern start -- for error msg. */ char *src ; /* Class start */ { FAST char *s ; /* Source pointer */ FAST char *cp ; /* Pattern start */ FAST int c ; /* Current character */ int o ; /* Temp */ s = src ; o = CLASS ; if (*s == '^') { ++s ; o = NCLASS ; } store(o) ; cp = pp ; store(0) ; /* Byte count */ while ((c = *s++) && c!=']') { if (c == '\\') { /* Store quoted char */ if ((c = *s++) == '\0') /* Gotta get something */ badpat("Class terminates badly", source, s) ; else store(lower(c)) ; } else if (c == '-' && (pp - cp) > 1 && *s != ']' && *s != '\0') { c = pp[-1] ; /* Range start */ pp[-1] = RANGE ; /* Range signal */ store(c) ; /* Re-store start */ c = *s++ ; /* Get end char and */ store(lower(c)) ; /* Store it */ } else { store(lower(c)) ; /* Store normal char */ } } if (c != ']') badpat("Unterminated class", source, s) ; if ((c = (pp - cp)) >= 256) badpat("Class too large", source, s) ; if (c == 0) badpat("Empty class", source, s) ; *cp = c ; return(s) ; } /*++ * STORE - Pack temp buffer if room available */ store(op) { if (pp >= &tmp_buf[PMAX]) error("Pattern too complex\n") ; *pp++ = op ; } /*++ * LOWER - conditional case conversion */ lower(c) int c ; { return(case_fl ? c : tolower(c)) ; /* Case conversion */ } /*++ * MATCH - Match the current line, return 1 if it does. */ match(line, pattern) char *line ; /* Line to check */ char *pattern ; /* String to check against */ { FAST char *l ; /* Line pointer */ char *mpnt ; char *pmatch() ; for (l = line ; *l ; l++) { if (mpnt = pmatch(l, pattern)) { /* Check for match */ pat_beg = l ; /* Where it started */ pat_end = mpnt ; /* and where it stops */ if(debug) fprintf(stderr,"Match at: %d - %d\n", pat_beg-lbuf, pat_end-lbuf); return(TRUE) ; } } return(FALSE) ; } /*++ * PMATCH - Pattern match longest 'pattern' occuring in 'line' */ static char * pmatch(line, pattern) char *line ; /* (partial) line to match */ char *pattern ; /* (partial) pattern to match */ { FAST char *l ; /* Current line pointer */ FAST char *p ; /* Current pattern pointer */ FAST char c ; /* Current character */ char *e ; /* End for STAR and PLUS match */ int op ; /* Pattern operation */ int n ; /* Class counter */ char *are ; /* Start of STAR match */ l = line ; if (debug > 1) printf("pmatch(\"%s\")\n", line) ; p = pattern ; while ((op = *p++) != ENDPAT) { if (debug > 1) printf("byte[%d] = 0%o, '%c', op = 0%o\n", l-line, *l, *l, op) ; switch(op) { case CHAR: if (lower(*l++) != *p++) return(FALSE) ; break ; case BOL: if (l != lbuf) return(FALSE) ; break ; case EOL: if (*l != '\0') return(FALSE) ; break ; case ANY: if (*l++ == '\0') return(FALSE) ; break ; case DIGIT: if ((c = *l++) < '0' || (c > '9')) return(FALSE) ; break ; case ALPHA: c = lower(*l++) ; if (c < 'a' || c > 'z') return(FALSE) ; break ; case NALPHA: c = lower(*l++) ; if (c >= 'a' && c <= 'z') break ; else if (c < '0' || c > '9') return(FALSE) ; break ; case PUNCT: c = *l++ ; if (c == 0 || c > ' ') return(FALSE) ; break ; case CLASS: case NCLASS: c = lower(*l++) ; n = *p++ & 0377 ; do { if (*p == RANGE) { p += 3 ; n -= 2 ; if (c >= p[-2] && c <= p[-1]) break ; } else if (c == *p++) break ; } while (--n > 1) ; if ((op == CLASS) == (n <= 1)) return(FALSE) ; if (op == CLASS) p += n - 2 ; break ; case MINUS: e = pmatch(l, p) ; /* Look for a match */ while (*p++ != ENDPAT) ; /* Skip over pattern */ if (e) /* Got a match? */ l = e ; /* Yes, update string */ break ; /* Always succeeds */ case PLUS: /* One or more ... */ if ((l = pmatch(l, p)) == 0) return(FALSE) ; /* Gotta have a match */ case STAR: /* Zero or more ... */ are = l ; /* Remember line start */ while (*l && (e = pmatch(l, p))) l = e ; /* Get longest match */ while (*p++ != ENDPAT) ; /* Skip over pattern */ while (l >= are) { /* Try to match rest */ if (e = pmatch(l, p)) return(e) ; --l ; /* Nope, try earlier */ } return(FALSE) ; /* Nothing else worked */ default: printf("Bad op code %d\n", op) ; error("Cannot happen -- match\n") ; } } return(l) ; /* return end of match */ }