#
/*
 * grep.
 *
 * Search files for lines matching a simple regular expression.
 * The pattern is compiled once into pbuf[] (see compile()) and then
 * matched against each input line by match()/pmatch().
 *
 * This file is written in pre-ANSI (K&R) C and relies on library
 * routines that are not defined here: fwild(), fnext(), fgetname(),
 * fgetss() and error().
 */

/*)BUILD	$(TKBOPTIONS) = {
		TASK	= ...GRE
	}
*/

#ifdef	DOCUMENTATION

title	grep	Get Regular Expression and Print
index		Get Regular Expression and Print

synopsis

	grep [options] regular_expression [ file ...]

description

	Grep searches each specified file (if none are specified,
	it reads stdin) for lines matching the given pattern.
	Grep allows wild-card file names in the file list.

	The following options are recognized:
	.lm +8
	.s.i -4;-c	Only print a count of matching lines.
	.s.i -4;-f	Print file name option, see below.
	.s.i -4;-n	Precede each line by its line number.
	.s.i -4;-v	Print non-matching lines.
	.s.lm -8
	The file name is normally printed if more than one file
	argument is given, or if a wild-card file name was specified.
	The "-f" flag reverses this action (print name if one file,
	not if more).
	.s
	The regular_expression defines the pattern to search for.
	Upper- and lower-case are not distinguished by grep.
	Blank lines never match.
	The regular_expression should be quoted.
	.s
	The regular_expression is built out of the following elements:
	.lm +8
	.s.i -6;x	An ordinary character (not mentioned below)
	matches that character.
	.s.i -6;'_\'	The backslash quotes any character.
	"_\$" matches a dollar-sign.
	.s.i -6;'_^'	A circumflex at the beginning of an expression
	matches the beginning of a line.
	.s.i -6;'$'	A dollar-sign at the end of an expression
	matches the end of a line.
	.s.i -6;'.'	A period matches any character except "new-line".
	.s.i -6;':a'	A colon matches a class of characters##described
	.i -6;':d'	by##the##following##character.##":a" matches any
	.i -6;':n'	alphabetic, ":d" matches##digits.##":n"##matches
	.i -6;':#'	alphanumerics, and ":#" matches spaces, tabs, and
	.br
	other control characters, such as newline.
	.s.i -6;'*'	An expression followed by an asterisk matches
	zero or more occurrences of that expression:
	"fo*" matches "f", "fo", "foo", etc.
	.s.i -6;'+'	An expression followed by a plus sign matches
	one or more occurrences of that expression:
	"fo+" matches "fo", etc.
	.s.i -6;'-'	An expression followed by a minus sign optionally
	matches the expression.
	.s.i -6;'[]'	A string enclosed in square brackets matches any
	character in that string, but no others.
	If the first character in the string is a circumflex,
	the expression matches any character except "new-line"
	and the characters in the string.
	.s
	For example, "A[xyz]+B" matches "AxxB" and "AxyzzyB",
	while "A[^xyz]+B" matches "AbcB" but not "AxB".
	A range of characters may be specified by two characters
	separated by "-".
	Note that [a-z] matches alphabetics, while [z-a] never matches.
	.s.lm -8
	The concatenation of regular expressions is a regular expression.

diagnostics

	.lm +8
	.s.i -8;No arguments
	.s.i -8;Unknown flag
	.s.i -8;No pattern
	.s.i -8;"file__name": cannot open
	.s.i -8;Illegal occurrence operator ...
	.s
	An operator was found in an illegal context.
	For example, the pattern "*foo" is illegal as the '*' operator
	must modify a previously specified pattern element.
	.s.i -8;No : type
	.s
	A colon was followed by an unknown modifier.
	.s.i -8;Class terminates badly
	.s
	A character class "[...]" was incorrectly terminated.
	For example, "[A-]" is incorrect.
	.s.i -8;Unterminated class
	.s
	Character classes must be terminated by ']'.
	.s.i -8;Class too large
	.s
	An internal buffer filled.
	.s.i -8;Empty class
	.s
	A character class must designate something: "[]" is illegal.
	.s.i -8;Pattern too complex
	.s
	An internal buffer filled.
	.lm -8

author

	David Conroy, Martin Minow.

bugs

#endif

/*
 * On-line help text, printed one entry per line by help().
 * The list is terminated by a null pointer.
 */
char	*documentation[] = {
"grep searches a file for a given pattern. Execute by",
" grep [flags] regular_expression file_list",
"",
"Flags are single characters preceeded by '-':",
" -c Only a count of matching lines is printed",
" -f Print file name for matching lines switch, see below",
" -n Each line is preceeded by its line number",
" -v Only print non-matching lines",
"",
"On RSX, RT11, RSTS, input or output may be redirected:",
" grep ... file.out",
"The file_list is a list of files (wildcards are acceptable on RSX modes).",
"If no files are given, input comes from the terminal. There is no prompting.",
"The file name is normally printed if there is a file given.",
"The -f flag reverses this action (print name no file, not if more).",
"",
"The regular_expression defines the pattern to search for. Upper- and",
"lower-case are always ignored. Blank lines never match. The expression",
"should be quoted to prevent file-name translation.",
"x An ordinary character (not mentioned below) matches that character.",
"'\\' The backslash quotes any character. \"\\$\" matches a dollar-sign.",
"'^' A circumflex at the beginning of an expression matches the",
" beginning of a line.",
"'$' A dollar-sign at the end of an expression matches the end of",
" a line.",
"'.' A period matches any character except \"new-line\".",
"':a' A colon matches a class of characters described by the following",
"':d' character. \":a\" matches any alphabetic, \":d\" matches digits,",
"':n' \":n\" matches alphanumerics, \": \" matches spaces, tabs, and",
"': ' other control characters, such as new-line.",
"'*' An expression followed by an asterisk matches zero or more",
" occurrances of that expression: \"fo*\" matches \"f\", \"fo\"",
" \"foo\", etc.",
"'+' An expression followed by a plus sign matches one or more",
" occurrances of that expression: \"fo+\" matches \"fo\", etc.",
"'-' An expression followed by a minus sign optionally matches",
" the expression.",
"'[]' A string enclosed in square brackets matches any character in",
" that string, but no others. If the first character in the",
" string is a circumflex, the expression matches any character",
" except \"new-line\" and the characters in the string. For",
" example, \"[xyz]\" matches \"xx\" and \"zyx\", while \"[^xyz]\"",
" matches \"abc\" but not \"axb\". A range of characters may be",
" specified by two characters seperated by \"-\". Note that,",
" [a-z] matches alphabetics, while [z-a] never matches.",
"",
"The concatenation of regular expressions is a regular expression.",
"",
0 };

#include <stdio.h>

#define	LMAX	512		/* Size of the input line buffer	*/
#define	PMAX	256		/* Size of the compiled pattern buffer	*/

/*
 * Opcodes stored in the compiled pattern (pbuf[]).
 */
#define	CHAR	1		/* Literal: followed by the character	*/
#define	BOL	2		/* '^'  beginning of line		*/
#define	EOL	3		/* '$'  end of line			*/
#define	ANY	4		/* '.'  any character			*/
#define	CLASS	5		/* [..] followed by count and members	*/
#define	NCLASS	6		/* [^..] followed by count and members	*/
#define	STAR	7		/* '*'  prefix: zero or more		*/
#define	PLUS	8		/* '+'  prefix: one or more		*/
#define	MINUS	9		/* '-'  prefix: zero or one		*/
#define	ALPHA	10		/* ":a"					*/
#define	DIGIT	11		/* ":d"					*/
#define	NALPHA	12		/* ":n"					*/
#define	PUNCT	13		/* ": "					*/
#define	RANGE	14		/* Inside a class: followed by lo, hi	*/
#define	ENDPAT	15		/* End of (sub)pattern			*/

int	cflag;			/* -c: print only a count		*/
int	fflag;			/* -f: file name printing switch	*/
int	nflag;			/* -n: print line numbers		*/
int	vflag;			/* -v: print non-matching lines		*/
int	nfile;			/* Number of file arguments remaining	*/

int	debug	=	0;	/* Set for debug code			*/

char	*pp;			/* Next free byte in pbuf[]		*/

char	file_name[81];		/* Name of the file being searched	*/
char	lbuf[LMAX];		/* Current input line			*/
char	pbuf[PMAX];		/* Compiled pattern			*/

main(argc, argv)
char *argv[];
/*
 * Parse the command line, compile the first non-flag argument as the
 * pattern, then search stdin (no file arguments) or every file that
 * the remaining arguments expand to.  Arguments that have been
 * consumed are overwritten with a null pointer in argv[].
 */
{
	register char	*p;
	register int	c, i;
	int		gotpattern;
	FILE		*f;
	int		gotcha;

	if (argc <= 1)
		usage("No arguments");
	if (argc == 2 && argv[1][0] == '?' && argv[1][1] == 0) {
		help();
		return;
	}
	nfile = argc-1;
	gotpattern = 0;
	for (i=1; i < argc; ++i) {
		p = argv[i];
#ifdef	vms
		/*
		 * Hand-knit I/O redirection for vms.
		 * A redirection is not a file to search, so it must
		 * not be counted in nfile; otherwise "grep pat <in"
		 * would skip stdin and search nothing.
		 */
		if (*p == '<') {
			freopen(&p[1], "r", stdin);
			argv[i] = NULL;
			--nfile;
			continue;
		}
		if (*p == '>') {
			if (p[1] == '>')
				freopen(&p[2], "a", stdout);
			else	freopen(&p[1], "w", stdout);
			argv[i] = NULL;
			--nfile;
			continue;
		}
#endif
		if (*p == '-') {
			++p;
			while (c = *p++) {
				switch(lower(c)) {

				case '?':
					help();
					break;

				case 'c':
					++cflag;
					break;

				case 'd':
					++debug;
					break;

				case 'f':
					++fflag;
					break;

				case 'n':
					++nflag;
					break;

				case 'v':
					++vflag;
					break;

				default:
					usage("Unknown flag");
				}
			}
			argv[i] = 0;
			--nfile;
		} else if (!gotpattern) {
			compile(p);
			argv[i] = 0;
			++gotpattern;
			--nfile;
		}
	}
	if (!gotpattern)
		usage("No pattern");
	if (nfile == 0)
		grep(stdin, 0);
	else {
		/* Any file argument inverts the sense of -f */
		fflag = fflag ^ (nfile > 0);
		for (i=1; i < argc; ++i) {
			if ((p = argv[i]) != NULL) {
				if ((f = fwild(p, "r")) == NULL)
					cant(p);
				else {
					gotcha = 0;
					while (fnext(f) != NULL) {
						fgetname(f, file_name);
						grep(f, file_name);
						gotcha = 1;
					}
					/* Nothing matched the file spec */
					if (!gotcha)
						cant(p);
				}
			}
		}
	}
}

file(s)
char *s;
/*
 * Print the "File name:" heading for file s.
 */
{
	printf("File %s:\n", s);
}

cant(s)
char *s;
/*
 * Report on stderr that file s could not be opened.
 */
{
	fprintf(stderr, "%s: cannot open\n", s);
}

help()
/*
 * Give good help: print every line of documentation[].
 */
{
	register char **dp;

	for (dp = documentation; *dp; dp++)
		printf("%s\n", *dp);
}

usage(s)
char	*s;
/*
 * Print error message s and a usage summary on stderr, then exit
 * with status 1.
 */
{
	fprintf(stderr, "?GREP-E-%s\n", s);
	fprintf(stderr,
		"Usage: grep [-cfnv] pattern [file ...]. grep ? for help\n");
	exit(1);
}

compile(source)
char	*source;		/* Pattern to compile			*/
/*
 * Compile the pattern into global pbuf[].
 *
 * Each pattern element is stored as an opcode, optionally followed
 * by operand bytes.  An occurrence operator ('*', '+', '-') is
 * stored as a prefix: the preceding element is moved down one byte,
 * the operator opcode is put in front of it and an ENDPAT is put
 * after it.  The whole pattern ends with ENDPAT followed by 0.
 */
{
	register char	*s;		/* Source string pointer	*/
	register char	*lp;		/* Last pattern pointer		*/
	register int	c;		/* Current character		*/
	int		o;		/* Temp				*/
	char		*spp;		/* Save beginning of pattern	*/
	char		*cclass();	/* Compile class routine	*/

	s = source;
	if (debug)
		printf("Pattern = \"%s\"\n", s);
	pp = pbuf;
	lp = pbuf;			/* No previous element yet	*/
	while (c = *s++) {
		/*
		 * STAR, PLUS and MINUS are special.
		 */
		if (c == '*' || c == '+' || c == '-') {
			/*
			 * The operator must follow an element that
			 * consumes a character.  Test the opcode at the
			 * start of the previous element (*lp), not the
			 * last byte stored: the last byte may be an
			 * operand (e.g. a literal TAB has the same value
			 * as MINUS), and after an operator it is always
			 * ENDPAT, which would let "a**" through and make
			 * the matcher loop forever on an empty match.
			 */
			if (pp == pbuf ||
			    (o = *lp) == BOL || o == EOL ||
			    o == STAR || o == PLUS || o == MINUS)
				badpat("Illegal occurrance op.", source, s);
			store(ENDPAT);
			store(ENDPAT);		/* Makes room for the shift */
			spp = pp;		/* Save pattern end	*/
			while (--pp > lp)	/* Move pattern down	*/
				*pp = pp[-1];	/* one byte		*/
			*pp =	(c == '*') ? STAR :
				(c == '-') ? MINUS : PLUS;
			pp = spp;		/* Restore pattern end	*/
			continue;
		}
		/*
		 * All the rest.
		 */
		lp = pp;			/* Remember start	*/
		switch(c) {

		case '^':
			store(BOL);
			break;

		case '$':
			store(EOL);
			break;

		case '.':
			store(ANY);
			break;

		case '[':
			s = cclass(source, s);
			break;

		case ':':
			if (*s) {
				c = *s++;
				switch(lower(c)) {

				case 'a':
					store(ALPHA);
					break;

				case 'd':
					store(DIGIT);
					break;

				case 'n':
					store(NALPHA);
					break;

				case ' ':
					store(PUNCT);
					break;

				default:
					badpat("Unknown : type", source, s);
				}
				break;
			}
			else	badpat("No : type", source, s);
			/* Falls through if badpat() returns */

		case '\\':
			if (*s)
				c = *s++;
			/* Fall through: store the quoted character */

		default:
			store(CHAR);
			store(lower(c));
		}
	}
	store(ENDPAT);
	store(0);				/* Terminate string	*/
	if (debug) {
		for (lp = pbuf; lp < pp;) {
			if ((c = (*lp++ & 0377)) < ' ')
				printf("\\%o ", c);
			else	printf("%c ", c);
		}
		printf("\n");
	}
}

char *
cclass(source, src)
char	*source;		/* Pattern start -- for error msg.	*/
char	*src;			/* Class start				*/
/*
 * Compile a class (within []).
 *
 * Stores CLASS or NCLASS, a count byte, and the class members.  The
 * count includes the count byte itself.  A range "a-z" is stored as
 * RANGE, low, high.  Returns a pointer to the source character
 * following the closing ']'.
 */
{
	register char	*s;		/* Source pointer		*/
	register char	*cp;		/* Pattern start		*/
	register int	c;		/* Current character		*/
	int		o;		/* Temp				*/

	s = src;
	o = CLASS;
	if (*s == '^') {
		++s;
		o = NCLASS;
	}
	store(o);
	cp = pp;
	store(0);				/* Byte count		*/
	while ((c = *s++) && c!=']') {
		if (c == '\\') {		/* Store quoted char	*/
			if ((c = *s++) == '\0')	/* Gotta get something	*/
				badpat("Class terminates badly", source, s);
			else	store(lower(c));
		}
		else if (c == '-' &&
			 (pp - cp) > 1 && *s != ']' && *s != '\0') {
			/* '-' between two members: a range */
			c = pp[-1];		/* Range start		*/
			pp[-1] = RANGE;		/* Range signal		*/
			store(c);		/* Re-store start	*/
			c = *s++;		/* Get end char and	*/
			store(lower(c));	/* Store it		*/
		}
		else {
			store(lower(c));	/* Store normal char	*/
		}
	}
	if (c != ']')
		badpat("Unterminated class", source, s);
	if ((c = (pp - cp)) >= 256)
		badpat("Class too large", source, s);
	/*
	 * The count includes the count byte, so an empty class has a
	 * count of 1, never 0.  Letting it through would make pmatch()
	 * read the opcode after the class as a class member.
	 */
	if (c <= 1)
		badpat("Empty class", source, s);
	*cp = c;
	return(s);
}

store(op)
int	op;
/*
 * Append one byte to the compiled pattern, complaining if pbuf[]
 * is already full.
 */
{
	if (pp >= &pbuf[PMAX])
		error("Pattern too complex\n");
	*pp++ = op;
}

lower(c)
register int c;
/*
 * Return c with 'A'..'Z' folded to lower case; other values are
 * returned unchanged.
 */
{
	if (c>='A' && c<='Z')
		c += 'a'-'A';
	return(c);
}

badpat(message, source, stop)
char	*message;		/* Error message			*/
char	*source;		/* Pattern start			*/
char	*stop;			/* Pattern end				*/
/*
 * Report a pattern error on stderr, showing the pattern and how far
 * compilation got, then call error().
 */
{
	fprintf(stderr, "-GREP-E-%s, pattern is\"%s\"\n", message, source);
	fprintf(stderr, "-GREP-E-Stopped at byte %d, '%c'\n",
		(int)(stop-source), stop[-1]);
	error("?GREP-E-Bad pattern\n");
}

grep(fp, fn)
FILE	*fp;			/* File to process			*/
char	*fn;			/* File name (for -f option)		*/
/*
 * Scan the file for the pattern in pbuf[].
 *
 * Prints each selected line (or, with -c, only the count).  When
 * fflag is set and fn is not null, the file heading is printed once,
 * before the first output for this file.
 */
{
	register int lno, count, m;

	lno = 0;
	count = 0;
	while (fgetss(lbuf, sizeof lbuf, fp)) {
		++lno;
		m = match();
		if ((m && !vflag) || (!m && vflag)) {
			++count;
			if (!cflag) {
				if (fflag && fn) {
					file(fn);
					fn = 0;	/* Heading printed once	*/
				}
				if (nflag)
					printf("%d\t", lno);
				printf("%s\n", lbuf);
			}
		}
	}
	if (cflag) {
		if (fflag && fn)
			file(fn);
		printf("%d\n", count);
	}
}

match()
/*
 * Match the current line (in lbuf[]), return 1 if it does.
 * The pattern is tried at every starting position in the line; an
 * empty line is never tried and so never matches.
 */
{
	register char	*l;		/* Line pointer			*/
	char		*pmatch();

	for (l = lbuf; *l; l++) {
		if (pmatch(l, pbuf))
			return(1);
	}
	return(0);
}

char *
pmatch(line, pattern)
char	*line;			/* (partial) line to match		*/
char	*pattern;		/* (partial) pattern to match		*/
/*
 * Match the compiled pattern, up to its ENDPAT, against the text
 * starting at line.  Returns a pointer to the first character after
 * the matched text, or 0 if there is no match.
 */
{
	register char	*l;		/* Current line pointer		*/
	register char	*p;		/* Current pattern pointer	*/
	register char	c;		/* Current character		*/
	char		*e;		/* End for STAR and PLUS match	*/
	int		op;		/* Pattern operation		*/
	int		n;		/* Class counter		*/
	char		*are;		/* Start of STAR match		*/

	l = line;
	if (debug > 1)
		printf("pmatch(\"%s\")\n", line);
	p = pattern;
	while ((op = *p++) != ENDPAT) {
		if (debug > 1)
			printf("byte[%d] = 0%o, '%c', op = 0%o\n",
				(int)(l-line), *l, *l, op);
		switch(op) {

		case CHAR:
			if (lower(*l++) != *p++)
				return(0);
			break;

		case BOL:
			if (l != lbuf)
				return(0);
			break;

		case EOL:
			if (*l != '\0')
				return(0);
			break;

		case ANY:
			if (*l++ == '\0')
				return(0);
			break;

		case DIGIT:
			if ((c = *l++) < '0' || (c > '9'))
				return(0);
			break;

		case ALPHA:
			c = lower(*l++);
			if (c < 'a' || c > 'z')
				return(0);
			break;

		case NALPHA:
			c = lower(*l++);
			if (c >= 'a' && c <= 'z')
				break;
			else if (c < '0' || c > '9')
				return(0);
			break;

		case PUNCT:
			c = *l++;
			if (c == 0 || c > ' ')
				return(0);
			break;

		case CLASS:
		case NCLASS:
			c = lower(*l++);
			/*
			 * The end of the line matches no class.  Without
			 * this test a negated class would accept the
			 * terminating NUL and matching would continue
			 * past the end of the line.
			 */
			if (c == '\0')
				return(0);
			n = *p++ & 0377;	/* Count incl. itself	*/
			do {
				if (*p == RANGE) {
					p += 3;
					n -= 2;
					if (c >= p[-2] && c <= p[-1])
						break;
				}
				else if (c == *p++)
					break;
			} while (--n > 1);
			/* n <= 1 here means no member matched */
			if ((op == CLASS) == (n <= 1))
				return(0);
			if (op == CLASS)
				p += n - 2;	/* Skip unused members	*/
			break;

		case MINUS:
			e = pmatch(l, p);	/* Look for a match	*/
			while (*p++ != ENDPAT);	/* Skip over pattern	*/
			if (e)			/* Got a match?		*/
				l = e;		/* Yes, update string	*/
			break;			/* Always succeeds	*/

		case PLUS:			/* One or more ...	*/
			if ((l = pmatch(l, p)) == 0)
				return(0);	/* Gotta have a match	*/
			/* Fall through: the rest is zero or more */

		case STAR:			/* Zero or more ...	*/
			are = l;		/* Remember line start	*/
			while (*l && (e = pmatch(l, p)))
				l = e;		/* Get longest match	*/
			while (*p++ != ENDPAT);	/* Skip over pattern	*/
			while (l >= are) {	/* Try to match rest	*/
				if (e = pmatch(l, p))
					return(e);
				--l;		/* Nope, try earlier	*/
			}
			return(0);		/* Nothing else worked	*/

		default:
			printf("Bad op code %d\n", op);
			error("Cannot happen -- match\n");
		}
	}
	return(l);
}