/* * grep documentation and help */ #include #include "grep.h" extern gr_ident ; #ifdef DOCUMENTATION title grep Get Regular Expression and Print index Get Regular Expression and Print synopsis grep [options] regular_expression {bool {!}expr ...}[ file ...] description Grep searches each specified file (if none are specified, it reads stdin) for lines matching the given pattern expression. Grep allows wild-card file names in the file list. The following options are recognized: .lm +8 .s.i -4;-c Only print a count of matching lines. .s.i -4;-f Print file name option, see below. .s.i -4;-n Preceed each line by its line number. .s.i -4;-u Set upper and lower case distinctive. .s.i -4;-v Print non-matching lines. .s.i -4;-s Match on "SOUNDEX" value of string. .s.i -4;-i Reverse video on match string (word). .s.i -4;-q Quit switch.. Aborts search with any keystroke .s.lm -8 The file name is normally printed if more than one file argument is given, or if a wild-card file name was specified. The "-f" flag reverses this action (print name if one file, not if more). .s The regular_expression defines patterns to search for. Multiple expressions can be logically combined using boolian operators: and (&,+), or (|), xor (~), not (!) and can be factored using parenthesis. Boolian operators must be seperated from expression by spaces and evaluate left to right to control line selection. .s Upper- and lower-case are not distinguished by grep unless requested with '-u'. Blank lines never match. The regular_expression should usually be quoted. .s The regular_expression is built out of the following elements: .lm +8 .s.i -6;x An ordinary character (not mentioned below) matches that character. .s.i -6;'_\' The backslash quotes any character. "_\$" matches a dollar-sign. .s.i -6;'_^' A circumflex at the beginning of an expression matches the beginning of a line. .s.i -6;'$' A dollar-sign at the end of an expression matches the end of a line. .s.i -6;'.' A period matches any character except "new-line". .s.i -6;':a' A colon matches a class of characters##described .i -6;':d' by##the##following##character.##":a" matches any .i -6;':n' alphabetic, ":d" matches##digits.##":n"##matches .i -6;':#' alphanumerics, and ":#" matches spaces tabs, and .br other control characters, such as newline. .s.i -6;'*' An expression followed by an asterisk matches zero or more occurrances of that expression: "fo*" matches "f", "fo" "foo", etc. .s.i -6;'+' An expression followed by a plus sign matches one or more occurrances of that expression: "fo+" matches "fo", etc. .s.i -6;'-' An expression followed by a minus sign optionally matches the expression. .s.i -6;'[]' A string enclosed in square brackets matches any character in that string, but no others. If the first character in the string is a circumflex, the expression matches any character except "new-line" and the characters in the string. .s For example, "A[xyz]+B" matches "AxxB" and "AxyzzyB", while "A[^xyz]+B" matches "AbcB" but not "AxB". A range of characters may be specified by two characters separated by "-". Note that [a-z] matches alphabetics, while [z-a] never matches. .s.lm -8 The concatenation of regular_expressions is a regular_expression. diagnostics .lm +8 .s.i -8;No arguments .s.i -8;Unknown flag .s.i -8;No pattern .s.i -8;"file__name": cannot open .s.i -8;Illegal occurrence operator ... .s An operator was found in an illegal context. For example, the pattern "*foo" is illegal as the '*' operator must modify a previously specified pattern element. .s.i -8;No : type .s A colon was followed by an unknown modifier. .s.i -8;Class terminates badly .s A character class "[...]" was incorrectly terminated. For example, "[A-]" is incorrect. .s.i -8;Unterminated class .s Character classes must be terminated by ']'. .s.i -8;Class too large .s An internal buffer filled. .s.i -8;Empty class .s A character class must designate something: "[]" is illegal. .s.i -8;Pattern too complex .s An internal buffer filled. .lm -8 .s.i -8;Ill formed expression .s The boolian combination of select patterns is semantically incorrect, i.e. missing parenthesis or trailing operator. Acknowledgements The Soundex algorithm was invented by Margaret K. Odell and Robert C. Russell. U.S. patents 1261167 (1918) and 1435663 (1922). The version used here was modified from one described in Donald Knuth, Sorting and Searching. author David Conroy, Martin Minow. modified Mike Fraser bugs #endif static char *documentation[] = { "grep searches a file for given patterns. Execute by", " grep [flags] expr {bool {!}expr} file_list", "", "Flags are single characters preceeded by '-':", " -c Only a count of matching lines is printed", " -f Print file name for matching lines, see below", " -n Preceed each line by its line number", " -u Set upper and lower case distinctive", " -v Only print non-matching lines", " -s Match on soundex value of string", " -i Reverse video display on matching string (word)", " -q Quit Switch - Aborts search on any keystroke", "", "Input or output may be redirected:", " grep ... file.out", "The file_list is a list of files.", "If no files are given, input comes from the terminal. There is no prompting.", "The file name is normally printed if there is a file given.", "The -f flag reverses this action (print name no file, not if more).", "", "The regular_expression defines patterns to search for. Regular_expressions", "can be logically combined using boolian operators: and (&,+), or (|), xor (~)", "not (!) and can be factored using parenthesis. The logical opearators must", "be seperated from the select patterns by spaces and evaluate left to right.", "", "Upper- and lower-case are equivalent unless requested by to -u flag. Blank", "lines never match. The expression should usually be quoted to prevent", "file-name translation.", "x An ordinary character (not mentioned below) matches that character.", "'\\' The backslash quotes any character. \"\\$\" matches a dollar-sign.", "'^' A circumflex at the beginning of an expression matches the", " beginning of a line.", "'$' A dollar-sign at the end of an expression matches the end of", " a line.", "'.' A period matches any character except \"new-line\".", "':a' A colon matches a class of characters described by the following", "':d' character. \":a\" matches any alphabetic, \":d\" matches digits,", "':n' \":n\" matches alphanumerics, \": \" matches spaces, tabs, and", "': ' other control characters, such as new-line.", "'*' An expression followed by an asterisk matches zero or more", " occurrances of that expression: \"fo*\" matches \"f\", \"fo\"", " \"foo\", etc.", "'+' An expression followed by a plus sign matches one or more", " occurrances of that expression: \"fo+\" matches \"fo\", etc.", "'-' An expression followed by a minus sign optionally matches", " the expression.", "'[]' A string enclosed in square brackets matches any character in", " that string, but no others. If the first character in the", " string is a circumflex, the expression matches any character", " except \"new-line\" and the characters in the string. For", " example, \"[xyz]\" matches \"xx\" and \"zyx\", while \"[^xyz]\"", " matches \"abc\" but not \"axb\". A range of characters may be", " specified by two characters seperated by \"-\". Note that,", " [a-z] matches alphabetics, while [z-a] never matches.", "", "The concatenation of regular_expressions is a regular_expression.", 0 } ; help() /* * Give good help */ { register char **dp ; for (dp = documentation; *dp; dp++) printf("%s\n", *dp) ; #ifdef decus printf("\n%s %s\t\t%s %s\n", "Version:", gr_ident, "Built:", _DATE ) ; #else printf("\n%s %s\n", "Version:", gr_ident ) ; #endif } usage(s) char *s ; { fprintf(stderr, "?GREP-E-%s\n", s) ; fprintf(stderr, "Usage: grep [-cfnuvsi] pattern {bool pattern} [file ...] grep ? for help\n"); exit(1) ; }