{$Y-,I+,W-} program gpp; { Author : C. E. Chew Date : December 1984 } { This program uses a Boyer-Moore pattern matching algorithm to quickly scan text files for a given pattern. The program may be invoked as follows: .RUN SY:GPP $[-options] pattern inputfile [outputfile] The pattern comprises a string of characters. No context is implied. The inputfile specification may contain the standard RT-11 wildcards. The default outputfile is TT:. Link with bottom of 1500 octal. } const version = 'V1.0 December 1985'; stringlength = 80; defaultstringlength = stringlength; radix50length = 3; filenamesize = 10; directory_base = 6; null = chr(0); gppname = "GPP"; logical_disks = "*.DSK,*.DEV"; defaultoutfile = "TT:"; defaultoutextension = "LST"; type string = array [1..stringlength+1] of char; dynamicstring = record s : string end; radix50 = array [1..radix50length] of integer; filenamestring = array [1..filenamesize+1] of char; relation = (lt,le,eq,ge,gt,ne); buffer = array [0..255] of integer; queue = @queueelement; queueelement = record name : string; start : integer; size : integer; next : queue end; dorecord = record cursor, filecount, totalsize, freesize : integer end; header = record d_tota, d_next, d_high, d_extr, d_strt, device_base, device_size : integer; pathname : string end; statusbits = (nop0, nop1, nop2, nop3, nop4, nop5, nop6, nop7, tent, empty, perm, endblk, nop12, nop13, nop14, prot); status = set of statusbits; entry = record state : record case boolean of true : (word : integer); false : (state : status) end; e_name : radix50; e_leng : integer; e_used_chan_jnum : integer; e_date : integer; block : integer; segment : integer; offset : integer; e_names : filenamestring; eod : boolean end; var inp, out : text; dev : file of buffer; freeq, qhead, qtail : queue; subdirectory, command, device, template : string; forcecase, forceupper, printnumber : boolean; inputstringlength : integer; { Pattern and string } p : @string; s : @dynamicstring; slength, plength : integer; { Boyer-Moore workspace } delta1 : array [chr(0)..chr(127)] of integer; delta2 : array [1..stringlength] of integer; procedure fatal(p,m:string); external; procedure error(p,m:string); external; procedure inform(p,m:string); external; procedure warn(p,m:string); external; procedure r50tos(r:radix50; l:integer; s:string; m:integer); external; procedure substring(s1:string; var s2:string; st,sp:integer); external; procedure insert(s1:string; var s2:string; p,m:integer); external; procedure delete(var s:string; st,sp:integer); external; function compare(s1:string; r:relation; s2:string):boolean; external; procedure concatenate(var s1:string; s2:string; m:integer); external; procedure trimright(var s:string); external; procedure uppercase(var c:char); external; procedure lowercase(var c:char); external; function position(s1,s2:string; s:integer):integer; external; function stoi(s:string;b,st:integer;var so:integer):integer; external; function length(s:string):integer; external; function verify(s1,s2:string):integer; external; procedure rerun; external; procedure bminit; var ch : char; i, j, k : integer; f : array [0..stringlength+1] of integer; begin { Delta1 table } for ch := chr(0) to chr(127) do delta1[ch] := plength; for j := 1 to plength do delta1[p@[j]] := min(delta1[p@[j]], plength-j); { Delta2 table } for j := 1 to plength do delta2[j] := plength+plength - j; j := plength; k := plength + 1; while j > 0 do begin f[j] := k; while (k <= plength) and (p@[j] <> p@[k]) do begin delta2[k] := min(delta2[k], plength-j); k := f[k] end; k := k - 1; j := j - 1 end; for j := 1 to k do delta2[j] := min(delta2[j], plength+k-j) end; function bmsearch:boolean; var i, j : integer; begin { Search for pattern } bmsearch := false; j := plength; while j <= slength do begin i := plength; while (i > 0) and (s@.s[j] = p@[i]) do begin i := i - 1; j := j - 1 end; if i = 0 then begin bmsearch := true; j := slength + 1 end else j := j + max(delta1[s@.s[j]], delta2[i]) end end; procedure deletequeue(var n : string; var b, s : integer); var q : queue; begin with qhead@ do begin n := name; b := start; s := size end; q := qhead; qhead := q@.next; q@.next := freeq; freeq := q; if qhead = nil then qtail := nil end; procedure insertqueue(var n : string; b, s :integer); var q : queue; begin if freeq <> nil then begin q := freeq; freeq := freeq@.next end else new(q); with q@ do begin name := n; start := b; size := s; next := nil end; if qtail = nil then qhead := q else qtail@.next := q; qtail := q end; function qempty:boolean; begin qempty := qtail = nil end; function wildmatch(var s, t : string):boolean; const asterisk = '*'; percent = '%'; comma = ','; colon = ':'; period = '.'; space = ' '; var tx : integer; matched : boolean; function matching(var s, t : string; sx, tx : integer):boolean; var chs, cht : char; matched, done : boolean; begin done := false; repeat while s[sx] = space do sx := sx + 1; chs := s[sx]; cht := t[tx]; if cht = comma then cht := null; if cht = asterisk then begin tx := tx + 1; sx := sx - 1; repeat sx := sx + 1; matched := matching(s, t, sx, tx) until matched or (s[sx] = null); done := true end else if (chs = null) or (cht = null) then begin matched := chs = cht; done := true end else if (chs = cht) or (cht = percent) then begin sx := sx + 1; tx := tx + 1 end else begin matched := false; done := true end until done; matching := matched end; begin tx := 1; loop matched := matching(s, t, 1, tx); exit if matched; while (t[tx] <> null) and (t[tx] <> comma) do tx := tx + 1; exit if t[tx] = null; tx := tx + 1 end; wildmatch := matched end; function parse(var command, device, template : string):boolean; const legal_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789:.*%,"; var colons, periods : integer; cinx, f : integer; ch : char; s : string; function directory(var command, device : string):boolean; const colon = ':'; comma = ','; period = '.'; default_device = "DK:"; default_extension = ".*"; default_filename = "*"; var result : boolean; c, f, g, h : integer; d : string; function nextseparator(var c : string; s : integer):integer; begin while not ((c[s] = colon) or (c[s] = comma) or (c[s] = null)) do s := s + 1; nextseparator := s end; begin directory := true; if command[1] = null then device := default_device else begin f := 1; device := ""; result := false; repeat g := nextseparator(command, f); if command[g] = colon then begin substring(command, d, f, g-f+1); delete(command, f, g-f+1); g := nextseparator(command, f); if command[g] = colon then begin error(gppname, "Bad device specification"); directory := false; result := true end end else if device[1] = null then d := default_device else d := device; if not result then begin if device[1] = null then device := d else if compare(device, ne, d) then begin error(gppname, "Too many devices"); directory := false; result := true end; if not result then begin h := f-1; c := 0; repeat h := h + 1; if command[h] = period then c := c + 1 until h = g; if c > 1 then begin error(gppname, "Bad extension specification"); directory := false; result := true end else begin if c = 0 then insert(default_extension, command, g, stringlength); if command[f] = period then insert(default_filename, command, f, stringlength); f := nextseparator(command, f); if command[f] = null then result := true else f := f + 1 end end end until result end end; begin parse := false; if directory(command, device) then if verify(command, legal_characters) <> 0 then error(gppname, "Illegal character in file specification") else begin template := command; parse := true end end; procedure searchdirectory(var subdirectory, filespecification : string); var ent : entry; head : header; function getword(var ent : entry):integer; begin with ent do begin if offset = 256 then get(dev); getword := dev@[offset mod 256]; offset := offset + 1 end end; procedure nextsegment(var ent : entry; var head : header); var i : integer; begin with ent, head do begin if d_next > 0 then begin segment := d_next; offset := 0; seek(dev, device_base + (segment-1)*2); i := getword(ent); d_next := getword(ent); i := getword(ent) + getword(ent); d_strt := getword(ent) end else eod := true end end; procedure getentry(var ent : entry; var head : header); var i, j : integer; begin with ent do begin state.word := getword(ent); if endblk in state.state then with head do begin segment := d_next; nextsegment(ent, head); if not eod then getentry(ent, head) end else begin block := block + e_leng; for i := 1 to radix50length do e_name[i] := getword(ent); r50tos(e_name, 9, e_names, filenamesize); insert(".", e_names, 7, filenamesize); e_leng := getword(ent); e_used_chan_jnum := getword(ent); e_date := getword(ent); for i := 1 to head.d_extr do j := getword(ent) end end end; procedure restoreentry(var head : header; var ent : entry); begin with ent, head do seek(dev, device_base + (segment-1)*2 + ord(offset>255)) end; function resetentry(var ent : entry; var head : header):boolean; var directory_ok : boolean; begin with ent, head do begin deletequeue(pathname, device_base, device_size); directory_ok := device_size > directory_base + 2; if directory_ok then begin eod := false; segment := 1; offset := 0; e_leng := 0; device_base := device_base + directory_base; restoreentry(head, ent); d_tota := getword(ent); d_next := getword(ent); d_high := getword(ent); d_extr := getword(ent); d_strt := getword(ent); directory_ok := ((d_tota > 0) and (d_tota < 32)) and ((d_next >= 0) and (d_next <= d_high)) and ((d_high >= 1) and (d_high <= d_tota)) and ((d_extr >= 0) and (d_extr <= 998) and not odd(d_extr)) and (d_strt = directory_base + 2*d_tota); if directory_ok then begin d_extr := d_extr div 2; block := d_strt; getentry(ent, head) end end end; resetentry := directory_ok end; procedure dofile(var head : header; var ent : entry; var subdirectory, filespecification : string); var i, j, line : integer; ch : char; pname : string; firsttime : boolean; begin with head, ent do begin if perm in state.state then begin (* * Hooks for recursive subdirectory search * * if wildmatch(e_names, subdirectory) then begin * pname := pathname; * substring(e_names, nname, 1, 6); * trimright(nname); * concatenate(pname, "/", stringlength); * concatenate(pname, nname, stringlength); * insertqueue(pname, device_base-directory_base+block, e_leng) * end; *) if wildmatch(e_names, filespecification) then begin pname := pathname; concatenate(pname, e_names, stringlength); i := 1; j := 1; repeat ch := pname[j]; pname[i] := ch; if ch <> ' ' then i := i + 1; j := j + 1 until ch = null; reset(inp, pname); if eof(inp) then fatal(gppname, "Cannot access file"); firsttime := true; line := 0; while not eof(inp) do begin i := 1; line := line + 1; while not eoln(inp) and (i <= inputstringlength) do begin read(inp, ch); if forcecase then if forceupper then uppercase(ch) else lowercase(ch); s@.s[i] := ch; i := i + 1 end; readln(inp); s@.s[i] := null; slength := i - 1; if slength > 0 then begin if bmsearch then begin if firsttime then begin writeln(out); writeln(out, 'File ', pname, ':'); firsttime := false end; if printnumber then write(out, line:6, ' : '); writeln(out, s@.s) end end end end end end end; begin if resetentry(ent, head) then begin with ent do while not eod do begin dofile(head, ent, subdirectory, filespecification); getentry(ent, head) end end else begin warn(gppname, ""); writeln('Illegal directory ', head.pathname) end end; function initialise:boolean; var i, j : integer; ch : char; begin initialise := false; if argc <= 5 then begin if argc >= 4 then begin command := ""; concatenate(command, argv[3]@, stringlength); if parse(command, device, template) then begin if argc = 5 then rewrite(out, argv[4]@, defaultoutextension) else rewrite(out, defaultoutfile, defaultoutextension); qtail := nil; qhead := nil; freeq := nil; subdirectory := logical_disks; inputstringlength := defaultstringlength; forcecase := true; forceupper := false; printnumber := false; if argv[1]@[0] = '-' then begin i := 1; ch := argv[1]@[1]; while ch <> null do begin uppercase(ch); if ch = 'S' then begin j := stoi(argv[1]@, 10, i+2, i); if j > 0 then inputstringlength := j; if i <> 0 then i := i - 2 else i := length(argv[1]@) - 1 end else if ch = 'C' then begin forcecase := false end else if ch = 'N' then begin printnumber := true end else if ch = 'U' then begin forceupper := true end; i := i + 1; ch := argv[1]@[i] end end; plength := length(argv[2]@); p := argv[2]; if forcecase then begin i := 0; repeat i := i + 1; if forceupper then uppercase(p@[i]) else lowercase(p@[i]) until p@[i] = null end; if plength > 0 then begin reset(dev, device); bminit; new(s, inputstringlength+1); if s <> nil then initialise := true end end end end end; begin if not initialise then begin writeln; writeln(gppname, ' ', version); writeln; writeln; writeln('Usage is :'); writeln; writeln(' $[-options] pattern infile [outfile]'); writeln; writeln('Default outfile is ', defaultoutfile); writeln('Default output extension is ', defaultoutextension); writeln('Default input device is DK:'); writeln('Default maximum input string length is ', defaultstringlength); writeln('Default conversion strategy is to convert all to lowercase'); writeln; writeln('Option C : case is significant'); writeln('Option S : maximum input string length'); writeln('Option N : print line number'); writeln('Option U : convert all to uppercase'); writeln; writeln('Note : Option C will override option U'); writeln; rerun end else begin if eof(dev) then fatal(gppname, "Cannot access input device"); insertqueue(device, 0, maxint); repeat searchdirectory(subdirectory, template); until qempty; rewrite(out, "tt:") end end.