program speller (input, output, source, words, absent, dictionary, report); {**************************************************************} { } { Copyright (c) 1981, 83, 84, Bob Schor } { 85, 86 Rockefeller University } { 1230 York Ave } { New York, NY 10021 } { } { All rights reserved. May not be copied without this notice. } { } {**************************************************************} { Software developed with Oregon Software Pascal 1.2 and 2.0, running under RT-11. Attempts made to conform to ISO standards and avoid implementation extensions. } { this program checks a source text against a dictionary to catch possible spelling errors } { Version 3.1 -- numbers excluded from "non-word" list } { Version 3.1 -- non-word list extension changed to "NON" } { Version 3.2 -- skip numeral bug fixed; first non-numeral now recognized } { Version 4.1 -- trailing blanks omitted, sentinels changed } { Version 4.1 -- only text files used, absent file extension now "ABS" } { Version 4.1 -- Pascal-2 "legalanswer" routine added } { Version 5.12 -- full Pascal-2 file name default added, third parameter } { made true default (involves change in TEMPYY files) } { Version 6.4 -- use VM: if available for scratch files (P-2 bug found!) } { Version 6.4 -- delete temp files after use } { Version 6.7 -- definescratchdevice "bug" fixed -- initialize filelength } { Version 6.8 -- ignore "|" flag in .PRS files } { Version 6.8 -- fixed bug, using VM: with sorted files (sorted device) } CONST version = 'SPELL Version 6.8'; maxwordlength = 20; startofwordlist = 'start-of-sorted-list'; nullword = ' '; filenamelength = 14; hyphenflag = '|'; TYPE devicetype = PACKED ARRAY [1 .. 3] OF char; answerset = SET OF char; wordlengthtype = 1 .. maxwordlength; wordtype = PACKED ARRAY [wordlengthtype] OF char; filenametype = PACKED ARRAY [1 .. filenamelength] OF char; VAR scratchdevice, sorteddevice : devicetype; source, report : text; words, absent, dictionary : text; sourcename, sortedname, reportname, dictionaryname : filenametype; alreadysorted, nodictionary : boolean; proseinputfile : boolean; FUNCTION lowercase (letter : char) : char; BEGIN { lowercase } IF letter IN ['A'..'Z'] THEN lowercase := chr (ord (letter) - ord ('A') + ord ('a')) ELSE lowercase := letter END; FUNCTION uppercase (letter : char) : char; BEGIN { uppercase } IF ('a' <= letter) AND (letter <= 'z') THEN uppercase := chr (ord (letter) - ord ('a') + ord ('A')) ELSE uppercase := letter END; FUNCTION legalanswer (possibleanswers : answerset) : char; VAR answer : char; PROCEDURE informoferror; VAR answer : char; commabelongshere : boolean; BEGIN { informoferror } write ('Error -- legal answers are '); IF ' ' IN possibleanswers THEN BEGIN write ('[space]'); commabelongshere := true END ELSE commabelongshere := false; FOR answer := succ(' ') TO 'Z' DO IF answer IN possibleanswers THEN BEGIN IF commabelongshere THEN write (', '); commabelongshere := true; write (answer:1) END; write (' : ') END; BEGIN { legalanswer } REPEAT IF eoln THEN BEGIN readln; answer := ' ' END ELSE readln (answer); answer := uppercase(answer); IF NOT (answer IN possibleanswers) THEN informoferror UNTIL answer IN possibleanswers; legalanswer := answer END; FUNCTION answerisyes : boolean; BEGIN { answerisyes } answerisyes := legalanswer (['Y', 'N', ' ']) = 'Y' END; PROCEDURE initialize; VAR filelength : integer; PROCEDURE definescratchdevice; BEGIN { definescratchdevice } filelength := 0; rewrite (words, 'TEMPYY.001', 'VM:', filelength); close (words); IF filelength > 0 THEN scratchdevice := 'VM:' ELSE scratchdevice := 'DK:' END; FUNCTION namelength (filename : filenametype) : integer; VAR lastnonblank : 1 .. filenamelength; BEGIN { namelength } lastnonblank := filenamelength; WHILE (filename[lastnonblank] = ' ') AND (lastnonblank > 1) DO lastnonblank := pred(lastnonblank); namelength := lastnonblank END; BEGIN { initialize } writeln; writeln (version); writeln; writeln ('Spelling checker'); writeln; definescratchdevice; writeln (scratchdevice, ' -- scratch files (TEMPYY.xxx) and ', 'sorted list of words (SPELLT.TMP)'); writeln ('DK:', ' -- list of words absent from dictionary (SPELLT.ABS)'); writeln; writeln ('Default source extensions are .DOC (checked first) and .PRS'); REPEAT write ('Enter name of source file -- '); readln (sourcename); reset (source, sourcename, '.DOC', filelength); IF filelength > 0 THEN proseinputfile := false ELSE BEGIN close (source); reset (source, sourcename, '.PRS', filelength); IF filelength > 0 THEN proseinputfile := true ELSE BEGIN close (source); write ('File '); write (sourcename:namelength(sourcename)); writeln (' not found!') END END UNTIL filelength > 0; REPEAT nodictionary := false; write ('Enter dictionary name [DIXION.DCT] -- '); readln (dictionaryname); reset (dictionary, dictionaryname, 'DIXION.DCT', filelength); close (dictionary); IF filelength <= 0 THEN BEGIN writeln ('Warning -- dictionary not found!'); write ('Are you sorting without a dictionary? '); nodictionary := answerisyes END UNTIL (filelength > 0) OR nodictionary; write ('Enter report file name [TT:SPELLT.LST] -- '); readln (reportname) END; FUNCTION wordlength (word : wordtype) : wordlengthtype; VAR trialindex : wordlengthtype; BEGIN { wordlength } IF word = nullword THEN wordlength := 1 ELSE BEGIN trialindex := maxwordlength; WHILE word[trialindex] = ' ' DO trialindex := pred(trialindex); wordlength := trialindex END END; PROCEDURE makewordlist (VAR sortedname : filenametype); { makes partly-sorted word list via heap sort (cf. Wirth, A+DS=P) } CONST heapsize = 800; TYPE heapindex = 0 .. heapsize; VAR alphanum : SET OF char; halfheapsize, leftend, rightend : heapindex; heap : ARRAY [1 .. heapsize] OF wordtype; nextword, lastword, savedword : wordtype; nextchar : char; PROCEDURE getnextchar; BEGIN { getnextchar } IF eof (source) THEN nextchar := ' ' ELSE IF eoln (source) THEN BEGIN readln (source); nextchar := ' ' END ELSE read (source, nextchar) END; PROCEDURE getnextword; { process source in terms of "words" } VAR nextwordindex : 0 .. maxwordlength; savedindex : wordlengthtype; FUNCTION acceptable (letter : char) : boolean; BEGIN { acceptable } IF proseinputfile THEN acceptable := uppercase (letter) IN alphanum + ['-', '''', hyphenflag] ELSE acceptable := uppercase (letter) IN alphanum + ['-', ''''] END; FUNCTION nextwordhashyphen : boolean; VAR wordindex : wordlengthtype; hashyphen : boolean; BEGIN { nextwordhashyphen } hashyphen := false; FOR wordindex := 1 TO maxwordlength DO IF nextword[wordindex] = '-' THEN hashyphen := true; nextwordhashyphen := hashyphen END; BEGIN { getnextword } IF savedword = nullword THEN BEGIN nextwordindex := 0; REPEAT IF proseinputfile AND (nextchar = hyphenflag) THEN getnextchar ELSE BEGIN nextwordindex := succ(nextwordindex); nextword[nextwordindex] := lowercase (nextchar); getnextchar END UNTIL ((nextwordindex = maxwordlength) OR NOT acceptable (nextchar)); IF nextwordindex = maxwordlength THEN WHILE acceptable (nextchar) DO getnextchar ELSE FOR nextwordindex := succ(nextwordindex) TO maxwordlength DO nextword[nextwordindex] := ' '; IF nextwordhashyphen THEN savedword := nextword; REPEAT getnextchar UNTIL eof (source) OR (uppercase (nextchar) IN alphanum) END ELSE BEGIN nextword := savedword; IF nextwordhashyphen THEN BEGIN nextwordindex := 1; WHILE savedword[nextwordindex] <> '-' DO nextwordindex := succ(nextwordindex); savedindex := 1; nextword[nextwordindex] := ' '; FOR nextwordindex := succ(nextwordindex) TO maxwordlength DO BEGIN nextword[nextwordindex] := ' '; savedword[savedindex] := savedword[nextwordindex]; savedindex := succ(savedindex) END; FOR savedindex := savedindex TO maxwordlength DO savedword[savedindex] := ' ' END ELSE savedword := nullword END END; PROCEDURE sift (left, right : heapindex); { form and maintain heap (a la heap sort) } VAR here, there : heapindex; word : wordtype; moretocome : boolean; BEGIN { sift } here := left; IF here <= halfheapsize THEN BEGIN there := 2*here; moretocome := there <= right END ELSE moretocome := false; word := heap[here]; WHILE moretocome DO BEGIN IF there < right THEN IF heap[there] > heap[succ(there)] THEN there := succ(there); IF word <= heap[there] THEN moretocome := false ELSE BEGIN heap[here] := heap[there]; here := there; IF here <= halfheapsize THEN BEGIN there := 2*here; moretocome := there <= right END ELSE moretocome := false END END; heap[here] := word END; PROCEDURE fillheap; VAR here : heapindex; BEGIN { fillheap } getnextword; FOR here := heapsize DOWNTO succ(halfheapsize) DO BEGIN heap[here] := nextword; getnextword END; FOR here := halfheapsize DOWNTO 1 DO BEGIN heap[here] := nextword; getnextword; sift (here, heapsize) END; END; PROCEDURE siftwordsthroughheap; BEGIN { siftwordsthroughheap } leftend := heapsize; WHILE nextword <> nullword DO BEGIN IF heap[1] <> lastword THEN writeln (words, heap[1]:wordlength(heap[1])); lastword := heap[1]; IF heap[1] <= nextword THEN BEGIN heap[1] := nextword; getnextword; sift (1, leftend) END ELSE BEGIN heap[1] := heap[leftend]; sift (1, pred(leftend)); heap[leftend] := nextword; getnextword; IF leftend <= halfheapsize THEN sift (leftend, heapsize); leftend := pred(leftend); IF leftend = 0 THEN leftend := heapsize END END END; PROCEDURE flushheap; BEGIN { flushheap } rightend := heapsize; REPEAT IF (heap[1] <> lastword) AND (heap[1] <> nullword) THEN writeln (words, heap[1]:wordlength(heap[1])); lastword := heap[1]; heap[1] := heap[leftend]; sift (1, pred(leftend)); heap[leftend] := heap[rightend]; rightend := pred(rightend); IF leftend <= halfheapsize THEN sift (leftend, rightend); leftend := pred(leftend) UNTIL leftend = 0; WHILE rightend > 0 DO BEGIN IF (heap[1] <> lastword) AND (heap[1] <> nullword) THEN writeln (words, heap[1]:wordlength(heap[1])); lastword := heap[1]; heap[1] := heap[rightend]; sift (1, rightend); rightend := pred(rightend) END END; BEGIN { makewordlist } halfheapsize := heapsize DIV 2; alphanum := ['A' .. 'Z', '0' .. '9']; REPEAT getnextchar UNTIL eof (source) OR (uppercase (nextchar) IN alphanum); lastword := nullword; savedword := nullword; getnextword; alreadysorted := nextword = startofwordlist; IF alreadysorted THEN BEGIN sortedname := sourcename; sorteddevice := 'DK:'; close (source) END ELSE BEGIN savedword := nextword; sortedname := 'SPELLT.TMP '; sorteddevice := scratchdevice; rewrite (words, sortedname, scratchdevice); fillheap; siftwordsthroughheap; flushheap; close (source); close (words) END END; PROCEDURE sortwordlist (sortedname : filenametype); { performs poly-phase sort of word list (cf. Wirth, A+DS=P) } CONST endtape = 4; tapenamelength = 10; TYPE tapenumbertype = 1 .. endtape; tapenameindextype = 1 .. tapenamelength; tapenametype = PACKED ARRAY [tapenameindextype] OF char; VAR listfilelength : integer; sortfile : ARRAY [tapenumbertype] OF text; selectlevel : integer; actual, dummy : ARRAY [tapenumbertype] OF integer; tapemap : ARRAY [tapenumbertype] OF tapenumbertype; tapename : tapenametype; PROCEDURE maketapename (tape : integer); { note -- temp files are named TEMPYY.nnn, where nnn is the numeric encoding of "tape" } PROCEDURE mtn (index : tapenameindextype; VAR tape : integer); BEGIN { mtn } IF index < tapenamelength THEN mtn (succ(index), tape); tapename [index] := chr(ord('0') + tape MOD 10); tape := tape DIV 10 END; BEGIN { maketapename } tapename := 'TEMPYY. '; mtn (8, tape) END; PROCEDURE distributeinitialruns; VAR tape : tapenumbertype; outputtape : tapenumbertype; nextword : wordtype; lastword : ARRAY [tapenumbertype] OF wordtype; endoftape : boolean; PROCEDURE selecttape; VAR tape : tapenumbertype; runs : integer; BEGIN { selecttape } IF dummy[outputtape] < dummy[succ(outputtape)] THEN outputtape := succ(outputtape) ELSE BEGIN IF dummy[outputtape] = 0 THEN BEGIN selectlevel := succ(selectlevel); runs := actual[1]; FOR tape := 1 TO pred(endtape) DO BEGIN dummy[tape] := runs + actual[succ(tape)] - actual[tape]; actual[tape] := runs + actual[succ(tape)] END END; outputtape := 1 END; dummy[outputtape] := pred(dummy[outputtape]) END; PROCEDURE copyrun; VAR word : wordtype; endofrun : boolean; BEGIN { copyrun } REPEAT word := nextword; writeln (sortfile[outputtape], word:wordlength(word)); IF eof (words) THEN endofrun := true ELSE BEGIN readln (words, nextword); endofrun := nextword < word END UNTIL endofrun; endoftape := eof (words); lastword[outputtape] := word END; BEGIN { distributeinitialruns } reset (words, sortedname, scratchdevice, listfilelength); readln (words, nextword); FOR tape := 1 TO pred(endtape) DO BEGIN actual[tape] := 1; dummy[tape] := 1; maketapename (tape); rewrite (sortfile[tape], tapename, scratchdevice, listfilelength) END; selectlevel := 1; outputtape := 1; actual[endtape] := 0; dummy[endtape] := 0; REPEAT selecttape; copyrun UNTIL endoftape OR (outputtape = pred(endtape)); WHILE NOT endoftape DO BEGIN selecttape; IF lastword[outputtape] <= nextword THEN BEGIN copyrun; IF endoftape THEN dummy[outputtape] := succ(dummy[outputtape]) ELSE copyrun END ELSE copyrun END; FOR tape := 1 TO pred(endtape) DO BEGIN close (sortfile[tape]); maketapename (tape); reset (sortfile[tape], tapename, scratchdevice) END; FOR tape := 1 TO endtape DO tapemap[tape] := tape END; PROCEDURE mergetoendtape; VAR mergecount : integer; nextword : ARRAY [tapenumbertype] OF wordtype; PROCEDURE mergeonerun; VAR tape, minimaltape : tapenumbertype; auxtape : 0 .. endtape; word, firstword : wordtype; auxtapemap : ARRAY [tapenumbertype] OF tapenumbertype; endoftape : boolean; BEGIN { mergeonerun } auxtape := 0; FOR tape := 1 TO pred(endtape) DO IF dummy[tape] > 0 THEN dummy[tape] := pred(dummy[tape]) ELSE BEGIN auxtape := succ(auxtape); auxtapemap[auxtape] := tapemap[tape] END; FOR tape := 1 TO auxtape DO BEGIN IF eof(sortfile[auxtapemap[tape]]) THEN nextword[auxtapemap[tape]] := nullword ELSE readln (sortfile[auxtapemap[tape]], nextword[auxtapemap[tape]]) END; IF auxtape = 0 THEN dummy[endtape] := succ(dummy[endtape]) ELSE REPEAT minimaltape := 1; firstword := nextword[auxtapemap[minimaltape]]; FOR tape := 2 TO auxtape DO BEGIN word := nextword[auxtapemap[tape]]; IF word < firstword THEN BEGIN firstword := word; minimaltape := tape END END; word := firstword; endoftape := word = nullword; IF NOT endoftape THEN BEGIN IF eof(sortfile[auxtapemap[minimaltape]]) THEN nextword[auxtapemap[minimaltape]] := nullword ELSE readln (sortfile[auxtapemap[minimaltape]], nextword[auxtapemap[minimaltape]]); writeln (sortfile[tapemap[endtape]], word:wordlength(word)); END; IF (word > nextword[auxtapemap[minimaltape]]) OR endoftape THEN BEGIN auxtapemap[minimaltape] := auxtapemap[auxtape]; auxtape := pred(auxtape) END UNTIL auxtape = 0 END; PROCEDURE rotatetapes; VAR tape : tapenumbertype; lasttape : tapenumbertype; lastdummy, nexttolastactual : integer; BEGIN { rotatetapes } close (sortfile[tapemap[endtape]]); maketapename (tapemap[endtape]); reset (sortfile[tapemap[endtape]], tapename, scratchdevice); lasttape := tapemap[endtape]; lastdummy := dummy[endtape]; nexttolastactual := actual[pred(endtape)]; FOR tape := endtape DOWNTO 2 DO BEGIN tapemap[tape] := tapemap[pred(tape)]; dummy[tape] := dummy[pred(tape)]; actual[tape] := actual[pred(tape)] - nexttolastactual END; tapemap[1] := lasttape; dummy[1] := lastdummy; actual[1] := nexttolastactual END; BEGIN { mergetoendtape } dummy[endtape] := 0; maketapename (tapemap[endtape]); rewrite (sortfile[tapemap[endtape]], tapename, scratchdevice, listfilelength); FOR mergecount := actual[pred(endtape)] DOWNTO 1 DO mergeonerun; rotatetapes END; PROCEDURE removeduplicates; VAR word, lastword : wordtype; BEGIN { removeduplicates } rewrite (words, sortedname, scratchdevice); writeln (words, startofwordlist:wordlength(startofwordlist)); lastword := nullword; WHILE NOT eof (sortfile[tapemap[1]]) DO BEGIN REPEAT readln (sortfile[tapemap[1]], word); UNTIL eof (sortfile[tapemap[1]]) OR (word <> lastword); IF word <> lastword THEN writeln (words, word:wordlength(word)); lastword := word END; close (words); close (sortfile[tapemap[1]]) END; PROCEDURE deletetemptapes; VAR tape : tapenumbertype; BEGIN { deletetemptapes } FOR tape := 1 TO endtape DO BEGIN maketapename (tape); rewrite (sortfile[tape], tapename, scratchdevice); delete (sortfile[tape]) END END; BEGIN { sortwordlist } distributeinitialruns; FOR selectlevel := selectlevel DOWNTO 1 DO mergetoendtape; removeduplicates; deletetemptapes END; PROCEDURE lookupwords (sortedname : filenametype); VAR word, correctword : wordtype; PROCEDURE skipnumerals; BEGIN { skipnumerals } REPEAT IF eof (words) THEN word := nullword ELSE readln (words, word) UNTIL NOT (word[1] IN ['0' .. '9']) END; FUNCTION missing (word : wordtype) : boolean; BEGIN { missing } IF nodictionary THEN missing := true ELSE BEGIN WHILE (correctword < word) AND NOT eof (dictionary) DO readln (dictionary, correctword); missing := correctword <> word END END; BEGIN { lookupwords } IF NOT nodictionary THEN BEGIN reset (dictionary, dictionaryname, 'DIXION.DCT'); readln (dictionary, correctword); IF correctword = startofwordlist THEN readln (dictionary, correctword) ELSE BEGIN writeln ('Dictionary not in proper format!'); nodictionary := true; close (dictionary) END END; reset (words, sortedname, sorteddevice); readln (words, word); { throw away startofwordlist } rewrite (absent, 'SPELLT.ABS'); writeln (absent, startofwordlist:wordlength(startofwordlist)); skipnumerals; WHILE word <> nullword DO BEGIN IF missing (word) THEN writeln (absent, word:wordlength(word)); IF eof (words) THEN word := nullword ELSE readln (words, word) END; close (words); close (absent); close (dictionary) END; PROCEDURE makereport; CONST linenumber = 60; columnnumber = 3; TYPE pagetype = ARRAY [1 .. linenumber, 1 .. columnnumber] OF wordtype; VAR pageofwords : pagetype; word : wordtype; column : 1 .. columnnumber; line : 1 .. linenumber; BEGIN { makereport } reset (absent, 'SPELLT.ABS'); rewrite (report, reportname, 'TT:SPELLT.LST'); readln (absent, word); REPEAT FOR column := 1 TO columnnumber DO FOR line := 1 TO linenumber DO BEGIN IF NOT eof (absent) THEN readln (absent, pageofwords [line, column]) ELSE pageofwords [line, column] := nullword END; FOR line := 1 TO linenumber DO BEGIN FOR column := 1 TO columnnumber DO BEGIN word := pageofwords [line, column]; IF word <> nullword THEN write (report, word, ' ':5) END; IF pageofwords [line, 1] <> nullword THEN writeln (report) END; page (report) UNTIL word = nullword; close (absent); close (report) END; BEGIN { speller } initialize; makewordlist (sortedname); IF NOT alreadysorted THEN sortwordlist (sortedname); lookupwords (sortedname); makereport END.