program smap(input,output,source,object,symbols); { Simple Minded Assembler Program. This program is intended as a simple illustration of a two pass assembler producing code for an absolute loader. The target machine is a PDP-8. SMAP is not sophisticated, and does not include much in the way of error checking. (In particular, it makes no attempt to validate microcoded combinations of PDP-8 operate instructions). SMAP only allows for a couple of assembler directives: * origin directive $ end directive } const dd = 1; { error number for doubly defined symbol } ln = 2; { for too long a name } lo = 3; { for too large an octal constant } sx = 5; { for some obscure syntax error } undef = 6; { for an undefined symbol (pass 2) } xp = 4; { cross page reference (pass 2) } mask177 = 127; thispage = 128; mask7600 = 3968; type symbol = packed array[1..6] of char; { Symbols are up to 6 chars long } symtype = (label8,mri,iot,opr,indir); { Classification of symbol types } { Record structure for each entry in } { symbol table } entry = record name : symbol; stype : symtype; value : 0..4095; end; { Classification of tokens in input stream } { used in basic lexical analysis } token = (labels,comment,octal,origin,ending,identifier,null,errors); { More detailed classification of input } { tokens used in pass 2 } xtoken = (xlab8,xcomment,xoctal,xorigin,xending,xnull,xerrors, xvariable,xmriopcode,xiotopcode,xopropcode,xindirectbit); var error : boolean; { Flag set to true on any fatal error } ended : boolean; { Flag set to true when reach end of source on current pass } line : array[1..120] of char; { Where we store a line of source program } numchars : integer; { Number of characters in this line. } posn : integer; { Where we are on line } symtab : array[1..512] of entry; { The symbol table itself } numsym : 1..512; { the number of defined symbols } where : 0..4095; { Location counter } source : text; { File with source text of PDP8 program } object : text; { File for object code generated in pass 2 } symbols: text; { File with standard symbol definitions } newsym : symbol; { Name of most recently read identifier } newval : 0..4095; { value of most recently read octal # } { or value of newsym } printval : 0..4095; passno : integer; { Pass number } { - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - } { Utility functions } { } { - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - } procedure octwrite(x : integer; d : integer;var f : text); var temp : 0 .. 7; ch : char; begin if d > 1 then octwrite(x div 8, d - 1,f); temp:= x mod 8; ch:=chr(ord('0') + temp); write(f,ch) end; { - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - } function octread(var ff : text) : integer; var v: integer; begin { skip over anything other than valid octal digits } while not (ff^ in ['0'..'7']) do get(ff); v:=0; { consume valid octal digits } while (ff^ in ['0'..'7']) do begin v:=8*v + (ord(ff^) - ord('0')); get(ff) end; { force into an appropriate range } octread:=v mod 4096 end; { - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - } function orintegers(alpha,beta : integer) : integer; var result,bits : integer; begin { Very frequently we will need to combine specific bits of some integers. Of course, this is not allowed in PASCAL. Hence this contrived routine for performing the task. Its written to handle just 12-bits, as required for our PDP-8 } alpha:=alpha mod 4096; { just in case, make them 12bit numbers} beta:=beta mod 4096; bits:=2048; result:=0; while bits>0 do begin if ((alpha >= bits) or (beta >= bits)) then result:=result+bits; if (alpha >= bits) then alpha:=alpha-bits; if (beta >= bits) then beta:=beta-bits; bits:=bits div 2 end; orintegers:=result end; { - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - } function andintegers(alpha,beta : integer) : integer; var result,bits : integer; begin { Very frequently we will need to combine specific bits of some integers. Of course, this is not allowed in PASCAL. Hence this contrived routine for performing the task. Its written to handle just 12-bits, as required for our PDP-8 } alpha:=alpha mod 4096; { just in case, make them 12bit numbers} beta:=beta mod 4096; bits:=2048; result:=0; while bits>0 do begin if ((alpha >= bits) & (beta >= bits)) then result:=result+bits; if (alpha >= bits) then alpha:=alpha-bits; if (beta >= bits) then beta:=beta-bits; bits:=bits div 2 end; andintegers:=result end; {-----------------------------------------------------------------------} procedure errs(errno,passno : integer); var i : integer; { Same error can sometimes generate multiple messages, only take first } begin if not error then begin error:=true; writeln; writeln; writeln('Assembly error detected in Pass ',passno:3); writeln; writeln; writeln('Error occurred in or about the line: '); writeln; for i:=1 to numchars do write(line[i]); writeln; for i:=1 to (posn-1) do write(' '); writeln('^'); for i:=1 to (posn-1) do write(' '); writeln('|'); writeln; writeln; write('Error type: '); case errno of xp : writeln('Cross Page Reference.'); dd : writeln('doubly defined symbol, ',newsym); ln : writeln('too long a name, ',newsym); lo : writeln('too large an octal constant'); sx : writeln('syntax error'); undef : writeln('undefined symbol, ',newsym); end; end end; {-----------------------------------------------------------------------} procedure andsymtab; var i: integer; e: entry; begin writeln(object,numsym); for i:=1 to numsym do begin e:=symtab[i]; with e do begin write(object,name,' '); case stype of label8: write(object,' 0 '); mri: write(object,' 1 '); opr: write(object,' 2 '); iot: write(object,' 3 '); indir: write(object,' 4 '); end; octwrite(value,4,object); writeln(object) end; end; writeln(object,'$') end; {-----------------------------------------------------------------------} procedure printsymtab; var i: integer; e: entry; begin writeln; writeln('Symbol table: '); writeln; writeln('Name Type Value Name Type Value'); writeln; for i:=1 to numsym do begin e:=symtab[i]; with e do begin write(name,' '); case stype of label8: write('label '); mri: write('mri '); opr: write('opr '); iot: write('iot '); indir: write('indirect '); end; octwrite(value,4,output); end; if 0 = (i mod 2) then writeln else write(' ') end; end; {-----------------------------------------------------------------------} procedure tab(tabpos : integer); var i : integer; begin i:=1; while i< tabpos do begin write(' '); i:=i+1 end; end; {-----------------------------------------------------------------------} procedure dumpline; var i : integer; begin i:=1; while i< numchars do begin write(line[i]); i:=i+1 end; writeln end; {-----------------------------------------------------------------------} procedure skipline; begin posn:=numchars+1 end; {-----------------------------------------------------------------------} procedure getline; var ch: char; begin numchars:=0; while not eoln(source) do begin read(source,ch); numchars:=numchars+1; if ((ch >= 'A') and (ch<='Z')) then ch:=chr(ord(ch)-ord('A')+ord('a')); line[numchars]:=ch; end; readln(source); line[numchars+1]:=' '; line[numchars+2]:=chr(0); { null character marks end } numchars:=numchars+2; posn:=1 end; {-----------------------------------------------------------------------} function nextsym: token; { Here we have one of those simple transition state type lexical analyzers; This function is intended only to classify the type of the next symbol as a comment, label, origin directive, end directive, an octal constant, an identifier, null line or obvious syntactic error. Nextsym is used directly by PASS1; Nextsym is also used indirectly, via Xnextsym, from PASS2. During the first pass, when merely building up the symbol table, the symbol classifications produced by Nextsym are adequate; a more refined classification is needed by Pass2. Characters being read by "smap" fall into the following categories: 1) $ special end directive character 2) * special origin directive character 3) , distinctive mark for a label, turns a preceeding identifier into a label 4) 0..7 valid characters in an octal constant 5) a..z,0..9 valid characters in an identifier 6) space delimiter (tab is taken as a space) 7) null (special 0 character inserted into line buffer) end of line marker 8) / comment marker We can distinguish the following states: start begining to process next symbol inoct reading an octal constant ident reading an identifier (or maybe a label) The transitions can be summarized as follows (! => set readsym to type of symbol specified (% => set state to state specified ( # => state:=done symbol 1 2 3 4 5 6 7 8 state +---------+-------+-------+-------+-------+-------+------+-------- | # |%inoct | # |%inoct |%ident |%start | # | # start | | | | | | | | |!ending |!origin|!errors|!octal |!identi| | !null|!comment +---------+-------+-------+-------+-------+-------+------+-------- | # | # | # |build | # | # | # | # inoct | | | |octal | | | | |!errors |!errors|!errors|number |!errors| | |!errors +---------+-------+-------+-------+-------+-------+------+-------- | # | # | # |extend |extend | # | # | # ident | | | |name |name | | | |!errors |!errors|!label8| | | | |!errors +---------+-------+-------+-------+-------+-------+------+-------- } type lexstate = (start, ident, inoct, done); var state: lexstate; ch : char; octdigit : boolean; alphabetic : boolean; numeric : boolean; dollars : boolean; comma : boolean; star : boolean; slash : boolean; space : boolean; endln : boolean; readtype : token; octval : 0..4095; symlength : integer; symbuf : array [1..7] of char; errno : integer; i : integer; begin state:=start; octval:=0; symlength:=0; for i:=1 to 6 do symbuf[i]:=' '; while state<>done do begin if posn>numchars then getline; ch:=line[posn]; posn:=posn+1; alphabetic:=(ch in ['a'..'z']); numeric:=(ch in ['0'..'9']); octdigit:=(ch in ['0'..'7']); dollars:=(ch = '$'); star:=(ch = '*'); slash:=(ch = '/'); space:=((ch = ' ') or (ord(ch)=9)); { 9 should be tab character } endln:=(ch = chr(0)); comma:=(ch = ','); case state of start: begin if star then begin readtype:=origin; state:=inoct end else if slash then begin state:=done; readtype:=comment end else if dollars then begin state:=done; readtype:=ending end else if alphabetic then begin readtype:=identifier; state:=ident; symlength:=1; symbuf[symlength]:=ch end else if octdigit then begin readtype:=octal; octval:=ord(ch)-ord('0'); state:=inoct end else if endln then begin readtype:=null; state:=done end else if not space then begin errno:=sx; readtype:=errors; state:=done end end; inoct: begin if octdigit then begin if octval>= 512 then begin errno:=lo; readtype:=errors; state:=done end else begin octval:=octval*8 + (ord(ch)-ord('0')); end end else if space or endln then state:=done else begin errno:=sx; readtype:=errors end end; ident: begin if alphabetic or numeric then begin symlength:=symlength+1; if symlength <= 6 then symbuf[symlength]:=ch; if symlength>6 then begin state:=done; pack(symbuf,1,newsym); errno:=ln; readtype:=errors end; end else if space then state:=done else if comma then begin state:=done; readtype:=labels end end end; end; if readtype=errors then errs(errno,passno); if readtype in [ labels , identifier ] then pack(symbuf,1,newsym); if readtype in [ origin, octal ] then newval:=octval; { case readtype of origin: writeln('origin'); labels: writeln('labels'); identifier: writeln('identifier'); comment: writeln('comment'); octal: writeln('octal'); ending: writeln('ending'); end; } nextsym:=readtype end; {-----------------------------------------------------------------------} function findposn :integer; var top,bottom,sposn: integer; found : boolean; { Find the index number in symtab where "newsysm" is, or if it isn't the place where it should be if keeping things ordered. } begin top:=numsym; bottom:=1; found:=false; while not found do begin sposn:=((top-bottom) div 2) + bottom; { writeln('findposn, top, bottom, sposn ',top,bottom,sposn); writeln('newsym, symtab[sposn].name ',newsym,symtab[sposn].name); } if newsym=symtab[sposn].name then found:=true else if newsym>symtab[sposn].name then begin bottom:=sposn+1; sposn:=bottom end else top:=sposn-1; found:=found or (bottom>top) end; findposn:=sposn end; {-----------------------------------------------------------------------} function xnextsym : xtoken; { Need a more detailed classification of tokens in input stream when doing Pass2. Nextsym's classes are mostly OK, except its class identifier must now be more precisely defined. Identifiers must now be catagorized as variables mem-ref opcodes iot opcodes operate opcodes (indirection symbol) Information needed to do this reclassification is provided by data in symbol table. So when get an identifier, find it in symbol table and return a categorisation based on the data there if its not in the symbol table, then generate an error message for undefined symbol. } var temp : token; sympos : integer; xtemp : xtoken; begin temp:=nextsym; case temp of labels: xtemp:=xlab8; comment: xtemp:=xcomment; octal: xtemp:=xoctal; origin: xtemp:=xorigin; ending: xtemp:=xending; null: xtemp:=xnull; errors: xtemp:=xerrors; identifier: begin sympos:=findposn; { Find where symbol should be } if symtab[sympos].name <> newsym then begin { Symbol missing, record an error and print } { message for an undefined symbol } xtemp:=xerrors; errs(undef,passno); end else begin { Set xtemp according to type of symbol } case symtab[sympos].stype of label8: xtemp:=xvariable; mri: xtemp:=xmriopcode; iot: xtemp:=xiotopcode; opr: xtemp:=xopropcode; indir: xtemp:=xindirectbit; end; { Set newval to value from symbol table } newval:=symtab[sympos].value end; end; end; xnextsym:=xtemp end; {-----------------------------------------------------------------------} procedure insertsymbol; var sposn: integer; { Position where symbol should go in table } n : integer; begin sposn:=findposn; { Find where it should go } if newsym=symtab[sposn].name then { doubly defined symbol } errs(dd,passno) else begin { Insert a new symbol in sequence } n:=numsym; while n>=sposn do begin symtab[n+1]:=symtab[n]; n:=n-1 end; symtab[sposn].name:=newsym; symtab[sposn].stype:=label8; symtab[sposn].value:=where; numsym:=numsym+1 end; end; {-----------------------------------------------------------------------} procedure pass1; { In pass 1, we need to identify and process origin directives, an end directive an labels. Comments can be ignored. Other input lines are accepted unchecked (some more syntax checking done in pass 2) and serve only to increment counter "where". } begin reset(source); { Get ready to read file with source code. } where:=0; { Initialize origin for code/data. } ended:=false; { Mark not yet at end of source text. } numchars:=0; passno:=1; posn:=1; while not (ended or error) do begin case nextsym of comment: skipline; origin: begin where:=newval; end; labels: begin insertsymbol end; ending: ended:=true; octal,identifier: begin where:=where+1; skipline; end; null,errors: end; end; writeln; writeln; if not error then begin writeln('SMAP Pass 1, Symbol table listing:'); writeln; writeln; printsymtab; writeln; writeln end; end; {-----------------------------------------------------------------------} procedure writecode(cval: integer;next : xtoken); { Write code value, cval, to object file, generate listing. Also check that nothing left on input line, for that would be some kind of syntax error. } begin write(object,' '); octwrite(cval,4,object); writeln(object); octwrite(where,4,output); tab(4); octwrite(cval,4,output); tab(8); dumpline; where:=where+1; if not (next in [xcomment,xnull]) then errs(sx,passno) else skipline end; {-----------------------------------------------------------------------} procedure iots; { IOT instructions should occur by themselves, but do have the possiblity of something like iot 632 meaning opcode 6632 so allow nextsym to be null or octal, others are errors. } var next: xtoken; begin printval:=newval; { will have been set by xnextsym } next:=xnextsym; while next = xoctal do begin printval:=orintegers(printval,newval); next:=xnextsym end; writecode(printval,next) end; {-----------------------------------------------------------------------} procedure operate; { Will allow any sequence of successive operate instructions or operate instructions and octal constants } var next : xtoken; begin printval:=newval; next:=xnextsym; while next in [xoctal,xopropcode] do begin printval:=orintegers(printval,newval); next:=xnextsym end; writecode(printval,next) end; {-----------------------------------------------------------------------} procedure memrefs; { Memref variable/octal, or Memref variable/octal, nothing else allowed } var next : xtoken; temp : 0..4095; pcp : 0..4095; dp : 0..4095; begin printval:=newval; next:=xnextsym; if next=xindirectbit then begin printval:=orintegers(printval,newval); next:=xnextsym end; if not (next in [xoctal,xvariable]) then errs(sx,passno) else begin { have to check for page 0, this page or erroneous } { cross page references } if newval<128 then begin { page 0 } printval:=orintegers(printval,newval); writecode(printval,xnextsym) end else begin pcp:=andintegers(where,mask7600); dp:=andintegers(newval,mask7600); if pcp <> dp then errs(xp,passno) else begin temp:=andintegers(newval,mask177); temp:=orintegers(temp,thispage); printval:=orintegers(printval,temp); writecode(printval,xnextsym); end; end; end; end; {-----------------------------------------------------------------------} procedure pass2; begin reset(source); { Get ready to read file with source code. } ended:=false; { Mark not yet at end of source text. } numchars:=0; posn:=1; passno:=2; rewrite(object); where:=0; writeln; writeln; writeln('SMAP Pass 2 Assembly listing.'); writeln; writeln; while not (ended or error) do begin printval:=0; case xnextsym of xcomment: begin tab(20); dumpline; skipline; { skip over comments } end; xorigin: begin { want to output equivalent of an } { origin directive for the loader } write(object,'*'); octwrite(newval,4,object); writeln(object); where:=newval; tab(20); dumpline; if xnextsym <> xnull then errs(sx,passno) end; xoctal,xvariable: begin { just copy octal constants to output } writecode(newval,xnextsym) end; xmriopcode: memrefs; xiotopcode: iots; xopropcode: operate; xending: ended:=true; xnull,xerrors,xlab8: { do nothing } end end; writeln(object,'$'); if error then rewrite(object) else andsymtab end; {-----------------------------------------------------------------------} procedure initialize; var i,j: integer; begin error:=false; { Read standard symbols from an ordered file } reset(symbols,'/pub/211/source/symbols'); readln(symbols,numsym); for i:=1 to numsym do begin with symtab[i] do begin for j:=1 to 6 do read(symbols,name[j]); read(symbols,j); case j of 0: stype:=label8; 1: stype:=mri; 2: stype:=opr; 3: stype:=iot; 4: stype:=indir; end; value:=octread(symbols); readln(symbols); end; end; end; {-----------------------------------------------------------------------} { } { Main program here, } { initialize; } { pass1; } { if no errors then pass2; } {-----------------------------------------------------------------------} begin initialize; pass1; if not error then pass2 end.