-- lexical scanner by M.S.Ter Haseborg jul 2004
-- L_in(zoek_set) => { skip_string , zoek_string , zoek_index }
--
-- L_init(filenumber/text_string)
-- L_end()
--
-- L_any(skip_char's)
-- L_nany(not skip_char's)
--
-- zoek_set : a) set of matching character's
-- b) sequence of name's (v.b. { name , name .. } )
-- name : sequence of character('s)
-- : sequense of { start_set of character('s) , match_set of character('s) }
--
-- zoek_index : -1 is EOF , 0 no match else index of match in zoek_string
-- as done now within a multiple match the nearest one to begin of line is
-- used and also is the lowest order in zoek_string . that means that not by
-- default it is the longest . {"aap","aap's"} if both started on index x
-- the secound one never is found . so put them otherway around .
--
-- local : line bevat de te scannen character's
-- local : skip bevat de te character welke niet worden beschouwd tot het start symbol
--
-- als geen L_any({}) wordt gegeven wordt de match beschouwdt als splitsing
-- van text voor zoek_set en zoek_set .
-- als een L_any(metiets) or een Lnany(metiets) wordt gezet dan wordt de match
-- gezien als een aaneensluiting van metiets&zoek_set of ~metiets&zoek_set .
-- als L_zoek {}(leeg) is betekend dit dat de match 'has failed'
sequence skip_ -- skip set
integer anchor_ -- mode
constant TRUE=1 , FALSE = 0
-- Algemene File afhandeling --
--structure fileinfostruc
-- sequence filename
-- sequence currentline
-- integer characcess
-- integer linenumber
--end structure
constant FILENAME=1,CURRENTLINE=2,LINENUMBER=3,SKIP=4,ANCHOR=5
sequence fileinfo={} -- stack of inuse file's
sequence line -- analiserende regel
integer linenumber -- aantal gelezen regels
integer filenum -- als het een file is dan >=0
-- init handling : file or text
global function L_init(sequence filenumber)
if length(fileinfo)=0 then
fileinfo={{0,"",0,0,0}}
else
fileinfo[length(fileinfo)][CURRENTLINE]=line
fileinfo[length(fileinfo)][LINENUMBER] =linenumber
fileinfo[length(fileinfo)][SKIP] =skip_
fileinfo[length(fileinfo)][ANCHOR] =anchor_
fileinfo&={{filenumber,"",0,0,0}}
end if
if atom(filenumber) then
filenum=filenumber
line=gets(filenum)
else
filenum=-1
line=filenumber
end if
linenumber=1
if sequence(line) then
return TRUE
else
return FALSE
end if
end function
global function L_end()
if length(fileinfo)>1 then
filenum =fileinfo[length(fileinfo)][FILENAME]
line =fileinfo[length(fileinfo)][CURRENTLINE]
linenumber=fileinfo[length(fileinfo)][LINENUMBER]
skip_ =fileinfo[length(fileinfo)][SKIP]
anchor_ =fileinfo[length(fileinfo)][ANCHOR]
fileinfo =fileinfo[1..length(fileinfo)]
return TRUE
else
fileinfo ={}
return FALSE
end if
end function
function getline()
if filenum>0 then return gets(filenum) else return -1 end if
end function
-- Locale hulp functie's --
function min(sequence list)
-- bepaal de index van het kleinste element > 0
integer result=0
for i=1 to length(list) do
if list[i]!=0
then if result=0
then result=i
elsif list[result]>list[i]
then result=i
end if
end if
end for
return result
end function
-- SKIP functie's --
global procedure L_any(sequence skip)
skip_= repeat(0,128)
anchor_=0
for i=1 to length(skip) do
skip_[skip[i]]=1
anchor_=1
end for
end procedure
global procedure L_nany(sequence skip)
skip_= repeat(1,128)
anchor_=1
for i=1 to length(skip) do
skip_[skip[i]]=0
end for
end procedure
-- De Lexical Analyser --
global constant L_SKIP=1,L_SEARCH=2,L_INDEX=3
-- structure L_result = skip_string , search_string , search_index
global sequence L_result
global procedure L_in(sequence search)
integer index,search_index
sequence klad
-- used by anchor=0
sequence eof
integer subindex
L_result={{},{},0}
-- search = "asdfgh" is same as "a"|"b"|"d"|"f"|"g"|"h" is [asdfgh]?
-- deze eerste ben ik nog niet zeker van . of moet dit [asdfgh]+
--
-- = {"a",":","karel"} is as "a" | ":" | "karel"
-- = {"procedure",{"startsymbolset","restsymbolset"}}
-- maybe todo
-- = {"procedure","%s","%d","%i","%c","%h"}
-- is "procedure" | string | float | integer | char | hex
-- string is [ ~skipset ]* /or should it be a IDENT def. ?
-- float is [0-9]+(.[0-9]+|e[-+]?[0-9]+)
-- integer is [0-9]+
-- hex is [0-9a-fA-F]+
-- char is [.]? /is this usefull ?
if length(line)=0 then line=getline() end if
-- if string or EOF then return with {},{},-1
if atom(line) then L_result[L_INDEX]=line return end if
-- start at first element of 'line'
-- if not in skip then it must be in search other it fail's .
index = 1
while skip_[line[index]] do
index+=1
if index>length(line)
then eof=getline()-- get line
if atom(eof) then L_result[L_INDEX]=eof exit end if
line&=eof
end if
end while
if index>1
then L_result[L_SKIP]=line[1..index-1]
end if
-- eof
if L_result[L_INDEX]!=0 then return end if
-- als een der elementen langer is dan index..length(line) haal eerst een line op
if not sequence(search[1])
then -- acts as [zoek_string]+
search_index=find(line[index],search)
if search_index!=0
then L_result[L_INDEX]=search_index
L_result[L_SEARCH]=line[index]
line=line[index+1..]
end if
else search_index=0
if anchor_
then -- &ANCHOR=1 : line ANY(skip) . L_skip zoekset . L_search =
for i=1 to length(search) do
if sequence(search[i][1])
then if find(line[index],search[i][1])
then search_index=i exit end if
else if 1=match(search[i],line[index..])
then search_index=i exit end if
end if
end for
else -- &ANCHOR=0 : line ARB . L_skip zoekset . L_search =
klad=repeat(0,length(search))
for i=1 to length(search) do
if sequence(search[i][1])
then for j=index to length(line) do
if find(line[j],search[i][1])
then klad[i]=j exit end if
end for
else klad[i]=match(search[i],line[index..])
end if
end for
search_index=min(klad)
while search_index=0 do
eof=getline()
if atom(eof) then L_result[L_INDEX]=eof exit end if
subindex=length(line)+1
line&=eof
for i=1 to length(search) do
if sequence(search[i][1])
then for j=subindex to length(line) do
if find(line[j],search[i][1])
then klad[i]=j exit end if
end for
else klad[i]=match(search[i],line[subindex..])
if klad[i] then klad[i]+=subindex-1 end if
end if
end for
search_index=min(klad)
end while
-- zet de string voor de 'match' in SKIP
if search_index
then index=klad[search_index]+index-1
if index>1
then L_result[L_SKIP]=line[1..index-1]
end if
end if
-- end anchor=0
end if
-- geef het eerste 'match' punt .
if search_index!=0
then L_result[L_INDEX]=search_index
if sequence(search[search_index][1])
then -- bepaal waar het laaste char is in de tweede set van i
-- same as [charset]*
subindex=index+1 --this stops at eol/eof
while length(line)>=subindex do
if find(line[subindex],search[search_index][2])
then subindex +=1 -- in fact if a overflow happens it should load a new line ?
else exit
end if
end while
else subindex=index+length(search[search_index])
end if
L_result[L_SEARCH]=line[index..subindex-1]
line=line[subindex..]
end if
end if
end procedure
-- Error message --
global procedure L_error(sequence mes)
printf(1,"%s (at Line=%d Lex result=%s(skip)%s(token)%d(index))\n",{mes,linenumber,L_result[1],L_result[2],L_result[3]})
puts(1,line)
abort(1)
end procedure
-----------------------------------------------------------------------------