-- lexical scanner by M.S.Ter Haseborg jul 2004

-- L_in(zoek_set) => { skip_string , zoek_string , zoek_index }
--
-- L_init(filenumber/text_string)
-- L_end()
--
-- L_any(skip_char's)
-- L_nany(not skip_char's)
-- 
-- zoek_set : a) set of matching character's
--            b) sequence of name's (v.b. { name , name .. } )
-- name  : sequence of character('s)
--       : sequense of { start_set of character('s) , match_set of character('s) }
-- 
-- zoek_index : -1 is EOF , 0 no match else index of match in zoek_string
-- as done now within a multiple match the nearest one to begin of line is
-- used and also is the lowest order in zoek_string . that means that not by
-- default it is the longest . {"aap","aap's"} if both started on index x
-- the secound one never is found . so put them otherway around .
--
-- local : line bevat de te scannen character's
-- local : skip bevat de te character welke niet worden beschouwd tot het start symbol
--
-- als geen L_any({}) wordt gegeven wordt de match beschouwdt als splitsing
-- van text voor zoek_set en zoek_set .
-- als een L_any(metiets) or een Lnany(metiets) wordt gezet dan wordt de match
-- gezien als een aaneensluiting van metiets&zoek_set of ~metiets&zoek_set .
-- als L_zoek {}(leeg) is betekend dit dat de match 'has failed'

sequence skip_    -- skip set
integer  anchor_  -- mode
constant TRUE=1 , FALSE = 0 

-- Algemene File afhandeling --

--structure fileinfostruc
--  sequence filename
--  sequence currentline
--  integer  characcess
--  integer  linenumber
--end structure                

constant FILENAME=1,CURRENTLINE=2,LINENUMBER=3,SKIP=4,ANCHOR=5
sequence fileinfo={} -- stack of inuse file's
sequence line        -- analiserende regel
integer  linenumber  -- aantal gelezen regels
integer  filenum     -- als het een file is dan >=0

-- init handling : file or text
global function L_init(sequence filenumber)
if length(fileinfo)=0 then 
  fileinfo={{0,"",0,0,0}}
else 
  fileinfo[length(fileinfo)][CURRENTLINE]=line
  fileinfo[length(fileinfo)][LINENUMBER] =linenumber
  fileinfo[length(fileinfo)][SKIP]       =skip_
  fileinfo[length(fileinfo)][ANCHOR]     =anchor_
  fileinfo&={{filenumber,"",0,0,0}}
end if 

if atom(filenumber) then
  filenum=filenumber
  line=gets(filenum)
else
  filenum=-1
  line=filenumber 
end if    
linenumber=1 

if sequence(line) then
  return TRUE
else
  return FALSE
end if
end function    

global function L_end()
if length(fileinfo)>1 then
  filenum   =fileinfo[length(fileinfo)][FILENAME]
  line      =fileinfo[length(fileinfo)][CURRENTLINE]
  linenumber=fileinfo[length(fileinfo)][LINENUMBER]
  skip_     =fileinfo[length(fileinfo)][SKIP]
  anchor_   =fileinfo[length(fileinfo)][ANCHOR]
  fileinfo  =fileinfo[1..length(fileinfo)]
  return TRUE
else   
  fileinfo  ={}
  return FALSE
end if
end function     

function getline()
if filenum>0 then return gets(filenum) else return -1 end if 
end function  

-- Locale hulp functie's --

function min(sequence list)
-- bepaal de index van het kleinste element > 0
integer result=0
 for i=1 to length(list) do
   if list[i]!=0 
   then if    result=0 
        then  result=i
        elsif list[result]>list[i]
        then  result=i
        end if
   end if     
 end for
 return result
end function 

-- SKIP functie's --

global procedure L_any(sequence skip)
skip_= repeat(0,128)
anchor_=0
for i=1 to length(skip) do
    skip_[skip[i]]=1
    anchor_=1
end for       

end procedure

global procedure L_nany(sequence skip)
skip_= repeat(1,128) 
anchor_=1
for i=1 to length(skip) do
    skip_[skip[i]]=0
end for
end procedure

-- De Lexical Analyser --

global constant L_SKIP=1,L_SEARCH=2,L_INDEX=3
-- structure L_result = skip_string , search_string , search_index
global sequence L_result

global procedure L_in(sequence search)
integer  index,search_index
sequence klad
-- used by anchor=0
sequence eof
integer  subindex

   L_result={{},{},0}
-- search = "asdfgh" is same as "a"|"b"|"d"|"f"|"g"|"h" is [asdfgh]? 
--           deze eerste ben ik nog niet zeker van . of moet dit [asdfgh]+
--
--        = {"a",":","karel"} is as "a" | ":" | "karel" 
--        = {"procedure",{"startsymbolset","restsymbolset"}}
-- maybe todo
--        = {"procedure","%s","%d","%i","%c","%h"} 
--        is "procedure" | string | float | integer | char | hex
-- string  is [ ~skipset ]*                  /or should it be a IDENT def. ?
-- float   is [0-9]+(.[0-9]+|e[-+]?[0-9]+)
-- integer is [0-9]+
-- hex     is [0-9a-fA-F]+
-- char    is [.]?                           /is this usefull ?

   if length(line)=0 then line=getline() end if
-- if string or EOF then return with {},{},-1   
   if atom(line) then L_result[L_INDEX]=line return end if
-- start at first element of 'line' 
-- if not in skip then it must be in search other it fail's .
   index = 1
   
   while skip_[line[index]] do
      index+=1
      if index>length(line)
      then eof=getline()-- get line
           if atom(eof) then L_result[L_INDEX]=eof exit end if
           line&=eof
      end if 
   end while
   if index>1 
   then L_result[L_SKIP]=line[1..index-1]
   end if 
   
   -- eof 
   if L_result[L_INDEX]!=0 then return end if
   
   -- als een der elementen langer is dan index..length(line) haal eerst een line op
   if not sequence(search[1])
   then -- acts as [zoek_string]+
        search_index=find(line[index],search)
        if search_index!=0
        then L_result[L_INDEX]=search_index
             L_result[L_SEARCH]=line[index]
             line=line[index+1..]
        end if     
   else search_index=0 
        if anchor_ 
        then -- &ANCHOR=1 : line ANY(skip) . L_skip zoekset . L_search =
          for i=1 to length(search) do 
            if sequence(search[i][1])
            then if find(line[index],search[i][1])
                 then search_index=i exit end if
            else if 1=match(search[i],line[index..])
                 then search_index=i exit end if
            end if
          end for
        else -- &ANCHOR=0 : line ARB . L_skip zoekset . L_search =
          klad=repeat(0,length(search))
          for i=1 to length(search) do  
            if sequence(search[i][1])
            then for j=index to length(line) do
                 if find(line[j],search[i][1])
                 then klad[i]=j exit end if
                 end for
            else klad[i]=match(search[i],line[index..])
            end if
          end for
          search_index=min(klad) 
          while search_index=0 do 
             eof=getline()
             if atom(eof) then L_result[L_INDEX]=eof exit end if
             subindex=length(line)+1   
             line&=eof
             for i=1 to length(search) do
                 if sequence(search[i][1])
                 then for j=subindex to length(line) do
                          if find(line[j],search[i][1])
                          then klad[i]=j exit end if
                      end for
                 else klad[i]=match(search[i],line[subindex..])
                      if klad[i] then klad[i]+=subindex-1 end if
                 end if     
             end for     
             search_index=min(klad)
          end while  
          -- zet de string voor de 'match' in SKIP 
          if search_index
          then index=klad[search_index]+index-1 
             if index>1
             then L_result[L_SKIP]=line[1..index-1]
             end if
          end if
        -- end anchor=0
        end if   
        
        -- geef het eerste 'match' punt .     
        if search_index!=0
        then L_result[L_INDEX]=search_index
             if sequence(search[search_index][1])
             then -- bepaal waar het laaste char is in de tweede set van i 
                  -- same as [charset]*
                  subindex=index+1 --this stops at eol/eof 
                  while length(line)>=subindex do 
                    if find(line[subindex],search[search_index][2]) 
                    then subindex +=1 -- in fact if a overflow happens it should load a new line ? 
                    else exit
                    end if
                  end while
             else subindex=index+length(search[search_index])
             end if
             L_result[L_SEARCH]=line[index..subindex-1]
             line=line[subindex..]
        end if     
   end if
end procedure

-- Error message --

global procedure L_error(sequence mes)
   printf(1,"%s (at Line=%d Lex result=%s(skip)%s(token)%d(index))\n",{mes,linenumber,L_result[1],L_result[2],L_result[3]}) 
   puts(1,line) 
   abort(1)  
end procedure

-----------------------------------------------------------------------------