implementation module lex from Text import class Text, instance Text String import Control.Monad import Data.Either import Data.Functor import Data.List import StdString import StdBool import StdFile import System.CommandLine import qualified Text Start :: *World -> *World Start w # (out, w) = stdio w # (toparse, out) = readEntireFile out # (_, w) = fclose (out <<< "\n" <<< toString (lexer toparse)) w = w readEntireFile :: *File -> *([Char], *File) readEntireFile f # (b, c, f) = freadc f | not b = ([], f) # (cs, f) = readEntireFile f = ([c:cs], f) lexer :: [Char] -> LexerOutput lexer [] = Right [] lexer x = case lex x of (Right t, rest) = lexer rest >>= \ts.Right [t:ts] (Left e, _) = Left e untilNext :: [Char] -> [Char] untilNext [] = [] untilNext ['*':'/':rest] = rest untilNext [x:rest] = untilNext rest lex :: [Char] -> (Either String Token, [Char]) lex [] = (Right EndOfFileToken, []) //Comments lex ['/':'/':rest] = lex (tl (dropWhile ((<>) '\n') rest)) lex ['/':'*':rest] = lex (untilNext rest) //Keyword tokens lex ['v':'a':'r':rest] = (Right VarToken, rest) lex ['V':'o':'i':'d':rest] = (Right VoidToken, rest) lex ['r':'e':'t':'u':'r':'n':rest] = (Right ReturnToken, rest) lex ['i':'f':rest] = (Right IfToken, rest) lex ['e':'l':'s':'e':rest] = (Right ElseToken, rest) lex ['w':'h':'i':'l':'e':rest] = (Right WhileToken, rest) lex ['T':'r':'u':'e':rest] = (Right TrueToken, rest) lex ['F':'a':'l':'s':'e':rest] = (Right FalseToken, rest) lex ['I':'n':'t':rest] = (Right IntTypeToken, rest) lex ['C':'h':'a':'r':rest] = (Right CharTypeToken, rest) lex ['B':'o':'o':'l':rest] = (Right BoolTypeToken, rest) //Two character tokens lex [':':':':rest] = (Right DoubleColonToken, rest) lex ['<':'=':rest] = (Right LesserEqToken, rest) lex ['>':'=':rest] = (Right GreaterEqToken, rest) lex ['=':'=':rest] = (Right EqualsToken, rest) lex ['&':'&':rest] = (Right AmpersandsToken, rest) lex ['|':'|':rest] = (Right PipesToken, rest) lex ['-':'>':rest] = (Right ArrowToken, rest) //One character tokens lex ['(':rest] = (Right BraceOpenToken, rest) lex [')':rest] = (Right BraceCloseToken, rest) lex ['{':rest] = (Right CBraceOpenToken, rest) lex ['}':rest] = (Right CBraceCloseToken, rest) lex ['[':rest] = (Right SquareOpenToken, rest) lex [']':rest] = (Right SquareCloseToken, rest) lex [',':rest] = (Right CommaToken, rest) lex [':':rest] = (Right ColonToken, rest) lex [';':rest] = (Right SColonToken, rest) lex ['.':rest] = (Right DotToken, rest) lex ['+':rest] = (Right PlusToken, rest) lex ['-':rest] = (Right DashToken, rest) lex ['*':rest] = (Right StarToken, rest) lex ['/':rest] = (Right SlashToken, rest) lex ['%':rest] = (Right PercentToken, rest) lex ['=':rest] = (Right AssignmentToken, rest) lex ['<':rest] = (Right LesserToken, rest) lex ['>':rest] = (Right BiggerToken, rest) lex ['!':rest] = (Right ExclamationToken, rest) //Value tokens lex ['\'':x:'\'':rest] = (Right (CharToken x), rest) lex ['\'':'\\':x:'\'':rest] = case x of 'a' = (Right (CharToken (toChar 7)), rest) //Alarm 'b' = (Right (CharToken '\b'), rest) //Backspace 'f' = (Right (CharToken '\f'), rest) //Formfeed 'n' = (Right (CharToken '\n'), rest) //Newline 'r' = (Right (CharToken '\r'), rest) //Carriage Return 't' = (Right (CharToken '\t'), rest) //Horizontal tab 'v' = (Right (CharToken '\v'), rest) //Vertical tab _ = (Left ("Unknown escape: \\" +++ toString x), []) lex [x:xs] | isSpace x = lex xs | isDigit x = let (is, rest) = span isDigit xs in (Right (NumberToken [x:is]), rest) | isAlpha x = let (is, rest) = span (\c.isAlphanum c || c == '_') xs in (Right (IdentToken [x:is]), rest) | otherwise = (Left ("Unexpected character: " +++ toString x), []) instance toString LexerOutput where toString (Left l) = "Error: " +++ l toString (Right x) = 'Text'.concat (print 0 x) tab :: Int -> [String] tab i = replicate i "\t" print :: Int [Token] -> [String] print _ [] = [] print i [(IdentToken l):rest] = [toString l:print i rest] print i [(NumberToken j):rest] = [toString j:print i rest] print i [(CharToken c):rest] = ["'":toString c:"'":print i rest] print i [VarToken:rest] = ["var ":print i rest] print i [ReturnToken:rest] = ["return ":print i rest] print i [IfToken:rest] = ["if":print i rest] print i [ElseToken:rest] = ["else":print i rest] print i [WhileToken:rest] = ["while":print i rest] print i [TrueToken:rest] = ["True":print i rest] print i [FalseToken:rest] = ["False":print i rest] print i [VoidToken:rest] = ["Void":print i rest] print i [IntTypeToken:rest] = ["Int":print i rest] print i [CharTypeToken:rest] = ["Char":print i rest] print i [BoolTypeToken:rest] = ["Bool":print i rest] print i [DoubleColonToken:rest] = [" :: ":print i rest] print i [LesserEqToken:rest] = [" <= ":print i rest] print i [GreaterEqToken:rest] = [" >= ":print i rest] print i [EqualsToken:rest] = [" == ":print i rest] print i [AmpersandsToken:rest] = [" && ":print i rest] print i [PipesToken:rest] = [" || ":print i rest] print i [ArrowToken:rest] = [" -> ":print i rest] print i [BraceOpenToken:rest] = ["(":print i rest] print i [BraceCloseToken:rest] = [")":print i rest] print i [CBraceOpenToken:rest] = ["{\n":tab (i+1)] ++ print (i+1) rest print i [CBraceCloseToken:rest] = case rest of [CBraceCloseToken:_] = ["}\n":tab (i-2)] ++ print (i-1) rest _ = ["}\n":tab (i-1)] ++ print (i-1) rest print i [SquareOpenToken:rest] = ["[":print i rest] print i [SquareCloseToken:rest] = ["]":print i rest] print i [CommaToken:rest] = [", ":print i rest] print i [ColonToken:rest] = [":":print i rest] print i [SColonToken:rest] = case rest of [CBraceCloseToken:_] = [";\n":tab (i-1)] ++ print i rest _ = [";\n":tab i] ++ print i rest print i [DotToken:rest] = [".":print i rest] print i [PlusToken:rest] = [" + ":print i rest] print i [DashToken:rest] = [" - ":print i rest] print i [StarToken:rest] = [" * ":print i rest] print i [SlashToken:rest] = [" / ":print i rest] print i [PercentToken:rest] = [" % ":print i rest] print i [AssignmentToken:rest] = [" = ":print i rest] print i [LesserToken:rest] = [" < ":print i rest] print i [BiggerToken:rest] = [" > ":print i rest] print i [ExclamationToken:rest] = ["!":print i rest] print i [EndOfFileToken:rest] = ["\n":print i rest]