implementation module lex import Control.Monad, Control.Applicative import Data.Either, Data.Func, Data.Void import Data.Map from StdFunc import o import StdBool import StdList import StdChar import StdString import StdTuple import yard import AST :: LexItem = LexToken Int TokenValue | LexSpace Int Int | LexNL | LexEOF | LexItemError String escapes :: Map Char Char escapes = fromList [('a', toChar 7), ('b', '\b'), ('f', '\f'), ('n', '\n'), ('r', '\r'), ('t', '\t'), ('v', '\v'), ('\'', '\''), ('"', '"')] lexer :: [Char] -> LexerOutput lexer r = fst $ runParser (lexProgram 1 1) r lexProgram :: Int Int -> Parser Char [Token] lexProgram line column = lexToken >>= \t->case t of LexEOF = pure [] LexNL = lexProgram (line+1) 1 (LexSpace l c) = lexProgram (line+l) (column+c) (LexItemError e) = fail PositionalError line column ("LexerError: " +++ e) (LexToken c t) = lexProgram line (column+c) >>= \rest->pure [({line=line,col=column}, t):rest] lexToken :: Parser Char LexItem lexToken = //Comments lexBlockComment <|> lexComment <|> //Keyword tokens lexKw "var" VarToken <|> lexKw "Void" VoidToken <|> lexKw "return" ReturnToken <|> lexKw "if" IfToken <|> lexKw "else" ElseToken <|> lexKw "while" WhileToken <|> lexKw "True" TrueToken <|> lexKw "False" FalseToken <|> lexKw "Int" IntTypeToken <|> lexKw "Bool" BoolTypeToken <|> lexKw "Char" CharTypeToken <|> lexKw "let" LetToken <|> //Character tokens lexEscape <|> lexCharacter <|> //Two char ops tokens lexWord "::" DoubleColonToken <|> lexWord "!=" NotEqualToken <|> lexWord "<=" LesserEqToken <|> lexWord ">=" GreaterEqToken <|> lexWord "==" EqualsToken <|> lexWord "&&" AmpersandsToken <|> lexWord "||" PipesToken <|> lexWord "[]" EmptyListToken <|> lexWord "->" ArrowToken <|> //One char ops tokens lexWord "(" BraceOpenToken <|> lexWord ")" BraceCloseToken <|> lexWord "{" CBraceOpenToken <|> lexWord "}" CBraceCloseToken <|> lexWord "[" SquareOpenToken <|> lexWord "]" SquareCloseToken <|> lexWord "," CommaToken <|> lexWord ":" ColonToken <|> lexWord ";" SColonToken <|> lexWord "." DotToken <|> lexWord "+" PlusToken <|> lexWord "*" StarToken <|> lexWord "/" SlashToken <|> lexWord "%" PercentToken <|> lexWord "=" AssignmentToken <|> lexWord "<" LesserToken <|> lexWord ">" BiggerToken <|> lexWord "!" ExclamationToken <|> lexWord "-" DashToken <|> lexWord "\\" BackslashToken <|> //Number and identifier tokens lexString <|> lexNumber <|> lexIdentifier <|> (item '\n' >>| pure LexNL) <|> //Whitespace (satisfy isSpace >>| (pure $ LexSpace 0 1)) <|> (eof >>| pure LexEOF) where lexWord :: String TokenValue -> Parser Char LexItem lexWord s tv = list ls >>| pure (LexToken (length ls) tv) where ls = fromString s lexKw :: String TokenValue -> Parser Char LexItem lexKw kw tv = lexWord kw tv <* check (not o isIdentChar) lexComment :: Parser Char LexItem lexComment = list (fromString "//") >>| top until (eof <|> (item '\n' >>| pure Void)) >>| pure LexNL lexBlockComment :: Parser Char LexItem lexBlockComment = list (fromString "/*") >>| (top until (list (fromString "*/"))) >>= \chars->pure $ widthHeight chars 0 0 where widthHeight :: [Char] Int Int -> LexItem widthHeight [] l c = LexSpace l c widthHeight ['\n':xs] l _ = widthHeight xs (l+1) 0 widthHeight [x:xs] l c = widthHeight xs l (c+1) lexNumber :: Parser Char LexItem lexNumber = some (satisfy isDigit) >>= \si->pure $ LexToken (length si) (NumberToken $ toInt $ toString si) lexIdentifier :: Parser Char LexItem lexIdentifier = some (satisfy isIdentChar) >>= \si->pure $ LexToken (length si) (IdentToken $ toString si) isIdentChar c = isAlphanum c || c == '_' lexCharacter :: Parser Char LexItem lexCharacter = item '\'' *> satisfy ((<>) '\'') <* item '\'' >>= \char->pure $ LexToken 3 (CharToken char) lexEscape :: Parser Char LexItem lexEscape = item '\'' *> item '\\' *> top <* item '\'' >>= \char->pure case get char escapes of Just e = LexToken 4 (CharToken e) Nothing = LexItemError $ "Unknown escape: " +++ toString char lexString :: Parser Char LexItem lexString = item '"' *> ( many ( (satisfy (\c->c <> '"' && c <> '\\')) <|> (item '\\' *> top >>= \char->case get char escapes of Just e = pure e Nothing = empty) ))<* item '"' >>= \cs-> pure $ LexToken (length cs) (StringToken cs)