implementation module lex import Control.Monad, Control.Applicative import Data.Either, Data.Func, Data.Maybe, Data.Functor from StdFunc import o, const import StdBool import StdList import StdChar import StdString from Text import class Text(textSize,concat), instance Text String import yard lexer :: [Char] -> LexerOutput lexer r = case runParser (lexProgram 1 1) r of (Right p, _) = Right p (Left e, _) = Left e lexProgram :: Int Int -> Parser Char [Token] lexProgram line column = lexToken >>= \t->case t of LexEOF = pure [] (LexItemError e) = fail PositionalError line column ("LexerError: " +++ e) (LexToken c t) = lexProgram line (column+c) >>= \rest->pure [{line=line, column=column, token=t}:rest] LexNL = lexProgram (line+1) 1 (LexSpace l c) = lexProgram (line+l) (column+c) :: LexItem = LexToken Int TokenValue | LexSpace Int Int | LexNL | LexEOF | LexItemError String lexToken :: Parser Char LexItem lexToken = //Comments lexBlockComment <|> lexComment <|> //Keyword tokens lexKw "var" VarToken <|> lexKw "Void" VoidToken <|> lexKw "return" ReturnToken <|> lexKw "if" IfToken <|> lexKw "else" ElseToken <|> lexKw "while" WhileToken <|> lexKw "True" TrueToken <|> lexKw "False" FalseToken <|> lexKw "Int" IntTypeToken <|> lexKw "Bool" BoolTypeToken <|> lexKw "Char" CharTypeToken <|> //Character tokens lexEscape <|> lexCharacter <|> //Two char ops tokens lexWord "::" DoubleColonToken <|> lexWord "!=" NotEqualToken <|> lexWord "<=" LesserEqToken <|> lexWord ">=" GreaterEqToken <|> lexWord "==" EqualsToken <|> lexWord "&&" AmpersandsToken <|> lexWord "||" PipesToken <|> lexWord "[]" EmptyListToken <|> lexWord "->" ArrowToken <|> //One char ops tokens lexWord "(" BraceOpenToken <|> lexWord ")" BraceCloseToken <|> lexWord "{" CBraceOpenToken <|> lexWord "}" CBraceCloseToken <|> lexWord "[" SquareOpenToken <|> lexWord "]" SquareCloseToken <|> lexWord "," CommaToken <|> lexWord ":" ColonToken <|> lexWord ";" SColonToken <|> lexWord "." DotToken <|> lexWord "+" PlusToken <|> lexWord "*" StarToken <|> lexWord "/" SlashToken <|> lexWord "%" PercentToken <|> lexWord "=" AssignmentToken <|> lexWord "<" LesserToken <|> lexWord ">" BiggerToken <|> lexWord "!" ExclamationToken <|> lexWord "-" DashToken <|> //Number and identifier tokens lexNumber <|> lexIdentifier <|> (item '\n' >>| pure LexNL) <|> (satisfy isSpace >>| (pure $ LexSpace 0 1)) <|> (eof >>| pure LexEOF) where lexWord :: String TokenValue -> Parser Char LexItem lexWord s tv = list (fromString s) >>| pure (LexToken (textSize s) tv) lexKw :: String TokenValue -> Parser Char LexItem lexKw kw tv = lexWord kw tv <* check (not o isIdentChar) lexUntilNL = top until (eof <|> (item '\n' >>| pure Void)) lexComment :: Parser Char LexItem lexComment = list (fromString "//") >>| lexUntilNL >>| pure LexNL lexBlockComment :: Parser Char LexItem lexBlockComment = list (fromString "/*") >>| (top until (list (fromString "*/"))) >>= \chars->pure $ widthHeight chars 0 0 where widthHeight :: [Char] Int Int -> LexItem widthHeight [] l c = LexSpace l c widthHeight ['\n':xs] l _ = widthHeight xs (l+1) 0 widthHeight [x:xs] l c = widthHeight xs l (c+1) lexNumber :: Parser Char LexItem lexNumber = toString <$> some (satisfy isDigit) >>= \si->pure $ LexToken (textSize si) (NumberToken $ toInt si) lexIdentifier :: Parser Char LexItem lexIdentifier = toString <$> some (satisfy isIdentChar) >>= \si->pure $ LexToken (textSize si) (IdentToken si) isIdentChar c = isAlphanum c || c == '_' lexCharacter :: Parser Char LexItem lexCharacter = item '\'' *> satisfy ((<>) '\'') <* item '\'' >>= \char->pure $ LexToken 3 (CharToken char) lexEscape :: Parser Char LexItem lexEscape = item '\'' *> item '\\' *> top <* item '\'' >>= \char->pure case char of 'a' = LexToken 4 (CharToken $ toChar 7) 'b' = LexToken 4 (CharToken '\b') 'b' = LexToken 4 (CharToken '\b') 'f' = LexToken 4 (CharToken '\f') 'n' = LexToken 4 (CharToken '\n') 'r' = LexToken 4 (CharToken '\t') 'v' = LexToken 4 (CharToken '\v') '\'' =LexToken 4 (CharToken '\'') c = (LexItemError $ "Unknown escape: " +++ toString c)