1 implementation module lex
3 import Control.Monad, Control.Applicative
4 import Data.Either, Data.Func, Data.Void
16 = LexToken Int TokenValue
22 lexer :: [Char] -> LexerOutput
23 lexer r = fst $ runParser (lexProgram 1 1) r
25 lexProgram :: Int Int -> Parser Char [Token]
26 lexProgram line column = lexToken >>= \t->case t of
28 LexNL = lexProgram (line+1) 1
29 (LexSpace l c) = lexProgram (line+l) (column+c)
30 (LexItemError e) = fail <?>
31 PositionalError line column ("LexerError: " +++ e)
32 (LexToken c t) = lexProgram line (column+c)
33 >>= \rest->pure [({line=line,col=column}, t):rest]
35 lexToken :: Parser Char LexItem
38 lexBlockComment <|> lexComment <|>
40 lexKw "var" VarToken <|> lexKw "Void" VoidToken <|>
41 lexKw "return" ReturnToken <|> lexKw "if" IfToken <|>
42 lexKw "else" ElseToken <|> lexKw "while" WhileToken <|>
43 lexKw "True" TrueToken <|> lexKw "False" FalseToken <|>
44 lexKw "Int" IntTypeToken <|> lexKw "Bool" BoolTypeToken <|>
45 lexKw "Char" CharTypeToken <|>
47 lexEscape <|> lexCharacter <|>
49 lexWord "::" DoubleColonToken <|> lexWord "!=" NotEqualToken <|>
50 lexWord "<=" LesserEqToken <|> lexWord ">=" GreaterEqToken <|>
51 lexWord "==" EqualsToken <|> lexWord "&&" AmpersandsToken <|>
52 lexWord "||" PipesToken <|> lexWord "[]" EmptyListToken <|>
53 lexWord "->" ArrowToken <|>
55 lexWord "(" BraceOpenToken <|> lexWord ")" BraceCloseToken <|>
56 lexWord "{" CBraceOpenToken <|> lexWord "}" CBraceCloseToken <|>
57 lexWord "[" SquareOpenToken <|> lexWord "]" SquareCloseToken <|>
58 lexWord "," CommaToken <|> lexWord ":" ColonToken <|>
59 lexWord ";" SColonToken <|> lexWord "." DotToken <|>
60 lexWord "+" PlusToken <|> lexWord "*" StarToken <|>
61 lexWord "/" SlashToken <|> lexWord "%" PercentToken <|>
62 lexWord "=" AssignmentToken <|> lexWord "<" LesserToken <|>
63 lexWord ">" BiggerToken <|> lexWord "!" ExclamationToken <|>
64 lexWord "-" DashToken <|>
65 //Number and identifier tokens
66 lexNumber <|> lexIdentifier <|>
67 (item '\n' >>| pure LexNL) <|>
69 (satisfy isSpace >>| (pure $ LexSpace 0 1)) <|>
72 lexWord :: String TokenValue -> Parser Char LexItem
73 lexWord s tv = list ls >>| pure (LexToken (length ls) tv)
74 where ls = fromString s
76 lexKw :: String TokenValue -> Parser Char LexItem
77 lexKw kw tv = lexWord kw tv <* check (not o isIdentChar)
79 lexComment :: Parser Char LexItem
80 lexComment = list (fromString "//")
81 >>| top until (eof <|> (item '\n' >>| pure Void)) >>| pure LexNL
83 lexBlockComment :: Parser Char LexItem
84 lexBlockComment = list (fromString "/*")
85 >>| (top until (list (fromString "*/")))
86 >>= \chars->pure $ widthHeight chars 0 0
88 widthHeight :: [Char] Int Int -> LexItem
89 widthHeight [] l c = LexSpace l c
90 widthHeight ['\n':xs] l _ = widthHeight xs (l+1) 0
91 widthHeight [x:xs] l c = widthHeight xs l (c+1)
93 lexNumber :: Parser Char LexItem
94 lexNumber = some (satisfy isDigit) >>= \si->pure $
95 LexToken (length si) (NumberToken $ toInt $ toString si)
97 lexIdentifier :: Parser Char LexItem
98 lexIdentifier = some (satisfy isIdentChar)
99 >>= \si->pure $ LexToken (length si) (IdentToken $ toString si)
101 isIdentChar c = isAlphanum c || c == '_'
103 lexCharacter :: Parser Char LexItem
104 lexCharacter = item '\'' *> satisfy ((<>) '\'') <* item '\''
105 >>= \char->pure $ LexToken 3 (CharToken char)
107 lexEscape :: Parser Char LexItem
108 lexEscape = item '\'' *> item '\\' *> top <* item '\''
109 >>= \char->pure case char of
110 'a' = LexToken 4 (CharToken $ toChar 7)
111 'b' = LexToken 4 (CharToken '\b')
112 'b' = LexToken 4 (CharToken '\b')
113 'f' = LexToken 4 (CharToken '\f')
114 'n' = LexToken 4 (CharToken '\n')
115 'r' = LexToken 4 (CharToken '\t')
116 'v' = LexToken 4 (CharToken '\v')
117 '\'' =LexToken 4 (CharToken '\'')
118 c = (LexItemError $ "Unknown escape: " +++ toString c)