e3c8484e7f888bd57995cd0db92e8426e52a96f8
[cc1516.git] / src / lex.icl
1 implementation module lex
2
3 import Control.Monad, Control.Applicative
4 import Data.Either, Data.Func
5 from StdFunc import o
6 import StdBool
7 import StdList
8 import StdChar
9
10 import yard
11
12 lexer :: [Char] -> LexerOutput
13 lexer r = case runParser lexProgram r of
14 (Right p, _) = Right p
15 (Left e, _) = Left $ toString e
16
17 lexProgram :: Parser Char [Token]
18 lexProgram = some lexToken <* many (satisfy isSpace) <* eof
19 >>= \ts->pure (map (\t->(0, 0, t)) ts)
20
21 lexToken :: Parser Char TokenValue
22 lexToken =
23 //Keyword tokens
24 (lexKw "var" VarToken) <|>
25 (lexKw "Void" VoidToken) <|>
26 (lexKw "return" ReturnToken) <|>
27 (lexKw "if" IfToken) <|>
28 (lexKw "else" ElseToken) <|>
29 (lexKw "while" WhileToken) <|>
30 (lexKw "True" TrueToken) <|>
31 (lexKw "False" FalseToken) <|>
32 (lexKw "Int" IntTypeToken) <|>
33 (lexKw "Bool" BoolTypeToken) <|>
34 (lexKw "Char" CharTypeToken) <|>
35 //Escape chars tokens
36 (liftM CharToken $ item '\'' *> item '\\' *> lexEscape <* item '\'') <|>
37 //Normal chars tokens
38 (liftM CharToken $ item '\'' *> satisfy ((<>) '\'') <* item '\'') <|>
39 //Two char ops tokens
40 (lexOp "::" DoubleColonToken) <|> (lexOp "!=" NotEqualToken) <|>
41 (lexOp "<=" LesserEqToken) <|> (lexOp ">=" GreaterEqToken) <|>
42 (lexOp "==" EqualsToken) <|> (lexOp "&&" AmpersandsToken) <|>
43 (lexOp "||" PipesToken) <|> (lexOp "[]" EmptyListToken) <|>
44 (lexOp "->" ArrowToken) <|> (lexOp "(" BraceOpenToken) <|>
45 //One char ops tokens
46 (lexOp ")" BraceCloseToken) <|> (lexOp "{" CBraceOpenToken) <|>
47 (lexOp "}" CBraceCloseToken) <|> (lexOp "[" SquareOpenToken) <|>
48 (lexOp "]" SquareCloseToken) <|> (lexOp "," CommaToken) <|>
49 (lexOp ":" ColonToken) <|> (lexOp ";" SColonToken) <|>
50 (lexOp "." DotToken) <|> (lexOp "+" PlusToken) <|>
51 (lexOp "*" StarToken) <|> (lexOp "/" SlashToken) <|>
52 (lexOp "%" PercentToken) <|> (lexOp "=" AssignmentToken) <|>
53 (lexOp "<" LesserToken) <|> (lexOp ">" BiggerToken) <|>
54 (lexOp "!" ExclamationToken) <|> (lexOp "-" DashToken) <|>
55 //Number tokens
56 (liftM (NumberToken o toInt o toString) $ some $ satisfy isDigit) <|>
57 //Ident tokens
58 (liftM IdentToken $ some $ satisfy isIdentChar) <|>
59 (satisfy isSpace >>| lexToken) //<|>
60 // (eof >>| pure EndOfFileToken)
61 where
62 isIdentChar c = isAlphanum c || c == '_'
63 lexOp s tv = list (fromString s) >>| pure tv
64 lexKw kw tv = lexOp kw tv <* check (not o isIdentChar) >>| pure tv
65 lexEscape = (
66 lexOp "a" (toChar 7) <|> lexOp "b" '\b' <|> lexOp "f" '\f' <|>
67 lexOp "n" '\n' <|> lexOp "r" '\t' <|> lexOp "v" '\v' <|>
68 lexOp "'" '\'') <?> ("Unknown escape", 0)