a6f31ab8eb0e4abad9d597e629c9e0bfc091bfef
[cc1516.git] / src / lex.icl
1 implementation module lex
2
3 import Control.Monad, Control.Applicative
4 import Data.Either, Data.Func
5 from StdFunc import o
6 import StdBool
7 import StdList
8 import StdChar
9
10 import yard
11
12 lexer :: [Char] -> LexerOutput
13 lexer r = case runParser lexProgram r of
14 (Right p, _) = Right p
15 (Left e, _) = Left $ toString e
16
17 lexProgram :: Parser Char [Token]
18 lexProgram = some lexToken >>= \ts->pure (map (\t->(0, 0, t)) ts)
19
20 lexToken :: Parser Char TokenValue
21 lexToken =
22 //Keyword tokens
23 (lexKw "var" VarToken) <|>
24 (lexKw "Void" VoidToken) <|>
25 (lexKw "return" ReturnToken) <|>
26 (lexKw "if" IfToken) <|>
27 (lexKw "else" ElseToken) <|>
28 (lexKw "while" WhileToken) <|>
29 (lexKw "True" TrueToken) <|>
30 (lexKw "False" FalseToken) <|>
31 (lexKw "Int" IntTypeToken) <|>
32 (lexKw "Bool" BoolTypeToken) <|>
33 (lexKw "Char" CharTypeToken) <|>
34 //Escape chars tokens
35 (liftM CharToken $ item '\'' *> item '\\' *> lexEscape <* item '\'') <|>
36 //Normal chars tokens
37 (liftM CharToken $ item '\'' *> satisfy ((<>) '\'') <* item '\'') <|>
38 //Two char ops tokens
39 (lexOp "::" DoubleColonToken) <|> (lexOp "!=" NotEqualToken) <|>
40 (lexOp "<=" LesserEqToken) <|> (lexOp ">=" GreaterEqToken) <|>
41 (lexOp "==" EqualsToken) <|> (lexOp "&&" AmpersandsToken) <|>
42 (lexOp "||" PipesToken) <|> (lexOp "[]" EmptyListToken) <|>
43 (lexOp "->" ArrowToken) <|> (lexOp "(" BraceOpenToken) <|>
44 //One char ops tokens
45 (lexOp ")" BraceCloseToken) <|> (lexOp "{" CBraceOpenToken) <|>
46 (lexOp "}" CBraceCloseToken) <|> (lexOp "[" SquareOpenToken) <|>
47 (lexOp "]" SquareCloseToken) <|> (lexOp "," CommaToken) <|>
48 (lexOp ":" ColonToken) <|> (lexOp ";" SColonToken) <|>
49 (lexOp "." DotToken) <|> (lexOp "+" PlusToken) <|>
50 (lexOp "*" StarToken) <|> (lexOp "/" SlashToken) <|>
51 (lexOp "%" PercentToken) <|> (lexOp "=" AssignmentToken) <|>
52 (lexOp "<" LesserToken) <|> (lexOp ">" BiggerToken) <|>
53 (lexOp "!" ExclamationToken) <|> (lexOp "-" DashToken) <|>
54 //Number tokens
55 (liftM (NumberToken o toInt o toString) $ some $ satisfy isDigit) <|>
56 //Ident tokens
57 (liftM IdentToken $ some $ satisfy isIdentChar) <|>
58 (satisfy isSpace >>| lexToken) //<|>
59 // (eof >>| pure EndOfFileToken)
60 where
61 isIdentChar c = isAlphanum c || c == '_'
62 lexOp s tv = list (fromString s) >>| pure tv
63 lexKw kw tv = lexOp kw tv <* check (not o isIdentChar) >>| pure tv
64 lexEscape = (
65 lexOp "a" (toChar 7) <|> lexOp "b" '\b' <|> lexOp "f" '\f' <|>
66 lexOp "n" '\n' <|> lexOp "r" '\t' <|> lexOp "v" '\v' <|>
67 lexOp "'" '\'') <?> ("Unknown escape", 0)