Merge branch 'master' of github.com:dopefishh/cc1516
[cc1516.git] / src / lex.icl
1 implementation module lex
2
3 import Control.Monad, Control.Applicative
4 import Data.Either, Data.Maybe, Data.Map, Data.Func
5 import StdString
6 import StdTuple
7 from StdFunc import const, o
8 import StdBool
9 import StdList
10 import StdChar
11
12 import yard
13
14 lexer :: [Char] -> LexerOutput
15 lexer r = case runParser lexProgram r of
16 (Right p, _) = Right p
17 (Left e, _) = Left $ toString e
18
19 trans1 :: Char a -> Parser Char a
20 trans1 t r = item t >>| pure r
21
22 lexProgram :: Parser Char [Token]
23 lexProgram = lexToken >>= \t->pure [(0, 0, t)]
24
25 lexToken :: Parser Char TokenValue
26 lexToken =
27 (lexKw "var" VarToken) <|>
28 (lexKw "Void" VoidToken) <|>
29 (lexKw "return" ReturnToken) <|>
30 (lexKw "if" IfToken) <|>
31 (lexKw "else" ElseToken) <|>
32 (lexKw "while" WhileToken) <|>
33 (lexKw "True" TrueToken) <|>
34 (lexKw "False" FalseToken) <|>
35 (lexKw "Int" IntTypeToken) <|>
36 (lexKw "Bool" BoolTypeToken) <|>
37 (lexKw "Char" CharTypeToken) <|>
38 //Escape chars
39 (liftM CharToken $ item '\'' *> item '\\' *> lexEscape <* item '\'') <|>
40 //Normal chars
41 (liftM CharToken $ item '\'' *> satisfy ((<>) '\'') <* item '\'') <|>
42 (lexOp "::" DoubleColonToken) <|>
43 (lexOp "!=" NotEqualToken) <|>
44 (lexOp "<=" LesserEqToken) <|>
45 (lexOp ">=" GreaterEqToken) <|>
46 (lexOp "==" EqualsToken) <|>
47 (lexOp "&&" AmpersandsToken) <|>
48 (lexOp "||" PipesToken) <|>
49 (lexOp "[]" EmptyListToken) <|>
50 (lexOp "->" ArrowToken) <|>
51 (lexOp "(" BraceOpenToken) <|>
52 (lexOp ")" BraceCloseToken) <|>
53 (lexOp "{" CBraceOpenToken) <|>
54 (lexOp "}" CBraceCloseToken) <|>
55 (lexOp "[" SquareOpenToken) <|>
56 (lexOp "]" SquareCloseToken) <|>
57 (lexOp "," CommaToken) <|>
58 (lexOp ":" ColonToken) <|>
59 (lexOp ";" SColonToken) <|>
60 (lexOp "." DotToken) <|>
61 (lexOp "+" PlusToken) <|>
62 (lexOp "*" StarToken) <|>
63 (lexOp "/" SlashToken) <|>
64 (lexOp "%" PercentToken) <|>
65 (lexOp "=" AssignmentToken) <|>
66 (lexOp "<" LesserToken) <|>
67 (lexOp ">" BiggerToken) <|>
68 (lexOp "!" ExclamationToken) <|>
69 (lexOp "-" DashToken) <|>
70 //Numbers
71 (liftM (NumberToken o toInt o toString) $ some $ satisfy isDigit) <|>
72 //Identifiersr
73 (liftM IdentToken $ some $ satisfy isIdentChar) <|>
74 (satisfy isSpace >>| lexToken)
75
76 lexEscape = (
77 lexOp "a" (toChar 7) <|>
78 lexOp "b" '\b' <|>
79 lexOp "f" '\f' <|>
80 lexOp "n" '\n' <|>
81 lexOp "r" '\t' <|>
82 lexOp "v" '\v' <|>
83 lexOp "'" '\''
84 ) <?> ("Unknown escape", 0)
85
86 lexKw :: String a -> Parser Char a
87 lexKw kw tv = list (fromString kw) <* check (not o isIdentChar) >>| pure tv
88
89 lexOp :: String a -> Parser Char a
90 lexOp s tv = list (fromString s) >>| pure tv
91
92 isIdentChar :: Char -> Bool
93 isIdentChar c = isAlphanum c || c == '_'