better lexer
[cc1516.git] / src / lex.icl
1 implementation module lex
2
3 import Control.Monad, Control.Applicative
4 import Data.Either, Data.Func, Data.Maybe, Data.Functor
5 from StdFunc import o
6 import StdBool
7 import StdList
8 import StdChar
9
10 import yard
11
12 lexer :: [Char] -> LexerOutput
13 lexer r = case runParser lexProgram r of
14 (Right p, _) = Right p
15 (Left e, _) = Left $ toString e
16
17 lexProgram :: Parser Char [Token]
18 lexProgram = catMaybes <$> some lexToken <* eof
19 >>= \ts->pure $ (map (\t->(0, 0, t)) ts)
20
21 lexToken :: Parser Char (Maybe TokenValue)
22 lexToken =
23 //Comments
24 (list (fromString "//") >>| lexUntilNL >>| pure Nothing) <|>
25 (list (fromString "/*") >>| lexUntilCommentClose >>| pure Nothing) <|>
26 //Keyword tokens
27 lexKw "var" VarToken <|>
28 lexKw "Void" VoidToken <|>
29 lexKw "return" ReturnToken <|>
30 lexKw "if" IfToken <|>
31 lexKw "else" ElseToken <|>
32 lexKw "while" WhileToken <|>
33 lexKw "True" TrueToken <|>
34 lexKw "False" FalseToken <|>
35 lexKw "Int" IntTypeToken <|>
36 lexKw "Bool" BoolTypeToken <|>
37 lexKw "Char" CharTypeToken <|>
38 //Escape chars tokens
39 liftM (Just o CharToken)
40 (list (fromString "'\\") *> lexEscape <* item '\'') <|>
41 //Normal chars tokens
42 liftM (Just o CharToken)
43 (item '\'' *> satisfy ((<>) '\'') <* item '\'') <|>
44 //Two char ops tokens
45 lexOp "::" DoubleColonToken <|> lexOp "!=" NotEqualToken <|>
46 lexOp "<=" LesserEqToken <|> lexOp ">=" GreaterEqToken <|>
47 lexOp "==" EqualsToken <|> lexOp "&&" AmpersandsToken <|>
48 lexOp "||" PipesToken <|> lexOp "[]" EmptyListToken <|>
49 lexOp "->" ArrowToken <|>
50 //One char ops tokens
51 lexOp "(" BraceOpenToken <|>
52 lexOp ")" BraceCloseToken <|> lexOp "{" CBraceOpenToken <|>
53 lexOp "}" CBraceCloseToken <|> lexOp "[" SquareOpenToken <|>
54 lexOp "]" SquareCloseToken <|> lexOp "," CommaToken <|>
55 lexOp ":" ColonToken <|> lexOp ";" SColonToken <|>
56 lexOp "." DotToken <|> lexOp "+" PlusToken <|>
57 lexOp "*" StarToken <|> lexOp "/" SlashToken <|>
58 lexOp "%" PercentToken <|> lexOp "=" AssignmentToken <|>
59 lexOp "<" LesserToken <|> lexOp ">" BiggerToken <|>
60 lexOp "!" ExclamationToken <|> lexOp "-" DashToken <|>
61 //Number tokens
62 liftM (Just o NumberToken o toInt o toString) (some $ satisfy isDigit) <|>
63 //Ident tokens
64 liftM (Just o IdentToken o toString) (some $ satisfy isIdentChar) <|>
65 (satisfy isSpace >>| pure Nothing)
66 where
67 lexUntilNL = top until (eof <|> (item '\n' >>| pure Void))
68 lexUntilCommentClose = top until (list (fromString "*/"))
69 isIdentChar c = isAlphanum c || c == '_'
70 lexOp s tv = list (fromString s) >>| pure (Just tv)
71 lexKw kw tv = lexOp kw tv <* check (not o isIdentChar)
72 lexEscape = fromJust <$> ((
73 lexOp "a" (toChar 7) <|> lexOp "b" '\b' <|> lexOp "f" '\f' <|>
74 lexOp "n" '\n' <|> lexOp "r" '\t' <|> lexOp "v" '\v' <|>
75 lexOp "'" '\'') <?> ("Unknown escape", 0))