hoig
[cc1516.git] / src / lex.icl
1 implementation module lex
2
3 import Control.Monad, Control.Applicative
4 import Data.Either, Data.Func, Data.Maybe, Data.Functor
5 from StdFunc import o
6 import StdBool
7 import StdList
8 import StdChar
9
10 import yard
11
12 lexer :: [Char] -> LexerOutput
13 lexer r = case runParser lexProgram r of
14 (Right p, _) = Right p
15 (Left e, _) = Left $ toString e
16
17 lexProgram :: Parser Char [Token]
18 lexProgram = catMaybes <$> some lexToken <* eof
19 >>= \ts->pure $ (map (\t->(0, 0, t)) ts)
20
21 //Of hier een record van maken
22 //lexToken :: Parser Char (Maybe (Int, Int, Int, TokenValue))
23 lexToken :: Parser Char (Maybe TokenValue)
24 lexToken =
25 //Comments
26 (list (fromString "//") >>| lexUntilNL >>| pure Nothing) <|>
27 (list (fromString "/*") >>| lexUntilCommentClose >>| pure Nothing) <|>
28 //Keyword tokens
29 lexKw "var" VarToken <|>
30 lexKw "Void" VoidToken <|>
31 lexKw "return" ReturnToken <|>
32 lexKw "if" IfToken <|>
33 lexKw "else" ElseToken <|>
34 lexKw "while" WhileToken <|>
35 lexKw "True" TrueToken <|>
36 lexKw "False" FalseToken <|>
37 lexKw "Int" IntTypeToken <|>
38 lexKw "Bool" BoolTypeToken <|>
39 lexKw "Char" CharTypeToken <|>
40 //Escape chars tokens
41 liftM (Just o CharToken)
42 (list (fromString "'\\") *> lexEscape <* item '\'') <|>
43 //Normal chars tokens
44 liftM (Just o CharToken)
45 (item '\'' *> satisfy ((<>) '\'') <* item '\'') <|>
46 //Two char ops tokens
47 lexOp "::" DoubleColonToken <|> lexOp "!=" NotEqualToken <|>
48 lexOp "<=" LesserEqToken <|> lexOp ">=" GreaterEqToken <|>
49 lexOp "==" EqualsToken <|> lexOp "&&" AmpersandsToken <|>
50 lexOp "||" PipesToken <|> lexOp "[]" EmptyListToken <|>
51 lexOp "->" ArrowToken <|>
52 //One char ops tokens
53 lexOp "(" BraceOpenToken <|>
54 lexOp ")" BraceCloseToken <|> lexOp "{" CBraceOpenToken <|>
55 lexOp "}" CBraceCloseToken <|> lexOp "[" SquareOpenToken <|>
56 lexOp "]" SquareCloseToken <|> lexOp "," CommaToken <|>
57 lexOp ":" ColonToken <|> lexOp ";" SColonToken <|>
58 lexOp "." DotToken <|> lexOp "+" PlusToken <|>
59 lexOp "*" StarToken <|> lexOp "/" SlashToken <|>
60 lexOp "%" PercentToken <|> lexOp "=" AssignmentToken <|>
61 lexOp "<" LesserToken <|> lexOp ">" BiggerToken <|>
62 lexOp "!" ExclamationToken <|> lexOp "-" DashToken <|>
63 //Number tokens
64 liftM (Just o NumberToken o toInt o toString) (some $ satisfy isDigit) <|>
65 //Ident tokens
66 liftM (Just o IdentToken o toString) (some $ satisfy isIdentChar) <|>
67 (satisfy isSpace >>| pure Nothing)
68 where
69 lexUntilNL = top until (eof <|> (item '\n' >>| pure Void))
70 lexUntilCommentClose = top until (list (fromString "*/"))
71 isIdentChar c = isAlphanum c || c == '_'
72 lexOp s tv = list (fromString s) >>| pure (Just tv)
73 lexKw kw tv = lexOp kw tv <* check (not o isIdentChar)
74 lexEscape = fromJust <$> ((
75 lexOp "a" (toChar 7) <|> lexOp "b" '\b' <|> lexOp "f" '\f' <|>
76 lexOp "n" '\n' <|> lexOp "r" '\t' <|> lexOp "v" '\v' <|>
77 lexOp "'" '\'') <?> LexError "Unknown escape")