curry gotcha
[cc1516.git] / lex.icl
1 implementation module lex
2
3 import Control.Monad, Control.Applicative
4 import Data.Either, Data.Func, Data.Void
5 import Data.Map
6 from StdFunc import o
7 import StdBool
8 import StdList
9 import StdChar
10 import StdString
11 import StdTuple
12
13 import yard
14 import AST
15
16 :: LexItem
17 = LexToken Int TokenValue
18 | LexSpace Int Int
19 | LexNL
20 | LexEOF
21 | LexItemError String
22
23 escapes :: Map Char Char
24 escapes = fromList [('a', toChar 7), ('b', '\b'), ('f', '\f'), ('n', '\n'),
25 ('r', '\r'), ('t', '\t'), ('v', '\v'), ('\'', '\''), ('"', '"')]
26
27 lexer :: [Char] -> LexerOutput
28 lexer r = fst $ runParser (lexProgram 1 1) r
29
30 lexProgram :: Int Int -> Parser Char [Token]
31 lexProgram line column = lexToken >>= \t->case t of
32 LexEOF = pure []
33 LexNL = lexProgram (line+1) 1
34 (LexSpace l c) = lexProgram (line+l) (column+c)
35 (LexItemError e) = fail <?>
36 PositionalError line column ("LexerError: " +++ e)
37 (LexToken c t) = lexProgram line (column+c)
38 >>= \rest->pure [({line=line,col=column}, t):rest]
39
40 lexToken :: Parser Char LexItem
41 lexToken =
42 //Comments
43 lexBlockComment <|> lexComment <|>
44 //Keyword tokens
45 lexKw "var" VarToken <|> lexKw "Void" VoidToken <|>
46 lexKw "return" ReturnToken <|> lexKw "if" IfToken <|>
47 lexKw "else" ElseToken <|> lexKw "while" WhileToken <|>
48 lexKw "True" TrueToken <|> lexKw "False" FalseToken <|>
49 lexKw "Int" IntTypeToken <|> lexKw "Bool" BoolTypeToken <|>
50 lexKw "Char" CharTypeToken <|>
51 lexKw "let" LetToken <|>
52 //Character tokens
53 lexEscape <|> lexCharacter <|>
54 //Two char ops tokens
55 lexWord "::" DoubleColonToken <|> lexWord "!=" NotEqualToken <|>
56 lexWord "<=" LesserEqToken <|> lexWord ">=" GreaterEqToken <|>
57 lexWord "==" EqualsToken <|> lexWord "&&" AmpersandsToken <|>
58 lexWord "||" PipesToken <|> lexWord "[]" EmptyListToken <|>
59 lexWord "->" ArrowToken <|>
60 //One char ops tokens
61 lexWord "(" BraceOpenToken <|> lexWord ")" BraceCloseToken <|>
62 lexWord "{" CBraceOpenToken <|> lexWord "}" CBraceCloseToken <|>
63 lexWord "[" SquareOpenToken <|> lexWord "]" SquareCloseToken <|>
64 lexWord "," CommaToken <|> lexWord ":" ColonToken <|>
65 lexWord ";" SColonToken <|> lexWord "." DotToken <|>
66 lexWord "+" PlusToken <|> lexWord "*" StarToken <|>
67 lexWord "/" SlashToken <|> lexWord "%" PercentToken <|>
68 lexWord "=" AssignmentToken <|> lexWord "<" LesserToken <|>
69 lexWord ">" BiggerToken <|> lexWord "!" ExclamationToken <|>
70 lexWord "-" DashToken <|> lexWord "\\" BackslashToken <|>
71 //Number and identifier tokens
72 lexString <|> lexNumber <|> lexIdentifier <|>
73 (item '\n' >>| pure LexNL) <|>
74 //Whitespace
75 (satisfy isSpace >>| (pure $ LexSpace 0 1)) <|>
76 (eof >>| pure LexEOF)
77 where
78 lexWord :: String TokenValue -> Parser Char LexItem
79 lexWord s tv = list ls >>| pure (LexToken (length ls) tv)
80 where ls = fromString s
81
82 lexKw :: String TokenValue -> Parser Char LexItem
83 lexKw kw tv = lexWord kw tv <* check (not o isIdentChar)
84
85 lexComment :: Parser Char LexItem
86 lexComment = list (fromString "//")
87 >>| top until (eof <|> (item '\n' >>| pure Void)) >>| pure LexNL
88
89 lexBlockComment :: Parser Char LexItem
90 lexBlockComment = list (fromString "/*")
91 >>| (top until (list (fromString "*/")))
92 >>= \chars->pure $ widthHeight chars 0 0
93 where
94 widthHeight :: [Char] Int Int -> LexItem
95 widthHeight [] l c = LexSpace l c
96 widthHeight ['\n':xs] l _ = widthHeight xs (l+1) 0
97 widthHeight [x:xs] l c = widthHeight xs l (c+1)
98
99 lexNumber :: Parser Char LexItem
100 lexNumber = some (satisfy isDigit) >>= \si->pure $
101 LexToken (length si) (NumberToken $ toInt $ toString si)
102
103 lexIdentifier :: Parser Char LexItem
104 lexIdentifier = some (satisfy isIdentChar)
105 >>= \si->pure $ LexToken (length si) (IdentToken $ toString si)
106
107 isIdentChar c = isAlphanum c || c == '_'
108
109 lexCharacter :: Parser Char LexItem
110 lexCharacter = item '\'' *> satisfy ((<>) '\'') <* item '\''
111 >>= \char->pure $ LexToken 3 (CharToken char)
112
113 lexEscape :: Parser Char LexItem
114 lexEscape = item '\'' *> item '\\' *> top <* item '\''
115 >>= \char->pure case get char escapes of
116 Just e = LexToken 4 (CharToken e)
117 Nothing = LexItemError $ "Unknown escape: " +++ toString char
118
119 lexString :: Parser Char LexItem
120 lexString = item '"' *> (
121 many (
122 (satisfy (\c->c <> '"' && c <> '\\')) <|>
123 (item '\\' *> top >>= \char->case get char escapes of
124 Just e = pure e
125 Nothing = empty)
126 ))<* item '"' >>= \cs-> pure $ LexToken (length cs) (StringToken cs)