testfiles
[cc1516.git] / lex.icl
1 implementation module lex
2
3 import Control.Monad, Control.Applicative
4 import Data.Either, Data.Func, Data.Void
5 import Data.Map
6 from StdFunc import o
7 import StdBool
8 import StdList
9 import StdChar
10 import StdString
11 import StdTuple
12
13 import yard
14 import AST
15
16 :: LexItem
17 = LexToken Int TokenValue
18 | LexSpace Int Int
19 | LexNL
20 | LexEOF
21 | LexItemError String
22
23 escapes :: Map Char Char
24 escapes = fromList [('a', toChar 7), ('b', '\b'), ('f', '\f'), ('n', '\n'),
25 ('r', '\r'), ('t', '\t'), ('v', '\v'), ('\'', '\''), ('"', '"')]
26
27 lexer :: [Char] -> LexerOutput
28 lexer r = fst $ runParser (lexProgram 1 1) r
29
30 lexProgram :: Int Int -> Parser Char [Token]
31 lexProgram line column = lexToken >>= \t->case t of
32 LexEOF = pure []
33 LexNL = lexProgram (line+1) 1
34 (LexSpace l c) = lexProgram (line+l) (column+c)
35 (LexItemError e) = fail <?>
36 PositionalError line column ("LexerError: " +++ e)
37 (LexToken c t) = lexProgram line (column+c)
38 >>= \rest->pure [({line=line,col=column}, t):rest]
39
40 lexToken :: Parser Char LexItem
41 lexToken =
42 //Comments
43 lexBlockComment <|> lexComment <|>
44 //Keyword tokens
45 lexKw "var" VarToken <|> lexKw "Void" VoidToken <|>
46 lexKw "return" ReturnToken <|> lexKw "if" IfToken <|>
47 lexKw "else" ElseToken <|> lexKw "while" WhileToken <|>
48 lexKw "True" TrueToken <|> lexKw "False" FalseToken <|>
49 lexKw "Int" IntTypeToken <|> lexKw "Bool" BoolTypeToken <|>
50 lexKw "Char" CharTypeToken <|>
51 //Character tokens
52 lexEscape <|> lexCharacter <|>
53 //Two char ops tokens
54 lexWord "::" DoubleColonToken <|> lexWord "!=" NotEqualToken <|>
55 lexWord "<=" LesserEqToken <|> lexWord ">=" GreaterEqToken <|>
56 lexWord "==" EqualsToken <|> lexWord "&&" AmpersandsToken <|>
57 lexWord "||" PipesToken <|> lexWord "[]" EmptyListToken <|>
58 lexWord "->" ArrowToken <|>
59 //One char ops tokens
60 lexWord "(" BraceOpenToken <|> lexWord ")" BraceCloseToken <|>
61 lexWord "{" CBraceOpenToken <|> lexWord "}" CBraceCloseToken <|>
62 lexWord "[" SquareOpenToken <|> lexWord "]" SquareCloseToken <|>
63 lexWord "," CommaToken <|> lexWord ":" ColonToken <|>
64 lexWord ";" SColonToken <|> lexWord "." DotToken <|>
65 lexWord "+" PlusToken <|> lexWord "*" StarToken <|>
66 lexWord "/" SlashToken <|> lexWord "%" PercentToken <|>
67 lexWord "=" AssignmentToken <|> lexWord "<" LesserToken <|>
68 lexWord ">" BiggerToken <|> lexWord "!" ExclamationToken <|>
69 lexWord "-" DashToken <|>
70 //Number and identifier tokens
71 lexString <|> lexNumber <|> lexIdentifier <|>
72 (item '\n' >>| pure LexNL) <|>
73 //Whitespace
74 (satisfy isSpace >>| (pure $ LexSpace 0 1)) <|>
75 (eof >>| pure LexEOF)
76 where
77 lexWord :: String TokenValue -> Parser Char LexItem
78 lexWord s tv = list ls >>| pure (LexToken (length ls) tv)
79 where ls = fromString s
80
81 lexKw :: String TokenValue -> Parser Char LexItem
82 lexKw kw tv = lexWord kw tv <* check (not o isIdentChar)
83
84 lexComment :: Parser Char LexItem
85 lexComment = list (fromString "//")
86 >>| top until (eof <|> (item '\n' >>| pure Void)) >>| pure LexNL
87
88 lexBlockComment :: Parser Char LexItem
89 lexBlockComment = list (fromString "/*")
90 >>| (top until (list (fromString "*/")))
91 >>= \chars->pure $ widthHeight chars 0 0
92 where
93 widthHeight :: [Char] Int Int -> LexItem
94 widthHeight [] l c = LexSpace l c
95 widthHeight ['\n':xs] l _ = widthHeight xs (l+1) 0
96 widthHeight [x:xs] l c = widthHeight xs l (c+1)
97
98 lexNumber :: Parser Char LexItem
99 lexNumber = some (satisfy isDigit) >>= \si->pure $
100 LexToken (length si) (NumberToken $ toInt $ toString si)
101
102 lexIdentifier :: Parser Char LexItem
103 lexIdentifier = some (satisfy isIdentChar)
104 >>= \si->pure $ LexToken (length si) (IdentToken $ toString si)
105
106 isIdentChar c = isAlphanum c || c == '_'
107
108 lexCharacter :: Parser Char LexItem
109 lexCharacter = item '\'' *> satisfy ((<>) '\'') <* item '\''
110 >>= \char->pure $ LexToken 3 (CharToken char)
111
112 lexEscape :: Parser Char LexItem
113 lexEscape = item '\'' *> item '\\' *> top <* item '\''
114 >>= \char->pure case get char escapes of
115 Just e = LexToken 4 (CharToken e)
116 Nothing = LexItemError $ "Unknown escape: " +++ toString char
117
118 lexString :: Parser Char LexItem
119 lexString = item '"' *> (
120 many (
121 (satisfy (\c->c <> '"' && c <> '\\')) <|>
122 (item '\\' *> top >>= \char->case get char escapes of
123 Just e = pure e
124 Nothing = empty)
125 ))<* item '"' >>= \cs-> pure $ LexToken (length cs) (StringToken cs)