e4bac8be0e6e4833d4ca9722fe489530eadec697
[cc1516.git] / parse.icl
1 implementation module parse
2
3 import GenPrint
4 import StdString
5 import StdTuple
6 import StdList
7 from StdFunc import const, o
8 import Data.Either
9 import Data.Maybe
10 import Data.Functor
11 import Data.Tuple
12 import Control.Monad
13 import Control.Applicative
14 import Data.Func
15 import StdMisc
16 from Data.List import intercalate, replicate, instance Functor []
17 from Text import class Text(concat), instance Text String
18
19 import yard
20 import lex
21 import AST
22
// Run the program parser over the token list.
// When the parse succeeds but tokens are left over, the result is replaced by
// a positional error located at the first leftover token. In every other case
// the first component of the parser's output is returned unchanged.
parser :: [Token] -> ParserOutput
parser ts = case runParser parseProgram ts of
    (Right _, [(pos, tok):_])
        = Left (PositionalError pos.line pos.col
            ("Unable to parse from: " +++ printToString tok))
    (result, _)
        = result
28
// A program is zero or more let-declarations followed by one or more function
// declarations; both groups are concatenated (lets first) into one AST.
parseProgram :: Parser Token AST
parseProgram = liftM2 (\lets funs -> AST (lets ++ funs))
    (many parseLetDecl)
    (some parseFunDecl)
33
// Parse "let [type] name = expr ;".
// The declaration is stored as a FunDecl with no arguments and no local
// variables whose body is a single return of the bound expression.
parseLetDecl :: Parser Token FunDecl
parseLetDecl =
    peekPos >>= \pos ->
    satTok LetToken *> optional parseFunType >>= \mtype ->
    parseIdent <* satTok AssignmentToken >>= \name ->
    parseExpr <* satTok SColonToken >>= \body ->
    pure (FunDecl pos name [] mtype [] [ReturnStmt (Just body)])
43
// Parse a function declaration. The FunDecl fields are filled in source order:
// position, name, bracketed comma-separated argument names, optional type
// introduced by a double colon, then (inside curly braces) the variable
// declarations followed by the statements.
parseFunDecl :: Parser Token FunDecl
parseFunDecl = FunDecl
    <$> peekPos
    <*> parseIdent
    <*> parseBBraces (parseSepList CommaToken parseIdent)
    <*> optional (satTok DoubleColonToken *> parseFunType)
    <*> (satTok CBraceOpenToken *> many parseVarDecl)
    <*> (many parseStmt <* satTok CBraceCloseToken)
52
// Parse one statement. If and while statements are tried first; assignments,
// returns and function-call statements must be followed by a semicolon.
parseStmt :: Parser Token Stmt
parseStmt =
    parseIfStmt <|>
    parseWhileStmt <|>
    terminated parseAssStmt <|>
    terminated parseReturnStmt <|>
    (toFunStmt <$> terminated parseFunCall)
where
    // Run the given parser and then require a semicolon.
    terminated :: (Parser Token a) -> Parser Token a
    terminated stmt = stmt <* satTok SColonToken

    // Turn the triple produced by parseFunCall into a statement node.
    toFunStmt (name, args, selectors) = FunStmt name args selectors
61
// Parse the return keyword followed by an optional expression.
// The terminating semicolon is not consumed here.
parseReturnStmt :: Parser Token Stmt
parseReturnStmt = ReturnStmt <$> (satTok ReturnToken *> optional parseExpr)
65
// Parse "variable = expression" (without the terminating semicolon).
parseAssStmt :: Parser Token Stmt
parseAssStmt = liftM2 AssStmt
    (parseVarDef <* satTok AssignmentToken)
    parseExpr
69
// Parse an if statement: a bracketed condition, a body that is either a
// braced block or a single statement, and an optional else branch of the same
// shape. A missing else branch yields an empty statement list.
parseIfStmt :: Parser Token Stmt
parseIfStmt = liftM3 IfStmt condition body elseBody
where
    condition = satTok IfToken *> parseBBraces parseExpr
    body = parseBlock <|> parseOneLine
    elseBody = fromMaybe [] <$> optional (satTok ElseToken *> body)
76
// Parse a while statement: the keyword, a bracketed condition and a body that
// is either a braced block or a single statement.
parseWhileStmt :: Parser Token Stmt
parseWhileStmt = liftM2 WhileStmt
    (satTok WhileToken *> parseBBraces parseExpr)
    (parseBlock <|> parseOneLine)
80
// Parse zero or more statements enclosed in curly braces.
parseBlock :: Parser Token [Stmt]
parseBlock =
    satTok CBraceOpenToken *> many parseStmt <* satTok CBraceCloseToken
83
// Parse exactly one statement and return it as a singleton list, so it can be
// used wherever a block of statements is expected.
parseOneLine :: Parser Token [Stmt]
parseOneLine = (\stmt -> [stmt]) <$> parseStmt
86
// Parse a function type. Alternatives, in the order they are tried:
//   1. a bracketed function type, an arrow, and another function type,
//      combined with (->>);
//   2. a bracketed function type on its own;
//   3. a plain type wrapped in FuncType.
parseFunType :: Parser Token Type
parseFunType = arrow <|> bracketed <|> (FuncType <$> parseType)
where
    arrow :: Parser Token Type
    arrow = liftM2 (->>) (bracketed <* satTok ArrowToken) parseFunType

    bracketed :: Parser Token Type
    bracketed = parseBBraces parseFunType
96
// Parse a local variable declaration: "type name = expr ;" or, with the var
// keyword in place of the type, a declaration whose type is Nothing.
parseVarDecl :: Parser Token VarDecl
parseVarDecl = VarDecl
    <$> peekPos
    <*> ((Just <$> parseType) <|> trans1 VarToken Nothing)
    <*> (parseIdent <* satTok AssignmentToken)
    <*> (parseExpr <* satTok SColonToken)
103
// Parse a type: one of the built-in type keywords, a list type written in
// square brackets, a tuple type of two types, or an identifier (IdType).
// The alternatives are tried in exactly this order.
parseType :: Parser Token Type
parseType = builtin <|> listType <|> tupleType <|> namedType
where
    builtin =
        trans1 IntTypeToken IntType <|>
        trans1 CharTypeToken CharType <|>
        trans1 BoolTypeToken BoolType <|>
        trans1 VoidToken VoidType
    listType = ListType <$> parseBSqBraces parseType
    tupleType = TupleType <$> parseTuple parseType
    namedType = IdType <$> parseIdent
113
// Parse an expression built from binary operators.
// Every level parses its operands with the level defined after it, so later
// levels bind more tightly: cons, or, and, comparisons, additive operators,
// multiplicative operators, and finally basic expressions.
parseExpr :: Parser Token Expr
parseExpr = consLevel
where
    consLevel = parseOpR (trans1 ColonToken BiCons) orLevel
    orLevel = parseOpR (trans1 PipesToken BiOr) andLevel
    andLevel = parseOpR (trans1 AmpersandsToken BiAnd) compareLevel
    compareLevel = parseOpR compareOps sumLevel
    sumLevel = parseOpL sumOps productLevel
    productLevel = parseOpL productOps parseBasicExpr

    compareOps =
        trans1 EqualsToken BiEquals <|>
        trans1 LesserToken BiLesser <|>
        trans1 BiggerToken BiGreater <|>
        trans1 LesserEqToken BiLesserEq <|>
        trans1 GreaterEqToken BiGreaterEq <|>
        trans1 NotEqualToken BiUnEqual
    sumOps =
        trans1 PlusToken BiPlus <|>
        trans1 DashToken BiMinus
    productOps =
        trans1 StarToken BiTimes <|>
        trans1 SlashToken BiDivide <|>
        trans1 PercentToken BiMod

    // Right-associative level: one operand, optionally followed by an operator
    // and a recursive parse of the same level as right-hand side.
    parseOpR :: (Parser Token Op2) (Parser Token Expr) -> Parser Token Expr
    parseOpR operator operand =
        peekPos >>= \pos ->
        operand >>= \lhs ->
        combine pos lhs <$>
            optional (liftM2 tuple operator (parseOpR operator operand))
    where
        combine _ lhs Nothing = lhs
        combine pos lhs (Just (op, rhs)) = Op2Expr pos lhs op rhs

    // Left-associative level: one operand followed by any number of
    // (operator, operand) pairs, folded from the left. Every node created
    // here carries the position seen before the first operand.
    parseOpL :: (Parser Token Op2) (Parser Token Expr) -> Parser Token Expr
    parseOpL operator operand =
        peekPos >>= \pos ->
        operand >>= \first ->
        many (liftM2 tuple operator operand) >>= \rest ->
        pure (foldl (\acc (op, rhs) -> Op2Expr pos acc op rhs) first rest)
140
// Parse an operand of a binary operator. Alternatives, in the order tried:
// string literal, tuple, bracketed expression, empty list, boolean literals,
// number, character, unary-operator application, function call, variable.
// The position observed before the operand is stored in the resulting node.
parseBasicExpr :: Parser Token Expr
parseBasicExpr = peekPos >>= \pos ->
    (trans2 (StringToken []) (\(StringToken cs)->makeStrExpr pos cs)) <|>
    (TupleExpr pos <$> (parseTuple parseExpr)) <|>
    parseBBraces parseExpr <|>
    trans1 EmptyListToken (EmptyListExpr pos) <|>
    trans1 TrueToken (BoolExpr pos True) <|>
    trans1 FalseToken (BoolExpr pos False) <|>
    trans2 (NumberToken zero) (\(NumberToken i)->IntExpr pos i) <|>
    trans2 (CharToken zero) (\(CharToken c)->CharExpr pos c) <|>
    // A unary operator takes only the next basic expression as operand, so
    // "-a + b" is read as "(-a) + b" and "!a && b" as "(!a) && b". Parsing
    // the operand with parseExpr would swallow the whole binary expression.
    (Op1Expr pos <$> parseOp1 <*> parseBasicExpr) <|>
    (parseFunCall >>= \(ident, args, fs)->
        pure $ FunExpr pos ident args fs) <|>
    (VarExpr pos <$> parseVarDef)
155
// Desugar a string literal into a cons-chain of character expressions that
// ends in the empty list. Every node, including each character node, carries
// the position of the literal so that later diagnostics can point at it.
makeStrExpr :: Pos [Char] -> Expr
makeStrExpr p cs = foldr cons (EmptyListExpr p) cs
where
    cons c rest = Op2Expr p (CharExpr p c) BiCons rest
159
// Parse a function call: a name, a bracketed comma-separated argument list
// and any trailing field selectors. The three parts are returned as a triple.
parseFunCall :: Parser Token (String, [Expr], [FieldSelector])
parseFunCall = liftM3 triple
    parseIdent
    (parseBBraces (parseSepList CommaToken parseExpr))
    parseFieldSelectors
where
    triple name args selectors = (name, args, selectors)
165
// Parse a variable reference: an identifier followed by its field selectors.
parseVarDef :: Parser Token VarDef
parseVarDef = VarDef <$> parseIdent <*> parseFieldSelectors
168
// Parse zero or more field selectors, each written as a dot followed by one
// of the names hd, tl, fst or snd. Any other name makes the selector fail.
parseFieldSelectors :: Parser Token [FieldSelector]
parseFieldSelectors = many (satTok DotToken *> parseIdent >>= selector)
where
    selector :: String -> Parser Token FieldSelector
    selector "hd" = pure FieldHd
    selector "tl" = pure FieldTl
    selector "fst" = pure FieldFst
    selector "snd" = pure FieldSnd
    selector _ = empty
177
// Parse a unary operator: a dash (UnMinus) or an exclamation mark
// (UnNegation).
parseOp1 :: Parser Token Op1
parseOp1 =
    trans1 DashToken UnMinus <|>
    trans1 ExclamationToken UnNegation
180
// Run the given parser between BraceOpenToken and BraceCloseToken and return
// its result.
parseBBraces :: (Parser Token a) -> Parser Token a
parseBBraces inner =
    satTok BraceOpenToken >>|
    inner >>= \result ->
    satTok BraceCloseToken >>|
    pure result
183
// Run the given parser between CBraceOpenToken and CBraceCloseToken and
// return its result.
parseBCBraces :: (Parser Token a) -> Parser Token a
parseBCBraces inner =
    satTok CBraceOpenToken >>|
    inner >>= \result ->
    satTok CBraceCloseToken >>|
    pure result
186
// Run the given parser between SquareOpenToken and SquareCloseToken and
// return its result.
parseBSqBraces :: (Parser Token a) -> Parser Token a
parseBSqBraces inner =
    satTok SquareOpenToken >>|
    inner >>= \result ->
    satTok SquareCloseToken >>|
    pure result
189
// Parse a pair written as "( first , second )", using the same parser for
// both components.
parseTuple :: (Parser Token a) -> Parser Token (a, a)
parseTuple p = parseBBraces (liftM2 tuple (p <* satTok CommaToken) p)
194
// Accept a token matching t (as decided by satTok) and return its position
// together with the result of applying f to the token value.
trans :: TokenValue (TokenValue -> a) -> Parser Token (Pos, a)
trans t f = satTok t >>= \(pos, value) -> pure (pos, f value)
197
// Like trans, but the position is dropped and only the converted value is
// returned.
trans2 :: TokenValue (TokenValue -> a) -> Parser Token a
trans2 t f = liftM snd (trans t f)
200
// Accept a token matching t and return the fixed value r, ignoring the
// token's own contents.
trans1 :: TokenValue a -> Parser Token a
trans1 t r = trans2 t (\_ -> r)
203
// Return the position component of the token delivered by peek.
peekPos :: Parser Token Pos
peekPos = liftM fst peek
206
// Generic printing and equality for token values; satTok uses both.
derive gPrint TokenValue
derive gEq TokenValue

// Take the next token and succeed with it when its value matches t.
// Identifier, number, character and string tokens match on their constructor
// alone (their payload is ignored); every other token is compared with
// generic equality. On a mismatch the parser fails with a positional error
// naming the unexpected and the expected token.
satTok :: TokenValue -> Parser Token Token
satTok t = top >>= check
where
    check :: Token -> Parser Token Token
    check tok=:(pos, actual)
        | sameKind t actual = pure tok
        | otherwise = fail <?> PositionalError pos.line pos.col message
    where
        message = "ParseError: Unexpected token: " +++ printToString actual
            +++ "\nExpected: " +++ printToString t

    sameKind (IdentToken _) (IdentToken _) = True
    sameKind (NumberToken _) (NumberToken _) = True
    sameKind (CharToken _) (CharToken _) = True
    sameKind (StringToken _) (StringToken _) = True
    sameKind x y = gEq {|*|} x y
220
// Parse a possibly empty list of p separated by the token sep.
// Alternatives, in the order tried:
//   1. one or more "p sep" pairs followed by a final p;
//   2. a single p;
//   3. nothing at all, giving the empty list.
// Elements are returned in source order. The final element is appended to
// the ones collected by some; consing it on the front and reversing the
// whole list would misplace elements as soon as there are three or more
// (e.g. [a, b] and c would become [b, a, c]).
parseSepList :: TokenValue (Parser Token a) -> Parser Token [a]
parseSepList sep p =
    (liftM2 (\es e -> es ++ [e]) (some (p <* satTok sep)) p) <|>
    (liftM pure p) <|> pure empty
225
// Accept an identifier token and return its name as a String.
parseIdent :: Parser Token String
parseIdent = trans2 (IdentToken "") name
where
    name (IdentToken ident) = toString ident
228
// The library's liftM family stops at liftM5, so the six-argument variant is
// defined here: apply f to the results of six computations, run left to right.
liftM6 f a b c d e g = fmap f a <*> b <*> c <*> d <*> e <*> g