start with adts
[minfp.git] / parse.icl
1 implementation module parse
2
3 import Control.Applicative
4 import Control.Monad
5 import Control.Monad.State
6 import Control.Monad.Trans
7 import Data.Either
8 import Data.GenEq
9 import Data.Functor
10 import Data.Func
11 import Data.List
12 import Data.Tuple
13 import Text.GenPrint
14 import StdEnv
15
16 import ast
17
18 cons x xs = [x:xs]
19
20 (<:>) infixl 0 :: a (m [a]) -> m [a] | Functor m
21 (<:>) l r = cons l <$> r
22
23 :: Token
24 = TTEq | TTSemiColon | TTLambda | TTDot | TTBrackOpen | TTBrackClose
25 | TTComma | TTBool Bool | TTInt Int | TTOp [Char] | TTIdent [Char]
26 | TTDColon | TTPipe
27
28 derive gEq Token
29 derive gPrint Token
30
31 instance toString Token where toString t = printToString t
32
33 lex :: [Char] -> Either [String] [Token]
34 lex [] = pure []
35 lex ['//\n':ts] = lex ts
36 lex ['//',t:ts] = lex ['/','/':ts]
37 lex ['/**/':ts] = lex $ dropWhile ((<>)'\n') ts
38 lex ['/*',t:ts] = lex ['/','*':ts]
39 lex [';':ts] = TTSemiColon <:> lex ts
40 lex [')':ts] = TTBrackClose <:> lex ts
41 lex ['(':ts] = TTBrackOpen <:> lex ts
42 lex ['True':ts] = TTBool True <:> lex ts
43 lex ['False':ts] = TTBool False <:> lex ts
44 lex ['-',t:ts]
45 | isDigit t = lex [t:ts] >>= \v->case v of
46 [TTInt i:rest] = Right [TTInt (~i):rest]
47 x = pure x
48 lex [t:ts]
49 | isSpace t = lex ts
50 | isDigit t
51 # (i, ts) = span isDigit [t:ts]
52 = TTInt (toInt (toString i)) <:> lex ts
53 | isAlpha t
54 # (i, ts) = span isAlpha [t:ts]
55 = TTIdent i <:> lex ts
56 | isOp t
57 # (i, ts) = span isOp [t:ts]
58 | i =: ['='] = TTEq <:> lex ts
59 | i =: [','] = TTComma <:> lex ts
60 | i =: ['.'] = TTDot <:> lex ts
61 | i =: ['\\'] = TTLambda <:> lex ts
62 | i =: ['::'] = TTDColon <:> lex ts
63 | i =: ['|'] = TTPipe <:> lex ts
64 = TTOp i <:> lex ts
65 = Left ["Unexpected: " +++ toString t +++ " ord: " +++ toString (toInt t)]
66 where
67 isOp c = isMember c ['!@#$%^&*=+/?-_|\\\'",<>.:']
68
69 :: Parser a :== StateT ParseState (Either [String]) a
70 :: ParseState =
71 { tokens :: [Token]
72 , ifxs :: [((Parser Expression) -> Parser Expression, Int)]
73 }
74
75 pTop :: Parser Token
76 pTop = getState >>= \s->case s.tokens of
77 [t:ts] = put {s & tokens=ts} >>| pure t
78 [] = liftT (Left ["Fully consumed input"])
79
80 pEof :: Parser ()
81 pEof = getState >>= \s->case s.tokens of
82 [] = pure ()
83 [t:ts] = liftT (Left ["Expected EOF but got ":map toString [t:ts]])
84
85 (?) infixl 9 :: (Parser a) (a -> Bool) -> Parser a
86 (?) p f = p >>= \v->if (f v) (pure v) empty
87
88 pToken :: (Token -> Parser Token)
89 pToken = (?) pTop o (===)
90
91 pChainl :: (Parser (a a -> a)) (Parser a) -> Parser a
92 pChainl op p = foldl (flip ($)) <$> p <*> many (flip <$> op <*> p)
93
94 pChainr :: (Parser (a a -> a)) (Parser a) -> Parser a
95 pChainr op p = flip ($) <$> p <*> (flip <$> op <*> pChainr op p) <|> p
96
97 parse :: [Token] -> Either [String] [Either TypeDef Function]
98 parse ts = fst <$> runStateT (reverse <$> pAST <* pEof) {tokens=ts, ifxs=[]}
99 where
100 pAST :: Parser [Either TypeDef Function]
101 pAST = many (Right <$> pFunction <|> Left <$> pTypeDef)
102 >>= mapM (either (pure o Left) \(id, args, body)->Right o
103 Function id args <$ modify (\t->{t & tokens=body}) <*> pExpression <* pEof)
104
105 pTypeDef :: Parser TypeDef
106 pTypeDef = TypeDef
107 <$ pToken TTDColon
108 <*> pId
109 <*> many pId
110 <* pToken TTEq
111 <*> (cons <$> pCons <*> many (pToken TTPipe *> pCons))
112 <* pToken TTSemiColon
113
114 pCons = tuple <$> pId <*> many pType
115
116 pType
117 = TInt <$ pTop ? (\t->t=:(TTIdent ['Int']))
118 <|> TBool <$ pTop ? (\t->t=:(TTIdent ['Bool']))
119 <|> TVar <$> pId
120 <|>
121 :: Type
122 = TVar [Char]
123 | TTuple Type Type
124 | TInt
125 | TBool
126 | (-->) infixr 9 Type Type
127
128 pId = (\(TTIdent i)->i) <$> pTop ? (\t->t=:(TTIdent _))
129 pOp = (\(TTOp i)->i) <$> pTop ? (\t->t=:(TTOp _))
130 pBrack p = pToken TTBrackOpen *> p <* pToken TTBrackClose
131
132 pFunction :: Parser ([Char], [[Char]], [Token])
133 pFunction
134 = tuple3
135 <$> (pFunId <|> pId)
136 <*> many pId
137 <* pToken TTEq
138 <*> many (pTop ? ((=!=)TTSemiColon))
139 <* pToken TTSemiColon
140
141 pFunId :: Parser [Char]
142 pFunId = pOp
143 >>= \i->pChainr <$ pId ? ((==) ['ifxr']) <|> pChainl <$ pId ? ((==)['ifxl'])
144 >>= \p->(\(TTInt i)->i) <$> pTop ? (\t->t=:(TTInt _))
145 >>= \s->modify (\t->{t & ifxs=[(p (App o App (Var i) <$ pOp ? ((==)i)), s):t.ifxs]})
146 >>| pure i
147
148 pExpression :: Parser Expression
149 pExpression = getState >>= \{ifxs}->flip (foldr ($))
150 (map fst $ sortBy (on (<) snd) ifxs)
151 $ pChainl (pure App)
152 $ Lambda <$ pToken TTLambda <*> pId <* pToken TTDot <*> pExpression
153 <|> pBrack ( Tuple <$> pExpression <* pToken TTComma <*> pExpression
154 <|> Var <$> pOp
155 <|> pExpression)
156 <|> (\(TTInt i)->Lit (Int i)) <$> pTop ? (\t->t=:(TTInt _))
157 <|> (\(TTBool i)->Lit (Bool i)) <$> pTop ? (\t->t=:(TTBool _))
158 <|> (\x->Var ['_':x]) <$ pId ? ((==)['code']) <*> pId
159 <|> Var <$> pId