956ca605ca9ab12fa41d2e00004826094735e912
[minfp.git] / parse.icl
1 implementation module parse
2
3 import Control.Applicative
4 import Control.Monad
5 import Control.Monad.State
6 import Control.Monad.Trans
7 import Data.Either
8 import Data.GenEq
9 import Data.Functor
10 import Data.Func
11 import Data.List
12 import Text.GenPrint
13 import StdEnv
14
15 import ast
16
17 (<:>) infixl 0 :: a (m [a]) -> m [a] | Functor m
18 (<:>) l r = (\xs->[l:xs]) <$> r
19
20 :: Token
21 = TTEq | TTSemiColon | TTLambda | TTDot | TTBrackOpen | TTBrackClose
22 | TTComma | TTBool Bool | TTInt Int | TTOp [Char] | TTIdent [Char]
23
24 derive gEq Token
25 derive gPrint Token
26
27 instance toString Token where toString t = printToString t
28
29 lex :: [Char] -> Either [String] [Token]
30 lex [] = pure []
31 lex ['//\n':ts] = lex ts
32 lex ['//',t:ts] = lex ['/','/':ts]
33 lex ['/**/':ts] = lex $ dropWhile ((<>)'\n') ts
34 lex ['/*',t:ts] = lex ['/','*':ts]
35 lex [';':ts] = TTSemiColon <:> lex ts
36 lex [')':ts] = TTBrackClose <:> lex ts
37 lex ['(':ts] = TTBrackOpen <:> lex ts
38 lex ['True':ts] = TTBool True <:> lex ts
39 lex ['False':ts] = TTBool False <:> lex ts
40 lex ['-',t:ts]
41 | isDigit t = lex [t:ts] >>= \v->case v of
42 [TTInt i:rest] = Right [TTInt (~i):rest]
43 x = pure x
44 lex [t:ts]
45 | isSpace t = lex ts
46 | isDigit t
47 # (i, ts) = span isDigit [t:ts]
48 = TTInt (toInt (toString i)) <:> lex ts
49 | isAlpha t
50 # (i, ts) = span isAlpha [t:ts]
51 = TTIdent i <:> lex ts
52 | isOp t
53 # (i, ts) = span isOp [t:ts]
54 | i =: ['='] = TTEq <:> lex ts
55 | i =: [','] = TTComma <:> lex ts
56 | i =: ['.'] = TTDot <:> lex ts
57 | i =: ['\\'] = TTLambda <:> lex ts
58 = TTOp i <:> lex ts
59 = Left ["Unexpected: " +++ toString t +++ " ord: " +++ toString (toInt t)]
60 where
61 isOp c = isMember c ['!@#$%^&*=+/?-_|\\\'",<>.:']
62
63 :: Parser a :== StateT ParseState (Either [String]) a
64 :: ParseState =
65 { tokens :: [Token]
66 , ifxs :: [((Parser Expression) -> Parser Expression, Int)]
67 }
68
69 pTop :: Parser Token
70 pTop = getState >>= \s->case s.tokens of
71 [t:ts] = put {s & tokens=ts} >>| pure t
72 [] = liftT (Left ["Fully consumed input"])
73
74 pEof :: Parser ()
75 pEof = getState >>= \s->case s.tokens of
76 [] = pure ()
77 [t:ts] = liftT (Left ["Expected EOF but got ":map toString [t:ts]])
78
79 (?) infixl 9 :: (Parser a) (a -> Bool) -> Parser a
80 (?) p f = p >>= \v->if (f v) (pure v) empty
81
82 pToken :: (Token -> Parser Token)
83 pToken = (?) pTop o (===)
84
85 pChainl :: (Parser (a a -> a)) (Parser a) -> Parser a
86 pChainl op p = foldl (flip ($)) <$> p <*> many (flip <$> op <*> p)
87
88 pChainr :: (Parser (a a -> a)) (Parser a) -> Parser a
89 pChainr op p = flip ($) <$> p <*> (flip <$> op <*> pChainr op p) <|> p
90
91 parse :: [Token] -> Either [String] [Function]
92 parse ts = fst <$> runStateT (reverse <$> pAST <* pEof) {tokens=ts, ifxs=[]}
93 where
94 pAST :: Parser [Function]
95 pAST = many pFunction >>= mapM \(id, args, body)->Function id args <$
96 modify (\t->{t & tokens=body}) <*> pExpression <* pEof
97
98 pId = (\(TTIdent i)->i) <$> pTop ? (\t->t=:(TTIdent _))
99 pOp = (\(TTOp i)->i) <$> pTop ? (\t->t=:(TTOp _))
100 pBrack p = pToken TTBrackOpen *> p <* pToken TTBrackClose
101
102 pFunction :: Parser ([Char], [[Char]], [Token])
103 pFunction
104 = (\x y z->(x, y, z))
105 <$> (pFunId <|> pId)
106 <*> many pId
107 <* pToken TTEq
108 <*> many (pTop ? ((=!=)TTSemiColon))
109 <* pToken TTSemiColon
110
111 pFunId :: Parser [Char]
112 pFunId = pOp
113 >>= \i->pChainr <$ pId ? ((==) ['ifxr']) <|> pChainl <$ pId ? ((==)['ifxl'])
114 >>= \p->(\(TTInt i)->i) <$> pTop ? (\t->t=:(TTInt _))
115 >>= \s->modify (\t->{t & ifxs=[(p (App o App (Var i) <$ pOp ? ((==)i)), s):t.ifxs]})
116 >>| pure i
117
118 pExpression :: Parser Expression
119 pExpression = getState >>= \{ifxs}->flip (foldr ($))
120 (map fst $ sortBy (on (<) snd) ifxs)
121 $ pChainl (pure App)
122 $ Lambda <$ pToken TTLambda <*> pId <* pToken TTDot <*> pExpression
123 <|> pBrack ( Tuple <$> pExpression <* pToken TTComma <*> pExpression
124 <|> Var <$> pOp
125 <|> pExpression)
126 <|> (\(TTInt i)->Lit (Int i)) <$> pTop ? (\t->t=:(TTInt _))
127 <|> (\(TTBool i)->Lit (Bool i)) <$> pTop ? (\t->t=:(TTBool _))
128 <|> (\x->Var ['_':x]) <$ pId ? ((==)['code']) <*> pId
129 <|> Var <$> pId