tuples are slow in parsing
[minfp.git] / parse.icl
1 implementation module parse
2
3 import Control.Applicative
4 import Control.Monad
5 import Control.Monad.State
6 import Control.Monad.Trans
7 import Data.Either
8 import Data.GenEq
9 import Data.Functor
10 import Data.Func
11 import Data.List
12 import Text.GenPrint
13 import StdEnv
14
15 import ast
16
17 (<:>) infixl 0 :: a (m [a]) -> m [a] | Functor m
18 (<:>) l r = (\xs->[l:xs]) <$> r
19
20 :: Token = TTSemiColon | TTBrackOpen | TTBrackClose | TTBool Bool | TTInt Int
21 | TTOp [Char] | TTIdent [Char]
22
23 derive gEq Token
24 derive gPrint Token
25
26 instance toString Token where toString t = printToString t
27
28 lex :: [Char] -> Either [String] [Token]
29 lex [] = pure []
30 lex ['//\n':ts] = lex ts
31 lex ['//',t:ts] = lex ['/','/':ts]
32 lex ['/**/':ts] = lex $ dropWhile ((<>)'\n') ts
33 lex ['/*',t:ts] = lex ['/','*':ts]
34 lex [';':ts] = TTSemiColon <:> lex ts
35 lex [')':ts] = TTBrackClose <:> lex ts
36 lex ['(':ts] = TTBrackOpen <:> lex ts
37 lex ['True':ts] = TTBool True <:> lex ts
38 lex ['False':ts] = TTBool False <:> lex ts
39 lex ['-',t:ts]
40 | isDigit t = lex [t:ts] >>= \v->case v of
41 [TTInt i:rest] = Right [TTInt (~i):rest]
42 x = pure x
43 lex [t:ts]
44 | isSpace t = lex ts
45 | isDigit t
46 # (d, ts) = span isDigit [t:ts]
47 = TTInt (toInt (toString d)) <:> lex ts
48 | isAlpha t
49 # (d, ts) = span isAlpha [t:ts]
50 = TTIdent d <:> lex ts
51 | isOp t
52 # (d, ts) = span isOp [t:ts]
53 = TTOp d <:> lex ts
54 = Left ["Unexpected: " +++ toString t +++ " ord: " +++ toString (toInt t)]
55 where
56 isOp c = isMember c ['!@#$%^&*=+/?-_|\\\'",<>.:']
57
58 :: Parser a :== StateT ParseState (Either [String]) a
59 :: ParseState =
60 { tokens :: [Token]
61 , ifxs :: [((Parser Expression) -> Parser Expression, Int)]
62 }
63
64 pTop :: Parser Token
65 pTop = getState >>= \s->case s.tokens of
66 [t:ts] = put {s & tokens=ts} >>| pure t
67 [] = liftT (Left ["Fully consumed input"])
68
69 pEof :: Parser ()
70 pEof = getState >>= \s->case s.tokens of
71 [] = pure ()
72 [t:ts] = liftT (Left ["Expected EOF but got ":map toString [t:ts]])
73
74 (?) infixl 9 :: (Parser a) (a -> Bool) -> Parser a
75 (?) p f = p >>= \v->if (f v) (pure v) empty
76
77 pToken :: (Token -> Parser Token)
78 pToken = (?) pTop o (===)
79
80 pChainl :: (Parser (a a -> a)) (Parser a) -> Parser a
81 pChainl op p = foldl (flip ($)) <$> p <*> many (flip <$> op <*> p)
82
83 pChainr :: (Parser (a a -> a)) (Parser a) -> Parser a
84 pChainr op p = flip ($) <$> p <*> (flip <$> op <*> pChainr op p) <|> p
85
86 parse :: [Token] -> Either [String] [Function]
87 parse ts = fst <$> runStateT (reverse <$> pAST <* pEof) {tokens=ts, ifxs=[]}
88 where
89 pAST :: Parser [Function]
90 pAST = many pFunction >>= mapM \(id, args, body)->Function id args <$
91 modify (\t->{t & tokens=body}) <*> pExpression <* pEof
92
93 pId = (\(TTIdent i)->i) <$> pTop ? (\t->t=:(TTIdent _))
94 pOp = (\(TTOp i)->i) <$> pTop ? (\t->t=:(TTOp _))
95 pInt = (\(TTInt i)->i) <$> pTop ? (\t->t=:(TTInt _))
96 pBrack p = pToken TTBrackOpen *> p <* pToken TTBrackClose
97
98 pFunction :: Parser ([Char], [[Char]], [Token])
99 pFunction
100 = (\x y z->(x, y, z))
101 <$> (pFunId <|> pId)
102 <*> many pId
103 <* pToken (TTOp ['='])
104 <*> many (pTop ? ((=!=)TTSemiColon))
105 <* pToken TTSemiColon
106
107 pFunId :: Parser [Char]
108 pFunId = pOp
109 >>= \i->pChainr <$ pId ? ((==) ['ifxr']) <|> pChainl <$ pId ? ((==)['ifxl'])
110 >>= \p->pInt
111 >>= \s->modify (\t->{t & ifxs=[(p (App o App (Var i) <$ pOp ? ((==)i)), s):t.ifxs]})
112 >>| pure i
113
114 pExpression :: Parser Expression
115 pExpression = getState >>= \{ifxs}->flip (foldr ($))
116 (map fst $ sortBy (on (<) snd) ifxs)
117 $ pChainl (pure App)
118 $ Lambda <$ pToken (TTOp ['\\']) <*> pId <* pToken (TTOp ['.']) <*> pExpression
119 <|> pBrack ( Tuple <$> pExpression <* pToken (TTOp [',']) <*> pExpression
120 <|> Var <$> pOp
121 <|> pExpression)
122 <|> Lit o Int <$> pInt
123 <|> (\(TTBool i)->Lit (Bool i)) <$> pTop ? (\t->t=:(TTBool _))
124 <|> (\x->Var ['_':x]) <$ pToken (TTIdent ['code']) <*> pId
125 <|> Var <$> pId