e5927bd39cfd347175721b997fd6f10f39c4524a
[fp.git] / parse.icl
1 implementation module parse
2
3 import StdEnv
4 import qualified _SystemStrictLists as SS
5 import StdStrictLists
6 import ast
7
8 import Control.Applicative
9 import Control.Monad
10 import Control.Monad.State
11 import Data.Data
12 import Data.Maybe
13 import Data.Either
14 import Data.Func
15 import Data.Functor
16 import Data.Bifunctor
17 import Data.GenCons
18 import Data.List
19 import Data.Monoid
20 import Data.Tuple
21
22 derive consName Token, Value
23
24 :: Expression | DelayedParse [Token]
25
26 :: Token
27 = IntToken Int | BoolToken Bool | CharToken Char | IdentToken String
28 | InfixrToken | InfixlToken | SemiColonToken | OpenBraceToken
29 | CloseBraceToken | EqualsToken | CodeToken | LambdaToken
30 | ArrowToken
31
32 lex :: [Char] -> [Token]
33 lex cs = lex` cs
34 where
35 lex` [] = []
36 lex` ['\n':cs] = lex` cs
37 lex` [c:cs]
38 | isSpace c = lex` cs
39 lex` [';':cs] = [SemiColonToken:lex` cs]
40 lex` ['=',c:cs]
41 | not (isIdent c) = [EqualsToken:lex` [c:cs]]
42 lex` ['(':cs] = [OpenBraceToken:lex` cs]
43 lex` [')':cs] = [CloseBraceToken:lex` cs]
44 lex` ['-','>':cs] = [ArrowToken:lex` cs]
45 lex` ['\\':cs] = [LambdaToken:lex` cs]
46 lex` ['\'',c,'\'':cs] = [CharToken c:lex` cs]
47 lex` ['T','r','u','e':cs] = [BoolToken True:lex` cs]
48 lex` ['c','o','d','e':cs] = [CodeToken:lex` cs]
49 lex` ['F','a','l','s','e':cs] = [BoolToken True:lex` cs]
50 lex` ['i','n','f','i','x','r':cs] = [InfixrToken:lex` cs]
51 lex` ['i','n','f','i','x','l':cs] = [InfixlToken:lex` cs]
52 lex` ['-',c:cs]
53 | isDigit c
54 = case lex` [c:cs] of
55 [IntToken i:ts] = [IntToken (~i):ts]
56 ts = ts
57 lex` [c:cs]
58 | isDigit c
59 # (i, cs) = span isDigit [c:cs]
60 = [IntToken $ toInt $ toString i:lex` cs]
61 | isIdent c
62 # (i, cs) = span (\c->isIdent c || isDigit c) cs
63 = [IdentToken $ toString [c:i]:lex` cs]
64 | isFunny c
65 # (i, cs) = span (\c->isFunny c || isDigit c) cs
66 = [IdentToken $ toString [c:i]:lex` cs]
67 = abort $ "Huh lexer failed on: " +++ toString (toInt c)
68
69 isIdent c = isAlpha c || elem c ['\'`']
70 isFunny = flip elem ['!@#$%^&*\|+-?/\'<>.:~=']
71
72 :: ParseState =
73 { tokens :: [Token]
74 , infixers :: [Infix]
75 }
76 :: Infix = Infix String Fixity Int
77 instance < Infix where (<) (Infix _ _ s) (Infix _ _ t) = s < t
78 :: Fixity = InfixL | InfixR
79
80 :: Parser a :== StateT ParseState (Either [String]) a
81
82 instance zero ParseState where zero = {tokens=[],infixers=[]}
83
84 (<?>) infixl 1 :: a (StateT b (Either (c a)) d) -> StateT b (Either (c a)) d | Semigroup (c a) & Applicative c
85 (<?>) err p = StateT $ first (flip mappend (pure err)) o runStateT p
86
87 top :: Parser Token
88 top = getState >>= \st=:{tokens}->case tokens of
89 [t:ts] = put {st & tokens=ts} >>| pure t
90 _ = empty
91
92 parseIf :: (a -> Bool) (Parser a) -> Parser a
93 parseIf pred p = p >>= \a->if (pred a) (pure a) empty
94
95 token t :== parseIf ((=+?=)t) top
96
97 (until) infix 5 :: (Parser a) (Parser b) -> Parser ([a], b)
98 (until) p grd = (tuple [] <$> grd) <|> (appFst o 'SS'._cons <$> p <*> p until grd)
99
100 parse :: [Token] ParseState -> Either [String] AST
101 parse ts st = runStateT parseAST {st & tokens=ts} >>= uncurry parse2
102
103 parse2 :: AST ParseState -> Either [String] AST
104 parse2 (AST fs) ps = AST <$> mapM pfun fs
105 where
106 pfun :: Function -> Either [String] Function
107 pfun (Function s a (DelayedParse ts)) =
108 Function s a <$> evalStateT parseExpr {ps & tokens=ts}
109 pfun x = pure x
110
111 cons x xs = [x:xs]
112
113 parseExpr :: Parser Expression
114 parseExpr = gets (\s->sort s.infixers)
115 >>= foldr ($) parseBasic o (\x xs->[x:xs]) parseApp o map op2parser
116 where
117 op2parser :: Infix -> ((Parser Expression) -> Parser Expression)
118 op2parser (Infix sym dir str) = if (dir =: InfixL) parseLOP parseROP
119 (\l op->Apply (Apply op l)) $ Variable <$> parseIf ((==)sym) parseIdent
120
121 parseApp :: ((Parser Expression) -> Parser Expression)
122 parseApp = parseLOP (const o Apply) (pure ())
123
124 parseLOP :: (a b a -> a) (c b) (c a) -> c a | Alternative c
125 parseLOP comb ops prev = foldl (uncurry o comb) <$> prev <*> many (tuple <$> ops <*> prev)
126
127 parseROP :: (a b a -> a) (c b) (c a) -> c a | Monad c & Alternative c
128 parseROP comb ops prev = prev >>= \e1->comb e1 <$> ops <*> parseROP comb ops prev <|> pure e1
129
130 parseBasic :: Parser Expression
131 parseBasic
132 //Bracketed
133 = token OpenBraceToken *> parseBracketed <* token CloseBraceToken
134 <|> Literal <$> Int <$> parseInt
135 <|> Code <$> (token CodeToken *> parseIdent)
136 <|> flip (foldr Lambda) <$> (token LambdaToken *> some parseIdent <* token ArrowToken) <*> parseExpr
137 <|> Variable <$> parseWithIfx ((o) not o flip elem)
138
139 parseBracketed :: Parser Expression
140 parseBracketed
141 //Curried prefix infix
142 = (\e op->Apply (Apply fpflip op) e) <$> parseIfx <*> parseExpr
143 //Regular Prefix infix
144 <|> parseIfx
145 //Curried flipped prefix infix
146 <|> Apply <$> parseExpr <*> parseIfx
147 //Parse regular expression
148 <|> parseExpr
149
150 fpflip :: Expression
151 fpflip = Lambda "x" $ Lambda "y" $ Lambda "z" $ Apply (Apply (Variable "x") (Variable "z")) (Variable "y")
152
153 parseWithIfx :: ([String] String -> Bool) -> Parser String
154 parseWithIfx f = gets (\s->[i\\Infix i _ _<-s.infixers]) >>= flip parseIf parseIdent o f
155
156 parseIfx :: Parser Expression
157 parseIfx = Variable <$> parseWithIfx (flip elem)
158
159 parseAST :: Parser AST
160 parseAST = AST <$> many parseFunction
161
162 parseFunction :: Parser Function
163 parseFunction = Function
164 <$> (parseIdent >>= \ident->(parseFixity ident <|> pure ()) >>| pure ident)
165 <*> many parseIdent
166 <*> (DelayedParse o fst <$> (token EqualsToken *> top until token SemiColonToken))
167
168 parseFixity :: String -> Parser ()
169 parseFixity ident = Infix ident
170 <$> (token InfixrToken *> pure InfixR <|> token InfixlToken *> pure InfixL)
171 <*> parseInt
172 >>= \f->modify \s->{s & infixers=[f:s.infixers]}
173
174 parseIdent :: Parser String
175 parseIdent = (\(IdentToken s)->s) <$> token (IdentToken "")
176
177 parseInt :: Parser Int
178 parseInt = (\(IntToken i)->i) <$> token (IntToken 0)
179
180 Start =
181 toString <$> parse (
182 lex $
183 ['id x = x;'] ++
184 ['const x y = x;'] ++
185 ['flip x y z = x z y;'] ++
186 ['. infixr 9 f g x = f $ g x;'] ++
187 ['$ infixr 0 = id;'] ++
188 ['& infixr 0 = flip $;'] ++
189 ['+ infixr 6 = code add;'] ++
190 ['- infixr 6 = code sub;'] ++
191 ['* infixr 7 = code mul;'] ++
192 ['/ infixr 7 = code div;'] ++
193 ['&& infixl 3 = code and;'] ++
194 ['|| infixl 2 = code or;'] ++
195 ['ap = f . g $ x;']
196 ) zero