inference from haskell writing
[fp.git] / parse.icl
1 implementation module parse
2
3 import StdEnv
4 import qualified _SystemStrictLists as SS
5 import StdStrictLists
6 import ast
7
8 import Control.Applicative
9 import Control.Monad
10 import Control.Monad.State
11 import Data.Data
12 import Data.Maybe
13 import Data.Either
14 import Data.Func
15 import Data.Functor
16 import Data.Bifunctor
17 import Data.GenCons
18 import Data.List
19 import Data.Monoid
20 import Data.Tuple
21
22 derive consName Token, Value
23
24 :: Expression | DelayedParse [Token]
25 :: Token
26 = IntToken Int | BoolToken Bool | CharToken Char | IdentToken String
27 | InfixrToken | InfixlToken | SemiColonToken | OpenBraceToken
28 | CloseBraceToken | EqualsToken | CodeToken | LambdaToken
29 | ArrowToken | LetToken | InToken
30 :: ParseState = {tokens :: [Token], infixers :: [Infix]}
31 :: Infix = Infix String Fixity Int
32 :: Fixity = InfixL | InfixR
33 :: Parser a :== StateT ParseState (Either [String]) a
34
35 instance < Infix where (<) (Infix _ _ s) (Infix _ _ t) = s < t
36 instance zero ParseState where zero = {tokens=[],infixers=[]}
37
38 lex :: [Char] -> [Token]
39 lex cs = lex` cs
40 where
41 lex` [] = []
42 lex` ['\n':cs] = lex` cs
43 lex` [c:cs] | isSpace c = lex` cs
44 lex` [';':cs] = [SemiColonToken:lex` cs]
45 lex` ['=',c:cs] | not (isIdent c) = [EqualsToken:lex` [c:cs]]
46 lex` ['(':cs] = [OpenBraceToken:lex` cs]
47 lex` [')':cs] = [CloseBraceToken:lex` cs]
48 lex` ['->':cs] = [ArrowToken:lex` cs]
49 lex` ['\\':cs] = [LambdaToken:lex` cs]
50 lex` ['\'',c,'\'':cs] = [CharToken c:lex` cs]
51 lex` ['let':cs] | whiteOrEnd cs = [LetToken:lex` cs]
52 lex` ['in':cs] | whiteOrEnd cs = [InToken:lex` cs]
53 lex` ['code':cs] | whiteOrEnd cs = [CodeToken:lex` cs]
54 lex` ['False':cs] | whiteOrEnd cs = [BoolToken True:lex` cs]
55 lex` ['True':cs] | whiteOrEnd cs = [BoolToken True:lex` cs]
56 lex` ['infixr':cs] | whiteOrEnd cs = [InfixrToken:lex` cs]
57 lex` ['infixl':cs] | whiteOrEnd cs = [InfixlToken:lex` cs]
58 lex` ['-',c:cs]
59 | isDigit c
60 = case lex` [c:cs] of
61 [IntToken i:ts] = [IntToken (~i):ts]
62 ts = ts
63 lex` [c:cs]
64 | isDigit c
65 # (i, cs) = span isDigit [c:cs]
66 = [IntToken $ toInt $ toString i:lex` cs]
67 | isIdent c
68 # (i, cs) = span (\c->isIdent c || isDigit c) cs
69 = [IdentToken $ toString [c:i]:lex` cs]
70 | isFunny c
71 # (i, cs) = span (\c->isFunny c || isDigit c) cs
72 = [IdentToken $ toString [c:i]:lex` cs]
73 = abort $ "Huh lexer failed on: " +++ toString (toInt c)
74
75 whiteOrEnd [] = True
76 whiteOrEnd [c:cs] = isSpace c
77
78 isIdent c = isAlpha c || elem c ['\'`']
79 isFunny = flip elem ['!@#$%^&*\|+-?/\'<>.:~=']
80
81 (<?>) infixl 1 :: a (StateT b (Either (c a)) d) -> StateT b (Either (c a)) d | Semigroup (c a) & Applicative c
82 (<?>) err p = StateT $ first (flip mappend (pure err)) o runStateT p
83
84 top :: Parser Token
85 top = getState >>= \st=:{tokens}->case tokens of
86 [t:ts] = put {st & tokens=ts} >>| pure t
87 _ = empty
88
89 parseIf :: (a -> Bool) (Parser a) -> Parser a
90 parseIf pred p = p >>= \a->if (pred a) (pure a) empty
91
92 token t :== parseIf ((=+?=)t) top
93
94 (until) infix 5 :: (Parser a) (Parser b) -> Parser ([a], b)
95 (until) p grd = (tuple [] <$> grd) <|> (appFst o 'SS'._cons <$> p <*> p until grd)
96
97 parse :: ParseState [Token] -> Either [String] AST
98 parse st ts = runStateT parseAST {st & tokens=ts} >>= uncurry parse2
99
100 parse2 :: AST ParseState -> Either [String] AST
101 parse2 (AST fs) ps = AST <$> mapM pfun fs
102 where
103 pfun :: Function -> Either [String] Function
104 pfun (Function s a (DelayedParse ts)) =
105 Function s a <$> evalStateT parseExpr {ps & tokens=ts}
106 pfun x = pure x
107
108 cons x xs = [x:xs]
109
110 parseExpr :: Parser Expression
111 parseExpr = gets (\s->sort s.infixers)
112 >>= foldr ($) parseBasic o (\x xs->[x:xs]) parseApp o map op2parser
113 where
114 op2parser :: Infix -> ((Parser Expression) -> Parser Expression)
115 op2parser (Infix sym dir str) = if (dir =: InfixL) parseLOP parseROP
116 (\l op->Apply (Apply op l)) $ Variable <$> parseIf ((==)sym) parseIdent
117
118 parseApp :: ((Parser Expression) -> Parser Expression)
119 parseApp = parseLOP (const o Apply) (pure ())
120
121 parseLOP :: (a b a -> a) (c b) (c a) -> c a | Alternative c
122 parseLOP comb ops prev = foldl (uncurry o comb) <$> prev <*> many (tuple <$> ops <*> prev)
123
124 parseROP :: (a b a -> a) (c b) (c a) -> c a | Monad c & Alternative c
125 parseROP comb ops prev = prev >>= \e1->comb e1 <$> ops <*> parseROP comb ops prev <|> pure e1
126
127 parseBasic :: Parser Expression
128 parseBasic
129 //Bracketed
130 = token OpenBraceToken *> parseBracketed <* token CloseBraceToken
131 <|> Let <$> (token LetToken *> parseIdent <* token EqualsToken) <*> (parseExpr <* token InToken) <*> parseExpr
132 <|> Literal <$> Int <$> parseInt
133 <|> Code <$> (token CodeToken *> parseIdent)
134 <|> flip (foldr Lambda) <$> (token LambdaToken *> some parseIdent <* token ArrowToken) <*> parseExpr
135 <|> Variable <$> parseWithIfx ((o) not o flip elem)
136
137 parseBracketed :: Parser Expression
138 parseBracketed
139 //Curried prefix infix
140 = (\op->Lambda "_v" o Apply (Apply op (Variable "v"))) <$> parseIfx <*> parseExpr
141 //Regular Prefix infix
142 <|> parseIfx
143 //Curried flipped prefix infix
144 <|> (\e op->Lambda "_v" (Apply (Apply op e) (Variable "v"))) <$> parseExpr <*> parseIfx
145 //Parse regular expression
146 <|> parseExpr
147
148 parseWithIfx :: ([String] String -> Bool) -> Parser String
149 parseWithIfx f = gets (\s->[i\\Infix i _ _<-s.infixers]) >>= flip parseIf parseIdent o f
150
151 parseIfx :: Parser Expression
152 parseIfx = Variable <$> parseWithIfx (flip elem)
153
154 parseAST :: Parser AST
155 parseAST = AST <$> many parseFunction
156
157 parseFunction :: Parser Function
158 parseFunction = Function
159 <$> (parseIdent >>= \ident->(parseFixity ident <|> pure ()) >>| pure ident)
160 <*> many parseIdent
161 <*> (DelayedParse o fst <$> (token EqualsToken *> top until token SemiColonToken))
162
163 parseFixity :: String -> Parser ()
164 parseFixity ident = Infix ident
165 <$> (token InfixrToken *> pure InfixR <|> token InfixlToken *> pure InfixL)
166 <*> parseInt
167 >>= \f->modify \s->{s & infixers=[f:s.infixers]}
168
169 parseIdent :: Parser String
170 parseIdent = (\(IdentToken s)->s) <$> token (IdentToken "")
171
172 parseInt :: Parser Int
173 parseInt = (\(IntToken i)->i) <$> token (IntToken 0)