parser, succinct
[fp.git] / parse.icl
1 implementation module parse
2
3 import StdEnv
4 import qualified _SystemStrictLists as SS
5 import StdStrictLists
6 import ast
7
8 import Control.Applicative
9 import Control.Monad
10 import Control.Monad.State
11 import Data.Data
12 import Data.Maybe
13 import Data.Either
14 import Data.Func
15 import Data.Functor
16 import Data.Bifunctor
17 import Data.GenCons
18 import Data.List
19 import Data.Monoid
20 import Data.Tuple
21
22 derive consName Token, Value
23
24 :: Expression | DelayedParse [Token]
25
26 :: Token
27 = IntToken Int | BoolToken Bool | CharToken Char | IdentToken String
28 | InfixrToken | InfixlToken | SemiColonToken | OpenBraceToken
29 | CloseBraceToken | EqualsToken
30
31 lex :: [Char] -> [Token]
32 lex cs = lex` cs
33 where
34 lex` [] = []
35 lex` ['\n':cs] = lex` cs
36 lex` [c:cs]
37 | isSpace c = lex` cs
38 lex` [';':cs] = [SemiColonToken:lex` cs]
39 lex` ['=',c:cs]
40 | not (isIdent c) = [EqualsToken:lex` [c:cs]]
41 lex` ['(':cs] = [OpenBraceToken:lex` cs]
42 lex` [')':cs] = [CloseBraceToken:lex` cs]
43 lex` ['\'',c,'\'':cs] = [CharToken c:lex` cs]
44 lex` ['T','r','u','e':cs] = [BoolToken True:lex` cs]
45 lex` ['F','a','l','s','e':cs] = [BoolToken True:lex` cs]
46 lex` ['i','n','f','i','x','r':cs] = [InfixrToken:lex` cs]
47 lex` ['i','n','f','i','x','l':cs] = [InfixlToken:lex` cs]
48 lex` ['-',c:cs]
49 | isDigit c
50 = case lex` [c:cs] of
51 [IntToken i:ts] = [IntToken (~i):ts]
52 ts = ts
53 lex` [c:cs]
54 | isDigit c
55 # (i, cs) = span isDigit [c:cs]
56 = [IntToken $ toInt $ toString i:lex` cs]
57 | isIdent c
58 # (i, cs) = span (\c->isIdent c || isDigit c) cs
59 = [IdentToken $ toString [c:i]:lex` cs]
60
61 isIdent c = isAlpha c || isMember c ['!@#$%^&*\|+?/_-\'<>.:~`=']
62
63 :: ParseState =
64 { tokens :: [Token]
65 , infixers :: [Infix]
66 }
67 :: Infix = Infix String Fixity Int
68 instance < Infix where (<) (Infix _ _ s) (Infix _ _ t) = s < t
69 :: Fixity = InfixL | InfixR
70
71 :: Parser a :== StateT ParseState (Either [String]) a
72
73 instance zero ParseState where zero = {tokens=[],infixers=[]}
74
75 (<?>) infixl 1 :: a (StateT b (Either (c a)) d) -> StateT b (Either (c a)) d | Semigroup (c a) & Applicative c
76 (<?>) err p = StateT $ first (flip mappend (pure err)) o runStateT p
77
78 top :: Parser Token
79 top = getState >>= \st=:{tokens}->case tokens of
80 [t:ts] = put {st & tokens=ts} >>| pure t
81 _ = empty
82
83 parseIf :: (a -> Bool) (Parser a) -> Parser a
84 parseIf pred p = p >>= \a->if (pred a) (pure a) empty
85
86 token t :== parseIf ((=+?=)t) top
87
88 (until) infix 5 :: (Parser a) (Parser b) -> Parser ([a], b)
89 (until) p grd = (tuple [] <$> grd) <|> (appFst o 'SS'._cons <$> p <*> p until grd)
90
91 parse :: [Token] ParseState -> Either [String] AST
92 parse ts st = runStateT parseAST {st & tokens=ts} >>= uncurry parse2
93
94 parse2 :: AST ParseState -> Either [String] AST
95 parse2 (AST fs) ps = AST <$> mapM pfun fs
96 where
97 pfun :: Function -> Either [String] Function
98 pfun (Function s a (DelayedParse ts)) =
99 Function s a <$> evalStateT parseExpr {ps & tokens=ts}
100 pfun x = pure x
101
102 cons x xs = [x:xs]
103
104 parseExpr :: Parser Expression
105 parseExpr = gets (\s->sort s.infixers)
106 >>= flip seq parseBasic o cons parseApp o reverse o map op2parser
107 where
108 op2parser (Infix sym dir str) = if (dir =: InfixL) parseLOP parseROP
109 (\l op->Apply $ Apply op l) (Variable <$> parseIf ((==)sym) parseIdent)
110
111 parseApp = parseLOP (const o Apply) (pure ())
112
113 parseLOP :: (a b a -> a) (c b) (c a) -> c a | Alternative c
114 parseLOP comb ops prev = foldl (uncurry o comb)
115 <$> prev <*> many (tuple <$> ops <*> prev)
116
117 parseROP :: (a b a -> a) (c b) (c a) -> c a | Monad c & Alternative c
118 parseROP comb ops prev = prev >>= \e1->
119 comb e1 <$> ops <*> parseROP comb ops prev <|> pure e1
120
121 parseBasic
122 = (token OpenBraceToken *> parseExpr <* token CloseBraceToken)
123 <|> Literal <$> Int <$> parseInt
124 <|> (gets (\s->[i\\Infix i _ _<-s.infixers])
125 >>= \ifs->Variable <$> parseIf (not o flip elem ifs) parseIdent)
126
127 parseAST :: Parser AST
128 parseAST = AST <$> many parseFunction
129
130 parseFunction :: Parser Function
131 parseFunction = Function
132 <$> (parseIdent >>= \ident->(parseFixity ident <|> pure ()) >>| pure ident)
133 <*> many parseIdent
134 <*> (DelayedParse o fst <$> (token EqualsToken *> top until token SemiColonToken))
135
136 parseFixity :: String -> Parser ()
137 parseFixity ident = Infix ident
138 <$> (token InfixrToken *> pure InfixR <|> token InfixlToken *> pure InfixL)
139 <*> parseInt
140 >>= \f->modify \s->{s & infixers=[f:s.infixers]}
141
142 parseIdent :: Parser String
143 parseIdent = (\(IdentToken s)->s) <$> token (IdentToken "")
144
145 parseInt :: Parser Int
146 parseInt = (\(IntToken i)->i) <$> token (IntToken 0)
147
148 Start = toString <$> parse (lex ['+ infixr 6 a b = 0; * infixr 7 a b = 0; ap = a + b * c + d;']) zero