Initial commit
[fp.git] / parse.icl
1 implementation module parse
2
3 import StdEnv
4 import qualified _SystemStrictLists as SS
5 import StdStrictLists
6 import ast
7
8 import Control.Applicative
9 import Control.Monad
10 import Control.Monad.State
11 import Data.Data
12 import Data.Maybe
13 import Data.Either
14 import Data.Func
15 import Data.Functor
16 import Data.Bifunctor
17 import Data.GenCons
18 import Data.List
19 import Data.Monoid
20 import Data.Tuple
21
22 derive consName Token, Value
23
24 :: Expression | DelayedParse [Token]
25
26 :: Token
27 = IntToken Int | BoolToken Bool | CharToken Char | IdentToken String
28 | InfixrToken | InfixlToken | SemiColonToken | OpenBraceToken
29 | CloseBraceToken | EqualsToken
30
31 lex :: [Char] -> [Token]
32 lex cs = lex` cs
33 where
34 lex` [] = []
35 lex` ['\n':cs] = lex` cs
36 lex` [c:cs]
37 | isSpace c = lex` cs
38 lex` [';':cs] = [SemiColonToken:lex` cs]
39 lex` ['=':cs] = [EqualsToken:lex` cs]
40 lex` ['(':cs] = [OpenBraceToken:lex` cs]
41 lex` [')':cs] = [CloseBraceToken:lex` cs]
42 lex` ['\'',c,'\'':cs] = [CharToken c:lex` cs]
43 lex` ['T','r','u','e':cs] = [BoolToken True:lex` cs]
44 lex` ['F','a','l','s','e':cs] = [BoolToken True:lex` cs]
45 lex` ['i','n','f','i','x','r':cs] = [InfixrToken:lex` cs]
46 lex` ['i','n','f','i','x','l':cs] = [InfixlToken:lex` cs]
47 lex` ['-',c:cs]
48 | isDigit c
49 = case lex` [c:cs] of
50 [IntToken i:ts] = [IntToken (~i):ts]
51 ts = ts
52 lex` [c:cs]
53 | isDigit c
54 # (i, cs) = span isDigit [c:cs]
55 = [IntToken $ toInt $ toString i:lex` cs]
56 | isIdent c
57 # (i, cs) = span (\c->isIdent c || isDigit c) cs
58 = [IdentToken $ toString [c:i]:lex` cs]
59 where
60 isIdent c = isAlpha c || isMember c ['!@#$%^&*\|+?/_-\'<>.:~`=']
61
62 :: ParseState =
63 { tokens :: [Token]
64 , infixers :: [Infix]
65 }
66 :: Infix = Infix String Fixity Int
67 instance < Infix where (<) (Infix _ _ s) (Infix _ _ t) = s < t
68 :: Fixity = InfixL | InfixR
69
70 :: Parser a :== StateT ParseState (Either [String]) a
71
72 instance zero ParseState where zero = {tokens=[],infixers=[]}
73
74 (<?>) infixl 1 :: a (StateT b (Either (c a)) d) -> StateT b (Either (c a)) d | Semigroup (c a) & Applicative c
75 (<?>) err p = StateT $ first (flip mappend (pure err)) o runStateT p
76
77 top :: Parser Token
78 top = getState >>= \st=:{tokens}->case tokens of
79 [t:ts] = put {st & tokens=ts} >>| pure t
80 _ = empty
81
82 parseIf :: (a -> Bool) (Parser a) -> Parser a
83 parseIf pred p = p >>= \a->if (pred a) (pure a) empty
84
85 satisfy p :== parseIf p top
86 token t :== satisfy ((=+?=)t)
87
88 (until) infix 5 :: (Parser a) (Parser b) -> Parser ([a], b)
89 (until) p grd = (tuple [] <$> grd) <|> (appFst o 'SS'._cons <$> p <*> p until grd)
90
91 parse :: [Token] ParseState -> Either [String] AST
92 parse ts st = runStateT parseAST {st & tokens=ts} >>= uncurry parse2
93
94 parse2 :: AST ParseState -> Either [String] AST
95 parse2 (AST fs) ps = AST <$> mapM pfun fs
96 where
97 pfun :: Function -> Either [String] Function
98 pfun (Function s a (DelayedParse ts)) =
99 Function s a <$> evalStateT parseExpr {ps & tokens=ts}
100 pfun x = pure x
101
102 parseExpr :: Parser Expression
103 parseExpr = gets (\s->sort s.infixers) >>= foldr ($) parseApp o map op2parser
104 where
105 op2parser (Infix sym dir str) = if (dir =: InfixL) parseLOP parseROP
106 (\l op->Apply $ Apply op l) (Variable <$> parseIf ((==)sym) parseIdent)
107
108 parseApp = parseLOP (const o Apply) (pure ()) parseBasic
109
110 parseLOP comb ops prev = prev
111 >>= \e1->many (tuple <$> ops <*> prev)
112 >>= foldM (\e->pure o uncurry (comb e)) e1
113
114 parseROP comb ops prev = prev
115 >>= \e1->optional (tuple <$> ops <*> parseROP comb ops prev)
116 >>= pure o maybe e1 (\(op,e2)->comb e1 op e2)//uncurry (comb e1))
117
118 parseBasic
119 = (token OpenBraceToken *> parseExpr <* token CloseBraceToken)
120 <|> Literal <$> Int <$> parseInt
121 <|> (gets (\s->[i\\Infix i _ _<-s.infixers])
122 >>= \ifs->Variable <$> parseIf (not o flip elem ifs) parseIdent)
123
124 parseAST :: Parser AST
125 parseAST = AST <$> many parseFunction
126
127 parseFunction :: Parser Function
128 parseFunction = Function
129 <$> (parseIdent >>= \ident->(parseFixity ident <|> pure ()) >>| pure ident)
130 <*> many parseIdent
131 <*> (DelayedParse o fst <$> (token EqualsToken *> top until token SemiColonToken))
132
133 parseFixity :: String -> Parser ()
134 parseFixity ident = Infix ident
135 <$> (token InfixrToken *> pure InfixR <|> token InfixlToken *> pure InfixL)
136 <*> parseInt
137 >>= \f->modify \s->{s & infixers=[f:s.infixers]}
138
139 parseIdent :: Parser String
140 parseIdent = (\(IdentToken s)->s) <$> token (IdentToken "")
141
142 parseInt :: Parser Int
143 parseInt = (\(IntToken i)->i) <$> token (IntToken 0)
144
145 Start = toString <$> parse (lex ['$ infixr 0 a b = -42; ap a b = a $ b $ c; ap a b = a b c;']) zero