Implemented lexer function
[cc1516.git] / lex.icl
1 implementation module lex
2
3 import Data.Either
4 import Data.List
5 import StdString
6 import System.CommandLine
7 import StdFile
8 import StdMisc
9 from StdFunc import id, const
10 import Data.Maybe
11 import Control.Applicative
12 import Control.Monad
13 import Control.Monad.State
14 import Data.Functor
15 from Data.Func import $
16
17 // Misschien moeten we hier continuation style van maken
18 instance toString lexerOutput where
19 toString l = "dit is een lexer output, danwel error\n"
20
21 lexer :: [Char] -> LexerOutput [Token]
22 lexer i = let (out, rest) = runParser (many anyT) i in case rest of
23 [] = out
24 _ = Left "Unexpected input after last token"
25
26 //lexer functions
27 identT = alpha >>= \a -> many (char '_' <|> alphaNum) >>= \as -> return $ IdentToken [a:as]
28 numberT = optional (char '-') >>= \sign -> (some digit) >>= \n -> case sign of
29 Nothing -> return $ NumberToken $ 5 //fromString n
30 _ -> return $ NumberToken $ -5 //(fromString n) * -1
31 charLT = CharToken <$> (char '\'' *> item <* char '\'')
32 char2T = item >>= \c1 -> case c1 of
33 ':' = char ':' >>| return DoubleColonToken
34 '<' = char '=' >>| return LesserEqToken
35 '>' = char '=' >>| return GreaterEqToken
36 '=' = char '=' >>| return EqualsToken
37 '&' = char '&' >>| return AmpersandsToken
38 '|' = char '|' >>| return PipesToken
39 '-' = char '>' >>| return ArrowToken
40 _ = zero
41 char1T = item >>= \c1 -> findT c1 charTokenMap
42 varT = string (fromString "var") >>| return VarToken
43 voidT = string (fromString "Void") >>| return VoidToken
44 returnT = string (fromString "return") >>| return ReturnToken
45 ifT = string (fromString "if") >>| return IfToken
46 elseT = string (fromString "else") >>| return ElseToken
47 whileT = string (fromString "while") >>| return WhileToken
48 trueT = string (fromString "True") >>| return TrueToken
49 falseT = string (fromString "False") >>| return FalseToken
50 //note, for the anyToken parser the order matters!
51 anyT = char2T <|> char1T <|> varT <|> voidT <|> returnT <|> ifT <|> elseT <|> whileT <|>
52 trueT <|> falseT <|> numberT <|> identT <|> charLT
53
54 Start :: *World -> *World
55 Start w
56 # (args, w) = getCommandLine w // We lezen nu nog standaard van stdin
57 # (out, w) = stdio w
58 # (toparse, out) = readEntireFile out
59 # out = out <<< toString (lexer toparse)
60 # (b, w) = fclose out w
61 | not b = setReturnCode 1 w
62 = w
63 where
64 readEntireFile :: *File -> *([Char], *File)
65 readEntireFile f
66 # (b, c, f) = freadc f
67 | not b = ([], f)
68 # (cs, f) = readEntireFile f
69 = ([c:cs], f)
70
71
72
73 charTokenMap = [('(', BraceOpenToken)
74 ,(')', BraceCloseToken)
75 ,('{', CBraceOpenToken)
76 ,('}', CBraceCloseToken)
77 ,('[', SquareOpenToken)
78 ,(']', SquareCloseToken)
79 ,(',', CommaToken)
80 ,(':', ColonToken)
81 ,(';', SColonToken)
82 ,('.', DotToken)
83 ,('+', PlusToken)
84 ,('-', DashToken)
85 ,('*', StarToken)
86 ,('/', SlashToken)
87 ,('%', PercentToken)
88 ,('=', AssignmentToken)
89 ,('<', LesserToken)
90 ,('>', BiggerToken)
91 ,('!', ExclamationToken)]
92 findT c [] = fail "Unrecognized character"
93 findT c [(k,v):xs] = if (c==k) (return v) (findT c xs)
94
95
96 // Clean adaption of Yard, a parsec like parser combinator
97 :: Parser a = Parser ([Char] -> (LexerOutput a, [Char]))
98
99 runParser :: (Parser a) [Char] -> (LexerOutput a, [Char])
100 runParser (Parser p) s = p s
101
102 instance Functor Parser where
103 fmap f s = liftM f s
104
105 instance Applicative Parser where
106 pure a = Parser $ \s -> (Right a, s)
107 (<*>) sf s = ap sf s
108
109 instance Monad Parser where
110 bind p f = Parser $ \s -> let (out, rest) = runParser p s in case out of
111 Left e = (Left e, rest)
112 Right t = runParser (f t) rest
113
114 //gives us some, many and optional
115 instance Alternative Parser where
116 empty = zero
117 (<|>) p1 p2 = Parser $ \s -> let (out, rest) = runParser p1 s in case out of
118 Left e = runParser p2 s
119 Right t = (Right t, rest)
120
121 //parser that fails with error
122 fail :: String -> Parser a
123 fail e = Parser $ \s -> (Left e, s)
124
125 //parser that always fails
126 zero :: Parser a
127 zero = fail "Zero parser"
128
129 //matches exactly one Char
130 item :: Parser Char
131 item = Parser $ \s -> case s of
132 [] = (Left "Unexpected empty input", s)
133 [x:xs] = (Right x, xs)
134
135 //matches any char which satisfies f
136 satisfy :: (Char -> Bool) -> Parser Char
137 satisfy f = item >>= (\r -> if (f r) (return r) zero)
138
139 //tries a parser, if it fails returns a default value
140 optionalDef :: a (Parser a) -> Parser a
141 optionalDef def p = p <|> return def
142
143 //matched given char
144 char :: Char -> Parser Char
145 char c = satisfy (\i -> c==i) //I hate that we can't do: satisfy (==c)
146
147 alpha :: Parser Char
148 alpha = satisfy isAlpha
149
150 digit :: Parser Char
151 digit = satisfy isDigit
152
153 alphaNum :: Parser Char
154 alphaNum = alpha <|> digit
155
156 //matches a given String
157 string :: [Char] -> Parser [Char]
158 string s = mapM_ char s >>| return s