Tokenizer functions implemented
[cc1516.git] / lex.icl
1 implementation module lex
2
3 import Data.Either
4 import Data.List
5 import StdString
6 import System.CommandLine
7 import StdFile
8 import StdMisc
9 from StdFunc import id, const
10 import Data.Maybe
11 import Control.Applicative
12 import Control.Monad
13 import Control.Monad.State
14 from Data.Func import $
15
16 // Misschien moeten we hier continuation style van maken
17 instance toString lexerOutput where
18 toString l = "dit is een lexer output, danwel error\n"
19
20 lexer :: [Char] -> LexerOutput [Token]
21 lexer _ = Left "Not Implemented"
22
23 //lexer functions
24 identT = alpha >>= \a -> many (char '_' <|> alphaNum) >>= \as -> return $ IdentToken [a:as]
25 numberT = optional (char '-') >>= \sign -> (some digit) >>= \n -> case sign of
26 Nothing -> return $ NumberToken $ 5 //fromString n
27 _ -> return $ NumberToken $ -5 //(fromString n) * -1
28 charLT = liftM CharToken item
29 char2T = item >>= \c1 -> case c1 of
30 ':' = char ':' >>| return DoubleColonToken
31 '<' = char '=' >>| return LesserEqToken
32 '>' = char '=' >>| return GreaterEqToken
33 '=' = char '=' >>| return EqualsToken
34 '&' = char '&' >>| return AmpersandsToken
35 '|' = char '|' >>| return PipesToken
36 '-' = char '>' >>| return ArrowToken
37 char1T = item >>= \c1 -> findT c1 charTokenMap
38 varT = string (fromString "var") >>| return VarToken
39 voidT = string (fromString "Void") >>| return VoidToken
40 returnT = string (fromString "return") >>| return ReturnToken
41 ifT = string (fromString "if") >>| return IfToken
42 elseT = string (fromString "else") >>| return ElseToken
43 whileT = string (fromString "while") >>| return WhileToken
44 trueT = string (fromString "True") >>| return TrueToken
45 falseT = string (fromString "False") >>| return FalseToken
46
47 Start :: *World -> *World
48 Start w
49 # (args, w) = getCommandLine w // We lezen nu nog standaard van stdin
50 # (out, w) = stdio w
51 # (toparse, out) = readEntireFile out
52 # out = out <<< toString (lexer toparse)
53 # (b, w) = fclose out w
54 | not b = setReturnCode 1 w
55 = w
56 where
57 readEntireFile :: *File -> *([Char], *File)
58 readEntireFile f
59 # (b, c, f) = freadc f
60 | not b = ([], f)
61 # (cs, f) = readEntireFile f
62 = ([c:cs], f)
63
64
65
66 charTokenMap = [('(', BraceOpenToken)
67 ,(')', BraceCloseToken)
68 ,('{', CBraceOpenToken)
69 ,('}', CBraceCloseToken)
70 ,('[', SquareOpenToken)
71 ,(']', SquareCloseToken)
72 ,(',', CommaToken)
73 ,(':', ColonToken)
74 ,(';', SColonToken)
75 ,('.', DotToken)
76 ,('+', PlusToken)
77 ,('-', DashToken)
78 ,('*', StarToken)
79 ,('/', SlashToken)
80 ,('%', PercentToken)
81 ,('=', AssignmentToken)
82 ,('<', LesserToken)
83 ,('>', BiggerToken)
84 ,('!', ExclamationToken)]
85 findT c [] = fail "Unrecognized character"
86 findT c [(k,v):xs] = if (c==k) (return v) (findT c xs)
87
88
89 // Clean adaption of Yard, a parsec like parser combinator
90 :: Parser a = Parser ([Char] -> (LexerOutput a, [Char]))
91
92 runParser :: (Parser a) [Char] -> (LexerOutput a, [Char])
93 runParser (Parser p) s = p s
94
95 instance Functor Parser where
96 fmap f s = liftM f s
97
98 instance Applicative Parser where
99 pure a = Parser $ \s -> (Right a, s)
100 (<*>) sf s = ap sf s
101
102 instance Monad Parser where
103 bind p f = Parser $ \s -> let (out, rest) = runParser p s in case out of
104 Left e = (Left e, rest)
105 Right t = runParser (f t) rest
106
107 //gives us some, many and optional
108 instance Alternative Parser where
109 empty = zero
110 (<|>) p1 p2 = Parser $ \s -> let (out, rest) = runParser p1 s in case out of
111 Left e = runParser p2 s
112 Right t = (Right t, rest)
113
114 //parser that fails with error
115 fail :: String -> Parser a
116 fail e = Parser $ \s -> (Left e, s)
117
118 //parser that always fails
119 zero :: Parser a
120 zero = fail "Zero parser"
121
122 //matches exactly one Char
123 item :: Parser Char
124 item = Parser $ \s -> case s of
125 [] = (Left "Unexpected empty input", s)
126 [x:xs] = (Right x, xs)
127
128 //matches any char which satisfies f
129 satisfy :: (Char -> Bool) -> Parser Char
130 satisfy f = item >>= (\r -> if (f r) (return r) zero)
131
132 //tries a parser, if it fails returns a default value
133 optionalDef :: a (Parser a) -> Parser a
134 optionalDef def p = p <|> return def
135
136 //matched given char
137 char :: Char -> Parser Char
138 char c = satisfy (\i -> c==i) //I hate that we can't do: satisfy (==c)
139
140 alpha :: Parser Char
141 alpha = satisfy isAlpha
142
143 digit :: Parser Char
144 digit = satisfy isDigit
145
146 alphaNum :: Parser Char
147 alphaNum = alpha <|> digit
148
149 //matches a given String
150 string :: [Char] -> Parser [Char]
151 string s = mapM_ char s >>| return s