implementation module lex import Data.Either import Data.List import StdString import System.CommandLine import StdFile import StdMisc from StdFunc import id, const import Data.Maybe import Control.Applicative import Control.Monad import Control.Monad.State import Data.Functor from Data.Func import $ from Text import class Text, instance Text String import qualified Text lexer :: [Char] -> LexerOutput [Token] lexer _ = undef // Misschien moeten we hier continuation style van maken instance toString (LexerOutput [Token]) where toString (Left l) = "Error: " +++ l toString (Right x) = 'Text'.concat (print 0 x) print :: Int [Token] -> [String] print i [(IdentToken l):rest] = [tab i:toString l:print i rest] print i [(NumberToken j):rest] = [tab i:toString j:print i rest] print i [(CharToken c):rest] = [tab i:"'":toString c:"'":print i rest] print i [VarToken:rest] = [tab i:"var":print i rest] print i [VoidToken:rest] = [tab i:"Void":print i rest] print i [ReturnToken:rest] = [tab i:"return":print i rest] print i [IfToken:rest] = [tab i:"if":print i rest] print i [ElseToken:rest] = [tab i:"else":print i rest] print i [WhileToken:rest] = [tab i:"while":print i rest] print i [TrueToken:rest] = [tab i:"True":print i rest] print i [FalseToken:rest] = [tab i:"False":print i rest] print i [BraceOpenToken:rest] = [tab i:"(":print i rest] print i [BraceCloseToken:rest] = [tab i:")":print i rest] print i [CBraceOpenToken:rest] = [tab i:"{\n":print (i+1) rest] print i [CBraceCloseToken:rest] = [tab i:"}\n":print (i-1) rest] print i [SquareOpenToken:rest] = [tab i:"[":print i rest] print i [SquareCloseToken:rest] = [tab i:"]":print i rest] print i [CommaToken:rest] = [tab i:", ":print i rest] print i [ColonToken:rest] = [tab i:":":print i rest] print i [SColonToken:rest] = [tab i:";\n":print i rest] print i [DotToken:rest] = [tab i:".":print i rest] print i [PlusToken:rest] = [tab i:" + ":print i rest] print i [DashToken:rest] = [tab i:" - ":print i rest] print i [StarToken:rest] = [tab i:" * ":print i rest] print i [SlashToken:rest] = [tab i:" / ":print i rest] print i [PercentToken:rest] = [tab i:" % ":print i rest] print i [AssignmentToken:rest] = [tab i:" = ":print i rest] print i [LesserToken:rest] = [tab i:" < ":print i rest] print i [BiggerToken:rest] = [tab i:" > ":print i rest] print i [ExclamationToken:rest] = [tab i:"!":print i rest] print i [DoubleColonToken:rest] = [tab i:" :: ":print i rest] print i [LesserEqToken:rest] = [tab i:" <= ":print i rest] print i [GreaterEqToken:rest] = [tab i:" >= ":print i rest] print i [EqualsToken:rest] = [tab i:" == ":print i rest] print i [AmpersandsToken:rest] = [tab i:" && ":print i rest] print i [PipesToken:rest] = [tab i:" || ":print i rest] print i [ArrowToken:rest] = [tab i:" -> ":print i rest] tab :: Int -> String tab 0 = "" tab i = "\t" +++ tab (i-1) lex :: [Char] -> LexerOutput [Token] lex ['v':'a':'r':rest] = undef lex ['V':'o':'i':'d':rest] = undef lex ['r':'e':'t':'u':'r':'n':rest] = undef lex ['i':'f':rest] = undef lex ['e':'l':'s':'e':rest] = undef lex ['w':'h':'i':'l':'e':rest] = undef lex ['T':'r':'u':'e':rest] = undef lex ['F':'a':'l':'s':'e':rest] = undef lex [':':':':rest] = undef lex ['<':'=':rest] = undef lex ['>':'=':rest] = undef lex ['=':'=':rest] = undef lex ['&':'&':rest] = undef lex ['|':'|':rest] = undef lex ['-':'>':rest] = undef lex ['(':rest] = undef lex [')':rest] = undef lex ['{':rest] = undef lex ['{':rest] = undef lex ['[':rest] = undef lex [':rest]':rest] = undef lex [',':rest] = undef lex [':':rest] = undef lex [';':rest] = undef lex ['.':rest] = undef lex ['+':rest] = undef lex ['-':rest] = undef lex ['*':rest] = undef lex ['/':rest] = undef lex ['%':rest] = undef lex ['=':rest] = undef lex ['<':rest] = undef lex ['>':rest] = undef lex ['!':rest] = undef lex ['\'':'\\':x'\'':rest] = case x of 'a' = undef // (CharToken '\a') 'b' = undef // (CharToken '\b') 'f' = undef // (CharToken '\f') 'n' = undef // (CharToken '\n') 'r' = undef // (CharToken '\r') 't' = undef // (CharToken '\t') 'v' = undef // (CharToken '\v') _ = Left ("Illegal escape: \\" +++ x) lex ['\'':x:'\'':rest] = undef lex [x:xs] | isSpace x = lex xs | isDigit x = undef //Parse Int | isAlpha x = undef //Parse ident | otherwise = Left ("Unexpected character: " +++ toString x) Start = "Hi" //:: LexerOutput a :== Either String a // // //runParser :: (Parser a) [Char] -> (LexerOutput a, [Char]) //runParser (Parser p) s = p s // //lexer :: [Char] -> LexerOutput [Token] // ////lexer functions //identT = alpha >>= \a -> many (char '_' <|> alphaNum) >>= \as -> return $ IdentToken [a:as] //numberT = optional (char '-') >>= \sign -> (some digit) >>= \n -> case sign of // Nothing -> return $ NumberToken $ 5 //fromString n // _ -> return $ NumberToken $ -5 //(fromString n) * -1 //charLT = CharToken <$> (char '\'' *> item <* char '\'') //char2T = item >>= \c1 -> case c1 of // ':' = char ':' >>| return DoubleColonToken // '<' = char '=' >>| return LesserEqToken // '>' = char '=' >>| return GreaterEqToken // '=' = char '=' >>| return EqualsToken // '&' = char '&' >>| return AmpersandsToken // '|' = char '|' >>| return PipesToken // '-' = char '>' >>| return ArrowToken // _ = zero //char1T = item >>= \c1 -> findT c1 charTokenMap //varT = string (fromString "var") >>| return VarToken //voidT = string (fromString "Void") >>| return VoidToken //returnT = string (fromString "return") >>| return ReturnToken //ifT = string (fromString "if") >>| return IfToken //elseT = string (fromString "else") >>| return ElseToken //whileT = string (fromString "while") >>| return WhileToken //trueT = string (fromString "True") >>| return TrueToken //falseT = string (fromString "False") >>| return FalseToken ////note, for the anyToken parser the order matters! //anyT = char2T <|> char1T <|> varT <|> voidT <|> returnT <|> ifT <|> elseT <|> whileT <|> // trueT <|> falseT <|> numberT <|> identT <|> charLT // //Start :: *World -> *World //Start w //# (args, w) = getCommandLine w // We lezen nu nog standaard van stdin //# (out, w) = stdio w //# (toparse, out) = readEntireFile out //# out = out <<< toString (lexer toparse) //# (b, w) = fclose out w //| not b = setReturnCode 1 w //= w // where // readEntireFile :: *File -> *([Char], *File) // readEntireFile f // # (b, c, f) = freadc f // | not b = ([], f) // # (cs, f) = readEntireFile f // = ([c:cs], f) // // // //charTokenMap = [('(', BraceOpenToken) // ,(')', BraceCloseToken) // ,('{', CBraceOpenToken) // ,('}', CBraceCloseToken) // ,('[', SquareOpenToken) // ,(']', SquareCloseToken) // ,(',', CommaToken) // ,(':', ColonToken) // ,(';', SColonToken) // ,('.', DotToken) // ,('+', PlusToken) // ,('-', DashToken) // ,('*', StarToken) // ,('/', SlashToken) // ,('%', PercentToken) // ,('=', AssignmentToken) // ,('<', LesserToken) // ,('>', BiggerToken) // ,('!', ExclamationToken)] //findT c [] = fail "Unrecognized character" //findT c [(k,v):xs] = if (c==k) (return v) (findT c xs) // // //// Clean adaption of Yard, a parsec like parser combinator //:: Parser a = Parser ([Char] -> (LexerOutput a, [Char])) // //runParser :: (Parser a) [Char] -> (LexerOutput a, [Char]) //runParser (Parser p) s = p s // //instance Functor Parser where // fmap f s = liftM f s // //instance Applicative Parser where // pure a = Parser $ \s -> (Right a, s) // (<*>) sf s = ap sf s // //instance Monad Parser where // bind p f = Parser $ \s -> let (out, rest) = runParser p s in case out of // Left e = (Left e, rest) // Right t = runParser (f t) rest // ////gives us some, many and optional //instance Alternative Parser where // empty = zero // (<|>) p1 p2 = Parser $ \s -> let (out, rest) = runParser p1 s in case out of // Left e = runParser p2 s // Right t = (Right t, rest) // ////parser that fails with error //fail :: String -> Parser a //fail e = Parser $ \s -> (Left e, s) // ////parser that always fails //zero :: Parser a //zero = fail "Zero parser" // ////matches exactly one Char //item :: Parser Char //item = Parser $ \s -> case s of // [] = (Left "Unexpected empty input", s) // [x:xs] = (Right x, xs) // ////matches any char which satisfies f //satisfy :: (Char -> Bool) -> Parser Char //satisfy f = item >>= (\r -> if (f r) (return r) zero) // ////tries a parser, if it fails returns a default value //optionalDef :: a (Parser a) -> Parser a //optionalDef def p = p <|> return def // ////matched given char //char :: Char -> Parser Char //char c = satisfy (\i -> c==i) //I hate that we can't do: satisfy (==c) // //alpha :: Parser Char //alpha = satisfy isAlpha // //digit :: Parser Char //digit = satisfy isDigit // //alphaNum :: Parser Char //alphaNum = alpha <|> digit // ////matches a given String //string :: [Char] -> Parser [Char] //string s = mapM_ char s >>| return s