From: pimjager Date: Thu, 11 Feb 2016 21:28:30 +0000 (+0100) Subject: Tokenizer functions implemented X-Git-Url: https://git.martlubbers.net/?a=commitdiff_plain;h=6910d87166dedb7032fd31684e52df3d0b25910e;p=cc1516.git Tokenizer functions implemented --- diff --git a/lex.dcl b/lex.dcl index f256af5..95bd0bd 100644 --- a/lex.dcl +++ b/lex.dcl @@ -4,7 +4,7 @@ import Data.Either :: Token //Value tokens - = IdentToken String // Identifier + = IdentToken [Char] // Identifier | NumberToken Int // Integer | CharToken Char // Character literal //Keyword tokens @@ -43,6 +43,7 @@ import Data.Either | EqualsToken // == | AmpersandsToken // && | PipesToken // || + | ArrowToken // -> :: LexerOutput a :== Either String a diff --git a/lex.icl b/lex.icl index 1475a0e..d46aaa8 100644 --- a/lex.icl +++ b/lex.icl @@ -20,6 +20,30 @@ instance toString lexerOutput where lexer :: [Char] -> LexerOutput [Token] lexer _ = Left "Not Implemented" +//lexer functions +identT = alpha >>= \a -> many (char '_' <|> alphaNum) >>= \as -> return $ IdentToken [a:as] +numberT = optional (char '-') >>= \sign -> (some digit) >>= \n -> case sign of + Nothing -> return $ NumberToken $ 5 //fromString n + _ -> return $ NumberToken $ -5 //(fromString n) * -1 +charLT = liftM CharToken item +char2T = item >>= \c1 -> case c1 of + ':' = char ':' >>| return DoubleColonToken + '<' = char '=' >>| return LesserEqToken + '>' = char '=' >>| return GreaterEqToken + '=' = char '=' >>| return EqualsToken + '&' = char '&' >>| return AmpersandsToken + '|' = char '|' >>| return PipesToken + '-' = char '>' >>| return ArrowToken +char1T = item >>= \c1 -> findT c1 charTokenMap +varT = string (fromString "var") >>| return VarToken +voidT = string (fromString "Void") >>| return VoidToken +returnT = string (fromString "return") >>| return ReturnToken +ifT = string (fromString "if") >>| return IfToken +elseT = string (fromString "else") >>| return ElseToken +whileT = string (fromString "while") >>| return WhileToken +trueT = string (fromString "True") >>| return TrueToken +falseT = string (fromString "False") >>| return FalseToken + Start :: *World -> *World Start w # (args, w) = getCommandLine w // We lezen nu nog standaard van stdin @@ -39,6 +63,29 @@ Start w +charTokenMap = [('(', BraceOpenToken) + ,(')', BraceCloseToken) + ,('{', CBraceOpenToken) + ,('}', CBraceCloseToken) + ,('[', SquareOpenToken) + ,(']', SquareCloseToken) + ,(',', CommaToken) + ,(':', ColonToken) + ,(';', SColonToken) + ,('.', DotToken) + ,('+', PlusToken) + ,('-', DashToken) + ,('*', StarToken) + ,('/', SlashToken) + ,('%', PercentToken) + ,('=', AssignmentToken) + ,('<', LesserToken) + ,('>', BiggerToken) + ,('!', ExclamationToken)] +findT c [] = fail "Unrecognized character" +findT c [(k,v):xs] = if (c==k) (return v) (findT c xs) + + // Clean adaption of Yard, a parsec like parser combinator :: Parser a = Parser ([Char] -> (LexerOutput a, [Char])) @@ -60,10 +107,9 @@ instance Monad Parser where //gives us some, many and optional instance Alternative Parser where empty = zero - (<|>) p1 p2 = parserAlternative p1 p2 -parserAlternative p1 p2 = Parser $ \s -> let (out, rest) = runParser p1 s in case out of - Left e = runParser p2 s - Right t = (Right t, rest) + (<|>) p1 p2 = Parser $ \s -> let (out, rest) = runParser p1 s in case out of + Left e = runParser p2 s + Right t = (Right t, rest) //parser that fails with error fail :: String -> Parser a @@ -85,7 +131,7 @@ satisfy f = item >>= (\r -> if (f r) (return r) zero) //tries a parser, if it fails returns a default value optionalDef :: a (Parser a) -> Parser a -optionalDef def p = parserAlternative p (return def) +optionalDef def p = p <|> return def //matched given char char :: Char -> Parser Char @@ -97,6 +143,9 @@ alpha = satisfy isAlpha digit :: Parser Char digit = satisfy isDigit +alphaNum :: Parser Char +alphaNum = alpha <|> digit + //matches a given String string :: [Char] -> Parser [Char] string s = mapM_ char s >>| return s \ No newline at end of file