X-Git-Url: https://git.martlubbers.net/?a=blobdiff_plain;f=lex.icl;h=6cb2e922e4615fb487f080e6372721b789d5633f;hb=74b900cf6db033a51e177f7f85d835dae44217e5;hp=e3fb2c10d3102be0919f3cd6dcfc9d83fd0846c5;hpb=fe40818bbc1d327f0adfbbbfe7ecfbe15d4eb053;p=cc1516.git diff --git a/lex.icl b/lex.icl index e3fb2c1..6cb2e92 100644 --- a/lex.icl +++ b/lex.icl @@ -1,15 +1,17 @@ implementation module lex import Control.Monad, Control.Applicative -import Data.Either, Data.Func +import Data.Either, Data.Func, Data.Void +import Data.Map from StdFunc import o import StdBool import StdList import StdChar import StdString +import StdTuple import yard -from AST import :: Pos(..) +import AST :: LexItem = LexToken Int TokenValue @@ -18,10 +20,12 @@ from AST import :: Pos(..) | LexEOF | LexItemError String +escapes :: Map Char Char +escapes = fromList [('a', toChar 7), ('b', '\b'), ('f', '\f'), ('n', '\n'), + ('r', '\r'), ('t', '\t'), ('v', '\v'), ('\'', '\''), ('"', '"')] + lexer :: [Char] -> LexerOutput -lexer r = case runParser (lexProgram 1 1) r of - (Right p, _) = Right p - (Left e, _) = Left e +lexer r = fst $ runParser (lexProgram 1 1) r lexProgram :: Int Int -> Parser Char [Token] lexProgram line column = lexToken >>= \t->case t of @@ -44,6 +48,7 @@ lexToken = lexKw "True" TrueToken <|> lexKw "False" FalseToken <|> lexKw "Int" IntTypeToken <|> lexKw "Bool" BoolTypeToken <|> lexKw "Char" CharTypeToken <|> + lexKw "Let" LetToken <|> //Character tokens lexEscape <|> lexCharacter <|> //Two char ops tokens @@ -62,9 +67,9 @@ lexToken = lexWord "/" SlashToken <|> lexWord "%" PercentToken <|> lexWord "=" AssignmentToken <|> lexWord "<" LesserToken <|> lexWord ">" BiggerToken <|> lexWord "!" ExclamationToken <|> - lexWord "-" DashToken <|> + lexWord "-" DashToken <|> lexWord "\\" BackslashToken <|> //Number and identifier tokens - lexNumber <|> lexIdentifier <|> + lexString <|> lexNumber <|> lexIdentifier <|> (item '\n' >>| pure LexNL) <|> //Whitespace (satisfy isSpace >>| (pure $ LexSpace 0 1)) <|> @@ -107,13 +112,15 @@ lexToken = lexEscape :: Parser Char LexItem lexEscape = item '\'' *> item '\\' *> top <* item '\'' - >>= \char->pure case char of - 'a' = LexToken 4 (CharToken $ toChar 7) - 'b' = LexToken 4 (CharToken '\b') - 'b' = LexToken 4 (CharToken '\b') - 'f' = LexToken 4 (CharToken '\f') - 'n' = LexToken 4 (CharToken '\n') - 'r' = LexToken 4 (CharToken '\t') - 'v' = LexToken 4 (CharToken '\v') - '\'' =LexToken 4 (CharToken '\'') - c = (LexItemError $ "Unknown escape: " +++ toString c) + >>= \char->pure case get char escapes of + Just e = LexToken 4 (CharToken e) + Nothing = LexItemError $ "Unknown escape: " +++ toString char + + lexString :: Parser Char LexItem + lexString = item '"' *> ( + many ( + (satisfy (\c->c <> '"' && c <> '\\')) <|> + (item '\\' *> top >>= \char->case get char escapes of + Just e = pure e + Nothing = empty) + ))<* item '"' >>= \cs-> pure $ LexToken (length cs) (StringToken cs)