implementation module lex
import Control.Monad, Control.Applicative
-import Data.Either, Data.Func, Data.Maybe, Data.Functor
+import Data.Either, Data.Func, Data.Void
+import Data.Map
from StdFunc import o
import StdBool
import StdList
import StdChar
import StdString
-
-from Text import class Text(textSize,concat), instance Text String
+import StdTuple
import yard
+import AST
+
+:: LexItem
+ = LexToken Int TokenValue
+ | LexSpace Int Int
+ | LexNL
+ | LexEOF
+ | LexItemError String
+
+escapes :: Map Char Char
+escapes = fromList [('a', toChar 7), ('b', '\b'), ('f', '\f'), ('n', '\n'),
+ ('r', '\r'), ('t', '\t'), ('v', '\v'), ('\'', '\''), ('"', '"')]
lexer :: [Char] -> LexerOutput
-lexer r = case runParser (lexProgram 1 1) r of
- (Right p, _) = Right p
- (Left e, _) = Left e
+lexer r = fst $ runParser (lexProgram 1 1) r
lexProgram :: Int Int -> Parser Char [Token]
lexProgram line column = lexToken >>= \t->case t of
- LexEOF = pure []
- (LexItemError e) = fail <?>
- PositionalError line column ("LexerError: " +++ e)
- (LexToken c t) = lexProgram line (column+c)
- >>= \rest->pure [{line=line, column=column, token=t}:rest]
- LexNL = lexProgram (line+1) 1
- (LexSpace l c) = lexProgram (line+l) (column+c)
-
-:: LexItem
- = LexToken Int TokenValue
- | LexSpace Int Int
- | LexNL
- | LexEOF
- | LexItemError String
+ LexEOF = pure []
+ LexNL = lexProgram (line+1) 1
+ (LexSpace l c) = lexProgram (line+l) (column+c)
+ (LexItemError e) = fail <?>
+ PositionalError line column ("LexerError: " +++ e)
+ (LexToken c t) = lexProgram line (column+c)
+ >>= \rest->pure [({line=line,col=column}, t):rest]
lexToken :: Parser Char LexItem
-lexToken =
+lexToken =
//Comments
lexBlockComment <|> lexComment <|>
//Keyword tokens
lexKw "True" TrueToken <|> lexKw "False" FalseToken <|>
lexKw "Int" IntTypeToken <|> lexKw "Bool" BoolTypeToken <|>
lexKw "Char" CharTypeToken <|>
+ lexKw "let" LetToken <|>
//Character tokens
lexEscape <|> lexCharacter <|>
//Two char ops tokens
lexWord "<=" LesserEqToken <|> lexWord ">=" GreaterEqToken <|>
lexWord "==" EqualsToken <|> lexWord "&&" AmpersandsToken <|>
lexWord "||" PipesToken <|> lexWord "[]" EmptyListToken <|>
- lexWord "->" ArrowToken <|>
+ lexWord "->" ArrowToken <|>
//One char ops tokens
- lexWord "(" BraceOpenToken <|>
- lexWord ")" BraceCloseToken <|> lexWord "{" CBraceOpenToken <|>
- lexWord "}" CBraceCloseToken <|> lexWord "[" SquareOpenToken <|>
- lexWord "]" SquareCloseToken <|> lexWord "," CommaToken <|>
- lexWord ":" ColonToken <|> lexWord ";" SColonToken <|>
- lexWord "." DotToken <|> lexWord "+" PlusToken <|>
- lexWord "*" StarToken <|> lexWord "/" SlashToken <|>
- lexWord "%" PercentToken <|> lexWord "=" AssignmentToken <|>
- lexWord "<" LesserToken <|> lexWord ">" BiggerToken <|>
- lexWord "!" ExclamationToken <|> lexWord "-" DashToken <|>
+ lexWord "(" BraceOpenToken <|> lexWord ")" BraceCloseToken <|>
+ lexWord "{" CBraceOpenToken <|> lexWord "}" CBraceCloseToken <|>
+ lexWord "[" SquareOpenToken <|> lexWord "]" SquareCloseToken <|>
+ lexWord "," CommaToken <|> lexWord ":" ColonToken <|>
+ lexWord ";" SColonToken <|> lexWord "." DotToken <|>
+ lexWord "+" PlusToken <|> lexWord "*" StarToken <|>
+ lexWord "/" SlashToken <|> lexWord "%" PercentToken <|>
+ lexWord "=" AssignmentToken <|> lexWord "<" LesserToken <|>
+ lexWord ">" BiggerToken <|> lexWord "!" ExclamationToken <|>
+ lexWord "-" DashToken <|> lexWord "\\" BackslashToken <|>
//Number and identifier tokens
- lexNumber <|> lexIdentifier <|>
+ lexString <|> lexNumber <|> lexIdentifier <|>
(item '\n' >>| pure LexNL) <|>
+ //Whitespace
(satisfy isSpace >>| (pure $ LexSpace 0 1)) <|>
(eof >>| pure LexEOF)
where
lexWord :: String TokenValue -> Parser Char LexItem
- lexWord s tv = list (fromString s) >>| pure (LexToken (textSize s) tv)
+ lexWord s tv = list ls >>| pure (LexToken (length ls) tv)
+ where ls = fromString s
lexKw :: String TokenValue -> Parser Char LexItem
lexKw kw tv = lexWord kw tv <* check (not o isIdentChar)
- lexUntilNL = top until (eof <|> (item '\n' >>| pure Void))
-
lexComment :: Parser Char LexItem
- lexComment = list (fromString "//") >>| lexUntilNL
- >>= \chars->pure LexNL
+ lexComment = list (fromString "//")
+ >>| top until (eof <|> (item '\n' >>| pure Void)) >>| pure LexNL
lexBlockComment :: Parser Char LexItem
- lexBlockComment = list (fromString "/*")
+ lexBlockComment = list (fromString "/*")
>>| (top until (list (fromString "*/")))
>>= \chars->pure $ widthHeight chars 0 0
where
widthHeight [x:xs] l c = widthHeight xs l (c+1)
lexNumber :: Parser Char LexItem
- lexNumber = toString <$> some (satisfy isDigit)
- >>= \si->pure $ LexToken (textSize si) (NumberToken $ toInt si)
+ lexNumber = some (satisfy isDigit) >>= \si->pure $
+ LexToken (length si) (NumberToken $ toInt $ toString si)
lexIdentifier :: Parser Char LexItem
- lexIdentifier = toString <$> some (satisfy isIdentChar)
- >>= \si->pure $ LexToken (textSize si) (IdentToken si)
+ lexIdentifier = some (satisfy isIdentChar)
+ >>= \si->pure $ LexToken (length si) (IdentToken $ toString si)
isIdentChar c = isAlphanum c || c == '_'
lexEscape :: Parser Char LexItem
lexEscape = item '\'' *> item '\\' *> top <* item '\''
- >>= \char->pure case char of
- 'a' = LexToken 4 (CharToken $ toChar 7)
- 'b' = LexToken 4 (CharToken '\b')
- 'b' = LexToken 4 (CharToken '\b')
- 'f' = LexToken 4 (CharToken '\f')
- 'n' = LexToken 4 (CharToken '\n')
- 'r' = LexToken 4 (CharToken '\t')
- 'v' = LexToken 4 (CharToken '\v')
- '\'' =LexToken 4 (CharToken '\'')
- c = (LexItemError $ "Unknown escape: " +++ toString c)
+ >>= \char->pure case get char escapes of
+ Just e = LexToken 4 (CharToken e)
+ Nothing = LexItemError $ "Unknown escape: " +++ toString char
+
+ lexString :: Parser Char LexItem
+ lexString = item '"' *> (
+ many (
+ (satisfy (\c->c <> '"' && c <> '\\')) <|>
+ (item '\\' *> top >>= \char->case get char escapes of
+ Just e = pure e
+ Nothing = empty)
+ ))<* item '"' >>= \cs-> pure $ LexToken (length cs) (StringToken cs)