implementation module lex
import Control.Monad, Control.Applicative
-import Data.Either, Data.Func, Data.Maybe, Data.Functor
-from StdFunc import o, const
+import Data.Either, Data.Func
+from StdFunc import o
import StdBool
import StdList
import StdChar
import StdString
-from Text import class Text(textSize,concat), instance Text String
-
import yard
+:: LexItem
+ = LexToken Int TokenValue
+ | LexSpace Int Int
+ | LexNL
+ | LexEOF
+ | LexItemError String
+
lexer :: [Char] -> LexerOutput
lexer r = case runParser (lexProgram 1 1) r of
(Right p, _) = Right p
lexProgram :: Int Int -> Parser Char [Token]
lexProgram line column = lexToken >>= \t->case t of
- LexEOF = pure []
- (LexItemError e) = fail <?>
- PositionalError line column ("LexerError: " +++ e)
- (LexToken c t) = lexProgram line (column+c)
- >>= \rest->pure [{line=line, column=column, token=t}:rest]
- LexNL = lexProgram (line+1) 1
- (LexSpace l c) = lexProgram (line+l) (column+c)
-
-:: LexItem
- = LexToken Int TokenValue
- | LexSpace Int Int
- | LexNL
- | LexEOF
- | LexItemError String
+ LexEOF = pure []
+ LexNL = lexProgram (line+1) 1
+ (LexSpace l c) = lexProgram (line+l) (column+c)
+ (LexItemError e) = fail <?>
+ PositionalError line column ("LexerError: " +++ e)
+ (LexToken c t) = lexProgram line (column+c)
+ >>= \rest->pure [{line=line, column=column, token=t}:rest]
lexToken :: Parser Char LexItem
-lexToken =
+lexToken =
//Comments
lexBlockComment <|> lexComment <|>
//Keyword tokens
lexWord "<=" LesserEqToken <|> lexWord ">=" GreaterEqToken <|>
lexWord "==" EqualsToken <|> lexWord "&&" AmpersandsToken <|>
lexWord "||" PipesToken <|> lexWord "[]" EmptyListToken <|>
- lexWord "->" ArrowToken <|>
+ lexWord "->" ArrowToken <|>
//One char ops tokens
- lexWord "(" BraceOpenToken <|>
- lexWord ")" BraceCloseToken <|> lexWord "{" CBraceOpenToken <|>
- lexWord "}" CBraceCloseToken <|> lexWord "[" SquareOpenToken <|>
- lexWord "]" SquareCloseToken <|> lexWord "," CommaToken <|>
- lexWord ":" ColonToken <|> lexWord ";" SColonToken <|>
- lexWord "." DotToken <|> lexWord "+" PlusToken <|>
- lexWord "*" StarToken <|> lexWord "/" SlashToken <|>
- lexWord "%" PercentToken <|> lexWord "=" AssignmentToken <|>
- lexWord "<" LesserToken <|> lexWord ">" BiggerToken <|>
- lexWord "!" ExclamationToken <|> lexWord "-" DashToken <|>
+ lexWord "(" BraceOpenToken <|> lexWord ")" BraceCloseToken <|>
+ lexWord "{" CBraceOpenToken <|> lexWord "}" CBraceCloseToken <|>
+ lexWord "[" SquareOpenToken <|> lexWord "]" SquareCloseToken <|>
+ lexWord "," CommaToken <|> lexWord ":" ColonToken <|>
+ lexWord ";" SColonToken <|> lexWord "." DotToken <|>
+ lexWord "+" PlusToken <|> lexWord "*" StarToken <|>
+ lexWord "/" SlashToken <|> lexWord "%" PercentToken <|>
+ lexWord "=" AssignmentToken <|> lexWord "<" LesserToken <|>
+ lexWord ">" BiggerToken <|> lexWord "!" ExclamationToken <|>
+ lexWord "-" DashToken <|>
//Number and identifier tokens
lexNumber <|> lexIdentifier <|>
(item '\n' >>| pure LexNL) <|>
+ //Whitespace
(satisfy isSpace >>| (pure $ LexSpace 0 1)) <|>
(eof >>| pure LexEOF)
where
lexWord :: String TokenValue -> Parser Char LexItem
- lexWord s tv = list (fromString s) >>| pure (LexToken (textSize s) tv)
+ lexWord s tv = list ls >>| pure (LexToken (length ls) tv)
+ where ls = fromString s
lexKw :: String TokenValue -> Parser Char LexItem
lexKw kw tv = lexWord kw tv <* check (not o isIdentChar)
- lexUntilNL = top until (eof <|> (item '\n' >>| pure Void))
-
lexComment :: Parser Char LexItem
- lexComment = list (fromString "//") >>| lexUntilNL
- >>| pure LexNL
+ lexComment = list (fromString "//")
+ >>| top until (eof <|> (item '\n' >>| pure Void)) >>| pure LexNL
lexBlockComment :: Parser Char LexItem
- lexBlockComment = list (fromString "/*")
+ lexBlockComment = list (fromString "/*")
>>| (top until (list (fromString "*/")))
>>= \chars->pure $ widthHeight chars 0 0
where
widthHeight [x:xs] l c = widthHeight xs l (c+1)
lexNumber :: Parser Char LexItem
- lexNumber = toString <$> some (satisfy isDigit)
- >>= \si->pure $ LexToken (textSize si) (NumberToken $ toInt si)
+ lexNumber = some (satisfy isDigit) >>= \si->pure $
+ LexToken (length si) (NumberToken $ toInt $ toString si)
lexIdentifier :: Parser Char LexItem
- lexIdentifier = toString <$> some (satisfy isIdentChar)
- >>= \si->pure $ LexToken (textSize si) (IdentToken si)
+ lexIdentifier = some (satisfy isIdentChar)
+ >>= \si->pure $ LexToken (length si) (IdentToken $ toString si)
isIdentChar c = isAlphanum c || c == '_'