X-Git-Url: https://git.martlubbers.net/?a=blobdiff_plain;f=src%2Flex.icl;h=4cf915ca825978e82112a79546841b6ddbc18d58;hb=eb91d7c6b2010be6a43de0a978373654ba3deacc;hp=a6f31ab8eb0e4abad9d597e629c9e0bfc091bfef;hpb=6c1945d3b85c0bf8ff2d56b4a6fb456d07a7282a;p=cc1516.git diff --git a/src/lex.icl b/src/lex.icl index a6f31ab..4cf915c 100644 --- a/src/lex.icl +++ b/src/lex.icl @@ -15,10 +15,14 @@ lexer r = case runParser lexProgram r of (Left e, _) = Left $ toString e lexProgram :: Parser Char [Token] -lexProgram = some lexToken >>= \ts->pure (map (\t->(0, 0, t)) ts) +lexProgram = some lexToken <* many (satisfy isSpace) <* eof + >>= \ts->pure $ (map (\t->(0, 0, t)) ts) ++ [(0, 0, EndOfFileToken)] lexToken :: Parser Char TokenValue lexToken = + //Comments + (list (fromString "//") >>| lexUntilNL >>| lexToken) <|> + (list (fromString "/*") >>| lexUntilCommentClose >>| lexToken) <|> //Keyword tokens (lexKw "var" VarToken) <|> (lexKw "Void" VoidToken) <|> @@ -40,8 +44,9 @@ lexToken = (lexOp "<=" LesserEqToken) <|> (lexOp ">=" GreaterEqToken) <|> (lexOp "==" EqualsToken) <|> (lexOp "&&" AmpersandsToken) <|> (lexOp "||" PipesToken) <|> (lexOp "[]" EmptyListToken) <|> - (lexOp "->" ArrowToken) <|> (lexOp "(" BraceOpenToken) <|> + (lexOp "->" ArrowToken) <|> //One char ops tokens + (lexOp "(" BraceOpenToken) <|> (lexOp ")" BraceCloseToken) <|> (lexOp "{" CBraceOpenToken) <|> (lexOp "}" CBraceCloseToken) <|> (lexOp "[" SquareOpenToken) <|> (lexOp "]" SquareCloseToken) <|> (lexOp "," CommaToken) <|> @@ -54,10 +59,11 @@ lexToken = //Number tokens (liftM (NumberToken o toInt o toString) $ some $ satisfy isDigit) <|> //Ident tokens - (liftM IdentToken $ some $ satisfy isIdentChar) <|> - (satisfy isSpace >>| lexToken) //<|> -// (eof >>| pure EndOfFileToken) + (liftM (IdentToken o toString) $ some $ satisfy isIdentChar) <|> + (satisfy isSpace >>| lexToken) where + lexUntilNL = top until (eof <|> (item '\n' >>| pure Void)) + lexUntilCommentClose = top until list (fromString "*/") isIdentChar c = isAlphanum c || c == '_' lexOp s tv = list (fromString s) >>| pure tv lexKw kw tv = lexOp kw tv <* check (not o isIdentChar) >>| pure tv