X-Git-Url: https://git.martlubbers.net/?a=blobdiff_plain;f=lex.icl;h=cbdcd653ab2f20f4778ffd7f863c721c64a6aadc;hb=82d6648e3777f5bb74fa1825f18cf41922f3b782;hp=34d913c0ee42ed21cdbf0354a4b4337f4657ff1e;hpb=d5190ca02578d0c678bf3f724feeaeb37ac766f8;p=cc1516.git diff --git a/lex.icl b/lex.icl index 34d913c..cbdcd65 100644 --- a/lex.icl +++ b/lex.icl @@ -1,40 +1,158 @@ -module lex - -:: Token - //Values - = IdentToken String // Identifier - | NumberToken Int // Integer - //Keywords - | VarToken // var keyword - | VoidToken // void keyword - | ReturnToken // return keyword - | IfToken // if keyword - | ElseToken // else keyword - | WhileToken // while keyword - | TrueToken // True keyword - | FalseToken // False keyword - //Single Characters - | BraceOpenToken // ( - | BraceCloseToken // ) - | CBraceOpenToken // { - | CBraceCloseToken // { - | SquareOpenToken // [ - | SquareCloseToken // ] - | CommaToken // , - | ColonToken // : - | SColonToken // ; - | DotToken // . - | PlusToken // + - | DashToken // - - | StarToken // * - | SlashToken // / - | PercentToken // % - | EqualsToken // = - | LesserToken // < - | BiggerToken // > - | ExclamationToken // ! - | AmpersandToken // & - | PipeToken // | - | SingleQuoteToken // ' - -Start = "Hello World!\n" +implementation module lex + +import Data.Either +import Data.List +import StdString +import System.CommandLine +import StdFile +import StdMisc +from StdFunc import id, const +import Data.Maybe +import Control.Applicative +import Control.Monad +import Control.Monad.State +import Data.Functor +from Data.Func import $ + +// Misschien moeten we hier continuation style van maken +instance toString lexerOutput where + toString l = "dit is een lexer output, danwel error\n" + +lexer :: [Char] -> LexerOutput [Token] +lexer i = let (out, rest) = runParser (many anyT) i in case rest of + [] = out + _ = Left "Unexpected input after last token" + +//lexer functions +identT = alpha >>= \a -> many (char '_' <|> alphaNum) >>= \as -> return $ IdentToken [a:as] +numberT = optional (char '-') >>= \sign -> (some digit) >>= \n -> case sign of + Nothing -> return $ NumberToken $ 5 //fromString n + _ -> return $ NumberToken $ -5 //(fromString n) * -1 +charLT = CharToken <$> (char '\'' *> item <* char '\'') +char2T = item >>= \c1 -> case c1 of + ':' = char ':' >>| return DoubleColonToken + '<' = char '=' >>| return LesserEqToken + '>' = char '=' >>| return GreaterEqToken + '=' = char '=' >>| return EqualsToken + '&' = char '&' >>| return AmpersandsToken + '|' = char '|' >>| return PipesToken + '-' = char '>' >>| return ArrowToken + _ = zero +char1T = item >>= \c1 -> findT c1 charTokenMap +varT = string (fromString "var") >>| return VarToken +voidT = string (fromString "Void") >>| return VoidToken +returnT = string (fromString "return") >>| return ReturnToken +ifT = string (fromString "if") >>| return IfToken +elseT = string (fromString "else") >>| return ElseToken +whileT = string (fromString "while") >>| return WhileToken +trueT = string (fromString "True") >>| return TrueToken +falseT = string (fromString "False") >>| return FalseToken +//note, for the anyToken parser the order matters! +anyT = char2T <|> char1T <|> varT <|> voidT <|> returnT <|> ifT <|> elseT <|> whileT <|> + trueT <|> falseT <|> numberT <|> identT <|> charLT + +Start :: *World -> *World +Start w +# (args, w) = getCommandLine w // We lezen nu nog standaard van stdin +# (out, w) = stdio w +# (toparse, out) = readEntireFile out +# out = out <<< toString (lexer toparse) +# (b, w) = fclose out w +| not b = setReturnCode 1 w += w + where + readEntireFile :: *File -> *([Char], *File) + readEntireFile f + # (b, c, f) = freadc f + | not b = ([], f) + # (cs, f) = readEntireFile f + = ([c:cs], f) + + + +charTokenMap = [('(', BraceOpenToken) + ,(')', BraceCloseToken) + ,('{', CBraceOpenToken) + ,('}', CBraceCloseToken) + ,('[', SquareOpenToken) + ,(']', SquareCloseToken) + ,(',', CommaToken) + ,(':', ColonToken) + ,(';', SColonToken) + ,('.', DotToken) + ,('+', PlusToken) + ,('-', DashToken) + ,('*', StarToken) + ,('/', SlashToken) + ,('%', PercentToken) + ,('=', AssignmentToken) + ,('<', LesserToken) + ,('>', BiggerToken) + ,('!', ExclamationToken)] +findT c [] = fail "Unrecognized character" +findT c [(k,v):xs] = if (c==k) (return v) (findT c xs) + + +// Clean adaption of Yard, a parsec like parser combinator +:: Parser a = Parser ([Char] -> (LexerOutput a, [Char])) + +runParser :: (Parser a) [Char] -> (LexerOutput a, [Char]) +runParser (Parser p) s = p s + +instance Functor Parser where + fmap f s = liftM f s + +instance Applicative Parser where + pure a = Parser $ \s -> (Right a, s) + (<*>) sf s = ap sf s + +instance Monad Parser where + bind p f = Parser $ \s -> let (out, rest) = runParser p s in case out of + Left e = (Left e, rest) + Right t = runParser (f t) rest + +//gives us some, many and optional +instance Alternative Parser where + empty = zero + (<|>) p1 p2 = Parser $ \s -> let (out, rest) = runParser p1 s in case out of + Left e = runParser p2 s + Right t = (Right t, rest) + +//parser that fails with error +fail :: String -> Parser a +fail e = Parser $ \s -> (Left e, s) + +//parser that always fails +zero :: Parser a +zero = fail "Zero parser" + +//matches exactly one Char +item :: Parser Char +item = Parser $ \s -> case s of + [] = (Left "Unexpected empty input", s) + [x:xs] = (Right x, xs) + +//matches any char which satisfies f +satisfy :: (Char -> Bool) -> Parser Char +satisfy f = item >>= (\r -> if (f r) (return r) zero) + +//tries a parser, if it fails returns a default value +optionalDef :: a (Parser a) -> Parser a +optionalDef def p = p <|> return def + +//matched given char +char :: Char -> Parser Char +char c = satisfy (\i -> c==i) //I hate that we can't do: satisfy (==c) + +alpha :: Parser Char +alpha = satisfy isAlpha + +digit :: Parser Char +digit = satisfy isDigit + +alphaNum :: Parser Char +alphaNum = alpha <|> digit + +//matches a given String +string :: [Char] -> Parser [Char] +string s = mapM_ char s >>| return s \ No newline at end of file