working lexer and printer
[cc1516.git] / lex.icl
1 implementation module lex
2
3 from Text import class Text, instance Text String
4 import Control.Monad
5 import Data.Either
6 import Data.Functor
7 import Data.List
8 import StdString
9 import StdBool
10 import StdFile
11 import System.CommandLine
12 import qualified Text
13
14 Start :: *World -> *World
15 Start w
16 # (out, w) = stdio w
17 # (toparse, out) = readEntireFile out
18 # (_, w) = fclose (out <<< "\n" <<< toString (lexer toparse)) w
19 = w
20
21 readEntireFile :: *File -> *([Char], *File)
22 readEntireFile f
23 # (b, c, f) = freadc f
24 | not b = ([], f)
25 # (cs, f) = readEntireFile f
26 = ([c:cs], f)
27
28 lexer :: [Char] -> LexerOutput
29 lexer [] = Right []
30 lexer x = case lex x of
31 (Right t, rest) = lexer rest >>= \ts.Right [t:ts]
32 (Left e, _) = Left e
33
34 untilNext :: [Char] -> [Char]
35 untilNext [] = []
36 untilNext ['*':'/':rest] = rest
37 untilNext [x:rest] = untilNext rest
38
39 lex :: [Char] -> (Either String Token, [Char])
40 lex [] = (Right EndOfFileToken, [])
41 //Comments
42 lex ['/':'/':rest] = lex (tl (dropWhile ((<>) '\n') rest))
43 lex ['/':'*':rest] = lex (untilNext rest)
44 //Keyword tokens
45 lex ['v':'a':'r':rest] = (Right VarToken, rest)
46 lex ['V':'o':'i':'d':rest] = (Right VoidToken, rest)
47 lex ['r':'e':'t':'u':'r':'n':rest] = (Right ReturnToken, rest)
48 lex ['i':'f':rest] = (Right IfToken, rest)
49 lex ['e':'l':'s':'e':rest] = (Right ElseToken, rest)
50 lex ['w':'h':'i':'l':'e':rest] = (Right WhileToken, rest)
51 lex ['T':'r':'u':'e':rest] = (Right TrueToken, rest)
52 lex ['F':'a':'l':'s':'e':rest] = (Right FalseToken, rest)
53 lex ['I':'n':'t':rest] = (Right IntTypeToken, rest)
54 lex ['C':'h':'a':'r':rest] = (Right CharTypeToken, rest)
55 lex ['B':'o':'o':'l':rest] = (Right BoolTypeToken, rest)
56 //Two character tokens
57 lex [':':':':rest] = (Right DoubleColonToken, rest)
58 lex ['<':'=':rest] = (Right LesserEqToken, rest)
59 lex ['>':'=':rest] = (Right GreaterEqToken, rest)
60 lex ['=':'=':rest] = (Right EqualsToken, rest)
61 lex ['&':'&':rest] = (Right AmpersandsToken, rest)
62 lex ['|':'|':rest] = (Right PipesToken, rest)
63 lex ['-':'>':rest] = (Right ArrowToken, rest)
64 //One character tokens
65 lex ['(':rest] = (Right BraceOpenToken, rest)
66 lex [')':rest] = (Right BraceCloseToken, rest)
67 lex ['{':rest] = (Right CBraceOpenToken, rest)
68 lex ['}':rest] = (Right CBraceCloseToken, rest)
69 lex ['[':rest] = (Right SquareOpenToken, rest)
70 lex [']':rest] = (Right SquareCloseToken, rest)
71 lex [',':rest] = (Right CommaToken, rest)
72 lex [':':rest] = (Right ColonToken, rest)
73 lex [';':rest] = (Right SColonToken, rest)
74 lex ['.':rest] = (Right DotToken, rest)
75 lex ['+':rest] = (Right PlusToken, rest)
76 lex ['-':rest] = (Right DashToken, rest)
77 lex ['*':rest] = (Right StarToken, rest)
78 lex ['/':rest] = (Right SlashToken, rest)
79 lex ['%':rest] = (Right PercentToken, rest)
80 lex ['=':rest] = (Right AssignmentToken, rest)
81 lex ['<':rest] = (Right LesserToken, rest)
82 lex ['>':rest] = (Right BiggerToken, rest)
83 lex ['!':rest] = (Right ExclamationToken, rest)
84 //Value tokens
85 lex ['\'':x:'\'':rest] = (Right (CharToken x), rest)
86 lex ['\'':'\\':x:'\'':rest] = case x of
87 'a' = (Right (CharToken (toChar 7)), rest) //Alarm
88 'b' = (Right (CharToken '\b'), rest) //Backspace
89 'f' = (Right (CharToken '\f'), rest) //Formfeed
90 'n' = (Right (CharToken '\n'), rest) //Newline
91 'r' = (Right (CharToken '\r'), rest) //Carriage Return
92 't' = (Right (CharToken '\t'), rest) //Horizontal tab
93 'v' = (Right (CharToken '\v'), rest) //Vertical tab
94 _ = (Left ("Unknown escape: \\" +++ toString x), [])
95 lex [x:xs]
96 | isSpace x = lex xs
97 | isDigit x = let (is, rest) = span isDigit xs in
98 (Right (NumberToken [x:is]), rest)
99 | isAlpha x = let (is, rest) = span (\c.isAlphanum c || c == '_') xs in
100 (Right (IdentToken [x:is]), rest)
101 | otherwise = (Left ("Unexpected character: " +++ toString x), [])
102
103 instance toString LexerOutput where
104 toString (Left l) = "Error: " +++ l
105 toString (Right x) = 'Text'.concat (print 0 x)
106
107 tab :: Int -> [String]
108 tab i = replicate i "\t"
109
110 print :: Int [Token] -> [String]
111 print _ [] = []
112 print i [(IdentToken l):rest] = [toString l:print i rest]
113 print i [(NumberToken j):rest] = [toString j:print i rest]
114 print i [(CharToken c):rest] = ["'":toString c:"'":print i rest]
115 print i [VarToken:rest] = ["var ":print i rest]
116 print i [ReturnToken:rest] = ["return ":print i rest]
117 print i [IfToken:rest] = ["if":print i rest]
118 print i [ElseToken:rest] = ["else":print i rest]
119 print i [WhileToken:rest] = ["while":print i rest]
120 print i [TrueToken:rest] = ["True":print i rest]
121 print i [FalseToken:rest] = ["False":print i rest]
122 print i [VoidToken:rest] = ["Void":print i rest]
123 print i [IntTypeToken:rest] = ["Int":print i rest]
124 print i [CharTypeToken:rest] = ["Char":print i rest]
125 print i [BoolTypeToken:rest] = ["Bool":print i rest]
126 print i [DoubleColonToken:rest] = [" :: ":print i rest]
127 print i [LesserEqToken:rest] = [" <= ":print i rest]
128 print i [GreaterEqToken:rest] = [" >= ":print i rest]
129 print i [EqualsToken:rest] = [" == ":print i rest]
130 print i [AmpersandsToken:rest] = [" && ":print i rest]
131 print i [PipesToken:rest] = [" || ":print i rest]
132 print i [ArrowToken:rest] = [" -> ":print i rest]
133 print i [BraceOpenToken:rest] = ["(":print i rest]
134 print i [BraceCloseToken:rest] = [")":print i rest]
135 print i [CBraceOpenToken:rest] = ["{\n":tab (i+1)] ++ print (i+1) rest
136 print i [CBraceCloseToken:rest] = case rest of
137 [CBraceCloseToken:_] = ["}\n":tab (i-2)] ++ print (i-1) rest
138 _ = ["}\n":tab (i-1)] ++ print (i-1) rest
139 print i [SquareOpenToken:rest] = ["[":print i rest]
140 print i [SquareCloseToken:rest] = ["]":print i rest]
141 print i [CommaToken:rest] = [", ":print i rest]
142 print i [ColonToken:rest] = [":":print i rest]
143 print i [SColonToken:rest] = case rest of
144 [CBraceCloseToken:_] = [";\n":tab (i-1)] ++ print i rest
145 _ = [";\n":tab i] ++ print i rest
146 print i [DotToken:rest] = [".":print i rest]
147 print i [PlusToken:rest] = [" + ":print i rest]
148 print i [DashToken:rest] = [" - ":print i rest]
149 print i [StarToken:rest] = [" * ":print i rest]
150 print i [SlashToken:rest] = [" / ":print i rest]
151 print i [PercentToken:rest] = [" % ":print i rest]
152 print i [AssignmentToken:rest] = [" = ":print i rest]
153 print i [LesserToken:rest] = [" < ":print i rest]
154 print i [BiggerToken:rest] = [" > ":print i rest]
155 print i [ExclamationToken:rest] = ["!":print i rest]
156 print i [EndOfFileToken:rest] = ["\n":print i rest]