make parser more robust, add string literals and escapes
[ccc.git] / parse.y
1 %{
2 #include <stdio.h>
3
4 #include "ast.h"
5 #include "y.tab.h"
6
7 int yylex(void);
8 extern YYLTYPE yylloc;
9
10 void yyerror(struct ast **result, const char *str)
11 {
12 fprintf(stderr, "%d-%d: %s\n", yylloc.first_line, yylloc.last_column, str);
13 (void)result;
14 }
15
/*
 * End-of-input hook called by the lexer (lex/flex convention).
 *
 * Always returns 1, i.e. "no further input", so scanning stops at the first
 * end of file.  Declared with a (void) parameter list so that it is a real
 * prototype, matching the declaration style of yylex() in this file.
 */
int yywrap(void)
{
	return 1;
}
20
21 %}
22
/* Semantic value of a grammar symbol; one member per kind of value. */
%union {
	struct expr	*expr;		/* expr and the literal tokens */
	struct stmt	*stmt;		/* stmt */
	struct list	*list;		/* every list-valued nonterminal */
	struct vardecl	*vardecl;	/* vardecl */
	struct decl	*decl;		/* fundecl */
	struct type	*type;		/* type, ftype */
	char		*ident;		/* IDENT token */
}
32
/* Enable location tracking (yylloc), which yyerror() reads. */
%locations

/* Tokens that carry a semantic value. */
%token <ident>	IDENT
%token <expr>	BOOL CHAR INTEGER STRING

/* Tokens without a value: punctuation, operators and keywords. */
%token ARROW ASSIGN BCLOSE BINAND BINOR BOPEN CCLOSE COMMA CONS COPEN DIVIDE
%token DOT ELSE ERROR IF INVERSE MINUS MODULO NIL PLUS POWER RETURN SEMICOLON
%token SCLOSE SOPEN TIMES TBOOL TCHAR TINT TVOID VAR WHILE

/*
 * Extra argument of yyparse() and yyerror(); the start rule stores the
 * finished tree through it.
 */
%parse-param { struct ast **result }
41
/*
 * Operator precedence and associativity, listed from the loosest binding
 * (first line) to the tightest (last line).  The comparison operators have
 * no %token line in this section; the %nonassoc line is what declares them.
 */
%right		BINOR
%right		BINAND
%nonassoc	EQ NEQ LEQ LE GEQ GE
%right		CONS
%left		PLUS MINUS
%left		TIMES DIVIDE MODULO
%right		POWER
49
/*
 * Semantic value types of the nonterminals (members of %union).
 *
 * `start` is deliberately left untyped: its action only stores the result
 * through the `result` parse parameter and never reads or writes $$, and
 * %union has no `ast` member that a <ast> tag could refer to.
 */
%type <decl>	fundecl
%type <expr>	expr
%type <list>	args body decls fargs field fnargs nargs funtype vardecls bbody
%type <stmt>	stmt
%type <type>	type ftype
%type <vardecl>	vardecl
57
58 %%
59
/* Entry point: wrap the declaration list with ast() and store it in *result. */
start
	: decls				{ *result = ast($1); }
	;
/*
 * Top-level declarations: any mix of variable and function declarations.
 * Each new declaration is handed to list_cons() together with the list
 * built so far; variable declarations are wrapped with decl_var() first.
 */
decls
	: /* empty */			{ $$ = NULL; }
	| decls vardecl			{ $$ = list_cons(decl_var($2), $1); }
	| decls fundecl			{ $$ = list_cons($2, $1); }
	;
/*
 * Variable declaration; an initialiser expression is mandatory.
 * The VAR form passes NULL as the type to vardecl().
 */
vardecl
	: VAR IDENT ASSIGN expr SEMICOLON
		{ $$ = vardecl(NULL, $2, $4); }
	| type IDENT ASSIGN expr SEMICOLON
		{ $$ = vardecl($1, $2, $4); }
	;
/*
 * Function declaration, with or without a type signature.
 *
 * With a signature, the argument types (funtype) and the return type
 * (ftype) follow two CONS tokens and are separated by ARROW.  Without one,
 * both type arguments of decl_fun() are NULL.  In either form the braces
 * enclose local variable declarations followed by statements.
 */
fundecl
	: IDENT BOPEN args BCLOSE CONS CONS funtype ARROW ftype
	  COPEN vardecls body CCLOSE
		{ $$ = decl_fun($1, $3, $7, $9, $11, $12); }
	| IDENT BOPEN args BCLOSE
	  COPEN vardecls body CCLOSE
		{ $$ = decl_fun($1, $3, NULL, NULL, $6, $7); }
	;
/* Possibly empty run of variable declarations at the top of a function body. */
vardecls
	: /* empty */			{ $$ = NULL; }
	| vardecls vardecl		{ $$ = list_cons($2, $1); }
	;
/* Possibly empty sequence of argument types in a function signature. */
funtype
	: /* empty */			{ $$ = NULL; }
	| funtype ftype			{ $$ = list_cons($2, $1); }
	;
/*
 * Type at the head of a variable declaration.
 *
 * Same alternatives as ftype except for the bare IDENT (type variable):
 * don't allow vardecls to be fully polymorphic, this complicates parsing
 * a lot.  Component types are ftype, so type variables may still appear
 * inside a tuple or list type.
 */
type
	: BOPEN ftype COMMA ftype BCLOSE	{ $$ = type_tuple($2, $4); }
	| SOPEN ftype SCLOSE			{ $$ = type_list($2); }
	| TBOOL					{ $$ = type_basic(btbool); }
	| TCHAR					{ $$ = type_basic(btchar); }
	| TINT					{ $$ = type_basic(btint); }
	| TVOID					{ $$ = type_basic(btvoid); }
	;
/*
 * Full type syntax, used in function signatures and for the components of
 * tuple and list types.  In addition to the alternatives of `type`, a bare
 * IDENT is accepted and becomes a type variable.
 */
ftype
	: BOPEN ftype COMMA ftype BCLOSE	{ $$ = type_tuple($2, $4); }
	| SOPEN ftype SCLOSE			{ $$ = type_list($2); }
	| TBOOL					{ $$ = type_basic(btbool); }
	| TCHAR					{ $$ = type_basic(btchar); }
	| TINT					{ $$ = type_basic(btint); }
	| TVOID					{ $$ = type_basic(btvoid); }
	| IDENT					{ $$ = type_var($1); }
	;
/* Formal parameter list of a function declaration; may be empty. */
args
	: /* empty */			{ $$ = NULL; }
	| nargs				{ $$ = $1; }
	;
/* Non-empty, comma-separated list of parameter names. */
nargs
	: nargs COMMA IDENT		{ $$ = list_cons($3, $1); }
	| IDENT				{ $$ = list_cons($1, NULL); }
	;
/* Actual argument list of a function call; may be empty. */
fargs
	: /* empty */			{ $$ = NULL; }
	| fnargs			{ $$ = $1; }
	;
/* Non-empty, comma-separated list of argument expressions. */
fnargs
	: fnargs COMMA expr		{ $$ = list_cons($3, $1); }
	| expr				{ $$ = list_cons($1, NULL); }
	;
/* Possibly empty sequence of statements. */
body
	: /* empty */			{ $$ = NULL; }
	| body stmt			{ $$ = list_cons($2, $1); }
	;
/* Possibly empty chain of DOT IDENT field selectors following a name or call. */
field
	: /* empty */			{ $$ = NULL; }
	| field DOT IDENT		{ $$ = list_cons($3, $1); }
	;
/*
 * Body of an IF or WHILE: either a block between COPEN and CCLOSE or one
 * bare statement, which is turned into a single-element list.
 */
bbody
	: COPEN body CCLOSE		{ $$ = $2; }
	| stmt				{ $$ = list_cons($1, NULL); }
	;
/*
 * Statements.
 *
 * The two IF alternatives share a prefix.  ELSE has no declared precedence,
 * so yacc resolves the resulting conflict by shifting, which attaches an
 * ELSE to the nearest IF.
 */
stmt
	: IF BOPEN expr BCLOSE bbody
		{ $$ = stmt_if($3, $5, NULL); }
	| IF BOPEN expr BCLOSE bbody ELSE bbody
		{ $$ = stmt_if($3, $5, $7); }
	| WHILE BOPEN expr BCLOSE bbody
		{ $$ = stmt_while($3, $5); }
	| IDENT field ASSIGN expr SEMICOLON
		{ $$ = stmt_assign($1, $2, $4); }
	| RETURN expr SEMICOLON
		{ $$ = stmt_return($2); }
	| RETURN SEMICOLON
		{ $$ = stmt_return(NULL); }
	| vardecl
		{ $$ = stmt_vardecl($1); }
	| expr SEMICOLON
		{ $$ = stmt_expr($1); }
	;
/*
 * Expressions.
 *
 * Binary operators rely on the precedence declarations for grouping.  The
 * two unary operators borrow the precedence of TIMES via %prec, so they
 * bind tighter than PLUS/MINUS but looser than POWER.  Literal tokens
 * already carry an expr value from the lexer and are passed through.
 */
expr
	/* binary operators */
	: expr BINOR expr		{ $$ = expr_binop($1, binor, $3); }
	| expr BINAND expr		{ $$ = expr_binop($1, binand, $3); }
	| expr EQ expr			{ $$ = expr_binop($1, eq, $3); }
	| expr NEQ expr			{ $$ = expr_binop($1, neq, $3); }
	| expr LEQ expr			{ $$ = expr_binop($1, leq, $3); }
	| expr LE expr			{ $$ = expr_binop($1, le, $3); }
	| expr GEQ expr			{ $$ = expr_binop($1, geq, $3); }
	| expr GE expr			{ $$ = expr_binop($1, ge, $3); }
	| expr CONS expr		{ $$ = expr_binop($1, cons, $3); }
	| expr PLUS expr		{ $$ = expr_binop($1, plus, $3); }
	| expr MINUS expr		{ $$ = expr_binop($1, minus, $3); }
	| expr TIMES expr		{ $$ = expr_binop($1, times, $3); }
	| expr DIVIDE expr		{ $$ = expr_binop($1, divide, $3); }
	| expr MODULO expr		{ $$ = expr_binop($1, modulo, $3); }
	| expr POWER expr		{ $$ = expr_binop($1, power, $3); }

	/* unary operators */
	| MINUS expr %prec TIMES	{ $$ = expr_unop(negate, $2); }
	| INVERSE expr %prec TIMES	{ $$ = expr_unop(inverse, $2); }

	/* calls, tuples and parenthesised expressions */
	| IDENT BOPEN fargs BCLOSE field
		{ $$ = expr_funcall($1, $3, $5); }
	| BOPEN expr COMMA expr BCLOSE
		{ $$ = expr_tuple($2, $4); }
	| BOPEN expr BCLOSE		{ $$ = $2; }

	/* literals: value supplied by the lexer */
	| INTEGER			{ $$ = $1; }
	| BOOL				{ $$ = $1; }
	| CHAR				{ $$ = $1; }
	| STRING			{ $$ = $1; }

	/* names and the empty list */
	| IDENT field			{ $$ = expr_ident($1, $2); }
	| NIL				{ $$ = expr_nil(); }
	;