From 863197f642a4b006739fb016c6ede4fa514dcb2a Mon Sep 17 00:00:00 2001 From: Mart Lubbers Date: Mon, 8 Feb 2021 13:28:02 +0100 Subject: [PATCH] types and locations --- Makefile | 3 +- ast.c | 188 ++++++++++++++++++++++++++++++++++++++++++++++-------- ast.h | 42 ++++++++++-- expr.c | 2 +- input.txt | 8 +++ parse.y | 57 +++++++++++------ scan.l | 17 ++++- 7 files changed, 262 insertions(+), 55 deletions(-) diff --git a/Makefile b/Makefile index fec3916..dfb7600 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,6 @@ CFLAGS:=-Wall -Wextra -Werror -std=c99 -pedantic-errors -D_XOPEN_SOURCE=700 -ggdb -YFLAGS:=-d -Wcounterexamples +YFLAGS:=-d --locations# -Wcounterexamples +LFLAGS:=-X OBJECTS:=scan.o parse.o ast.o util.o diff --git a/ast.c b/ast.c index 622ba74..cb484c4 100644 --- a/ast.c +++ b/ast.c @@ -4,6 +4,7 @@ #include "util.h" #include "ast.h" +#include "y.tab.h" static const char *binop_str[] = { [binor] = "||", [binand] = "&&", [eq] = "==", [neq] = "!=", @@ -14,6 +15,10 @@ static const char *binop_str[] = { static const char *fieldspec_str[] = { [fst] = "fst", [snd] = "snd", [hd] = "hd", [tl] = "tl"}; static const char *unop_str[] = { [inverse] = "!", [negate] = "-", }; +static const char *basictype_str[] = { + [btbool] = "Bool", [btchar] = "Char", [btint] = "Int", + [btvoid] = "Void", +}; struct ast *ast(struct list *decls) { @@ -22,19 +27,34 @@ struct ast *ast(struct list *decls) return res; } -struct decl *decl_fun(char *ident, struct list *args, struct list *body) +struct vardecl *vardecl(struct type *type, char *ident, struct expr *expr) +{ + struct vardecl *res = safe_malloc(sizeof(struct vardecl)); + res->type = type; + res->ident = ident; + res->expr = expr; + return res; +} + +struct decl *decl_fun(char *ident, struct list *args, struct list *atypes, + struct type *rtype, struct list *vars, struct list *body) { struct decl *res = safe_malloc(sizeof(struct decl)); res->type = dfundecl; res->data.dfun.ident = ident; res->data.dfun.args = (char **) list_to_array(args, &res->data.dfun.nargs, true); + res->data.dfun.atypes = (struct type **) + list_to_array(atypes, &res->data.dfun.natypes, true); + res->data.dfun.rtype = rtype; + res->data.dfun.vars = (struct vardecl **) + list_to_array(vars, &res->data.dfun.nvar, true); res->data.dfun.body = (struct stmt **) list_to_array(body, &res->data.dfun.nbody, true); return res; } -struct decl *decl_var(struct vardecl vardecl) +struct decl *decl_var(struct vardecl *vardecl) { struct decl *res = safe_malloc(sizeof(struct decl)); res->type = dvardecl; @@ -79,14 +99,6 @@ struct stmt *stmt_expr(struct expr *expr) return res; } -struct stmt *stmt_vardecl(struct vardecl vardecl) -{ - struct stmt *res = safe_malloc(sizeof(struct stmt)); - res->type = svardecl; - res->data.svardecl = vardecl; - return res; -} - struct stmt *stmt_while(struct expr *pred, struct list *body) { struct stmt *res = safe_malloc(sizeof(struct stmt)); @@ -217,6 +229,50 @@ struct expr *expr_unop(enum unop op, struct expr *l) return res; } +struct type *type_list(struct type *type) +{ + struct type *res = safe_malloc(sizeof(struct type)); + res->type = tlist; + res->data.tlist = type; + return res; +} + +struct type *type_tuple(struct type *l, struct type *r) +{ + struct type *res = safe_malloc(sizeof(struct type)); + res->type = ttuple; + res->data.ttuple.l = l; + res->data.ttuple.r = r; + return res; +} + +struct type *type_var(char *ident) +{ + struct type *res = safe_malloc(sizeof(struct type)); + if (strcmp(ident, "Int") == 0) { + res->type = tbasic; + res->data.tbasic = btint; + free(ident); + } else if (strcmp(ident, "Char") == 0) { + res->type = tbasic; + res->data.tbasic = btchar; + free(ident); + } else if (strcmp(ident, "Bool") == 0) { + res->type = tbasic; + res->data.tbasic = btbool; + free(ident); + } else if (strcmp(ident, "Void") == 0) { + res->type = tbasic; + res->data.tbasic = btvoid; + free(ident); + } else { + res->type = tvar; + res->data.tvar = ident; + } + return res; +} + + const char *cescapes[] = { [0] = "\\0", [1] = "\\x01", [2] = "\\x02", [3] = "\\x03", [4] = "\\x04", [5] = "\\x05", [6] = "\\x06", [7] = "\\a", [8] = "\\b", @@ -237,6 +293,18 @@ void ast_print(struct ast *ast, FILE *out) decl_print(ast->decls[i], 0, out); } +void vardecl_print(struct vardecl *decl, int indent, FILE *out) +{ + pindent(indent, out); + if (decl->type == NULL) + safe_fprintf(out, "var"); + else + type_print(decl->type, out); + safe_fprintf(out, " %s = ", decl->ident); + expr_print(decl->expr, out); + safe_fprintf(out, ";\n"); +} + void decl_print(struct decl *decl, int indent, FILE *out) { if (decl == NULL) @@ -250,17 +318,26 @@ void decl_print(struct decl *decl, int indent, FILE *out) if (i < decl->data.dfun.nargs - 1) safe_fprintf(out, ", "); } - safe_fprintf(out, ") {\n"); + safe_fprintf(out, ")"); + if (decl->data.dfun.rtype != NULL) { + safe_fprintf(out, " :: "); + for (int i = 0; idata.dfun.natypes; i++) { + type_print(decl->data.dfun.atypes[i], out); + safe_fprintf(out, " "); + } + safe_fprintf(out, "-> "); + type_print(decl->data.dfun.rtype, out); + } + safe_fprintf(out, " {\n"); + for (int i = 0; idata.dfun.nvar; i++) + vardecl_print(decl->data.dfun.vars[i], indent+1, out); for (int i = 0; idata.dfun.nbody; i++) stmt_print(decl->data.dfun.body[i], indent+1, out); pindent(indent, out); safe_fprintf(out, "}\n"); break; case dvardecl: - pindent(indent, out); - safe_fprintf(out, "var %s = ", decl->data.dvar.ident); - expr_print(decl->data.dvar.expr, out); - safe_fprintf(out, ";\n"); + vardecl_print(decl->data.dvar, indent, out); break; default: die("Unsupported decl node\n"); @@ -304,12 +381,6 @@ void stmt_print(struct stmt *stmt, int indent, FILE *out) expr_print(stmt->data.sexpr, out); safe_fprintf(out, ";\n"); break; - case svardecl: - pindent(indent, out); - safe_fprintf(out, "var %s = ", stmt->data.svardecl.ident); - expr_print(stmt->data.svardecl.expr, out); - safe_fprintf(out, ";\n"); - break; case swhile: pindent(indent, out); safe_fprintf(out, "while ("); @@ -388,6 +459,34 @@ void expr_print(struct expr *expr, FILE *out) } } +void type_print(struct type *type, FILE *out) +{ + if (type == NULL) + return; + switch (type->type) { + case tbasic: + safe_fprintf(out, "%s", basictype_str[type->data.tbasic]); + break; + case tlist: + safe_fprintf(out, "["); + type_print(type->data.tlist, out); + safe_fprintf(out, "]"); + break; + case ttuple: + safe_fprintf(out, "("); + type_print(type->data.ttuple.l, out); + safe_fprintf(out, ","); + type_print(type->data.ttuple.r, out); + safe_fprintf(out, ")"); + break; + case tvar: + safe_fprintf(out, "%s", type->data.tvar); + break; + default: + die("Unsupported type node\n"); + } +} + void ast_free(struct ast *ast) { if (ast == NULL) @@ -398,6 +497,14 @@ void ast_free(struct ast *ast) free(ast); } +void vardecl_free(struct vardecl *decl) +{ + type_free(decl->type); + free(decl->ident); + expr_free(decl->expr); + free(decl); +} + void decl_free(struct decl *decl) { if (decl == NULL) @@ -408,13 +515,19 @@ void decl_free(struct decl *decl) for (int i = 0; idata.dfun.nargs; i++) free(decl->data.dfun.args[i]); free(decl->data.dfun.args); + for (int i = 0; idata.dfun.natypes; i++) + type_free(decl->data.dfun.atypes[i]); + free(decl->data.dfun.atypes); + type_free(decl->data.dfun.rtype); + for (int i = 0; idata.dfun.nvar; i++) + vardecl_free(decl->data.dfun.vars[i]); + free(decl->data.dfun.vars); for (int i = 0; idata.dfun.nbody; i++) stmt_free(decl->data.dfun.body[i]); free(decl->data.dfun.body); break; case dvardecl: - free(decl->data.dvar.ident); - expr_free(decl->data.dvar.expr); + vardecl_free(decl->data.dvar); break; default: die("Unsupported decl node\n"); @@ -446,10 +559,6 @@ void stmt_free(struct stmt *stmt) case sexpr: expr_free(stmt->data.sexpr); break; - case svardecl: - free(stmt->data.svardecl.ident); - expr_free(stmt->data.svardecl.expr); - break; case swhile: expr_free(stmt->data.swhile.pred); for (int i = 0; idata.swhile.nbody; i++) @@ -464,6 +573,8 @@ void stmt_free(struct stmt *stmt) void expr_free(struct expr *expr) { + if (expr == NULL) + return; switch(expr->type) { case ebinop: expr_free(expr->data.ebinop.l); @@ -499,3 +610,26 @@ void expr_free(struct expr *expr) } free(expr); } + +void type_free(struct type *type) +{ + if (type == NULL) + return; + switch (type->type) { + case tbasic: + break; + case tlist: + type_free(type->data.tlist); + break; + case ttuple: + type_free(type->data.ttuple.l); + type_free(type->data.ttuple.r); + break; + case tvar: + free(type->data.tvar); + break; + default: + die("Unsupported type node\n"); + } + free(type); +} diff --git a/ast.h b/ast.h index 6b8f427..ee9b840 100644 --- a/ast.h +++ b/ast.h @@ -5,6 +5,8 @@ #include #include "util.h" +struct ast; +#include "y.tab.h" struct ast { int ndecls; @@ -12,10 +14,25 @@ struct ast { }; struct vardecl { + struct type *type; char *ident; struct expr *expr; }; +enum basictype {btbool, btchar, btint, btvoid}; +struct type { + enum {tbasic,tlist,ttuple,tvar} type; + union { + enum basictype tbasic; + struct type *tlist; + struct { + struct type *l; + struct type *r; + } ttuple; + char *tvar; + } data; +}; + struct decl { enum {dfundecl, dvardecl} type; union { @@ -23,15 +40,20 @@ struct decl { char *ident; int nargs; char **args; + int natypes; + struct type **atypes; + struct type *rtype; + int nvar; + struct vardecl **vars; int nbody; struct stmt **body; } dfun; - struct vardecl dvar; + struct vardecl *dvar; } data; }; struct stmt { - enum {sassign, sif, sreturn, sexpr, svardecl, swhile} type; + enum {sassign, sif, sreturn, sexpr, swhile} type; union { struct { char *ident; @@ -46,7 +68,6 @@ struct stmt { } sif; struct expr *sreturn; struct expr *sexpr; - struct vardecl svardecl; struct { struct expr *pred; int nbody; @@ -96,8 +117,11 @@ struct expr { struct ast *ast(struct list *decls); -struct decl *decl_fun(char *ident, struct list *args, struct list *body); -struct decl *decl_var(struct vardecl vardecl); +struct vardecl *vardecl(struct type *type, char *ident, struct expr *expr); + +struct decl *decl_fun(char *ident, struct list *args, struct list *atypes, + struct type *rtype, struct list *vars, struct list *body); +struct decl *decl_var(struct vardecl *vardecl); struct stmt *stmt_assign(char *ident, struct expr *expr); struct stmt *stmt_if(struct expr *pred, struct list *then, struct list *els); @@ -116,14 +140,22 @@ struct expr *expr_nil(); struct expr *expr_tuple(struct expr *left, struct expr *right); struct expr *expr_unop(enum unop op, struct expr *l); +struct type *type_list(struct type *type); +struct type *type_tuple(struct type *l, struct type *r); +struct type *type_var(char *ident); + void ast_print(struct ast *, FILE *out); +void vardecl_print(struct vardecl *decl, int indent, FILE *out); void decl_print(struct decl *ast, int indent, FILE *out); void stmt_print(struct stmt *ast, int indent, FILE *out); void expr_print(struct expr *ast, FILE *out); +void type_print(struct type *type, FILE *out); void ast_free(struct ast *); +void vardecl_free(struct vardecl *decl); void decl_free(struct decl *ast); void stmt_free(struct stmt *ast); void expr_free(struct expr *ast); +void type_free(struct type *type); #endif diff --git a/expr.c b/expr.c index 725e397..7bf8782 100644 --- a/expr.c +++ b/expr.c @@ -1,7 +1,7 @@ #include #include -#include "ast.h" +#include "ast.h" #include "y.tab.h" extern int yylex_destroy(void); diff --git a/input.txt b/input.txt index 44ec6ef..a9d1fde 100644 --- a/input.txt +++ b/input.txt @@ -2,6 +2,8 @@ var x = 5 == 5; var y = x; fun(x){ var x = 5; + Int y = 6; + x y = 6; 6; if(true){5;}else{5;} '\t'; @@ -10,3 +12,9 @@ f(); f(x); f(1, 2, []); y = 5+x.fst.snd; } +fun(x) :: Int Bool -> Int { +} +fun(x) :: -> Void { +} +fun(x) :: a b c [a] ([a], b) -> Void { +} diff --git a/parse.y b/parse.y index 33e9881..7cad6e5 100644 --- a/parse.y +++ b/parse.y @@ -1,18 +1,16 @@ %{ #include -#include -#include #include "ast.h" - #include "y.tab.h" int yylex(void); +extern YYLTYPE yylloc; void yyerror(struct ast **result, const char *str) { - ast_free(*result); - fprintf(stderr, "error: %s\n", str); + fprintf(stderr, "%d-%d: %s\n", yylloc.first_line, yylloc.last_column, str); + (void)result; } int yywrap() @@ -22,24 +20,26 @@ int yywrap() %} -%union -{ +%union { struct expr *expr; struct stmt *stmt; struct list *list; - struct vardecl vardecl; + struct vardecl *vardecl; struct decl *decl; + struct type *type; char *ident; } -//%define parse.error verbose +%locations %token IDENT %token BOOL CHAR INTEGER -%token ASSIGN BCLOSE BINAND BINOR BOPEN CCLOSE COMMA CONS COPEN DIVIDE DOT ELSE -%token IF INVERSE MINUS MODULO NIL PLUS POWER RETURN SEMICOLON TIMES VAR WHILE +%token ARROW ASSIGN BCLOSE BINAND BINOR BOPEN CCLOSE COMMA CONS COPEN DIVIDE +%token DOT ELSE ERROR IF INVERSE MINUS MODULO NIL PLUS POWER RETURN SEMICOLON +%token SCLOSE SOPEN TIMES VAR WHILE %parse-param { struct ast **result } +%right ARROW %right BINOR %right BINAND %nonassoc EQ NEQ LEQ LE GEQ GE @@ -49,26 +49,44 @@ int yywrap() %right POWER %type start -%type expr -%type args body decls fargs field fnargs nargs %type fundecl -%type vardecl +%type expr +%type args body decls fargs field fnargs nargs funtype vardecls %type stmt +%type type +%type vardecl %% start : decls { *result = ast($1); } ; decls : { $$ = NULL; } - | decls vardecl SEMICOLON { $$ = list_cons(decl_var($2), $1); } + | decls vardecl { $$ = list_cons(decl_var($2), $1); } | decls fundecl { $$ = list_cons($2, $1); } ; vardecl - : VAR IDENT ASSIGN expr { $$ = (struct vardecl) {.ident=$2, .expr=$4}; } + : VAR IDENT ASSIGN expr SEMICOLON { $$ = vardecl(NULL, $2, $4); } + | type IDENT ASSIGN expr SEMICOLON { $$ = vardecl($1, $2, $4); } ; fundecl - : IDENT BOPEN args BCLOSE COPEN body CCLOSE - { $$ = decl_fun($1, $3, $6); } + : IDENT BOPEN args BCLOSE CONS CONS funtype ARROW type COPEN vardecls body CCLOSE + { $$ = decl_fun($1, $3, $7, $9, $11, $12); } + | IDENT BOPEN args BCLOSE COPEN vardecls body CCLOSE + { $$ = decl_fun($1, $3, NULL, NULL, $6, $7); } + ; +vardecls + : { $$ = NULL; } + | vardecls vardecl { $$ = list_cons($2, $1); } + ; +funtype + : { $$ = NULL; } + | funtype type { $$ = list_cons($2, $1); } + ; +type + : BOPEN type COMMA type BCLOSE { $$ = type_tuple($2, $4); } + | BOPEN type BCLOSE { $$ = $2; } + | SOPEN type SCLOSE { $$ = type_list($2); } + | IDENT { $$ = type_var($1); } ; args : { $$ = NULL; } @@ -99,7 +117,6 @@ stmt | IDENT ASSIGN expr SEMICOLON { $$ = stmt_assign($1, $3); } | RETURN expr SEMICOLON { $$ = stmt_return($2); } | RETURN SEMICOLON { $$ = stmt_return(NULL); } - | vardecl SEMICOLON { $$ = stmt_vardecl($1); } ; field : { $$ = NULL; } @@ -120,7 +137,7 @@ expr | expr DIVIDE expr { $$ = expr_binop($1, divide, $3); } | expr MODULO expr { $$ = expr_binop($1, modulo, $3); } | expr POWER expr { $$ = expr_binop($1, power, $3); } - | MINUS expr { $$ = expr_unop(negate, $2); } + | MINUS expr %prec TIMES { $$ = expr_unop(negate, $2); } | INVERSE expr %prec TIMES { $$ = expr_unop(inverse, $2); } | BOPEN expr COMMA expr BCLOSE { $$ = expr_tuple($2, $4); } | BOPEN expr BCLOSE { $$ = $2; } diff --git a/scan.l b/scan.l index 048d104..b1fb5be 100644 --- a/scan.l +++ b/scan.l @@ -3,9 +3,21 @@ %{ #include +#define YY_USER_ACTION \ + yylloc.first_line = yylloc.last_line; \ + yylloc.first_column = yylloc.last_column; \ + for(int i = 0; yytext[i] != '\0'; i++) { \ + if(yytext[i] == '\n') { \ + yylloc.last_line++; \ + yylloc.last_column = 0; \ + } \ + else { \ + yylloc.last_column++; \ + } \ + } + #include "ast.h" #include "y.tab.h" -extern YYSTYPE yylval; %} @@ -18,6 +30,7 @@ var return VAR; true { yylval.expr = expr_bool(true); return BOOL; } false { yylval.expr = expr_bool(false); return BOOL; } return return RETURN; +-> return ARROW; = return ASSIGN; ! return INVERSE; \|\| return BINOR; @@ -40,6 +53,8 @@ return return RETURN; \{ return COPEN; \} return CCLOSE; \; return SEMICOLON; +\[ return SOPEN; +\] return SCLOSE; \[\] return NIL; \. return DOT; , return COMMA; -- 2.20.1