From: Mart Lubbers Date: Mon, 8 Feb 2021 15:51:07 +0000 (+0100) Subject: edgecases X-Git-Url: https://git.martlubbers.net/?a=commitdiff_plain;h=a601959e253ceb0a5386b666495c325ec1114f8d;p=ccc.git edgecases --- diff --git a/ast.c b/ast.c index 8b087f0..0a17870 100644 --- a/ast.c +++ b/ast.c @@ -62,11 +62,13 @@ struct decl *decl_var(struct vardecl *vardecl) return res; } -struct stmt *stmt_assign(char *ident, struct expr *expr) +struct stmt *stmt_assign(char *ident, struct list *fields, struct expr *expr) { struct stmt *res = safe_malloc(sizeof(struct stmt)); res->type = sassign; res->data.sassign.ident = ident; + res->data.sassign.fields = (char **) + list_to_array(fields, &res->data.sassign.nfield, true); res->data.sassign.expr = expr; return res; } @@ -142,12 +144,14 @@ struct expr *expr_char(const char *c) struct expr *res = safe_malloc(sizeof(struct expr)); res->type = echar; //regular char - if (strlen(c) == 3) + if (c[0] == '\'' && c[2] == '\'') res->data.echar = c[1]; //escape - if (strlen(c) == 4) + else if (c[0] == '\'' && c[1] == '\\' && c[3] == '\'') switch(c[2]) { case '0': res->data.echar = '\0'; break; + case '\'': res->data.echar = '\''; break; + case '\\': res->data.echar = '\\'; break; case 'a': res->data.echar = '\a'; break; case 'b': res->data.echar = '\b'; break; case 't': res->data.echar = '\t'; break; @@ -156,8 +160,10 @@ struct expr *expr_char(const char *c) case 'r': res->data.echar = '\r'; break; } //hex escape - if (strlen(c) == 6) + else if (c[0] == '\'' && c[1] == '\\' && c[2] == 'x' && c[5] == '\'') res->data.echar = (fromHex(c[3])<<4)+fromHex(c[4]); + else + die("malformed character: %s\n", c); return res; } @@ -220,6 +226,16 @@ struct expr *expr_tuple(struct expr *left, struct expr *right) return res; } +struct expr *expr_string(char *str) +{ + struct expr *res = safe_malloc(sizeof(struct expr)); + res->type = estring; + res->data.estring = safe_strdup(str+1); + res->data.estring[strlen(res->data.estring)-1] = '\0'; + //TODO escapes + 
return res; +} + struct expr *expr_unop(enum unop op, struct expr *l) { struct expr *res = safe_malloc(sizeof(struct expr)); @@ -282,15 +298,16 @@ struct type *type_var(char *ident) const char *cescapes[] = { - [0] = "\\0", [1] = "\\x01", [2] = "\\x02", [3] = "\\x03", - [4] = "\\x04", [5] = "\\x05", [6] = "\\x06", [7] = "\\a", [8] = "\\b", - [9] = "\\t", [10] = "\\n", [11] = "\\v", [12] = "\\f", [13] = "\\r", - [14] = "\\x0E", [15] = "\\x0F", [16] = "\\x10", [17] = "\\x11", - [18] = "\\x12", [19] = "\\x13", [20] = "\\x14", [21] = "\\x15", - [22] = "\\x16", [23] = "\\x17", [24] = "\\x18", [25] = "\\x19", - [26] = "\\x1A", [27] = "\\x1B", [28] = "\\x1C", [29] = "\\x1D", - [30] = "\\x1E", [31] = "\\x1F", - [127] = "\\x7F" + [0] = "0", [1] = "x01", [2] = "x02", [3] = "x03", + [4] = "x04", [5] = "x05", [6] = "x06", [7] = "a", [8] = "b", + [9] = "t", [10] = "n", [11] = "v", [12] = "f", [13] = "r", + [14] = "x0E", [15] = "x0F", [16] = "x10", [17] = "x11", + [18] = "x12", [19] = "x13", [20] = "x14", [21] = "x15", + [22] = "x16", [23] = "x17", [24] = "x18", [25] = "x19", + [26] = "x1A", [27] = "x1B", [28] = "x1C", [29] = "x1D", + [30] = "x1E", [31] = "x1F", + ['\\'] = "\\", ['\''] = "'", + [127] = "x7F" }; void ast_print(struct ast *ast, FILE *out) @@ -360,6 +377,8 @@ void stmt_print(struct stmt *stmt, int indent, FILE *out) case sassign: pindent(indent, out); fprintf(out, "%s", stmt->data.sassign.ident); + for (int i = 0; i<stmt->data.sassign.nfield; i++) + fprintf(out, ".%s", stmt->data.sassign.fields[i]); safe_fprintf(out, " = "); expr_print(stmt->data.sassign.expr, out); safe_fprintf(out, ";\n"); @@ -421,13 +440,16 @@ void expr_print(struct expr *expr, FILE *out) safe_fprintf(out, "%s", expr->data.ebool ? 
"true" : "false"); break; case echar: - if (expr->data.echar < 0) + if (expr->data.echar < 0) { safe_fprintf(out, "'?'"); - if (expr->data.echar < ' ' || expr->data.echar == 127) - safe_fprintf(out, "'%s'", + } else if (expr->data.echar < ' ' || expr->data.echar == 127 + || expr->data.echar == '\\' + || expr->data.echar == '\'') { + safe_fprintf(out, "'\\%s'", cescapes[(int)expr->data.echar]); - else + } else { safe_fprintf(out, "'%c'", expr->data.echar); + } break; case efuncall: safe_fprintf(out, "%s(", expr->data.efuncall.ident); @@ -457,6 +479,9 @@ void expr_print(struct expr *expr, FILE *out) expr_print(expr->data.etuple.right, out); safe_fprintf(out, ")"); break; + case estring: + safe_fprintf(out, "\"%s\"", expr->data.estring); + break; case eunop: safe_fprintf(out, "(%s", unop_str[expr->data.eunop.op]); expr_print(expr->data.eunop.l, out); @@ -550,6 +575,8 @@ void stmt_free(struct stmt *stmt) switch(stmt->type) { case sassign: free(stmt->data.sassign.ident); + for (int i = 0; i<stmt->data.sassign.nfield; i++) + free(stmt->data.sassign.fields[i]); expr_free(stmt->data.sassign.expr); break; case sif: @@ -610,6 +637,9 @@ void expr_free(struct expr *expr) expr_free(expr->data.etuple.left); expr_free(expr->data.etuple.right); break; + case estring: + free(expr->data.estring); + break; case eunop: expr_free(expr->data.eunop.l); break; diff --git a/ast.h b/ast.h index 6b31e0f..37ac2d5 100644 --- a/ast.h +++ b/ast.h @@ -57,6 +57,8 @@ struct stmt { union { struct { char *ident; + int nfield; + char **fields; struct expr *expr; } sassign; struct { @@ -84,7 +86,7 @@ enum fieldspec {fst,snd,hd,tl}; enum unop {negate,inverse}; struct expr { enum {ebinop, ebool, echar, efuncall, eident, eint, enil, etuple, - eunop} type; + estring, eunop} type; union { bool ebool; struct { @@ -108,6 +110,7 @@ struct expr { struct expr *left; struct expr *right; } etuple; + char *estring; struct { enum unop op; struct expr *l; @@ -123,7 +126,7 @@ struct decl *decl_fun(char *ident, struct list 
*args, struct list *atypes, struct type *rtype, struct list *vars, struct list *body); struct decl *decl_var(struct vardecl *vardecl); -struct stmt *stmt_assign(char *ident, struct expr *expr); +struct stmt *stmt_assign(char *ident, struct list *fields, struct expr *expr); struct stmt *stmt_if(struct expr *pred, struct list *then, struct list *els); struct stmt *stmt_return(struct expr *rtrn); struct stmt *stmt_expr(struct expr *expr); @@ -138,6 +141,7 @@ struct expr *expr_int(int integer); struct expr *expr_ident(char *ident, struct list *fields); struct expr *expr_nil(); struct expr *expr_tuple(struct expr *left, struct expr *right); +struct expr *expr_string(char *str); struct expr *expr_unop(enum unop op, struct expr *l); struct type *type_basic(enum basictype type); diff --git a/expr.c b/expr.c index 7bf8782..96383c6 100644 --- a/expr.c +++ b/expr.c @@ -7,15 +7,10 @@ extern int yylex_destroy(void); int main() { - fprintf(stderr, "sizeof(struct ast): %lu\n", sizeof(struct ast)); - fprintf(stderr, "sizeof(struct vardecl): %lu\n", sizeof(struct vardecl)); - fprintf(stderr, "sizeof(struct decl): %lu\n", sizeof(struct decl)); - fprintf(stderr, "sizeof(struct stmt): %lu\n", sizeof(struct stmt)); - fprintf(stderr, "sizeof(struct expr): %lu\n", sizeof(struct expr)); struct ast *result; int r = yyparse(&result); if (r != 0) - return r; + return 1; yylex_destroy(); ast_print(result, stdout); ast_free(result); diff --git a/input.txt b/input.txt index a9d1fde..a723fc8 100644 --- a/input.txt +++ b/input.txt @@ -3,10 +3,17 @@ var y = x; fun(x){ var x = 5; Int y = 6; - x y = 6; + 6; + x.fst = 5; 6; if(true){5;}else{5;} '\t'; + '\''; + '\\'; + '\x01'; + '\xaa'; + "abr"; + return 5; f(); f(x); f(1, 2, []); @@ -16,5 +23,11 @@ fun(x) :: Int Bool -> Int { } fun(x) :: -> Void { } -fun(x) :: a b c [a] ([a], b) -> Void { +fun(x) :: /* abc */ a b c [a] ([a], b) -> Void { } +/* abc */ +/* +*/ +//abc +var y = 0; +//blurp diff --git a/parse.y b/parse.y index dcfbad6..dc85dac 100644 --- 
a/parse.y +++ b/parse.y @@ -32,7 +32,7 @@ int yywrap() %locations %token IDENT -%token BOOL CHAR INTEGER +%token BOOL CHAR INTEGER STRING %token ARROW ASSIGN BCLOSE BINAND BINOR BOPEN CCLOSE COMMA CONS COPEN DIVIDE %token DOT ELSE ERROR IF INVERSE MINUS MODULO NIL PLUS POWER RETURN SEMICOLON %token SCLOSE SOPEN TIMES TBOOL TCHAR TINT TVOID VAR WHILE @@ -50,9 +50,9 @@ int yywrap() %type start %type fundecl %type expr -%type args body decls fargs field fnargs nargs funtype vardecls +%type args body decls fargs field fnargs nargs funtype vardecls bbody %type stmt -%type type +%type type ftype %type vardecl %% @@ -65,9 +65,10 @@ decls ; vardecl : VAR IDENT ASSIGN expr SEMICOLON { $$ = vardecl(NULL, $2, $4); } + | type IDENT ASSIGN expr SEMICOLON { $$ = vardecl($1, $2, $4); } ; fundecl - : IDENT BOPEN args BCLOSE CONS CONS funtype ARROW type COPEN vardecls body CCLOSE + : IDENT BOPEN args BCLOSE CONS CONS funtype ARROW ftype COPEN vardecls body CCLOSE { $$ = decl_fun($1, $3, $7, $9, $11, $12); } | IDENT BOPEN args BCLOSE COPEN vardecls body CCLOSE { $$ = decl_fun($1, $3, NULL, NULL, $6, $7); } @@ -78,9 +79,16 @@ vardecls ; funtype : /* empty */ { $$ = NULL; } - | funtype type { $$ = list_cons($2, $1); } - | funtype IDENT { $$ = list_cons(type_var($2), $1); } + | funtype ftype { $$ = list_cons($2, $1); } ; +ftype + : BOPEN ftype COMMA ftype BCLOSE { $$ = type_tuple($2, $4); } + | SOPEN ftype SCLOSE { $$ = type_list($2); } + | TBOOL { $$ = type_basic(btbool); } + | TCHAR { $$ = type_basic(btchar); } + | TINT { $$ = type_basic(btint); } + | TVOID { $$ = type_basic(btvoid); } + | IDENT { $$ = type_var($1); } type : BOPEN type COMMA type BCLOSE { $$ = type_tuple($2, $4); } | SOPEN type SCLOSE { $$ = type_list($2); } @@ -109,19 +117,23 @@ body : /* empty */ { $$ = NULL; } | body stmt { $$ = list_cons($2, $1); } ; +field + : /* empty */ { $$ = NULL; } + | field DOT IDENT { $$ = list_cons($3, $1); } + ; +bbody + : COPEN body CCLOSE { $$ = $2; } + | stmt { $$ = list_cons($1, 
NULL); } + ; stmt - : IF BOPEN expr BCLOSE COPEN body CCLOSE ELSE COPEN body CCLOSE - { $$ = stmt_if($3, $6, $10); } - | WHILE BOPEN expr BCLOSE COPEN body CCLOSE - { $$ = stmt_while($3, $6); } - | IDENT ASSIGN expr SEMICOLON { $$ = stmt_assign($1, $3); } + : IF BOPEN expr BCLOSE bbody { $$ = stmt_if($3, $5, NULL); } + | IF BOPEN expr BCLOSE bbody ELSE bbody { $$ = stmt_if($3, $5, $7); } + | WHILE BOPEN expr BCLOSE bbody { $$ = stmt_while($3, $5); } + | IDENT field ASSIGN expr SEMICOLON { $$ = stmt_assign($1, $2, $4); } | RETURN expr SEMICOLON { $$ = stmt_return($2); } | RETURN SEMICOLON { $$ = stmt_return(NULL); } | expr SEMICOLON { $$ = stmt_expr($1); } ; -field - : /* empty */ { $$ = NULL; } - | field DOT IDENT { $$ = list_cons($3, $1); } expr : expr BINOR expr { $$ = expr_binop($1, binor, $3); } | expr BINAND expr { $$ = expr_binop($1, binand, $3); } @@ -146,6 +158,7 @@ expr | INTEGER | BOOL | CHAR + | STRING | IDENT field { $$ = expr_ident($1, $2); } | NIL { $$ = expr_nil(); } ; diff --git a/scan.l b/scan.l index 23545c0..59dec5f 100644 --- a/scan.l +++ b/scan.l @@ -21,8 +21,14 @@ %} +%start IN_COMMENT + %% +{ +\/\* BEGIN(IN_COMMENT); +[ \n\t] ; +\/\/.*\n? ; if return IF; else return ELSE; while return WHILE; @@ -62,12 +68,18 @@ Void return TVOID; \[\] return NIL; \. return DOT; , return COMMA; -'([^']|\\[abtnvfr]|\\x[0-9a-fA-F]{2})' { +\"([^"]|\\\")*\" { + yylval.expr = expr_string(yytext); return STRING; } +'([^\\']|\\['0\\abtnvfr]|\\x[0-9a-fA-F][0-9a-fA-F])' { yylval.expr = expr_char(yytext); return CHAR; } [0-9]+ { yylval.expr = expr_int(atoi(yytext)); return INTEGER; } [_a-zA-Z][_a-zA-Z0-9]* { yylval.ident = safe_strdup(yytext); return IDENT; } -[ \n\t] ; +} +{ +\*\/ BEGIN(INITIAL); +. ; +} %%