work on type inference some more
[ccc.git] / ast.c
diff --git a/ast.c b/ast.c
index 0a17870..1e93a71 100644 (file)
--- a/ast.c
+++ b/ast.c
@@ -4,21 +4,19 @@
 
 #include "util.h"
 #include "ast.h"
-#include "y.tab.h"
+#include "type.h"
+#include "list.h"
+#include "parse.h"
 
-static const char *binop_str[] = {
+const char *binop_str[] = {
        [binor] = "||", [binand] = "&&", [eq] = "==", [neq] = "!=",
        [leq] = "<=", [le] = "<", [geq] = ">=", [ge] = ">", [cons] = ":",
        [plus] = "+", [minus] = "-", [times] = "*", [divide] = "/",
        [modulo] = "%", [power] = "^",
 };
-static const char *fieldspec_str[] = {
+const char *fieldspec_str[] = {
        [fst] = "fst", [snd] = "snd", [hd] = "hd", [tl] = "tl"};
-static const char *unop_str[] = { [inverse] = "!", [negate] = "-", };
-static const char *basictype_str[] = {
-       [btbool] = "Bool", [btchar] = "Char", [btint] = "Int",
-       [btvoid] = "Void",
-};
+const char *unop_str[] = { [inverse] = "!", [negate] = "-", };
 
 struct ast *ast(struct list *decls)
 {
@@ -35,22 +33,23 @@ struct vardecl *vardecl(struct type *type, char *ident, struct expr *expr)
        res->expr = expr;
        return res;
 }
+struct fundecl *fundecl(char *ident, struct list *args, struct list *atypes,
+       struct type *rtype, struct list *body)
+{
+       struct fundecl *res = safe_malloc(sizeof(struct fundecl));
+       res->ident = ident;
+       res->args = (char **)list_to_array(args, &res->nargs, true);
+       res->atypes = (struct type **)list_to_array(atypes, &res->natypes, true);
+       res->rtype = rtype;
+       res->body = (struct stmt **)list_to_array(body, &res->nbody, true);
+       return res;
+}
 
-struct decl *decl_fun(char *ident, struct list *args, struct list *atypes,
-       struct type *rtype, struct list *vars, struct list *body)
+struct decl *decl_fun(struct fundecl *fundecl)
 {
        struct decl *res = safe_malloc(sizeof(struct decl));
        res->type = dfundecl;
-       res->data.dfun.ident = ident;
-       res->data.dfun.args = (char **)
-               list_to_array(args, &res->data.dfun.nargs, true);
-       res->data.dfun.atypes = (struct type **)
-               list_to_array(atypes, &res->data.dfun.natypes, true);
-       res->data.dfun.rtype = rtype;
-       res->data.dfun.vars = (struct vardecl **)
-               list_to_array(vars, &res->data.dfun.nvar, true);
-       res->data.dfun.body = (struct stmt **)
-               list_to_array(body, &res->data.dfun.nbody, true);
+       res->data.dfun = fundecl;
        return res;
 }
 
@@ -68,7 +67,7 @@ struct stmt *stmt_assign(char *ident, struct list *fields, struct expr *expr)
        res->type = sassign;
        res->data.sassign.ident = ident;
        res->data.sassign.fields = (char **)
-               list_to_array(fields, &res->data.sassign.nfield, true);
+               list_to_array(fields, &res->data.sassign.nfields, true);
        res->data.sassign.expr = expr;
        return res;
 }
@@ -101,6 +100,14 @@ struct stmt *stmt_expr(struct expr *expr)
        return res;
 }
 
+struct stmt *stmt_vardecl(struct vardecl *vardecl)
+{
+       struct stmt *res = safe_malloc(sizeof(struct stmt));
+       res->type = svardecl;
+       res->data.svardecl = vardecl;
+       return res;
+}
+
 struct stmt *stmt_while(struct expr *pred, struct list *body)
 {
        struct stmt *res = safe_malloc(sizeof(struct stmt));
@@ -128,52 +135,44 @@ struct expr *expr_bool(bool b)
        res->data.ebool = b;
        return res;
 }
-int fromHex(char c)
-{
-       if (c >= '0' && c <= '9')
-               return c-'0';
-       if (c >= 'a' && c <= 'f')
-               return c-'a'+10;
-       if (c >= 'A' && c <= 'F')
-               return c-'A'+10;
-       return -1;
-}
 
-struct expr *expr_char(const char *c)
+struct expr *expr_char(char *c)
 {
        struct expr *res = safe_malloc(sizeof(struct expr));
        res->type = echar;
-       //regular char
-       if (c[0] == '\'' && c[2] == '\'')
-               res->data.echar = c[1];
-       //escape
-       else if (c[0] == '\'' && c[1] == '\\' && c[3] == '\'')
-               switch(c[2]) {
-               case '0': res->data.echar = '\0'; break;
-               case '\'': res->data.echar = '\''; break;
-               case '\\': res->data.echar = '\\'; break;
-               case 'a': res->data.echar = '\a'; break;
-               case 'b': res->data.echar = '\b'; break;
-               case 't': res->data.echar = '\t'; break;
-               case 'v': res->data.echar = '\v'; break;
-               case 'f': res->data.echar = '\f'; break;
-               case 'r': res->data.echar = '\r'; break;
-               }
-       //hex escape
-       else if (c[0] == '\'' && c[1] == '\\' && c[2] == 'x' && c[5] == '\'')
-               res->data.echar = (fromHex(c[3])<<4)+fromHex(c[4]);
-       else
-               die("malformed character: %s\n", c);
+       res->data.echar = unescape_char(c)[0];
        return res;
 }
 
-struct expr *expr_funcall(char *ident, struct list *args)
+static void set_fields(enum fieldspec **farray, int *n, struct list *fields)
+{
+       void **els = list_to_array(fields, n, true);
+       *farray = (enum fieldspec *)safe_malloc(*n*sizeof(enum fieldspec));
+       for (int i = 0; i<*n; i++) {
+               char *t = els[i];
+               if (strcmp(t, "fst") == 0)
+                       (*farray)[i] = fst;
+               else if (strcmp(t, "snd") == 0)
+                       (*farray)[i] = snd;
+               else if (strcmp(t, "hd") == 0)
+                       (*farray)[i] = hd;
+               else if (strcmp(t, "tl") == 0)
+                       (*farray)[i] = tl;
+               free(t);
+       }
+       free(els);
+}
+
+
+struct expr *expr_funcall(char *ident, struct list *args, struct list *fields)
 {
        struct expr *res = safe_malloc(sizeof(struct expr));
        res->type = efuncall;
        res->data.efuncall.ident = ident;
        res->data.efuncall.args = (struct expr **)
                list_to_array(args, &res->data.efuncall.nargs, true);
+       set_fields(&res->data.efuncall.fields,
+               &res->data.efuncall.nfields, fields);
        return res;
 }
 
@@ -190,23 +189,7 @@ struct expr *expr_ident(char *ident, struct list *fields)
        struct expr *res = safe_malloc(sizeof(struct expr));
        res->type = eident;
        res->data.eident.ident = ident;
-
-       void **els = list_to_array(fields, &res->data.eident.nfields, true);
-       res->data.eident.fields = (enum fieldspec *)safe_malloc(
-               res->data.eident.nfields*sizeof(enum fieldspec));
-       for (int i = 0; i<res->data.eident.nfields; i++) {
-               char *t = els[i];
-               if (strcmp(t, "fst") == 0)
-                       res->data.eident.fields[i] = fst;
-               else if (strcmp(t, "snd") == 0)
-                       res->data.eident.fields[i] = snd;
-               else if (strcmp(t, "hd") == 0)
-                       res->data.eident.fields[i] = hd;
-               else if (strcmp(t, "tl") == 0)
-                       res->data.eident.fields[i] = tl;
-               free(t);
-       }
-       free(els);
+       set_fields(&res->data.eident.fields, &res->data.eident.nfields, fields);
        return res;
 }
 
@@ -230,9 +213,15 @@ struct expr *expr_string(char *str)
 {
        struct expr *res = safe_malloc(sizeof(struct expr));
        res->type = estring;
-       res->data.estring = safe_strdup(str+1);
-       res->data.estring[strlen(res->data.estring)-1] = '\0';
-       //TODO escapes
+       res->data.estring.nchars = 0;
+       res->data.estring.chars = safe_malloc(strlen(str)+1);
+       char *p = res->data.estring.chars;
+       while(*str != '\0') {
+               str = unescape_char(str);
+               *p++ = *str++;
+               res->data.estring.nchars++;
+       }
+       *p = '\0';
        return res;
 }
 
@@ -245,77 +234,12 @@ struct expr *expr_unop(enum unop op, struct expr *l)
        return res;
 }
 
-struct type *type_basic(enum basictype type)
-{
-       struct type *res = safe_malloc(sizeof(struct type));
-       res->type = tbasic;
-       res->data.tbasic = type;
-       return res;
-}
-
-struct type *type_list(struct type *type)
-{
-       struct type *res = safe_malloc(sizeof(struct type));
-       res->type = tlist;
-       res->data.tlist = type;
-       return res;
-}
-
-struct type *type_tuple(struct type *l, struct type *r)
-{
-       struct type *res = safe_malloc(sizeof(struct type));
-       res->type = ttuple;
-       res->data.ttuple.l = l;
-       res->data.ttuple.r = r;
-       return res;
-}
-
-struct type *type_var(char *ident)
-{
-       struct type *res = safe_malloc(sizeof(struct type));
-       if (strcmp(ident, "Int") == 0) {
-               res->type = tbasic;
-               res->data.tbasic = btint;
-               free(ident);
-       } else if (strcmp(ident, "Char") == 0) {
-               res->type = tbasic;
-               res->data.tbasic = btchar;
-               free(ident);
-       } else if (strcmp(ident, "Bool") == 0) {
-               res->type = tbasic;
-               res->data.tbasic = btbool;
-               free(ident);
-       } else if (strcmp(ident, "Void") == 0) {
-               res->type = tbasic;
-               res->data.tbasic = btvoid;
-               free(ident);
-       } else {
-               res->type = tvar;
-               res->data.tvar = ident;
-       }
-       return res;
-}
-
-
-const char *cescapes[] = {
-       [0] = "0", [1] = "x01", [2] = "x02", [3] = "x03",
-       [4] = "x04", [5] = "x05", [6] = "x06", [7] = "a", [8] = "b",
-       [9] = "t", [10] = "n", [11] = "v", [12] = "f", [13] = "r",
-       [14] = "x0E", [15] = "x0F", [16] = "x10", [17] = "x11",
-       [18] = "x12", [19] = "x13", [20] = "x14", [21] = "x15",
-       [22] = "x16", [23] = "x17", [24] = "x18", [25] = "x19",
-       [26] = "x1A", [27] = "x1B", [28] = "x1C", [29] = "x1D",
-       [30] = "x1E", [31] = "x1F",
-       ['\\'] = "\\", ['\''] = "'",
-       [127] = "x7F"
-};
-
 void ast_print(struct ast *ast, FILE *out)
 {
        if (ast == NULL)
                return;
        for (int i = 0; i<ast->ndecls; i++)
-               decl_print(ast->decls[i], 0, out);
+               decl_print(ast->decls[i], out);
 }
 
 void vardecl_print(struct vardecl *decl, int indent, FILE *out)
@@ -330,39 +254,46 @@ void vardecl_print(struct vardecl *decl, int indent, FILE *out)
        safe_fprintf(out, ";\n");
 }
 
-void decl_print(struct decl *decl, int indent, FILE *out)
+void fundecl_print(struct fundecl *decl, FILE *out)
+{
+       safe_fprintf(out, "%s (", decl->ident);
+       for (int i = 0; i<decl->nargs; i++) {
+               safe_fprintf(out, "%s", decl->args[i]);
+               if (i < decl->nargs - 1)
+                       safe_fprintf(out, ", ");
+       }
+       safe_fprintf(out, ")");
+       if (decl->rtype != NULL) {
+               safe_fprintf(out, " :: ");
+               for (int i = 0; i<decl->natypes; i++) {
+                       type_print(decl->atypes[i], out);
+                       safe_fprintf(out, " ");
+               }
+               safe_fprintf(out, "-> ");
+               type_print(decl->rtype, out);
+       }
+       safe_fprintf(out, " {\n");
+       for (int i = 0; i<decl->nbody; i++)
+               stmt_print(decl->body[i], 1, out);
+       safe_fprintf(out, "}\n");
+}
+
+void decl_print(struct decl *decl, FILE *out)
 {
        if (decl == NULL)
                return;
        switch(decl->type) {
        case dfundecl:
-               pindent(indent, out);
-               safe_fprintf(out, "%s (", decl->data.dfun.ident);
-               for (int i = 0; i<decl->data.dfun.nargs; i++) {
-                       safe_fprintf(out, "%s", decl->data.dfun.args[i]);
-                       if (i < decl->data.dfun.nargs - 1)
-                               safe_fprintf(out, ", ");
-               }
-               safe_fprintf(out, ")");
-               if (decl->data.dfun.rtype != NULL) {
-                       safe_fprintf(out, " :: ");
-                       for (int i = 0; i<decl->data.dfun.natypes; i++) {
-                               type_print(decl->data.dfun.atypes[i], out);
-                               safe_fprintf(out, " ");
-                       }
-                       safe_fprintf(out, "-> ");
-                       type_print(decl->data.dfun.rtype, out);
-               }
-               safe_fprintf(out, " {\n");
-               for (int i = 0; i<decl->data.dfun.nvar; i++)
-                       vardecl_print(decl->data.dfun.vars[i], indent+1, out);
-               for (int i = 0; i<decl->data.dfun.nbody; i++)
-                       stmt_print(decl->data.dfun.body[i], indent+1, out);
-               pindent(indent, out);
-               safe_fprintf(out, "}\n");
+               fundecl_print(decl->data.dfun, out);
                break;
        case dvardecl:
-               vardecl_print(decl->data.dvar, indent, out);
+               vardecl_print(decl->data.dvar, 0, out);
+               break;
+       case dcomp:
+               fprintf(out, "//<<<comp\n");
+               for (int i = 0; i<decl->data.dcomp.ndecls; i++)
+                       fundecl_print(decl->data.dcomp.decls[i], out);
+               fprintf(out, "//>>>comp\n");
                break;
        default:
                die("Unsupported decl node\n");
@@ -377,7 +308,7 @@ void stmt_print(struct stmt *stmt, int indent, FILE *out)
        case sassign:
                pindent(indent, out);
                fprintf(out, "%s", stmt->data.sassign.ident);
-               for (int i = 0; i<stmt->data.sassign.nfield; i++)
+               for (int i = 0; i<stmt->data.sassign.nfields; i++)
                        fprintf(out, ".%s", stmt->data.sassign.fields[i]);
                safe_fprintf(out, " = ");
                expr_print(stmt->data.sassign.expr, out);
@@ -408,14 +339,16 @@ void stmt_print(struct stmt *stmt, int indent, FILE *out)
                expr_print(stmt->data.sexpr, out);
                safe_fprintf(out, ";\n");
                break;
+       case svardecl:
+               vardecl_print(stmt->data.svardecl, indent, out);
+               break;
        case swhile:
                pindent(indent, out);
                safe_fprintf(out, "while (");
                expr_print(stmt->data.swhile.pred, out);
                safe_fprintf(out, ") {\n");
-               for (int i = 0; i<stmt->data.swhile.nbody; i++) {
+               for (int i = 0; i<stmt->data.swhile.nbody; i++)
                        stmt_print(stmt->data.swhile.body[i], indent+1, out);
-               }
                pindent(indent, out);
                safe_fprintf(out, "}\n");
                break;
@@ -428,6 +361,7 @@ void expr_print(struct expr *expr, FILE *out)
 {
        if (expr == NULL)
                return;
+       char buf[] = "\\xff";
        switch(expr->type) {
        case ebinop:
                safe_fprintf(out, "(");
@@ -440,16 +374,8 @@ void expr_print(struct expr *expr, FILE *out)
                safe_fprintf(out, "%s", expr->data.ebool ? "true" : "false");
                break;
        case echar:
-               if (expr->data.echar < 0) {
-                       safe_fprintf(out, "'?'");
-               } else if (expr->data.echar < ' ' || expr->data.echar == 127
-                               || expr->data.echar == '\\'
-                               || expr->data.echar == '\'') {
-                       safe_fprintf(out, "'\\%s'",
-                               cescapes[(int)expr->data.echar]);
-               } else {
-                       safe_fprintf(out, "'%c'", expr->data.echar);
-               }
+               safe_fprintf(out, "'%s'",
+                       escape_char(expr->data.echar, buf, false));
                break;
        case efuncall:
                safe_fprintf(out, "%s(", expr->data.efuncall.ident);
@@ -459,6 +385,9 @@ void expr_print(struct expr *expr, FILE *out)
                                safe_fprintf(out, ", ");
                }
                safe_fprintf(out, ")");
+               for (int i = 0; i<expr->data.efuncall.nfields; i++)
+                       fprintf(out, ".%s",
+                               fieldspec_str[expr->data.efuncall.fields[i]]);
                break;
        case eint:
                safe_fprintf(out, "%d", expr->data.eint);
@@ -480,7 +409,11 @@ void expr_print(struct expr *expr, FILE *out)
                safe_fprintf(out, ")");
                break;
        case estring:
-               safe_fprintf(out, "\"%s\"", expr->data.estring);
+               safe_fprintf(out, "\"");
+               for (int i = 0; i<expr->data.estring.nchars; i++)
+                       safe_fprintf(out, "%s", escape_char(
+                               expr->data.estring.chars[i], buf, true));
+               safe_fprintf(out, "\"");
                break;
        case eunop:
                safe_fprintf(out, "(%s", unop_str[expr->data.eunop.op]);
@@ -492,34 +425,6 @@ void expr_print(struct expr *expr, FILE *out)
        }
 }
 
-void type_print(struct type *type, FILE *out)
-{
-       if (type == NULL)
-               return;
-       switch (type->type) {
-       case tbasic:
-               safe_fprintf(out, "%s", basictype_str[type->data.tbasic]);
-               break;
-       case tlist:
-               safe_fprintf(out, "[");
-               type_print(type->data.tlist, out);
-               safe_fprintf(out, "]");
-               break;
-       case ttuple:
-               safe_fprintf(out, "(");
-               type_print(type->data.ttuple.l, out);
-               safe_fprintf(out, ",");
-               type_print(type->data.ttuple.r, out);
-               safe_fprintf(out, ")");
-               break;
-       case tvar:
-               safe_fprintf(out, "%s", type->data.tvar);
-               break;
-       default:
-               die("Unsupported type node\n");
-       }
-}
-
 void ast_free(struct ast *ast)
 {
        if (ast == NULL)
@@ -538,26 +443,34 @@ void vardecl_free(struct vardecl *decl)
        free(decl);
 }
 
+void fundecl_free(struct fundecl *decl)
+{
+       free(decl->ident);
+       for (int i = 0; i<decl->nargs; i++)
+               free(decl->args[i]);
+       free(decl->args);
+       for (int i = 0; i<decl->natypes; i++)
+               type_free(decl->atypes[i]);
+       free(decl->atypes);
+       type_free(decl->rtype);
+       for (int i = 0; i<decl->nbody; i++)
+               stmt_free(decl->body[i]);
+       free(decl->body);
+       free(decl);
+}
+
 void decl_free(struct decl *decl)
 {
        if (decl == NULL)
                return;
        switch(decl->type) {
+       case dcomp:
+               for (int i = 0; i<decl->data.dcomp.ndecls; i++)
+                       fundecl_free(decl->data.dcomp.decls[i]);
+               free(decl->data.dcomp.decls);
+               break;
        case dfundecl:
-               free(decl->data.dfun.ident);
-               for (int i = 0; i<decl->data.dfun.nargs; i++)
-                       free(decl->data.dfun.args[i]);
-               free(decl->data.dfun.args);
-               for (int i = 0; i<decl->data.dfun.natypes; i++)
-                       type_free(decl->data.dfun.atypes[i]);
-               free(decl->data.dfun.atypes);
-               type_free(decl->data.dfun.rtype);
-               for (int i = 0; i<decl->data.dfun.nvar; i++)
-                       vardecl_free(decl->data.dfun.vars[i]);
-               free(decl->data.dfun.vars);
-               for (int i = 0; i<decl->data.dfun.nbody; i++)
-                       stmt_free(decl->data.dfun.body[i]);
-               free(decl->data.dfun.body);
+               fundecl_free(decl->data.dfun);
                break;
        case dvardecl:
                vardecl_free(decl->data.dvar);
@@ -575,8 +488,9 @@ void stmt_free(struct stmt *stmt)
        switch(stmt->type) {
        case sassign:
                free(stmt->data.sassign.ident);
-               for (int i = 0; i<stmt->data.sassign.nfield; i++)
+               for (int i = 0; i<stmt->data.sassign.nfields; i++)
                        free(stmt->data.sassign.fields[i]);
+               free(stmt->data.sassign.fields);
                expr_free(stmt->data.sassign.expr);
                break;
        case sif:
@@ -600,6 +514,9 @@ void stmt_free(struct stmt *stmt)
                        stmt_free(stmt->data.swhile.body[i]);
                free(stmt->data.swhile.body);
                break;
+       case svardecl:
+               vardecl_free(stmt->data.svardecl);
+               break;
        default:
                die("Unsupported stmt node\n");
        }
@@ -623,6 +540,7 @@ void expr_free(struct expr *expr)
                free(expr->data.efuncall.ident);
                for (int i = 0; i<expr->data.efuncall.nargs; i++)
                        expr_free(expr->data.efuncall.args[i]);
+               free(expr->data.efuncall.fields);
                free(expr->data.efuncall.args);
                break;
        case eint:
@@ -638,7 +556,7 @@ void expr_free(struct expr *expr)
                expr_free(expr->data.etuple.right);
                break;
        case estring:
-               free(expr->data.estring);
+               free(expr->data.estring.chars);
                break;
        case eunop:
                expr_free(expr->data.eunop.l);
@@ -648,26 +566,3 @@ void expr_free(struct expr *expr)
        }
        free(expr);
 }
-
-void type_free(struct type *type)
-{
-       if (type == NULL)
-               return;
-       switch (type->type) {
-       case tbasic:
-               break;
-       case tlist:
-               type_free(type->data.tlist);
-               break;
-       case ttuple:
-               type_free(type->data.ttuple.l);
-               type_free(type->data.ttuple.r);
-               break;
-       case tvar:
-               free(type->data.tvar);
-               break;
-       default:
-               die("Unsupported type node\n");
-       }
-       free(type);
-}