From 824dc49b592e352f0e6d6a9415afc0c5197b3d1f Mon Sep 17 00:00:00 2001
From: Mart Lubbers
Date: Tue, 9 Feb 2021 13:10:02 +0100
Subject: [PATCH] octal escapes

Accept octal escapes (\0O, \0OO and \0[0-3]OO) and one-digit hex
escapes (\xH) in string and character literals, and decode them in
unescape_char(). The character classes used by the scanner rules are
now named definitions in scan.l.

unescape_char() also decodes \n, which the scanner accepts but the
switch did not handle.

---
 input.txt |  2 +-
 scan.l    | 14 ++++++++---
 util.c    | 76 +++++++++++++++++++++++++++++++++++++++++++++------------
 3 files changed, 71 insertions(+), 21 deletions(-)

diff --git a/input.txt b/input.txt
index c1579d6..3f8969e 100644
--- a/input.txt
+++ b/input.txt
@@ -15,7 +15,7 @@ fun(x){
 	"abr";
 	"a\br";
 	"a\br\"";
-	"a\xaar\\";
+	"a\xaar\\\0377\01\xa";
 	return 5;
 	f();
 	f(x); f(1, 2, []);
diff --git a/scan.l b/scan.l
index 9337331..38dab0c 100644
--- a/scan.l
+++ b/scan.l
@@ -1,3 +1,9 @@
+D [0-9]
+H [0-9a-fA-F]
+E ([0\\abtnvfr]|x{H}{H}?|0[0-3]{O}{O}|0{O}{O}?)
+I [a-zA-Z_]
+O [0-7]
+
 %option noinput
 %option nounput
 %{
@@ -68,13 +74,13 @@ Void return TVOID;
 \[\] return NIL;
 \. return DOT;
 , return COMMA;
-\"([^\\"]|\\[\"0\\abtnvfr]|\\x[0-9a-fA-F][0-9a-fA-F])*\" {
+\"([^\\"]|\\(\"|{E}))*\" {
 	yylval.expr = expr_string(trimquotes(yytext)); return STRING; }
-'([^\\']|\\['0\\abtnvfr]|\\x[0-9a-fA-F][0-9a-fA-F])' {
+'([^\\']|\\('|{E}))' {
 	yylval.expr = expr_char(trimquotes(yytext)); return CHAR; }
-[0-9]+ {
+{D}+ {
 	yylval.expr = expr_int(atoi(yytext)); return INTEGER; }
-[_a-zA-Z][_a-zA-Z0-9]* {
+{I}({I}|{D})* {
 	yylval.ident = safe_strdup(yytext); return IDENT; }
 }
 {
diff --git a/util.c b/util.c
index 91166fd..09747c9 100644
--- a/util.c
+++ b/util.c
@@ -2,6 +2,7 @@
 #include
 #include
 #include
+#include <ctype.h>
 
 #include "util.h"
 
@@ -76,32 +77,75 @@ char *escape_char(char c, char *buf, bool str)
 	case '\'': strcpy(buf, str ? "'" : "\\'"); break;
 	case '"': strcpy(buf, str ? "\\\"" : "\""); break;
 	default:
-		if (c >= ' ' && c < 127) {
+		if (c >= ' ' && c < 127)
 			sprintf(buf, "%c", c);
-		} else {
+		else
 			sprintf(buf, "\\x%02x", (unsigned char)c);
-		}
 		break;
 	}
 	return buf;
 }
 
+/* Is c an octal digit ('0'..'7')? */
+bool isodigit(char c)
+{
+	return c >= '0' && c <= '7';
+}
+
+/* Value of the octal digit c, or -1 when c is not an octal digit. */
+int fromOctal(char c)
+{
+	if (isodigit(c))
+		return c-'0';
+	return -1;
+}
+
 char *unescape_char(char *c)
 {
-	//escape
 	if (c[0] == '\\') {
-		switch (c[1]) {
-		case '0': c[1] = '\0'; break;
-		case '\'': c[1] = '\''; break;
-		case '\\': c[1] = '\\'; break;
-		case '"': c[1] = '"'; break;
-		case 'a': c[1] = '\a'; break;
-		case 'b': c[1] = '\b'; break;
-		case 't': c[1] = '\t'; break;
-		case 'v': c[1] = '\v'; break;
-		case 'f': c[1] = '\f'; break;
-		case 'r': c[1] = '\r'; break;
-		case 'x': c[3] = (fromHex(c[2])<<4)+fromHex(c[3]); c+=2; break;
+		if (c[1] == 'x' && isxdigit((unsigned char)c[2])) {
+			//two hex
+			if (isxdigit((unsigned char)c[3])) {
+				c[3] = (fromHex(c[2])*16)+fromHex(c[3]);
+				c+=2;
+			//one hex
+			} else {
+				c[2] = fromHex(c[2]);
+				c++;
+			}
+		} else if (c[1] == '0' && isodigit(c[2])) {
+			if (isodigit(c[3])) {
+				//three octal: only \0[0-3]OO (as in scan.l), so the value fits a byte
+				if (c[2] <= '3' && isodigit(c[4])) {
+					c[4] = fromOctal(c[2])*64
+						+fromOctal(c[3])*8
+						+fromOctal(c[4]);
+					c+=3; //with the c++ below this lands on c[4], where the value is
+				//two octal (or first digit above 3: the third digit stays an ordinary character)
+				} else {
+					c[3] = fromOctal(c[2])*8
+						+fromOctal(c[3]);
+					c+=2;
+				}
+			// one octal
+			} else {
+				c[2] = fromOctal(c[2]);
+				c++;
+			}
+		} else {
+			switch (c[1]) {
+			case '0': c[1] = '\0'; break;
+			case '\'': c[1] = '\''; break;
+			case '\\': c[1] = '\\'; break;
+			case '"': c[1] = '"'; break;
+			case 'a': c[1] = '\a'; break;
+			case 'b': c[1] = '\b'; break;
+			case 't': c[1] = '\t'; break;
+			case 'n': c[1] = '\n'; break;
+			case 'v': c[1] = '\v'; break;
+			case 'f': c[1] = '\f'; break;
+			case 'r': c[1] = '\r'; break;
+			}
 		}
 		c++;
 	}
-- 
2.20.1