octal escapes
author Mart Lubbers <mart@martlubbers.net>
Tue, 9 Feb 2021 12:10:02 +0000 (13:10 +0100)
committer Mart Lubbers <mart@martlubbers.net>
Tue, 9 Feb 2021 12:10:02 +0000 (13:10 +0100)
input.txt
scan.l
util.c

index c1579d6..3f8969e 100644 (file)
--- a/input.txt
+++ b/input.txt
@@ -15,7 +15,7 @@ fun(x){
        "abr";
        "a\br";
        "a\br\"";
-       "a\xaar\\";
+       "a\xaar\\\0377\01\xa";
 return 5;
 f(); 
 f(x); f(1, 2, []);
diff --git a/scan.l b/scan.l
index 9337331..38dab0c 100644 (file)
--- a/scan.l
+++ b/scan.l
@@ -1,3 +1,9 @@
+D [0-9]
+H [0-9a-fA-F]
+E ([0\\abtnvfr]|x{H}{H}?|0[0-3]{O}{O}|0{O}{O}?)
+I [a-zA-Z_]
+O [0-7]
+
 %option noinput
 %option nounput
 %{
@@ -68,13 +74,13 @@ Void        return TVOID;
 \[\]        return NIL;
 \.          return DOT;
 ,           return COMMA;
-\"([^\\"]|\\[\"0\\abtnvfr]|\\x[0-9a-fA-F][0-9a-fA-F])*\" {
+\"([^\\"]|\\(\"|{E}))*\" {
        yylval.expr = expr_string(trimquotes(yytext)); return STRING; }
-'([^\\']|\\['0\\abtnvfr]|\\x[0-9a-fA-F][0-9a-fA-F])' {
+'([^\\']|\\('|{E}))' {
        yylval.expr = expr_char(trimquotes(yytext)); return CHAR; }
-[0-9]+ {
+{D}+ {
        yylval.expr = expr_int(atoi(yytext)); return INTEGER; }
-[_a-zA-Z][_a-zA-Z0-9]* {
+{I}({I}|{D})* {
        yylval.ident = safe_strdup(yytext); return IDENT; }
 }
 <IN_COMMENT>{
diff --git a/util.c b/util.c
index 91166fd..09747c9 100644 (file)
--- a/util.c
+++ b/util.c
@@ -2,6 +2,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
+#include <ctype.h>
 
 #include "util.h"
 
@@ -76,32 +77,72 @@ char *escape_char(char c, char *buf, bool str)
        case '\'': strcpy(buf, str ? "'" : "\\'"); break;
        case '"': strcpy(buf, str ? "\\\"" : "\""); break;
        default:
-               if (c >= ' ' && c < 127) {
+               if (c >= ' ' && c < 127)
                        sprintf(buf, "%c", c);
-               } else {
+               else
                        sprintf(buf, "\\x%02x", (unsigned char)c);
-               }
                break;
        }
        return buf;
 }
 
+bool isodigit(char c) // true iff c is an octal digit character, '0'..'7'
+{ // NOTE(review): names of the form is[a-z]* are reserved for <ctype.h> (C11 7.31.2); consider renaming
+       return c >= '0' && c <= '7'; // digit characters are contiguous in C, so a range check suffices
+}
+
+int fromOctal(char c) // numeric value 0..7 of the octal digit character c, or -1 if c is not one
+{
+       if (isodigit(c))
+               return c-'0'; // offset from '0' gives the digit's value
+       return -1; // sentinel: c is outside '0'..'7'
+}
+
 char *unescape_char(char *c)
 {
-       //escape
        if (c[0] == '\\') {
-               switch (c[1]) {
-               case '0': c[1] = '\0'; break;
-               case '\'': c[1] = '\''; break;
-               case '\\': c[1] = '\\'; break;
-               case '"': c[1] = '"'; break;
-               case 'a': c[1] = '\a'; break;
-               case 'b': c[1] = '\b'; break;
-               case 't': c[1] = '\t'; break;
-               case 'v': c[1] = '\v'; break;
-               case 'f': c[1] = '\f'; break;
-               case 'r': c[1] = '\r'; break;
-               case 'x': c[3] = (fromHex(c[2])<<4)+fromHex(c[3]); c+=2; break;
+               if (c[1] == 'x' && isxdigit(c[2])) {
+                       //two hex
+                       if (isxdigit(c[3])) {
+                               c[3] = (fromHex(c[2])*16)+fromHex(c[3]);
+                               c+=2;
+                       //one hex
+                       } else {
+                               c[2] = fromHex(c[2]);
+                               c++;
+                       }
+               } else if (c[1] == '0' && isodigit(c[2])) {
+                       if (isodigit(c[3])) {
+                               //three octal
+                               if (isodigit(c[4])) {
+                                       c[4] = fromOctal(c[2])*64
+                                               +fromOctal(c[3])*8
+                                               +fromOctal(c[4]);
+                                       c+=2;
+                               //two octal
+                               } else {
+                                       c[3] = fromOctal(c[2])*8
+                                               +fromOctal(c[3]);
+                                       c+=2;
+                               }
+                       // one octal
+                       } else {
+                               c[2] = fromOctal(c[2]);
+                               c++;
+                       }
+               } else {
+                       switch (c[1]) {
+                       case '0': c[1] = '\0'; break;
+                       case '\'': c[1] = '\''; break;
+                       case '\\': c[1] = '\\'; break;
+                       case '"': c[1] = '"'; break;
+                       case 'a': c[1] = '\a'; break;
+                       case 'b': c[1] = '\b'; break;
+                       case 't': c[1] = '\t'; break;
+                       case 'v': c[1] = '\v'; break;
+                       case 'f': c[1] = '\f'; break;
+                       case 'r': c[1] = '\r'; break;
+                       }
                }
                c++;
        }