From 7d800a655de565826fd2404fd11d85dccd9c11d1 Mon Sep 17 00:00:00 2001 From: Vladimir Kalinin Date: Wed, 16 Jan 2013 08:01:27 +0400 Subject: [PATCH] Unicode literals --- Examples/test-suite/csharp_features.i | 4 ++ Source/CParse/cscanner.c | 14 +++++- Source/CParse/parser.y | 42 +++++++++++++++-- Source/Swig/cwrap.c | 2 +- Source/Swig/scanner.c | 68 ++++++++++++++++++++++++--- Source/Swig/stype.c | 11 ++++- Source/Swig/swig.h | 3 ++ Source/Swig/swigscan.h | 3 ++ Source/Swig/typesys.c | 4 ++ 9 files changed, 137 insertions(+), 14 deletions(-) diff --git a/Examples/test-suite/csharp_features.i b/Examples/test-suite/csharp_features.i index 578a56a10..f77e3fc9c 100644 --- a/Examples/test-suite/csharp_features.i +++ b/Examples/test-suite/csharp_features.i @@ -1,4 +1,5 @@ %module csharp_features +%include "wchar.i" // SWIG gets the method modifiers wrong occasionally, like with private inheritance, %csmethodmodifiers can fix this %csmethodmodifiers Derived::VirtualMethod() "public virtual" @@ -19,6 +20,9 @@ public: class MoreDerived : public Derived { public: int variable; + // Test wide string and wide char literals used as default arguments (C# module) + void methodWithDefault1(const wchar_t* s = L"literal with escapes \x1234"){} + void methodWithDefault2(wchar_t c = L'\x1234'){} }; %} diff --git a/Source/CParse/cscanner.c b/Source/CParse/cscanner.c index 23152e884..14a4f4bd0 100644 --- a/Source/CParse/cscanner.c +++ b/Source/CParse/cscanner.c @@ -383,6 +383,10 @@ static int yylook(void) { case SWIG_TOKEN_STRING: yylval.id = Swig_copy_string(Char(Scanner_text(scan))); return STRING; + + case SWIG_TOKEN_WSTRING: + yylval.id = Swig_copy_string(Char(Scanner_text(scan))); + return WSTRING; case SWIG_TOKEN_CHAR: yylval.str = NewString(Scanner_text(scan)); @@ -391,7 +395,15 @@ static int yylook(void) { Printf(stdout,"%d\n", Len(Scanner_text(scan))); } return CHARCONST; - + + case SWIG_TOKEN_WCHAR: + yylval.str = NewString(Scanner_text(scan)); + if (Len(yylval.str) == 0) { + Swig_error(cparse_file, 
cparse_line, "Empty character constant\n"); + Printf(stdout,"%d\n", Len(Scanner_text(scan))); /* NOTE(review): writes the token length to stdout on the error path, same as the CHARCONST case; looks like leftover debug output - confirm */ + } + return WCHARCONST; + /* Numbers */ case SWIG_TOKEN_INT: diff --git a/Source/CParse/parser.y b/Source/CParse/parser.y index d91b7adce..cd032eb00 100644 --- a/Source/CParse/parser.y +++ b/Source/CParse/parser.y @@ -13,8 +13,8 @@ * some point. Beware. * ----------------------------------------------------------------------------- */ +%expect 6 /* bison: number of shift/reduce conflicts the grammar is expected to produce */ %{ - #define yylex yylex char cvsroot_parser_y[] = "$Id$"; @@ -1635,9 +1635,9 @@ static void tag_nodes(Node *n, const_String_or_char_ptr attrname, DOH *value) { %token ID %token HBLOCK %token POUND -%token STRING +%token STRING WSTRING %token INCLUDE IMPORT INSERT -%token CHARCONST +%token CHARCONST WCHARCONST %token NUM_INT NUM_FLOAT NUM_UNSIGNED NUM_LONG NUM_ULONG NUM_LONGLONG NUM_BOOL %token TYPEDEF %token TYPE_INT TYPE_UNSIGNED TYPE_SHORT TYPE_LONG TYPE_FLOAT TYPE_DOUBLE TYPE_CHAR TYPE_WCHAR TYPE_VOID TYPE_SIGNED TYPE_BOOL TYPE_COMPLEX TYPE_TYPEDEF TYPE_RAW TYPE_NON_ISO_INT8 TYPE_NON_ISO_INT16 TYPE_NON_ISO_INT32 TYPE_NON_ISO_INT64 @@ -1728,7 +1728,7 @@ static void tag_nodes(Node *n, const_String_or_char_ptr attrname, DOH *value) { %type abstract_declarator direct_abstract_declarator ctor_end; %type typemap_type; %type idcolon idcolontail idcolonnt idcolontailnt idtemplate stringbrace stringbracesemi; -%type string stringnum ; +%type string stringnum wstring; %type template_parms; %type cpp_end cpp_vend; %type rename_namewarn; @@ -6001,7 +6001,7 @@ definetype : { /* scanner_check_typedef(); */ } expr { $$ = $2; if ($$.type == T_STRING) { $$.rawval = NewStringf("\"%(escape)s\"",$$.val); - } else if ($$.type != T_CHAR) { + } else if ($$.type != T_CHAR && $$.type != T_WSTRING && $$.type != T_WCHAR) { $$.rawval = 0; } $$.bitfield = 0; @@ -6127,6 +6127,11 @@ valexpr : exprnum { $$ = $1; } $$.type = T_ULONG; } | exprcompound { $$ = $1; } + | wstring { + $$.val = NewString($1); + $$.rawval = NewStringf("L\"%s\"", 
$$.val); + $$.type = T_WSTRING; + } | CHARCONST { $$.val = NewString($1); if (Len($$.val)) { @@ -6139,6 +6144,18 @@ valexpr : exprnum { $$ = $1; } $$.throws = 0; $$.throwf = 0; } + | WCHARCONST { + $$.val = NewString($1); + if (Len($$.val)) { + $$.rawval = NewStringf("L\'%s\'", $$.val); + } else { + $$.rawval = NewString("L'\\0'"); + } + $$.type = T_WCHAR; + $$.bitfield = 0; + $$.throws = 0; + $$.throwf = 0; + } /* grouping */ | LPAREN expr RPAREN %prec CAST { @@ -6661,6 +6678,21 @@ string : string STRING { } | STRING { $$ = $1;} ; +/* Concatenated wide strings: L"str1" L"str2". The result is a newly malloc-ed buffer holding $1 followed by $2. */ +wstring : wstring WSTRING { + $$ = (char *) malloc(strlen($1)+strlen($2)+1); + strcpy($$,$1); + strcat($$,$2); + } +/* Concatenated wide string and normal string literal: L"str1" "str2" */ +/* Not all compilers support this concatenation mode, so the rule below is left commented out for now. */ + /*| wstring STRING { NOTE: here $2 arrives unescaped and has to be escaped again first, e.g. via NewStringf("%(escape)s", $2) + $$ = (char *) malloc(strlen($1)+strlen($2)+1); + strcpy($$,$1); + strcat($$,$2); + }*/ + | WSTRING { $$ = $1;} + ; stringbrace : string { $$ = NewString($1); diff --git a/Source/Swig/cwrap.c b/Source/Swig/cwrap.c index 4a461fce8..d335bb0e9 100644 --- a/Source/Swig/cwrap.c +++ b/Source/Swig/cwrap.c @@ -274,7 +274,7 @@ int Swig_cargs(Wrapper *w, ParmList *p) { Delete(defname); Delete(defvalue); } - } else if (!pvalue && ((tycode == T_POINTER) || (tycode == T_STRING))) { + } else if (!pvalue && ((tycode == T_POINTER) || (tycode == T_STRING) || (tycode == T_WSTRING))) { pvalue = (String *) "0"; } if (!altty) { diff --git a/Source/Swig/scanner.c b/Source/Swig/scanner.c index 446a68eb7..c3562c7cc 100644 --- a/Source/Swig/scanner.c +++ b/Source/Swig/scanner.c @@ -509,7 +509,6 @@ static int look(Scanner * s) { state = 4; /* Possibly a SWIG directive */ /* Look for possible identifiers or unicode/delimiter strings */ - else if ((isalpha(c)) || (c == '_') || (s->idstart && strchr(s->idstart, c))) { state = 7; @@ 
-867,11 +866,15 @@ static int look(Scanner * s) { break; case 7: /* Identifier or true/false or unicode/custom delimiter string */ - if (c=='R') { /* Possibly CUSTOM DELIMITER string */ + if (c == 'R') { /* Possibly CUSTOM DELIMITER string */ state = 72; break; } - else if (c!='u' && c!='U' && c!='L') { /* Definitely an identifier */ + else if (c == 'L') { /* Usually an identifier, but may start a wide string (L"...") or wide char (L'...') literal */ + state = 77; + break; + } + else if (c != 'u' && c != 'U') { /* Definitely an identifier */ state = 70; break; } @@ -879,14 +882,14 @@ static int look(Scanner * s) { if ((c = nextchar(s)) == 0) { state = 76; } - else if (c=='\"') { /* Definitely u, U or L string */ + else if (c == '\"') { /* Definitely u, U or L string */ retract(s, 1); state = 1000; } - else if (c=='R') { /* Possibly CUSTOM DELIMITER u, U, L string */ + else if (c == 'R') { /* Possibly CUSTOM DELIMITER u, U, L string */ state = 73; } - else if (c=='8') { /* Possibly u8 string */ + else if (c == '8') { /* Possibly u8 string */ state = 71; } else { @@ -950,6 +953,59 @@ static int look(Scanner * s) { break; + case 77: /* Seen L in state 7: identifier, wide string literal or wide char literal */ + if ((c = nextchar(s)) == 0) + return SWIG_TOKEN_ID; + else if (c == '\"') { + s->start_line = s->line; + Clear(s->text); + state = 78; + } + else if (c == '\'') { + s->start_line = s->line; + Clear(s->text); + state = 79; + } + else if (isalnum(c) || (c == '_') || (c == '$')) + state = 7; + else { + retract(s, 1); + return SWIG_TOKEN_ID; + } + break; + + case 78: /* Processing a wide string literal; a backslash also consumes the character after it */ + if ((c = nextchar(s)) == 0) { + Swig_error(cparse_file, cparse_start_line, "Unterminated wide string\n"); + return SWIG_TOKEN_ERROR; + } + if (c == '\"') { + Delitem(s->text, DOH_END); + return SWIG_TOKEN_WSTRING; + } else if (c == '\\') { + if ((c = nextchar(s)) == 0) { + Swig_error(cparse_file, cparse_start_line, "Unterminated wide string\n"); + return SWIG_TOKEN_ERROR; + } + } + break; + + case 79: /* Processing a wide char literal */ 
+ if ((c = nextchar(s)) == 0) { + Swig_error(cparse_file, cparse_start_line, "Unterminated character constant\n"); + return SWIG_TOKEN_ERROR; + } + if (c == '\'') { + Delitem(s->text, DOH_END); + return (SWIG_TOKEN_WCHAR); + } else if (c == '\\') { + if ((c = nextchar(s)) == 0) { + Swig_error(cparse_file, cparse_start_line, "Unterminated wide char literal\n"); + return SWIG_TOKEN_ERROR; + } + } + break; + case 75: /* Special identifier $ */ if ((c = nextchar(s)) == 0) return SWIG_TOKEN_DOLLAR; diff --git a/Source/Swig/stype.c b/Source/Swig/stype.c index 32946ead5..b63530b53 100644 --- a/Source/Swig/stype.c +++ b/Source/Swig/stype.c @@ -131,12 +131,21 @@ SwigType *NewSwigType(int t) { case T_UCHAR: return NewString("unsigned char"); break; - case T_STRING:{ + case T_STRING: { SwigType *t = NewString("char"); SwigType_add_pointer(t); return t; break; } + case T_WCHAR: + return NewString("wchar_t"); + break; + case T_WSTRING: { + SwigType *t = NewString("wchar_t"); + SwigType_add_pointer(t); + return t; + break; + } case T_LONGLONG: return NewString("long long"); break; diff --git a/Source/Swig/swig.h b/Source/Swig/swig.h index dc65d9b0d..dceb73753 100644 --- a/Source/Swig/swig.h +++ b/Source/Swig/swig.h @@ -100,6 +100,9 @@ extern "C" { #define T_SYMBOL 98 #define T_ERROR 99 +/* Type code for a wide string literal; the text may contain escaped wide chars like \x1234 as well as normal escape sequences */ +#define T_WSTRING 39 + /* --- File interface --- */ diff --git a/Source/Swig/swigscan.h b/Source/Swig/swigscan.h index b07812fbe..ae2e9ea4d 100644 --- a/Source/Swig/swigscan.h +++ b/Source/Swig/swigscan.h @@ -67,6 +67,9 @@ extern void Scanner_freeze_line(Scanner *s, int val); #define SWIG_TOKEN_QUESTION 30 /* ? 
*/ #define SWIG_TOKEN_COMMENT 31 /* C or C++ comment */ #define SWIG_TOKEN_BOOL 32 /* true or false */ +#define SWIG_TOKEN_WSTRING 33 /* wide string literal: L"str" */ +#define SWIG_TOKEN_WCHAR 34 /* wide char literal: L'c' */ + #define SWIG_TOKEN_ILLEGAL 99 #define SWIG_TOKEN_ERROR -1 diff --git a/Source/Swig/typesys.c b/Source/Swig/typesys.c index a72ce5198..2de9bcb1d 100644 --- a/Source/Swig/typesys.c +++ b/Source/Swig/typesys.c @@ -1194,6 +1194,8 @@ int SwigType_type(SwigType *t) { if (strncmp(c, "p.", 2) == 0) { if (SwigType_type(c + 2) == T_CHAR) return T_STRING; + else if (SwigType_type(c + 2) == T_WCHAR) + return T_WSTRING; /* pointer to wchar_t is classified as a wide string */ else return T_POINTER; } @@ -1236,6 +1238,8 @@ int SwigType_type(SwigType *t) { return T_SCHAR; if (strcmp(c, "unsigned char") == 0) return T_UCHAR; + if (strcmp(c, "wchar_t") == 0) + return T_WCHAR; if (strcmp(c, "float") == 0) return T_FLOAT; if (strcmp(c, "double") == 0)