Major overhaul of C/C++ scanner API. Unified tokenizing code so that tokens are scanned by a common code base

git-svn-id: https://swig.svn.sourceforge.net/svnroot/swig/trunk@9639 626c5289-ae23-0410-ae9c-e8d60b6d4f22
This commit is contained in:
Dave Beazley 2007-01-12 19:01:40 +00:00
commit b92d8e5cff
9 changed files with 564 additions and 1170 deletions

View file

@ -1,6 +1,29 @@
Version 1.3.32 (in progress)
============================
01/12/2007: beazley
New command line option -macroerrors. When supplied, this will force
the C scanner/parser to report proper location information for code contained
inside SWIG macros (defined with %define). By default, SWIG merely reports
errors on the line at which a macro is used. With this option, you
can trace an error back to the line inside the macro definition where it
originates, which may simplify debugging.
01/12/2007: beazley
[Internals] Major overhaul of C/C++ scanning implementation. For quite
some time, SWIG contained two completely independent C/C++ tokenizers--
the legacy scanner in CParse/cscanner.c and a general purpose scanner
in Swig/scanner.c. SWIG still has two scanning modules, but the C parser
scanner (CParse/cscanner.c) now relies upon the general purpose
scanner found in Swig/scanner.c. As a result, it is much smaller and
less complicated. This change also makes it possible to maintain all
of the low-level C tokenizing in one central location instead of two
places as before.
***POTENTIAL FLAKINESS***
This change may cause problems with accurate line number reporting
as well as error reporting more generally. I have tried to resolve this
as much as possible, but there might be some corner cases.
01/12/2007: mgossage
[Lua] Added typemap throws for std::string*, typemap for SWIGTYPE DYNAMIC,
changed the existing throws typemap to throw a string instead of making a copy of

View file

@ -26,7 +26,7 @@ constant expressions.
<p>
All of these functions are declared in <tt>Source/Swig/swigscan.h</tt>. This API is considered to be stable.
<h2>Creation and Deletion of Scanner</h2>
<h2>Creation and Deletion of Scanners</h2>
The following functions are used to create and destroy a scanner object. More than one scanner object can be created and used
as necessary.
@ -118,17 +118,23 @@ Changes the current filename and line number of the scanner.<
</blockquote>
<p>
<b><tt>String *Scanner_get_file(Scanner *s)</tt></b>
<b><tt>String *Scanner_file(Scanner *s)</tt></b>
<blockquote>
Gets the current filename associated with text in the scanner.
</blockquote>
<p>
<b><tt>int Scanner_get_line(Scanner *s)</tt></b>
<b><tt>int Scanner_line(Scanner *s)</tt></b>
<blockquote>
Gets the current line number associated with text in the scanner.
</blockquote>
<p>
<b><tt>int Scanner_start_line(Scanner *s)</tt></b>
<blockquote>
Gets the starting line number of the last token returned by the scanner.
</blockquote>
<p>
<b><tt>void Scanner_idstart(Scanner *s, char *idchar)</tt></b>
<blockquote>
@ -159,6 +165,13 @@ A convenience function that returns 0 or 1 depending on whether <tt>tokval</tt>
operator overloading).
</blockquote>
<p>
<b><tt>void Scanner_freeze_line(Scanner *s, int val)</tt></b>
<blockquote>
Freezes the current line number when <tt>val</tt> is 1 and unfreezes it when <tt>val</tt> is 0. While the line number is frozen, newline characters will not result in
updates to the line number. This is sometimes useful in tracking line numbers through complicated macro expansions.
</blockquote>
<h2>Token Codes</h2>

View file

@ -20,7 +20,7 @@ extern "C" {
#endif
/* cscanner.c */
extern char *cparse_file;
extern String *cparse_file;
extern int cparse_line;
extern int cparse_cplusplus;
extern int cparse_start_line;
@ -34,6 +34,8 @@ extern "C" {
extern void scanner_ignore_typedef(void);
extern void scanner_last_id(int);
extern void scanner_clear_rename(void);
extern void scanner_set_location(String_or_char *, int line);
extern void Swig_cparse_follow_locators(int);
extern void start_inline(char *, int);
extern String *scanner_ccode;
extern int yylex();

File diff suppressed because it is too large Load diff

View file

@ -1547,9 +1547,8 @@ declaration : swig_directive { $$ = $1; }
| SEMI { $$ = 0; }
| error {
$$ = 0;
if (!Swig_error_count()) {
Swig_error(cparse_file, cparse_line,"Syntax error in input(1).\n");
}
Swig_error(cparse_file, cparse_line,"Syntax error in input(1).\n");
exit(1);
}
/* Out of class constructor/destructor declarations */
| c_constructor_decl {
@ -1864,13 +1863,11 @@ fragment_directive: FRAGMENT LPAREN fname COMMA kwargs RPAREN HBLOCK {
include_directive: includetype options string LBRACKET {
$1.filename = Swig_copy_string(cparse_file);
$1.line = cparse_line;
cparse_file = Swig_copy_string($3);
cparse_line = 0;
scanner_set_location($3,1);
} interface RBRACKET {
String *mname = 0;
$$ = $6;
cparse_file = $1.filename;
cparse_line = $1.line;
scanner_set_location($1.filename,$1.line);
if (strcmp($1.type,"include") == 0) set_nodeType($$,"include");
if (strcmp($1.type,"import") == 0) {
mname = $2 ? Getattr($2,"module") : 0;
@ -3103,9 +3100,8 @@ c_constructor_decl : storage_class type LPAREN parms RPAREN ctor_end {
}
}
if (err) {
if (!Swig_error_count()) {
Swig_error(cparse_file,cparse_line,"Syntax error in input(2).\n");
}
Swig_error(cparse_file,cparse_line,"Syntax error in input(2).\n");
exit(1);
}
}
;
@ -3954,12 +3950,11 @@ cpp_members : cpp_member cpp_members {
| error {
int start_line = cparse_line;
skip_decl();
if (!Swig_error_count()) {
Swig_error(cparse_file,start_line,"Syntax error in input(3).\n");
}
} cpp_members {
$$ = $3;
}
Swig_error(cparse_file,start_line,"Syntax error in input(3).\n");
exit(1);
} cpp_members {
$$ = $3;
}
;
/* ======================================================================

View file

@ -81,6 +81,7 @@ static const char *usage2 = (const char *) "\
-importall - Follow all #include statements as imports\n\
-includeall - Follow all #include statements\n\
-l<ifile> - Include SWIG library file <ifile>\n\
-macroerrors - Report errors inside macros\n\
-makedefault - Create default constructors/destructors (the default)\n\
-M - List all dependencies\n\
-MD - Is equivalent to `-M -MF <file>', except `-E' is not implied\n\
@ -502,6 +503,9 @@ void SWIG_getoptions(int argc, char *argv[]) {
} else if (strcmp(argv[i], "-notemplatereduce") == 0) {
SWIG_cparse_template_reduce(0);
Swig_mark_arg(i);
} else if (strcmp(argv[i], "-macroerrors") == 0) {
Swig_cparse_follow_locators(1);
Swig_mark_arg(i);
} else if (strcmp(argv[i], "-swiglib") == 0) {
if (SwigLibWin)
printf("%s\n", Char(SwigLibWin));

View file

@ -909,7 +909,10 @@ static String *expand_macro(String *name, List *args) {
#else
/* Use simplified around markers to properly count lines in cscanner.c */
if (strchr(Char(g), '\n')) {
Printf(f, "/*@SWIG:%s,%d,%s@*/%s/*@SWIG@*/", Getfile(macro), Getline(macro), name, g);
#if 0
Printf(f, "/*@SWIG:%s@*/%s/*@SWIG@*/", name, g);
#endif
} else {
Append(f, g);
}

View file

@ -22,12 +22,12 @@ struct Scanner {
char *idstart; /* Optional identifier start characters */
int nexttoken; /* Next token to be returned */
int start_line; /* Starting line of certain declarations */
int string_start;
int line;
int yylen; /* Length of text pushed into text */
String *file;
String *error; /* Last error message (if any) */
int error_line; /* Error line number */
int freeze_line; /* Suspend line number updates */
};
/* -----------------------------------------------------------------------------
@ -43,13 +43,13 @@ Scanner *NewScanner() {
s->file = 0;
s->nexttoken = -1;
s->start_line = 1;
s->string_start = 0;
s->yylen = 0;
s->idstart = "";
s->scanobjs = NewList();
s->text = NewStringEmpty();
s->str = 0;
s->error = 0;
s->freeze_line = 0;
return s;
}
@ -85,7 +85,6 @@ void Scanner_clear(Scanner * s) {
s->line = 1;
s->nexttoken = -1;
s->start_line = 0;
s->string_start = 0;
s->yylen = 0;
}
@ -99,8 +98,10 @@ void Scanner_clear(Scanner * s) {
void Scanner_push(Scanner * s, String *txt) {
assert(s && txt);
Push(s->scanobjs, txt);
if (s->str)
if (s->str) {
Setline(s->str,s->line);
Delete(s->str);
}
s->str = txt;
DohIncref(s->str);
s->line = Getline(txt);
@ -113,12 +114,14 @@ void Scanner_push(Scanner * s, String *txt) {
* call to Scanner_token().
* ----------------------------------------------------------------------------- */
void Scanner_pushtoken(Scanner * s, int nt, String_or_char *val) {
void Scanner_pushtoken(Scanner * s, int nt, const String_or_char *val) {
assert(s);
assert((nt >= 0) && (nt < SWIG_MAXTOKENS));
s->nexttoken = nt;
Clear(s->text);
Append(s->text,val);
if (val != s->text) {
Clear(s->text);
Append(s->text,val);
}
}
/* -----------------------------------------------------------------------------
@ -130,25 +133,35 @@ void Scanner_pushtoken(Scanner * s, int nt, String_or_char *val) {
void Scanner_set_location(Scanner * s, String *file, int line) {
Setline(s->str, line);
Setfile(s->str, file);
s->line = line;
}
/* -----------------------------------------------------------------------------
* Scanner_get_file()
* Scanner_file()
*
* Get the current file.
* ----------------------------------------------------------------------------- */
String *Scanner_get_file(Scanner * s) {
String *Scanner_file(Scanner * s) {
return Getfile(s->str);
}
/* -----------------------------------------------------------------------------
* Scanner_get_line()
* Scanner_line()
*
* Get the current line number
* ----------------------------------------------------------------------------- */
int Scanner_get_line(Scanner * s) {
return Getline(s->str);
int Scanner_line(Scanner * s) {
return s->line;
}
/* -----------------------------------------------------------------------------
* Scanner_start_line()
*
* Get the line number on which the current token starts
* ----------------------------------------------------------------------------- */
int Scanner_start_line(Scanner * s) {
return s->start_line;
}
/* -----------------------------------------------------------------------------
@ -167,7 +180,6 @@ void Scanner_idstart(Scanner * s, char *id) {
* Returns the next character from the scanner or 0 if end of the string.
* ----------------------------------------------------------------------------- */
static char nextchar(Scanner * s) {
char c[2] = { 0, 0 };
int nc;
if (!s->str)
return 0;
@ -183,12 +195,10 @@ static char nextchar(Scanner * s) {
DohIncref(s->str);
}
}
if (nc == '\n')
if ((nc == '\n') && (!s->freeze_line))
s->line++;
c[0] = (char) nc;
c[1] = 0;
Append(s->text, c);
return c[0];
Putc(nc,s->text);
return nc;
}
/* -----------------------------------------------------------------------------
@ -219,6 +229,17 @@ Scanner_errline(Scanner *s) {
return s->error_line;
}
/* -----------------------------------------------------------------------------
* Scanner_freeze_line()
*
* Freezes the current line number.
* ----------------------------------------------------------------------------- */
void
Scanner_freeze_line(Scanner *s, int val) {
s->freeze_line = val;
}
/* -----------------------------------------------------------------------------
* retract()
*
@ -233,7 +254,7 @@ static void retract(Scanner * s, int n) {
assert(n <= l);
for (i = 0; i < n; i++) {
if (str[l - 1] == '\n') {
s->line--;
if (!s->freeze_line) s->line--;
}
Seek(s->str, -1, SEEK_CUR);
Delitem(s->text, DOH_END);
@ -335,11 +356,8 @@ static void get_escape(Scanner *s) {
break;
case 10:
if (!isdigit(c)) {
char tmp[2];
retract(s,1);
tmp[0] = (char) result;
tmp[1] = 0;
Append(s->text, tmp);
Putc((char)result,s->text);
return;
}
result = (result << 3) + (c - '0');
@ -347,11 +365,8 @@ static void get_escape(Scanner *s) {
break;
case 20:
if (!isxdigit(c)) {
char tmp[2];
retract(s,1);
tmp[0] = (char) result;
tmp[1] = 0;
Append(s->text,tmp);
Putc((char)result, s->text);
return;
}
if (isdigit(c))
@ -374,11 +389,10 @@ static void get_escape(Scanner *s) {
static int look(Scanner * s) {
int state;
int c = 0;
int comment_start = 0;
state = 0;
Clear(s->text);
Setline(s->text, Getline(s->str));
s->start_line = s->line;
Setfile(s->text, Getfile(s->str));
while (1) {
switch (state) {
@ -394,7 +408,7 @@ static int look(Scanner * s) {
retract(s, 1);
state = 1000;
Clear(s->text);
Setline(s->text, Getline(s->str));
Setline(s->text, s->line);
Setfile(s->text, Getfile(s->str));
}
break;
@ -455,7 +469,7 @@ static int look(Scanner * s) {
else if (c == '@')
return SWIG_TOKEN_AT;
else if (c == '$')
return SWIG_TOKEN_DOLLAR;
state = 75;
else if (c == '#')
return SWIG_TOKEN_POUND;
else if (c == '?')
@ -465,11 +479,12 @@ static int look(Scanner * s) {
else if (c == '/') {
state = 1; /* Comment (maybe) */
comment_start = s->line;
s->start_line = s->line;
}
else if (c == '\"') {
state = 2; /* Possibly a string */
s->string_start = s->line;
s->start_line = s->line;
Clear(s->text);
}
else if (c == ':')
@ -477,10 +492,12 @@ static int look(Scanner * s) {
else if (c == '0')
state = 83; /* An octal or hex value */
else if (c == '\'') {
s->string_start = s->line;
s->start_line = s->line;
Clear(s->text);
state = 9; /* A character constant */
} else if (c == '`') {
s->string_start = s->line;
s->start_line = s->line;
Clear(s->text);
state = 900;
}
@ -516,7 +533,7 @@ static int look(Scanner * s) {
break;
case 10: /* C++ style comment */
if ((c = nextchar(s)) == 0) {
set_error(s,comment_start,"Unterminated comment");
set_error(s,s->start_line,"Unterminated comment");
return SWIG_TOKEN_ERROR;
}
if (c == '\n') {
@ -528,7 +545,7 @@ static int look(Scanner * s) {
break;
case 11: /* C style comment block */
if ((c = nextchar(s)) == 0) {
set_error(s,comment_start,"Unterminated comment");
set_error(s,s->start_line,"Unterminated comment");
return SWIG_TOKEN_ERROR;
}
if (c == '*') {
@ -539,7 +556,7 @@ static int look(Scanner * s) {
break;
case 12: /* Still in C style comment */
if ((c = nextchar(s)) == 0) {
set_error(s,comment_start,"Unterminated comment");
set_error(s,s->start_line,"Unterminated comment");
return SWIG_TOKEN_ERROR;
}
if (c == '*') {
@ -553,12 +570,14 @@ static int look(Scanner * s) {
case 2: /* Processing a string */
if ((c = nextchar(s)) == 0) {
set_error(s,s->string_start, "Unterminated string");
set_error(s,s->start_line, "Unterminated string");
return SWIG_TOKEN_ERROR;
}
if (c == '\"') {
Delitem(s->text, DOH_END);
return SWIG_TOKEN_STRING;
} else if (c == '\\') {
Delitem(s->text, DOH_END);
get_escape(s);
} else
state = 2;
@ -647,6 +666,7 @@ static int look(Scanner * s) {
if (c == '}') {
Delitem(s->text, DOH_END);
Delitem(s->text, DOH_END);
Seek(s->text,0,SEEK_SET);
return SWIG_TOKEN_CODEBLOCK;
} else {
state = 40;
@ -710,6 +730,19 @@ static int look(Scanner * s) {
return SWIG_TOKEN_ID;
}
break;
case 75: /* Special identifier $ */
if ((c = nextchar(s)) == 0)
return SWIG_TOKEN_DOLLAR;
if (isalnum(c) || (c == '_') || (c == '*') || (c == '&')) {
state = 7;
} else {
retract(s,1);
if (Len(s->text) == 1) return SWIG_TOKEN_DOLLAR;
return SWIG_TOKEN_ID;
}
break;
case 8: /* A numerical digit */
if ((c = nextchar(s)) == 0)
return SWIG_TOKEN_INT;
@ -880,12 +913,14 @@ static int look(Scanner * s) {
/* A character constant */
case 9:
if ((c = nextchar(s)) == 0) {
set_error(s,s->string_start,"Unterminated character constant");
set_error(s,s->start_line,"Unterminated character constant");
return SWIG_TOKEN_ERROR;
}
if (c == '\'') {
Delitem(s->text, DOH_END);
return (SWIG_TOKEN_CHAR);
} else if (c == '\\') {
Delitem(s->text, DOH_END);
get_escape(s);
}
break;
@ -919,7 +954,7 @@ static int look(Scanner * s) {
case 210: /* MINUS, MINUSMINUS, MINUSEQUAL, ARROW */
if ((c = nextchar(s)) == 0)
return SWIG_TOKEN_MINUS;
else if (c == '+')
else if (c == '-')
return SWIG_TOKEN_MINUSMINUS;
else if (c == '=')
return SWIG_TOKEN_MINUSEQUAL;
@ -993,10 +1028,11 @@ static int look(Scanner * s) {
/* Reverse string */
case 900:
if ((c = nextchar(s)) == 0) {
set_error(s,s->string_start,"Unterminated character constant");
set_error(s,s->start_line,"Unterminated character constant");
return SWIG_TOKEN_ERROR;
}
if (c == '`') {
Delitem(s->text, DOH_END);
return (SWIG_TOKEN_RSTRING);
}
break;
@ -1021,8 +1057,13 @@ int Scanner_token(Scanner * s) {
s->nexttoken = -1;
return t;
}
Clear(s->text);
s->start_line = 0;
t = look(s);
if (!s->start_line) {
Setline(s->text,s->line);
} else {
Setline(s->text,s->start_line);
}
return t;
}
@ -1047,14 +1088,15 @@ void Scanner_skip_line(Scanner * s) {
int done = 0;
Clear(s->text);
Setfile(s->text, Getfile(s->str));
Setline(s->text, Getline(s->str));
Setline(s->text, s->line);
while (!done) {
if ((c = nextchar(s)) == 0)
return;
if (c == '\\')
if (c == '\\') {
c = nextchar(s);
else if (c == '\n')
} else if (c == '\n') {
done = 1;
}
}
return;
}
@ -1076,7 +1118,7 @@ int Scanner_skip_balanced(Scanner * s, int startchar, int endchar) {
temp[0] = (char) startchar;
Clear(s->text);
Setfile(s->text, Getfile(s->str));
Setline(s->text, Getline(s->str));
Setline(s->text, s->line);
Append(s->text, temp);
while (num_levels > 0) {

View file

@ -15,18 +15,20 @@ extern Scanner *NewScanner();
extern void DelScanner(Scanner *);
extern void Scanner_clear(Scanner *);
extern void Scanner_push(Scanner *, String *);
extern void Scanner_pushtoken(Scanner *, int, String_or_char *value);
extern void Scanner_pushtoken(Scanner *, int, const String_or_char *value);
extern int Scanner_token(Scanner *);
extern String *Scanner_text(Scanner *);
extern void Scanner_skip_line(Scanner *);
extern int Scanner_skip_balanced(Scanner *, int startchar, int endchar);
extern void Scanner_set_location(Scanner *, String *file, int line);
extern String *Scanner_get_file(Scanner *);
extern int Scanner_get_line(Scanner *);
extern String *Scanner_file(Scanner *);
extern int Scanner_line(Scanner *);
extern int Scanner_start_line(Scanner *);
extern void Scanner_idstart(Scanner *, char *idchar);
extern String *Scanner_errmsg(Scanner *);
extern int Scanner_errline(Scanner *);
extern int Scanner_isoperator(int tokval);
extern void Scanner_freeze_line(Scanner *s, int val);
/* Note: Tokens in range 100+ are for C/C++ operators */