swig/Source/Swig/scanner.c
Dave Beazley 8bd2ea80fe Added cvstags
git-svn-id: https://swig.svn.sourceforge.net/svnroot/swig/trunk/SWIG@54 626c5289-ae23-0410-ae9c-e8d60b6d4f22
2000-01-12 04:09:03 +00:00

750 lines
19 KiB
C

/****************************************************************************
* Simplified Wrapper and Interface Generator (SWIG)
*
* Author : David Beazley
*
* Department of Computer Science
* University of Chicago
* 1100 E 58th Street
* Chicago, IL 60637
* beazley@cs.uchicago.edu
*
* Please read the file LICENSE for the copyright and terms by which SWIG
* can be used and distributed.
****************************************************************************/
/* CVS "$Header$" keyword string.  Presumably expanded by CVS on checkout so the
   revision is embedded in the object file (TODO confirm); it is not referenced
   anywhere else in the visible source. */
static char cvsroot[] = "$Header$";
#include "swigcore.h"
/* State of one lexical scanner.  Created by NewScanner(), released by
   DelScanner(). */
struct Scanner {
DOH *text; /* Current token value; nextchar() appends every character it reads */
DOH *scanobjs; /* Objects being scanned (list; item 0 is the current input) */
DOH *str; /* Current object being scanned, 0 if none; holds its own reference */
char *idstart; /* Optional identifier start characters (searched with strchr) */
int nexttoken; /* Next token to be returned, or -1 if none is pending */
int start_line; /* Starting line of certain declarations (set when "%{" is seen) */
int string_start; /* Line on which the current string/character constant began */
int line; /* Line counter: bumped on '\n' in nextchar(), undone in retract() */
int yylen; /* Length of text pushed into text (only ever reset to 0 in this file) */
DOH *file; /* Initialized to 0 and deleted in DelScanner(); not otherwise used here */
};
#include <ctype.h>
/* -----------------------------------------------------------------------------
* $Header$
*
* scanner.c
*
* A generic C-based lexical scanner.
* ----------------------------------------------------------------------------- */
/* -----------------------------------------------------------------------------
 * Scanner *NewScanner() - Create a new scanner object.
 *
 * Allocates a scanner with malloc() and gives every field its initial value:
 * line counters at 1, no pending token (-1), an empty token text, an empty
 * list of scan objects and no current input.
 *
 * Returns the new scanner, or 0 if the allocation fails.
 * ----------------------------------------------------------------------------- */
Scanner *NewScanner()
{
  Scanner *s;

  s = (Scanner *) malloc(sizeof(Scanner));
  if (!s) return 0;        /* Out of memory: fail instead of writing through a null pointer */
  s->line = 1;
  s->file = 0;
  s->nexttoken = -1;
  s->start_line = 1;
  s->string_start = 0;
  s->yylen = 0;
  s->idstart = "";
  s->scanobjs = NewList();
  s->text = NewString("");
  s->str = 0;
  return s;
}
/* -----------------------------------------------------------------------------
 * DelScanner(Scanner *s) - Delete a Scanner object
 *
 * Releases everything the scanner owns, including the extra reference it keeps
 * on the current input (s->str), and then frees the scanner itself.
 * ----------------------------------------------------------------------------- */
void DelScanner(Scanner *s)
{
  assert(s);
  /* s->str carries its own reference (taken when the input became current),
     separate from the one held by the scanobjs list, so it must be dropped
     here as well. */
  if (s->str) Delete(s->str);
  Delete(s->scanobjs);
  Delete(s->text);
  Delete(s->file);
  free(s);
}
/* -----------------------------------------------------------------------------
 * Scanner_clear(Scanner *s) - Clear a scanner object
 *
 * Drops the current input and all queued inputs, empties the token text and
 * resets the counters.  The scanner can be reused afterwards.
 * ----------------------------------------------------------------------------- */
void Scanner_clear(Scanner *s) {
  assert(s);
  if (s->str) Delete(s->str);
  s->str = 0;              /* Do not keep a dangling pointer to the released input */
  Clear(s->text);
  Clear(s->scanobjs);
  s->line = 1;
  s->nexttoken = -1;
  s->start_line = 0;
  s->string_start = 0;
  s->yylen = 0;
}
/* -----------------------------------------------------------------------------
 * Scanner_push(Scanner *s, DOH *txt) - Push text into the scanner
 *
 * Queues txt on the list of scan objects and makes it the current input.  The
 * scanner takes its own reference on txt and releases the one it held on the
 * previous current input.  The line counter is taken from txt.
 * ----------------------------------------------------------------------------- */
void
Scanner_push(Scanner *s, DOH *txt) {
  assert(s && txt);

  Push(s->scanobjs, txt);

  /* Swap the current input over to txt */
  if (s->str) {
    Delete(s->str);
  }
  Incref(txt);
  s->str = txt;

  s->line = Getline(txt);
}
/* -----------------------------------------------------------------------------
 * Scanner_pushtoken(Scanner *s, int nt)
 *
 * Records nt as the token that the next call to Scanner_token() returns.
 * nt must lie in the range 0 .. MAXTOKENS-1.
 * ----------------------------------------------------------------------------- */
void
Scanner_pushtoken(Scanner *s, int nt)
{
  assert(s);
  assert((nt >= 0) && (nt < MAXTOKENS));

  s->nexttoken = nt;
}
/* -----------------------------------------------------------------------------
 * Scanner_set_location(Scanner *s, DOH *file, int line)
 *
 * Sets the file and line number of the input currently being scanned and
 * brings the scanner's own line counter in step with it.
 * ----------------------------------------------------------------------------- */
void
Scanner_set_location(Scanner *s, DOH *file, int line)
{
  Setline(s->str,line);
  Setfile(s->str,file);
  /* s->line mirrors the line of the current input everywhere else it is
     assigned (Scanner_push, nextchar), so keep it in sync here too. */
  s->line = line;
}
/* -----------------------------------------------------------------------------
 * Scanner_get_file(Scanner *s) - Get current file
 *
 * Returns the file attribute of the input currently being scanned.
 * ----------------------------------------------------------------------------- */
DOH *Scanner_get_file(Scanner *s)
{
  DOH *current = s->str;

  return Getfile(current);
}
/* -----------------------------------------------------------------------------
 * Scanner_get_line(Scanner *s) - Get current line
 *
 * Returns the line attribute of the input currently being scanned.
 * ----------------------------------------------------------------------------- */
int Scanner_get_line(Scanner *s)
{
  DOH *current = s->str;

  return Getline(current);
}
/* -----------------------------------------------------------------------------
 * char nextchar(Scanner *s)
 *
 * Reads one character from the current input and appends it to the token
 * text.  When the current input is exhausted it is released and removed from
 * the list of scan objects, and reading continues with the next object on the
 * list.  The line counter is advanced on every newline.
 *
 * Returns the character read, or 0 when there is no input left.
 * ----------------------------------------------------------------------------- */
static char
nextchar(Scanner *s)
{
  char buf[2] = {0,0};
  int ch;

  if (!s->str) return 0;

  for (;;) {
    ch = Getc(s->str);
    if (ch != EOF) break;

    /* Current input is used up: drop it and move on to the next one */
    Delete(s->str);
    s->str = 0;
    Delitem(s->scanobjs,0);
    if (Len(s->scanobjs) == 0) return 0;
    s->str = Getitem(s->scanobjs,0);
    s->line = Getline(s->str);
    if (s->str)
      Incref(s->str);
  }

  if (nc_is_newline_placeholder_unused: 0) {}
  if (ch == '\n') s->line++;

  buf[0] = (char) ch;
  buf[1] = 0;
  Append(s->text,buf);
  return buf[0];
}
/* -----------------------------------------------------------------------------
 * void retract(Scanner *s, int n)
 *
 * Retract n characters: the last n characters of the token text are removed
 * and the current input is moved back by the same amount.  The line counter
 * is decremented once for every newline among the retracted characters.
 *
 * n must not exceed the length of the token text.
 * ----------------------------------------------------------------------------- */
static void
retract(Scanner *s, int n) {
  int i, l;
  char *str;

  str = Char(s->text);
  l = Len(s->text);
  assert(n <= l);

  /* Inspect each retracted character (not just the final one) before the
     text is modified, so the line count stays right when n > 1. */
  for (i = 1; i <= n; i++) {
    if (str[l-i] == '\n') {
      s->line--;
    }
  }

  for (i = 0; i < n; i++) {
    Seek(s->str,-1, SEEK_CUR);
    Delitem(s->text,DOH_END);
  }
}
/* -----------------------------------------------------------------------------
 * int look(Scanner *s)
 *
 * Get the next token.
 *
 * Runs a hand-written state machine over the characters delivered by
 * nextchar().  The characters of the token accumulate in s->text, which is
 * also stamped with the file/line of the current input.  Leading whitespace
 * (other than newline) is skipped; a newline is reported as TOKEN_ENDLINE.
 *
 * Returns a TOKEN_* code, TOKEN_ILLEGAL for an unrecognized character, or 0
 * when the input ends before a token could be completed (including inside an
 * unterminated comment, string, character constant or %{ ... %} block).
 *
 * Character-class tests cast to unsigned char because passing a negative
 * char value to the <ctype.h> functions is undefined.
 * ----------------------------------------------------------------------------- */
static int
look(Scanner *s)
{
  int state;
  char c = 0;

  state = 0;
  Clear(s->text);
  Setline(s->text, Getline(s->str));
  Setfile(s->text, Getfile(s->str));
  while(1) {
    switch(state) {
    case 0 :
      if((c = nextchar(s)) == 0) return(0);

      /* Process delimiters */
      if (c == '\n') {
        return TOKEN_ENDLINE;
      } else if (!isspace((unsigned char) c)) {
        /* First character of a token: put it back and restart with clean text */
        retract(s,1);
        state = 1000;
        Clear(s->text);
        Setline(s->text, Getline(s->str));
        Setfile(s->text, Getfile(s->str));
      }
      break;

    case 1000:
      if ((c = nextchar(s)) == 0) return (0);
      if (c == '%') state = 4;            /* Possibly a SWIG directive */

      /* Look for possible identifiers */
      else if ((isalpha((unsigned char) c)) || (c == '_') || (strchr(s->idstart,c))) state = 7;

      /* Look for single character symbols */
      else if (c == '(') return TOKEN_LPAREN;
      else if (c == ')') return TOKEN_RPAREN;
      else if (c == ';') return TOKEN_SEMI;
      else if (c == ',') return TOKEN_COMMA;
      else if (c == '*') return TOKEN_STAR;
      else if (c == '}') return TOKEN_RBRACE;
      else if (c == '{') return TOKEN_LBRACE;
      else if (c == '=') state = 33;
      else if (c == '+') return TOKEN_PLUS;
      else if (c == '-') return TOKEN_MINUS;
      else if (c == '&') state = 31;
      else if (c == '|') state = 32;
      else if (c == '^') return TOKEN_XOR;
      else if (c == '<') state = 60;
      else if (c == '>') state = 61;
      else if (c == '~') return TOKEN_NOT;
      else if (c == '!') state = 3;
      else if (c == '\\') return TOKEN_BACKSLASH;
      else if (c == '[') return TOKEN_LBRACKET;
      else if (c == ']') return TOKEN_RBRACKET;
      else if (c == '@') return TOKEN_AT;
      else if (c == '$') return TOKEN_DOLLAR;
      else if (c == '#') return TOKEN_POUND;

      /* Look for multi-character sequences */
      else if (c == '/') state = 1;       /* Comment (maybe) */
      else if (c == '\"') {
        state = 2;                        /* Possibly a string */
        s->string_start = s->line;
      }
      else if (c == ':') state = 5;       /* maybe double colon */
      else if (c == '0') state = 83;      /* An octal or hex value */
      else if (c == '\'') {
        s->string_start = s->line;
        state = 9;                        /* A character constant */
      }
      else if (c == '.') state = 100;     /* Maybe a number, maybe just a period */
      else if (isdigit((unsigned char) c)) state = 8;   /* A numerical value */
      else state = 99;                    /* An error (handled by the default case) */
      break;

    case 1:  /* Comment block */
      if ((c = nextchar(s)) == 0) return(0);
      if (c == '/') {
        state = 10;                       /* C++ style comment */
        Clear(s->text);
        Setline(s->text, Getline(s->str));
        Setfile(s->text, Getfile(s->str));
        Append(s->text,"  ");
      } else if (c == '*') {
        state = 11;                       /* C style comment */
        Clear(s->text);
        Setline(s->text, Getline(s->str));
        Setfile(s->text, Getfile(s->str));
        Append(s->text,"  ");
      } else {
        retract(s,1);
        return TOKEN_SLASH;
      }
      break;

    case 10:  /* C++ style comment */
      if ((c = nextchar(s)) == 0) {
        /* add_error(0,"Unterminated comment",comment_start); */
        return 0;
      }
      if (c == '\n') {
        return TOKEN_ENDLINE;
      } else {
        state = 10;
      }
      break;

    case 11: /* C style comment block */
      if ((c = nextchar(s)) == 0) {
        /* add_error(0,"Unterminated comment",comment_start); */
        return 0;
      }
      if (c == '*') {
        state = 12;
      } else {
        state = 11;
      }
      break;

    case 12: /* Still in C style comment */
      if ((c = nextchar(s)) == 0) {
        /* add_error(0,"Unterminated comment",comment_start); */
        return 0;
      }
      if (c == '*') {
        state = 12;
      } else if (c == '/') {
        /* End of comment: discard it and go back to looking for a token */
        Clear(s->text);
        state = 0;
      } else {
        state = 11;
      }
      break;

    case 2: /* Processing a string */
      if ((c = nextchar(s)) == 0) {
        /* add_error(0,"Unterminated string", string_start); */
        return 0;
      }
      if (c == '\"') {
        return TOKEN_STRING;
      } else if (c == '\\') {
        state = 21;             /* Possibly an escape sequence. */
        break;
      } else state = 2;
      break;

    case 21: /* An escape sequence. get next character, then go
                back to processing strings */
      if ((c = nextchar(s)) == 0) return 0;
      state = 2;
      break;

    case 3: /* Maybe a not equals */
      if ((c = nextchar(s)) == 0) return TOKEN_LNOT;
      else if (c == '=') return TOKEN_NOTEQUAL;
      else {
        retract(s,1);
        return TOKEN_LNOT;
      }
      break;

    case 31: /* AND or Logical AND */
      if ((c = nextchar(s)) == 0) return TOKEN_AND;
      else if (c == '&') return TOKEN_LAND;
      else {
        retract(s,1);
        return TOKEN_AND;
      }
      break;

    case 32: /* OR or Logical OR */
      if ((c = nextchar(s)) == 0) return TOKEN_OR;
      else if (c == '|') return TOKEN_LOR;
      else {
        retract(s,1);
        return TOKEN_OR;
      }
      break;

    case 33: /* EQUAL or EQUALTO */
      if ((c = nextchar(s)) == 0) return TOKEN_EQUAL;
      else if (c == '=') return TOKEN_EQUALTO;
      else {
        retract(s,1);
        return TOKEN_EQUAL;
      }
      break;

    case 4: /* A wrapper generator directive (maybe) */
      if (( c= nextchar(s)) == 0) return TOKEN_PERCENT;
      if (c == '{') {
        state = 40;   /* Include block */
        Clear(s->text);
        Setline(s->text, Getline(s->str));
        Setfile(s->text, Getfile(s->str));
        s->start_line = s->line;
      }
      else if (strchr(s->idstart,'%') && ((isalpha((unsigned char) c)) || (c == '_'))) state = 7;
      else {
        retract(s,1);
        return TOKEN_PERCENT;
      }
      break;

    case 40: /* Process an include block */
      if ((c = nextchar(s)) == 0) {
        /* add_error(0,"Unterminated code block.", start_line); */
        return 0;
      }
      if (c == '%') state = 41;
      break;

    case 41: /* Still processing include block */
      if ((c = nextchar(s)) == 0) {
        /* add_error(0,"Unterminated code block.", start_line); */
        return 0;
      }
      if (c == '}') {
        /* Strip the closing "%}" from the block text */
        Delitem(s->text,DOH_END);
        Delitem(s->text,DOH_END);
        return TOKEN_CODEBLOCK;
      } else if (c == '%') {
        /* Another '%' may itself start the terminator ("%%}"), so keep
           waiting for '}' just as state 12 does for "**" + "/". */
        state = 41;
      } else {
        state = 40;
      }
      break;

    case 5: /* Maybe a double colon */
      if (( c = nextchar(s)) == 0) return TOKEN_COLON;
      if ( c == ':') return TOKEN_DCOLON;
      else {
        retract(s,1);
        return TOKEN_COLON;
      }
      break;

    case 60: /* shift operators */
      if ((c = nextchar(s)) == 0) return TOKEN_LESSTHAN;
      if (c == '<') return TOKEN_LSHIFT;
      else if (c == '=') return TOKEN_LTEQUAL;
      else {
        retract(s,1);
        return TOKEN_LESSTHAN;
      }
      break;

    case 61:
      if ((c = nextchar(s)) == 0) return TOKEN_GREATERTHAN;
      if (c == '>') return TOKEN_RSHIFT;
      else if (c == '=') return TOKEN_GTEQUAL;
      else {
        retract(s,1);
        return TOKEN_GREATERTHAN;
      }
      break;

    case 7: /* Identifier */
      if ((c = nextchar(s)) == 0) return TOKEN_ID;
      if (isalnum((unsigned char) c) || (c == '_') || (c == '$')) {
        state = 7;
      } else {
        retract(s,1);
        return TOKEN_ID;
      }
      break;

    case 8: /* A numerical digit */
      if ((c = nextchar(s)) == 0) return TOKEN_INT;
      if (c == '.') {state = 81;}
      /* NOTE(review): this goes straight to state 86, which does not accept
         a sign, so "1e-5" is not scanned as one number - confirm intent. */
      else if ((c == 'e') || (c == 'E')) {state = 86;}
      else if ((c == 'f') || (c == 'F')) {
        Delitem(s->text,DOH_END);         /* Drop the suffix from the token text */
        return TOKEN_FLOAT;
      } else if (isdigit((unsigned char) c)) { state = 8;}
      else if ((c == 'l') || (c == 'L')) {
        state = 87;
      } else if ((c == 'u') || (c == 'U')) {
        state = 88;
      } else {
        retract(s,1);
        return TOKEN_INT;
      }
      break;

    case 81: /* A floating point number of some sort */
      if ((c = nextchar(s)) == 0) return TOKEN_DOUBLE;
      if (isdigit((unsigned char) c)) state = 81;
      else if ((c == 'e') || (c == 'E')) state = 82;
      else if ((c == 'f') || (c == 'F') || (c == 'l') || (c == 'L')) {
        Delitem(s->text,DOH_END);         /* Drop the suffix from the token text */
        return TOKEN_FLOAT;
      } else {
        retract(s,1);
        return(TOKEN_DOUBLE);
      }
      break;

    case 82: /* Just saw the exponent marker of a floating point number */
      if ((c = nextchar(s)) == 0) {
        retract(s,1);
        return TOKEN_INT;
      }
      if ((isdigit((unsigned char) c)) || (c == '-') || (c == '+')) state = 86;
      else {
        /* Not an exponent after all: give back this character and the 'e' */
        retract(s,2);
        return(TOKEN_INT);
      }
      break;

    case 83:
      /* Might be a hexadecimal or octal number */
      if ((c = nextchar(s)) == 0) return TOKEN_INT;
      if (isdigit((unsigned char) c)) state = 84;
      else if ((c == 'x') || (c == 'X')) state = 85;
      else if (c == '.') state = 81;
      else if ((c == 'l') || (c == 'L')) {
        state = 87;
      } else if ((c == 'u') || (c == 'U')) {
        state = 88;
      } else {
        retract(s,1);
        return TOKEN_INT;
      }
      break;

    case 84:
      /* This is an octal number */
      if ((c = nextchar(s)) == 0) return TOKEN_INT;
      if (isdigit((unsigned char) c)) state = 84;
      else if ((c == 'l') || (c == 'L')) {
        state = 87;
      } else if ((c == 'u') || (c == 'U')) {
        state = 88;
      } else {
        retract(s,1);
        return TOKEN_INT;
      }
      break;

    case 85:
      /* This is a hex number */
      if ((c = nextchar(s)) == 0) return TOKEN_INT;
      if (isxdigit((unsigned char) c))
        state = 85;
      else if ((c == 'l') || (c == 'L')) {
        state = 87;
      } else if ((c == 'u') || (c == 'U')) {
        state = 88;
      } else {
        retract(s,1);
        return TOKEN_INT;
      }
      break;

    case 86:
      /* Rest of floating point number */
      if ((c = nextchar(s)) == 0) return TOKEN_DOUBLE;
      if (isdigit((unsigned char) c)) state = 86;
      else if ((c == 'f') || (c == 'F')) {
        Delitem(s->text,DOH_END);
        return TOKEN_FLOAT;
      } else if ((c == 'l') || (c == 'L')) {
        Delitem(s->text,DOH_END);
        return TOKEN_DOUBLE;
      } else {
        retract(s,1);
        return TOKEN_DOUBLE;
      }
      break;

    case 87 :
      /* A long integer of some sort (every path returns) */
      if ((c = nextchar(s)) == 0) return TOKEN_LONG;
      if ((c == 'u') || (c == 'U')) {
        return TOKEN_ULONG;
      } else {
        retract(s,1);
        return TOKEN_LONG;
      }

      /* An unsigned number (every path returns) */
    case 88:
      if ((c = nextchar(s)) == 0) return TOKEN_UINT;
      if ((c == 'l') || (c == 'L')) {
        return TOKEN_ULONG;
      } else {
        retract(s,1);
        return TOKEN_UINT;
      }

      /* A character constant */
    case 9:
      if ((c = nextchar(s)) == 0) {
        /* add_error(0,"Unterminated character constant", string_start); */
        return 0;
      }
      if (c == '\'') {
        return(TOKEN_CHAR);
      } else if (c == '\\') state = 91;
      break;

    case 91: /* Escaped character inside a character constant */
      if ((c = nextchar(s)) == 0) {
        /* add_error(0,"Unterminated character constant", string_start); */
        return 0;
      }
      state = 9;
      break;

      /* A period or maybe a floating point number */
    case 100:
      if ((c = nextchar(s)) == 0) return (0);
      if (isdigit((unsigned char) c)) state = 81;
      else {
        retract(s,1);
        return TOKEN_PERIOD;
      }
      break;

      /* An illegal character */
    default:
      return TOKEN_ILLEGAL;
    }
  }
}
/* -----------------------------------------------------------------------------
 * int Scanner_token(Scanner *s)
 *
 * Return the next token or 0 if at the end of the string.  A token queued
 * with Scanner_pushtoken() takes precedence over scanning: it is returned
 * (once) with an empty token text.
 * ----------------------------------------------------------------------------- */
int
Scanner_token(Scanner *s)
{
  int pending;

  Clear(s->text);

  pending = s->nexttoken;
  if (pending < 0) {
    /* Nothing queued: scan the input */
    return look(s);
  }

  s->nexttoken = -1;
  return pending;
}
/* -----------------------------------------------------------------------------
 * DOH *Scanner_text(Scanner *s)
 *
 * Return the text associated with the last token returned.  The object
 * returned is the scanner's own text buffer, not a copy.
 * ----------------------------------------------------------------------------- */
DOH *Scanner_text(Scanner *s)
{
  DOH *token_text = s->text;

  return token_text;
}
/* -----------------------------------------------------------------------------
 * void Scanner_skip_line(Scanner *s)
 *
 * Skips to the end of a line.  A backslash causes the character after it to
 * be consumed without being examined, so a backslash-newline pair does not
 * end the skip.  Stops early if the input runs out.  The skipped characters
 * are left in the token text.
 * ----------------------------------------------------------------------------- */
void
Scanner_skip_line(Scanner *s)
{
  char ch;

  Clear(s->text);
  Setfile(s->text,Getfile(s->str));
  Setline(s->text,Getline(s->str));

  for (;;) {
    ch = nextchar(s);
    if (ch == 0) return;
    if (ch == '\\') {
      /* Swallow the escaped character, whatever it is */
      ch = nextchar(s);
    } else if (ch == '\n') {
      break;
    }
  }
  return;
}
/* -----------------------------------------------------------------------------
 * int Scanner_skip_balanced(Scanner *s, int startchar, int endchar)
 *
 * Skips a piece of code enclosed in begin/end symbols such as '{...}' or
 * (...).  Ignores symbols inside comments or strings.
 *
 * The opening startchar is taken as already consumed (the nesting level
 * starts at 1); scanning continues until the matching endchar has been read.
 * The token text is reset to startchar and then receives every character
 * read.
 *
 * Returns 0 on success, or -1 if the input ends before the closing symbol.
 * ----------------------------------------------------------------------------- */
int
Scanner_skip_balanced(Scanner *s, int startchar, int endchar)
{
  char c;
  int  num_levels = 1;
  int  state = 0;
  char temp[2] = {0,0};

  temp[0] = (char) startchar;
  Clear(s->text);
  Setfile(s->text,Getfile(s->str));
  Setline(s->text,Getline(s->str));
  Append(s->text,temp);
  while (num_levels > 0) {
    if ((c = nextchar(s)) == 0) {
      return -1;
    }
    switch(state) {
    case 0:           /* Ordinary code */
      if (c == startchar) num_levels++;
      else if (c == endchar) num_levels--;
      else if (c == '/') state = 10;
      else if (c == '\"') state = 20;
      else if (c == '\'') state = 30;
      break;
    case 10:          /* Seen '/': comment start or plain division */
      if (c == '/') state = 11;
      else if (c == '*') state = 12;
      else {
        /* Plain '/': the character after it is ordinary code and must be
           examined like any other, otherwise "a/(b)" would miss the '('. */
        state = 0;
        if (c == startchar) num_levels++;
        else if (c == endchar) num_levels--;
        else if (c == '\"') state = 20;
        else if (c == '\'') state = 30;
      }
      break;
    case 11:          /* C++ style comment, runs to end of line */
      if (c == '\n') state = 0;
      else state = 11;
      break;
    case 12:          /* C style comment */
      if (c == '*') state = 13;
      break;
    case 13:          /* C style comment, just seen '*' */
      if (c == '*') state = 13;
      else if (c == '/') state = 0;
      else state = 12;
      break;
    case 20:          /* String literal */
      if (c == '\"') state = 0;
      else if (c == '\\') state = 21;
      break;
    case 21:          /* Escaped character inside a string */
      state = 20;
      break;
    case 30:          /* Character constant */
      if (c == '\'') state = 0;
      else if (c == '\\') state = 31;
      break;
    case 31:          /* Escaped character inside a character constant */
      state = 30;
      break;
    default:
      break;
    }
  }
  return 0;
}