swig/Source/Swig/scanner.c
2005-11-28 17:32:56 +00:00

814 lines
21 KiB
C

/* -----------------------------------------------------------------------------
* scanner.c
*
* This file implements a general purpose C/C++ compatible lexical scanner.
* This scanner isn't intended to be plugged directly into a parser built
* with yacc. Rather, it contains a lot of generic code that could be used
* to easily construct yacc-compatible scanners.
*
* Author(s) : David Beazley (beazley@cs.uchicago.edu)
*
* Copyright (C) 1999-2000. The University of Chicago
* See the file LICENSE for information on usage and redistribution.
* ----------------------------------------------------------------------------- */
char cvsroot_scanner_c[] = "$Header$";
#include "swig.h"
#include <ctype.h>
struct SwigScanner {
String *text; /* Current token value */
List *scanobjs; /* Objects being scanned */
String *str; /* Current object being scanned */
char *idstart; /* Optional identifier start characters */
int nexttoken; /* Next token to be returned */
int start_line; /* Starting line of certain declarations */
int string_start;
int line;
int yylen; /* Length of text pushed into text */
String *file;
};
/* -----------------------------------------------------------------------------
* NewSwigScanner()
*
* Create a new scanner object
* ----------------------------------------------------------------------------- */
SwigScanner *
NewSwigScanner() {
SwigScanner *s;
s = (SwigScanner *) malloc(sizeof(SwigScanner));
s->line = 1;
s->file = 0;
s->nexttoken = -1;
s->start_line = 1;
s->string_start = 0;
s->yylen = 0;
s->idstart = "";
s->scanobjs = NewList();
s->text = NewStringEmpty();
s->str = 0;
return s;
}
/* -----------------------------------------------------------------------------
* DelSwigScanner()
*
* Delete a scanner object.
* ----------------------------------------------------------------------------- */
void
DelSwigScanner(SwigScanner *s) {
assert(s);
Delete(s->scanobjs);
Delete(s->text);
Delete(s->file);
free(s);
}
/* -----------------------------------------------------------------------------
* SwigScanner_clear()
*
* Clear the contents of a scanner object.
* ----------------------------------------------------------------------------- */
void
SwigScanner_clear(SwigScanner *s) {
assert(s);
Delete(s->str);
Clear(s->text);
Clear(s->scanobjs);
s->line = 1;
s->nexttoken = -1;
s->start_line = 0;
s->string_start = 0;
s->yylen = 0;
}
/* -----------------------------------------------------------------------------
* SwigScanner_push()
*
* Push some new text into the scanner. The scanner will start parsing this text
* immediately before returning to the old text.
* ----------------------------------------------------------------------------- */
void
SwigScanner_push(SwigScanner *s, String *txt) {
assert(s && txt);
Push(s->scanobjs,txt);
if (s->str) Delete(s->str);
s->str = txt;
DohIncref(s->str);
s->line = Getline(txt);
}
/* -----------------------------------------------------------------------------
* SwigScanner_pushtoken()
*
* Push a token into the scanner. This token will be returned on the next
* call to SwigScanner_token().
* ----------------------------------------------------------------------------- */
void
SwigScanner_pushtoken(SwigScanner *s, int nt) {
assert(s);
assert((nt >= 0) && (nt < SWIG_MAXTOKENS));
s->nexttoken = nt;
}
/* -----------------------------------------------------------------------------
* SwigScanner_set_location()
*
* Set the file and line number location of the scanner.
* ----------------------------------------------------------------------------- */
void
SwigScanner_set_location(SwigScanner *s, String *file, int line) {
Setline(s->str,line);
Setfile(s->str,file);
}
/* -----------------------------------------------------------------------------
* SwigScanner_get_file()
*
* Get the current file.
* ----------------------------------------------------------------------------- */
String *
SwigScanner_get_file(SwigScanner *s) {
return Getfile(s->str);
}
/* -----------------------------------------------------------------------------
* SwigScanner_get_line()
*
* Get the current line number
* ----------------------------------------------------------------------------- */
int
SwigScanner_get_line(SwigScanner *s) {
return Getline(s->str);
}
/* -----------------------------------------------------------------------------
* SwigScanner_idstart()
*
* Change the set of additional characters that can be used to start an identifier.
* ----------------------------------------------------------------------------- */
void
SwigScanner_idstart(SwigScanner *s, char *id) {
s->idstart = Swig_copy_string(id);
}
/* -----------------------------------------------------------------------------
* nextchar()
*
* Returns the next character from the scanner or 0 if end of the string.
* ----------------------------------------------------------------------------- */
static char
nextchar(SwigScanner *s)
{
char c[2] = {0,0};
int nc;
if (!s->str) return 0;
while ((nc = Getc(s->str)) == EOF) {
Delete(s->str);
s->str = 0;
Delitem(s->scanobjs,0);
if (Len(s->scanobjs) == 0) return 0;
s->str = Getitem(s->scanobjs,0);
if (s->str) {
s->line = Getline(s->str);
DohIncref(s->str);
}
}
if (nc == '\n') s->line++;
c[0] = (char) nc;
c[1] = 0;
Append(s->text,c);
return c[0];
}
/* -----------------------------------------------------------------------------
* retract()
*
* Retract n characters
* ----------------------------------------------------------------------------- */
static void
retract(SwigScanner *s, int n) {
int i, l;
char *str;
str = Char(s->text);
l = Len(s->text);
assert(n <= l);
for (i = 0; i < n; i++) {
if (str[l-1] == '\n') {
s->line--;
}
/* // Ungetc(str[l-1],s->str); */
Seek(s->str,-1, SEEK_CUR);
Delitem(s->text,DOH_END);
}
}
/* -----------------------------------------------------------------------------
* look()
*
* Return the raw value of the next token.
* ----------------------------------------------------------------------------- */
static int
look(SwigScanner *s) {
int state;
int c = 0;
state = 0;
Clear(s->text);
Setline(s->text, Getline(s->str));
Setfile(s->text, Getfile(s->str));
while(1) {
switch(state) {
case 0 :
if((c = nextchar(s)) == 0) return(0);
/* Process delimeters */
if (c == '\n') {
return SWIG_TOKEN_ENDLINE;
} else if (!isspace(c)) {
retract(s,1);
state = 1000;
Clear(s->text);
Setline(s->text, Getline(s->str));
Setfile(s->text, Getfile(s->str));
}
break;
case 1000:
if ((c = nextchar(s)) == 0) return (0);
if (c == '%') state = 4; /* Possibly a SWIG directive */
/* Look for possible identifiers */
else if ((isalpha(c)) || (c == '_') || (strchr(s->idstart,c))) state = 7;
/* Look for single character symbols */
else if (c == '(') return SWIG_TOKEN_LPAREN;
else if (c == ')') return SWIG_TOKEN_RPAREN;
else if (c == ';') return SWIG_TOKEN_SEMI;
else if (c == ',') return SWIG_TOKEN_COMMA;
else if (c == '*') return SWIG_TOKEN_STAR;
else if (c == '}') return SWIG_TOKEN_RBRACE;
else if (c == '{') return SWIG_TOKEN_LBRACE;
else if (c == '=') state = 33;
else if (c == '+') return SWIG_TOKEN_PLUS;
else if (c == '-') return SWIG_TOKEN_MINUS;
else if (c == '&') state = 31;
else if (c == '|') state = 32;
else if (c == '^') return SWIG_TOKEN_XOR;
else if (c == '<') state = 60;
else if (c == '>') state = 61;
else if (c == '~') return SWIG_TOKEN_NOT;
else if (c == '!') state = 3;
else if (c == '\\') return SWIG_TOKEN_BACKSLASH;
else if (c == '[') return SWIG_TOKEN_LBRACKET;
else if (c == ']') return SWIG_TOKEN_RBRACKET;
else if (c == '@') return SWIG_TOKEN_AT;
else if (c == '$') return SWIG_TOKEN_DOLLAR;
else if (c == '#') return SWIG_TOKEN_POUND;
/* Look for multi-character sequences */
else if (c == '/') state = 1; /* Comment (maybe) */
else if (c == '\"') {
state = 2; /* Possibly a string */
s->string_start = s->line;
}
else if (c == ':') state = 5; /* maybe double colon */
else if (c == '0') state = 83; /* An octal or hex value */
else if (c == '\'') {
s->string_start = s->line;
state = 9; /* A character constant */
}
else if (c == '`') {
s->string_start = s->line;
state = 900;
}
else if (c == '.') state = 100; /* Maybe a number, maybe just a period */
else if (isdigit(c)) state = 8; /* A numerical value */
else state = 99; /* An error */
break;
case 1: /* Comment block */
if ((c = nextchar(s)) == 0) return(0);
if (c == '/') {
state = 10; /* C++ style comment */
Clear(s->text);
Setline(s->text, Getline(s->str));
Setfile(s->text, Getfile(s->str));
Append(s->text," ");
} else if (c == '*') {
state = 11; /* C style comment */
Clear(s->text);
Setline(s->text, Getline(s->str));
Setfile(s->text, Getfile(s->str));
Append(s->text," ");
} else {
retract(s,1);
return SWIG_TOKEN_SLASH;
}
break;
case 10: /* C++ style comment */
if ((c = nextchar(s)) == 0) {
/* add_error(0,"Unterminated comment",comment_start); */
return 0;
}
if (c == '\n') {
return SWIG_TOKEN_ENDLINE;
} else {
state = 10;
}
break;
case 11: /* C style comment block */
if ((c = nextchar(s)) == 0) {
/* add_error(0,"Unterminated comment",comment_start); */
return 0;
}
if (c == '*') {
state = 12;
} else {
state = 11;
}
break;
case 12: /* Still in C style comment */
if ((c = nextchar(s)) == 0) {
/* add_error(0,"Unterminated comment",comment_start); */
return 0;
}
if (c == '*') {
state = 12;
} else if (c == '/') {
Clear(s->text);
state = 0;
} else {
state = 11;
}
break;
case 2: /* Processing a string */
if ((c = nextchar(s)) == 0) {
/* add_error(0,"Unterminated string", string_start); */
return 0;
}
if (c == '\"') {
return SWIG_TOKEN_STRING;
} else if (c == '\\') {
state = 21; /* Possibly an escape sequence. */
break;
} else state = 2;
break;
case 21: /* An escape sequence. get next character, then go
back to processing strings */
if ((c = nextchar(s)) == 0) return 0;
state = 2;
break;
case 3: /* Maybe a not equals */
if ((c = nextchar(s)) == 0) return SWIG_TOKEN_LNOT;
else if (c == '=') return SWIG_TOKEN_NOTEQUAL;
else {
retract(s,1);
return SWIG_TOKEN_LNOT;
}
break;
case 31: /* AND or Logical AND */
if ((c = nextchar(s)) == 0) return SWIG_TOKEN_AND;
else if (c == '&') return SWIG_TOKEN_LAND;
else {
retract(s,1);
return SWIG_TOKEN_AND;
}
break;
case 32: /* OR or Logical OR */
if ((c = nextchar(s)) == 0) return SWIG_TOKEN_OR;
else if (c == '|') return SWIG_TOKEN_LOR;
else {
retract(s,1);
return SWIG_TOKEN_OR;
}
break;
case 33: /* EQUAL or EQUALTO */
if ((c = nextchar(s)) == 0) return SWIG_TOKEN_EQUAL;
else if (c == '=') return SWIG_TOKEN_EQUALTO;
else {
retract(s,1);
return SWIG_TOKEN_EQUAL;
}
break;
case 4: /* A wrapper generator directive (maybe) */
if (( c= nextchar(s)) == 0) return SWIG_TOKEN_PERCENT;
if (c == '{') {
state = 40; /* Include block */
Clear(s->text);
Setline(s->text, Getline(s->str));
Setfile(s->text, Getfile(s->str));
s->start_line = s->line;
}
else if (strchr(s->idstart,'%') && ((isalpha(c)) || (c == '_'))) state = 7;
else {
retract(s,1);
return SWIG_TOKEN_PERCENT;
}
break;
case 40: /* Process an include block */
if ((c = nextchar(s)) == 0) {
/* add_error(0,"Unterminated code block.", start_line); */
return 0;
}
if (c == '%') state = 41;
break;
case 41: /* Still processing include block */
if ((c = nextchar(s)) == 0) {
/* add_error(0,"Unterminated code block.", start_line); */
return 0;
}
if (c == '}') {
Delitem(s->text,DOH_END);
Delitem(s->text,DOH_END);
return SWIG_TOKEN_CODEBLOCK;
} else {
state = 40;
}
break;
case 5: /* Maybe a double colon */
if (( c = nextchar(s)) == 0) return SWIG_TOKEN_COLON;
if ( c == ':') return SWIG_TOKEN_DCOLON;
else {
retract(s,1);
return SWIG_TOKEN_COLON;
}
break;
case 60: /* shift operators */
if ((c = nextchar(s)) == 0) return SWIG_TOKEN_LESSTHAN;
if (c == '<') return SWIG_TOKEN_LSHIFT;
else if (c == '=') return SWIG_TOKEN_LTEQUAL;
else {
retract(s,1);
return SWIG_TOKEN_LESSTHAN;
}
break;
case 61:
if ((c = nextchar(s)) == 0) return SWIG_TOKEN_GREATERTHAN;
if (c == '>') return SWIG_TOKEN_RSHIFT;
else if (c == '=') return SWIG_TOKEN_GTEQUAL;
else {
retract(s,1);
return SWIG_TOKEN_GREATERTHAN;
}
break;
case 7: /* Identifier */
if ((c = nextchar(s)) == 0) return SWIG_TOKEN_ID;
if (isalnum(c) || (c == '_') || (c == '$')) {
state = 7;
} else {
retract(s,1);
return SWIG_TOKEN_ID;
}
break;
case 8: /* A numerical digit */
if ((c = nextchar(s)) == 0) return SWIG_TOKEN_INT;
if (c == '.') {state = 81;}
else if ((c == 'e') || (c == 'E')) {state = 86;}
else if ((c == 'f') || (c == 'F')) {
Delitem(s->text,DOH_END);
return SWIG_TOKEN_FLOAT;
} else if (isdigit(c)) { state = 8;}
else if ((c == 'l') || (c == 'L')) {
state = 87;
} else if ((c == 'u') || (c == 'U')) {
state = 88;
} else {
retract(s,1);
return SWIG_TOKEN_INT;
}
break;
case 81: /* A floating pointer number of some sort */
if ((c = nextchar(s)) == 0) return SWIG_TOKEN_DOUBLE;
if (isdigit(c)) state = 81;
else if ((c == 'e') || (c == 'E')) state = 82;
else if ((c == 'f') || (c == 'F') || (c == 'l') || (c == 'L')) {
Delitem(s->text,DOH_END);
return SWIG_TOKEN_FLOAT;
} else {
retract(s,1);
return(SWIG_TOKEN_DOUBLE);
}
break;
case 82:
if ((c = nextchar(s)) == 0) {
retract(s,1);
return SWIG_TOKEN_INT;
}
if ((isdigit(c)) || (c == '-') || (c == '+')) state = 86;
else {
retract(s,2);
return(SWIG_TOKEN_INT);
}
break;
case 83:
/* Might be a hexidecimal or octal number */
if ((c = nextchar(s)) == 0) return SWIG_TOKEN_INT;
if (isdigit(c)) state = 84;
else if ((c == 'x') || (c == 'X')) state = 85;
else if (c == '.') state = 81;
else if ((c == 'l') || (c == 'L')) {
state = 87;
} else if ((c == 'u') || (c == 'U')) {
state = 88;
} else {
retract(s,1);
return SWIG_TOKEN_INT;
}
break;
case 84:
/* This is an octal number */
if ((c = nextchar(s)) == 0) return SWIG_TOKEN_INT;
if (isdigit(c)) state = 84;
else if ((c == 'l') || (c == 'L')) {
state = 87;
} else if ((c == 'u') || (c == 'U')) {
state = 88;
} else {
retract(s,1);
return SWIG_TOKEN_INT;
}
break;
case 85:
/* This is an hex number */
if ((c = nextchar(s)) == 0) return SWIG_TOKEN_INT;
if ((isdigit(c)) || (c=='a') || (c=='b') || (c=='c') ||
(c=='d') || (c=='e') || (c=='f') || (c=='A') ||
(c=='B') || (c=='C') || (c=='D') || (c=='E') ||
(c=='F'))
state = 85;
else if ((c == 'l') || (c == 'L')) {
state = 87;
} else if ((c == 'u') || (c == 'U')) {
state = 88;
} else {
retract(s,1);
return SWIG_TOKEN_INT;
}
break;
case 86:
/* Rest of floating point number */
if ((c = nextchar(s)) == 0) return SWIG_TOKEN_DOUBLE;
if (isdigit(c)) state = 86;
else if ((c == 'f') || (c == 'F')) {
Delitem(s->text,DOH_END);
return SWIG_TOKEN_FLOAT;
} else if ((c == 'l') || (c == 'L')) {
Delitem(s->text,DOH_END);
return SWIG_TOKEN_DOUBLE;
} else {
retract(s,1);
return SWIG_TOKEN_DOUBLE;
}
break;
case 87 :
/* A long integer of some sort */
if ((c = nextchar(s)) == 0) return SWIG_TOKEN_LONG;
if ((c == 'u') || (c == 'U')) {
return SWIG_TOKEN_ULONG;
} else if ((c == 'l') || (c == 'L')) {
state = 870;
} else {
retract(s,1);
return SWIG_TOKEN_LONG;
}
break;
/* A long long integer */
case 870:
if ((c = nextchar(s)) == 0) return SWIG_TOKEN_LONGLONG;
if ((c == 'u') || (c == 'U')) {
return SWIG_TOKEN_ULONGLONG;
} else {
retract(s,1);
return SWIG_TOKEN_LONGLONG;
}
/* An unsigned number */
case 88:
if ((c = nextchar(s)) == 0) return SWIG_TOKEN_UINT;
if ((c == 'l') || (c == 'L')) {
state = 880;
} else {
retract(s,1);
return SWIG_TOKEN_UINT;
}
break;
/* Possibly an unsigned long long or unsigned long */
case 880:
if ((c = nextchar(s)) == 0) return SWIG_TOKEN_ULONG;
if ((c == 'l') || (c == 'L')) return SWIG_TOKEN_ULONGLONG;
else {
retract(s,1);
return SWIG_TOKEN_ULONG;
}
/* A character constant */
case 9:
if ((c = nextchar(s)) == 0) {
/* add_error(0,"Unterminated character constant", string_start); */
return 0;
}
if (c == '\'') {
return(SWIG_TOKEN_CHAR);
} else if (c == '\\') state = 91;
break;
case 91:
if ((c = nextchar(s)) == 0) {
/* add_error(0,"Unterminated character constant", string_start); */
return 0;
}
state = 9;
break;
/* A period or maybe a floating point number */
case 100:
if ((c = nextchar(s)) == 0) return (0);
if (isdigit(c)) state = 81;
else {
retract(s,1);
return SWIG_TOKEN_PERIOD;
}
break;
/* An illegal character */
/* Reverse string */
case 900:
if ((c = nextchar(s)) == 0) {
/* add_error(0,"Unterminated character constant", string_start); */
return 0;
}
if (c == '`') {
return(SWIG_TOKEN_RSTRING);
}
break;
default:
return SWIG_TOKEN_ILLEGAL;
}
}
}
/* -----------------------------------------------------------------------------
* SwigScanner_token()
*
* Real entry point to return the next token. Returns 0 if at end of input.
* ----------------------------------------------------------------------------- */
int
SwigScanner_token(SwigScanner *s) {
int t;
Clear(s->text);
if (s->nexttoken >= 0) {
t = s->nexttoken;
s->nexttoken = -1;
return t;
}
t = look(s);
return t;
}
/* -----------------------------------------------------------------------------
* SwigScanner_text()
*
* Return the lexene associated with the last returned token.
* ----------------------------------------------------------------------------- */
String *
SwigScanner_text(SwigScanner *s) {
return s->text;
}
/* -----------------------------------------------------------------------------
* SwigScanner_skip_line()
*
* Skips to the end of a line
* ----------------------------------------------------------------------------- */
void
SwigScanner_skip_line(SwigScanner *s) {
char c;
int done = 0;
Clear(s->text);
Setfile(s->text,Getfile(s->str));
Setline(s->text,Getline(s->str));
while (!done) {
if ((c = nextchar(s)) == 0) return;
if (c == '\\') c = nextchar(s);
else if (c == '\n') done = 1;
}
return;
}
/* -----------------------------------------------------------------------------
* SwigScanner_skip_balanced()
*
* Skips a piece of code enclosed in begin/end symbols such as '{...}' or
* (...). Ignores symbols inside comments or strings.
* ----------------------------------------------------------------------------- */
int
SwigScanner_skip_balanced(SwigScanner *s, int startchar, int endchar) {
char c;
int num_levels = 1;
int l;
int state = 0;
char temp[2] = {0,0};
l = s->line;
temp[0] = (char) startchar;
Clear(s->text);
Setfile(s->text,Getfile(s->str));
Setline(s->text,Getline(s->str));
Append(s->text,temp);
while (num_levels > 0) {
if ((c = nextchar(s)) == 0) {
return -1;
}
switch(state) {
case 0:
if (c == startchar) num_levels++;
else if (c == endchar) num_levels--;
else if (c == '/') state = 10;
else if (c == '\"') state = 20;
else if (c == '\'') state = 30;
break;
case 10:
if (c == '/') state = 11;
else if (c == '*') state = 12;
else state = 0;
break;
case 11:
if (c == '\n') state = 0;
else state = 11;
break;
case 12:
if (c == '*') state = 13;
break;
case 13:
if (c == '*') state = 13;
else if (c == '/') state = 0;
else state = 12;
break;
case 20:
if (c == '\"') state = 0;
else if (c == '\\') state = 21;
break;
case 21:
state = 20;
break;
case 30:
if (c == '\'') state = 0;
else if (c == '\\') state = 31;
break;
case 31:
state = 30;
break;
default:
break;
}
}
return 0;
}