Move Doxygen comment mangling from the parser to the lexer.

This is a more logical place to do this and it also simplifies the parser
code, e.g. the parser no longer receives the ignored (called "structural" for
some reason in the code) Doxygen comments from the lexer at all, instead of
having to ignore them on its own. It also allows the doxygen_comment and
doxygen_post_comment rules to be defined in a simpler way and avoids
shift/reduce conflicts for sequences of Doxygen [post] comments by specifying
their associativity.

In principle, the lexer could also take care of concatenating consecutive
Doxygen comments into a single one, as this would also seem to belong to it
rather than the parser, but this doesn't seem to provide any immediate gains
and so isn't done by this commit.
This commit is contained in:
Vadim Zeitlin 2015-07-22 17:39:04 +02:00
commit dc9cecb943
2 changed files with 80 additions and 106 deletions

View file

@ -53,6 +53,52 @@ static int rename_active = 0;
/* Doxygen comments scanning */
int scan_doxygen_comments = 0;
/* -----------------------------------------------------------------------------
 * isStructuralDoxygen()
 *
 * Returns 1 if the Doxygen command examined in s is one of the "structural"
 * commands listed in structuralTags below, and 0 otherwise (including when s
 * contains neither a backslash nor an at sign).
 *
 * The command examined is the text following the backslash located by
 * Strchr() or, only when no backslash is found at all, the text following the
 * at sign located by Strchr().
 *
 * NOTE(review): when both a backslash and an at sign are present, the
 * backslash is used even if the at sign comes first -- confirm this is the
 * intended behaviour.
 * ----------------------------------------------------------------------------- */
int isStructuralDoxygen(String *s) {
  static const char *const structuralTags[] = {
    "addtogroup",
    "callgraph",
    "callergraph",
    "category",
    "def",
    "defgroup",
    "dir",
    "example",
    "file",
    "headerfile",
    "internal",
    "mainpage",
    "name",
    "nosubgrouping",
    "overload",
    "package",
    "page",
    "protocol",
    "relates",
    "relatesalso",
    "showinitializer",
    "weakgroup",
  };

  unsigned n;
  char *slashPointer = Strchr(s, '\\');
  char *atPointer = Strchr(s, '@');

  if (slashPointer == NULL && atPointer == NULL)
    return 0;
  if (slashPointer == NULL)
    slashPointer = atPointer;

  slashPointer++; /* skip backslash or at sign */

  for (n = 0; n < sizeof(structuralTags) / sizeof(structuralTags[0]); n++) {
    const size_t len = strlen(structuralTags[n]);
    if (strncmp(slashPointer, structuralTags[n], len) == 0) {
      /* Take care to avoid false positives with prefixes of other tags.
       *
       * The cast to unsigned char is required: passing a negative char value
       * (e.g. a byte of a non-ASCII character in the comment) to isspace() is
       * undefined behaviour. */
      if (slashPointer[len] == '\0' || isspace((unsigned char)slashPointer[len]))
        return 1;
    }
  }

  return 0;
}
/* -----------------------------------------------------------------------------
* Swig_cparse_cplusplus()
* ----------------------------------------------------------------------------- */
@ -376,21 +422,25 @@ static int yylook(void) {
Scanner_locator(scan, cmt);
}
if (scan_doxygen_comments) { /* else just skip this node, to avoid crashes in parser module*/
if (strncmp(loc, "/**<", 4) == 0 || strncmp(loc, "///<", 4) == 0||strncmp(loc, "/*!<", 4) == 0||strncmp(loc, "//!<", 4) == 0) {
/* printf("Doxygen Post Comment: %s lines %d-%d [%s]\n", Char(Scanner_file(scan)), Scanner_start_line(scan), Scanner_line(scan), loc); */
yylval.str = NewString(loc);
Setline(yylval.str, Scanner_start_line(scan));
Setfile(yylval.str, Scanner_file(scan));
return DOXYGENPOSTSTRING;
}
if (strncmp(loc, "/**", 3) == 0 || strncmp(loc, "///", 3) == 0||strncmp(loc, "/*!", 3) == 0||strncmp(loc, "//!", 3) == 0) {
/* printf("Doxygen Comment: %s lines %d-%d [%s]\n", Char(Scanner_file(scan)), Scanner_start_line(scan), Scanner_line(scan), loc); */
/* ignore comments like / * * * and / * * /, which are also ignored by Doxygen */
if (loc[3] != '*' && loc[3] != '/') {
yylval.str = NewString(loc);
/* Check for all possible Doxygen comment start markers while ignoring
comments starting with a row of asterisks or slashes just as
Doxygen itself does. */
if (Len(cmt) > 3 && loc[0] == '/' &&
((loc[1] == '/' && ((loc[2] == '/' && loc[3] != '/') || loc[2] == '!')) ||
(loc[1] == '*' && ((loc[2] == '*' && loc[3] != '*') || loc[2] == '!')))) {
int is_post_comment = loc[3] == '<';
if (is_post_comment || !isStructuralDoxygen(loc)) {
int begin = is_post_comment ? 4 : 3;
int end = Len(cmt);
if (loc[end - 1] == '/' && loc[end - 2] == '*') {
end -= 2;
}
yylval.str = NewStringWithSize(loc + begin, end - begin);
Setline(yylval.str, Scanner_start_line(scan));
Setfile(yylval.str, Scanner_file(scan));
return DOXYGENSTRING;
return is_post_comment ? DOXYGENPOSTSTRING : DOXYGENSTRING;
}
}
}
@ -930,7 +980,7 @@ int yylex(void) {
case POUND:
return yylex();
case SWIG_TOKEN_COMMENT:
return yylex();
return yylex();
default:
return (l);
}

View file

@ -61,58 +61,12 @@ static int cparse_externc = 0;
int ignore_nested_classes = 0;
int kwargs_supported = 0;
/* -----------------------------------------------------------------------------
* Doxygen Comment Globals and Assist Functions
* Doxygen Comment Globals
* ----------------------------------------------------------------------------- */
static String *currentDeclComment = NULL; /* Comment of C/C++ declaration. */
static Node *previousNode = NULL; /* Pointer to the previous node (for post comments) */
static Node *currentNode = NULL; /* Pointer to the current node (for post comments) */
/* -----------------------------------------------------------------------------
 * isStructuralDoxygen()
 *
 * Returns 1 if the Doxygen command examined in s is one of the "structural"
 * commands listed in structuralTags below, and 0 otherwise (including when s
 * contains neither a backslash nor an at sign).
 *
 * The command examined is the text following the backslash located by
 * Strchr() or, only when no backslash is found at all, the text following the
 * at sign located by Strchr().
 *
 * NOTE(review): when both a backslash and an at sign are present, the
 * backslash is used even if the at sign comes first -- confirm this is the
 * intended behaviour.
 * ----------------------------------------------------------------------------- */
int isStructuralDoxygen(String *s) {
  static const char *const structuralTags[] = {
    "addtogroup",
    "callgraph",
    "callergraph",
    "category",
    "def",
    "defgroup",
    "dir",
    "example",
    "file",
    "headerfile",
    "internal",
    "mainpage",
    "name",
    "nosubgrouping",
    "overload",
    "package",
    "page",
    "protocol",
    "relates",
    "relatesalso",
    "showinitializer",
    "weakgroup",
  };

  unsigned n;
  char *slashPointer = Strchr(s, '\\');
  char *atPointer = Strchr(s, '@');

  if (slashPointer == NULL && atPointer == NULL)
    return 0;
  if (slashPointer == NULL)
    slashPointer = atPointer;

  slashPointer++; /* skip backslash or at sign */

  for (n = 0; n < sizeof(structuralTags) / sizeof(structuralTags[0]); n++) {
    const size_t len = strlen(structuralTags[n]);
    if (strncmp(slashPointer, structuralTags[n], len) == 0) {
      /* Take care to avoid false positives with prefixes of other tags.
       *
       * The cast to unsigned char is required: passing a negative char value
       * (e.g. a byte of a non-ASCII character in the comment) to isspace() is
       * undefined behaviour. */
      if (slashPointer[len] == '\0' || isspace((unsigned char)slashPointer[len]))
        return 1;
    }
  }

  return 0;
}
/* -----------------------------------------------------------------------------
* Assist Functions
* ----------------------------------------------------------------------------- */
@ -1470,8 +1424,14 @@ static void mark_nodes_as_extend(Node *n) {
%token <str> OPERATOR
%token <str> CONVERSIONOPERATOR
%token PARSETYPE PARSEPARM PARSEPARMS
%token <str> DOXYGENSTRING
%token <str> DOXYGENPOSTSTRING
/* Make Doxygen comments left associative to avoid shift/reduce conflicts for
several of them in a row. It doesn't really matter in which order we
concatenate them, but this order must be defined. */
%token <str> DOXYGENSTRING
%left DOXYGENSTRING
%token <str> DOXYGENPOSTSTRING
%left DOXYGENPOSTSTRING
%left CAST
%left QUESTIONMARK
@ -1546,9 +1506,7 @@ static void mark_nodes_as_extend(Node *n) {
%type <node> fname stringtype;
%type <node> featattr;
%type <str> doxygen_comment;
%type <str> doxygen_comment_item;
%type <str> doxygen_post_comment;
%type <str> doxygen_post_comment_item;
%type <node> lambda_introducer lambda_body;
%type <pl> lambda_tail;
%type <node> optional_constant_directive;
@ -3474,56 +3432,22 @@ c_constructor_decl : storage_class type LPAREN parms RPAREN ctor_end {
A Doxygen Comment (a string in Doxygen Format)
------------------------------------------------------------ */
doxygen_comment_item : DOXYGENSTRING {
DohReplace($1, "/**", "", 0);
DohReplace($1, "/*!", "", 0);
DohReplace($1, "///", "", 0);
DohReplace($1, "//!", "", 0);
DohReplace($1, "*/", "", 0);
/* Throw out all structural comments */
if (isStructuralDoxygen($1)) {
Delete($1);
$1 = 0;
}
$$ = $1;
}
| doxygen_comment_item doxygen_comment_item {
if ($1) {
if ($2)
Append($1, $2);
}
else {
$1 = $2;
}
$$ = $1;
}
;
doxygen_comment : doxygen_comment_item {
doxygen_comment : DOXYGENSTRING {
$$ = $1;
}
;
doxygen_post_comment_item : DOXYGENPOSTSTRING {
DohReplace($1, "///<", "", 0);
DohReplace($1, "/**<", "", 0);
DohReplace($1, "/*!<", "", 0);
DohReplace($1, "//!<", "", 0);
DohReplace($1, "*/", "", 0);
$$ = $1;
}
| doxygen_post_comment_item doxygen_post_comment_item {
| DOXYGENSTRING doxygen_comment {
Append($1, $2);
$$ = $1;
}
;
doxygen_post_comment : doxygen_post_comment_item {
doxygen_post_comment : DOXYGENPOSTSTRING {
$$ = $1;
}
| DOXYGENPOSTSTRING doxygen_post_comment {
Append($1, $2);
$$ = $1;
}
;
/* ======================================================================