Move Doxygen comments concatenation from the parser to the lexer.

This allows writing the grammar in a simpler way without running into
shift/reduce conflicts all the time because a Doxygen post comment can often
be either reduced with the preceding token or shifted if there is another
Doxygen post comment after it.

Just take care of concatenating the comments in the lexer, which makes its
handling of comment tokens slightly more complex as it now needs to look ahead
at the next tokens, but it's worth the simplifications in the parser.

No changes in behaviour.
This commit is contained in:
Vadim Zeitlin 2015-07-26 20:55:07 +02:00
commit 4884f8cb3c
2 changed files with 72 additions and 62 deletions

View file

@ -416,33 +416,72 @@ static int yylook(void) {
case SWIG_TOKEN_COMMENT:
{
String *cmt = Scanner_text(scan);
char *loc = Char(cmt);
if ((strncmp(loc,"/*@SWIG",7) == 0) && (loc[Len(cmt)-3] == '@')) {
Scanner_locator(scan, cmt);
}
if (scan_doxygen_comments) { /* else just skip this node, to avoid crashes in parser module*/
/* Check for all possible Doxygen comment start markers while ignoring
comments starting with a row of asterisks or slashes just as
Doxygen itself does. */
if (Len(cmt) > 3 && loc[0] == '/' &&
((loc[1] == '/' && ((loc[2] == '/' && loc[3] != '/') || loc[2] == '!')) ||
(loc[1] == '*' && ((loc[2] == '*' && loc[3] != '*') || loc[2] == '!')))) {
int is_post_comment = loc[3] == '<';
typedef enum {
DOX_COMMENT_PRE = -1,
DOX_COMMENT_NONE,
DOX_COMMENT_POST
} comment_kind_t;
comment_kind_t existing_comment = DOX_COMMENT_NONE;
if (is_post_comment || !isStructuralDoxygen(loc)) {
int begin = is_post_comment ? 4 : 3;
int end = Len(cmt);
if (loc[end - 1] == '/' && loc[end - 2] == '*') {
end -= 2;
/* Concatenate or skip all consecutive comments at once. */
do {
String *cmt = Scanner_text(scan);
char *loc = Char(cmt);
if ((strncmp(loc,"/*@SWIG",7) == 0) && (loc[Len(cmt)-3] == '@')) {
Scanner_locator(scan, cmt);
}
if (scan_doxygen_comments) { /* else just skip this node, to avoid crashes in parser module*/
/* Check for all possible Doxygen comment start markers while ignoring
comments starting with a row of asterisks or slashes just as
Doxygen itself does. */
if (Len(cmt) > 3 && loc[0] == '/' &&
((loc[1] == '/' && ((loc[2] == '/' && loc[3] != '/') || loc[2] == '!')) ||
(loc[1] == '*' && ((loc[2] == '*' && loc[3] != '*') || loc[2] == '!')))) {
comment_kind_t this_comment = loc[3] == '<' ? DOX_COMMENT_POST
: DOX_COMMENT_PRE;
if (existing_comment != DOX_COMMENT_NONE && this_comment != existing_comment) {
/* We can't concatenate together Doxygen pre- and post-comments. */
break;
}
yylval.str = NewStringWithSize(loc + begin, end - begin);
Setline(yylval.str, Scanner_start_line(scan));
Setfile(yylval.str, Scanner_file(scan));
return is_post_comment ? DOXYGENPOSTSTRING : DOXYGENSTRING;
if (this_comment == DOX_COMMENT_POST || !isStructuralDoxygen(loc)) {
String *str;
int begin = this_comment == DOX_COMMENT_POST ? 4 : 3;
int end = Len(cmt);
if (loc[end - 1] == '/' && loc[end - 2] == '*') {
end -= 2;
}
str = NewStringWithSize(loc + begin, end - begin);
if (existing_comment == DOX_COMMENT_NONE) {
yylval.str = str;
Setline(yylval.str, Scanner_start_line(scan));
Setfile(yylval.str, Scanner_file(scan));
} else {
Append(yylval.str, str);
}
existing_comment = this_comment;
}
}
}
do {
tok = Scanner_token(scan);
} while (tok == SWIG_TOKEN_ENDLINE);
} while (tok == SWIG_TOKEN_COMMENT);
Scanner_pushtoken(scan, tok, Scanner_text(scan));
switch (existing_comment) {
case DOX_COMMENT_PRE:
return DOXYGENSTRING;
case DOX_COMMENT_NONE:
break;
case DOX_COMMENT_POST:
return DOXYGENPOSTSTRING;
}
}
break;

View file

@ -1425,13 +1425,8 @@ static void mark_nodes_as_extend(Node *n) {
%token <str> CONVERSIONOPERATOR
%token PARSETYPE PARSEPARM PARSEPARMS
/* Make Doxygen comment left associative to avoid shift/reduce conflicts for
several of them in a row, it doesn't really matter in which order we
concatenate them but this order must be defined. */
%token <str> DOXYGENSTRING
%left DOXYGENSTRING
%token <str> DOXYGENPOSTSTRING
%left DOXYGENPOSTSTRING
%left CAST
%left QUESTIONMARK
@ -1505,8 +1500,6 @@ static void mark_nodes_as_extend(Node *n) {
%type <ptype> type_specifier primitive_type_list ;
%type <node> fname stringtype;
%type <node> featattr;
%type <str> doxygen_comment;
%type <str> doxygen_post_comment;
%type <node> lambda_introducer lambda_body;
%type <pl> lambda_tail;
%type <str> virt_specifier_seq;
@ -1562,11 +1555,11 @@ interface : interface declaration {
appendChild($1,$2);
$$ = $1;
}
| interface doxygen_comment {
| interface DOXYGENSTRING {
currentDeclComment = $2;
$$ = $1;
}
| interface doxygen_post_comment {
| interface DOXYGENPOSTSTRING {
Node *node = lastChild($1);
if (node) {
set_comment(node, $2);
@ -3427,28 +3420,6 @@ c_constructor_decl : storage_class type LPAREN parms RPAREN ctor_end {
}
;
/* ------------------------------------------------------------
A Doxygen Comment (a string in Doxygen Format)
------------------------------------------------------------ */
doxygen_comment : DOXYGENSTRING {
$$ = $1;
}
| DOXYGENSTRING doxygen_comment {
Append($1, $2);
$$ = $1;
}
;
doxygen_post_comment : DOXYGENPOSTSTRING {
$$ = $1;
}
| DOXYGENPOSTSTRING doxygen_post_comment {
Append($1, $2);
$$ = $1;
}
;
/* ======================================================================
* C++ Support
* ====================================================================== */
@ -4446,11 +4417,11 @@ cpp_member_no_dox : c_declaration { $$ = $1; }
cpp_member : cpp_member_no_dox {
$$ = $1;
}
| doxygen_comment cpp_member_no_dox {
| DOXYGENSTRING cpp_member_no_dox {
$$ = $2;
set_comment($2, $1);
}
| cpp_member_no_dox doxygen_post_comment {
| cpp_member_no_dox DOXYGENPOSTSTRING {
$$ = $1;
set_comment($1, $2);
}
@ -4827,7 +4798,7 @@ ptail : COMMA parm ptail {
set_nextSibling($2,$3);
$$ = $2;
}
| COMMA doxygen_post_comment parm ptail {
| COMMA DOXYGENPOSTSTRING parm ptail {
set_comment(previousNode, $2);
set_nextSibling($3,$4);
$$ = $3;
@ -4871,11 +4842,11 @@ parm_no_dox : rawtype parameter_declarator {
parm : parm_no_dox {
$$ = $1;
}
| doxygen_comment parm_no_dox {
| DOXYGENSTRING parm_no_dox {
$$ = $2;
set_comment($2, $1);
}
| parm_no_dox doxygen_post_comment {
| parm_no_dox DOXYGENPOSTSTRING {
$$ = $1;
set_comment($1, $2);
}
@ -5999,19 +5970,19 @@ enumlist_item : optional_ignored_define edecl_with_dox optional_ignored_define {
edecl_with_dox : edecl {
$$ = $1;
}
| doxygen_comment edecl {
| DOXYGENSTRING edecl {
$$ = $2;
set_comment($2, $1);
}
| edecl doxygen_post_comment {
| edecl DOXYGENPOSTSTRING {
$$ = $1;
set_comment($1, $2);
}
| doxygen_post_comment edecl {
| DOXYGENPOSTSTRING edecl {
$$ = $2;
set_comment(previousNode, $1);
}
| doxygen_post_comment edecl doxygen_post_comment {
| DOXYGENPOSTSTRING edecl DOXYGENPOSTSTRING {
$$ = $2;
set_comment(previousNode, $1);
set_comment($2, $3);