From dc9cecb9435fc5e69c78629443da062b1907a67d Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin
Date: Wed, 22 Jul 2015 17:39:04 +0200
Subject: [PATCH] Move Doxygen comment mangling from the parser to the lexer.

This is a more logical place to do this and it also simplifies the parser code,
e.g. the parser doesn't get the ignored (called "structural" for some reason in
the code) Doxygen comments from the lexer at all any more instead of having to
ignore them on its own.

It also allows defining the doxygen_comment and doxygen_post_comment rules in a
simpler way and avoiding shift/reduce conflicts for the sequences of Doxygen
[post] comments by specifying their associativity.

Empty comments (/**/) keep being ignored by the lexer, exactly as they were
before this change, as there is nothing left in them once the comment markers
are stripped.

In principle, the lexer could also take care of concatenating the subsequent
Doxygen comments in a single one, as this would also seem to belong to it
rather than the parser, but this doesn't seem to provide any immediate gains
and so isn't done by this commit.
---
 Source/CParse/cscanner.c |  86 ++++++++++++++++++++++++-----
 Source/CParse/parser.y   | 108 ++++++---------------------------------
 2 files changed, 88 insertions(+), 106 deletions(-)

diff --git a/Source/CParse/cscanner.c b/Source/CParse/cscanner.c
index a987c4fd2..bc7463589 100644
--- a/Source/CParse/cscanner.c
+++ b/Source/CParse/cscanner.c
@@ -53,6 +53,60 @@ static int rename_active = 0;
 /* Doxygen comments scanning */
 int scan_doxygen_comments = 0;
 
+/* -----------------------------------------------------------------------------
+ * isStructuralDoxygen()
+ *
+ * Return 1 if the Doxygen command following the backslash found in s by
+ * Strchr() or, if s contains no backslash at all, the at sign, is one of the
+ * structural commands listed below and 0 otherwise. yylook() doesn't return
+ * any token for the non-post comments for which this function returns 1.
+ * ----------------------------------------------------------------------------- */
+int isStructuralDoxygen(String *s){
+  static const char* const structuralTags[] = {
+    "addtogroup",
+    "callgraph",
+    "callergraph",
+    "category",
+    "def",
+    "defgroup",
+    "dir",
+    "example",
+    "file",
+    "headerfile",
+    "internal",
+    "mainpage",
+    "name",
+    "nosubgrouping",
+    "overload",
+    "package",
+    "page",
+    "protocol",
+    "relates",
+    "relatesalso",
+    "showinitializer",
+    "weakgroup",
+  };
+
+  unsigned n;
+  char *slashPointer = Strchr(s, '\\');
+  char *atPointer = Strchr(s,'@');
+  if (slashPointer == NULL && atPointer == NULL) return 0;
+  else if( slashPointer == NULL) slashPointer = atPointer;
+
+  slashPointer++; /* skip backslash or at sign */
+
+  for (n = 0; n < sizeof(structuralTags)/sizeof(structuralTags[0]); n++) {
+    const size_t len = strlen(structuralTags[n]);
+    if (strncmp(slashPointer, structuralTags[n], len) == 0) {
+      /* Take care to avoid false positives with prefixes of other tags. */
+      if (slashPointer[len] == '\0' || isspace((unsigned char)slashPointer[len]))
+        return 1;
+    }
+  }
+
+  return 0;
+}
+
 /* -----------------------------------------------------------------------------
  * Swig_cparse_cplusplus()
  * ----------------------------------------------------------------------------- */
@@ -376,21 +430,25 @@ static int yylook(void) {
           Scanner_locator(scan, cmt);
         }
         if (scan_doxygen_comments) { /* else just skip this node, to avoid crashes in parser module*/
-          if (strncmp(loc, "/**<", 4) == 0 || strncmp(loc, "///<", 4) == 0||strncmp(loc, "/*!<", 4) == 0||strncmp(loc, "//!<", 4) == 0) {
-            /* printf("Doxygen Post Comment: %s lines %d-%d [%s]\n", Char(Scanner_file(scan)), Scanner_start_line(scan), Scanner_line(scan), loc); */
-            yylval.str = NewString(loc);
-            Setline(yylval.str, Scanner_start_line(scan));
-            Setfile(yylval.str, Scanner_file(scan));
-            return DOXYGENPOSTSTRING;
-          }
-          if (strncmp(loc, "/**", 3) == 0 || strncmp(loc, "///", 3) == 0||strncmp(loc, "/*!", 3) == 0||strncmp(loc, "//!", 3) == 0) {
-            /* printf("Doxygen Comment: %s lines %d-%d [%s]\n", Char(Scanner_file(scan)), Scanner_start_line(scan), Scanner_line(scan), loc); */
-            /* ignore comments like / * * * and / * * /, which are also ignored by Doxygen */
-            if (loc[3] != '*' && loc[3] != '/') {
-              yylval.str = NewString(loc);
+          /* Check for all possible Doxygen comment start markers while ignoring
+             comments starting with a row of asterisks or slashes, as Doxygen does,
+             and the empty comment / * * / which would give a negative length below. */
+          if (Len(cmt) > 3 && loc[0] == '/' &&
+              ((loc[1] == '/' && ((loc[2] == '/' && loc[3] != '/') || loc[2] == '!')) ||
+               (loc[1] == '*' && ((loc[2] == '*' && loc[3] != '*' && loc[3] != '/') || loc[2] == '!')))) {
+            int is_post_comment = loc[3] == '<';
+
+            if (is_post_comment || !isStructuralDoxygen(loc)) {
+              int begin = is_post_comment ? 4 : 3;
+              int end = Len(cmt);
+              if (loc[end - 1] == '/' && loc[end - 2] == '*') {
+                end -= 2;
+              }
+
+              yylval.str = NewStringWithSize(loc + begin, end - begin);
               Setline(yylval.str, Scanner_start_line(scan));
               Setfile(yylval.str, Scanner_file(scan));
-              return DOXYGENSTRING;
+              return is_post_comment ? DOXYGENPOSTSTRING : DOXYGENSTRING;
             }
           }
         }
@@ -930,7 +988,7 @@ int yylex(void) {
     case POUND:
       return yylex();
     case SWIG_TOKEN_COMMENT:
-      return yylex();
+      return yylex();
     default:
       return (l);
     }
diff --git a/Source/CParse/parser.y b/Source/CParse/parser.y
index 7afccea01..30928f73f 100644
--- a/Source/CParse/parser.y
+++ b/Source/CParse/parser.y
@@ -61,58 +61,12 @@ static int cparse_externc = 0;
 int ignore_nested_classes = 0;
 int kwargs_supported = 0;
 /* -----------------------------------------------------------------------------
- * Doxygen Comment Globals and Assist Functions
+ * Doxygen Comment Globals
  * ----------------------------------------------------------------------------- */
 static String *currentDeclComment = NULL; /* Comment of C/C++ declaration. */
 static Node *previousNode = NULL; /* Pointer to the previous node (for post comments) */
 static Node *currentNode = NULL; /* Pointer to the current node (for post comments) */
 
-int isStructuralDoxygen(String *s){
-  static const char* const structuralTags[] = {
-    "addtogroup",
-    "callgraph",
-    "callergraph",
-    "category",
-    "def",
-    "defgroup",
-    "dir",
-    "example",
-    "file",
-    "headerfile",
-    "internal",
-    "mainpage",
-    "name",
-    "nosubgrouping",
-    "overload",
-    "package",
-    "page",
-    "protocol",
-    "relates",
-    "relatesalso",
-    "showinitializer",
-    "weakgroup",
-  };
-
-  unsigned n;
-  char *slashPointer = Strchr(s, '\\');
-  char *atPointer = Strchr(s,'@');
-  if (slashPointer == NULL && atPointer == NULL) return 0;
-  else if( slashPointer == NULL) slashPointer = atPointer;
-
-  slashPointer++; /* skip backslash or at sign */
-
-  for (n = 0; n < sizeof(structuralTags)/sizeof(structuralTags[0]); n++) {
-    const size_t len = strlen(structuralTags[n]);
-    if (strncmp(slashPointer, structuralTags[n], len) == 0) {
-      /* Take care to avoid false positives with prefixes of other tags. */
-      if (slashPointer[len] == '\0' || isspace(slashPointer[len]))
-        return 1;
-    }
-  }
-
-  return 0;
-}
-
 /* -----------------------------------------------------------------------------
  * Assist Functions
  * ----------------------------------------------------------------------------- */
@@ -1470,8 +1424,14 @@ static void mark_nodes_as_extend(Node *n) {
 %token OPERATOR
 %token CONVERSIONOPERATOR
 %token PARSETYPE PARSEPARM PARSEPARMS
-%token DOXYGENSTRING
-%token DOXYGENPOSTSTRING
+
+/* Make Doxygen comment left associative to avoid shift/reduce conflicts for
+   several of them in a row, it doesn't really matter in which order we
+   concatenate them but this order must be defined. */
+%token DOXYGENSTRING
+%left DOXYGENSTRING
+%token DOXYGENPOSTSTRING
+%left DOXYGENPOSTSTRING
 
 %left CAST
 %left QUESTIONMARK
@@ -1546,9 +1506,7 @@ static void mark_nodes_as_extend(Node *n) {
 %type fname stringtype;
 %type featattr;
 %type doxygen_comment;
-%type doxygen_comment_item;
 %type doxygen_post_comment;
-%type doxygen_post_comment_item;
 %type lambda_introducer lambda_body;
 %type lambda_tail;
 %type optional_constant_directive;
@@ -3474,56 +3432,22 @@ c_constructor_decl : storage_class type LPAREN parms RPAREN ctor_end {
    A Doxygen Comment (a string in Doxygen Format)
    ------------------------------------------------------------ */
 
-doxygen_comment_item : DOXYGENSTRING {
-          DohReplace($1, "/**", "", 0);
-          DohReplace($1, "/*!", "", 0);
-          DohReplace($1, "///", "", 0);
-          DohReplace($1, "//!", "", 0);
-          DohReplace($1, "*/", "", 0);
-
-          /* Throw out all structural comments */
-          if (isStructuralDoxygen($1)) {
-            Delete($1);
-            $1 = 0;
-          }
-          $$ = $1;
-        }
-        | doxygen_comment_item doxygen_comment_item {
-          if ($1) {
-            if ($2)
-              Append($1, $2);
-          }
-          else {
-            $1 = $2;
-          }
-          $$ = $1;
-        }
-        ;
-
-doxygen_comment : doxygen_comment_item {
+doxygen_comment : DOXYGENSTRING {
           $$ = $1;
         }
-        ;
-
-
-doxygen_post_comment_item : DOXYGENPOSTSTRING {
-          DohReplace($1, "///<", "", 0);
-          DohReplace($1, "/**<", "", 0);
-          DohReplace($1, "/*!<", "", 0);
-          DohReplace($1, "//!<", "", 0);
-          DohReplace($1, "*/", "", 0);
-
-          $$ = $1;
-        }
-        | doxygen_post_comment_item doxygen_post_comment_item {
+        | DOXYGENSTRING doxygen_comment {
           Append($1, $2);
           $$ = $1;
         }
         ;
 
-doxygen_post_comment : doxygen_post_comment_item {
+doxygen_post_comment : DOXYGENPOSTSTRING {
           $$ = $1;
         }
+        | DOXYGENPOSTSTRING doxygen_post_comment {
+          Append($1, $2);
+          $$ = $1;
+        }
         ;
 
 /* ======================================================================