Move Doxygen comment mangling from the parser to the lexer.

This is a more logical place to do this and it also simplifies the parser code, e.g. the parser no longer gets the ignored (called "structural" for some reason in the code) Doxygen comments from the lexer at all, instead of having to ignore them on its own. It also allows defining the doxygen_comment and doxygen_post_comment rules in a simpler way and avoiding shift/reduce conflicts for sequences of Doxygen [post] comments by specifying their associativity. In principle, the lexer could also take care of concatenating subsequent Doxygen comments into a single one, as this would also seem to belong to it rather than to the parser, but this doesn't seem to provide any immediate gains and so isn't done by this commit.
2015-07-22 17:39:04 +02:00 · 2015-07-22 17:39:04 +02:00 · dc9cecb943
commit dc9cecb943
parent e191360c9f
2 changed files with 80 additions and 106 deletions
--- a/Source/CParse/cscanner.c
+++ b/Source/CParse/cscanner.c
@ -53,6 +53,52 @@ static int rename_active = 0;
 /* Doxygen comments scanning */
 int scan_doxygen_comments = 0;

+/* -----------------------------------------------------------------------------
+ * isStructuralDoxygen()
+ *
+ * Returns 1 if the Doxygen command examined in the comment text 's' is one of
+ * the "structural" commands listed in structuralTags below, and 0 otherwise
+ * (including when 's' contains neither a backslash nor an at sign).
+ *
+ * Only one command is examined: the one following the first backslash if the
+ * text contains a backslash, otherwise the one following the first at sign.
+ * NOTE(review): a backslash anywhere in the text takes precedence over an
+ * earlier at sign -- confirm this is the intended behaviour.
+ * ----------------------------------------------------------------------------- */
+int isStructuralDoxygen(String *s){
+	static const char* const structuralTags[] = {
+	  "addtogroup",
+	  "callgraph",
+	  "callergraph",
+	  "category",
+	  "def",
+	  "defgroup",
+	  "dir",
+	  "example",
+	  "file",
+	  "headerfile",
+	  "internal",
+	  "mainpage",
+	  "name",
+	  "nosubgrouping",
+	  "overload",
+	  "package",
+	  "page",
+	  "protocol",
+	  "relates",
+	  "relatesalso",
+	  "showinitializer",
+	  "weakgroup",
+	};
+
+	unsigned n;
+	char *slashPointer = Strchr(s, '\\');
+	char *atPointer = Strchr(s,'@');
+	if (slashPointer == NULL && atPointer == NULL) return 0;
+	else if( slashPointer == NULL) slashPointer = atPointer;
+
+	slashPointer++; /* skip backslash or at sign */
+
+	for (n = 0; n < sizeof(structuralTags)/sizeof(structuralTags[0]); n++) {
+	  const size_t len = strlen(structuralTags[n]);
+	  if (strncmp(slashPointer, structuralTags[n], len) == 0) {
+	    /* Take care to avoid false positives with prefixes of other tags.
+	       The cast is required: passing a negative plain char (e.g. a
+	       non-ASCII byte of the comment) to isspace() is undefined. */
+	    if (slashPointer[len] == '\0' || isspace((unsigned char)slashPointer[len]))
+	      return 1;
+	  }
+	}
+
+	return 0;
+}
+
 /* -----------------------------------------------------------------------------
 * Swig_cparse_cplusplus()
 * ----------------------------------------------------------------------------- */
@ -376,21 +422,25 @@ static int yylook(void) {
 	  Scanner_locator(scan, cmt);
 	}
 	if (scan_doxygen_comments) { /* else just skip this node, to avoid crashes in parser module*/
-	  if (strncmp(loc, "/**<", 4) == 0 || strncmp(loc, "///<", 4) == 0||strncmp(loc, "/*!<", 4) == 0||strncmp(loc, "//!<", 4) == 0) {
-	    /* printf("Doxygen Post Comment: %s lines %d-%d [%s]\n", Char(Scanner_file(scan)), Scanner_start_line(scan), Scanner_line(scan), loc); */
-	    yylval.str =  NewString(loc);
-	    Setline(yylval.str, Scanner_start_line(scan));
-	    Setfile(yylval.str, Scanner_file(scan));
-	    return DOXYGENPOSTSTRING;
-	  }
-	  if (strncmp(loc, "/**", 3) == 0 || strncmp(loc, "///", 3) == 0||strncmp(loc, "/*!", 3) == 0||strncmp(loc, "//!", 3) == 0) {
-	    /* printf("Doxygen Comment: %s lines %d-%d [%s]\n", Char(Scanner_file(scan)), Scanner_start_line(scan), Scanner_line(scan), loc); */
-	    /* ignore comments like / * * * and / * * /,  which are also ignored by Doxygen */
-	    if (loc[3] != '*'  &&  loc[3] != '/') {
-	      yylval.str =  NewString(loc);
+	  /* Check for all possible Doxygen comment start markers while ignoring
+	     comments starting with a row of asterisks or slashes just as
+	     Doxygen itself does. */
+	  if (Len(cmt) > 3 && loc[0] == '/' &&
+	      ((loc[1] == '/' && ((loc[2] == '/' && loc[3] != '/') || loc[2] == '!')) ||
+	       (loc[1] == '*' && ((loc[2] == '*' && loc[3] != '*') || loc[2] == '!')))) {
+	    int is_post_comment = loc[3] == '<';
+
+	    if (is_post_comment || !isStructuralDoxygen(loc)) {
+	      int begin = is_post_comment ? 4 : 3;
+	      int end = Len(cmt);
+	      if (loc[end - 1] == '/' && loc[end - 2] == '*') {
+		end -= 2;
+	      }
+
+	      yylval.str =  NewStringWithSize(loc + begin, end - begin);
 	      Setline(yylval.str, Scanner_start_line(scan));
 	      Setfile(yylval.str, Scanner_file(scan));
-	      return DOXYGENSTRING;
+	      return is_post_comment ? DOXYGENPOSTSTRING : DOXYGENSTRING;
 	    }
 	  }
 	}
@ -930,7 +980,7 @@ int yylex(void) {
  case POUND:
    return yylex();
  case SWIG_TOKEN_COMMENT:
-	  return yylex();
+    return yylex();
  default:
    return (l);
  }
--- a/Source/CParse/parser.y
+++ b/Source/CParse/parser.y
@ -61,58 +61,12 @@ static int      cparse_externc = 0;
 int		ignore_nested_classes = 0;
 int		kwargs_supported = 0;
 /* -----------------------------------------------------------------------------
- *                            Doxygen Comment Globals and Assist Functions
+ *                            Doxygen Comment Globals
 * ----------------------------------------------------------------------------- */
 static String *currentDeclComment = NULL; /* Comment of C/C++ declaration. */
 static Node *previousNode = NULL; /* Pointer to the previous node (for post comments) */
 static Node *currentNode = NULL; /* Pointer to the current node (for post comments) */

-int isStructuralDoxygen(String *s){
-	static const char* const structuralTags[] = {
-	  "addtogroup",
-	  "callgraph",
-	  "callergraph",
-	  "category",
-	  "def",
-	  "defgroup",
-	  "dir",
-	  "example",
-	  "file",
-	  "headerfile",
-	  "internal",
-	  "mainpage",
-	  "name",
-	  "nosubgrouping",
-	  "overload",
-	  "package",
-	  "page",
-	  "protocol",
-	  "relates",
-	  "relatesalso",
-	  "showinitializer",
-	  "weakgroup",
-	};
-
-	unsigned n;
-	char *slashPointer = Strchr(s, '\\');
-	char *atPointer = Strchr(s,'@');
-	if (slashPointer == NULL && atPointer == NULL) return 0;
-	else if( slashPointer == NULL) slashPointer = atPointer;
-
-	slashPointer++; /* skip backslash or at sign */
-
-	for (n = 0; n < sizeof(structuralTags)/sizeof(structuralTags[0]); n++) {
-	  const size_t len = strlen(structuralTags[n]);
-	  if (strncmp(slashPointer, structuralTags[n], len) == 0) {
-	    /* Take care to avoid false positives with prefixes of other tags. */
-	    if (slashPointer[len] == '\0' || isspace(slashPointer[len]))
-	      return 1;
-	  }
-	}
-
-	return 0;
-}
-
 /* -----------------------------------------------------------------------------
 *                            Assist Functions
 * ----------------------------------------------------------------------------- */
@ -1470,8 +1424,14 @@ static void mark_nodes_as_extend(Node *n) {
 %token <str> OPERATOR
 %token <str> CONVERSIONOPERATOR
 %token PARSETYPE PARSEPARM PARSEPARMS
-%token <str> DOXYGENSTRING 
-%token <str> DOXYGENPOSTSTRING 
+
+/* Make Doxygen comments left associative to avoid shift/reduce conflicts when
+   several of them appear in a row. It doesn't really matter in which order we
+   concatenate them, but the order must be defined. */
+%token <str> DOXYGENSTRING
+%left DOXYGENSTRING
+%token <str> DOXYGENPOSTSTRING
+%left DOXYGENPOSTSTRING

 %left  CAST
 %left  QUESTIONMARK
@ -1546,9 +1506,7 @@ static void mark_nodes_as_extend(Node *n) {
 %type <node>     fname stringtype;
 %type <node>     featattr;
 %type <str>	 doxygen_comment;
-%type <str>	 doxygen_comment_item;
 %type <str>	 doxygen_post_comment;
-%type <str>	 doxygen_post_comment_item;
 %type <node>     lambda_introducer lambda_body;
 %type <pl>       lambda_tail;
 %type <node>     optional_constant_directive;
@ -3474,56 +3432,22 @@ c_constructor_decl : storage_class type LPAREN parms RPAREN ctor_end {
   A Doxygen Comment (a string in Doxygen Format)
   ------------------------------------------------------------ */

-doxygen_comment_item : DOXYGENSTRING {
-		  DohReplace($1, "/**", "", 0);
-		  DohReplace($1, "/*!", "", 0);
-		  DohReplace($1, "///", "", 0);
-		  DohReplace($1, "//!", "", 0);
-		  DohReplace($1, "*/", "", 0);
-
-		  /* Throw out all structural comments */
-		  if (isStructuralDoxygen($1)) {
-		    Delete($1);
-		    $1 = 0;
-		  }
-		  $$ = $1;
-		}
-		| doxygen_comment_item doxygen_comment_item {
-		  if ($1) {
-		    if ($2)
-		      Append($1, $2);
-		  }
-		  else {
-		    $1 = $2;
-		  }
-		  $$ = $1;
-		}
-		;
-
-doxygen_comment : doxygen_comment_item {
+doxygen_comment : DOXYGENSTRING {
                  $$ = $1;
 		}
-		;
-
-
-doxygen_post_comment_item : DOXYGENPOSTSTRING {
-		  DohReplace($1, "///<", "", 0);
-		  DohReplace($1, "/**<", "", 0);
-		  DohReplace($1, "/*!<", "", 0);
-		  DohReplace($1, "//!<", "", 0);
-		  DohReplace($1, "*/", "", 0);
-		  
-		  $$ = $1;
-		}
-		| doxygen_post_comment_item doxygen_post_comment_item {
+		| DOXYGENSTRING doxygen_comment {
 		  Append($1, $2);
 		  $$ = $1;
 		}
 		;

-doxygen_post_comment : doxygen_post_comment_item {
+doxygen_post_comment : DOXYGENPOSTSTRING {
                  $$ = $1;
 		}
+		| DOXYGENPOSTSTRING doxygen_post_comment {
+		  Append($1, $2);
+		  $$ = $1;
+		}
 		;

 /* ======================================================================