Move Doxygen comments concatenation from the parser to the lexer.

This allows writing the grammar in a simpler way without running into shift/reduce conflicts all the time because a Doxygen post comment can often be either reduced with the preceding token or shifted if there is another Doxygen post comment after it. Just take care of concatenating the comments in the lexer, which makes its handling of comment tokens slightly more complex as it now needs to look ahead at the next tokens, but it's worth the simplifications in the parser. No changes in behaviour.
2015-07-26 20:55:07 +02:00 · 2015-07-26 20:55:07 +02:00 · 4884f8cb3c
commit 4884f8cb3c
parent 864945ba2e
2 changed files with 72 additions and 62 deletions
--- a/Source/CParse/cscanner.c
+++ b/Source/CParse/cscanner.c
@ -416,33 +416,72 @@ static int yylook(void) {
      
    case SWIG_TOKEN_COMMENT:
      {
-	String *cmt = Scanner_text(scan);
-	char *loc = Char(cmt);
-	if ((strncmp(loc,"/*@SWIG",7) == 0) && (loc[Len(cmt)-3] == '@')) {
-	  Scanner_locator(scan, cmt);
-	}
-	if (scan_doxygen_comments) { /* else just skip this node, to avoid crashes in parser module*/
-	  /* Check for all possible Doxygen comment start markers while ignoring
-	     comments starting with a row of asterisks or slashes just as
-	     Doxygen itself does. */
-	  if (Len(cmt) > 3 && loc[0] == '/' &&
-	      ((loc[1] == '/' && ((loc[2] == '/' && loc[3] != '/') || loc[2] == '!')) ||
-	       (loc[1] == '*' && ((loc[2] == '*' && loc[3] != '*') || loc[2] == '!')))) {
-	    int is_post_comment = loc[3] == '<';
+	typedef enum {
+	  DOX_COMMENT_PRE = -1,
+	  DOX_COMMENT_NONE,
+	  DOX_COMMENT_POST
+	} comment_kind_t;
+	comment_kind_t existing_comment = DOX_COMMENT_NONE;

-	    if (is_post_comment || !isStructuralDoxygen(loc)) {
-	      int begin = is_post_comment ? 4 : 3;
-	      int end = Len(cmt);
-	      if (loc[end - 1] == '/' && loc[end - 2] == '*') {
-		end -= 2;
+	/* Concatenate or skip all consecutive comments at once. */
+	do {
+	  String *cmt = Scanner_text(scan);
+	  char *loc = Char(cmt);
+	  if ((strncmp(loc,"/*@SWIG",7) == 0) && (loc[Len(cmt)-3] == '@')) {
+	    Scanner_locator(scan, cmt);
+	  }
+	  if (scan_doxygen_comments) { /* else just skip this node, to avoid crashes in parser module*/
+	    /* Check for all possible Doxygen comment start markers while ignoring
+	       comments starting with a row of asterisks or slashes just as
+	       Doxygen itself does. */
+	    if (Len(cmt) > 3 && loc[0] == '/' &&
+		((loc[1] == '/' && ((loc[2] == '/' && loc[3] != '/') || loc[2] == '!')) ||
+		 (loc[1] == '*' && ((loc[2] == '*' && loc[3] != '*') || loc[2] == '!')))) {
+	      comment_kind_t this_comment = loc[3] == '<' ? DOX_COMMENT_POST
+							  : DOX_COMMENT_PRE;
+
+	      if (existing_comment != DOX_COMMENT_NONE && this_comment != existing_comment) {
+		/* We can't concatenate together Doxygen pre- and post-comments. */
+		break;
 	      }

-	      yylval.str =  NewStringWithSize(loc + begin, end - begin);
-	      Setline(yylval.str, Scanner_start_line(scan));
-	      Setfile(yylval.str, Scanner_file(scan));
-	      return is_post_comment ? DOXYGENPOSTSTRING : DOXYGENSTRING;
+	      if (this_comment == DOX_COMMENT_POST || !isStructuralDoxygen(loc)) {
+		String *str;
+
+		int begin = this_comment == DOX_COMMENT_POST ? 4 : 3;
+		int end = Len(cmt);
+		if (loc[end - 1] == '/' && loc[end - 2] == '*') {
+		  end -= 2;
+		}
+
+		str = NewStringWithSize(loc + begin, end - begin);
+
+		if (existing_comment == DOX_COMMENT_NONE) {
+		  yylval.str = str;
+		  Setline(yylval.str, Scanner_start_line(scan));
+		  Setfile(yylval.str, Scanner_file(scan));
+		} else {
+		  Append(yylval.str, str);
+		}
+
+		existing_comment = this_comment;
+	      }
 	    }
 	  }
+	  do {
+	    tok = Scanner_token(scan);
+	  } while (tok == SWIG_TOKEN_ENDLINE);
+	} while (tok == SWIG_TOKEN_COMMENT);
+
+	Scanner_pushtoken(scan, tok, Scanner_text(scan));
+
+	switch (existing_comment) {
+	  case DOX_COMMENT_PRE:
+	    return DOXYGENSTRING;
+	  case DOX_COMMENT_NONE:
+	    break;
+	  case DOX_COMMENT_POST:
+	    return DOXYGENPOSTSTRING;
 	}
      }
      break;
--- a/Source/CParse/parser.y
+++ b/Source/CParse/parser.y
@ -1425,13 +1425,8 @@ static void mark_nodes_as_extend(Node *n) {
 %token <str> CONVERSIONOPERATOR
 %token PARSETYPE PARSEPARM PARSEPARMS

-/* Make Doxygen comment left associative to avoid shift/reduce conflicts for
-   several of them in a row, it doesn't really matter in which order we
-   concatenate them but this order must be defined. */
 %token <str> DOXYGENSTRING
-%left DOXYGENSTRING
 %token <str> DOXYGENPOSTSTRING
-%left DOXYGENPOSTSTRING

 %left  CAST
 %left  QUESTIONMARK
@ -1505,8 +1500,6 @@ static void mark_nodes_as_extend(Node *n) {
 %type <ptype>    type_specifier primitive_type_list ;
 %type <node>     fname stringtype;
 %type <node>     featattr;
-%type <str>	 doxygen_comment;
-%type <str>	 doxygen_post_comment;
 %type <node>     lambda_introducer lambda_body;
 %type <pl>       lambda_tail;
 %type <str>      virt_specifier_seq;
@ -1562,11 +1555,11 @@ interface      : interface declaration {
                   appendChild($1,$2);
                   $$ = $1;
               }
-               | interface doxygen_comment {
+               | interface DOXYGENSTRING {
                   currentDeclComment = $2; 
                   $$ = $1;
               }
-               | interface doxygen_post_comment {
+               | interface DOXYGENPOSTSTRING {
                   Node *node = lastChild($1);
                   if (node) {
                       set_comment(node, $2);
@ -3427,28 +3420,6 @@ c_constructor_decl : storage_class type LPAREN parms RPAREN ctor_end {
                }
                ;

-/* ------------------------------------------------------------
-   A Doxygen Comment (a string in Doxygen Format)
-   ------------------------------------------------------------ */
-
-doxygen_comment : DOXYGENSTRING {
-                  $$ = $1;
-		}
-		| DOXYGENSTRING doxygen_comment {
-		  Append($1, $2);
-		  $$ = $1;
-		}
-		;
-
-doxygen_post_comment : DOXYGENPOSTSTRING {
-                  $$ = $1;
-		}
-		| DOXYGENPOSTSTRING doxygen_post_comment {
-		  Append($1, $2);
-		  $$ = $1;
-		}
-		;
-
 /* ======================================================================
 *                       C++ Support
 * ====================================================================== */
@ -4446,11 +4417,11 @@ cpp_member_no_dox : c_declaration { $$ = $1; }
 cpp_member   : cpp_member_no_dox {
 		$$ = $1;
 	     }
-             | doxygen_comment cpp_member_no_dox {
+             | DOXYGENSTRING cpp_member_no_dox {
 	         $$ = $2;
 		 set_comment($2, $1);
 	     }
-             | cpp_member_no_dox doxygen_post_comment {
+             | cpp_member_no_dox DOXYGENPOSTSTRING {
 	         $$ = $1;
 		 set_comment($1, $2);
 	     }
@ -4827,7 +4798,7 @@ ptail          : COMMA parm ptail {
                 set_nextSibling($2,$3);
 		 $$ = $2;
                }
-	       | COMMA doxygen_post_comment parm ptail {
+	       | COMMA DOXYGENPOSTSTRING parm ptail {
 		 set_comment(previousNode, $2);
                 set_nextSibling($3,$4);
 		 $$ = $3;
@ -4871,11 +4842,11 @@ parm_no_dox	: rawtype parameter_declarator {
 parm		: parm_no_dox {
 		  $$ = $1;
 		}
-		| doxygen_comment parm_no_dox {
+		| DOXYGENSTRING parm_no_dox {
 		  $$ = $2;
 		  set_comment($2, $1);
 		}
-		| parm_no_dox doxygen_post_comment {
+		| parm_no_dox DOXYGENPOSTSTRING {
 		  $$ = $1;
 		  set_comment($1, $2);
 		}
@ -5999,19 +5970,19 @@ enumlist_item	: optional_ignored_define edecl_with_dox optional_ignored_define {
 edecl_with_dox	: edecl {
 		  $$ = $1;
 		}
-		| doxygen_comment edecl {
+		| DOXYGENSTRING edecl {
 		  $$ = $2;
 		  set_comment($2, $1);
 		}
-		| edecl doxygen_post_comment {
+		| edecl DOXYGENPOSTSTRING {
 		  $$ = $1;
 		  set_comment($1, $2);
 		}
-		| doxygen_post_comment edecl {
+		| DOXYGENPOSTSTRING edecl {
 		  $$ = $2;
 		  set_comment(previousNode, $1);
 		}
-		| doxygen_post_comment edecl doxygen_post_comment {
+		| DOXYGENPOSTSTRING edecl DOXYGENPOSTSTRING {
 		  $$ = $2;
 		  set_comment(previousNode, $1);
 		  set_comment($2, $3);