From 4884f8cb3c8d2c2a30b2d3f2be10d8f148b36f67 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vz-swig@zeitlins.org>
Date: Sun, 26 Jul 2015 20:55:07 +0200
Subject: [PATCH] Move Doxygen comments concatenation from the parser to the
 lexer.

This allows writing the grammar in a simpler way without running into
shift/reduce conflicts all the time because a Doxygen post comment can often
be either reduced with the preceding token or shifted if there is another
Doxygen post comment after it.

Just take care of concatenating the comments in the lexer, which makes its
handling of comment tokens slightly more complex as it now needs to look ahead
at the next tokens, but it's worth the simplifications in the parser.

No changes in behaviour.
---
 Source/CParse/cscanner.c | 83 +++++++++++++++++++++++++++++-----------
 Source/CParse/parser.y   | 51 ++++++------------------
 2 files changed, 72 insertions(+), 62 deletions(-)

diff --git a/Source/CParse/cscanner.c b/Source/CParse/cscanner.c
index bc7463589..f1390e283 100644
--- a/Source/CParse/cscanner.c
+++ b/Source/CParse/cscanner.c
@@ -416,33 +416,72 @@ static int yylook(void) {
       
     case SWIG_TOKEN_COMMENT:
       {
-	String *cmt = Scanner_text(scan);
-	char *loc = Char(cmt);
-	if ((strncmp(loc,"/*@SWIG",7) == 0) && (loc[Len(cmt)-3] == '@')) {
-	  Scanner_locator(scan, cmt);
-	}
-	if (scan_doxygen_comments) { /* else just skip this node, to avoid crashes in parser module*/
-	  /* Check for all possible Doxygen comment start markers while ignoring
-	     comments starting with a row of asterisks or slashes just as
-	     Doxygen itself does. */
-	  if (Len(cmt) > 3 && loc[0] == '/' &&
-	      ((loc[1] == '/' && ((loc[2] == '/' && loc[3] != '/') || loc[2] == '!')) ||
-	       (loc[1] == '*' && ((loc[2] == '*' && loc[3] != '*') || loc[2] == '!')))) {
-	    int is_post_comment = loc[3] == '<';
+	typedef enum {
+	  DOX_COMMENT_PRE = -1,
+	  DOX_COMMENT_NONE,
+	  DOX_COMMENT_POST
+	} comment_kind_t;
+	comment_kind_t existing_comment = DOX_COMMENT_NONE;
 
-	    if (is_post_comment || !isStructuralDoxygen(loc)) {
-	      int begin = is_post_comment ? 4 : 3;
-	      int end = Len(cmt);
-	      if (loc[end - 1] == '/' && loc[end - 2] == '*') {
-		end -= 2;
+	/* Concatenate or skip all consecutive comments at once. */
+	do {
+	  String *cmt = Scanner_text(scan);
+	  char *loc = Char(cmt);
+	  if ((strncmp(loc,"/*@SWIG",7) == 0) && (loc[Len(cmt)-3] == '@')) {
+	    Scanner_locator(scan, cmt);
+	  }
+	  if (scan_doxygen_comments) { /* else just skip this node, to avoid crashes in parser module*/
+	    /* Check for all possible Doxygen comment start markers while ignoring
+	       comments starting with a row of asterisks or slashes just as
+	       Doxygen itself does. */
+	    if (Len(cmt) > 3 && loc[0] == '/' &&
+		((loc[1] == '/' && ((loc[2] == '/' && loc[3] != '/') || loc[2] == '!')) ||
+		 (loc[1] == '*' && ((loc[2] == '*' && loc[3] != '*') || loc[2] == '!')))) {
+	      comment_kind_t this_comment = loc[3] == '<' ? DOX_COMMENT_POST
+							  : DOX_COMMENT_PRE;
+
+	      if (existing_comment != DOX_COMMENT_NONE && this_comment != existing_comment) {
+		/* We can't concatenate together Doxygen pre- and post-comments. */
+		break;
 	      }
 
-	      yylval.str =  NewStringWithSize(loc + begin, end - begin);
-	      Setline(yylval.str, Scanner_start_line(scan));
-	      Setfile(yylval.str, Scanner_file(scan));
-	      return is_post_comment ? DOXYGENPOSTSTRING : DOXYGENSTRING;
+	      if (this_comment == DOX_COMMENT_POST || !isStructuralDoxygen(loc)) {
+		String *str;
+
+		int begin = this_comment == DOX_COMMENT_POST ? 4 : 3;
+		int end = Len(cmt);
+		if (loc[end - 1] == '/' && loc[end - 2] == '*') {
+		  end -= 2;
+		}
+
+		str = NewStringWithSize(loc + begin, end - begin);
+
+		if (existing_comment == DOX_COMMENT_NONE) {
+		  yylval.str = str;
+		  Setline(yylval.str, Scanner_start_line(scan));
+		  Setfile(yylval.str, Scanner_file(scan));
+		} else {
+		  Append(yylval.str, str);
+		}
+
+		existing_comment = this_comment;
+	      }
 	    }
 	  }
+	  do {
+	    tok = Scanner_token(scan);
+	  } while (tok == SWIG_TOKEN_ENDLINE);
+	} while (tok == SWIG_TOKEN_COMMENT);
+
+	Scanner_pushtoken(scan, tok, Scanner_text(scan));
+
+	switch (existing_comment) {
+	  case DOX_COMMENT_PRE:
+	    return DOXYGENSTRING;
+	  case DOX_COMMENT_NONE:
+	    break;
+	  case DOX_COMMENT_POST:
+	    return DOXYGENPOSTSTRING;
 	}
       }
       break;
diff --git a/Source/CParse/parser.y b/Source/CParse/parser.y
index fa19c1638..2051b8ff4 100644
--- a/Source/CParse/parser.y
+++ b/Source/CParse/parser.y
@@ -1425,13 +1425,8 @@ static void mark_nodes_as_extend(Node *n) {
 %token <str> CONVERSIONOPERATOR
 %token PARSETYPE PARSEPARM PARSEPARMS
 
-/* Make Doxygen comment left associative to avoid shift/reduce conflicts for
-   several of them in a row, it doesn't really matter in which order we
-   concatenate them but this order must be defined. */
 %token <str> DOXYGENSTRING
-%left DOXYGENSTRING
 %token <str> DOXYGENPOSTSTRING
-%left DOXYGENPOSTSTRING
 
 %left  CAST
 %left  QUESTIONMARK
@@ -1505,8 +1500,6 @@ static void mark_nodes_as_extend(Node *n) {
 %type <ptype>    type_specifier primitive_type_list ;
 %type <node>     fname stringtype;
 %type <node>     featattr;
-%type <str>	 doxygen_comment;
-%type <str>	 doxygen_post_comment;
 %type <node>     lambda_introducer lambda_body;
 %type <pl>       lambda_tail;
 %type <str>      virt_specifier_seq;
@@ -1562,11 +1555,11 @@ interface      : interface declaration {
                    appendChild($1,$2);
                    $$ = $1;
                }
-               | interface doxygen_comment {
+               | interface DOXYGENSTRING {
                    currentDeclComment = $2; 
                    $$ = $1;
                }
-               | interface doxygen_post_comment {
+               | interface DOXYGENPOSTSTRING {
                    Node *node = lastChild($1);
                    if (node) {
                        set_comment(node, $2);
@@ -3427,28 +3420,6 @@ c_constructor_decl : storage_class type LPAREN parms RPAREN ctor_end {
                 }
                 ;
 
-/* ------------------------------------------------------------
-   A Doxygen Comment (a string in Doxygen Format)
-   ------------------------------------------------------------ */
-
-doxygen_comment : DOXYGENSTRING {
-                  $$ = $1;
-		}
-		| DOXYGENSTRING doxygen_comment {
-		  Append($1, $2);
-		  $$ = $1;
-		}
-		;
-
-doxygen_post_comment : DOXYGENPOSTSTRING {
-                  $$ = $1;
-		}
-		| DOXYGENPOSTSTRING doxygen_post_comment {
-		  Append($1, $2);
-		  $$ = $1;
-		}
-		;
-
 /* ======================================================================
  *                       C++ Support
  * ====================================================================== */
@@ -4446,11 +4417,11 @@ cpp_member_no_dox : c_declaration { $$ = $1; }
 cpp_member   : cpp_member_no_dox {
 		$$ = $1;
 	     }
-             | doxygen_comment cpp_member_no_dox {
+             | DOXYGENSTRING cpp_member_no_dox {
 	         $$ = $2;
 		 set_comment($2, $1);
 	     }
-             | cpp_member_no_dox doxygen_post_comment {
+             | cpp_member_no_dox DOXYGENPOSTSTRING {
 	         $$ = $1;
 		 set_comment($1, $2);
 	     }
@@ -4827,7 +4798,7 @@ ptail          : COMMA parm ptail {
                  set_nextSibling($2,$3);
 		 $$ = $2;
                 }
-	       | COMMA doxygen_post_comment parm ptail {
+	       | COMMA DOXYGENPOSTSTRING parm ptail {
 		 set_comment(previousNode, $2);
                  set_nextSibling($3,$4);
 		 $$ = $3;
@@ -4871,11 +4842,11 @@ parm_no_dox	: rawtype parameter_declarator {
 parm		: parm_no_dox {
 		  $$ = $1;
 		}
-		| doxygen_comment parm_no_dox {
+		| DOXYGENSTRING parm_no_dox {
 		  $$ = $2;
 		  set_comment($2, $1);
 		}
-		| parm_no_dox doxygen_post_comment {
+		| parm_no_dox DOXYGENPOSTSTRING {
 		  $$ = $1;
 		  set_comment($1, $2);
 		}
@@ -5999,19 +5970,19 @@ enumlist_item	: optional_ignored_define edecl_with_dox optional_ignored_define {
 edecl_with_dox	: edecl {
 		  $$ = $1;
 		}
-		| doxygen_comment edecl {
+		| DOXYGENSTRING edecl {
 		  $$ = $2;
 		  set_comment($2, $1);
 		}
-		| edecl doxygen_post_comment {
+		| edecl DOXYGENPOSTSTRING {
 		  $$ = $1;
 		  set_comment($1, $2);
 		}
-		| doxygen_post_comment edecl {
+		| DOXYGENPOSTSTRING edecl {
 		  $$ = $2;
 		  set_comment(previousNode, $1);
 		}
-		| doxygen_post_comment edecl doxygen_post_comment {
+		| DOXYGENPOSTSTRING edecl DOXYGENPOSTSTRING {
 		  $$ = $2;
 		  set_comment(previousNode, $1);
 		  set_comment($2, $3);