Add more robust comment extraction

2020-04-28 22:52:24 -06:00 · 2020-04-28 22:52:24 -06:00 · 418e5db825
commit 418e5db825
parent c083b443e4
2 changed files with 102 additions and 50 deletions
--- a/nimterop/ast2.nim
+++ b/nimterop/ast2.nim
@ -691,7 +691,7 @@ proc newRecListTree(gState: State, name: string, node: TSNode): PNode =
        let
          fdecl = node[i].anyChildInTree("field_declaration_list")
          edecl = node[i].anyChildInTree("enumerator_list")
-          commentNodes = gState.getNextCommentNodes(node[i])
+          commentNodes = gState.getCommentNodes(node[i])

          # `tname` is name of nested struct / union / enum just
          # added, passed on as type name for field in `newIdentDefs()`
@ -727,7 +727,7 @@ proc addTypeObject(gState: State, node: TSNode, typeDef: PNode = nil, fname = ""
  # If `fname` is set, use it as the name when creating new PNode
  # If `istype` is set, this is a typedef, else struct/union
  decho("addTypeObject()")
-  let commentNodes = gState.getPrevCommentNodes(node.tsNodeParent())
+  let commentNodes = gState.getCommentNodes(node.tsNodeParent())

  let
    # Object has fields or not
@ -853,7 +853,7 @@ proc addTypeObject(gState: State, node: TSNode, typeDef: PNode = nil, fname = ""
      # Current node has fields
      let
        origname = gState.getNodeVal(node.getAtom())
-        commentNodes = gState.getNextCommentNodes(node)
+        commentNodes = gState.getCommentNodes(node)

        # Fix issue #185
        name =
@ -898,7 +898,7 @@ proc addTypeTyped(gState: State, node: TSNode, ftname = "", offset = 0) =
  decho("addTypeTyped()")
  let
    start = getStartAtom(node)
-    commentNodes = gState.getPrevCommentNodes(node)
+    commentNodes = gState.getCommentNodes(node)
  for i in start+1+offset ..< node.len:
    # Add a type of a specific type
    let
@ -1017,7 +1017,7 @@ proc addTypeArray(gState: State, node: TSNode) =
    # node[start] = identifier = type name
    (tname, _, info) = gState.getNameInfo(node[start].getAtom(), nskType, parent = "addTypeArray")
    tident = gState.getIdent(tname, info, exported = false)
-    commentNodes = gState.getPrevCommentNodes(node)
+    commentNodes = gState.getCommentNodes(node)

  # Could have multiple types, comma separated
  for i in start+1 ..< node.len:
@ -1405,7 +1405,7 @@ proc addEnum(gState: State, node: TSNode) =
      #   nkIdent(name) <- set the comment here
      #  )
      # )
-      defineNode[0][1].comment = gState.getCommentsStr(gState.getPrevCommentNodes(node))
+      defineNode[0][1].comment = gState.getCommentsStr(gState.getCommentNodes(node))
      gState.enumSection.add defineNode

      # Create const for fields
@ -1422,7 +1422,7 @@ proc addEnum(gState: State, node: TSNode) =

        let
          atom = en.getAtom()
-          commentNodes = gState.getNextCommentNodes(en)
+          commentNodes = gState.getCommentNodes(en)
          fname = gState.getIdentifier(gState.getNodeVal(atom), nskEnumField)

        if fname.nBl and gState.addNewIdentifer(fname):
@ -1640,7 +1640,7 @@ proc addDecl(gState: State, node: TSNode) =

  let
    start = getStartAtom(node)
-    commentNodes = gState.getPrevCommentNodes(node)
+    commentNodes = gState.getCommentNodes(node)

  for i in start+1 ..< node.len:
    if not node[i].firstChildInTree("function_declarator").isNil:
@ -1665,7 +1665,7 @@ proc addDef(gState: State, node: TSNode) =

  let
    start = getStartAtom(node)
-    commentNodes = gState.getPrevCommentNodes(node)
+    commentNodes = gState.getCommentNodes(node)

  if node[start+1].getName() == "function_declarator":
    if gState.isIncludeHeader():
--- a/nimterop/getters.nim
+++ b/nimterop/getters.nim
@ -1,5 +1,4 @@
 import dynlib, macros, os, sequtils, sets, strformat, strutils, tables, times
-import algorithm

 import regex

@ -572,30 +571,30 @@ proc getPreprocessor*(gState: State, fullpath: string): string =

  # Include content only from file
  for line in execAction(cmd).output.splitLines():
-    if line.strip() != "":
-      if line.len > 1 and line[0 .. 1] == "# ":
-        start = false
+    # We want to keep blank lines here for comment processing
+    if line.len > 1 and line[0 .. 1] == "# ":
+      start = false
+      let
+        saniLine = line.sanitizePath(noQuote = true)
+      if sfile in saniLine:
+        start = true
+      elif not ("\\" in line) and not ("/" in line) and extractFilename(sfile) in line:
+        start = true
+      elif gState.recurse:
        let
-          saniLine = line.sanitizePath(noQuote = true)
-        if sfile in saniLine:
+          pDir = sfile.expandFilename().parentDir().sanitizePath(noQuote = true)
+        if pDir.Bl or pDir in saniLine:
          start = true
-        elif not ("\\" in line) and not ("/" in line) and extractFilename(sfile) in line:
-          start = true
-        elif gState.recurse:
-          let
-            pDir = sfile.expandFilename().parentDir().sanitizePath(noQuote = true)
-          if pDir.Bl or pDir in saniLine:
-            start = true
-          else:
-            for inc in gState.includeDirs:
-              if inc.absolutePath().sanitizePath(noQuote = true) in saniLine:
-                start = true
-                break
-      else:
-        if start:
-          if "#undef" in line:
-            continue
-          rdata.add line
+        else:
+          for inc in gState.includeDirs:
+            if inc.absolutePath().sanitizePath(noQuote = true) in saniLine:
+              start = true
+              break
+    else:
+      if start:
+        if "#undef" in line:
+          continue
+        rdata.add line
  return rdata.join("\n")

 converter toString*(kind: Kind): string =
@ -644,32 +643,85 @@ proc getCommentsStr*(gState: State, commentNodes: seq[TSNode]): string =
      result &= "\n  " & gState.getNodeVal(commentNode).
                          replace(re" *(//|/\*\*|\*\*/|/\*|\*/|\*)", "").replace("\n", "\n  ").strip()

-proc getPrevCommentNodes*(gState: State, node: TSNode, maxSearch=1): seq[TSNode] =
-  ## Here we want to go until the node we get is not a comment
-  ## for cases with multiple ``//`` comments instead of one ``/* */``
-  ## section
+proc getCommentNodes*(gState: State, node: TSNode, maxSearch=1): seq[TSNode] =
+  ## Get a set of comment nodes in order of priority. Will search up to ``maxSearch``
+  ## nodes before and after the current node
+  ##
+  ## Priority is (closest line number) > comment before > comment after.
+  ## This priority might need to be changed based on the project, but
+  ## for now it is good enough
+
+  # Skip this if we don't want comments
  if gState.nocomments:
    return

-  var sibling = node.tsNodePrevNamedSibling()
-  var i = 0
+  let (line, _) = gState.getLineCol(node)

-  # Search for the starting comment up to maxSearch nodes away
-  while not sibling.isNil and i < maxSearch:
-    # Once a comment is found, find all of the comments right next to
-    # it so that we can get multiple // style comments
-    while not sibling.isNil and sibling.getName() == "comment":
-      result.add(sibling)
-      sibling = sibling.tsNodePrevNamedSibling()
+  # Keep track of both directions from a node
+  var
+    prevSibling = node.tsNodePrevNamedSibling()
+    nextSibling = node.tsNodeNextNamedSibling()
+    nilNode: TSNode

-    if sibling.isNil:
+  var
+    i = 0
+    prevSiblingDistance, nextSiblingDistance: int
+    lowestDistance: int
+    commentsFound = false
+
+  while not commentsFound and i < maxSearch:
+
+    # Distance from the current node will tell us approximately if the
+    # comment belongs to the node. The closer it is in terms of line
+    # numbers, the more we can be sure it's the comment we want
+    if not prevSibling.isNil:
+      prevSiblingDistance = abs(gState.getLineCol(prevSibling)[0] - line)
+    if not nextSibling.isNil:
+      nextSiblingDistance = abs(gState.getLineCol(nextSibling)[0] - line)
+
+    lowestDistance = min(prevSiblingDistance, nextSiblingDistance)
+
+    if prevSiblingDistance > maxSearch:
+      # If the line is out of range, skip searching
+      prevSibling = nilNode # Can't do `= nil`
+
+    if nextSiblingDistance > maxSearch:
+      # Next sibling is too many lines away: stop searching forward only
+      nextSibling = nilNode
+
+    while (
+      not prevSibling.isNil and
+      prevSibling.getName() == "comment" and
+      prevSiblingDistance == lowestDistance
+    ):
+      # Put the previous nodes in reverse order so the comments
+      # make logical sense
+      result.insert(prevSibling, 0)
+      prevSibling = prevSibling.tsNodePrevNamedSibling()
+      commentsFound = true
+
+    if commentsFound:
      break

-    sibling = sibling.tsNodePrevNamedSibling()
-    i += 1
+    while (
+      not nextSibling.isNil and
+      nextSibling.getName() == "comment" and
+      nextSiblingDistance == lowestDistance
+    ):
+      result.add(nextSibling)
+      nextSibling = nextSibling.tsNodeNextNamedSibling()
+      commentsFound = true

-  # reverse the comments because we got them in reverse order
-  result.reverse
+    if commentsFound:
+      break
+
+    # Go to next sibling pair
+    if not prevSibling.isNil:
+      prevSibling = prevSibling.tsNodePrevNamedSibling()
+    if not nextSibling.isNil:
+      nextSibling = nextSibling.tsNodeNextNamedSibling()
+
+    i += 1

 proc getNextCommentNodes*(gState: State, node: TSNode, maxSearch=1): seq[TSNode] =
  ## Searches the next nodes up to maxSearch nodes away for a comment