Add more robust comment extraction

This commit is contained in:
Joey Yakimowich-Payne 2020-04-28 22:52:24 -06:00
commit 418e5db825
2 changed files with 102 additions and 50 deletions

View file

@ -691,7 +691,7 @@ proc newRecListTree(gState: State, name: string, node: TSNode): PNode =
let
fdecl = node[i].anyChildInTree("field_declaration_list")
edecl = node[i].anyChildInTree("enumerator_list")
commentNodes = gState.getNextCommentNodes(node[i])
commentNodes = gState.getCommentNodes(node[i])
# `tname` is name of nested struct / union / enum just
# added, passed on as type name for field in `newIdentDefs()`
@ -727,7 +727,7 @@ proc addTypeObject(gState: State, node: TSNode, typeDef: PNode = nil, fname = ""
# If `fname` is set, use it as the name when creating new PNode
# If `istype` is set, this is a typedef, else struct/union
decho("addTypeObject()")
let commentNodes = gState.getPrevCommentNodes(node.tsNodeParent())
let commentNodes = gState.getCommentNodes(node.tsNodeParent())
let
# Object has fields or not
@ -853,7 +853,7 @@ proc addTypeObject(gState: State, node: TSNode, typeDef: PNode = nil, fname = ""
# Current node has fields
let
origname = gState.getNodeVal(node.getAtom())
commentNodes = gState.getNextCommentNodes(node)
commentNodes = gState.getCommentNodes(node)
# Fix issue #185
name =
@ -898,7 +898,7 @@ proc addTypeTyped(gState: State, node: TSNode, ftname = "", offset = 0) =
decho("addTypeTyped()")
let
start = getStartAtom(node)
commentNodes = gState.getPrevCommentNodes(node)
commentNodes = gState.getCommentNodes(node)
for i in start+1+offset ..< node.len:
# Add a type of a specific type
let
@ -1017,7 +1017,7 @@ proc addTypeArray(gState: State, node: TSNode) =
# node[start] = identifier = type name
(tname, _, info) = gState.getNameInfo(node[start].getAtom(), nskType, parent = "addTypeArray")
tident = gState.getIdent(tname, info, exported = false)
commentNodes = gState.getPrevCommentNodes(node)
commentNodes = gState.getCommentNodes(node)
# Could have multiple types, comma separated
for i in start+1 ..< node.len:
@ -1405,7 +1405,7 @@ proc addEnum(gState: State, node: TSNode) =
# nkIdent(name) <- set the comment here
# )
# )
defineNode[0][1].comment = gState.getCommentsStr(gState.getPrevCommentNodes(node))
defineNode[0][1].comment = gState.getCommentsStr(gState.getCommentNodes(node))
gState.enumSection.add defineNode
# Create const for fields
@ -1422,7 +1422,7 @@ proc addEnum(gState: State, node: TSNode) =
let
atom = en.getAtom()
commentNodes = gState.getNextCommentNodes(en)
commentNodes = gState.getCommentNodes(en)
fname = gState.getIdentifier(gState.getNodeVal(atom), nskEnumField)
if fname.nBl and gState.addNewIdentifer(fname):
@ -1640,7 +1640,7 @@ proc addDecl(gState: State, node: TSNode) =
let
start = getStartAtom(node)
commentNodes = gState.getPrevCommentNodes(node)
commentNodes = gState.getCommentNodes(node)
for i in start+1 ..< node.len:
if not node[i].firstChildInTree("function_declarator").isNil:
@ -1665,7 +1665,7 @@ proc addDef(gState: State, node: TSNode) =
let
start = getStartAtom(node)
commentNodes = gState.getPrevCommentNodes(node)
commentNodes = gState.getCommentNodes(node)
if node[start+1].getName() == "function_declarator":
if gState.isIncludeHeader():

View file

@ -1,5 +1,4 @@
import dynlib, macros, os, sequtils, sets, strformat, strutils, tables, times
import algorithm
import regex
@ -572,30 +571,30 @@ proc getPreprocessor*(gState: State, fullpath: string): string =
# Include content only from file
for line in execAction(cmd).output.splitLines():
if line.strip() != "":
if line.len > 1 and line[0 .. 1] == "# ":
start = false
# We want to keep blank lines here for comment processing
if line.len > 1 and line[0 .. 1] == "# ":
start = false
let
saniLine = line.sanitizePath(noQuote = true)
if sfile in saniLine:
start = true
elif not ("\\" in line) and not ("/" in line) and extractFilename(sfile) in line:
start = true
elif gState.recurse:
let
saniLine = line.sanitizePath(noQuote = true)
if sfile in saniLine:
pDir = sfile.expandFilename().parentDir().sanitizePath(noQuote = true)
if pDir.Bl or pDir in saniLine:
start = true
elif not ("\\" in line) and not ("/" in line) and extractFilename(sfile) in line:
start = true
elif gState.recurse:
let
pDir = sfile.expandFilename().parentDir().sanitizePath(noQuote = true)
if pDir.Bl or pDir in saniLine:
start = true
else:
for inc in gState.includeDirs:
if inc.absolutePath().sanitizePath(noQuote = true) in saniLine:
start = true
break
else:
if start:
if "#undef" in line:
continue
rdata.add line
else:
for inc in gState.includeDirs:
if inc.absolutePath().sanitizePath(noQuote = true) in saniLine:
start = true
break
else:
if start:
if "#undef" in line:
continue
rdata.add line
return rdata.join("\n")
converter toString*(kind: Kind): string =
@ -644,32 +643,85 @@ proc getCommentsStr*(gState: State, commentNodes: seq[TSNode]): string =
result &= "\n " & gState.getNodeVal(commentNode).
replace(re" *(//|/\*\*|\*\*/|/\*|\*/|\*)", "").replace("\n", "\n ").strip()
proc getPrevCommentNodes*(gState: State, node: TSNode, maxSearch=1): seq[TSNode] =
  ## Collect the comment nodes that precede `node`.
  ##
  ## Up to ``maxSearch`` previous named siblings are inspected. Once a
  ## comment is found, every directly adjacent previous comment is collected
  ## too, for cases with multiple ``//`` comments instead of one ``/* */``
  ## section. The result is returned in source order.
  ##
  ## Returns an empty seq when `gState.nocomments` is set.

  # Skip this if we don't want comments
  if gState.nocomments:
    return

  var sibling = node.tsNodePrevNamedSibling()
  var i = 0

  # Search for the starting comment up to maxSearch nodes away
  while not sibling.isNil and i < maxSearch:
    # Once a comment is found, find all of the comments right next to
    # it so that we can get multiple // style comments
    while not sibling.isNil and sibling.getName() == "comment":
      result.add(sibling)
      sibling = sibling.tsNodePrevNamedSibling()
    if sibling.isNil:
      break
    sibling = sibling.tsNodePrevNamedSibling()
    i += 1

  # reverse the comments because we got them in reverse order
  result.reverse

proc getCommentNodes*(gState: State, node: TSNode, maxSearch=1): seq[TSNode] =
  ## Get a set of comment nodes in order of priority. Will search up to
  ## ``maxSearch`` nodes before and after the current node.
  ##
  ## Priority is (closest line number) > comment before > comment after.
  ## This priority might need to be changed based on the project, but
  ## for now it is good enough.
  ##
  ## ``maxSearch`` is used both as the number of sibling pairs to inspect
  ## and as the maximum line distance a sibling may have from `node`.
  ##
  ## Returns an empty seq when `gState.nocomments` is set or when no
  ## qualifying comment is found. Comments are returned in source order.

  # Skip this if we don't want comments
  if gState.nocomments:
    return

  let (line, _) = gState.getLineCol(node)

  # Keep track of both directions from a node
  var
    prevSibling = node.tsNodePrevNamedSibling()
    nextSibling = node.tsNodeNextNamedSibling()
    nilNode: TSNode

  var
    i = 0
    prevSiblingDistance, nextSiblingDistance: int
    lowestDistance: int
    commentsFound = false

  while not commentsFound and i < maxSearch:
    # Distance from the current node will tell us approximately if the
    # comment belongs to the node. The closer it is in terms of line
    # numbers, the more we can be sure it's the comment we want
    #
    # NOTE(review): when a sibling is nil its distance keeps its previous
    # value (0 on the first pass), which still feeds into `lowestDistance`
    # below - confirm that this is the intended tie-breaking behaviour.
    if not prevSibling.isNil:
      prevSiblingDistance = abs(gState.getLineCol(prevSibling)[0] - line)
    if not nextSibling.isNil:
      nextSiblingDistance = abs(gState.getLineCol(nextSibling)[0] - line)

    lowestDistance = min(prevSiblingDistance, nextSiblingDistance)

    if prevSiblingDistance > maxSearch:
      # If the line is out of range, skip searching
      prevSibling = nilNode # Can't do `= nil`

    if nextSiblingDistance > maxSearch:
      # If the line is out of range, skip searching. This must clear the
      # *next* sibling: clearing `prevSibling` here would drop a valid
      # preceding comment whenever the following node is far away.
      nextSibling = nilNode

    while (
      not prevSibling.isNil and
      prevSibling.getName() == "comment" and
      prevSiblingDistance == lowestDistance
    ):
      # Put the previous nodes in reverse order so the comments
      # make logical sense
      result.insert(prevSibling, 0)
      prevSibling = prevSibling.tsNodePrevNamedSibling()
      commentsFound = true

    # Comments before the node win over comments after it
    if commentsFound:
      break

    while (
      not nextSibling.isNil and
      nextSibling.getName() == "comment" and
      nextSiblingDistance == lowestDistance
    ):
      result.add(nextSibling)
      nextSibling = nextSibling.tsNodeNextNamedSibling()
      commentsFound = true

    if commentsFound:
      break

    # Go to next sibling pair
    if not prevSibling.isNil:
      prevSibling = prevSibling.tsNodePrevNamedSibling()
    if not nextSibling.isNil:
      nextSibling = nextSibling.tsNodeNextNamedSibling()

    i += 1
proc getNextCommentNodes*(gState: State, node: TSNode, maxSearch=1): seq[TSNode] =
## Searches the next nodes up to maxSearch nodes away for a comment