New Nim AST based backend - #defines working

This commit is contained in:
Ganesh Viswanathan 2019-12-29 04:46:19 -06:00
commit 0ae87e82f5
5 changed files with 298 additions and 177 deletions

133
nimterop/ast2.nim Normal file
View file

@ -0,0 +1,133 @@
import macros, os, strutils, tables, times
import compiler/[ast, idents, options, renderer]
import "."/treesitter/api
import "."/[compat, globals, getters]
proc addConst(nimState: NimState, node: TSNode) =
  ## Append a `nkConstDef` built from `node` to `nimState.constSection`.
  ##
  ## Nothing is added when `node` has fewer than two named children or when
  ## `getLit()` maps the value text to `nkNilLit`.
  echo "Const: " & nimState.getNodeVal(node)

  if node.len() < 2:
    return

  let
    constDef = newNode(nkConstDef)
    (name, info) = nimState.getNameInfo(node.getAtom(), nskConst)
    ident = nimState.getIdent(name, info)
    val = nimState.getNodeVal(node[1]).getLit()

  # Values that could not be turned into a literal are skipped
  if val.kind == nkNilLit:
    return

  # Layout: identifier, empty type slot, value
  for child in [ident, newNode(nkEmpty), val]:
    constDef.add child
  nimState.constSection.add constDef
proc addType(nimState: NimState, node: TSNode) =
  ## Placeholder: only echoes the source text of `node` with a "Type: " prefix.
  let text = nimState.getNodeVal(node)
  echo "Type: ", text
proc addEnum(nimState: NimState, node: TSNode) =
  ## Placeholder: only echoes the source text of `node` with an "Enum: " prefix.
  let text = nimState.getNodeVal(node)
  echo "Enum: ", text
proc addProc(nimState: NimState, node: TSNode) =
  ## Placeholder: only echoes the source text of `node` with a "Proc: " prefix.
  let text = nimState.getNodeVal(node)
  echo "Proc: ", text
proc processNode(nimState: NimState, node: TSNode): bool =
  ## Dispatch `node` to the matching `add*` handler based on its type name.
  ##
  ## Returns `true` when a handler was invoked and `false` when the node type
  ## (or the kind of `type_definition`) is not recognized.
  let kind = node.getName()
  result = true

  if kind == "preproc_def":
    nimState.addConst(node)
  elif kind == "type_definition":
    # A typedef is routed by what its first-child chain contains
    if node.inTree("struct_specifier"):
      nimState.addType(node)
    elif node.inTree("enum_specifier"):
      nimState.addEnum(node)
    else:
      # Unknown type
      result = false
  elif kind == "struct_specifier":
    nimState.addType(node)
  elif kind == "enum_specifier":
    nimState.addEnum(node)
  elif kind == "declaration":
    nimState.addProc(node)
  else:
    # Unknown
    result = false
proc searchTree(nimState: NimState, root: TSNode) =
  ## Walk the tree-sitter AST starting at `root`, handing every visited node
  ## to `processNode`.
  ##
  ## Children of a node are visited only when `processNode` returned `false`
  ## for it; otherwise the walk continues with the next named sibling.
  var
    current = root
    depth = 0

  while true:
    if current.tsNodeIsNull() or depth <= -1:
      break

    let handled = nimState.processNode(current)

    # Pick the next candidate: first child when unhandled, else next sibling
    var candidate: TSNode
    if handled or current.len() == 0:
      candidate = current.tsNodeNextNamedSibling()
    else:
      candidate = current[0]
      depth += 1

    if not candidate.tsNodeIsNull():
      current = candidate
    else:
      # No candidate - climb until an ancestor has a following named sibling,
      # or we return to `root` / leave the tree
      while true:
        current = current.tsNodeParent()
        depth -= 1
        if depth == -1 or current == root:
          break
        let sibling = current.tsNodeNextNamedSibling()
        if not sibling.tsNodeIsNull():
          current = sibling
          break

    if current == root:
      break
proc printNimHeader*() =
  ## Echo the header of the generated Nim output: a timestamp, the command
  ## line used, a hint pragma and the `nimterop/types` import.
  let
    timestamp = $now()
    app = getAppFilename()
    args = commandLineParams().join(" ")
    header = """# Generated at $1
# Command line:
# $2 $3
{.hint[ConvFromXtoItselfNotNeeded]: off.}
import nimterop/types
""" % [timestamp, app, args]

  echo header
proc printNim*(gState: State, fullpath: string, root: TSNode) =
  ## Convert the tree-sitter AST rooted at `root` into Nim AST sections and
  ## echo the rendered result.
  ##
  ## `fullpath` is stored as the source file and is used to derive the
  ## current header identifier.
  let nimState = new(NimState)

  nimState.identifiers = newTable[string, string]()

  nimState.gState = gState
  nimState.currentHeader = getCurrentHeader(fullpath)
  nimState.sourceFile = fullpath

  # Nim compiler objects
  nimState.identCache = newIdentCache()
  nimState.config = newConfigRef()

  nimState.constSection = newNode(nkConstSection)
  nimState.enumSection = newNode(nkStmtList)
  nimState.procSection = newNode(nkStmtList)
  nimState.typeSection = newNode(nkTypeSection)

  nimState.searchTree(root)

  # Emit sections in a fixed order: enums, consts, types, procs
  let tree = newNode(nkStmtList)
  tree.add nimState.enumSection
  tree.add nimState.constSection
  tree.add nimState.typeSection
  tree.add nimState.procSection

  echo tree.renderTree()

View file

@ -2,6 +2,8 @@ import dynlib, macros, os, sequtils, sets, strformat, strutils, tables, times
import regex
import compiler/[ast, idents, lineinfos, msgs, pathutils]
import "."/[build, compat, globals, plugin, treesitter/api]
const gReserved = """
@ -26,6 +28,8 @@ when while
xor
yield""".split(Whitespace).toHashSet()
# Types related
const gTypeMap* = {
# char
"char": "cchar",
@ -91,6 +95,21 @@ proc getType*(str: string): string =
if gTypeMap.hasKey(result):
result = gTypeMap[result]
proc getPtrType*(str: string): string =
  ## Map selected pointer type spellings to their shorthand Nim equivalents;
  ## any other input is returned unchanged.
  if str == "ptr cchar":
    result = "cstring"
  elif str == "ptr ptr cchar":
    result = "ptr cstring"
  elif str == "ptr object":
    result = "pointer"
  elif str == "ptr ptr object":
    result = "ptr pointer"
  else:
    result = str
# Identifier related
proc checkIdentifier(name, kind, parent, origName: string) =
let
parentStr = if parent.nBl: parent & ":" else: ""
@ -165,6 +184,8 @@ proc addNewIdentifer*(nimState: NimState, name: string, override = false): bool
nimState.identifiers[nimName] = name
result = true
# Overrides related
proc getOverride*(nimState: NimState, name: string, kind: NimSymKind): string =
doAssert name.nBl, "Blank identifier error"
@ -189,34 +210,96 @@ proc getOverrideFinal*(nimState: NimState, kind: NimSymKind): string =
for i in nimState.gState.onSymbolOverrideFinal(typ):
result &= "\n" & nimState.getOverride(i, kind)
proc getPtrType*(str: string): string =
  ## Translate a few pointer type spellings into shorthand Nim types.
  ## Inputs without a mapping come back as-is.
  const shorthand = [
    ("ptr cchar", "cstring"),
    ("ptr ptr cchar", "ptr cstring"),
    ("ptr object", "pointer"),
    ("ptr ptr object", "ptr pointer")
  ]

  for entry in shorthand:
    if entry[0] == str:
      return entry[1]

  result = str
proc getLit*(str: string): PNode =
  ## Convert the text of a `#define` value into a Nim literal node.
  ##
  ## A trailing `//` or `/* */` comment is removed and the remainder stripped
  ## before matching. Recognized forms:
  ## - decimal integer      -> `nkIntLit`
  ## - decimal float        -> `nkFloatLit`
  ## - `0x` hexadecimal     -> `nkIntLit`
  ## - single ASCII char    -> `nkCharLit`
  ## - ASCII string         -> `nkStrLit` (surrounding quotes removed)
  ##
  ## Anything else, including a number the parser rejects with `ValueError`
  ## (e.g. an integer outside the `int` range), yields `nkNilLit`.
  let
    lit = str.replace(re"/[/*].*?(?:\*/)?$", "").strip()

  try:
    if lit.contains(re"^[\-]?[\d]+$"):              # decimal
      result = newIntNode(nkIntLit, parseInt(lit))
    elif lit.contains(re"^[\-]?[\d]*[.]?[\d]+$"):   # float
      result = newFloatNode(nkFloatLit, parseFloat(lit))
    elif lit.contains(re"^0x[\da-fA-F]+$"):         # hexadecimal
      result = newIntNode(nkIntLit, parseHexInt(lit))
    elif lit.contains(re"^'[[:ascii:]]'$"):         # char
      result = newNode(nkCharLit)
      result.intVal = lit[1].int64
    elif lit.contains(re"""^"[[:ascii:]]+"$"""):    # char *
      result = newStrNode(nkStrLit, lit[1 .. ^2])
    else:
      result = newNode(nkNilLit)
  except ValueError:
    # Matched a numeric pattern but cannot be represented - treat as unknown
    result = newNode(nkNilLit)
# TSNode shortcuts
proc len*(node: TSNode): uint =
  ## Named child count of `node`; 0 when `node` is null.
  result =
    if node.tsNodeIsNull:
      0'u
    else:
      node.tsNodeNamedChildCount().uint
proc `[]`*(node: TSNode, i: BiggestUInt): TSNode =
  ## `i`-th named child of `node`. An out-of-range index yields the
  ## default-initialized `TSNode` instead of raising.
  if i >= node.len():
    return
  result = node.tsNodeNamedChild(i.uint32)
proc getName*(node: TSNode): string {.inline.} =
  ## Type name of `node` as a Nim string; empty for a null node.
  if node.tsNodeIsNull:
    return ""
  result = $node.tsNodeType()
proc getNodeVal*(nimState: NimState, node: TSNode): string =
  ## Source text covered by `node` (start byte through end byte - 1 of
  ## `gState.code`), with surrounding whitespace stripped.
  ##
  ## Returns an empty string for a null node.
  if not node.tsNodeIsNull:
    return nimState.gState.code[node.tsNodeStartByte() .. node.tsNodeEndByte()-1].strip()
proc getAtom*(node: TSNode): TSNode =
  ## Follow the first-child chain from `node` and return the first node whose
  ## type name is in `gAtoms`.
  ##
  ## Yields the default-initialized `TSNode` when the chain ends without
  ## reaching an atom.
  var cur = node
  while not cur.tsNodeIsNull:
    if cur.getName() in gAtoms:
      return cur
    if cur.len() == 0:
      break
    cur = cur[0]
proc getPtrCount*(node: TSNode): string =
  ## Build a "ptr " prefix - one per consecutive node on the first-child
  ## chain whose type name contains "pointer_declarator".
  if node.tsNodeIsNull:
    return

  var cur = node
  while "pointer_declarator" in cur.getName():
    result.add "ptr "
    if cur.len() == 0:
      break
    cur = cur[0]
proc getDeclarator*(node: TSNode): TSNode =
  ## Follow the first-child chain from `node` and return the first function
  ## or array declarator found.
  ##
  ## Yields the default-initialized `TSNode` when none is on the chain.
  var cur = node
  while not cur.tsNodeIsNull:
    if cur.getName() in ["function_declarator", "array_declarator"]:
      return cur
    if cur.len() == 0:
      break
    cur = cur[0]
proc inTree*(node: TSNode, ntype: string): bool =
  ## Report whether `node` or any node on its first-child chain has the type
  ## name `ntype`.
  var cur = node
  while true:
    if cur.tsNodeIsNull:
      return false
    if cur.getName() == ntype:
      return true
    # Only the first named child is followed at each level
    cur = cur[0]
proc inChildren*(node: TSNode, ntype: string): bool =
  ## Report whether any immediate named child of `node` has type name `ntype`.
  for idx in 0 ..< node.len():
    if node[idx].getName() == ntype:
      return true
  return false
proc getLineCol*(gState: State, node: TSNode): tuple[line, col: int] =
# Get line number and column info for node
result.line = 1
result.col = 1
for i in 0 .. node.tsNodeStartByte().int-1:
@ -225,6 +308,54 @@ proc getLineCol*(gState: State, node: TSNode): tuple[line, col: int] =
result.line += 1
result.col += 1
proc getTSNodeNamedChildCountSansComments*(node: TSNode): int =
  ## Count the named children of `node` whose type name is not "comment".
  for i in 0 ..< node.len():
    # Inspect the child itself - testing `node` would count every child
    # (or none) depending only on the parent's type
    if node[i].getName() != "comment":
      result += 1
proc getPxName*(node: TSNode, offset: int): string =
  ## Type name of the `offset`-th ancestor of `node` (1 = parent,
  ## 2 = grandparent, ...). Empty when that ancestor does not exist.
  var
    ancestor = node
    climbed = 0

  while climbed < offset and not ancestor.tsNodeIsNull():
    ancestor = ancestor.tsNodeParent()
    inc climbed

  # Stopped early, or landed on a null node: no such ancestor
  if climbed != offset or ancestor.tsNodeIsNull():
    return ""

  result = ancestor.getName()
# Compiler shortcuts
proc getLineInfo*(nimState: NimState, node: TSNode): TLineInfo =
  ## Build a compiler `TLineInfo` for `node` from its line:col position and
  ## `nimState.sourceFile`.
  let pos = nimState.gState.getLineCol(node)

  newLineInfo(nimState.config, AbsoluteFile(nimState.sourceFile), pos.line, pos.col)
proc getIdent*(nimState: NimState, name: string, info: TLineInfo, exported = true): PNode =
  ## Create the identifier node for `name` at `info`.
  ##
  ## With `exported` set the identifier is wrapped in a `nkPostfix` node whose
  ## first child is the `*` marker; otherwise a plain ident node is returned.
  let
    star = getIdent(nimState.identCache, "*")
    sym = getIdent(nimState.identCache, name)

  if not exported:
    return newIdentNode(sym, info)

  result = newNode(nkPostfix)
  result.add newIdentNode(star, info)
  result.add newIdentNode(sym, info)
proc getNameInfo*(nimState: NimState, node: TSNode, kind: NimSymKind, parent = ""):
  tuple[name: string, info: TLineInfo] =
  ## Convenience wrapper returning both the identifier derived from the text
  ## of `node` (via `getIdentifier`) and the node's line info.
  let rawName = nimState.getNodeVal(node)

  result = (
    name: nimState.getIdentifier(rawName, kind, parent),
    info: nimState.getLineInfo(node)
  )
proc getCurrentHeader*(fullpath: string): string =
  ## Derive a header identifier from `fullpath`: "header" followed by the
  ## file name (no directory, no extension) with all "." and "-" removed.
  let
    baseName = fullpath.splitFile().name
    cleaned = baseName.multiReplace([(".", ""), ("-", "")])

  result = "header" & cleaned
@ -291,91 +422,6 @@ proc getPreprocessor*(gState: State, fullpath: string, mode = "cpp"): string =
replace(re"__attribute__[ ]*\(\(.*?\)\)([ ,;])", "$1").
removeStatic()
converter toString*(kind: Kind): string =
  ## Render a `Kind` as its grammar suffix; `exactlyOne` has no suffix.
  const suffixes: array[Kind, string] = [
    exactlyOne: "",
    oneOrMore: "+",
    zeroOrMore: "*",
    zeroOrOne: "?",
    orWithNext: "!"
  ]

  result = suffixes[kind]
converter toKind*(kind: string): Kind =
  ## Parse a grammar suffix into a `Kind`; any unrecognized string maps to
  ## `exactlyOne`.
  if kind == "+":
    result = oneOrMore
  elif kind == "*":
    result = zeroOrMore
  elif kind == "?":
    result = zeroOrOne
  elif kind == "!":
    result = orWithNext
  else:
    result = exactlyOne
proc getNameKind*(name: string): tuple[name: string, kind: Kind, recursive: bool] =
  ## Split a grammar token into its bare name, its `Kind` (taken from the
  ## last character) and whether it carried the leading `^` recursive marker.
  result.recursive = name[0] == '^'
  result.name =
    if result.recursive:
      name[1 .. ^1]
    else:
      name

  # The last character, when it is a known suffix, selects the kind
  result.kind = toKind($name[^1])

  if result.kind != exactlyOne:
    # Drop the suffix character from the name
    result.name = result.name[0 .. ^2]
proc getTSNodeNamedChildCountSansComments*(node: TSNode): int =
  ## Count the named children of `node` whose type is not "comment".
  if node.tsNodeNamedChildCount() != 0:
    for i in 0 .. node.tsNodeNamedChildCount()-1:
      # Check the i-th child, not `node` itself, so comments are excluded
      if $node.tsNodeNamedChild(i).tsNodeType() != "comment":
        result += 1
proc getTSNodeNamedChildNames*(node: TSNode): seq[string] =
  ## Type names of the named children of `node`, in order, leaving out
  ## children of type "comment".
  let total = node.tsNodeNamedChildCount()
  if total == 0:
    return

  for i in 0 .. total-1:
    let childType = $node.tsNodeNamedChild(i).tsNodeType()
    if childType == "comment":
      continue
    result.add(childType)
proc getRegexForAstChildren*(ast: ref Ast): string =
  ## Build an anchored regex string from the children of `ast`.
  ##
  ## Each child contributes `(?:name)` followed by its kind suffix; a child of
  ## kind "!" instead leaves its group open with a trailing `|` so the next
  ## child becomes an alternative inside the same group.
  result = "^"

  for child in ast.children:
    let
      kind: string = child.kind
      # Continue the open alternation when the previous child ended with '|'
      opener = if result[^1] == '|': "" else: "(?:"

    if kind == "!":
      result &= opener & child.name & "|"
    else:
      result &= opener & child.name & ")" & kind

  result &= "$"
proc getAstChildByName*(ast: ref Ast, name: string): ref Ast =
  ## Find the first child of `ast` whose `|`-separated name list contains
  ## `name`.
  ##
  ## When nothing matches and the only child is named ".", that child is
  ## returned; otherwise the result is nil.
  for child in ast.children:
    if name in child.name.split("|"):
      return child

  let soleWildcard = ast.children.len == 1 and ast.children[0].name == "."
  if soleWildcard:
    result = ast.children[0]
proc getPxName*(node: TSNode, offset: int): string =
  ## Type of the ancestor `offset` levels above `node`; empty when the tree
  ## has no ancestor at that level.
  var
    cursor = node
    steps = 0

  while true:
    if cursor.tsNodeIsNull() or steps >= offset:
      break
    cursor = cursor.tsNodeParent()
    steps += 1

  if steps == offset and not cursor.tsNodeIsNull():
    result = $cursor.tsNodeType()
proc getNimExpression*(nimState: NimState, expr: string): string =
var
clean = expr.multiReplace([("\n", " "), ("\r", "")])
@ -435,42 +481,6 @@ proc getSplitComma*(joined: seq[string]): seq[string] =
for i in joined:
result = result.concat(i.split(","))
proc getHeader*(nimState: NimState): string =
  ## Pragma fragment naming the current header. It is produced only when
  ## `gState.dynlib` is blank; otherwise the result is empty.
  if nimState.gState.dynlib.Bl:
    return &", header: {nimState.currentHeader}"
  return ""
proc getDynlib*(nimState: NimState): string =
  ## Pragma fragment naming the dynlib. It is produced only when
  ## `gState.dynlib` is not blank; otherwise the result is empty.
  if nimState.gState.dynlib.nBl:
    return &", dynlib: {nimState.gState.dynlib}"
  return ""
proc getImportC*(nimState: NimState, origName, nimName: string): string =
  ## Pragma text importing `origName`.
  ##
  ## When the Nim name equals the original name the short form
  ## `nimState.impShort` is used; otherwise an explicit `importc` with the
  ## original name plus the header fragment is built.
  if nimName == origName:
    return nimState.impShort

  result = "importc: \"" & origName & "\"" & nimState.getHeader()
proc getPragma*(nimState: NimState, pragmas: varargs[string]): string =
  ## Join the non-blank `pragmas` into a ` {.a, b.}` pragma string.
  ##
  ## The `impShort & ", cdecl"` combination is collapsed to `impShort & "C"`;
  ## if `, cdecl` still remains and a dynlib fragment exists, that fragment is
  ## inserted before the closing `.}`.
  var joined = ""
  for pragma in pragmas:
    if pragma.nBl:
      joined.add pragma
      joined.add ", "

  result = joined
  if result.nBl:
    # Drop the trailing ", " before wrapping
    result = " {." & result[0 .. ^3] & ".}"

  result = result.replace(nimState.impShort & ", cdecl", nimState.impShort & "C")

  let dy = nimState.getDynlib()
  if ", cdecl" in result and dy.nBl:
    result = result.replace(".}", dy & ".}")
proc getComments*(nimState: NimState, strip = false): string =
if not nimState.gState.nocomments and nimState.commentStr.nBl:
result = "\n" & nimState.commentStr

View file

@ -1,6 +1,6 @@
import sequtils, sets, tables
import regex
import compiler/[ast, idents, options]
import "."/plugin
@ -34,24 +34,6 @@ const
].concat(toSeq(gExpressions.items))
type
Kind = enum
exactlyOne
oneOrMore # +
zeroOrMore # *
zeroOrOne # ?
orWithNext # !
Ast = object
name*: string
kind*: Kind
recursive*: bool
children*: seq[ref Ast]
when not declared(CIMPORT):
tonim*: proc (ast: ref Ast, node: TSNode, nimState: NimState)
regex*: Regex
AstTable {.used.} = TableRef[string, seq[ref Ast]]
State = ref object
compile*, defines*, headers*, includeDirs*, searchDirs*, prefix*, suffix*, symOverride*: seq[string]
@ -67,7 +49,12 @@ type
NimState {.used.} = ref object
identifiers*: TableRef[string, string]
commentStr*, constStr*, debugStr*, enumStr*, procStr*, skipStr*, typeStr*: string
commentStr*, debugStr*, skipStr*: string
# Nim compiler objects
constSection*, enumSection*, procSection*, typeSection*: PNode
identCache*: IdentCache
config*: ConfigRef
gState*: State
@ -94,7 +81,7 @@ type CompileMode = enum
const modeDefault {.used.} = $cpp # TODO: USE this everywhere relevant
when not declared(CIMPORT):
export gAtoms, gExpressions, gEnumVals, Kind, Ast, AstTable, State, NimState,
export gAtoms, gExpressions, gEnumVals, State, NimState,
nBl, Bl, CompileMode, modeDefault
# Redirect output to file when required

1
nimterop/nim.cfg Normal file
View file

@ -0,0 +1 @@
--path:"$nim"

View file

@ -2,7 +2,7 @@ import os, osproc, strformat, strutils, times
import "."/treesitter/[api, c, cpp]
import "."/[ast, compat, globals, getters, grammar]
import "."/[ast2, compat, globals, getters]
proc printLisp(gState: State, root: TSNode) =
var
@ -53,7 +53,7 @@ proc printLisp(gState: State, root: TSNode) =
if node == root:
break
proc process(gState: State, path: string, astTable: AstTable) =
proc process(gState: State, path: string) =
doAssert existsFile(path), &"Invalid path {path}"
var
@ -93,7 +93,7 @@ proc process(gState: State, path: string, astTable: AstTable) =
if gState.past:
gState.printLisp(root)
elif gState.pnim:
gState.printNim(path, root, astTable)
gState.printNim(path, root)
elif gState.preprocess:
gecho gState.code
@ -109,7 +109,6 @@ proc main(
nocomments = false,
output = "",
past = false,
pgrammar = false,
pluginSourcePath: string = "",
pnim = false,
prefix: seq[string] = @[],
@ -167,19 +166,12 @@ proc main(
doAssert gState.outputHandle.open(outputFile, fmWrite),
&"Failed to write to {outputFile}"
# Process grammar into AST
let
astTable = parseGrammar()
if pgrammar:
# Print AST of grammar
gState.printGrammar(astTable)
elif source.nBl:
if source.nBl:
# Print source after preprocess or Nim output
if gState.pnim:
gState.printNimHeader()
for src in source:
gState.process(src.expandSymlinkAbs(), astTable)
gState.process(src.expandSymlinkAbs())
# Close outputFile
if outputFile.len != 0:
@ -241,7 +233,6 @@ when isMainModule:
"nocomments": "exclude top-level comments from output",
"output": "file to output content - default stdout",
"past": "print AST output",
"pgrammar": "print grammar",
"pluginSourcePath": "Nim file to build and load as a plugin",
"pnim": "print Nim output",
"preprocess": "run preprocessor on header",
@ -260,7 +251,6 @@ when isMainModule:
"nocomments": 'c',
"output": 'o',
"past": 'a',
"pgrammar": 'g',
"pnim": 'n',
"prefix": 'E',
"preprocess": 'p',