Merge pull request #191 from jyapayne/poc_print_ast

[ast2] Enable parsing some C Macro expressions to Nim expressions using treesitter
This commit is contained in:
genotrance 2020-04-26 11:29:27 -05:00 committed by GitHub
commit 73ef7c4ccd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 822 additions and 122 deletions

View file

@ -1,12 +1,12 @@
import macros, os, sequtils, sets, strformat, strutils, tables, times
import regex
import options as opts
import compiler/[ast, idents, lineinfos, modulegraphs, msgs, options, parser, renderer]
import compiler/[ast, idents, lineinfos, modulegraphs, msgs, options, renderer]
import "."/treesitter/api
import "."/[globals, getters]
import "."/[globals, getters, exprparser, comphelp, tshelp]
proc getPtrType*(str: string): string =
result = case str:
@ -19,55 +19,6 @@ proc getPtrType*(str: string): string =
else:
str
proc handleError*(conf: ConfigRef, info: TLineInfo, msg: TMsgKind, arg: string) =
# Raise exception in parseString() instead of exiting for errors
if msg < warnMin:
raise newException(Exception, msgKindToString(msg))
proc parseString(gState: State, str: string): PNode =
# Parse a string into Nim AST - use custom error handler that raises
# an exception rather than exiting on failure
try:
result = parseString(
str, gState.identCache, gState.config, errorHandler = handleError
)
except:
decho getCurrentExceptionMsg()
proc getLit*(gState: State, str: string, expression = false): PNode =
# Used to convert #define literals into const and expressions
# in array sizes
#
# `expression` is true when `str` should be converted into a Nim expression
let
str = str.replace(re"/[/*].*?(?:\*/)?$", "").strip()
if str.contains(re"^[\-]?[\d]+$"): # decimal
result = newIntNode(nkIntLit, parseInt(str))
elif str.contains(re"^[\-]?[\d]*[.]?[\d]+$"): # float
result = newFloatNode(nkFloatLit, parseFloat(str))
elif str.contains(re"^0x[\da-fA-F]+$"): # hexadecimal
result = gState.parseString(str)
elif str.contains(re"^'[[:ascii:]]'$"): # char
result = newNode(nkCharLit)
result.intVal = str[1].int64
elif str.contains(re"""^"[[:ascii:]]+"$"""): # char *
result = newStrNode(nkStrLit, str[1 .. ^2])
else:
let
str =
if expression: gState.getNimExpression(str)
else: str
result = gState.parseString(str)
if result.isNil:
result = newNode(nkNilLit)
proc getOverrideOrSkip(gState: State, node: TSNode, origname: string, kind: NimSymKind): PNode =
# Check if symbol `origname` of `kind` and `origname` has any cOverride defined
# and use that if present
@ -90,6 +41,7 @@ proc getOverrideOrSkip(gState: State, node: TSNode, origname: string, kind: NimS
result = pnode[0][0]
else:
gecho &"\n# $1'{origname}' skipped" % skind
gState.skippedSyms.incl origname
if gState.debug:
gState.skipStr &= &"\n{gState.getNodeVal(node)}"
@ -148,15 +100,36 @@ proc newConstDef(gState: State, node: TSNode, fname = "", fval = ""): PNode =
fval
else:
gState.getNodeVal(node[1])
valident =
gState.getLit(val)
var valident = newNode(nkNone)
withCodeAst(val, gState.mode):
# This section is a hack for determining that the first
# node is a type, which shouldn't be accepted by a const
# def section. Need to replace this with some other mechanism
# to handle type aliases
var maybeTyNode: TSNode
# Take the very first node, which may be 2 levels
# down if there is an error node
if root.len > 0 and root[0].getName() == "ERROR":
maybeTyNode = root[0][0]
elif root.len > 0:
maybeTyNode = root[0]
if not maybeTyNode.isNil:
let name = maybeTyNode.getName()
case name
of "type_descriptor", "sized_type_specifier":
discard
else:
# Can't do gState.parseCExpression(root) here for some reason?
# get a SEGFAULT if we use root
valident = gState.parseCExpression(val)
if name.Bl:
# Name skipped or overridden since blank
result = gState.getOverrideOrSkip(node, origname, nskConst)
elif valident.kind in {nkCharLit .. nkStrLit} or
(valident.kind == nkStmtList and valident.len > 0 and
valident[0].kind in {nkCharLit .. nkStrLit}):
elif valident.kind != nkNone:
if gState.addNewIdentifer(name):
# const X* = Y
#
@ -180,6 +153,7 @@ proc newConstDef(gState: State, node: TSNode, fname = "", fval = ""): PNode =
gecho &"# const '{origname}' is duplicate, skipped"
else:
gecho &"# const '{origname}' has invalid value '{val}'"
gState.skippedSyms.incl origname
proc addConst(gState: State, node: TSNode) =
# Add a const to the AST
@ -1012,8 +986,8 @@ proc getTypeArray(gState: State, node: TSNode, tident: PNode, name: string): PNo
# type name[X] => array[X, type]
let
# Size of array could be a Nim expression
size = gState.getLit(gState.getNodeVal(cnode[1]), expression = true)
if size.kind != nkNilLit:
size = gState.parseCExpression(gState.getNodeVal(cnode[1]))
if size.kind != nkNone:
result = gState.newArrayTree(cnode, result, size)
cnode = cnode[0]
elif cnode.len == 1:
@ -1417,6 +1391,9 @@ proc addEnum(gState: State, node: TSNode) =
# Create const for fields
var
fnames: HashSet[string]
# Hold all of field information so that we can add all of them
# after the const identifiers has been updated
fieldDeclarations: seq[tuple[fname: string, fval: string, cexpr: Option[TSNode]]]
for i in 0 .. enumlist.len - 1:
let
en = enumlist[i]
@ -1435,20 +1412,25 @@ proc addEnum(gState: State, node: TSNode) =
fval = &"({prev} + 1).{name}"
if en.len > 1 and en[1].getName() in gEnumVals:
# Explicit value
fval = "(" & gState.getNimExpression(gState.getNodeVal(en[1]), name) & ")." & name
# Cannot use newConstDef() since parseString(fval) adds backticks to and/or
gState.constSection.add gState.parseString(&"const {fname}* = {fval}")[0][0]
fieldDeclarations.add((fname, "", some(en[1])))
else:
fieldDeclarations.add((fname, fval, none(TSNode)))
fnames.incl fname
prev = fname
# Add fields to list of consts after processing enum so that we don't cast
# enum field to itself
gState.constIdentifiers.incl fnames
# parseCExpression requires all const identifiers to be present for the enum
for (fname, fval, cexprNode) in fieldDeclarations:
var fval = fval
if cexprNode.isSome:
fval = "(" & $gState.parseCExpression(gState.getNodeVal(cexprNode.get()), name) & ")." & name
# Cannot use newConstDef() since parseString(fval) adds backticks to and/or
gState.constSection.add gState.parseString(&"const {fname}* = {fval}")[0][0]
# Add other names
if node.getName() == "type_definition" and node.len > 1:
gState.addTypeTyped(node, ftname = name, offset = offset)

18
nimterop/comphelp.nim Normal file
View file

@ -0,0 +1,18 @@
import compiler/[ast, lineinfos, msgs, options, parser, renderer]
import "."/[globals, getters]
proc handleError*(conf: ConfigRef, info: TLineInfo, msg: TMsgKind, arg: string) =
  ## Error handler passed to the Nim parser by `parseString()`.
  ##
  ## Any message kind below `warnMin` is turned into an exception carrying
  ## the message text, so that parsing fails by raising instead of exiting.
  ## Message kinds at or above `warnMin` are ignored.
  if msg >= warnMin:
    return
  raise newException(Exception, msgKindToString(msg))
proc parseString*(gState: State, str: string): PNode =
  ## Parse `str` into a Nim AST using the ident cache and config of `gState`.
  ##
  ## `handleError` is installed as the error handler so that parse errors
  ## raise instead of exiting. Any exception is logged through `decho` and
  ## swallowed, in which case `result` keeps its default value.
  try:
    result = str.parseString(
      gState.identCache, gState.config, errorHandler = handleError
    )
  except:
    decho getCurrentExceptionMsg()

587
nimterop/exprparser.nim Normal file
View file

@ -0,0 +1,587 @@
import strformat, strutils, macros, sets
import regex
import compiler/[ast, renderer]
import "."/treesitter/[api, c, cpp]
import "."/[globals, getters, comphelp, tshelp]
# This version of exprparser should be able to handle:
#
# All integers + integer-like expressions (hex, octal, suffixes)
# All floating point expressions (except for C++'s hexadecimal floating point literals)
# Strings and character literals, including C's escape sequences (not yet verified whether C++'s escape sequences are identical)
# Math operators (+, -, /, *)
# Some unary operators (-, !, ~). ++, --, and & are yet to be implemented
# Any identifiers
# C type descriptors (int, char, etc)
# Boolean values (true, false)
# Shift expressions (containing anything in this list)
# Cast expressions (containing anything in this list)
# Math expressions (containing anything in this list)
# Sizeof expressions (containing anything in this list)
# Parenthesized expressions (containing anything in this list)
# Expressions containing other expressions
#
# In addition to the above, it should also handle most type coercions, except
# for where Nim can't (such as uint + -int)
type
  # Raised by the expression processors in this module when a C expression
  # cannot be converted; `parseCExpression` catches it and returns an
  # `nkNone` node instead.
  ExprParseError* = object of CatchableError
template val(node: TSNode): string =
  ## Text of `node`, taken from the expression currently being parsed
  ## (`gState.currentExpr`). Expects a `gState` symbol at the call site.
  getNodeVal(gState.currentExpr, node)
proc printDebugExpr*(gState: State, node: TSNode) =
  ## When `gState.debug` is set, print the text of `node` followed by its
  ## Lisp-style tree dump, both as commented output.
  if not gState.debug:
    return
  let inputLine = "Input => " & node.val
  gecho inputLine.getCommented()
  gecho printLisp(gState.currentExpr, node).getCommented()
proc getExprIdent*(gState: State, identName: string, kind = nskConst, parent = ""): PNode =
  ## Gets a cPlugin transformed identifier from `identName`
  ##
  ## Returns PNode(nkNone) when `identName` is a skipped symbol, or when it
  ## is not a type and the transformed name is blank or not a known const
  ## identifier.
  result = newNode(nkNone)
  if identName in gState.skippedSyms:
    return

  var resolved = identName
  if resolved != "_":
    # Process the identifier through cPlugin
    resolved = gState.getIdentifier(resolved, kind, parent)

  if kind == nskType:
    return gState.getIdent(resolved)

  if resolved.nBl and resolved in gState.constIdentifiers:
    # Append the pending type-cast name, if any, as `ident.TypeName`
    if gState.currentTyCastName.nBl:
      resolved = resolved & "." & gState.currentTyCastName
    result = gState.getIdent(resolved)
proc getExprIdent*(gState: State, node: TSNode, kind = nskConst, parent = ""): PNode =
  ## Gets a cPlugin transformed identifier from the text of `node`
  ##
  ## Forwards to the string overload; returns PNode(nkNone) under the same
  ## conditions.
  let identName = node.val
  result = getExprIdent(gState, identName, kind, parent)
proc parseChar(charStr: string): uint8 {.inline.} =
  ## Parses a character literal out of a string. This is needed
  ## because treesitter gives unescaped characters when parsing
  ## strings.
  ##
  ## Accepted forms:
  ## * a single character, returned as its byte value
  ## * `\xHH` hexadecimal escapes
  ## * `\o`, `\oo`, `\ooo` octal escapes
  ## * the named escapes `\0 \a \b \e \f \n \r \t \v \\ \' \" \?`
  ##
  ## Numeric escapes larger than `uint8.high` are clamped to `uint8.high`.
  ## Anything unrecognised yields 0. Malformed digits in a hex escape or in
  ## a four character octal escape raise `ValueError` from the `strutils`
  ## parsers.
  if charStr.len == 1:
    return charStr[0].uint8

  # Numeric escapes are parsed into an `int` first so that the range clamp
  # happens before narrowing to `uint8`.
  var code = 0
  if charStr.startsWith("\\x"):
    code = parseHexInt(charStr.replace("\\x", "0x"))
  elif charStr.len == 4 or
      (charStr.len in 2 .. 3 and charStr[0] == '\\' and
       charStr[1 .. ^1].allCharsInSet({'0' .. '7'})):
    # Octal: C allows one to three digits after the backslash
    code = parseOctInt("0o" & charStr[1 .. ^1])

  if code < 0 or code > int(uint8.high):
    return uint8.high

  result = uint8(code)
  if result == 0:
    case charStr
    of "\\0":
      result = 0x00
    of "\\a":
      result = 0x07
    of "\\b":
      result = 0x08
    of "\\e":
      result = 0x1B
    of "\\f":
      result = 0x0C
    of "\\n":
      result = 0x0A
    of "\\r":
      result = 0x0D
    of "\\t":
      result = 0x09
    of "\\v":
      result = 0x0B
    of "\\\\":
      result = 0x5C
    of "\\'":
      result = 0x27
    of "\\\"":
      result = 0x22
    of "\\?":
      result = 0x3F
    else:
      discard
proc getCharLit(charStr: string): PNode {.inline.} =
  ## Build an `nkCharLit` node whose value is the byte that `parseChar`
  ## decodes from `charStr`.
  let code = parseChar(charStr)
  result = newNode(nkCharLit)
  result.intVal = int64(code)
proc getFloatNode(number, suffix: string): PNode {.inline.} =
  ## Get a Nim float node from a C float expression.
  ##
  ## The float suffix is read from the last character of `number`; the
  ## `suffix` argument is not used. A trailing `f`/`F`/`l`/`L` is stripped
  ## and produces an `nkFloat64Lit`, otherwise an `nkFloatLit` is returned.
  ## Raises `ExprParseError` when the digits cannot be parsed.
  let lastChar = number[^1]
  try:
    if lastChar in {'l', 'L', 'f', 'F'}:
      # TODO: handle long double (128 bits) via nkFloat128Lit
      let digits = number[0 .. ^2]
      result = newFloatNode(nkFloat64Lit, parseFloat(digits))
    else:
      result = newFloatNode(nkFloatLit, parseFloat(number))
  except ValueError:
    raise newException(ExprParseError, &"Could not parse float value \"{number}\".")
proc getIntNode(number, suffix: string): PNode {.inline.} =
  ## Get a Nim int node from a C integer expression + suffix
  ##
  ## `number` is the literal without sign and suffix. Supported bases are
  ## hexadecimal (`0x`), binary (`0b`), Nim style octal (`0o`), C style
  ## octal (leading `0`) and decimal. `suffix` selects the literal kind:
  ## `u` -> uint, `l` -> int32, `ul` -> uint32, `ll` -> int64,
  ## `ull` -> uint64, anything else -> int.
  ##
  ## Raises `ExprParseError` when the digits cannot be parsed.

  # C accepts the suffix letters in either case and with `u` on either
  # side of `l`/`ll`, so normalise before dispatching.
  case suffix.toLowerAscii()
  of "u":
    result = newNode(nkUintLit)
  of "l":
    result = newNode(nkInt32Lit)
  of "ul", "lu":
    result = newNode(nkUint32Lit)
  of "ll":
    result = newNode(nkInt64Lit)
  of "ull", "llu":
    result = newNode(nkUint64Lit)
  else:
    result = newNode(nkIntLit)

  let lowered = number.toLowerAscii()
  try:
    if lowered.startsWith("0x"):
      result.intVal = parseHexInt(number)
      result.flags = {nfBase16}
    elif lowered.startsWith("0b"):
      result.intVal = parseBinInt(number)
      result.flags = {nfBase2}
    elif lowered.startsWith("0o"):
      result.intVal = parseOctInt(number)
      result.flags = {nfBase8}
    elif number.len > 1 and number[0] == '0':
      # A C literal with a leading zero is octal, e.g. 0755
      result.intVal = parseOctInt("0o" & number[1 .. ^1])
      result.flags = {nfBase8}
    else:
      result.intVal = parseInt(number)
  except ValueError:
    raise newException(ExprParseError, &"Could not parse integer value \"{number}\".")
proc getNumNode(number, suffix: string): PNode {.inline.} =
  ## Convert a C number to a Nim number PNode: a float node when `number`
  ## contains a decimal point, an integer node otherwise.
  result =
    if '.' in number: getFloatNode(number, suffix)
    else: getIntNode(number, suffix)
proc processNumberLiteral(gState: State, node: TSNode): PNode =
  ## Parse a number literal from a TSNode. Can be a float, hex, long, etc
  ##
  ## The text of `node` is split into an optional `-` sign, the digits and
  ## a `u`/`l` suffix. A leading `-` wraps the number in an `nkPrefix`.
  ## Raises `ExprParseError` when no number is found in the text.
  const reg = re"(\-)?(0\d+|0[xX][0-9a-fA-F]+|0[bB][01]+|\d+\.\d*[fFlL]?|\d*\.\d+[fFlL]?|\d+)([ulUL]*)"
  let nodeVal = node.val
  var captures: RegexMatch

  if not nodeVal.find(reg, captures):
    raise newException(ExprParseError, &"Could not find a number in number_literal: \"{nodeVal}\"")

  let
    sign =
      if captures.group(0).len > 0: nodeVal[captures.group(0)[0]]
      else: ""
    digits = nodeVal[captures.group(1)[0]]
    numSuffix = nodeVal[captures.group(2)[0]]

  result = getNumNode(digits, numSuffix)
  if result.kind != nkNone and sign == "-":
    result = nkPrefix.newTree(gState.getIdent("-"), result)
proc processCharacterLiteral(gState: State, node: TSNode): PNode =
  ## Convert a C character literal node into an `nkCharLit`.
  ##
  ## Input => 'G'
  ##
  ## (char_literal 1 1 3 "'G'")
  ##
  ## Output => 'G'
  ##
  ## nkCharLit("G")
  let quoted = node.val
  # Drop the first and last character (the surrounding quotes)
  let inner = quoted[1 .. ^2]
  result = getCharLit(inner)
proc processStringLiteral(gState: State, node: TSNode): PNode =
  ## Convert a C string literal node into an `nkStrLit`, decoding each
  ## escape sequence or plain character through `parseChar`.
  ##
  ## Input => "\n\rfoobar\0\'"
  ##
  ## (string_literal 1 1 16 ""\n\rfoobar\0\'""
  ##  (escape_sequence 1 2 2 "\n")
  ##  (escape_sequence 1 4 2 "\r")
  ##  (escape_sequence 1 12 2 "\0")
  ##  (escape_sequence 1 14 2 "\'")
  ## )
  ##
  ## Output => "\n\cfoobar\x00\'"
  ##
  ## nkStrLit("\x0A\x0Dfoobar\x00\'")
  const
    tokenPattern = "(\\\\x[[:xdigit:]]{2}|\\\\\\d{3}|\\\\0|\\\\a|\\\\b|\\\\e|\\\\f|\\\\n|\\\\r|\\\\t|\\\\v|\\\\\\\\|\\\\'|\\\\\"|[[:ascii:]])"
    tokenRegex = re(tokenPattern)

  let quoted = node.val
  # Drop the first and last character (the surrounding quotes)
  let inner = quoted[1 .. ^2]

  # Convert the c string escape sequences/etc to Nim chars
  var decoded = newStringOfCap(quoted.len)
  for m in inner.findAll(tokenRegex):
    let token = inner[m.group(0)[0]]
    decoded.add(parseChar(token).chr)

  result = newStrNode(nkStrLit, decoded)
proc processTSNode(gState: State, node: TSNode, typeofNode: var PNode): PNode
proc processParenthesizedExpr(gState: State, node: TSNode, typeofNode: var PNode): PNode =
  ## Wrap the converted children of a parenthesized expression in `nkPar`.
  ##
  ## Input => (a + b)
  ##
  ## (parenthesized_expression 1 1 7
  ##  (math_expression 1 2 5
  ##   (identifier 1 2 1 "a")
  ##   (identifier 1 6 1 "b")
  ##  )
  ## )
  ##
  ## Output => (typeof(a)(a + typeof(a)(b)))
  result = newNode(nkPar)
  for idx in 0 .. node.len - 1:
    let converted = gState.processTSNode(node[idx], typeofNode)
    result.add(converted)
proc processCastExpression(gState: State, node: TSNode, typeofNode: var PNode): PNode =
  ## Convert a C cast into an `nkCast` of the converted type (first child)
  ## and the converted operand (second child).
  ##
  ## Input => (int)a
  ##
  ## (cast_expression 1 1 6 "(int)a"
  ##  (type_descriptor 1 2 3 "int"
  ##   (primitive_type 1 2 3 "int")
  ##  )
  ##  (identifier 1 6 1 "a")
  ## )
  ##
  ## Output => cast[cint](a)
  let
    targetType = gState.processTSNode(node[0], typeofNode)
    operand = gState.processTSNode(node[1], typeofNode)
  result = nkCast.newTree(targetType, operand)
proc getNimUnarySym(csymbol: string): string =
  ## Get the Nim equivalent of a unary C symbol
  ##
  ## `+` and `-` map to themselves, `~` and `!` both map to `not`.
  ## Any other symbol raises `ExprParseError`.
  ##
  ## TODO: Add ++, --,
  if csymbol == "+" or csymbol == "-":
    return csymbol
  if csymbol == "~" or csymbol == "!":
    return "not"
  raise newException(ExprParseError, &"Unsupported unary symbol \"{csymbol}\"")
proc getNimBinarySym(csymbol: string): string =
  ## Get the Nim equivalent of a binary C symbol.
  ##
  ## Comparison and arithmetic symbols map to themselves; the bitwise,
  ## logical, modulo and shift symbols map to Nim keywords. Any other
  ## symbol raises `ExprParseError`.
  result =
    case csymbol
    of "==", "!=", "+", "-", "/", "*", ">", "<", ">=", "<=":
      csymbol
    of "|", "||":
      "or"
    of "&", "&&":
      "and"
    of "^":
      "xor"
    of "%":
      "mod"
    of "<<":
      "shl"
    of ">>":
      "shr"
    else:
      raise newException(ExprParseError, &"Unsupported binary symbol \"{csymbol}\"")
proc processBinaryExpression(gState: State, node: TSNode, typeofNode: var PNode): PNode =
  ## Convert a binary C expression into an `nkInfix` node.
  ##
  ## The right operand is always wrapped in a call to `typeofNode` so that
  ## it is coerced to the reference type. When `typeofNode` is nil on
  ## entry it is set to `typeof(<left operand>)` after the left operand
  ## has been converted, and stays set for the caller.
  ##
  ## Raises `ExprParseError` (via `getNimBinarySym`) for unsupported
  ## operators.
  # Node has left and right children ie: (2 + 7)
  #
  # Input => a == b
  #
  # (equality_expression 1 1 6
  #  (identifier 1 1 1 "a")
  #  (identifier 1 6 1 "b")
  # )
  #
  # Output => a == typeof(a)(b)
  #
  # nkInfix(
  #  nkIdent("=="),
  #  nkIdent("a"),
  #  nkCall(
  #   nkCall(
  #    nkIdent("typeof"),
  #    nkIdent("a")
  #   ),
  #   nkIdent("b")
  #  )
  # )
  result = newNode(nkInfix)
  let
    left = node[0]
    right = node[1]
    # The operator text is read from child 1 of the unfiltered child list
    # (tsNodeChild), whereas `node[i]` indexes named children only
    binarySym = node.tsNodeChild(1).val.strip()
    nimSym = getNimBinarySym(binarySym)
  result.add gState.getIdent(nimSym)
  # The left operand must be converted before the right one: it may
  # establish `typeofNode`, which the right operand is then converted with
  let leftNode = gState.processTSNode(left, typeofNode)
  if typeofNode.isNil:
    typeofNode = nkCall.newTree(
      gState.getIdent("typeof"),
      leftNode
    )
  let rightNode = gState.processTSNode(right, typeofNode)
  result.add leftNode
  result.add nkCall.newTree(
    typeofNode,
    rightNode
  )
  if binarySym == "/":
    # Special case. Nim's operators generally output
    # the same type they take in, except for division.
    # So we need to emulate C here and cast the whole
    # expression to the type of the first arg
    result = nkCall.newTree(
      typeofNode,
      result
    )
proc processUnaryExpression(gState: State, node: TSNode, typeofNode: var PNode): PNode =
  ## Convert a unary C expression into an `nkPar` holding an `nkPrefix`.
  ##
  ## For `-` the operand is wrapped in an `int64(...)` call, and
  ## `typeofNode` is set to the `int64` identifier when it was nil on
  ## entry. All other supported operators are emitted as
  ## `<nim symbol> <operand>`.
  ##
  ## Raises `ExprParseError` (via `getNimUnarySym`) for unsupported
  ## operators.
  # Input => !a
  #
  # (logical_expression 1 1 2 "!a"
  #  (identifier 1 2 1 "a")
  # )
  #
  # Output => (not a)
  #
  # nkPar(
  #  nkPrefix(
  #   nkIdent("not"),
  #   nkIdent("a")
  #  )
  # )
  result = newNode(nkPar)
  let
    child = node[0]
    # The operator text is read from child 0 of the unfiltered child list
    # (tsNodeChild), whereas `node[0]` is the first named child
    unarySym = node.tsNodeChild(0).val.strip()
    nimSym = getNimUnarySym(unarySym)
  if nimSym == "-":
    # Special case. The minus symbol must be in front of an integer,
    # so we have to make a gentle cast here to coerce it to one.
    # Might be bad because we are overwriting the type
    # There's probably a better way of doing this
    if typeofNode.isNil:
      typeofNode = gState.getIdent("int64")
    result.add nkPrefix.newTree(
      gState.getIdent(unarySym),
      nkPar.newTree(
        nkCall.newTree(
          gState.getIdent("int64"),
          gState.processTSNode(child, typeofNode)
        )
      )
    )
  else:
    result.add nkPrefix.newTree(
      gState.getIdent(nimSym),
      gState.processTSNode(child, typeofNode)
    )
proc processUnaryOrBinaryExpression(gState: State, node: TSNode, typeofNode: var PNode): PNode =
  ## Processes both unary (-1, ~true, !something) and binary (a + b, c * d) expressions
  ##
  ## Dispatches on the number of named children of `node`; a node with no
  ## children raises `ExprParseError`.
  let childCount = node.len
  if childCount == 0:
    raise newException(ExprParseError, &"Invalid {node.getName()} \"{node.val}\"")

  result =
    if childCount > 1:
      # Node has left and right children ie: (2 + 7)
      gState.processBinaryExpression(node, typeofNode)
    else:
      # Node has only one child, ie -(20 + 7)
      gState.processUnaryExpression(node, typeofNode)
proc processSizeofExpression(gState: State, node: TSNode, typeofNode: var PNode): PNode =
  ## Convert a C `sizeof` expression into a call to Nim's `sizeof` on the
  ## converted first child.
  ##
  ## Input => sizeof(int)
  ##
  ## (sizeof_expression 1 1 11 "sizeof(int)"
  ##  (type_descriptor 1 8 3 "int"
  ##   (primitive_type 1 8 3 "int")
  ##  )
  ## )
  ##
  ## Output => sizeof(cint)
  let sizeofIdent = gState.getIdent("sizeof")
  result = nkCall.newTree(sizeofIdent, gState.processTSNode(node[0], typeofNode))
proc processTSNode(gState: State, node: TSNode, typeofNode: var PNode): PNode =
  ## Handle all of the types of expressions here. This proc gets called recursively
  ## in the processX procs and will drill down to sub nodes.
  ##
  ## Dispatches on the treesitter node name of `node`. `typeofNode` is
  ## passed through to the sub processors, which may set it. Returns an
  ## `nkNone` node for comments and for container nodes without a
  ## convertible child. Raises `ExprParseError` for unsupported node types,
  ## missing identifiers or types, and container nodes that yield more
  ## than one non-empty child.
  result = newNode(nkNone)
  let nodeName = node.getName()
  decho "NODE: ", nodeName, ", VAL: ", node.val
  case nodeName
  of "number_literal":
    # Input -> 0x1234FE, 1231, 123u, 123ul, 123ull, 1.334f
    # Output -> 0x1234FE, 1231, 123'u, 123'u32, 123'u64, 1.334
    result = gState.processNumberLiteral(node)
  of "string_literal":
    # Input -> "foo\0\x42"
    # Output -> "foo\0"
    result = gState.processStringLiteral(node)
  of "char_literal":
    # Input -> 'F', '\060' // Octal, '\x5A' // Hex, '\r' // escape sequences
    # Output -> 'F', '0', 'Z', '\r'
    result = gState.processCharacterLiteral(node)
  of "expression_statement", "ERROR", "translation_unit":
    # Note that we're parsing partial expressions, so the TSNode might contain
    # an ERROR node. If that's the case, they usually contain children with
    # partial results, which will contain parsed expressions
    #
    # Input (top level statement) -> ((1 + 3 - IDENT) - (int)400.0)
    # Output -> (1 + typeof(1)(3) - typeof(1)(IDENT) - typeof(1)(cast[int](400.0))) # Type casting in case some args differ
    if node.len == 1:
      result = gState.processTSNode(node[0], typeofNode)
    elif node.len > 1:
      # Convert every child and keep only those that produced a node
      var nodes: seq[PNode]
      for i in 0 ..< node.len:
        let subNode = gState.processTSNode(node[i], typeofNode)
        if subNode.kind != nkNone:
          nodes.add(subNode)
      # Multiple nodes can get tricky. Don't support them yet, unless they
      # have at most one valid node
      if nodes.len > 1:
        raise newException(ExprParseError, &"Node type \"{nodeName}\" with val ({node.val}) has more than one non empty node")
      if nodes.len == 1:
        result = nodes[0]
    else:
      raise newException(ExprParseError, &"Node type \"{nodeName}\" has no children")
  of "parenthesized_expression":
    # Input -> (IDENT - OTHERIDENT)
    # Output -> (IDENT - typeof(IDENT)(OTHERIDENT)) # Type casting in case OTHERIDENT is a slightly different type (uint vs int)
    result = gState.processParenthesizedExpr(node, typeofNode)
  of "sizeof_expression":
    # Input -> sizeof(char)
    # Output -> sizeof(cchar)
    result = gState.processSizeofExpression(node, typeofNode)
  # binary_expression from the new treesitter upgrade should work here
  # once we upgrade
  of "math_expression", "logical_expression", "relational_expression",
     "bitwise_expression", "equality_expression", "binary_expression",
     "shift_expression":
    # Input -> a == b, a != b, !a, ~a, a < b, a > b, a <= b, a >= b, a >> b, a << b
    # Output ->
    #   typeof(a)(a == typeof(a)(b))
    #   typeof(a)(a != typeof(a)(b))
    #   (not a)
    #   (not a)
    #   typeof(a)(a < typeof(a)(b))
    #   typeof(a)(a > typeof(a)(b))
    #   typeof(a)(a <= typeof(a)(b))
    #   typeof(a)(a >= typeof(a)(b))
    #   a shr typeof(a)(b)
    #   a shl typeof(a)(b)
    result = gState.processUnaryOrBinaryExpression(node, typeofNode)
  of "cast_expression":
    # Input -> (int) a
    # Output -> cast[cint](a)
    result = gState.processCastExpression(node, typeofNode)
  # Why are these node types named true/false?
  of "true", "false":
    # Input -> true, false
    # Output -> true, false
    result = gState.parseString(node.val)
  of "type_descriptor", "sized_type_specifier":
    # Input -> int, unsigned int, long int, etc
    # Output -> cint, cuint, clong, etc
    let ty = getType(node.val)
    if ty.len > 0:
      # If ty is not empty, one of C's builtin types has been found
      result = gState.getExprIdent(ty, nskType, parent=node.getName())
    else:
      # Otherwise look the type name up as written
      result = gState.getExprIdent(node.val, nskType, parent=node.getName())
      if result.kind == nkNone:
        raise newException(ExprParseError, &"Missing type specifier \"{node.val}\"")
  of "identifier":
    # Input -> IDENT
    # Output -> IDENT (if found in sym table, else error)
    result = gState.getExprIdent(node, parent=node.getName())
    if result.kind == nkNone:
      raise newException(ExprParseError, &"Missing identifier \"{node.val}\"")
  of "comment":
    # Comments produce no node; `result` stays nkNone
    discard
  else:
    raise newException(ExprParseError, &"Unsupported node type \"{nodeName}\" for node \"{node.val}\"")
  decho "NODE RESULT: ", result
proc parseCExpression*(gState: State, codeRoot: TSNode, name = ""): PNode =
  ## Parse a c expression from a root ts node
  ##
  ## Returns an `nkNone` node when the expression cannot be converted; the
  ## failure reason is only reported through `decho`. The `name` argument
  ## is not used by this overload.

  # Tracks the type of the first symbol, which the processors use for
  # type casting the remaining operands
  var firstSymType: PNode = nil
  try:
    result = gState.processTSNode(codeRoot, firstSymType)
  except ExprParseError as e:
    decho e.msg
    result = newNode(nkNone)
  except Exception as e:
    decho "UNEXPECTED EXCEPTION: ", e.msg
    result = newNode(nkNone)
proc parseCExpression*(gState: State, code: string, name = ""): PNode =
  ## Convert the C string to a nim PNode tree
  ##
  ## `code` is parsed with treesitter in `gState.mode` and the resulting
  ## root node is handed to the `TSNode` overload. `name` is stored in
  ## `gState.currentTyCastName` for the duration of the call.
  ##
  ## `gState.currentExpr` and `gState.currentTyCastName` are always reset
  ## on exit, including when `withCodeAst` raises (for example on its
  ## `doAssert` for blank code or an invalid mode).
  gState.currentExpr = code
  gState.currentTyCastName = name
  try:
    withCodeAst(gState.currentExpr, gState.mode):
      result = gState.parseCExpression(root, name)
  finally:
    # Clear the state
    gState.currentExpr = ""
    gState.currentTyCastName = ""

View file

@ -221,16 +221,19 @@ proc len*(node: TSNode): int =
result = node.tsNodeNamedChildCount().int
proc `[]`*(node: TSNode, i: SomeInteger): TSNode =
if i < node.len:
if i < type(i)(node.len()):
result = node.tsNodeNamedChild(i.uint32)
proc getName*(node: TSNode): string {.inline.} =
if not node.isNil:
return $node.tsNodeType()
proc getNodeVal*(gState: State, node: TSNode): string =
proc getNodeVal*(code: var string, node: TSNode): string =
if not node.isNil:
return gState.code[node.tsNodeStartByte() .. node.tsNodeEndByte()-1].strip()
return code[node.tsNodeStartByte() .. node.tsNodeEndByte()-1].strip()
proc getNodeVal*(gState: State, node: TSNode): string =
gState.code.getNodeVal(node)
proc getAtom*(node: TSNode): TSNode =
if not node.isNil:
@ -349,13 +352,16 @@ proc inChildren*(node: TSNode, ntype: string): bool =
result = true
break
proc getLineCol*(gState: State, node: TSNode): tuple[line, col: int] =
proc getLineCol*(code: var string, node: TSNode): tuple[line, col: int] =
# Get line number and column info for node
let
point = node.tsNodeStartPoint()
result.line = point.row.int + 1
result.col = point.column.int + 1
proc getLineCol*(gState: State, node: TSNode): tuple[line, col: int] =
getLineCol(gState.code, node)
proc getTSNodeNamedChildCountSansComments*(node: TSNode): int =
for i in 0 ..< node.len:
if node.getName() != "comment":
@ -374,7 +380,7 @@ proc getPxName*(node: TSNode, offset: int): string =
if count == offset and not np.isNil:
return np.getName()
proc printLisp*(gState: State, root: TSNode): string =
proc printLisp*(code: var string, root: TSNode): string =
var
node = root
nextnode: TSNode
@ -384,18 +390,18 @@ proc printLisp*(gState: State, root: TSNode): string =
if not node.isNil and depth > -1:
result &= spaces(depth)
let
(line, col) = gState.getLineCol(node)
(line, col) = code.getLineCol(node)
result &= &"({$node.tsNodeType()} {line} {col} {node.tsNodeEndByte() - node.tsNodeStartByte()}"
let
val = gState.getNodeVal(node)
val = code.getNodeVal(node)
if "\n" notin val and " " notin val:
result &= &" \"{val}\""
else:
break
if node.tsNodeNamedChildCount() != 0:
if node.len() != 0:
result &= "\n"
nextnode = node.tsNodeNamedChild(0)
nextnode = node[0]
depth += 1
else:
result &= ")\n"
@ -419,21 +425,24 @@ proc printLisp*(gState: State, root: TSNode): string =
if node == root:
break
proc printLisp*(gState: State, root: TSNode): string =
printLisp(gState.code, root)
proc getCommented*(str: string): string =
"\n# " & str.strip().replace("\n", "\n# ")
proc printTree*(gState: State, pnode: PNode, offset = ""): string =
if gState.debug and pnode.kind != nkNone:
if not pnode.isNil and gState.debug and pnode.kind != nkNone:
result &= "\n# " & offset & $pnode.kind & "("
case pnode.kind
of nkCharLit:
result &= "'" & pnode.intVal.char & "')"
result &= ($pnode.intVal.char).escape & ")"
of nkIntLit..nkUInt64Lit:
result &= $pnode.intVal & ")"
of nkFloatLit..nkFloat128Lit:
result &= $pnode.floatVal & ")"
of nkStrLit..nkTripleStrLit:
result &= "\"" & pnode.strVal & "\")"
result &= pnode.strVal.escape & ")"
of nkSym:
result &= $pnode.sym & ")"
of nkIdent:
@ -452,13 +461,13 @@ proc printTree*(gState: State, pnode: PNode, offset = ""): string =
proc printDebug*(gState: State, node: TSNode) =
if gState.debug:
gecho ("Input => " & gState.getNodeVal(node)).getCommented() & "\n" &
gState.printLisp(node).getCommented()
gecho ("Input => " & gState.getNodeVal(node)).getCommented()
gecho gState.printLisp(node).getCommented()
proc printDebug*(gState: State, pnode: PNode) =
if gState.debug:
gecho ("Output => " & $pnode).getCommented() & "\n" &
gState.printTree(pnode)
if gState.debug and pnode.kind != nkNone:
gecho ("Output => " & $pnode).getCommented()
gecho gState.printTree(pnode)
# Compiler shortcuts

View file

@ -1,4 +1,4 @@
import sequtils, sets, tables
import sequtils, sets, tables, strutils
import regex
@ -76,6 +76,11 @@ type
# All const names for enum casting
constIdentifiers*: HashSet[string]
# All symbols that have been skipped due to
# being unwrappable or the user provided
# override is blank
skippedSyms*: HashSet[string]
# Legacy ast fields, remove when ast2 becomes default
constStr*, enumStr*, procStr*, typeStr*: string
@ -93,6 +98,9 @@ type
currentHeader*, impShort*, sourceFile*: string
# Used for the exprparser.nim module
currentExpr*, currentTyCastName*: string
data*: seq[tuple[name, val: string]]
nodeBranch*: seq[string]
@ -113,12 +121,12 @@ when not declared(CIMPORT):
export gAtoms, gExpressions, gEnumVals, Kind, Ast, AstTable, State, nBl, Bl
# Redirect output to file when required
template gecho*(args: string) {.dirty.} =
template gecho*(args: string) =
if gState.outputHandle.isNil:
echo args
else:
gState.outputHandle.writeLine(args)
template decho*(str: untyped): untyped =
template decho*(args: varargs[string, `$`]): untyped =
if gState.debug:
gecho str.getCommented()
gecho join(args, "").getCommented()

View file

@ -2,16 +2,11 @@ import os, osproc, strformat, strutils, tables, times
import "."/treesitter/[api, c, cpp]
import "."/[ast, ast2, globals, getters, grammar, build]
import "."/[ast, ast2, globals, getters, grammar, build, tshelp]
proc process(gState: State, path: string, astTable: AstTable) =
doAssert existsFile(path), &"Invalid path {path}"
var parser = tsParserNew()
defer:
parser.tsParserDelete()
if gState.mode.Bl:
gState.mode = getCompilerMode(path)
@ -20,31 +15,16 @@ proc process(gState: State, path: string, astTable: AstTable) =
else:
gState.code = readFile(path)
doAssert gState.code.nBl, "Empty file or preprocessor error"
if gState.mode == "c":
doAssert parser.tsParserSetLanguage(treeSitterC()), "Failed to load C parser"
elif gState.mode == "cpp":
doAssert parser.tsParserSetLanguage(treeSitterCpp()), "Failed to load C++ parser"
else:
doAssert false, &"Invalid parser {gState.mode}"
var
tree = parser.tsParserParseString(nil, gState.code.cstring, gState.code.len.uint32)
root = tree.tsTreeRootNode()
defer:
tree.tsTreeDelete()
if gState.past:
gecho gState.printLisp(root)
elif gState.pnim:
if Feature.ast2 in gState.feature:
ast2.parseNim(gState, path, root)
else:
ast.parseNim(gState, path, root, astTable)
elif gState.preprocess:
gecho gState.code
withCodeAst(gState.code, gState.mode):
if gState.past:
gecho gState.printLisp(root)
elif gState.pnim:
if Feature.ast2 in gState.feature:
ast2.parseNim(gState, path, root)
else:
ast.parseNim(gState, path, root, astTable)
elif gState.preprocess:
gecho gState.code
# CLI processing with default values
proc main(

28
nimterop/tshelp.nim Normal file
View file

@ -0,0 +1,28 @@
import "."/treesitter/[c, cpp]
template withCodeAst*(code: string, mode: string, body: untyped): untyped =
  ## A simple template to inject the TSNode into a body of code
  ##
  ## Parses `code` with the treesitter C parser (`mode == "c"`) or C++
  ## parser (`mode == "cpp"`) and runs `body` with the root node available
  ## as `root`. Asserts that `code` is not blank, that `mode` is one of the
  ## two supported values and that the language could be loaded.
  ##
  ## The parser and the tree are released when the enclosing block is
  ## left, including when `body` raises.
  mixin treeSitterC
  mixin treeSitterCpp

  var parser = tsParserNew()
  defer:
    parser.tsParserDelete()

  doAssert code.nBl, "Empty code or preprocessor error"

  if mode == "c":
    doAssert parser.tsParserSetLanguage(treeSitterC()), "Failed to load C parser"
  elif mode == "cpp":
    doAssert parser.tsParserSetLanguage(treeSitterCpp()), "Failed to load C++ parser"
  else:
    doAssert false, "Invalid parser " & mode

  var
    tree = parser.tsParserParseString(nil, code.cstring, code.len.uint32)
    root {.inject.} = tree.tsTreeRootNode()

  # A `defer` only guards the statements that follow it, so it has to be
  # registered before `body` for the tree to be freed when `body` raises
  defer:
    tree.tsTreeDelete()

  body

View file

@ -8,6 +8,42 @@ extern "C" {
#define D "hello"
#define E 'c'
#define UEXPR (1234u << 1)
#define ULEXPR (1234ul << 2)
#define ULLEXPR (1234ull << 3)
#define LEXPR (1234l << 4)
#define LLEXPR (1234ll << 5)
#define SHL1 (1u << 1)
#define SHL2 (1u << 2)
#define SHL3 (1u << 3)
#define COERCE 645635634896ull + 35436
#define COERCE2 645635634896 + 35436ul
#define BINEXPR ~(-(1u << !-1)) ^ (10 >> 1)
#define BOOL true
#define MATHEXPR (1 + 2/3*20 - 100)
#define ANDEXPR (100 & 11000)
#define CASTEXPR (char) 34
#define AVAL 100
#define BVAL 200
#define EQ1 AVAL <= BVAL
#define EQ2 AVAL >= BVAL
#define EQ3 AVAL > BVAL
#define EQ4 AVAL < BVAL
#define EQ5 AVAL != BVAL
#define EQ6 AVAL == BVAL
#define SIZEOF sizeof(char)
#define REG_STR "regular string"
#define NOTSUPPORTEDSTR "not a " REG_STR
#define NULLCHAR '\0'/* comments should not break things*/
#define OCTCHAR '\012' // nor should this comment
#define HEXCHAR '\xFE'
#define TRICKYSTR "\x4E\034\nfoo\0\'\"\r\v\a\b\e\f\t\\\?bar"
#define ALLSHL (SHL1 | SHL2 | SHL3)
struct A0;
struct A1 {};
typedef struct A2;

View file

@ -3,6 +3,10 @@ import macros, os, sets, strutils
import nimterop/[cimport]
static:
# Skip casting on lower nim compilers because
# the VM does not support it
when (NimMajor, NimMinor, NimPatch) < (1, 0, 0):
cSkipSymbol @["CASTEXPR"]
cDebug()
const
@ -93,11 +97,11 @@ macro testFields(t: typed, fields: static[string] = "") =
for i in 0 ..< rl.len:
let
name = ($rl[i][0]).strip(chars = {'*'})
typ = ($(rl[i][1].repr())).replace("\n", "").replace(" ", "")
typ = ($(rl[i][1].repr())).replace("\n", "").replace(" ", "").replace("typeof", "type")
n = names.find(name)
assert n != -1, $t & "." & name & " invalid"
assert types[n] == typ,
"typeof(" & $t & ":" & name & ") != " & types[n] & ", is " & typ
assert types[n].replace("typeof", "type") == typ,
"typeof(" & $t & ":" & name & ") != " & types[n].replace("typeof", "type") & ", is " & typ
assert A == 2
assert B == 1.0
@ -105,6 +109,48 @@ assert C == 0x10
assert D == "hello"
assert E == 'c'
assert not defined(NOTSUPPORTEDSTR)
assert UEXPR == (1234.uint shl 1)
assert ULEXPR == (1234.uint32 shl 2)
assert ULLEXPR == (1234.uint64 shl 3)
assert LEXPR == (1234.int32 shl 4)
assert LLEXPR == (1234.int64 shl 5)
assert AVAL == 100
assert BVAL == 200
assert EQ1 == (AVAL <= BVAL)
assert EQ2 == (AVAL >= BVAL)
assert EQ3 == (AVAL > BVAL)
assert EQ4 == (AVAL < BVAL)
assert EQ5 == (AVAL != BVAL)
assert EQ6 == (AVAL == BVAL)
assert SIZEOF == 1
assert COERCE == 645635670332'u64
assert COERCE2 == 645635670332'i64
assert BINEXPR == 5
assert BOOL == true
assert MATHEXPR == -99
assert ANDEXPR == 96
when (NimMajor, NimMinor, NimPatch) >= (1, 0, 0):
assert CASTEXPR == 34.chr
assert TRICKYSTR == "N\x1C\nfoo\x00\'\"\c\v\a\b\e\f\t\\\\?bar"
assert NULLCHAR == '\0'
assert OCTCHAR == '\n'
assert HEXCHAR.int == 0xFE
assert SHL1 == (1.uint shl 1)
assert SHL2 == (1.uint shl 2)
assert SHL3 == (1.uint shl 3)
assert ALLSHL == (SHL1 or SHL2 or SHL3)
assert A0 is object
testFields(A0, "f1!cint")
checkPragmas(A0, pHeaderBy, istype = false)
@ -271,7 +317,7 @@ var a21p: A21p
a21p = addr a20
assert A22 is object
testFields(A22, "f1|f2!ptr ptr cint|array[123 + 132, ptr cint]")
testFields(A22, "f1|f2!ptr ptr cint|array[123 + type(123)(132), ptr cint]")
checkPragmas(A22, pHeaderBy, istype = false)
var a22: A22
a22.f1 = addr a15.a2[0]
@ -427,4 +473,4 @@ checkPragmas(nested, pHeaderImpBy)
when defined(HEADER):
assert sitest1(5) == 10
assert sitest1(10) == 20
assert sitest1(10) == 20

View file

@ -13,6 +13,12 @@ when defined(windows):
complex = object
static:
when (NimMajor, NimMinor, NimPatch) < (1, 0, 0):
# FP_ILOGB0 and FP_ILOGBNAN are casts that are unsupported
# on lower Nim VMs
cSkipSymbol @["math_errhandling", "FP_ILOGB0", "FP_ILOGBNAN"]
else:
cSkipSymbol @["math_errhandling"]
cDebug()
cDisableCaching()
cAddStdDir()