Attempt at a fix for issue #48.

This commit is contained in:
Jon Riehl 2013-01-28 20:08:09 -06:00
commit ffc067f852
6 changed files with 132 additions and 44 deletions

View file

@ -5,13 +5,13 @@ import opcode
import opcode_util
import pprint
from bytecode_visitor import BytecodeFlowVisitor, BenignBytecodeVisitorMixin
from bytecode_visitor import BasicBlockVisitor, BenignBytecodeVisitorMixin
from control_flow import ControlFlowGraph
# ______________________________________________________________________
class ControlFlowBuilder (BenignBytecodeVisitorMixin, BytecodeFlowVisitor):
'''Visitor responsible for traversing a bytecode flow object and
class ControlFlowBuilder (BenignBytecodeVisitorMixin, BasicBlockVisitor):
'''Visitor responsible for traversing a bytecode basic block map and
building a control flow graph (CFG).
The primary purpose of this transformation is to create a CFG,
@ -27,21 +27,26 @@ class ControlFlowBuilder (BenignBytecodeVisitorMixin, BytecodeFlowVisitor):
del self.nargs
return ret_val
def enter_flow_object (self, flow):
super(ControlFlowBuilder, self).enter_flow_object(flow)
self.flow = flow
def enter_blocks (self, blocks):
super(ControlFlowBuilder, self).enter_blocks(blocks)
self.blocks = blocks
self.block_list = list(blocks.keys())
self.block_list.sort()
self.cfg = ControlFlowGraph()
for block in flow.keys():
self.cfg.add_block(block, flow[block])
self.loop_stack = []
for block in self.block_list:
self.cfg.add_block(block, blocks[block])
def exit_flow_object (self, flow):
super(ControlFlowBuilder, self).exit_flow_object(flow)
assert self.flow == flow
def exit_blocks (self, blocks):
super(ControlFlowBuilder, self).exit_blocks(blocks)
assert self.blocks == blocks
self.cfg.compute_dataflow()
self.cfg.update_for_ssa()
ret_val = self.cfg
del self.loop_stack
del self.cfg
del self.flow
del self.block_list
del self.blocks
return ret_val
def enter_block (self, block):
@ -58,13 +63,15 @@ class ControlFlowBuilder (BenignBytecodeVisitorMixin, BytecodeFlowVisitor):
def exit_block (self, block):
assert block == self.block
del self.block
i, op, opname, arg, args = self.flow[block][-1]
i, op, arg = self.blocks[block][-1]
opname = opcode.opname[op]
if op in opcode.hasjabs:
self.cfg.add_edge(block, arg)
elif op in opcode.hasjrel:
self.cfg.add_edge(block, i + arg + 3)
elif opname == 'BREAK_LOOP':
self.cfg.add_edge(block, arg)
loop_i, _, loop_arg = self.loop_stack[-1]
self.cfg.add_edge(block, loop_i + loop_arg + 3)
elif opname != 'RETURN_VALUE':
self.cfg.add_edge(block, self._get_next_block(block))
if op in opcode_util.hascbranch:
@ -80,15 +87,24 @@ class ControlFlowBuilder (BenignBytecodeVisitorMixin, BytecodeFlowVisitor):
return super(ControlFlowBuilder, self).op_STORE_FAST(i, op, arg, *args,
**kws)
def op_SETUP_LOOP (self, i, op, arg, *args, **kws):
    '''Record a SETUP_LOOP instruction on the loop stack, then defer to
    the inherited handler.

    The (i, op, arg) triple is retained so that exit_block can resolve
    a BREAK_LOOP to the exit of the innermost loop (i + arg + 3).
    Returns whatever the inherited op_SETUP_LOOP returns.
    '''
    loop_entry = (i, op, arg)
    self.loop_stack.append(loop_entry)
    inherited = super(ControlFlowBuilder, self).op_SETUP_LOOP
    return inherited(i, op, arg, *args, **kws)
def op_POP_BLOCK (self, i, op, arg, *args, **kws):
    '''Discard the innermost loop stack entry, then defer to the
    inherited handler.

    Returns whatever the inherited op_POP_BLOCK returns.

    NOTE(review): an entry is popped for every POP_BLOCK visited, while
    in the code visible here only op_SETUP_LOOP pushes one.  Presumably
    a POP_BLOCK that closes some other kind of block would unbalance
    the loop stack -- confirm.
    '''
    self.loop_stack.pop()
    inherited = super(ControlFlowBuilder, self).op_POP_BLOCK
    return inherited(i, op, arg, *args, **kws)
# ______________________________________________________________________
def build_cfg (func):
'''Given a Python function, create a bytecode flow, visit the flow
object, and return a control flow graph.'''
import byte_flow
return ControlFlowBuilder().visit(
byte_flow.build_flow(func),
opcode_util.get_code_object(func).co_argcount)
co_obj = opcode_util.get_code_object(func)
return ControlFlowBuilder().visit(opcode_util.build_basic_blocks(co_obj),
co_obj.co_argcount)
# ______________________________________________________________________
# Main (self-test) routine

View file

@ -4,13 +4,13 @@
import dis
import opcode
from bytecode_visitor import BytecodeIterVisitor
from bytecode_visitor import BasicBlockVisitor
import opcode_util
# ______________________________________________________________________
class BytecodeFlowBuilder (BytecodeIterVisitor):
'''Transforms a bytecode vector into a bytecode "flow tree".
class BytecodeFlowBuilder (BasicBlockVisitor):
'''Transforms a CFG into a bytecode "flow tree".
The flow tree is a Python dictionary, described loosely by the
following set of productions:
@ -36,6 +36,8 @@ class BytecodeFlowBuilder (BytecodeIterVisitor):
if pops:
if pops < 0:
pops = arg - pops - 1
assert pops <= len(self.stack), ("Stack underflow at instruction "
"%d (%s)!" % (i, opname))
stk_args = self.stack[-pops:]
del self.stack[-pops:]
else:
@ -51,27 +53,50 @@ class BytecodeFlowBuilder (BytecodeIterVisitor):
opname, pops, pushes, appends = self.opmap[op]
return self._visit_op(i, op, arg, opname, pops, pushes, appends)
def enter_code_object (self, co_obj):
labels = dis.findlabels(co_obj.co_code)
labels = opcode_util.extendlabels(co_obj.co_code, labels)
def visit_cfg (self, cfg):
    '''Visit the blocks of the given control flow graph and return the
    result of that visit.

    The graph is held in self.cfg while the traversal runs (enter_block
    reads its blocks_in mapping) and the attribute is removed again
    once the visit has returned.
    '''
    self.cfg = cfg
    flow_result = self.visit(cfg.blocks)
    del self.cfg
    return flow_result
def enter_blocks (self, blocks):
labels = list(blocks.keys())
labels.sort()
self.blocks = dict((index, [])
for index in labels)
self.stack = []
self.loop_stack = []
self.blocks[0] = self.block = []
self.stacks = {}
def exit_code_object (self, co_obj):
def exit_blocks (self, blocks):
ret_val = self.blocks
del self.stack
del self.stacks
del self.loop_stack
del self.block
del self.blocks
return ret_val
def visit_op (self, i, op, arg):
if i in self.blocks:
self.block = self.blocks[i]
return super(BytecodeFlowBuilder, self).visit_op(i, op, arg)
def enter_block (self, block):
    '''Make the given block current and set up its simulated stack.

    block is the index of the block being entered.  The method sets
    self.block_no, self.block and self.stack:

    * A block with no incoming edges starts with an empty stack.
    * Otherwise the stack is a copy of the stack saved (in
      self.stacks) for the first predecessor that has one.

    Raises NotImplementedError when the block has predecessors but
    none of them has a saved stack yet.
    '''
    self.block_no = block
    self.block = self.blocks[block]
    predecessors = self.cfg.blocks_in[block]
    if not predecessors:
        self.stack = []
        return
    for pred in predecessors:
        if pred in self.stacks:
            # Copy, so that changes made while visiting this block do
            # not alter the stack saved for the predecessor.
            self.stack = self.stacks[pred][:]
            return
    raise NotImplementedError(
        "Block %r has predecessors, but none of them has a recorded "
        "stack yet." % (block,))
def exit_block (self, block):
    '''Save the stack left at the end of the given block and drop the
    per-block state.

    The stack is stored in self.stacks under the block index, where
    enter_block looks for it when a successor block is entered.  The
    stack, block and block_no attributes are then removed.
    '''
    assert self.block_no == block
    self.stacks[block] = self.stack
    for per_block_attr in ('stack', 'block', 'block_no'):
        delattr(self, per_block_attr)
op_BINARY_ADD = _op
op_BINARY_AND = _op
@ -140,8 +165,14 @@ class BytecodeFlowBuilder (BytecodeIterVisitor):
op_INPLACE_XOR = _op
op_JUMP_ABSOLUTE = _op
op_JUMP_FORWARD = _op
op_JUMP_IF_FALSE = _op
op_JUMP_IF_TRUE = _op
def op_JUMP_IF_FALSE (self, i, op, arg):
    '''Append a conditional jump to the current block and return it.

    The entry is the tuple (i, op, opname, arg, [condition]), where
    condition is the value on top of the simulated stack.  The value is
    only read, not popped, so it stays on the stack afterwards.

    Raises IndexError, with the same "Stack underflow" text that
    _visit_op uses, when the simulated stack is empty.
    '''
    opname, _, _, _ = self.opmap[op]
    if not self.stack:
        raise IndexError("Stack underflow at instruction %d (%s)!" %
                         (i, opname))
    ret_val = (i, op, opname, arg, [self.stack[-1]])
    self.block.append(ret_val)
    return ret_val

# JUMP_IF_TRUE is handled exactly like JUMP_IF_FALSE.
op_JUMP_IF_TRUE = op_JUMP_IF_FALSE
op_LIST_APPEND = _op
op_LOAD_ATTR = _op
op_LOAD_CLOSURE = _op
@ -211,7 +242,9 @@ class BytecodeFlowBuilder (BytecodeIterVisitor):
def build_flow (func):
'''Given a Python function, return a bytecode flow tree for that
function.'''
return BytecodeFlowBuilder().visit(opcode_util.get_code_object(func))
import byte_control
cfg = byte_control.build_cfg(func)
return BytecodeFlowBuilder().visit_cfg(cfg)
# ______________________________________________________________________
# Main (self-test) routine

View file

@ -153,7 +153,7 @@ class LLVMTranslator (BytecodeFlowVisitor):
environment.'''
if llvm_type is None:
if llvm_function is None:
llvm_type = lc.Type.function(lvoid, ())
llvm_type = lc.Type.function(bytetype.lvoid, ())
else:
llvm_type = llvm_function.type.pointee
if env is None:
@ -178,7 +178,8 @@ class LLVMTranslator (BytecodeFlowVisitor):
self.globals = func_globals
nargs = self.code_obj.co_argcount
self.cfg = self.control_flow_builder.visit(
self.bytecode_flow_builder.visit(self.code_obj), nargs)
opcode_util.build_basic_blocks(self.code_obj), nargs)
self.cfg.blocks = self.bytecode_flow_builder.visit_cfg(self.cfg)
self.llvm_function = llvm_function
flow = self.phi_injector.visit_cfg(self.cfg, nargs)
ret_val = self.visit(flow)
@ -450,7 +451,7 @@ class LLVMTranslator (BytecodeFlowVisitor):
def op_JUMP_IF_FALSE (self, i, op, arg, *args, **kws):
cond = args[0]
block_false = self.llvm_blocks[op]
block_false = self.llvm_blocks[i + 3 + arg]
block_true = self.llvm_blocks[i + 3]
return [self.builder.cbranch(cond, block_true, block_false)]
# raise NotImplementedError("LLVMTranslator.op_JUMP_IF_FALSE")

View file

@ -164,6 +164,32 @@ class BytecodeIterVisitor (BytecodeVisitor):
# ______________________________________________________________________
class BasicBlockVisitor (BytecodeVisitor):
    '''Visitor over a mapping from block index to a sequence of
    (i, op, arg) instruction triples.

    Blocks are visited in ascending order of their index.  Subclasses
    override the enter_*/exit_* hooks; the value returned by
    exit_blocks is the result of visit.
    '''

    def visit (self, blocks):
        '''Walk every block in blocks and return self.exit_blocks(blocks).
        '''
        self.enter_blocks(blocks)
        for index in sorted(blocks.keys()):
            self.enter_block(index)
            for offset, opcode_no, oparg in blocks[index]:
                self.visit_op(offset, opcode_no, oparg)
            self.exit_block(index)
        return self.exit_blocks(blocks)

    def enter_blocks (self, blocks):
        '''Hook called once before any block is visited; does nothing.'''
        return None

    def exit_blocks (self, blocks):
        '''Hook called once after all blocks were visited; does nothing
        and returns None.'''
        return None

    def enter_block (self, block_index):
        '''Hook called before the instructions of a block; does nothing.'''
        return None

    def exit_block (self, block_index):
        '''Hook called after the instructions of a block; does nothing.'''
        return None
# ______________________________________________________________________
class BytecodeFlowVisitor (BytecodeVisitor):
def visit (self, flow):
self.block_list = list(flow.keys())

View file

@ -79,9 +79,9 @@ OPCODE_MAP = {
'INPLACE_XOR': (2, 1, None),
'JUMP_ABSOLUTE': (0, None, 1),
'JUMP_FORWARD': (0, None, 1),
'JUMP_IF_FALSE': (1, None, 1),
'JUMP_IF_FALSE': (1, 1, 1),
'JUMP_IF_FALSE_OR_POP': (None, None, None),
'JUMP_IF_TRUE': (1, None, 1),
'JUMP_IF_TRUE': (1, 1, 1),
'JUMP_IF_TRUE_OR_POP': (None, None, None),
'LIST_APPEND': (2, 0, 1),
'LOAD_ATTR': (1, 1, None),
@ -148,7 +148,7 @@ OPCODE_MAP = {
# ______________________________________________________________________
# Module functions
def itercode(code):
def itercode(code, start = 0):
"""Return a generator of byte-offset, opcode, and argument
from a byte-code-string
"""
@ -159,7 +159,7 @@ def itercode(code):
n = len(code)
while i < n:
op = code[i]
num = i
num = i + start
i = i + 1
oparg = None
if op >= opcode.HAVE_ARGUMENT:
@ -211,5 +211,16 @@ def extendlabels(code, labels = None):
def get_code_object (func):
return getattr(func, '__code__', getattr(func, 'func_code', None))
# ______________________________________________________________________
def build_basic_blocks (co_obj):
    '''Split the bytecode of a code object into blocks.

    Block boundaries are offset 0 plus the sorted labels obtained from
    dis.findlabels() after they have been passed through extendlabels().

    Returns a dictionary that maps the starting offset of each block to
    the list of items produced by itercode() for that slice of the
    bytecode; the starting offset is passed to itercode() as its start
    argument.
    '''
    co_code = co_obj.co_code
    labels = extendlabels(co_code, dis.findlabels(co_code))
    labels.sort()
    starts = [0] + labels
    stops = labels + [len(co_code)]
    blocks = {}
    for start, stop in zip(starts, stops):
        # A later entry for the same start offset replaces an earlier
        # one, as it does when building the dict from a sequence of pairs.
        blocks[start] = list(itercode(co_code[start:stop], start))
    return blocks
# ______________________________________________________________________
# End of opcode_util.py

View file

@ -127,9 +127,10 @@ class PhiInjector (BenignBytecodeVisitorMixin, BytecodeFlowVisitor):
def inject_phis (func):
'''Given a Python function, return a bytecode flow object that has
been transformed by a fresh PhiInjector instance.'''
import byte_control
import byte_control, byte_flow
argcount = byte_control.opcode_util.get_code_object(func).co_argcount
cfg = byte_control.build_cfg(func)
cfg.blocks = byte_flow.BytecodeFlowBuilder().visit_cfg(cfg)
return PhiInjector().visit_cfg(cfg, argcount)
# ______________________________________________________________________