diff --git a/llpython/byte_control.py b/llpython/byte_control.py index 34ebae4..9df6d5c 100644 --- a/llpython/byte_control.py +++ b/llpython/byte_control.py @@ -5,13 +5,13 @@ import opcode import opcode_util import pprint -from bytecode_visitor import BytecodeFlowVisitor, BenignBytecodeVisitorMixin +from bytecode_visitor import BasicBlockVisitor, BenignBytecodeVisitorMixin from control_flow import ControlFlowGraph # ______________________________________________________________________ -class ControlFlowBuilder (BenignBytecodeVisitorMixin, BytecodeFlowVisitor): - '''Visitor responsible for traversing a bytecode flow object and +class ControlFlowBuilder (BenignBytecodeVisitorMixin, BasicBlockVisitor): + '''Visitor responsible for traversing a bytecode basic block map and building a control flow graph (CFG). The primary purpose of this transformation is to create a CFG, @@ -27,21 +27,26 @@ class ControlFlowBuilder (BenignBytecodeVisitorMixin, BytecodeFlowVisitor): del self.nargs return ret_val - def enter_flow_object (self, flow): - super(ControlFlowBuilder, self).enter_flow_object(flow) - self.flow = flow + def enter_blocks (self, blocks): + super(ControlFlowBuilder, self).enter_blocks(blocks) + self.blocks = blocks + self.block_list = list(blocks.keys()) + self.block_list.sort() self.cfg = ControlFlowGraph() - for block in flow.keys(): - self.cfg.add_block(block, flow[block]) + self.loop_stack = [] + for block in self.block_list: + self.cfg.add_block(block, blocks[block]) - def exit_flow_object (self, flow): - super(ControlFlowBuilder, self).exit_flow_object(flow) - assert self.flow == flow + def exit_blocks (self, blocks): + super(ControlFlowBuilder, self).exit_blocks(blocks) + assert self.blocks == blocks self.cfg.compute_dataflow() self.cfg.update_for_ssa() ret_val = self.cfg + del self.loop_stack del self.cfg - del self.flow + del self.block_list + del self.blocks return ret_val def enter_block (self, block): @@ -58,13 +63,15 @@ class ControlFlowBuilder (BenignBytecodeVisitorMixin, BytecodeFlowVisitor): def exit_block (self, block): assert block == self.block del self.block - i, op, opname, arg, args = self.flow[block][-1] + i, op, arg = self.blocks[block][-1] + opname = opcode.opname[op] if op in opcode.hasjabs: self.cfg.add_edge(block, arg) elif op in opcode.hasjrel: self.cfg.add_edge(block, i + arg + 3) elif opname == 'BREAK_LOOP': - self.cfg.add_edge(block, arg) + loop_i, _, loop_arg = self.loop_stack[-1] + self.cfg.add_edge(block, loop_i + loop_arg + 3) elif opname != 'RETURN_VALUE': self.cfg.add_edge(block, self._get_next_block(block)) if op in opcode_util.hascbranch: @@ -80,15 +87,24 @@ class ControlFlowBuilder (BenignBytecodeVisitorMixin, BytecodeFlowVisitor): return super(ControlFlowBuilder, self).op_STORE_FAST(i, op, arg, *args, **kws) + def op_SETUP_LOOP (self, i, op, arg, *args, **kws): + self.loop_stack.append((i, op, arg)) + return super(ControlFlowBuilder, self).op_SETUP_LOOP(i, op, arg, *args, + **kws) + + def op_POP_BLOCK (self, i, op, arg, *args, **kws): + self.loop_stack.pop() + return super(ControlFlowBuilder, self).op_POP_BLOCK(i, op, arg, *args, + **kws) + # ______________________________________________________________________ def build_cfg (func): '''Given a Python function, create a bytecode flow, visit the flow object, and return a control flow graph.''' - import byte_flow - return ControlFlowBuilder().visit( - byte_flow.build_flow(func), - opcode_util.get_code_object(func).co_argcount) + co_obj = opcode_util.get_code_object(func) + return ControlFlowBuilder().visit(opcode_util.build_basic_blocks(co_obj), + co_obj.co_argcount) # ______________________________________________________________________ # Main (self-test) routine diff --git a/llpython/byte_flow.py b/llpython/byte_flow.py index 822e564..4a878b2 100644 --- a/llpython/byte_flow.py +++ b/llpython/byte_flow.py @@ -4,13 +4,13 @@ import dis import opcode -from bytecode_visitor import BytecodeIterVisitor +from bytecode_visitor import BasicBlockVisitor import opcode_util # ______________________________________________________________________ -class BytecodeFlowBuilder (BytecodeIterVisitor): - '''Transforms a bytecode vector into a bytecode "flow tree". +class BytecodeFlowBuilder (BasicBlockVisitor): + '''Transforms a CFG into a bytecode "flow tree". The flow tree is a Python dictionary, described loosely by the following set of productions: @@ -36,6 +36,8 @@ class BytecodeFlowBuilder (BytecodeIterVisitor): if pops: if pops < 0: pops = arg - pops - 1 + assert pops <= len(self.stack), ("Stack underflow at instruction " + "%d (%s)!" % (i, opname)) stk_args = self.stack[-pops:] del self.stack[-pops:] else: @@ -51,27 +53,50 @@ class BytecodeFlowBuilder (BytecodeIterVisitor): opname, pops, pushes, appends = self.opmap[op] return self._visit_op(i, op, arg, opname, pops, pushes, appends) - def enter_code_object (self, co_obj): - labels = dis.findlabels(co_obj.co_code) - labels = opcode_util.extendlabels(co_obj.co_code, labels) + def visit_cfg (self, cfg): + self.cfg = cfg + ret_val = self.visit(cfg.blocks) + del self.cfg + return ret_val + + def enter_blocks (self, blocks): + labels = list(blocks.keys()) + labels.sort() self.blocks = dict((index, []) for index in labels) - self.stack = [] self.loop_stack = [] - self.blocks[0] = self.block = [] + self.stacks = {} - def exit_code_object (self, co_obj): + def exit_blocks (self, blocks): ret_val = self.blocks - del self.stack + del self.stacks del self.loop_stack - del self.block del self.blocks return ret_val - def visit_op (self, i, op, arg): - if i in self.blocks: - self.block = self.blocks[i] - return super(BytecodeFlowBuilder, self).visit_op(i, op, arg) + def enter_block (self, block): + self.block_no = block + self.block = self.blocks[block] + in_blocks = self.cfg.blocks_in[block] + if len(in_blocks) == 0: + self.stack = [] + else: + pred_stack = None + for pred in in_blocks: + if pred in self.stacks: + pred_stack = self.stacks[pred] + break + if pred_stack is not None: + self.stack = pred_stack[:] + else: + raise NotImplementedError() + + def exit_block (self, block): + assert self.block_no == block + self.stacks[block] = self.stack + del self.stack + del self.block + del self.block_no op_BINARY_ADD = _op op_BINARY_AND = _op @@ -140,8 +165,14 @@ class BytecodeFlowBuilder (BytecodeIterVisitor): op_INPLACE_XOR = _op op_JUMP_ABSOLUTE = _op op_JUMP_FORWARD = _op - op_JUMP_IF_FALSE = _op - op_JUMP_IF_TRUE = _op + + def op_JUMP_IF_FALSE (self, i, op, arg): + opname, _, _, _ = self.opmap[op] + ret_val = (i, op, opname, arg, [self.stack[-1]]) + self.block.append(ret_val) + return ret_val + + op_JUMP_IF_TRUE = op_JUMP_IF_FALSE op_LIST_APPEND = _op op_LOAD_ATTR = _op op_LOAD_CLOSURE = _op @@ -211,7 +242,9 @@ class BytecodeFlowBuilder (BytecodeIterVisitor): def build_flow (func): '''Given a Python function, return a bytecode flow tree for that function.''' - return BytecodeFlowBuilder().visit(opcode_util.get_code_object(func)) + import byte_control + cfg = byte_control.build_cfg(func) + return BytecodeFlowBuilder().visit_cfg(cfg) # ______________________________________________________________________ # Main (self-test) routine diff --git a/llpython/byte_translator.py b/llpython/byte_translator.py index 93ccb6f..de36640 100644 --- a/llpython/byte_translator.py +++ b/llpython/byte_translator.py @@ -153,7 +153,7 @@ class LLVMTranslator (BytecodeFlowVisitor): environment.''' if llvm_type is None: if llvm_function is None: - llvm_type = lc.Type.function(lvoid, ()) + llvm_type = lc.Type.function(bytetype.lvoid, ()) else: llvm_type = llvm_function.type.pointee if env is None: @@ -178,7 +178,8 @@ class LLVMTranslator (BytecodeFlowVisitor): self.globals = func_globals nargs = self.code_obj.co_argcount self.cfg = self.control_flow_builder.visit( - self.bytecode_flow_builder.visit(self.code_obj), nargs) + opcode_util.build_basic_blocks(self.code_obj), nargs) + self.cfg.blocks = self.bytecode_flow_builder.visit_cfg(self.cfg) self.llvm_function = llvm_function flow = self.phi_injector.visit_cfg(self.cfg, nargs) ret_val = self.visit(flow) @@ -450,7 +451,7 @@ class LLVMTranslator (BytecodeFlowVisitor): def op_JUMP_IF_FALSE (self, i, op, arg, *args, **kws): cond = args[0] - block_false = self.llvm_blocks[op] + block_false = self.llvm_blocks[i + 3 + arg] block_true = self.llvm_blocks[i + 3] return [self.builder.cbranch(cond, block_true, block_false)] # raise NotImplementedError("LLVMTranslator.op_JUMP_IF_FALSE") diff --git a/llpython/bytecode_visitor.py b/llpython/bytecode_visitor.py index 091e138..dcc3a0c 100644 --- a/llpython/bytecode_visitor.py +++ b/llpython/bytecode_visitor.py @@ -164,6 +164,32 @@ class BytecodeIterVisitor (BytecodeVisitor): # ______________________________________________________________________ +class BasicBlockVisitor (BytecodeVisitor): + def visit (self, blocks): + self.enter_blocks(blocks) + block_indices = list(blocks.keys()) + block_indices.sort() + for block_index in block_indices: + self.enter_block(block_index) + for i, op, arg in blocks[block_index]: + self.visit_op(i, op, arg) + self.exit_block(block_index) + return self.exit_blocks(blocks) + + def enter_blocks (self, blocks): + pass + + def exit_blocks (self, blocks): + pass + + def enter_block (self, block_index): + pass + + def exit_block (self, block_index): + pass + +# ______________________________________________________________________ + class BytecodeFlowVisitor (BytecodeVisitor): def visit (self, flow): self.block_list = list(flow.keys()) diff --git a/llpython/opcode_util.py b/llpython/opcode_util.py index 5c0cff9..efd0c5c 100644 --- a/llpython/opcode_util.py +++ b/llpython/opcode_util.py @@ -79,9 +79,9 @@ OPCODE_MAP = { 'INPLACE_XOR': (2, 1, None), 'JUMP_ABSOLUTE': (0, None, 1), 'JUMP_FORWARD': (0, None, 1), - 'JUMP_IF_FALSE': (1, None, 1), + 'JUMP_IF_FALSE': (1, 1, 1), 'JUMP_IF_FALSE_OR_POP': (None, None, None), - 'JUMP_IF_TRUE': (1, None, 1), + 'JUMP_IF_TRUE': (1, 1, 1), 'JUMP_IF_TRUE_OR_POP': (None, None, None), 'LIST_APPEND': (2, 0, 1), 'LOAD_ATTR': (1, 1, None), @@ -148,7 +148,7 @@ OPCODE_MAP = { # ______________________________________________________________________ # Module functions -def itercode(code): +def itercode(code, start = 0): """Return a generator of byte-offset, opcode, and argument from a byte-code-string """ @@ -159,7 +159,7 @@ def itercode(code): n = len(code) while i < n: op = code[i] - num = i + num = i + start i = i + 1 oparg = None if op >= opcode.HAVE_ARGUMENT: @@ -211,5 +211,16 @@ def extendlabels(code, labels = None): def get_code_object (func): return getattr(func, '__code__', getattr(func, 'func_code', None)) +# ______________________________________________________________________ + +def build_basic_blocks (co_obj): + co_code = co_obj.co_code + labels = extendlabels(co_code, dis.findlabels(co_code)) + labels.sort() + blocks = dict((index, list(itercode(co_code[index:next_index], index))) + for index, next_index in zip([0] + labels, + labels + [len(co_code)])) + return blocks + # ______________________________________________________________________ # End of opcode_util.py diff --git a/llpython/phi_injector.py b/llpython/phi_injector.py index 2618e97..6cccb66 100644 --- a/llpython/phi_injector.py +++ b/llpython/phi_injector.py @@ -127,9 +127,10 @@ class PhiInjector (BenignBytecodeVisitorMixin, BytecodeFlowVisitor): def inject_phis (func): '''Given a Python function, return a bytecode flow object that has been transformed by a fresh PhiInjector instance.''' - import byte_control + import byte_control, byte_flow argcount = byte_control.opcode_util.get_code_object(func).co_argcount cfg = byte_control.build_cfg(func) + cfg.blocks = byte_flow.BytecodeFlowBuilder().visit_cfg(cfg) return PhiInjector().visit_cfg(cfg, argcount) # ______________________________________________________________________