From 8787af154e3c5eccc11e78c2ac1b8b39e543e1cf Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Wed, 8 May 2013 18:23:55 -0500 Subject: [PATCH 01/32] Change idom() method to get_a_dom(), update uses, and documentation. --- llpython/control_flow.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/llpython/control_flow.py b/llpython/control_flow.py index 5ea030a..78c8b88 100644 --- a/llpython/control_flow.py +++ b/llpython/control_flow.py @@ -101,15 +101,25 @@ class ControlFlowGraph (object): if hasattr(self, 'reaching_definitions'): del self.reaching_definitions - def idom (self, block): - '''Compute the immediate dominator (idom) of the given block - key. Returns None if the block has no in edges. + def get_a_dom (self, block): + '''Find an immediate predecessor of the given block such that + the predecessor is either the only entry point, or the + precessor is not in its own dominance set (a non-loop + predecessor). Returns None if the given block has no + predecessor. - Note that in the case where there are multiple immediate - dominators (a join after a non-loop branch), this returns one - of the predecessors, but is not guaranteed to reliably select - one over the others (depends on the ordering of the set type - iterator).''' + Note that in the case where there are multiple dominators (a + join after a non-loop branch), this returns one of the + predecessors, but is not guaranteed to reliably select one + over the others (depends on the ordering of the set type + iterator). + + Note: Previously, this method's documentation erroneously + identified the return value as being the immediate dominator + of the input block. Instead, it attempts to find a "nearby" + dominator. 
Normally, the immediate dominator of a join is the + least upperbound of the closed immediate dominance + relationship over its two entrants.''' preds = self.blocks_in[block] npreds = len(preds) if npreds == 0: @@ -143,7 +153,7 @@ class ControlFlowGraph (object): ret_val = {} for pred in preds: ret_val[pred] = self.block_writes_to_writer_map(pred) - crnt = self.idom(pred) + crnt = self.get_a_dom(pred) while crnt != None: crnt_writer_map = self.block_writes_to_writer_map(crnt) # This order of update favors the first definitions @@ -151,7 +161,7 @@ class ControlFlowGraph (object): # visits blocks in reverse execution order. crnt_writer_map.update(ret_val[pred]) ret_val[pred] = crnt_writer_map - crnt = self.idom(crnt) + crnt = self.get_a_dom(crnt) if not has_memoized: self.reaching_definitions = {} self.reaching_definitions[block] = ret_val From 5ef00671e71f4395141c68b0101429e0a9a11bbe Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Wed, 8 May 2013 18:24:38 -0500 Subject: [PATCH 02/32] Add support for more bytecodes. --- llpython/byte_flow.py | 12 ++++++++++-- llpython/opcode_util.py | 6 +++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/llpython/byte_flow.py b/llpython/byte_flow.py index 2201791..c86c6d1 100644 --- a/llpython/byte_flow.py +++ b/llpython/byte_flow.py @@ -118,7 +118,7 @@ class BytecodeFlowBuilder (BasicBlockVisitor): assert arg is None return self._op(i, op, loop_i + loop_arg + 3) - #op_BUILD_CLASS = _op + op_BUILD_CLASS = _op op_BUILD_LIST = _op op_BUILD_MAP = _op op_BUILD_SLICE = _op @@ -147,7 +147,15 @@ class BytecodeFlowBuilder (BasicBlockVisitor): #op_EXTENDED_ARG = _op op_FOR_ITER = _op op_GET_ITER = _op - op_IMPORT_FROM = _op + + def op_IMPORT_FROM (self, i, op, arg): + # References top of stack without popping, so we can't use the + # generic machinery. 
+ opname = self.opmap[op][0] + ret_val = i, op, opname, arg, [self.stack[-1]] + self.stack.append(ret_val) + return ret_val + op_IMPORT_NAME = _op op_IMPORT_STAR = _op op_INPLACE_ADD = _op diff --git a/llpython/opcode_util.py b/llpython/opcode_util.py index efd0c5c..1de6507 100644 --- a/llpython/opcode_util.py +++ b/llpython/opcode_util.py @@ -31,9 +31,9 @@ OPCODE_MAP = { 'BINARY_TRUE_DIVIDE': (2, 1, None), 'BINARY_XOR': (2, 1, None), 'BREAK_LOOP': (0, None, 1), - 'BUILD_CLASS': (None, None, None), + 'BUILD_CLASS': (3, 1, None), 'BUILD_LIST': (-1, 1, None), - 'BUILD_MAP': (None, None, None), + 'BUILD_MAP': (-1, 1, None), 'BUILD_SET': (None, None, None), 'BUILD_SLICE': (None, None, None), 'BUILD_TUPLE': (-1, 1, None), @@ -62,7 +62,7 @@ OPCODE_MAP = { 'FOR_ITER': (1, 1, 1), 'GET_ITER': (1, 1, None), 'IMPORT_FROM': (None, None, None), - 'IMPORT_NAME': (None, None, None), + 'IMPORT_NAME': (2, 1, None), 'IMPORT_STAR': (1, None, 1), 'INPLACE_ADD': (2, 1, None), 'INPLACE_AND': (2, 1, None), From 6f532f164a1fab9f7a49cc38b60e89ea40315ca7 Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Thu, 9 May 2013 16:03:31 -0500 Subject: [PATCH 03/32] Working on full opcode coverage and support for exception control flow. 
--- llpython/byte_control.py | 25 +++++++++++++++---- llpython/byte_flow.py | 49 +++++++++++++++++++++++++++---------- llpython/byte_translator.py | 2 +- llpython/opcode_util.py | 8 +++--- 4 files changed, 61 insertions(+), 23 deletions(-) diff --git a/llpython/byte_control.py b/llpython/byte_control.py index acd5a41..5aec6be 100644 --- a/llpython/byte_control.py +++ b/llpython/byte_control.py @@ -33,7 +33,7 @@ class ControlFlowBuilder (BenignBytecodeVisitorMixin, BasicBlockVisitor): self.block_list = list(blocks.keys()) self.block_list.sort() self.cfg = ControlFlowGraph() - self.loop_stack = [] + self.control_stack = [] for block in self.block_list: self.cfg.add_block(block, blocks[block]) @@ -43,7 +43,7 @@ class ControlFlowBuilder (BenignBytecodeVisitorMixin, BasicBlockVisitor): self.cfg.compute_dataflow() self.cfg.update_for_ssa() ret_val = self.cfg - del self.loop_stack + del self.control_stack del self.cfg del self.block_list del self.blocks @@ -70,7 +70,7 @@ class ControlFlowBuilder (BenignBytecodeVisitorMixin, BasicBlockVisitor): elif op in opcode.hasjrel: self.cfg.add_edge(block, i + arg + 3) elif opname == 'BREAK_LOOP': - loop_i, _, loop_arg = self.loop_stack[-1] + loop_i, _, loop_arg = self.control_stack[-1] self.cfg.add_edge(block, loop_i + loop_arg + 3) elif opname != 'RETURN_VALUE': self.cfg.add_edge(block, self._get_next_block(block)) @@ -87,13 +87,28 @@ class ControlFlowBuilder (BenignBytecodeVisitorMixin, BasicBlockVisitor): return super(ControlFlowBuilder, self).op_STORE_FAST(i, op, arg, *args, **kws) + def op_SETUP_EXCEPT (self, i, op, arg, *args, **kws): + self.control_stack.append((i, op, arg)) + return super(ControlFlowBuilder, self).op_SETUP_EXCEPT(i, op, arg, + *args, **kws) + + def op_SETUP_FINALLY (self, i, op, arg, *args, **kws): + self.control_stack.append((i, op, arg)) + return super(ControlFlowBuilder, self).op_SETUP_FINALLY(i, op, arg, + *args, **kws) + def op_SETUP_LOOP (self, i, op, arg, *args, **kws): - self.loop_stack.append((i, 
op, arg)) + self.control_stack.append((i, op, arg)) return super(ControlFlowBuilder, self).op_SETUP_LOOP(i, op, arg, *args, **kws) + def op_SETUP_WITH (self, i, op, arg, *args, **kws): + self.control_stack.append((i, op, arg)) + return super(ControlFlowBuilder, self).op_SETUP_WITH(i, op, arg, *args, + **kws) + def op_POP_BLOCK (self, i, op, arg, *args, **kws): - self.loop_stack.pop() + self.control_stack.pop() return super(ControlFlowBuilder, self).op_POP_BLOCK(i, op, arg, *args, **kws) diff --git a/llpython/byte_flow.py b/llpython/byte_flow.py index c86c6d1..a5abd9f 100644 --- a/llpython/byte_flow.py +++ b/llpython/byte_flow.py @@ -128,7 +128,7 @@ class BytecodeFlowBuilder (BasicBlockVisitor): op_CALL_FUNCTION_VAR = _op op_CALL_FUNCTION_VAR_KW = _op op_COMPARE_OP = _op - #op_CONTINUE_LOOP = _op + #op_CONTINUE_LOOP = _not_implemented op_DELETE_ATTR = _op op_DELETE_FAST = _op op_DELETE_GLOBAL = _op @@ -142,9 +142,14 @@ class BytecodeFlowBuilder (BasicBlockVisitor): def op_DUP_TOPX (self, i, op, arg): self.stack += self.stack[-arg:] - #op_END_FINALLY = _op + #op_DUP_TOP_TWO = _not_implemented + #op_END_FINALLY = _not_implemented op_EXEC_STMT = _op - #op_EXTENDED_ARG = _op + + def op_EXTENDED_ARG (self, i, op, arg): + raise ValueError("Unexpected EXTENDED_ARG opcode at index %d (should " + "be removed by itercode)." 
% i) + op_FOR_ITER = _op op_GET_ITER = _op @@ -175,15 +180,26 @@ class BytecodeFlowBuilder (BasicBlockVisitor): op_JUMP_FORWARD = _op def op_JUMP_IF_FALSE (self, i, op, arg): - opname, _, _, _ = self.opmap[op] - ret_val = (i, op, opname, arg, [self.stack[-1]]) + ret_val = i, op, self.opnames[op], arg, [self.stack[-1]] self.block.append(ret_val) return ret_val + #op_JUMP_IF_FALSE_OR_POP = _not_implemented op_JUMP_IF_TRUE = op_JUMP_IF_FALSE - op_LIST_APPEND = _op + #op_JUMP_IF_TRUE_OR_POP = op_JUMP_IF_FALSE_OR_POP + + def op_LIST_APPEND (self, i, op, arg): + '''This method is used for both LIST_APPEND, and SET_ADD + opcodes.''' + elem = self.stack.pop() + container = self.stack[-arg] + ret_val = i, op, self.opnames[op], arg, [container, elem] + self.block.append(ret_val) + return ret_val + op_LOAD_ATTR = _op - op_LOAD_CLOSURE = _op + #op_LOAD_BUILD_CLASS = _not_implemented + #op_LOAD_CLOSURE = _not_implemented op_LOAD_CONST = _op op_LOAD_DEREF = _op op_LOAD_FAST = _op @@ -219,19 +235,24 @@ class BytecodeFlowBuilder (BasicBlockVisitor): def op_ROT_TWO (self, i, op, arg): self.stack[-2:] = (self.stack[-1], self.stack[-2]) - #op_SETUP_EXCEPT = _op - #op_SETUP_FINALLY = _op + #op_SETUP_EXCEPT = _not_implemented + #op_SETUP_FINALLY = _not_implemented def op_SETUP_LOOP (self, i, op, arg): self.loop_stack.append((i, op, arg)) - self.block.append((i, op, self.opnames[op], arg, [])) + ret_val = i, op, self.opnames[op], arg, [] + self.block.append(ret_val) + return ret_val + #op_SETUP_WITH = _not_implemented + op_SET_ADD = op_LIST_APPEND op_SLICE = _op - #op_STOP_CODE = _op + #op_STOP_CODE = _not_implemented op_STORE_ATTR = _op op_STORE_DEREF = _op op_STORE_FAST = _op op_STORE_GLOBAL = _op + #op_STORE_LOCALS = _not_implemented op_STORE_MAP = _op op_STORE_NAME = _op op_STORE_SLICE = _op @@ -241,8 +262,9 @@ class BytecodeFlowBuilder (BasicBlockVisitor): op_UNARY_NEGATIVE = _op op_UNARY_NOT = _op op_UNARY_POSITIVE = _op - op_UNPACK_SEQUENCE = _op - #op_WITH_CLEANUP = _op + 
#op_UNPACK_EX = _not_implemented + #op_UNPACK_SEQUENCE = _not_implemented + #op_WITH_CLEANUP = _not_implemented op_YIELD_VALUE = _op # ______________________________________________________________________ @@ -254,6 +276,7 @@ def build_flow (func): cfg = byte_control.build_cfg(func) return BytecodeFlowBuilder().visit_cfg(cfg) + # ______________________________________________________________________ # Main (self-test) routine diff --git a/llpython/byte_translator.py b/llpython/byte_translator.py index 9b6d2de..5473f20 100644 --- a/llpython/byte_translator.py +++ b/llpython/byte_translator.py @@ -591,7 +591,7 @@ def llpython_into (llvm_function, **kws): # Main (self-test) routine def main (*args): - from tests import llfuncs, llfunctys + from .tests import llfuncs, llfunctys if not args: args = ('doslice',) elif 'all' in args: diff --git a/llpython/opcode_util.py b/llpython/opcode_util.py index 1de6507..aecdff7 100644 --- a/llpython/opcode_util.py +++ b/llpython/opcode_util.py @@ -34,8 +34,8 @@ OPCODE_MAP = { 'BUILD_CLASS': (3, 1, None), 'BUILD_LIST': (-1, 1, None), 'BUILD_MAP': (-1, 1, None), - 'BUILD_SET': (None, None, None), - 'BUILD_SLICE': (None, None, None), + 'BUILD_SET': (-1, 1, None), + 'BUILD_SLICE': (-1, 1, None), # oparg should only be 2 or 3 'BUILD_TUPLE': (-1, 1, None), 'CALL_FUNCTION': (-2, 1, None), 'CALL_FUNCTION_KW': (-3, 1, None), @@ -57,7 +57,7 @@ OPCODE_MAP = { 'DUP_TOPX': (None, None, None), 'DUP_TOP_TWO': (None, None, None), 'END_FINALLY': (None, None, None), - 'EXEC_STMT': (None, None, None), + 'EXEC_STMT': (3, 0, 1), 'EXTENDED_ARG': (None, None, None), 'FOR_ITER': (1, 1, 1), 'GET_ITER': (1, 1, None), @@ -83,7 +83,7 @@ OPCODE_MAP = { 'JUMP_IF_FALSE_OR_POP': (None, None, None), 'JUMP_IF_TRUE': (1, 1, 1), 'JUMP_IF_TRUE_OR_POP': (None, None, None), - 'LIST_APPEND': (2, 0, 1), + 'LIST_APPEND': (None, None, None), 'LOAD_ATTR': (1, 1, None), 'LOAD_BUILD_CLASS': (None, None, None), 'LOAD_CLOSURE': (None, None, None), From 
d3b252531d0bea40b11ff0c5aea29c7224c07587 Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Fri, 10 May 2013 19:04:03 -0500 Subject: [PATCH 04/32] Updates to llpython.opcode_util to better support loop and exception control flow. * Included non-argument branching opcodes in the hasjump opcode list. * Added all the SETUP_* opcodes to the hascbranch list. * Modified itercode() to not output the EXTENDED_ARG bytecode. * Cleaned up extendlabels(), and made it use the hasjump opcode list uniformly. --- llpython/opcode_util.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/llpython/opcode_util.py b/llpython/opcode_util.py index aecdff7..16a45d6 100644 --- a/llpython/opcode_util.py +++ b/llpython/opcode_util.py @@ -7,10 +7,21 @@ import opcode # ______________________________________________________________________ # Module data -hasjump = opcode.hasjrel + opcode.hasjabs -hascbranch = [op for op in hasjump - if 'IF' in opcode.opname[op] - or opcode.opname[op] in ('FOR_ITER', 'SETUP_LOOP')] +# Note that opcode.hasjrel and opcode.hasjabs applies only to opcodes +# that calculate a jump point based on the argument. This ignores +# jumps that use the frame stack to calculate their targets. + +NON_ARG_JUMP_NAMES = 'BREAK_LOOP', 'RETURN_VALUE', 'END_FINALLY' +NON_ARG_JUMPS = [opcode.opmap[opname] + for opname in NON_ARG_JUMP_NAMES + if opname in opcode.opmap] +HAS_CBRANCH_NAMES = 'FOR_ITER', +hasjump = opcode.hasjrel + opcode.hasjabs + NON_ARG_JUMPS +hascbranch = [op for op, opname in ((op, opcode.opname[op]) + for op in hasjump) + if 'IF' in opname + or 'SETUP' in opname + or opname in HAS_CBRANCH_NAMES] # Since the actual opcode value may change, manage opcode abstraction # data by opcode name. 
@@ -168,6 +179,7 @@ def itercode(code, start = 0): i = i + 2 if op == opcode.EXTENDED_ARG: extended_arg = oparg * 65536 + continue delta = yield num, op, oparg if delta is not None: @@ -195,15 +207,8 @@ def extendlabels(code, labels = None): i += 1 if op >= dis.HAVE_ARGUMENT: i += 2 - label = -1 - if op in hasjump: - label = i - if label >= 0: - if label not in labels: - labels.append(label) - elif op == opcode.opmap['BREAK_LOOP']: - if i not in labels: - labels.append(i) + if op in hasjump and i < n and i not in labels: + labels.append(i) return labels # ______________________________________________________________________ From 8ae1b7020643afebcddb3d71f73938aadc0e42e5 Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Tue, 14 May 2013 17:34:54 -0500 Subject: [PATCH 05/32] Initial attempt to support return statements in try-finally block in llpython.byte_control. Added incomplete unit tests. --- llpython/byte_control.py | 95 ++++++++++++++++++++++++++--- llpython/tests/test_byte_control.py | 81 ++++++++++++++++++++++++ 2 files changed, 168 insertions(+), 8 deletions(-) create mode 100644 llpython/tests/test_byte_control.py diff --git a/llpython/byte_control.py b/llpython/byte_control.py index 5aec6be..56c14ec 100644 --- a/llpython/byte_control.py +++ b/llpython/byte_control.py @@ -9,6 +9,29 @@ from .bytecode_visitor import BasicBlockVisitor, BenignBytecodeVisitorMixin from .control_flow import ControlFlowGraph # ______________________________________________________________________ +# Module data + +# The following opcodes branch based on the control (a.k.a. 
frame) +# stack: +RETURN_VALUE, CONTINUE_LOOP, BREAK_LOOP, END_FINALLY = ( + opcode.opmap[opname] for opname in ( + 'RETURN_VALUE', 'CONTINUE_LOOP', 'BREAK_LOOP', 'END_FINALLY')) + +# The following opcodes push a new frame on the control stack: +SETUP_EXCEPT, SETUP_FINALLY, SETUP_LOOP, SETUP_WITH = ( + opcode.opmap.get(opname, None) for opname in ( + 'SETUP_EXCEPT', 'SETUP_FINALLY', 'SETUP_LOOP', 'SETUP_WITH')) + +WHY_NOT = 1 +WHY_EXCEPTION = WHY_NOT << 1 +WHY_RERAISE = WHY_EXCEPTION << 1 +WHY_RETURN = WHY_RERAISE << 1 +WHY_BREAK = WHY_RETURN << 1 +WHY_CONTINUE = WHY_BREAK << 1 +WHY_YIELD = WHY_CONTINUE << 1 + +# ______________________________________________________________________ +# Class definition(s) class ControlFlowBuilder (BenignBytecodeVisitorMixin, BasicBlockVisitor): '''Visitor responsible for traversing a bytecode basic block map and @@ -60,21 +83,77 @@ class ControlFlowBuilder (BenignBytecodeVisitorMixin, BasicBlockVisitor): def _get_next_block (self, block): return self.block_list[self.block_list.index(block) + 1] + def _generate_handler_edge (self, block, i, op, arg, why): + """Given a reason (corresponding to the why code in + Python/ceval.c), interrupt control flow, possibly using the + control flow (a.k.a. frame) stack to calculate the next + target. + + Returns True if an edge was added to the CFG, False otherwise. 
+ Based on the opcode the return result may mean different + things (for example: if why == WHY_RETURN, then the function + returns).""" + ret_val = False + if len(self.control_stack) > 0: + handlers = set((SETUP_FINALLY, SETUP_WITH)) + if why == WHY_EXCEPTION: + handlers.add(SETUP_EXCEPT) + reverse_stack = self.control_stack[::-1] + target = None + for handler_i, handler_op, handler_arg in reverse_stack: + if handler_op in handlers: + target = handler_i + handler_arg + 3 + elif handler_op == SETUP_LOOP: + if why == WHY_CONTINUE: + if op == CONTINUE_LOOP: + target = i + arg + 3 + else: + # XXX This isn't going to be correct for + # for-loops, or really long while-loops + # (which use EXTENDED_ARG): + target = handler_i + 3 + elif why == WHY_BREAK: + target = handler_i + handler_arg + 3 + if target is not None: + self.cfg.add_edge(block, target) + ret_val = True + break + return ret_val + def exit_block (self, block): assert block == self.block del self.block i, op, arg = self.blocks[block][-1] - opname = opcode.opname[op] - if op in opcode.hasjabs: + goto_next = False + if op == RETURN_VALUE: + self._generate_handler_edge(block, i, op, arg, WHY_RETURN) + elif op == CONTINUE_LOOP: + branched = self._generate_handler_edge(block, i, op, arg, + WHY_CONTINUE) + assert branched, ("Attempted to continue outside of loop %r" % + (self.blocks[block][-1],)) + elif op == BREAK_LOOP: + branched = self._generate_handler_edge(block, i, op, arg, + WHY_BREAK) + assert branched, ("Attempted to break outside of loop %r" % + (self.blocks[block][-1],)) + elif op == END_FINALLY: + # XXX Should we detect cases where return, continue, and + # break appear inside the try-block? This would create + # more accurate control flow graphs by eliding edges we + # know won't be taken. 
+ self._generate_handler_edge(block, i, op, arg, WHY_EXCEPTION) + self._generate_handler_edge(block, i, op, arg, WHY_RETURN) + self._generate_handler_edge(block, i, op, arg, WHY_BREAK) + self._generate_handler_edge(block, i, op, arg, WHY_CONTINUE) + goto_next = True # why == WHY_NOT + elif op in opcode.hasjabs: self.cfg.add_edge(block, arg) elif op in opcode.hasjrel: self.cfg.add_edge(block, i + arg + 3) - elif opname == 'BREAK_LOOP': - loop_i, _, loop_arg = self.control_stack[-1] - self.cfg.add_edge(block, loop_i + loop_arg + 3) - elif opname != 'RETURN_VALUE': - self.cfg.add_edge(block, self._get_next_block(block)) - if op in opcode_util.hascbranch: + else: + goto_next = True + if op in opcode_util.hascbranch or goto_next: self.cfg.add_edge(block, self._get_next_block(block)) def op_LOAD_FAST (self, i, op, arg, *args, **kws): diff --git a/llpython/tests/test_byte_control.py b/llpython/tests/test_byte_control.py new file mode 100644 index 0000000..e13d5f4 --- /dev/null +++ b/llpython/tests/test_byte_control.py @@ -0,0 +1,81 @@ +#! 
/usr/bin/env python +# ______________________________________________________________________ + +from __future__ import absolute_import + +import unittest + +from llpython import byte_control + +# ______________________________________________________________________ +# Global data + +got_done = 0 + +# ______________________________________________________________________ +# Utility function definitions + +def do_something(): + global got_done + got_done += 1 + print("Something good got done.") + +# ____________________________________________________________ + +def do_something_else(): + raise Exception("Something bad got done, and I don't like it") + +# ______________________________________________________________________ +# Test function definitions + +def try_finally_0(m, n): # why == WHY_RETURN + try: + return n - m + finally: + do_something() + return do_something_else() + +# ____________________________________________________________ + +def try_finally_1(m, n): # why == WHY_BREAK + i = -1 + for i in range(m, n): + try: + if i == 101: + break + finally: + do_something() + return i + +# ______________________________________________________________________ +# Class (test case) definition(s) + +class TestByteControl(unittest.TestCase): + def fail_unless_cfg_match(self, test_cfg, block_count, edges): + assert len(test_cfg.blocks) == block_count + block_keys = list(test_cfg.blocks.keys()) + block_keys.sort() + # TODO: Ensure unexpected edges cause error. + for from_block_ofs, to_block_ofs in edges: + from_block = block_keys[from_block_ofs] + to_block = block_keys[to_block_ofs] + assert from_block in test_cfg.blocks_in[to_block] + assert to_block in test_cfg.blocks_out[from_block] + + def test_try_finally_0(self): + cfg = byte_control.build_cfg(try_finally_0) + self.fail_unless_cfg_match(cfg, 5, ((0, 1), (0, 3), (1, 3), (2, 3), + (3, 4))) + + def test_try_finally_1(self): + cfg = byte_control.build_cfg(try_finally_1) + # TODO: Translate known graph to offsets... 
+ self.fail_unless_cfg_match(cfg, 12, ()) + +# ______________________________________________________________________ + +if __name__ == "__main__": + unittest.main() + +# ______________________________________________________________________ +# End of test_byte_control.py From c5d95e01e18c88306fe0cf4421febc94b1c0f1d9 Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Wed, 15 May 2013 18:40:44 -0500 Subject: [PATCH 06/32] More work on CFA for try-finally in the presence of loops. --- llpython/byte_control.py | 113 ++++++++++++---- llpython/opcode_util.py | 8 +- llpython/tests/test_byte_control.py | 202 +++++++++++++++++++++++++++- 3 files changed, 288 insertions(+), 35 deletions(-) diff --git a/llpython/byte_control.py b/llpython/byte_control.py index 56c14ec..315f762 100644 --- a/llpython/byte_control.py +++ b/llpython/byte_control.py @@ -1,5 +1,6 @@ #! /usr/bin/env python # ______________________________________________________________________ + from __future__ import absolute_import import opcode from . import opcode_util @@ -13,9 +14,10 @@ from .control_flow import ControlFlowGraph # The following opcodes branch based on the control (a.k.a. frame) # stack: -RETURN_VALUE, CONTINUE_LOOP, BREAK_LOOP, END_FINALLY = ( +RETURN_VALUE, CONTINUE_LOOP, BREAK_LOOP, END_FINALLY, RAISE_VARARGS = ( opcode.opmap[opname] for opname in ( - 'RETURN_VALUE', 'CONTINUE_LOOP', 'BREAK_LOOP', 'END_FINALLY')) + 'RETURN_VALUE', 'CONTINUE_LOOP', 'BREAK_LOOP', 'END_FINALLY', + 'RAISE_VARARGS')) # The following opcodes push a new frame on the control stack: SETUP_EXCEPT, SETUP_FINALLY, SETUP_LOOP, SETUP_WITH = ( @@ -24,7 +26,11 @@ SETUP_EXCEPT, SETUP_FINALLY, SETUP_LOOP, SETUP_WITH = ( WHY_NOT = 1 WHY_EXCEPTION = WHY_NOT << 1 -WHY_RERAISE = WHY_EXCEPTION << 1 +WHY_RERAISE = WHY_EXCEPTION << 1 # We don't worry about this code + # during CFA, since its primary use + # is to log traceback information; + # WHY_RERAISE's bytecode control flow + # is the same as WHY_EXCEPTION. 
WHY_RETURN = WHY_RERAISE << 1 WHY_BREAK = WHY_RETURN << 1 WHY_CONTINUE = WHY_BREAK << 1 @@ -41,10 +47,10 @@ class ControlFlowBuilder (BenignBytecodeVisitorMixin, BasicBlockVisitor): which is used by later transformers for dataflow analysis. ''' def visit (self, flow, nargs = 0, *args, **kws): - '''Given a bytecode flow, and an optional number of arguments, - return a :py:class:`llpython.control_flow.ControlFlowGraph` - instance describing the full control flow of the bytecode - flow.''' + '''Given a map of bytecode basic blocks, and an optional + number of arguments, return a + :py:class:`llpython.control_flow.ControlFlowGraph` instance + describing the full control flow of the bytecode flow.''' self.nargs = nargs ret_val = super(ControlFlowBuilder, self).visit(flow, *args, **kws) del self.nargs @@ -57,6 +63,13 @@ class ControlFlowBuilder (BenignBytecodeVisitorMixin, BasicBlockVisitor): self.block_list.sort() self.cfg = ControlFlowGraph() self.control_stack = [] + self.continue_targets = {} # Map from SETUP_LOOP addresses to + # start of loop addresses, based on + # observed CONTINUE_LOOP opcodes. + self.break_targets = set() # Set of SETUP_LOOP address that + # have at least one observed + # BREAK_LOOP opcode corresponding + # to them. for block in self.block_list: self.cfg.add_block(block, blocks[block]) @@ -66,6 +79,7 @@ class ControlFlowBuilder (BenignBytecodeVisitorMixin, BasicBlockVisitor): self.cfg.compute_dataflow() self.cfg.update_for_ssa() ret_val = self.cfg + del self.continue_targets del self.control_stack del self.cfg del self.block_list @@ -91,29 +105,37 @@ class ControlFlowBuilder (BenignBytecodeVisitorMixin, BasicBlockVisitor): Returns True if an edge was added to the CFG, False otherwise. 
Based on the opcode the return result may mean different - things (for example: if why == WHY_RETURN, then the function - returns).""" + things (for example: if why == WHY_RETURN, then a False return + result means the function returned, and no edge was + generated).""" ret_val = False if len(self.control_stack) > 0: handlers = set((SETUP_FINALLY, SETUP_WITH)) if why == WHY_EXCEPTION: handlers.add(SETUP_EXCEPT) - reverse_stack = self.control_stack[::-1] + reversed_stack = reversed(self.control_stack) target = None - for handler_i, handler_op, handler_arg in reverse_stack: + for handler_i, handler_op, handler_arg in reversed_stack: if handler_op in handlers: target = handler_i + handler_arg + 3 elif handler_op == SETUP_LOOP: if why == WHY_CONTINUE: - if op == CONTINUE_LOOP: - target = i + arg + 3 + # Only generate a WHY_CONTINUE edge if a continue + # statement has been observed for this loop. + if handler_i not in self.continue_targets: + break + elif op == CONTINUE_LOOP: + target = arg + assert target == self.continue_targets[handler_i] else: - # XXX This isn't going to be correct for - # for-loops, or really long while-loops - # (which use EXTENDED_ARG): - target = handler_i + 3 + target = self.continue_targets[handler_i] elif why == WHY_BREAK: - target = handler_i + handler_arg + 3 + # Only generate a WHY_BREAK edge if a break + # statement has been observed for this loop. + if handler_i not in self.break_targets: + break + else: + target = handler_i + handler_arg + 3 if target is not None: self.cfg.add_edge(block, target) ret_val = True @@ -137,11 +159,12 @@ class ControlFlowBuilder (BenignBytecodeVisitorMixin, BasicBlockVisitor): WHY_BREAK) assert branched, ("Attempted to break outside of loop %r" % (self.blocks[block][-1],)) + elif op == RAISE_VARARGS: + self._generate_handler_edge(block, i, op, arg, WHY_EXCEPTION) elif op == END_FINALLY: - # XXX Should we detect cases where return, continue, and - # break appear inside the try-block? 
This would create - # more accurate control flow graphs by eliding edges we - # know won't be taken. + # The following does a lot of redundant traversal of the + # simulated frame stack, but it works, and keeps a lot of + # special case logic out of _generate_handler_edge(). self._generate_handler_edge(block, i, op, arg, WHY_EXCEPTION) self._generate_handler_edge(block, i, op, arg, WHY_RETURN) self._generate_handler_edge(block, i, op, arg, WHY_BREAK) @@ -156,6 +179,9 @@ class ControlFlowBuilder (BenignBytecodeVisitorMixin, BasicBlockVisitor): if op in opcode_util.hascbranch or goto_next: self.cfg.add_edge(block, self._get_next_block(block)) + # ____________________________________________________________ + # LOAD/STORE_FAST + def op_LOAD_FAST (self, i, op, arg, *args, **kws): self.cfg.blocks_reads[self.block].add(arg) return super(ControlFlowBuilder, self).op_LOAD_FAST(i, op, arg, *args, @@ -166,6 +192,44 @@ class ControlFlowBuilder (BenignBytecodeVisitorMixin, BasicBlockVisitor): return super(ControlFlowBuilder, self).op_STORE_FAST(i, op, arg, *args, **kws) + # ____________________________________________________________ + # *_LOOP: Special loop control flow. + + def _get_current_loop (self): + for handler in reversed(self.control_stack): + if handler[1] == SETUP_LOOP: + return handler + return None, None, None + + def op_BREAK_LOOP (self, i, op, arg, *args, **kws): + handler_i, _, _ = self._get_current_loop() + assert handler_i is not None + self.break_targets.add(handler_i) + + def op_CONTINUE_LOOP (self, i, op, arg, *args, **kws): + """ + CONTINUE_LOOP has to be handled differently than BREAK_LOOP, + since its argument specifies where the start of the loop is + (in the case of for-loops, FOR_ITER defines the true start of + the loop, instead of SETUP_LOOP.) 
+ """ + handler_i, _, _ = self._get_current_loop() + assert handler_i is not None + if handler_i in self.continue_targets: + assert arg == self.continue_targets[handler_i] + else: + self.continue_targets[handler_i] = arg + + # ____________________________________________________________ + # POP_BLOCK + + def op_POP_BLOCK (self, i, op, arg, *args, **kws): + self.control_stack.pop() + return super(ControlFlowBuilder, self).op_POP_BLOCK(i, op, arg, *args, + **kws) + # ____________________________________________________________ + # SETUP_* + def op_SETUP_EXCEPT (self, i, op, arg, *args, **kws): self.control_stack.append((i, op, arg)) return super(ControlFlowBuilder, self).op_SETUP_EXCEPT(i, op, arg, @@ -186,11 +250,6 @@ class ControlFlowBuilder (BenignBytecodeVisitorMixin, BasicBlockVisitor): return super(ControlFlowBuilder, self).op_SETUP_WITH(i, op, arg, *args, **kws) - def op_POP_BLOCK (self, i, op, arg, *args, **kws): - self.control_stack.pop() - return super(ControlFlowBuilder, self).op_POP_BLOCK(i, op, arg, *args, - **kws) - # ______________________________________________________________________ def build_cfg (func): diff --git a/llpython/opcode_util.py b/llpython/opcode_util.py index 16a45d6..c1ddf57 100644 --- a/llpython/opcode_util.py +++ b/llpython/opcode_util.py @@ -9,9 +9,11 @@ import opcode # Note that opcode.hasjrel and opcode.hasjabs applies only to opcodes # that calculate a jump point based on the argument. This ignores -# jumps that use the frame stack to calculate their targets. +# jumps that use the frame stack to calculate their targets, and +# exceptions. 
-NON_ARG_JUMP_NAMES = 'BREAK_LOOP', 'RETURN_VALUE', 'END_FINALLY' +NON_ARG_JUMP_NAMES = ('BREAK_LOOP', 'RETURN_VALUE', 'END_FINALLY', + 'RAISE_VARARGS') NON_ARG_JUMPS = [opcode.opmap[opname] for opname in NON_ARG_JUMP_NAMES if opname in opcode.opmap] @@ -118,7 +120,7 @@ OPCODE_MAP = { 'PRINT_ITEM_TO': (2, None, 1), 'PRINT_NEWLINE': (0, None, 1), 'PRINT_NEWLINE_TO': (1, None, 1), - 'RAISE_VARARGS': (None, None, None), + 'RAISE_VARARGS': (-1, None, 1), 'RETURN_VALUE': (1, None, 1), 'ROT_FOUR': (None, None, None), 'ROT_THREE': (None, None, None), diff --git a/llpython/tests/test_byte_control.py b/llpython/tests/test_byte_control.py index e13d5f4..b3e185b 100644 --- a/llpython/tests/test_byte_control.py +++ b/llpython/tests/test_byte_control.py @@ -3,6 +3,7 @@ from __future__ import absolute_import +import sys import unittest from llpython import byte_control @@ -10,6 +11,11 @@ from llpython import byte_control # ______________________________________________________________________ # Global data +# Technically we could also compute if we need special logic for the +# old bytecode compiler by scanning for JUMP_IF_TRUE and JUMP_IF_FALSE +# opcodes. These opcodes require additional POP_TOP's be inserted. 
+OLD_BYTECODE_COMPILER = sys.version_info < (2, 7) + got_done = 0 # ______________________________________________________________________ @@ -19,6 +25,7 @@ def do_something(): global got_done got_done += 1 print("Something good got done.") + return got_done # ____________________________________________________________ @@ -47,6 +54,62 @@ def try_finally_1(m, n): # why == WHY_BREAK do_something() return i +# ____________________________________________________________ + +def try_finally_2(m, n): # why == WHY_CONTINUE + i = m + while i < n: + try: + if i == 101: + i += 200 + continue + finally: + do_something() + i += 1 + return i + +# ____________________________________________________________ + +def try_finally_3(m, n): # why == WHY_EXCEPTION (or WHY_RETURN) + d = {} + try: + return d[n] - d[m] + finally: + do_something() + return do_something_else() + +# ____________________________________________________________ + +def try_finally_4(m, n): # why == WHY_NOT + try: + rv = n - m + finally: + do_something() + return rv + +# ____________________________________________________________ + +def try_finally_5(m, n): + for i in range(m, n): + try: + if i == 99: + break + elif i == 121: + continue + elif i == 86: + return + elif i < -102: + raise ValueError(i) + else: + try: + if i < 0: + raise ValueError(i) + finally: + do_something() + finally: + do_something() + return do_something() + # ______________________________________________________________________ # Class (test case) definition(s) @@ -55,22 +118,151 @@ class TestByteControl(unittest.TestCase): assert len(test_cfg.blocks) == block_count block_keys = list(test_cfg.blocks.keys()) block_keys.sort() - # TODO: Ensure unexpected edges cause error. 
+ expected_blocks_in = dict((block_key, set()) + for block_key in block_keys) + expected_blocks_out = dict((block_key, set()) + for block_key in block_keys) for from_block_ofs, to_block_ofs in edges: from_block = block_keys[from_block_ofs] to_block = block_keys[to_block_ofs] - assert from_block in test_cfg.blocks_in[to_block] - assert to_block in test_cfg.blocks_out[from_block] + expected_blocks_in[to_block].add(from_block) + expected_blocks_out[from_block].add(to_block) + for block_key in block_keys: + expected_in = expected_blocks_in[block_key] + test_in = test_cfg.blocks_in[block_key] + self.assertEqual( + expected_in, test_in, '%r != %r for blocks_in[%d]' % ( + test_in, expected_in, block_key)) + expected_out = expected_blocks_out[block_key] + test_out = test_cfg.blocks_out[block_key] + self.assertEqual( + expected_out, test_out, '%r != %r for set blocks_out[%d]' % ( + test_out, expected_out, block_key)) + + def test_raise(self): + cfg = byte_control.build_cfg(do_something_else) + self.fail_unless_cfg_match(cfg, 2, ()) def test_try_finally_0(self): + """ + Expected CFG (Python 2.7+): + digraph CFG_try_finally_0 { + BLOCK_0 -> BLOCK_3; // 0 -> 1 + BLOCK_0 -> BLOCK_15; // 0 -> 3 + BLOCK_3 -> BLOCK_15; // 1 -> 3 + BLOCK_11 -> BLOCK_15; // 2 -> 3 + BLOCK_15 -> BLOCK_23; // 3 -> 4, why == WHY_NOT + BLOCK_23; // 4 + } + (Possibly terminal blocks: 15, 23.) 
+ """ cfg = byte_control.build_cfg(try_finally_0) self.fail_unless_cfg_match(cfg, 5, ((0, 1), (0, 3), (1, 3), (2, 3), (3, 4))) def test_try_finally_1(self): + """ + Expected CFG (Python 2.7+): + digraph CFG_try_finally_1 { + BLOCK_0 -> BLOCK_9; // 0 -> 1 + BLOCK_0 -> BLOCK_63; // 0 -> 11 + BLOCK_9 -> BLOCK_22; // 1 -> 2 + BLOCK_22 -> BLOCK_25; // 2 -> 3 + BLOCK_22 -> BLOCK_62; // 2 -> 10 + BLOCK_25 -> BLOCK_31; // 3 -> 4 + BLOCK_25 -> BLOCK_51; // 3 -> 8 + BLOCK_31 -> BLOCK_43; // 4 -> 5 + BLOCK_31 -> BLOCK_47; // 4 -> 7 + BLOCK_43 -> BLOCK_51; // 5 -> 8 + BLOCK_44 -> BLOCK_47; // 6 -> 7 + BLOCK_47 -> BLOCK_51; // 7 -> 8 + BLOCK_51 -> BLOCK_59; // 8 -> 9, why == WHY_NOT + BLOCK_51 -> BLOCK_63; // 8 -> 11, why == WHY_BREAK, WHY_RETURN, ... + BLOCK_59 -> BLOCK_22; // 9 -> 2 + BLOCK_62 -> BLOCK_63; // 10 -> 11 + BLOCK_63; // 11 + } + (Possibly terminal blocks: 51, 63.) + """ cfg = byte_control.build_cfg(try_finally_1) - # TODO: Translate known graph to offsets... - self.fail_unless_cfg_match(cfg, 12, ()) + if not OLD_BYTECODE_COMPILER: + self.fail_unless_cfg_match( + cfg, 12, ((0, 1), (0, 11), (1, 2), (2, 3), (2, 10), (3, 4), + (3, 8), (4, 5), (4, 7), (5, 8), (6, 7), (7, 8), + (8, 9), (8, 11), (9, 2), (10, 11))) + else: + self.fail_unless_cfg_match( + cfg, 13, ((0, 1), (0, 12), (1, 2), (2, 3), (2, 11), (3, 4), + (3, 9), (4, 5), (4, 7), (5, 9), (6, 8), (7, 8), + (8, 9), (9, 10), (9, 12), (10, 2), (11, 12))) + + def test_try_finally_2(self): + """ + Expected CFG (Python 2.7+): + digraph CFG_try_finally_2 { + BLOCK_0 -> BLOCK_9; // 0 -> 1 + BLOCK_0 -> BLOCK_78; // 0 -> 10 + BLOCK_9 -> BLOCK_21; // 1 -> 2 + BLOCK_9 -> BLOCK_77; // 1 -> 9 + BLOCK_21 -> BLOCK_24; // 2 -> 3 + BLOCK_21 -> BLOCK_56; // 2 -> 7 + BLOCK_24 -> BLOCK_36; // 3 -> 4 + BLOCK_24 -> BLOCK_52; // 3 -> 6 + BLOCK_36 -> BLOCK_56; // 4 -> 7 + BLOCK_49 -> BLOCK_52; // 5 -> 6 + BLOCK_52 -> BLOCK_56; // 6 -> 7 + BLOCK_56 -> BLOCK_9; // 7 -> 1, why == WHY_CONTINUE + BLOCK_56 -> BLOCK_64; // 7 -> 8, why == 
WHY_NOT + BLOCK_64 -> BLOCK_9; // 8 -> 1 + BLOCK_77 -> BLOCK_78; // 9 -> 10 + BLOCK_78; // 10 + } + (Possibly terminal blocks: 56, 78.) + """ + cfg = byte_control.build_cfg(try_finally_2) + if not OLD_BYTECODE_COMPILER: + self.fail_unless_cfg_match( + cfg, 11, ((0, 1), (0, 10), (1, 2), (1, 9), (2, 3), (2, 7), + (3, 4), (3, 6), (4, 7), (5, 6), (6, 7), (7, 1), + (7, 8), (8, 1), (9, 10))) + + def test_try_finally_3(self): + """ + Expected (Python 2.7+): + digraph CFG_foo3 { + BLOCK_0 -> BLOCK_9; // 0 -> 1 + BLOCK_0 -> BLOCK_29; // 0 -> 3 + BLOCK_9 -> BLOCK_29; // 1 -> 3 + BLOCK_25 -> BLOCK_29; // 2 -> 3 + BLOCK_29 -> BLOCK_37; // 3 -> 4, why == WHY_NOT + BLOCK_37; // 4 + } + (Possibly terminal blocks: 29, 37.) + """ + cfg = byte_control.build_cfg(try_finally_3) + self.fail_unless_cfg_match(cfg, 5, ((0, 1), (0, 3), (1, 3), (2, 3), + (3, 4))) + + def test_try_finally_4(self): + """ + Expected: + digraph CFG_foo4 { + BLOCK_0 -> BLOCK_3; // 0 -> 1 + BLOCK_0 -> BLOCK_17; // 0 -> 2 + BLOCK_3 -> BLOCK_17; // 1 -> 2 + BLOCK_17 -> BLOCK_25; // 2 -> 3, why == WHY_NOT + BLOCK_25; // 3 + } + (Possibly terminal blocks: 17, 25.) + """ + cfg = byte_control.build_cfg(try_finally_4) + self.fail_unless_cfg_match(cfg, 4, ((0, 1), (0, 2), (1, 2), (2, 3))) + + def test_try_finally_5(self): + pass # TODO: fix ControlFlowGraph.update_for_ssa which + # diverges on the CFG for the following: + #cfg = byte_control.build_cfg(try_finally_5) + #self.fail_unless_cfg_match(cfg, XXX, ()) # ______________________________________________________________________ From c06f811473891a305987a87891299431cde4a746 Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Thu, 16 May 2013 15:38:12 -0500 Subject: [PATCH 07/32] Reintroduced dead code elimination to CFA pass to avoid dataflow problems. Updated tests. 
--- llpython/byte_control.py | 1 + llpython/control_flow.py | 4 ++-- llpython/tests/test_byte_control.py | 32 +++++++++++++++++------------ 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/llpython/byte_control.py b/llpython/byte_control.py index 315f762..e21252a 100644 --- a/llpython/byte_control.py +++ b/llpython/byte_control.py @@ -76,6 +76,7 @@ class ControlFlowBuilder (BenignBytecodeVisitorMixin, BasicBlockVisitor): def exit_blocks (self, blocks): super(ControlFlowBuilder, self).exit_blocks(blocks) assert self.blocks == blocks + self.cfg.unlink_unreachables() self.cfg.compute_dataflow() self.cfg.update_for_ssa() ret_val = self.cfg diff --git a/llpython/control_flow.py b/llpython/control_flow.py index 78c8b88..5b546ff 100644 --- a/llpython/control_flow.py +++ b/llpython/control_flow.py @@ -31,12 +31,12 @@ class ControlFlowGraph (object): def unlink_unreachables (self): changed = True - next_blocks = self.blocks.keys() + next_blocks = set(self.blocks.keys()) next_blocks.remove(0) while changed: changed = False blocks = next_blocks - next_blocks = blocks[:] + next_blocks = blocks.copy() for block in blocks: if len(self.blocks_in[block]) == 0: blocks_out = self.blocks_out[block] diff --git a/llpython/tests/test_byte_control.py b/llpython/tests/test_byte_control.py index b3e185b..3ee3e32 100644 --- a/llpython/tests/test_byte_control.py +++ b/llpython/tests/test_byte_control.py @@ -150,14 +150,14 @@ class TestByteControl(unittest.TestCase): BLOCK_0 -> BLOCK_3; // 0 -> 1 BLOCK_0 -> BLOCK_15; // 0 -> 3 BLOCK_3 -> BLOCK_15; // 1 -> 3 - BLOCK_11 -> BLOCK_15; // 2 -> 3 + // DEAD: BLOCK_11 -> BLOCK_15; // 2 -> 3 BLOCK_15 -> BLOCK_23; // 3 -> 4, why == WHY_NOT BLOCK_23; // 4 } (Possibly terminal blocks: 15, 23.) 
""" cfg = byte_control.build_cfg(try_finally_0) - self.fail_unless_cfg_match(cfg, 5, ((0, 1), (0, 3), (1, 3), (2, 3), + self.fail_unless_cfg_match(cfg, 5, ((0, 1), (0, 3), (1, 3), (3, 4))) def test_try_finally_1(self): @@ -174,7 +174,7 @@ class TestByteControl(unittest.TestCase): BLOCK_31 -> BLOCK_43; // 4 -> 5 BLOCK_31 -> BLOCK_47; // 4 -> 7 BLOCK_43 -> BLOCK_51; // 5 -> 8 - BLOCK_44 -> BLOCK_47; // 6 -> 7 + // DEAD: BLOCK_44 -> BLOCK_47; // 6 -> 7 BLOCK_47 -> BLOCK_51; // 7 -> 8 BLOCK_51 -> BLOCK_59; // 8 -> 9, why == WHY_NOT BLOCK_51 -> BLOCK_63; // 8 -> 11, why == WHY_BREAK, WHY_RETURN, ... @@ -188,12 +188,12 @@ class TestByteControl(unittest.TestCase): if not OLD_BYTECODE_COMPILER: self.fail_unless_cfg_match( cfg, 12, ((0, 1), (0, 11), (1, 2), (2, 3), (2, 10), (3, 4), - (3, 8), (4, 5), (4, 7), (5, 8), (6, 7), (7, 8), + (3, 8), (4, 5), (4, 7), (5, 8), (7, 8), (8, 9), (8, 11), (9, 2), (10, 11))) else: self.fail_unless_cfg_match( cfg, 13, ((0, 1), (0, 12), (1, 2), (2, 3), (2, 11), (3, 4), - (3, 9), (4, 5), (4, 7), (5, 9), (6, 8), (7, 8), + (3, 9), (4, 5), (4, 7), (5, 9), (7, 8), (8, 9), (9, 10), (9, 12), (10, 2), (11, 12))) def test_try_finally_2(self): @@ -209,7 +209,7 @@ class TestByteControl(unittest.TestCase): BLOCK_24 -> BLOCK_36; // 3 -> 4 BLOCK_24 -> BLOCK_52; // 3 -> 6 BLOCK_36 -> BLOCK_56; // 4 -> 7 - BLOCK_49 -> BLOCK_52; // 5 -> 6 + // DEAD: BLOCK_49 -> BLOCK_52; // 5 -> 6 BLOCK_52 -> BLOCK_56; // 6 -> 7 BLOCK_56 -> BLOCK_9; // 7 -> 1, why == WHY_CONTINUE BLOCK_56 -> BLOCK_64; // 7 -> 8, why == WHY_NOT @@ -223,7 +223,7 @@ class TestByteControl(unittest.TestCase): if not OLD_BYTECODE_COMPILER: self.fail_unless_cfg_match( cfg, 11, ((0, 1), (0, 10), (1, 2), (1, 9), (2, 3), (2, 7), - (3, 4), (3, 6), (4, 7), (5, 6), (6, 7), (7, 1), + (3, 4), (3, 6), (4, 7), (6, 7), (7, 1), (7, 8), (8, 1), (9, 10))) def test_try_finally_3(self): @@ -233,14 +233,14 @@ class TestByteControl(unittest.TestCase): BLOCK_0 -> BLOCK_9; // 0 -> 1 BLOCK_0 -> BLOCK_29; // 0 -> 3 
BLOCK_9 -> BLOCK_29; // 1 -> 3 - BLOCK_25 -> BLOCK_29; // 2 -> 3 + // DEAD: BLOCK_25 -> BLOCK_29; // 2 -> 3 BLOCK_29 -> BLOCK_37; // 3 -> 4, why == WHY_NOT BLOCK_37; // 4 } (Possibly terminal blocks: 29, 37.) """ cfg = byte_control.build_cfg(try_finally_3) - self.fail_unless_cfg_match(cfg, 5, ((0, 1), (0, 3), (1, 3), (2, 3), + self.fail_unless_cfg_match(cfg, 5, ((0, 1), (0, 3), (1, 3), (3, 4))) def test_try_finally_4(self): @@ -259,10 +259,16 @@ class TestByteControl(unittest.TestCase): self.fail_unless_cfg_match(cfg, 4, ((0, 1), (0, 2), (1, 2), (2, 3))) def test_try_finally_5(self): - pass # TODO: fix ControlFlowGraph.update_for_ssa which - # diverges on the CFG for the following: - #cfg = byte_control.build_cfg(try_finally_5) - #self.fail_unless_cfg_match(cfg, XXX, ()) + cfg = byte_control.build_cfg(try_finally_5) + if not OLD_BYTECODE_COMPILER: + self.fail_unless_cfg_match( + cfg, 26, ((0, 1), (0, 25), (1, 2), (2, 24), (2, 3), (3, 4), + (3, 22), (4, 5), (4, 7), (5, 22), (7, 8), + (7, 10), (8, 22), (10, 11), (10, 12), + (11, 22), (12, 13), (12, 15), (13, 22), + (15, 16), (15, 20), (16, 17), (16, 19), (17, 20), + (19, 20), (20, 21), (20, 22), (21, 22), + (22, 25), (22, 2), (22, 23), (23, 2), (24, 25))) # ______________________________________________________________________ From abbf97276801d5985110c82cab87a53d84ea0d78 Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Tue, 21 May 2013 11:58:26 -0500 Subject: [PATCH 08/32] Added address flow builder, and type constraint generator. --- llpython/addr_flow.py | 100 ++++++++++++++++ llpython/byte_flow.py | 3 +- llpython/bytecode_visitor.py | 4 +- llpython/type_flow.py | 220 +++++++++++++++++++++++++++++++++++ 4 files changed, 323 insertions(+), 4 deletions(-) create mode 100644 llpython/addr_flow.py create mode 100644 llpython/type_flow.py diff --git a/llpython/addr_flow.py b/llpython/addr_flow.py new file mode 100644 index 0000000..7447789 --- /dev/null +++ b/llpython/addr_flow.py @@ -0,0 +1,100 @@ +#! 
/usr/bin/env python +# ______________________________________________________________________ +# Module imports + +from __future__ import absolute_import + +from .byte_flow import BytecodeFlowBuilder + +# ______________________________________________________________________ +# Class definition(s) + +class AddressFlowBuilder(BytecodeFlowBuilder): + ''' + Builds on top of the BytecodeFlowBuilder with two important differences: + + * Child nodes are represented by bytecode indices. + + * All operations (other than purely stack manipulation operations) + are retained in the block list (as opposed to being nested). + + The resulting data structure describes a directed acyclic graph + (DAG) in a similar fashion to BytecodeFlowBuilder: + + * `flow_dag` ``:=`` ``{`` `blocks` ``*`` ``}`` + * `blocks` ``:=`` `block_index` ``:`` ``[`` `bytecode_tuple` ``*`` ``]`` + * `bytecode_tuple` ``:=`` ``(`` `opcode_index` ``,`` `opcode` ``,`` + `opname` ``,`` `arg` ``,`` ``[`` `opcode_index` ``*`` ``]`` ``)`` + ''' + def _visit_op(self, i, op, arg, opname, pops, pushes, appends): + assert pops is not None, ('%s not well defined in opcode_util.' + 'OPCODE_MAP' % opname) + if pops: + if pops < 0: + pops = arg - pops - 1 + assert pops <= len(self.stack), ("Stack underflow at instruction " + "%d (%s)!" % (i, opname)) + stk_args = [stk_arg[0] for stk_arg in self.stack[-pops:]] + del self.stack[-pops:] + else: + stk_args = [] + ret_val = (i, op, opname, arg, stk_args) + if pushes: + self.stack.append(ret_val) + self.block.append(ret_val) + return ret_val + + def op_IMPORT_FROM (self, i, op, arg): + # References top of stack without popping, so we can't use the + # generic machinery. 
+ opname = self.opmap[op][0] + ret_val = i, op, opname, arg, [self.stack[-1][0]] + self.stack.append(ret_val) + self.block.append(ret_val) + return ret_val + + def op_JUMP_IF_FALSE (self, i, op, arg): + ret_val = i, op, self.opnames[op], arg, [self.stack[-1][0]] + self.block.append(ret_val) + return ret_val + + op_JUMP_IF_TRUE = op_JUMP_IF_FALSE + + def op_LIST_APPEND (self, i, op, arg): + '''This method is used for both LIST_APPEND, and SET_ADD + opcodes.''' + elem = self.stack.pop() + container = self.stack[-arg] + ret_val = i, op, self.opnames[op], arg, [container[0], elem[0]] + self.block.append(ret_val) + return ret_val + + op_SET_ADD = op_LIST_APPEND + +# ______________________________________________________________________ +# Function definition(s) + +def build_addr_flow(func): + from . import byte_control + cfg = byte_control.build_cfg(func) + return AddressFlowBuilder().visit_cfg(cfg) + +# ______________________________________________________________________ +# Main (self-test) routine + +def main(*args): + import pprint + from .tests import llfuncs + if not args: + args = ('pymod',) + for arg in args: + pprint.pprint(build_addr_flow(getattr(llfuncs, arg))) + +# ______________________________________________________________________ + +if __name__ == "__main__": + import sys + main(*sys.argv[1:]) + +# ______________________________________________________________________ +# End of addr_flow.py diff --git a/llpython/byte_flow.py b/llpython/byte_flow.py index a5abd9f..4c354cd 100644 --- a/llpython/byte_flow.py +++ b/llpython/byte_flow.py @@ -272,11 +272,10 @@ class BytecodeFlowBuilder (BasicBlockVisitor): def build_flow (func): '''Given a Python function, return a bytecode flow tree for that function.''' - import byte_control + from . 
import byte_control cfg = byte_control.build_cfg(func) return BytecodeFlowBuilder().visit_cfg(cfg) - # ______________________________________________________________________ # Main (self-test) routine diff --git a/llpython/bytecode_visitor.py b/llpython/bytecode_visitor.py index 009d1a0..bbd2f01 100644 --- a/llpython/bytecode_visitor.py +++ b/llpython/bytecode_visitor.py @@ -171,8 +171,8 @@ class BasicBlockVisitor (BytecodeVisitor): block_indices.sort() for block_index in block_indices: self.enter_block(block_index) - for i, op, arg in blocks[block_index]: - self.visit_op(i, op, arg) + for op_tuple in blocks[block_index]: + self.visit_op(*op_tuple) self.exit_block(block_index) return self.exit_blocks(blocks) diff --git a/llpython/type_flow.py b/llpython/type_flow.py new file mode 100644 index 0000000..cd67e69 --- /dev/null +++ b/llpython/type_flow.py @@ -0,0 +1,220 @@ +#! /usr/bin/env python +# ______________________________________________________________________ +# Module imports + +from .bytecode_visitor import BasicBlockVisitor, BenignBytecodeVisitorMixin + +# ______________________________________________________________________ +# Class definition(s) + +class TypeFlowBuilder(BenignBytecodeVisitorMixin, BasicBlockVisitor): + def __init__(self, co_obj, *args, **kws): + super(TypeFlowBuilder, self).__init__(*args, **kws) + self.co_obj = co_obj + self.locals = {} + self.globals = {} + self.refs = {} + self.type_flow = {} + self.requirements = {} + + def get_type_eqns(self): + return self.type_flow, self.requirements, self.locals, self.globals + + def _op(self, i, op, opname, arg, args, *extras, **kws): + self.type_flow[i] = set(args) + + op_BINARY_ADD = _op + op_BINARY_AND = _op + op_BINARY_DIVIDE = _op + op_BINARY_FLOOR_DIVIDE = _op + op_BINARY_LSHIFT = _op + op_BINARY_MODULO = _op + op_BINARY_MULTIPLY = _op + op_BINARY_OR = _op + op_BINARY_POWER = _op + op_BINARY_RSHIFT = _op + op_BINARY_SUBSCR = _op + op_BINARY_SUBTRACT = _op + op_BINARY_TRUE_DIVIDE = _op + 
op_BINARY_XOR = _op + + #op_BUILD_CLASS = _do_nothing + #op_BUILD_LIST = _do_nothing + #op_BUILD_MAP = _do_nothing + #op_BUILD_SET = _do_nothing + #op_BUILD_SLICE = _do_nothing + #op_BUILD_TUPLE = _do_nothing + + #op_CALL_FUNCTION = _do_nothing + #op_CALL_FUNCTION_KW = _do_nothing + #op_CALL_FUNCTION_VAR = _do_nothing + #op_CALL_FUNCTION_VAR_KW = _do_nothing + + def op_COMPARE_OP(self, i, op, opname, arg, args, *extras, **kws): + self.requirements[i] = set(args) + self.type_flow[i] = bool + + #op_CONTINUE_LOOP = _do_nothing + #op_DELETE_ATTR = _do_nothing + #op_DELETE_DEREF = _do_nothing + #op_DELETE_FAST = _do_nothing + #op_DELETE_GLOBAL = _do_nothing + #op_DELETE_NAME = _do_nothing + #op_DELETE_SLICE = _do_nothing + #op_DELETE_SUBSCR = _do_nothing + #op_END_FINALLY = _do_nothing + #op_EXEC_STMT = _do_nothing + #op_EXTENDED_ARG = _do_nothing + #op_FOR_ITER = _do_nothing + #op_GET_ITER = _do_nothing + #op_IMPORT_FROM = _do_nothing + #op_IMPORT_NAME = _do_nothing + #op_IMPORT_STAR = _do_nothing + + op_INPLACE_ADD = _op + op_INPLACE_AND = _op + op_INPLACE_DIVIDE = _op + op_INPLACE_FLOOR_DIVIDE = _op + op_INPLACE_LSHIFT = _op + op_INPLACE_MODULO = _op + op_INPLACE_MULTIPLY = _op + op_INPLACE_OR = _op + op_INPLACE_POWER = _op + op_INPLACE_RSHIFT = _op + op_INPLACE_SUBTRACT = _op + op_INPLACE_TRUE_DIVIDE = _op + op_INPLACE_XOR = _op + + #op_JUMP_ABSOLUTE = _do_nothing + #op_JUMP_FORWARD = _do_nothing + #op_JUMP_IF_FALSE = _do_nothing + #op_JUMP_IF_FALSE_OR_POP = _do_nothing + #op_JUMP_IF_TRUE = _do_nothing + #op_JUMP_IF_TRUE_OR_POP = _do_nothing + #op_LIST_APPEND = _do_nothing + #op_LOAD_ATTR = _do_nothing + #op_LOAD_BUILD_CLASS = _do_nothing + #op_LOAD_CLOSURE = _do_nothing + + def op_LOAD_CONST(self, i, op, opname, arg, args, *extras, **kws): + self.type_flow[i] = type(self.co_obj.co_consts[arg]) + + def op_LOAD_DEREF(self, i, op, opname, arg, args, *extras, **kws): + if arg not in self.refs: + result = set(('inref%d' % arg,)) + self.refs[arg] = result + else: + 
result = self.refs[arg] + self.type_flow[i] = result + + def op_LOAD_FAST(self, i, op, opname, arg, args, *extras, **kws): + if arg not in self.locals: + if arg < self.co_obj.co_argcount: + result = set(('in%d' % arg,)) + else: + result = set() + self.locals[arg] = result + else: + result = self.locals[arg] + self.type_flow[i] = result + + def op_LOAD_GLOBAL(self, i, op, opname, arg, args, *extras, **kws): + if arg not in self.globals: + result = set(('in%d' % arg,)) + self.globals[arg] = result + else: + result = self.globals[arg] + self.type_flow[i] = result + + #op_LOAD_LOCALS = _do_nothing + #op_LOAD_NAME = _do_nothing + #op_MAKE_CLOSURE = _do_nothing + #op_MAKE_FUNCTION = _do_nothing + #op_MAP_ADD = _do_nothing + #op_NOP = _do_nothing + #op_POP_BLOCK = _do_nothing + #op_POP_EXCEPT = _do_nothing + #op_POP_JUMP_IF_FALSE = _do_nothing + #op_POP_JUMP_IF_TRUE = _do_nothing + #op_POP_TOP = _do_nothing + #op_PRINT_EXPR = _do_nothing + #op_PRINT_ITEM = _do_nothing + #op_PRINT_ITEM_TO = _do_nothing + #op_PRINT_NEWLINE = _do_nothing + #op_PRINT_NEWLINE_TO = _do_nothing + #op_RAISE_VARARGS = _do_nothing + + op_RETURN_VALUE = _op + + #op_SETUP_EXCEPT = _do_nothing + #op_SETUP_FINALLY = _do_nothing + #op_SETUP_LOOP = _do_nothing + #op_SETUP_WITH = _do_nothing + #op_SET_ADD = _do_nothing + + #op_SLICE = _do_nothing + + #op_STOP_CODE = _do_nothing + #op_STORE_ATTR = _do_nothing + #op_STORE_DEREF = _do_nothing + + def op_STORE_FAST(self, i, op, opname, arg, args, *extras, **kws): + if arg not in self.locals: + if arg < self.co_obj.co_argcount: + result = set(('in%d' % arg,)) + else: + result = set() + self.locals[arg] = result + else: + result = self.locals[arg] + assert len(args) == 1 + result.add(args[0]) + + #op_STORE_GLOBAL = _do_nothing + #op_STORE_LOCALS = _do_nothing + #op_STORE_MAP = _do_nothing + #op_STORE_NAME = _do_nothing + #op_STORE_SLICE = _do_nothing + #op_STORE_SUBSCR = _do_nothing + + op_UNARY_CONVERT = _op + op_UNARY_INVERT = _op + op_UNARY_NEGATIVE = _op + 
op_UNARY_NOT = _op + op_UNARY_POSITIVE = _op + + #op_UNPACK_EX = _do_nothing + #op_UNPACK_SEQUENCE = _do_nothing + #op_WITH_CLEANUP = _do_nothing + #op_YIELD_VALUE = _do_nothing + +# ______________________________________________________________________ +# Function definition(s) + +def build_type_flow(func): + from .opcode_util import get_code_object + from .addr_flow import build_addr_flow + blocks = build_addr_flow(func) + ty_builder = TypeFlowBuilder(get_code_object(func)) + ty_builder.visit(blocks) + return ty_builder.get_type_eqns() + +# ______________________________________________________________________ +# Main (self-test) routine + +def main(*args): + import pprint + from .tests import llfuncs + if not args: + args = ('pymod',) + for arg in args: + pprint.pprint(build_type_flow(getattr(llfuncs, arg))) + +# ______________________________________________________________________ + +if __name__ == "__main__": + import sys + main(*sys.argv[1:]) + +# ______________________________________________________________________ +# End of type_flow.py From 643f0706e9f1fc4dd6ffa2e76e46a7dd92b99443 Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Mon, 3 Jun 2013 16:00:46 -0500 Subject: [PATCH 09/32] Checking in non-functional attempt at type dependency simplification pass in llpython.type_flow. 
--- llpython/type_flow.py | 64 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/llpython/type_flow.py b/llpython/type_flow.py index cd67e69..3f3e2a9 100644 --- a/llpython/type_flow.py +++ b/llpython/type_flow.py @@ -4,6 +4,11 @@ from .bytecode_visitor import BasicBlockVisitor, BenignBytecodeVisitorMixin +DEBUG_SIMPLIFY = False + +if DEBUG_SIMPLIFY: + from pprint import pprint as pp + # ______________________________________________________________________ # Class definition(s) @@ -20,6 +25,65 @@ class TypeFlowBuilder(BenignBytecodeVisitorMixin, BasicBlockVisitor): def get_type_eqns(self): return self.type_flow, self.requirements, self.locals, self.globals + def simplify(self): + """ + This method isn't working as intended. It should simplify + strongly connected components s.t. instead of outputing + several types like the following: + + {0: set(['in0']), + 3: set(['in1']), + ... + 10: set([0, 3, 34, 37, 62, 65, 'in0', 'in1']), + ... + 34: set([0, 3, 34, 37, 62, 65, 'in0', 'in1']), + 37: set(['in1']), + ... + 62: set([0, 3, 34, 37, 62, 65, 'in0', 'in1']), + 65: set(['in1']), + ... + 75: set([0, 3, 34, 37, 62, 65, 'in0', 'in1']), + ...} + + It outputs the following: + + {0: set(['in0']), + 3: set(['in1']), + ... + 10: set(['in0', 'in1']), + ... + 34: set(['in0', 'in1']), + 37: set(['in1']), + ... + 62: set(['in0', 'in1']), + 65: set(['in1']), + ... 
+ 75: set(['in0', 'in1']), + ...} + """ + if not DEBUG_SIMPLIFY: + raise NotImplementedError("See docstring.") + type_flow = self.type_flow + changed = True + while changed: + changed = False + next_flow = type_flow.copy() + for index, types in type_flow.items(): + if isinstance(types, set): + next_types = set.union( + *(type_flow.get(child_index, + set([child_index])) + for child_index in types)) + else: + next_types = set([types]) + if next_types != types: + next_flow[index] = next_types + changed = True + pp(next_flow) + print() + type_flow = next_flow + return type_flow + def _op(self, i, op, opname, arg, args, *extras, **kws): self.type_flow[i] = set(args) From c890cbdd92bd523662eeaa2a7c817802051c02f4 Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Mon, 3 Jun 2013 18:00:06 -0500 Subject: [PATCH 10/32] Attempting to improve readability of opcode value stack action map using namedtuple in llpython.opcode_util. --- llpython/opcode_util.py | 258 ++++++++++++++++++++-------------------- 1 file changed, 131 insertions(+), 127 deletions(-) diff --git a/llpython/opcode_util.py b/llpython/opcode_util.py index c1ddf57..bec9c01 100644 --- a/llpython/opcode_util.py +++ b/llpython/opcode_util.py @@ -3,6 +3,7 @@ import dis import opcode +from collections import namedtuple # ______________________________________________________________________ # Module data @@ -25,137 +26,140 @@ hascbranch = [op for op, opname in ((op, opcode.opname[op]) or 'SETUP' in opname or opname in HAS_CBRANCH_NAMES] +OpcodeData = namedtuple('OpcodeData', ('pops', 'pushes', 'is_stmt')) +NO_OPCODE_DATA = OpcodeData(None, None, None) + # Since the actual opcode value may change, manage opcode abstraction # data by opcode name. 
OPCODE_MAP = { - 'BINARY_ADD': (2, 1, None), - 'BINARY_AND': (2, 1, None), - 'BINARY_DIVIDE': (2, 1, None), - 'BINARY_FLOOR_DIVIDE': (2, 1, None), - 'BINARY_LSHIFT': (2, 1, None), - 'BINARY_MODULO': (2, 1, None), - 'BINARY_MULTIPLY': (2, 1, None), - 'BINARY_OR': (2, 1, None), - 'BINARY_POWER': (2, 1, None), - 'BINARY_RSHIFT': (2, 1, None), - 'BINARY_SUBSCR': (2, 1, None), - 'BINARY_SUBTRACT': (2, 1, None), - 'BINARY_TRUE_DIVIDE': (2, 1, None), - 'BINARY_XOR': (2, 1, None), - 'BREAK_LOOP': (0, None, 1), - 'BUILD_CLASS': (3, 1, None), - 'BUILD_LIST': (-1, 1, None), - 'BUILD_MAP': (-1, 1, None), - 'BUILD_SET': (-1, 1, None), - 'BUILD_SLICE': (-1, 1, None), # oparg should only be 2 or 3 - 'BUILD_TUPLE': (-1, 1, None), - 'CALL_FUNCTION': (-2, 1, None), - 'CALL_FUNCTION_KW': (-3, 1, None), - 'CALL_FUNCTION_VAR': (-3, 1, None), - 'CALL_FUNCTION_VAR_KW': (-4, 1, None), - 'COMPARE_OP': (2, 1, None), - 'CONTINUE_LOOP': (None, None, None), - 'DELETE_ATTR': (1, None, 1), - 'DELETE_DEREF': (None, None, None), - 'DELETE_FAST': (0, None, 1), - 'DELETE_GLOBAL': (0, None, 1), - 'DELETE_NAME': (0, None, 1), - 'DELETE_SLICE+0': (1, None, 1), - 'DELETE_SLICE+1': (2, None, 1), - 'DELETE_SLICE+2': (2, None, 1), - 'DELETE_SLICE+3': (3, None, 1), - 'DELETE_SUBSCR': (2, None, 1), - 'DUP_TOP': (None, None, None), - 'DUP_TOPX': (None, None, None), - 'DUP_TOP_TWO': (None, None, None), - 'END_FINALLY': (None, None, None), - 'EXEC_STMT': (3, 0, 1), - 'EXTENDED_ARG': (None, None, None), - 'FOR_ITER': (1, 1, 1), - 'GET_ITER': (1, 1, None), - 'IMPORT_FROM': (None, None, None), - 'IMPORT_NAME': (2, 1, None), - 'IMPORT_STAR': (1, None, 1), - 'INPLACE_ADD': (2, 1, None), - 'INPLACE_AND': (2, 1, None), - 'INPLACE_DIVIDE': (2, 1, None), - 'INPLACE_FLOOR_DIVIDE': (2, 1, None), - 'INPLACE_LSHIFT': (2, 1, None), - 'INPLACE_MODULO': (2, 1, None), - 'INPLACE_MULTIPLY': (2, 1, None), - 'INPLACE_OR': (2, 1, None), - 'INPLACE_POWER': (2, 1, None), - 'INPLACE_RSHIFT': (2, 1, None), - 'INPLACE_SUBTRACT': (2, 1, 
None), - 'INPLACE_TRUE_DIVIDE': (2, 1, None), - 'INPLACE_XOR': (2, 1, None), - 'JUMP_ABSOLUTE': (0, None, 1), - 'JUMP_FORWARD': (0, None, 1), - 'JUMP_IF_FALSE': (1, 1, 1), - 'JUMP_IF_FALSE_OR_POP': (None, None, None), - 'JUMP_IF_TRUE': (1, 1, 1), - 'JUMP_IF_TRUE_OR_POP': (None, None, None), - 'LIST_APPEND': (None, None, None), - 'LOAD_ATTR': (1, 1, None), - 'LOAD_BUILD_CLASS': (None, None, None), - 'LOAD_CLOSURE': (None, None, None), - 'LOAD_CONST': (0, 1, None), - 'LOAD_DEREF': (0, 1, None), - 'LOAD_FAST': (0, 1, None), - 'LOAD_GLOBAL': (0, 1, None), - 'LOAD_LOCALS': (None, None, None), - 'LOAD_NAME': (0, 1, None), - 'MAKE_CLOSURE': (None, None, None), - 'MAKE_FUNCTION': (-2, 1, None), - 'MAP_ADD': (None, None, None), - 'NOP': (0, None, None), - 'POP_BLOCK': (0, None, 1), - 'POP_EXCEPT': (None, None, None), - 'POP_JUMP_IF_FALSE': (1, None, 1), - 'POP_JUMP_IF_TRUE': (1, None, 1), - 'POP_TOP': (1, None, 1), - 'PRINT_EXPR': (1, None, 1), - 'PRINT_ITEM': (1, None, 1), - 'PRINT_ITEM_TO': (2, None, 1), - 'PRINT_NEWLINE': (0, None, 1), - 'PRINT_NEWLINE_TO': (1, None, 1), - 'RAISE_VARARGS': (-1, None, 1), - 'RETURN_VALUE': (1, None, 1), - 'ROT_FOUR': (None, None, None), - 'ROT_THREE': (None, None, None), - 'ROT_TWO': (None, None, None), - 'SETUP_EXCEPT': (None, None, None), - 'SETUP_FINALLY': (None, None, None), - 'SETUP_LOOP': (None, None, None), - 'SETUP_WITH': (None, None, None), - 'SET_ADD': (None, None, None), - 'SLICE+0': (1, 1, None), - 'SLICE+1': (2, 1, None), - 'SLICE+2': (2, 1, None), - 'SLICE+3': (3, 1, None), - 'STOP_CODE': (None, None, None), - 'STORE_ATTR': (2, None, 1), - 'STORE_DEREF': (1, 0, 1), - 'STORE_FAST': (1, None, 1), - 'STORE_GLOBAL': (1, None, 1), - 'STORE_LOCALS': (None, None, None), - 'STORE_MAP': (1, None, 1), - 'STORE_NAME': (1, None, 1), - 'STORE_SLICE+0': (1, None, 1), - 'STORE_SLICE+1': (2, None, 1), - 'STORE_SLICE+2': (2, None, 1), - 'STORE_SLICE+3': (3, None, 1), - 'STORE_SUBSCR': (3, None, 1), - 'UNARY_CONVERT': (1, 1, None), - 
'UNARY_INVERT': (1, 1, None), - 'UNARY_NEGATIVE': (1, 1, None), - 'UNARY_NOT': (1, 1, None), - 'UNARY_POSITIVE': (1, 1, None), - 'UNPACK_EX': (None, None, None), - 'UNPACK_SEQUENCE': (None, None, None), - 'WITH_CLEANUP': (None, None, None), - 'YIELD_VALUE': (1, None, 1), + 'BINARY_ADD': OpcodeData(2, 1, None), + 'BINARY_AND': OpcodeData(2, 1, None), + 'BINARY_DIVIDE': OpcodeData(2, 1, None), + 'BINARY_FLOOR_DIVIDE': OpcodeData(2, 1, None), + 'BINARY_LSHIFT': OpcodeData(2, 1, None), + 'BINARY_MODULO': OpcodeData(2, 1, None), + 'BINARY_MULTIPLY': OpcodeData(2, 1, None), + 'BINARY_OR': OpcodeData(2, 1, None), + 'BINARY_POWER': OpcodeData(2, 1, None), + 'BINARY_RSHIFT': OpcodeData(2, 1, None), + 'BINARY_SUBSCR': OpcodeData(2, 1, None), + 'BINARY_SUBTRACT': OpcodeData(2, 1, None), + 'BINARY_TRUE_DIVIDE': OpcodeData(2, 1, None), + 'BINARY_XOR': OpcodeData(2, 1, None), + 'BREAK_LOOP': OpcodeData(0, None, 1), + 'BUILD_CLASS': OpcodeData(3, 1, None), + 'BUILD_LIST': OpcodeData(-1, 1, None), + 'BUILD_MAP': OpcodeData(-1, 1, None), + 'BUILD_SET': OpcodeData(-1, 1, None), + 'BUILD_SLICE': OpcodeData(-1, 1, None), # oparg should only be 2 or 3 + 'BUILD_TUPLE': OpcodeData(-1, 1, None), + 'CALL_FUNCTION': OpcodeData(-2, 1, None), + 'CALL_FUNCTION_KW': OpcodeData(-3, 1, None), + 'CALL_FUNCTION_VAR': OpcodeData(-3, 1, None), + 'CALL_FUNCTION_VAR_KW': OpcodeData(-4, 1, None), + 'COMPARE_OP': OpcodeData(2, 1, None), + 'CONTINUE_LOOP': NO_OPCODE_DATA, + 'DELETE_ATTR': OpcodeData(1, None, 1), + 'DELETE_DEREF': NO_OPCODE_DATA, + 'DELETE_FAST': OpcodeData(0, None, 1), + 'DELETE_GLOBAL': OpcodeData(0, None, 1), + 'DELETE_NAME': OpcodeData(0, None, 1), + 'DELETE_SLICE+0': OpcodeData(1, None, 1), + 'DELETE_SLICE+1': OpcodeData(2, None, 1), + 'DELETE_SLICE+2': OpcodeData(2, None, 1), + 'DELETE_SLICE+3': OpcodeData(3, None, 1), + 'DELETE_SUBSCR': OpcodeData(2, None, 1), + 'DUP_TOP': NO_OPCODE_DATA, + 'DUP_TOPX': NO_OPCODE_DATA, + 'DUP_TOP_TWO': NO_OPCODE_DATA, + 'END_FINALLY': NO_OPCODE_DATA, 
+ 'EXEC_STMT': OpcodeData(3, 0, 1), + 'EXTENDED_ARG': NO_OPCODE_DATA, + 'FOR_ITER': OpcodeData(1, 1, 1), + 'GET_ITER': OpcodeData(1, 1, None), + 'IMPORT_FROM': NO_OPCODE_DATA, + 'IMPORT_NAME': OpcodeData(2, 1, None), + 'IMPORT_STAR': OpcodeData(1, None, 1), + 'INPLACE_ADD': OpcodeData(2, 1, None), + 'INPLACE_AND': OpcodeData(2, 1, None), + 'INPLACE_DIVIDE': OpcodeData(2, 1, None), + 'INPLACE_FLOOR_DIVIDE': OpcodeData(2, 1, None), + 'INPLACE_LSHIFT': OpcodeData(2, 1, None), + 'INPLACE_MODULO': OpcodeData(2, 1, None), + 'INPLACE_MULTIPLY': OpcodeData(2, 1, None), + 'INPLACE_OR': OpcodeData(2, 1, None), + 'INPLACE_POWER': OpcodeData(2, 1, None), + 'INPLACE_RSHIFT': OpcodeData(2, 1, None), + 'INPLACE_SUBTRACT': OpcodeData(2, 1, None), + 'INPLACE_TRUE_DIVIDE': OpcodeData(2, 1, None), + 'INPLACE_XOR': OpcodeData(2, 1, None), + 'JUMP_ABSOLUTE': OpcodeData(0, None, 1), + 'JUMP_FORWARD': OpcodeData(0, None, 1), + 'JUMP_IF_FALSE': OpcodeData(1, 1, 1), + 'JUMP_IF_FALSE_OR_POP': NO_OPCODE_DATA, + 'JUMP_IF_TRUE': OpcodeData(1, 1, 1), + 'JUMP_IF_TRUE_OR_POP': NO_OPCODE_DATA, + 'LIST_APPEND': NO_OPCODE_DATA, + 'LOAD_ATTR': OpcodeData(1, 1, None), + 'LOAD_BUILD_CLASS': NO_OPCODE_DATA, + 'LOAD_CLOSURE': NO_OPCODE_DATA, + 'LOAD_CONST': OpcodeData(0, 1, None), + 'LOAD_DEREF': OpcodeData(0, 1, None), + 'LOAD_FAST': OpcodeData(0, 1, None), + 'LOAD_GLOBAL': OpcodeData(0, 1, None), + 'LOAD_LOCALS': NO_OPCODE_DATA, + 'LOAD_NAME': OpcodeData(0, 1, None), + 'MAKE_CLOSURE': NO_OPCODE_DATA, + 'MAKE_FUNCTION': OpcodeData(-2, 1, None), + 'MAP_ADD': NO_OPCODE_DATA, + 'NOP': OpcodeData(0, None, None), + 'POP_BLOCK': OpcodeData(0, None, 1), + 'POP_EXCEPT': NO_OPCODE_DATA, + 'POP_JUMP_IF_FALSE': OpcodeData(1, None, 1), + 'POP_JUMP_IF_TRUE': OpcodeData(1, None, 1), + 'POP_TOP': OpcodeData(1, None, 1), + 'PRINT_EXPR': OpcodeData(1, None, 1), + 'PRINT_ITEM': OpcodeData(1, None, 1), + 'PRINT_ITEM_TO': OpcodeData(2, None, 1), + 'PRINT_NEWLINE': OpcodeData(0, None, 1), + 'PRINT_NEWLINE_TO': OpcodeData(1, 
None, 1), + 'RAISE_VARARGS': OpcodeData(-1, None, 1), + 'RETURN_VALUE': OpcodeData(1, None, 1), + 'ROT_FOUR': NO_OPCODE_DATA, + 'ROT_THREE': NO_OPCODE_DATA, + 'ROT_TWO': NO_OPCODE_DATA, + 'SETUP_EXCEPT': NO_OPCODE_DATA, + 'SETUP_FINALLY': NO_OPCODE_DATA, + 'SETUP_LOOP': NO_OPCODE_DATA, + 'SETUP_WITH': NO_OPCODE_DATA, + 'SET_ADD': NO_OPCODE_DATA, + 'SLICE+0': OpcodeData(1, 1, None), + 'SLICE+1': OpcodeData(2, 1, None), + 'SLICE+2': OpcodeData(2, 1, None), + 'SLICE+3': OpcodeData(3, 1, None), + 'STOP_CODE': NO_OPCODE_DATA, + 'STORE_ATTR': OpcodeData(2, None, 1), + 'STORE_DEREF': OpcodeData(1, 0, 1), + 'STORE_FAST': OpcodeData(1, None, 1), + 'STORE_GLOBAL': OpcodeData(1, None, 1), + 'STORE_LOCALS': NO_OPCODE_DATA, + 'STORE_MAP': OpcodeData(1, None, 1), + 'STORE_NAME': OpcodeData(1, None, 1), + 'STORE_SLICE+0': OpcodeData(1, None, 1), + 'STORE_SLICE+1': OpcodeData(2, None, 1), + 'STORE_SLICE+2': OpcodeData(2, None, 1), + 'STORE_SLICE+3': OpcodeData(3, None, 1), + 'STORE_SUBSCR': OpcodeData(3, None, 1), + 'UNARY_CONVERT': OpcodeData(1, 1, None), + 'UNARY_INVERT': OpcodeData(1, 1, None), + 'UNARY_NEGATIVE': OpcodeData(1, 1, None), + 'UNARY_NOT': OpcodeData(1, 1, None), + 'UNARY_POSITIVE': OpcodeData(1, 1, None), + 'UNPACK_EX': NO_OPCODE_DATA, + 'UNPACK_SEQUENCE': NO_OPCODE_DATA, + 'WITH_CLEANUP': NO_OPCODE_DATA, + 'YIELD_VALUE': OpcodeData(1, None, 1), } # ______________________________________________________________________ From b5a15a5019691450dec72ad8437c9645bb9ca6e1 Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Mon, 3 Jun 2013 19:12:19 -0500 Subject: [PATCH 11/32] Added function to iterate through nested code objects, llpython.opcode_util.itercodeobjs(), and used it in llpython.addr_flow. 
--- llpython/addr_flow.py | 32 ++++++++++++++++++++++++++++++-- llpython/opcode_util.py | 13 ++++++++++++- 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/llpython/addr_flow.py b/llpython/addr_flow.py index 7447789..8630a5a 100644 --- a/llpython/addr_flow.py +++ b/llpython/addr_flow.py @@ -5,6 +5,7 @@ from __future__ import absolute_import from .byte_flow import BytecodeFlowBuilder +from .opcode_util import build_basic_blocks, itercodeobjs # ______________________________________________________________________ # Class definition(s) @@ -79,16 +80,43 @@ def build_addr_flow(func): cfg = byte_control.build_cfg(func) return AddressFlowBuilder().visit_cfg(cfg) +# ______________________________________________________________________ + +def build_addr_flow_from_co(codeobj): + from .byte_control import ControlFlowBuilder + cfg = ControlFlowBuilder().visit(build_basic_blocks(codeobj), + codeobj.co_argcount) + return AddressFlowBuilder().visit_cfg(cfg) + +# ______________________________________________________________________ + +def build_addr_flows_from_co(root_co): + return dict((co, build_addr_flow_from_co(co)) + for co in itercodeobjs(root_co)) + # ______________________________________________________________________ # Main (self-test) routine def main(*args): import pprint - from .tests import llfuncs + try: + from .tests import llfuncs + except ImportError: + llfuncs = object() if not args: args = ('pymod',) for arg in args: - pprint.pprint(build_addr_flow(getattr(llfuncs, arg))) + if arg.endswith('.py'): + with open(arg) as in_file: + in_source = in_file.read() + in_codeobj = compile(in_source, arg, 'exec') + flow_map = build_addr_flows_from_co(in_codeobj) + for codeobj, flow in flow_map.items(): + print("_" * 70) + print(codeobj) + pprint.pprint(flow) + else: + pprint.pprint(build_addr_flow(getattr(llfuncs, arg))) # ______________________________________________________________________ diff --git a/llpython/opcode_util.py b/llpython/opcode_util.py 
index bec9c01..b6ccf11 100644 --- a/llpython/opcode_util.py +++ b/llpython/opcode_util.py @@ -1,9 +1,10 @@ #! /usr/bin/env python # ______________________________________________________________________ +from collections import namedtuple import dis import opcode -from collections import namedtuple +import types # ______________________________________________________________________ # Module data @@ -195,6 +196,16 @@ def itercode(code, start = 0): # ______________________________________________________________________ +def itercodeobjs(codeobj): + "Iterator that traverses code objects via the co_consts member." + yield codeobj + for const in codeobj.co_consts: + if isinstance(const, types.CodeType): + for childobj in itercodeobjs(const): + yield childobj + +# ______________________________________________________________________ + def extendlabels(code, labels = None): """Extend the set of jump target labels to account for the passthrough targets of conditional branches. From 31119a48d469e9f5ee7a07403526586c6d6a8e12 Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Wed, 5 Jun 2013 13:48:30 -0500 Subject: [PATCH 12/32] Modified main routine in llpython.addr_flow to make it easier to determine which code object caused an exception in the translator. 
--- llpython/addr_flow.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llpython/addr_flow.py b/llpython/addr_flow.py index 8630a5a..26213e8 100644 --- a/llpython/addr_flow.py +++ b/llpython/addr_flow.py @@ -110,11 +110,10 @@ def main(*args): with open(arg) as in_file: in_source = in_file.read() in_codeobj = compile(in_source, arg, 'exec') - flow_map = build_addr_flows_from_co(in_codeobj) - for codeobj, flow in flow_map.items(): + for codeobj in itercodeobjs(in_codeobj): print("_" * 70) print(codeobj) - pprint.pprint(flow) + pprint.pprint(build_addr_flow_from_co(codeobj)) else: pprint.pprint(build_addr_flow(getattr(llfuncs, arg))) From 102d6eac6646c2f1f17cf0f0e12502b16739e5b0 Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Wed, 5 Jun 2013 13:49:02 -0500 Subject: [PATCH 13/32] Adding basic unit test for llpython.addr_flow. --- llpython/tests/test_addr_flow.py | 52 ++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 llpython/tests/test_addr_flow.py diff --git a/llpython/tests/test_addr_flow.py b/llpython/tests/test_addr_flow.py new file mode 100644 index 0000000..774e853 --- /dev/null +++ b/llpython/tests/test_addr_flow.py @@ -0,0 +1,52 @@ +#! /usr/bin/env python +# ______________________________________________________________________ + +from __future__ import absolute_import + +import unittest + +from llpython import addr_flow, opcode_util + +from . 
import test_byte_control as tbc + +# ______________________________________________________________________ +# Class (test case) definition(s) + +class TestAddrFlow(unittest.TestCase): + def fail_unless_valid_flow(self, flow): + raise NotImplementedError("XXX") + # TODO: Make sure child indices are valid bytecode addresses + # TODO: Make sure opcode has a "reasonable" number of child indices + + def test_try_finally_0(self): + self.fail_unless_valid_flow( + addr_flow.build_addr_flow(tbc.try_finally_0)) + + def test_try_finally_1(self): + self.fail_unless_valid_flow( + addr_flow.build_addr_flow(tbc.try_finally_1)) + + def test_try_finally_2(self): + self.fail_unless_valid_flow( + addr_flow.build_addr_flow(tbc.try_finally_2)) + + def test_try_finally_3(self): + self.fail_unless_valid_flow( + addr_flow.build_addr_flow(tbc.try_finally_3)) + + def test_try_finally_4(self): + self.fail_unless_valid_flow( + addr_flow.build_addr_flow(tbc.try_finally_4)) + + def test_try_finally_5(self): + self.fail_unless_valid_flow( + addr_flow.build_addr_flow(tbc.try_finally_5)) + +# ______________________________________________________________________ +# Main (unit test) routine + +if __name__ == "__main__": + unittest.main() + +# ______________________________________________________________________ +# End of test_addr_flow.py From 4837bf194eb26bc11f4790dbb01f82ecad2bf8ee Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Wed, 5 Jun 2013 15:28:17 -0500 Subject: [PATCH 14/32] Moved some utility functions into class methods of the BytecodeFlowBuilder class, and eliminated redundant code in AddressFlowBuilder. 
--- llpython/addr_flow.py | 43 +------------------- llpython/byte_flow.py | 91 ++++++++++++++++++++++++++++++++----------- 2 files changed, 70 insertions(+), 64 deletions(-) diff --git a/llpython/addr_flow.py b/llpython/addr_flow.py index 26213e8..46c4673 100644 --- a/llpython/addr_flow.py +++ b/llpython/addr_flow.py @@ -4,7 +4,7 @@ from __future__ import absolute_import -from .byte_flow import BytecodeFlowBuilder +from .byte_flow import BytecodeFlowBuilder, demo_flow_builder from .opcode_util import build_basic_blocks, itercodeobjs # ______________________________________________________________________ @@ -72,50 +72,11 @@ class AddressFlowBuilder(BytecodeFlowBuilder): op_SET_ADD = op_LIST_APPEND -# ______________________________________________________________________ -# Function definition(s) - -def build_addr_flow(func): - from . import byte_control - cfg = byte_control.build_cfg(func) - return AddressFlowBuilder().visit_cfg(cfg) - -# ______________________________________________________________________ - -def build_addr_flow_from_co(codeobj): - from .byte_control import ControlFlowBuilder - cfg = ControlFlowBuilder().visit(build_basic_blocks(codeobj), - codeobj.co_argcount) - return AddressFlowBuilder().visit_cfg(cfg) - -# ______________________________________________________________________ - -def build_addr_flows_from_co(root_co): - return dict((co, build_addr_flow_from_co(co)) - for co in itercodeobjs(root_co)) - # ______________________________________________________________________ # Main (self-test) routine def main(*args): - import pprint - try: - from .tests import llfuncs - except ImportError: - llfuncs = object() - if not args: - args = ('pymod',) - for arg in args: - if arg.endswith('.py'): - with open(arg) as in_file: - in_source = in_file.read() - in_codeobj = compile(in_source, arg, 'exec') - for codeobj in itercodeobjs(in_codeobj): - print("_" * 70) - print(codeobj) - pprint.pprint(build_addr_flow_from_co(codeobj)) - else: - 
pprint.pprint(build_addr_flow(getattr(llfuncs, arg))) + return demo_flow_builder(AddressFlowBuilder, *args) # ______________________________________________________________________ diff --git a/llpython/byte_flow.py b/llpython/byte_flow.py index 4c354cd..d6a8267 100644 --- a/llpython/byte_flow.py +++ b/llpython/byte_flow.py @@ -1,11 +1,13 @@ #! /usr/bin/env python # ______________________________________________________________________ + from __future__ import absolute_import import dis import opcode from .bytecode_visitor import BasicBlockVisitor from . import opcode_util +from . import byte_control # ______________________________________________________________________ @@ -64,13 +66,13 @@ class BytecodeFlowBuilder (BasicBlockVisitor): labels.sort() self.blocks = dict((index, []) for index in labels) - self.loop_stack = [] + self.control_stack = [] self.stacks = {} def exit_blocks (self, blocks): ret_val = self.blocks del self.stacks - del self.loop_stack + del self.control_stack del self.blocks return ret_val @@ -114,7 +116,8 @@ class BytecodeFlowBuilder (BasicBlockVisitor): op_BINARY_XOR = _op def op_BREAK_LOOP (self, i, op, arg): - loop_i, _, loop_arg = self.loop_stack[-1] + # XXX Not sure this is correct. 
+ loop_i, _, loop_arg, _ = self.control_stack[-1] assert arg is None return self._op(i, op, loop_i + loop_arg + 3) @@ -211,8 +214,9 @@ class BytecodeFlowBuilder (BasicBlockVisitor): op_NOP = _op def op_POP_BLOCK (self, i, op, arg): - self.loop_stack.pop() - return self._op(i, op, arg) + _, _, _, target_stack_size = self.control_stack.pop() + pops = len(self.stack) - target_stack_size + return self._visit_op(i, op, arg, self.opnames[op], pops, 0, 1) op_POP_JUMP_IF_FALSE = _op op_POP_JUMP_IF_TRUE = _op @@ -235,15 +239,16 @@ class BytecodeFlowBuilder (BasicBlockVisitor): def op_ROT_TWO (self, i, op, arg): self.stack[-2:] = (self.stack[-1], self.stack[-2]) - #op_SETUP_EXCEPT = _not_implemented - #op_SETUP_FINALLY = _not_implemented - - def op_SETUP_LOOP (self, i, op, arg): - self.loop_stack.append((i, op, arg)) + def _op_SETUP (self, i, op, arg): + self.control_stack.append((i, op, arg, len(self.stack))) ret_val = i, op, self.opnames[op], arg, [] self.block.append(ret_val) return ret_val + op_SETUP_EXCEPT = _op_SETUP + op_SETUP_FINALLY = _op_SETUP + op_SETUP_LOOP = _op_SETUP + #op_SETUP_WITH = _not_implemented op_SET_ADD = op_LIST_APPEND op_SLICE = _op @@ -267,25 +272,65 @@ class BytecodeFlowBuilder (BasicBlockVisitor): #op_WITH_CLEANUP = _not_implemented op_YIELD_VALUE = _op + @classmethod + def build_flow(cls, func): + '''Given a Python function, return a flow representation of that + function.''' + cfg = byte_control.build_cfg(func) + return cls().visit_cfg(cfg) + + @classmethod + def build_flow_from_co(cls, code_obj): + '''Given a Python code object, return a flow representation of + that code object.''' + bbs = opcode_util.build_basic_blocks(code_obj) + cfg = byte_control.ControlFlowBuilder().visit(bbs, + code_obj.co_argcount) + return cls().visit_cfg(cfg) + + @classmethod + def build_flows_from_co(cls, root_code_obj): + '''Given a Python code object, return a map from that code + object and any nested code objects to flow representations of + those code 
objects.''' + return dict((co, cls.build_flow_from_co(co)) + for co in opcode_util.itercodeobjs(root_code_obj)) + +# ______________________________________________________________________ +# Function definition(s) + +def build_flow(func): + '''Kept for backwards compatibility in downstream modules. Use + BytecodeFlowBuilder.build_flow() instead.''' + return BytecodeFlowBuilder.build_flow(func) + # ______________________________________________________________________ -def build_flow (func): - '''Given a Python function, return a bytecode flow tree for that - function.''' - from . import byte_control - cfg = byte_control.build_cfg(func) - return BytecodeFlowBuilder().visit_cfg(cfg) +def demo_flow_builder(builder_cls, *args): + import pprint + try: + from .tests import llfuncs + except ImportError: + llfuncs = object() + if not args: + args = ('pymod',) + for arg in args: + if arg.endswith('.py'): + with open(arg) as in_file: + in_source = in_file.read() + in_codeobj = compile(in_source, arg, 'exec') + for codeobj in opcode_util.itercodeobjs(in_codeobj): + print("_" * 70) + print(codeobj) + pprint.pprint(builder_cls.build_flow_from_co(codeobj)) + else: + pprint.pprint(builder_cls.build_flow(getattr(llfuncs, arg))) # ______________________________________________________________________ # Main (self-test) routine -def main (*args): - import pprint - from tests import llfuncs - if not args: - args = ('doslice',) - for arg in args: - pprint.pprint(build_flow(getattr(llfuncs, arg))) +def main(*args): + return demo_flow_builder(BytecodeFlowBuilder, *args) # ______________________________________________________________________ From bba305774a84d590d41c96d4aaa6a11c438da4ac Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Wed, 5 Jun 2013 18:52:27 -0500 Subject: [PATCH 15/32] Moved most of llpython.tests.test_addr_flow into llpython.tests.test_byte_flow, made some modifications to various modules to facilitate proper handling of try-finally. 
--- llpython/byte_flow.py | 39 ++++++++++--- llpython/opcode_util.py | 14 ++++- llpython/tests/test_addr_flow.py | 39 +++---------- llpython/tests/test_byte_flow.py | 98 ++++++++++++++++++++++++++++++++ 4 files changed, 150 insertions(+), 40 deletions(-) create mode 100644 llpython/tests/test_byte_flow.py diff --git a/llpython/byte_flow.py b/llpython/byte_flow.py index d6a8267..b37d39d 100644 --- a/llpython/byte_flow.py +++ b/llpython/byte_flow.py @@ -10,6 +10,7 @@ from . import opcode_util from . import byte_control # ______________________________________________________________________ +# Class definition(s) class BytecodeFlowBuilder (BasicBlockVisitor): '''Transforms a CFG into a bytecode "flow tree". @@ -116,10 +117,17 @@ class BytecodeFlowBuilder (BasicBlockVisitor): op_BINARY_XOR = _op def op_BREAK_LOOP (self, i, op, arg): - # XXX Not sure this is correct. - loop_i, _, loop_arg, _ = self.control_stack[-1] - assert arg is None - return self._op(i, op, loop_i + loop_arg + 3) + if self.opnames[op] == 'BREAK_LOOP': + # Break target was already computed in control flow analysis; + # reuse that, replacing the opcode argument. + blocks_out = tuple(self.cfg.blocks_out[self.block_no]) + assert len(blocks_out) == 1 + assert arg is None + arg = blocks_out[0] + # else: Continue target is already in the argument. Note that + # the argument might not be the same as CFG destination block, + # since we might have a finally block to visit first. 
+ return self._op(i, op, arg) op_BUILD_CLASS = _op op_BUILD_LIST = _op @@ -131,7 +139,7 @@ class BytecodeFlowBuilder (BasicBlockVisitor): op_CALL_FUNCTION_VAR = _op op_CALL_FUNCTION_VAR_KW = _op op_COMPARE_OP = _op - #op_CONTINUE_LOOP = _not_implemented + op_CONTINUE_LOOP = op_BREAK_LOOP op_DELETE_ATTR = _op op_DELETE_FAST = _op op_DELETE_GLOBAL = _op @@ -146,7 +154,11 @@ class BytecodeFlowBuilder (BasicBlockVisitor): self.stack += self.stack[-arg:] #op_DUP_TOP_TWO = _not_implemented - #op_END_FINALLY = _not_implemented + + # See the note regarding END_FINALLY in the definition of + # opcode_util.OPCODE_MAP. + op_END_FINALLY = _op + op_EXEC_STMT = _op def op_EXTENDED_ARG (self, i, op, arg): @@ -249,7 +261,20 @@ class BytecodeFlowBuilder (BasicBlockVisitor): op_SETUP_FINALLY = _op_SETUP op_SETUP_LOOP = _op_SETUP - #op_SETUP_WITH = _not_implemented + def op_SETUP_WITH (self, i, op, arg): + assert arg is not None + # Care has to be taken here. SETUP_WITH pushes two things on + # the value stack (the exit handler and the setup result), and once on the handler frame.
+ ctx = self.stack.pop() + # We signal that the value is an exit handler by setting arg to None + exit_handler = i, op, self.opnames[op], None, [ctx] + self.stack.append(exit_handler) + ret_val = i, op, self.opnames[op], arg, [ctx] + self.control_stack.append((i, op, arg, len(self.stack))) + self.stack.append(ret_val) + self.block.append(ret_val) + return ret_val + op_SET_ADD = op_LIST_APPEND op_SLICE = _op #op_STOP_CODE = _not_implemented diff --git a/llpython/opcode_util.py b/llpython/opcode_util.py index b6ccf11..657c851 100644 --- a/llpython/opcode_util.py +++ b/llpython/opcode_util.py @@ -60,7 +60,7 @@ OPCODE_MAP = { 'CALL_FUNCTION_VAR': OpcodeData(-3, 1, None), 'CALL_FUNCTION_VAR_KW': OpcodeData(-4, 1, None), 'COMPARE_OP': OpcodeData(2, 1, None), - 'CONTINUE_LOOP': NO_OPCODE_DATA, + 'CONTINUE_LOOP': OpcodeData(0, None, 1), 'DELETE_ATTR': OpcodeData(1, None, 1), 'DELETE_DEREF': NO_OPCODE_DATA, 'DELETE_FAST': OpcodeData(0, None, 1), @@ -74,7 +74,15 @@ OPCODE_MAP = { 'DUP_TOP': NO_OPCODE_DATA, 'DUP_TOPX': NO_OPCODE_DATA, 'DUP_TOP_TWO': NO_OPCODE_DATA, - 'END_FINALLY': NO_OPCODE_DATA, + + # The data for END_FINALLY is a total fabrication; END_FINALLY may + # pop 1 or 3 values off the value stack, based on the type of the + # top of the value stack. If, however, a value stack simulator + # ignores the part of the CPython evaluator loop that pushes the + # why code on the value stack for WHY_RETURN and WHY_CONTINUE (as + # this table does), this should work out fine. 
+ 'END_FINALLY': OpcodeData(0, 0, 1), + 'EXEC_STMT': OpcodeData(3, 0, 1), 'EXTENDED_ARG': NO_OPCODE_DATA, 'FOR_ITER': OpcodeData(1, 1, 1), @@ -109,7 +117,7 @@ OPCODE_MAP = { 'LOAD_DEREF': OpcodeData(0, 1, None), 'LOAD_FAST': OpcodeData(0, 1, None), 'LOAD_GLOBAL': OpcodeData(0, 1, None), - 'LOAD_LOCALS': NO_OPCODE_DATA, + 'LOAD_LOCALS': OpcodeData(0, 1, None), 'LOAD_NAME': OpcodeData(0, 1, None), 'MAKE_CLOSURE': NO_OPCODE_DATA, 'MAKE_FUNCTION': OpcodeData(-2, 1, None), diff --git a/llpython/tests/test_addr_flow.py b/llpython/tests/test_addr_flow.py index 774e853..93db19d 100644 --- a/llpython/tests/test_addr_flow.py +++ b/llpython/tests/test_addr_flow.py @@ -5,42 +5,21 @@ from __future__ import absolute_import import unittest -from llpython import addr_flow, opcode_util +from llpython import addr_flow -from . import test_byte_control as tbc +from . import test_byte_flow # ______________________________________________________________________ # Class (test case) definition(s) -class TestAddrFlow(unittest.TestCase): - def fail_unless_valid_flow(self, flow): - raise NotImplementedError("XXX") - # TODO: Make sure child indices are valid bytecode addresses - # TODO: Make sure opcode has a "reasonable" number of child indices +class TestAddressFlowBuilder(unittest.TestCase, test_byte_flow.FlowTestMixin): + BUILDER_CLS = addr_flow.AddressFlowBuilder - def test_try_finally_0(self): - self.fail_unless_valid_flow( - addr_flow.build_addr_flow(tbc.try_finally_0)) - - def test_try_finally_1(self): - self.fail_unless_valid_flow( - addr_flow.build_addr_flow(tbc.try_finally_1)) - - def test_try_finally_2(self): - self.fail_unless_valid_flow( - addr_flow.build_addr_flow(tbc.try_finally_2)) - - def test_try_finally_3(self): - self.fail_unless_valid_flow( - addr_flow.build_addr_flow(tbc.try_finally_3)) - - def test_try_finally_4(self): - self.fail_unless_valid_flow( - addr_flow.build_addr_flow(tbc.try_finally_4)) - - def test_try_finally_5(self): - self.fail_unless_valid_flow( - 
addr_flow.build_addr_flow(tbc.try_finally_5)) + def fail_unless_valid_instruction(self, instr): + super(TestAddressFlowBuilder, self).fail_unless_valid_instruction( + instr) + for arg_addr in instr[-1]: + self.fail_unless_valid_address(arg_addr) # ______________________________________________________________________ # Main (unit test) routine diff --git a/llpython/tests/test_byte_flow.py b/llpython/tests/test_byte_flow.py new file mode 100644 index 0000000..471813e --- /dev/null +++ b/llpython/tests/test_byte_flow.py @@ -0,0 +1,98 @@ +#! /usr/bin/env python +# ______________________________________________________________________ + +from __future__ import absolute_import + +import unittest + +from llpython import byte_flow +from llpython import opcode_util + +from . import test_byte_control as tbc +from . import llfuncs + +# ______________________________________________________________________ +# Class (test case) definition(s) + +class FlowTestMixin(object): + + def fail_unless_valid_address(self, address): + self.failUnless(address >= 0) + self.failUnless(address < self.max_addr) + self.failUnless(address in self.valid_addrs) + + def fail_unless_valid_instruction(self, instr): + address = instr[0] + self.visited.add(address) + self.fail_unless_valid_address(instr[0]) + + def fail_unless_valid_flow(self, flow, func): + self.failUnless(len(flow) > 0) + func_code = opcode_util.get_code_object(func).co_code + self.valid_addrs = set(addr for addr, _, _ in + opcode_util.itercode(func_code)) + self.visited = set() + self.max_addr = len(func_code) + for block_index, block_instrs in flow.items(): + self.failUnless(block_index < self.max_addr) + for instr in block_instrs: + self.fail_unless_valid_instruction(instr) + del self.max_addr + # Make sure that all instructions identified by itercode were + # checked at least once; they should be represented in the + # resulting flow, even if their basic block is unreachable. 
+ self.failUnless(self.valid_addrs == self.visited, + 'Failed to visit following addresses: %r' % + (self.valid_addrs - self.visited)) + del self.visited + del self.valid_addrs + + def build_and_test_flow(self, func): + self.fail_unless_valid_flow(self.BUILDER_CLS.build_flow(func), func) + + def test_doslice(self): + self.build_and_test_flow(llfuncs.doslice) + + def test_ipow(self): + self.build_and_test_flow(llfuncs.ipow) + + def test_pymod(self): + self.build_and_test_flow(llfuncs.pymod) + + def test_try_finally_0(self): + self.build_and_test_flow(tbc.try_finally_0) + + def test_try_finally_1(self): + self.build_and_test_flow(tbc.try_finally_1) + + def test_try_finally_2(self): + self.build_and_test_flow(tbc.try_finally_2) + + def test_try_finally_3(self): + self.build_and_test_flow(tbc.try_finally_3) + + def test_try_finally_4(self): + self.build_and_test_flow(tbc.try_finally_4) + + def test_try_finally_5(self): + self.build_and_test_flow(tbc.try_finally_5) + +# ______________________________________________________________________ + +class TestBytecodeFlowBuilder(unittest.TestCase, FlowTestMixin): + BUILDER_CLS = byte_flow.BytecodeFlowBuilder + + def fail_unless_valid_instruction(self, instr): + super(TestBytecodeFlowBuilder, self).fail_unless_valid_instruction( + instr) + for child_instr in instr[-1]: + self.fail_unless_valid_instruction(child_instr) + +# ______________________________________________________________________ +# Main (unit test) routine + +if __name__ == "__main__": + unittest.main() + +# ______________________________________________________________________ +# End of test_byte_flow.py From c55198095f0aafed19f401eedce8d8c7a183b379 Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Thu, 6 Jun 2013 14:04:48 -0500 Subject: [PATCH 16/32] Modified FlowTestMixin.build_and_test_flow() to return generated flow for further testing. 
--- llpython/tests/test_byte_flow.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llpython/tests/test_byte_flow.py b/llpython/tests/test_byte_flow.py index 471813e..8c4decb 100644 --- a/llpython/tests/test_byte_flow.py +++ b/llpython/tests/test_byte_flow.py @@ -46,9 +46,11 @@ class FlowTestMixin(object): (self.valid_addrs - self.visited)) del self.visited del self.valid_addrs + return flow def build_and_test_flow(self, func): - self.fail_unless_valid_flow(self.BUILDER_CLS.build_flow(func), func) + return self.fail_unless_valid_flow( + self.BUILDER_CLS.build_flow(func), func) def test_doslice(self): self.build_and_test_flow(llfuncs.doslice) @@ -80,6 +82,7 @@ class FlowTestMixin(object): # ______________________________________________________________________ class TestBytecodeFlowBuilder(unittest.TestCase, FlowTestMixin): + BUILDER_CLS = byte_flow.BytecodeFlowBuilder def fail_unless_valid_instruction(self, instr): From 722453502c2fc481c4977b9cd204f8a5c9290488 Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Thu, 6 Jun 2013 14:08:09 -0500 Subject: [PATCH 17/32] Continued generalization of byte_flow.demo_flow_builder(), moving traversal logic into visitor function opcode_util.visit_code_args(). Modified byte_control.main() to use visit_code_args(). --- llpython/byte_control.py | 40 ++++++++++++++++++++++++++++----------- llpython/byte_flow.py | 41 ++++++++++++++++++++-------------------- llpython/opcode_util.py | 28 +++++++++++++++++++++++++++ 3 files changed, 78 insertions(+), 31 deletions(-) diff --git a/llpython/byte_control.py b/llpython/byte_control.py index e21252a..efd54aa 100644 --- a/llpython/byte_control.py +++ b/llpython/byte_control.py @@ -1,11 +1,14 @@ #! /usr/bin/env python # ______________________________________________________________________ +# Module imports from __future__ import absolute_import -import opcode -from . import opcode_util -import pprint +import opcode +import pprint +import types + +from . 
import opcode_util from .bytecode_visitor import BasicBlockVisitor, BenignBytecodeVisitorMixin from .control_flow import ControlFlowGraph @@ -251,24 +254,39 @@ class ControlFlowBuilder (BenignBytecodeVisitorMixin, BasicBlockVisitor): return super(ControlFlowBuilder, self).op_SETUP_WITH(i, op, arg, *args, **kws) + # ____________________________________________________________ + # Class convenience methods + + @classmethod + def build_cfg_from_co(cls, co_obj): + return cls().visit(opcode_util.build_basic_blocks(co_obj), + co_obj.co_argcount) + + @classmethod + def build_cfg(cls, func): + co_obj = opcode_util.get_code_object(func) + return cls.build_cfg_from_co(co_obj) + # ______________________________________________________________________ def build_cfg (func): '''Given a Python function, create a bytecode flow, visit the flow object, and return a control flow graph.''' - co_obj = opcode_util.get_code_object(func) - return ControlFlowBuilder().visit(opcode_util.build_basic_blocks(co_obj), - co_obj.co_argcount) + return ControlFlowBuilder.build_cfg(func) # ______________________________________________________________________ # Main (self-test) routine def main (*args, **kws): - from tests import llfuncs - if not args: - args = ('doslice',) - for arg in args: - build_cfg(getattr(llfuncs, arg)).pprint() + def _visit(obj): + print("_" * 70) + print(obj) + if type(obj) == types.FunctionType: + cfg = build_cfg(obj) + else: + cfg = ControlFlowBuilder.build_cfg_from_co(obj) + cfg.pprint() + return opcode_util.visit_code_args(_visit, *args, **kws) # ______________________________________________________________________ diff --git a/llpython/byte_flow.py b/llpython/byte_flow.py index b37d39d..452a7f7 100644 --- a/llpython/byte_flow.py +++ b/llpython/byte_flow.py @@ -4,6 +4,8 @@ from __future__ import absolute_import import dis import opcode +import pprint +import types from .bytecode_visitor import BasicBlockVisitor from . 
import opcode_util @@ -293,7 +295,16 @@ class BytecodeFlowBuilder (BasicBlockVisitor): op_UNARY_NOT = _op op_UNARY_POSITIVE = _op #op_UNPACK_EX = _not_implemented - #op_UNPACK_SEQUENCE = _not_implemented + + def op_UNPACK_SEQUENCE (self, i, op, arg): + seq = self.stack.pop() + opname = self.opnames[op] + while arg > 0: + arg -= 1 + ret_val = i, op, opname, arg, [seq] + self.stack.append(ret_val) + return ret_val + #op_WITH_CLEANUP = _not_implemented op_YIELD_VALUE = _op @@ -308,9 +319,7 @@ class BytecodeFlowBuilder (BasicBlockVisitor): def build_flow_from_co(cls, code_obj): '''Given a Python code object, return a flow representation of that code object.''' - bbs = opcode_util.build_basic_blocks(code_obj) - cfg = byte_control.ControlFlowBuilder().visit(bbs, - code_obj.co_argcount) + cfg = byte_control.ControlFlowBuilder.build_cfg_from_co(code_obj) return cls().visit_cfg(cfg) @classmethod @@ -333,23 +342,15 @@ def build_flow(func): def demo_flow_builder(builder_cls, *args): import pprint - try: - from .tests import llfuncs - except ImportError: - llfuncs = object() - if not args: - args = ('pymod',) - for arg in args: - if arg.endswith('.py'): - with open(arg) as in_file: - in_source = in_file.read() - in_codeobj = compile(in_source, arg, 'exec') - for codeobj in opcode_util.itercodeobjs(in_codeobj): - print("_" * 70) - print(codeobj) - pprint.pprint(builder_cls.build_flow_from_co(codeobj)) + def _visit(obj): + print("_" * 70) + print(obj) + if type(obj) == types.FunctionType: + flow = builder_cls.build_flow(obj) else: - pprint.pprint(builder_cls.build_flow(getattr(llfuncs, arg))) + flow = builder_cls.build_flow_from_co(obj) + pprint.pprint(flow) + return opcode_util.visit_code_args(_visit, *args) # ______________________________________________________________________ # Main (self-test) routine diff --git a/llpython/opcode_util.py b/llpython/opcode_util.py index 657c851..e79257e 100644 --- a/llpython/opcode_util.py +++ b/llpython/opcode_util.py @@ -214,6 +214,34 @@ 
def itercodeobjs(codeobj): # ______________________________________________________________________ +def visit_code_args(visitor, *args, **kws): + """Utility function for testing or demonstrating various code + analysis passes in llpython. + + Takes a visitor function and a sequence of command line arguments. + The visitor function should be able to handle either function + objects or code objects.""" + try: + from .tests import llfuncs + except ImportError: + llfuncs = object() + if not args: + if 'default_args' in kws: + args = kws['default_args'] + else: + args = ('pymod',) + for arg in args: + if arg.endswith('.py'): + with open(arg) as in_file: + in_source = in_file.read() + in_codeobj = compile(in_source, arg, 'exec') + for codeobj in itercodeobjs(in_codeobj): + visitor(codeobj) + else: + visitor(getattr(llfuncs, arg)) + +# ______________________________________________________________________ + def extendlabels(code, labels = None): """Extend the set of jump target labels to account for the passthrough targets of conditional branches. From ee557c55cc37c12aef6844f99726a591dfcd13c2 Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Thu, 6 Jun 2013 14:13:39 -0500 Subject: [PATCH 18/32] Added llpython.tests.test_all module for unit testing all llpython. --- llpython/tests/test_all.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 llpython/tests/test_all.py diff --git a/llpython/tests/test_all.py b/llpython/tests/test_all.py new file mode 100644 index 0000000..82aa9c8 --- /dev/null +++ b/llpython/tests/test_all.py @@ -0,0 +1,16 @@ +#! 
/usr/bin/env python +# ______________________________________________________________________ + +import unittest + +from .test_byte_control import TestByteControl +from .test_byte_flow import TestBytecodeFlowBuilder +from .test_addr_flow import TestAddressFlowBuilder + +# ______________________________________________________________________ + +if __name__ == "__main__": + unittest.main() + +# ______________________________________________________________________ +# End of llpython/tests/test_all.py From 0b24adb80aa2ab12e8e8eff970dc5f99f6ed6d8d Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Fri, 7 Jun 2013 15:30:45 -0500 Subject: [PATCH 19/32] Modified various passes to use the inspect module instead of using type comparisons or isinstance. --- llpython/byte_control.py | 4 ++-- llpython/byte_flow.py | 5 ++--- llpython/opcode_util.py | 4 ++-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/llpython/byte_control.py b/llpython/byte_control.py index efd54aa..93e0d83 100644 --- a/llpython/byte_control.py +++ b/llpython/byte_control.py @@ -6,7 +6,7 @@ from __future__ import absolute_import import opcode import pprint -import types +import inspect from . import opcode_util from .bytecode_visitor import BasicBlockVisitor, BenignBytecodeVisitorMixin @@ -281,7 +281,7 @@ def main (*args, **kws): def _visit(obj): print("_" * 70) print(obj) - if type(obj) == types.FunctionType: + if inspect.isfunction(obj): cfg = build_cfg(obj) else: cfg = ControlFlowBuilder.build_cfg_from_co(obj) diff --git a/llpython/byte_flow.py b/llpython/byte_flow.py index 452a7f7..76e1a23 100644 --- a/llpython/byte_flow.py +++ b/llpython/byte_flow.py @@ -5,7 +5,7 @@ from __future__ import absolute_import import dis import opcode import pprint -import types +import inspect from .bytecode_visitor import BasicBlockVisitor from . 
import opcode_util @@ -341,11 +341,10 @@ def build_flow(func): # ______________________________________________________________________ def demo_flow_builder(builder_cls, *args): - import pprint def _visit(obj): print("_" * 70) print(obj) - if type(obj) == types.FunctionType: + if inspect.isfunction(obj): flow = builder_cls.build_flow(obj) else: flow = builder_cls.build_flow_from_co(obj) diff --git a/llpython/opcode_util.py b/llpython/opcode_util.py index e79257e..727e4ce 100644 --- a/llpython/opcode_util.py +++ b/llpython/opcode_util.py @@ -4,7 +4,7 @@ from collections import namedtuple import dis import opcode -import types +import inspect # ______________________________________________________________________ # Module data @@ -208,7 +208,7 @@ def itercodeobjs(codeobj): "Iterator that traverses code objects via the co_consts member." yield codeobj for const in codeobj.co_consts: - if isinstance(const, types.CodeType): + if inspect.iscode(const): for childobj in itercodeobjs(const): yield childobj From 6c7d493441686a3dd11d58c869ec96d2a44017b9 Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Fri, 7 Jun 2013 19:18:34 -0500 Subject: [PATCH 20/32] Fixed llpython.type_flow's main routine. Was using addr_flow.build_addr_flow() which was removed. 
--- llpython/type_flow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llpython/type_flow.py b/llpython/type_flow.py index 3f3e2a9..c87f3c4 100644 --- a/llpython/type_flow.py +++ b/llpython/type_flow.py @@ -257,8 +257,8 @@ class TypeFlowBuilder(BenignBytecodeVisitorMixin, BasicBlockVisitor): def build_type_flow(func): from .opcode_util import get_code_object - from .addr_flow import build_addr_flow - blocks = build_addr_flow(func) + from .addr_flow import AddressFlowBuilder + blocks = AddressFlowBuilder.build_flow(func) ty_builder = TypeFlowBuilder(get_code_object(func)) ty_builder.visit(blocks) return ty_builder.get_type_eqns() From 68030d51d186885b713c3139942594a8788e859f Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Tue, 11 Jun 2013 16:44:15 -0500 Subject: [PATCH 21/32] Modified BytecodeFlowBuilder and AddressFlowBuilder to use a named tuple for representing bytecode instructions. --- llpython/addr_flow.py | 10 +++++----- llpython/byte_flow.py | 22 ++++++++++++++-------- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/llpython/addr_flow.py b/llpython/addr_flow.py index 46c4673..d9a774b 100644 --- a/llpython/addr_flow.py +++ b/llpython/addr_flow.py @@ -4,7 +4,7 @@ from __future__ import absolute_import -from .byte_flow import BytecodeFlowBuilder, demo_flow_builder +from .byte_flow import Instr, BytecodeFlowBuilder, demo_flow_builder from .opcode_util import build_basic_blocks, itercodeobjs # ______________________________________________________________________ @@ -39,7 +39,7 @@ class AddressFlowBuilder(BytecodeFlowBuilder): del self.stack[-pops:] else: stk_args = [] - ret_val = (i, op, opname, arg, stk_args) + ret_val = Instr(i, op, opname, arg, stk_args) if pushes: self.stack.append(ret_val) self.block.append(ret_val) @@ -49,13 +49,13 @@ class AddressFlowBuilder(BytecodeFlowBuilder): # References top of stack without popping, so we can't use the # generic machinery. 
opname = self.opmap[op][0] - ret_val = i, op, opname, arg, [self.stack[-1][0]] + ret_val = Instr(i, op, opname, arg, [self.stack[-1][0]]) self.stack.append(ret_val) self.block.append(ret_val) return ret_val def op_JUMP_IF_FALSE (self, i, op, arg): - ret_val = i, op, self.opnames[op], arg, [self.stack[-1][0]] + ret_val = Instr(i, op, self.opnames[op], arg, [self.stack[-1][0]]) self.block.append(ret_val) return ret_val @@ -66,7 +66,7 @@ class AddressFlowBuilder(BytecodeFlowBuilder): opcodes.''' elem = self.stack.pop() container = self.stack[-arg] - ret_val = i, op, self.opnames[op], arg, [container[0], elem[0]] + ret_val = Instr(i, op, self.opnames[op], arg, [container[0], elem[0]]) self.block.append(ret_val) return ret_val diff --git a/llpython/byte_flow.py b/llpython/byte_flow.py index 76e1a23..3b9ba76 100644 --- a/llpython/byte_flow.py +++ b/llpython/byte_flow.py @@ -6,6 +6,7 @@ import dis import opcode import pprint import inspect +from collections import namedtuple from .bytecode_visitor import BasicBlockVisitor from . import opcode_util @@ -14,6 +15,11 @@ from . import byte_control # ______________________________________________________________________ # Class definition(s) +Instr = namedtuple('Instr', ('address', 'opcode', 'opname', 'oparg', + 'stackargs')) + +# ______________________________________________________________________ + class BytecodeFlowBuilder (BasicBlockVisitor): '''Transforms a CFG into a bytecode "flow tree". @@ -47,7 +53,7 @@ class BytecodeFlowBuilder (BasicBlockVisitor): del self.stack[-pops:] else: stk_args = [] - ret_val = (i, op, opname, arg, stk_args) + ret_val = Instr(i, op, opname, arg, stk_args) if pushes: self.stack.append(ret_val) if appends: @@ -174,7 +180,7 @@ class BytecodeFlowBuilder (BasicBlockVisitor): # References top of stack without popping, so we can't use the # generic machinery. 
opname = self.opmap[op][0] - ret_val = i, op, opname, arg, [self.stack[-1]] + ret_val = Instr(i, op, opname, arg, [self.stack[-1]]) self.stack.append(ret_val) return ret_val @@ -197,7 +203,7 @@ class BytecodeFlowBuilder (BasicBlockVisitor): op_JUMP_FORWARD = _op def op_JUMP_IF_FALSE (self, i, op, arg): - ret_val = i, op, self.opnames[op], arg, [self.stack[-1]] + ret_val = Instr(i, op, self.opnames[op], arg, [self.stack[-1]]) self.block.append(ret_val) return ret_val @@ -210,7 +216,7 @@ class BytecodeFlowBuilder (BasicBlockVisitor): opcodes.''' elem = self.stack.pop() container = self.stack[-arg] - ret_val = i, op, self.opnames[op], arg, [container, elem] + ret_val = Instr(i, op, self.opnames[op], arg, [container, elem]) self.block.append(ret_val) return ret_val @@ -255,7 +261,7 @@ class BytecodeFlowBuilder (BasicBlockVisitor): def _op_SETUP (self, i, op, arg): self.control_stack.append((i, op, arg, len(self.stack))) - ret_val = i, op, self.opnames[op], arg, [] + ret_val = Instr(i, op, self.opnames[op], arg, []) self.block.append(ret_val) return ret_val @@ -269,9 +275,9 @@ class BytecodeFlowBuilder (BasicBlockVisitor): # the value stack (the exit ), and once on the handler frame. 
ctx = self.stack.pop() # We signal that the value is an exit handler by setting arg to None - exit_handler = i, op, self.opnames[op], None, [ctx] + exit_handler = Instr(i, op, self.opnames[op], None, [ctx]) self.stack.append(exit_handler) - ret_val = i, op, self.opnames[op], arg, [ctx] + ret_val = Instr(i, op, self.opnames[op], arg, [ctx]) self.control_stack.append((i, op, arg, len(self.stack))) self.stack.append(ret_val) self.block.append(ret_val) @@ -301,7 +307,7 @@ class BytecodeFlowBuilder (BasicBlockVisitor): opname = self.opnames[op] while arg > 0: arg -= 1 - ret_val = i, op, opname, arg, [seq] + ret_val = Instr(i, op, opname, arg, [seq]) self.stack.append(ret_val) return ret_val From 5f81cab357a2e00a6a89401ada9b083fbbacae33 Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Tue, 11 Jun 2013 17:11:38 -0500 Subject: [PATCH 22/32] Moved most of bytecode_visitor.BytecodeFlowVisitor into a new parent class GenericFlowVisitor, which is a useful starting point for address-based flow visitors. 
--- llpython/bytecode_visitor.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/llpython/bytecode_visitor.py b/llpython/bytecode_visitor.py index bbd2f01..0b2dff3 100644 --- a/llpython/bytecode_visitor.py +++ b/llpython/bytecode_visitor.py @@ -190,7 +190,7 @@ class BasicBlockVisitor (BytecodeVisitor): # ______________________________________________________________________ -class BytecodeFlowVisitor (BytecodeVisitor): +class GenericFlowVisitor (BytecodeVisitor): def visit (self, flow): self.block_list = list(flow.keys()) self.block_list.sort() @@ -209,15 +209,6 @@ class BytecodeFlowVisitor (BytecodeVisitor): del self.block_list return self.exit_flow_object(flow) - def visit_op (self, i, op, arg, *args, **kws): - new_args = [] - for child_i, child_op, _, child_arg, child_args in args: - new_args.extend(self.visit_op(child_i, child_op, child_arg, - *child_args)) - ret_val = super(BytecodeFlowVisitor, self).visit_op(i, op, arg, - *new_args) - return ret_val - def enter_flow_object (self, flow): self.new_flow = {} @@ -234,6 +225,18 @@ class BytecodeFlowVisitor (BytecodeVisitor): # ______________________________________________________________________ +class BytecodeFlowVisitor (GenericFlowVisitor): + def visit_op (self, i, op, arg, *args, **kws): + new_args = [] + for child_i, child_op, _, child_arg, child_args in args: + new_args.extend(self.visit_op(child_i, child_op, child_arg, + *child_args)) + ret_val = super(BytecodeFlowVisitor, self).visit_op(i, op, arg, + *new_args) + return ret_val + +# ______________________________________________________________________ + class BenignBytecodeVisitorMixin (object): def _do_nothing (self, i, op, arg, *args, **kws): return [(i, op, self.opnames[op], arg, args)] From fd41a9355454e5694622606e1bad46be4aefb2f1 Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Thu, 13 Jun 2013 15:25:28 -0500 Subject: [PATCH 23/32] Added llpython.af_to_api module. 
Started work on a code generator that accepts address flows, and outputs an LLVM function that implements the given code object using (undefined) API calls for bytecode instructions. --- llpython/af_to_api.py | 239 ++++++++++++++++++++++++++++++++++++++++ llpython/opcode_util.py | 7 ++ 2 files changed, 246 insertions(+) create mode 100644 llpython/af_to_api.py diff --git a/llpython/af_to_api.py b/llpython/af_to_api.py new file mode 100644 index 0000000..8569a46 --- /dev/null +++ b/llpython/af_to_api.py @@ -0,0 +1,239 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- +# ______________________________________________________________________ +# Module imports + +from __future__ import print_function, division, absolute_import + +import inspect + +import llvm.core as lc + +from . import byte_control +from . import addr_flow +from . import opcode_util +from . import bytetype +from .bytecode_visitor import GenericFlowVisitor + +# ______________________________________________________________________ +# Class definitions + +class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): + + def __init__(self, _prefix=None, _postfix=None): + if _prefix is not None: + if inspect.isfunction(_prefix): + __prefix = _prefix + else: + def __prefix(self, opname, *opargs): + return _prefix + self.prefix = __prefix + if _postfix is not None: + if inspect.isfunction(_postfix): + __postfix = _postfix + else: + def __postfix(self, opname, *opargs): + return _postfix + self.postfix = __postfix + + def prefix(self, opname, *opargs): + return '' + + def postfix(self, opname, *opargs): + return '' + + def translate_cfg(self, code_obj, cfg, llvm_module=None, **kws): + assert inspect.iscode(code_obj) + self.code_obj = code_obj + self.cfg = cfg + self.target_function_name = kws.get( + 'target_function_name', 'co_%s_%x' % (code_obj.co_name, + id(code_obj))) + if llvm_module is None: + llvm_module = lc.Module.new('lmod_' + self.target_function_name) + self.llvm_module = llvm_module + 
self.visit(cfg.blocks) + del self.llvm_module + del self.cfg + del self.code_obj + return llvm_module + + def enter_flow_object(self, flow): + super(AddressFlowToLLVMPyAPICalls, self).enter_flow_object(flow) + self.nargs = opcode_util.get_nargs(self.code_obj) + lltype = lc.Type.function(bytetype.l_pyobj_p, + tuple(bytetype.l_pyobj_p + for _ in range(self.nargs))) + self.llvm_function = self.llvm_module.add_function( + lltype, self.target_function_name) + self.llvm_blocks = {} + for block in self.block_list: + if 0 in self.cfg.blocks_reaching[block]: + bb = self.llvm_function.append_basic_block( + 'BLOCK_%d' % (block,)) + self.llvm_blocks[block] = bb + + def exit_flow_object(self, flow): + super(AddressFlowToLLVMPyAPICalls, self).exit_flow_object(flow) + del self.llvm_blocks + del self.llvm_function + + def enter_block(self, block): + ret_val = False + if block in self.llvm_blocks: + self.llvm_block = self.llvm_blocks[block] + self.builder = lc.Builder.new(self.llvm_block) + ret_val = True + return ret_val + + def exit_block(self, block): + # XXX Isn't this really a bug in GenericFlowVisitor.visit()? 
+ if block in self.llvm_blocks: + del self.builder + del self.llvm_block + + def _op(self, i, op, arg, *args, **kws): + return[] + raise NotImplementedError() + + op_BINARY_ADD = _op + op_BINARY_AND = _op + op_BINARY_DIVIDE = _op + op_BINARY_FLOOR_DIVIDE = _op + op_BINARY_LSHIFT = _op + op_BINARY_MODULO = _op + op_BINARY_MULTIPLY = _op + op_BINARY_OR = _op + op_BINARY_POWER = _op + op_BINARY_RSHIFT = _op + op_BINARY_SUBSCR = _op + op_BINARY_SUBTRACT = _op + op_BINARY_TRUE_DIVIDE = _op + op_BINARY_XOR = _op + op_BREAK_LOOP = _op + op_BUILD_CLASS = _op + op_BUILD_LIST = _op + op_BUILD_MAP = _op + op_BUILD_SET = _op + op_BUILD_SLICE = _op + op_BUILD_TUPLE = _op + op_CALL_FUNCTION = _op + op_CALL_FUNCTION_KW = _op + op_CALL_FUNCTION_VAR = _op + op_CALL_FUNCTION_VAR_KW = _op + op_COMPARE_OP = _op + op_CONTINUE_LOOP = _op + op_DELETE_ATTR = _op + op_DELETE_DEREF = _op + op_DELETE_FAST = _op + op_DELETE_GLOBAL = _op + op_DELETE_NAME = _op + op_DELETE_SLICE = _op + op_DELETE_SUBSCR = _op + op_DUP_TOP = _op + op_DUP_TOPX = _op + op_DUP_TOP_TWO = _op + op_END_FINALLY = _op + op_EXEC_STMT = _op + op_EXTENDED_ARG = _op + op_FOR_ITER = _op + op_GET_ITER = _op + op_IMPORT_FROM = _op + op_IMPORT_NAME = _op + op_IMPORT_STAR = _op + op_INPLACE_ADD = _op + op_INPLACE_AND = _op + op_INPLACE_DIVIDE = _op + op_INPLACE_FLOOR_DIVIDE = _op + op_INPLACE_LSHIFT = _op + op_INPLACE_MODULO = _op + op_INPLACE_MULTIPLY = _op + op_INPLACE_OR = _op + op_INPLACE_POWER = _op + op_INPLACE_RSHIFT = _op + op_INPLACE_SUBTRACT = _op + op_INPLACE_TRUE_DIVIDE = _op + op_INPLACE_XOR = _op + op_JUMP_ABSOLUTE = _op + op_JUMP_FORWARD = _op + op_JUMP_IF_FALSE = _op + op_JUMP_IF_FALSE_OR_POP = _op + op_JUMP_IF_TRUE = _op + op_JUMP_IF_TRUE_OR_POP = _op + op_LIST_APPEND = _op + op_LOAD_ATTR = _op + op_LOAD_BUILD_CLASS = _op + op_LOAD_CLOSURE = _op + op_LOAD_CONST = _op + op_LOAD_DEREF = _op + op_LOAD_FAST = _op + op_LOAD_GLOBAL = _op + op_LOAD_LOCALS = _op + op_LOAD_NAME = _op + op_MAKE_CLOSURE = _op + 
op_MAKE_FUNCTION = _op + op_MAP_ADD = _op + op_NOP = _op + op_POP_BLOCK = _op + op_POP_EXCEPT = _op + op_POP_JUMP_IF_FALSE = _op + op_POP_JUMP_IF_TRUE = _op + op_POP_TOP = _op + op_PRINT_EXPR = _op + op_PRINT_ITEM = _op + op_PRINT_ITEM_TO = _op + op_PRINT_NEWLINE = _op + op_PRINT_NEWLINE_TO = _op + op_RAISE_VARARGS = _op + op_RETURN_VALUE = _op + op_ROT_FOUR = _op + op_ROT_THREE = _op + op_ROT_TWO = _op + op_SETUP_EXCEPT = _op + op_SETUP_FINALLY = _op + op_SETUP_LOOP = _op + op_SETUP_WITH = _op + op_SET_ADD = _op + op_SLICE = _op + op_STOP_CODE = _op + op_STORE_ATTR = _op + op_STORE_DEREF = _op + op_STORE_FAST = _op + op_STORE_GLOBAL = _op + op_STORE_LOCALS = _op + op_STORE_MAP = _op + op_STORE_NAME = _op + op_STORE_SLICE = _op + op_STORE_SUBSCR = _op + op_UNARY_CONVERT = _op + op_UNARY_INVERT = _op + op_UNARY_NEGATIVE = _op + op_UNARY_NOT = _op + op_UNARY_POSITIVE = _op + op_UNPACK_EX = _op + op_UNPACK_SEQUENCE = _op + op_WITH_CLEANUP = _op + op_YIELD_VALUE = _op + +# ______________________________________________________________________ +# Function definition(s) + +def demo_translator(*args, **kws): + def _visit(obj): + if inspect.isfunction(obj): + obj = opcode_util.get_code_object(obj) + print('\n; %s\n; %r' % ('_' * 70, obj)) + cfg = byte_control.ControlFlowBuilder.build_cfg_from_co(obj) + cfg.blocks = addr_flow.AddressFlowBuilder().visit_cfg(cfg) + print(AddressFlowToLLVMPyAPICalls(**kws).translate_cfg(obj, cfg)) + return opcode_util.visit_code_args(_visit, *args) + +# ______________________________________________________________________ +# Main (self-test) routine + +if __name__ == '__main__': + import sys + demo_translator(*sys.argv[1:], _prefix = '_') + +# ______________________________________________________________________ +# End of af_to_api.py diff --git a/llpython/opcode_util.py b/llpython/opcode_util.py index 727e4ce..ce92043 100644 --- a/llpython/opcode_util.py +++ b/llpython/opcode_util.py @@ -280,5 +280,12 @@ def build_basic_blocks (co_obj): 
labels + [len(co_code)])) return blocks +# ______________________________________________________________________ + +def get_nargs(co_obj): + flags = co_obj.co_flags + return (1 + co_obj.co_argcount + (1 if flags & 4 else 0) + + (1 if flags & 8 else 0)) + # ______________________________________________________________________ # End of opcode_util.py From fcf5420803ad98b547aae7bc560447a06da8c6d2 Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Fri, 14 Jun 2013 15:59:51 -0500 Subject: [PATCH 24/32] Added argument naming to llpython.byte_translator. --- llpython/byte_translator.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/llpython/byte_translator.py b/llpython/byte_translator.py index 5473f20..bcb2748 100644 --- a/llpython/byte_translator.py +++ b/llpython/byte_translator.py @@ -1,4 +1,5 @@ #! /usr/bin/env python +# -*- coding: utf-8 -*- # ______________________________________________________________________ '''Defines a bytecode based LLVM translator for llpython code. ''' @@ -197,6 +198,10 @@ class LLVMTranslator (BytecodeFlowVisitor): if self.llvm_function is None: self.llvm_function = self.llvm_module.add_function( self.llvm_type, self.target_function_name) + if self.llvm_function.args and not self.llvm_function.args[0].name: + for index in range(len(self.llvm_function.args)): + argname = self.code_obj.co_varnames[index] + self.llvm_function.args[index].name = argname self.llvm_blocks = {} self.llvm_definitions = {} self.pending_phis = {} From a8747d21250a94b512eb74d7f6e2cfb4a99b3526 Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Fri, 14 Jun 2013 16:00:42 -0500 Subject: [PATCH 25/32] Initial proof of concept for generic API calling code generator in llpython.af_to_api. 
--- llpython/af_to_api.py | 113 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 104 insertions(+), 9 deletions(-) diff --git a/llpython/af_to_api.py b/llpython/af_to_api.py index 8569a46..e7599ca 100644 --- a/llpython/af_to_api.py +++ b/llpython/af_to_api.py @@ -6,20 +6,37 @@ from __future__ import print_function, division, absolute_import import inspect +import opcode import llvm.core as lc from . import byte_control from . import addr_flow from . import opcode_util -from . import bytetype +from .bytetype import l_pyobj_p, lc_int from .bytecode_visitor import GenericFlowVisitor # ______________________________________________________________________ # Class definitions class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): + ''' + Code generator for translating from a Python code object and its + address flow (output by addr_flow.AddressFlowBuilder) into an LLVM + function. The resulting LLVM function calls into a user-provided + (or undefined) API function for each interpreter byte code. + Target API function names are based on a programmable name + mangling scheme: + + <prefix>OPCODE_NAME<postfix> + + The <prefix> and <postfix> strings are determined by the prefix() + and postfix() methods. These methods may either be overloaded, or + specialized at construction time using _prefix and _postfix + arguments. The OPCODE_NAME is the opcode name as determined by + the map in opcode.opname. + ''' def __init__(self, _prefix=None, _postfix=None): if _prefix is not None: if inspect.isfunction(_prefix): @@ -42,8 +59,20 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): def postfix(self, opname, *opargs): return '' - def translate_cfg(self, code_obj, cfg, llvm_module=None, **kws): + def translate_cfg(self, code_obj, cfg, llvm_module=None, + monotype = l_pyobj_p, **kws): + ''' + Generate LLVM code for the given code object and it's control + flow graph. + + If no LLVM module is given as an argument, translate_cfg() + creates a new module. + + Returns the resulting LLVM module. 
+ ''' assert inspect.iscode(code_obj) + self.obj_type = monotype + self.null = lc.Constant.null(self.obj_type) self.code_obj = code_obj self.cfg = cfg self.target_function_name = kws.get( @@ -59,11 +88,13 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): return llvm_module def enter_flow_object(self, flow): + ''' + Set up any state for dealing a new dictionary of basic blocks. + ''' super(AddressFlowToLLVMPyAPICalls, self).enter_flow_object(flow) self.nargs = opcode_util.get_nargs(self.code_obj) - lltype = lc.Type.function(bytetype.l_pyobj_p, - tuple(bytetype.l_pyobj_p - for _ in range(self.nargs))) + lltype = lc.Type.function( + self.obj_type, tuple(self.obj_type for _ in range(self.nargs))) self.llvm_function = self.llvm_module.add_function( lltype, self.target_function_name) self.llvm_blocks = {} @@ -72,29 +103,79 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): bb = self.llvm_function.append_basic_block( 'BLOCK_%d' % (block,)) self.llvm_blocks[block] = bb + self.symtab = {} + self.values = {} def exit_flow_object(self, flow): + ''' + Clean up any state created while visiting the given dictionary + of basic blocks. + ''' super(AddressFlowToLLVMPyAPICalls, self).exit_flow_object(flow) + del self.symtab del self.llvm_blocks del self.llvm_function + def generate_co_init(self): + ''' + Initialize the code object's local variables on the stack. + ''' + for name in self.code_obj.co_varnames: + ptr = self.builder.alloca(self.obj_type, name + '_p') + self.symtab[name] = ptr + self.builder.store(self.null, ptr) + for arg_index, arg in zip(range(self.nargs), self.llvm_function.args): + if arg_index == 0: + arg.name = '_globals_%x' % id(self.code_obj) + self.globals = arg + else: + local_index = arg_index - 1 + name = self.code_obj.co_varnames[local_index] + arg.name = name + self.builder.store(arg, self.symtab[name]) + def enter_block(self, block): + ''' + Set up state for generating code in a new basic block. 
If + this is the first basic block, initialize the local variables + using generate_co_init(). + ''' ret_val = False if block in self.llvm_blocks: self.llvm_block = self.llvm_blocks[block] self.builder = lc.Builder.new(self.llvm_block) + if block == 0: + self.generate_co_init() ret_val = True return ret_val def exit_block(self, block): + ''' + Tear down any state created for code generation in the current + basic block. If the basic block isn't already terminated by a + control flow statement, assume it branches to the next basic + block. + ''' # XXX Isn't this really a bug in GenericFlowVisitor.visit()? if block in self.llvm_blocks: del self.builder del self.llvm_block def _op(self, i, op, arg, *args, **kws): - return[] - raise NotImplementedError() + args = [self.values[stkarg] for stkarg in args] + if arg is not None: + args.insert(0, lc.Constant.int(lc_int, arg)) + argtys = [arg.type for arg in args] + target_fnty = lc.Type.function(self.obj_type, argtys) + # XXX Modify the visitor! Should be passing Instr named tuples here. 
+ opname = self.opnames[op] + target_fnname = ''.join((self.prefix(opname, *args), opname, + self.postfix(opname, *args))) + target_fn = self.llvm_module.get_or_insert_function(target_fnty, + target_fnname) + result = self.builder.call(target_fn, args) + self.values[i] = result + return [result] op_BINARY_ADD = _op op_BINARY_AND = _op @@ -166,7 +247,13 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): op_LOAD_CLOSURE = _op op_LOAD_CONST = _op op_LOAD_DEREF = _op - op_LOAD_FAST = _op + + def op_LOAD_FAST(self, i, op, arg, *args, **kws): + varname = self.code_obj.co_varnames[arg] + result = self.builder.load(self.symtab[varname]) + self.values[i] = result + return [result] + op_LOAD_GLOBAL = _op op_LOAD_LOCALS = _op op_LOAD_NAME = _op @@ -198,7 +285,15 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): op_STOP_CODE = _op op_STORE_ATTR = _op op_STORE_DEREF = _op - op_STORE_FAST = _op + + def op_STORE_FAST(self, i, op, arg, *args): + src_index, = args + src = self.values[src_index] + varname = self.code_obj.co_varnames[arg] + result = self.builder.store(src, self.symtab[varname]) + self.values[i] = result + return [result] + op_STORE_GLOBAL = _op op_STORE_LOCALS = _op op_STORE_MAP = _op From d8297cf50bdba73fe23d60d058e7442f3a714be0 Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Fri, 14 Jun 2013 18:18:30 -0500 Subject: [PATCH 26/32] Added support for rudimentary control flow, and completely incorrect refcounting in llpython.af_to_api. 
--- llpython/af_to_api.py | 72 +++++++++++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 19 deletions(-) diff --git a/llpython/af_to_api.py b/llpython/af_to_api.py index e7599ca..61aeb74 100644 --- a/llpython/af_to_api.py +++ b/llpython/af_to_api.py @@ -59,6 +59,14 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): def postfix(self, opname, *opargs): return '' + def get_op_function(self, opname, *opargs, **kwds): + target_fn_ty = lc.Type.function(kwds.get('return_type', self.obj_type), + [arg.type for arg in opargs]) + target_fn_name = ''.join((self.prefix(opname, *opargs), opname, + self.postfix(opname, *opargs))) + return self.llvm_module.get_or_insert_function(target_fn_ty, + target_fn_name) + def translate_cfg(self, code_obj, cfg, llvm_module=None, monotype = l_pyobj_p, **kws): ''' @@ -81,6 +89,9 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): if llvm_module is None: llvm_module = lc.Module.new('lmod_' + self.target_function_name) self.llvm_module = llvm_module + self.incref = self.get_op_function('INCREF', self.null) + self.decref = self.get_op_function('DECREF', self.null, + return_type = lc.Type.void()) self.visit(cfg.blocks) del self.llvm_module del self.cfg @@ -158,6 +169,12 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): ''' # XXX Isn't this really a bug in GenericFlowVisitor.visit()? 
if block in self.llvm_blocks: + bb_instrs = self.llvm_block.instructions + if ((len(bb_instrs) == 0) or + (not bb_instrs[-1].is_terminator)): + out_blocks = list(self.cfg.blocks_out[block]) + assert len(out_blocks) == 1, [str(i) for i in bb_instrs] + self.builder.branch(self.llvm_blocks[out_blocks[0]]) del self.builder del self.llvm_block @@ -165,18 +182,16 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): args = [self.values[stkarg] for stkarg in args] if arg is not None: args.insert(0, lc.Constant.int(lc_int, arg)) - argtys = [arg.type for arg in args] - target_fnty = lc.Type.function(self.obj_type, argtys) - # XXX Modify the visitor! Should be passing Instr named tuples here. - opname = self.opnames[op] - target_fnname = ''.join((self.prefix(opname, *args), opname, - self.postfix(opname, *args))) - target_fn = self.llvm_module.get_or_insert_function(target_fnty, - target_fnname) + # XXX Modify the visitor! Should be passing Instr named + # tuples here, and not looking up the operation name. 
+ target_fn = self.get_op_function(self.opnames[op], *args, **kws) result = self.builder.call(target_fn, args) self.values[i] = result return [result] + def _not_implemented(self, i, op, arg, *args, **kws): + raise NotImplementedError(self.opnames[op]) + op_BINARY_ADD = _op op_BINARY_AND = _op op_BINARY_DIVIDE = _op @@ -235,12 +250,17 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): op_INPLACE_SUBTRACT = _op op_INPLACE_TRUE_DIVIDE = _op op_INPLACE_XOR = _op - op_JUMP_ABSOLUTE = _op - op_JUMP_FORWARD = _op - op_JUMP_IF_FALSE = _op - op_JUMP_IF_FALSE_OR_POP = _op - op_JUMP_IF_TRUE = _op - op_JUMP_IF_TRUE_OR_POP = _op + + def op_JUMP_ABSOLUTE(self, i, op, arg, *args, **kws): + return [self.builder.branch(self.llvm_blocks[arg])] + + def op_JUMP_FORWARD(self, i, op, arg, *args, **kws): + return [self.builder.branch(self.llvm_blocks[i + arg + 3])] + + op_JUMP_IF_FALSE = _not_implemented + op_JUMP_IF_FALSE_OR_POP = _not_implemented + op_JUMP_IF_TRUE = _not_implemented + op_JUMP_IF_TRUE_OR_POP = _not_implemented op_LIST_APPEND = _op op_LOAD_ATTR = _op op_LOAD_BUILD_CLASS = _op @@ -252,7 +272,7 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): varname = self.code_obj.co_varnames[arg] result = self.builder.load(self.symtab[varname]) self.values[i] = result - return [result] + return [result, self.builder.call(self.incref, [result])] op_LOAD_GLOBAL = _op op_LOAD_LOCALS = _op @@ -263,16 +283,30 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): op_NOP = _op op_POP_BLOCK = _op op_POP_EXCEPT = _op - op_POP_JUMP_IF_FALSE = _op - op_POP_JUMP_IF_TRUE = _op - op_POP_TOP = _op + + def _op_cbranch(self, i, op, arg, *args, **kws): + branch_taken = self.llvm_blocks[arg] + branch_not_taken = self.llvm_blocks[i + 3] + _kws = kws.copy() + _kws.update(return_type=lc.Type.int(1)) + test = self._op(i, op, None, *args, **_kws)[0] + return [test, self.builder.cbranch(test, branch_taken, + branch_not_taken)] + + op_POP_JUMP_IF_FALSE = _op_cbranch + op_POP_JUMP_IF_TRUE = 
_op_cbranch + + op_POP_TOP = _not_implemented op_PRINT_EXPR = _op op_PRINT_ITEM = _op op_PRINT_ITEM_TO = _op op_PRINT_NEWLINE = _op op_PRINT_NEWLINE_TO = _op op_RAISE_VARARGS = _op - op_RETURN_VALUE = _op + + def op_RETURN_VALUE(self, i, op, arg, *args): + return [self.builder.ret(self.values[args[0]])] + op_ROT_FOUR = _op op_ROT_THREE = _op op_ROT_TWO = _op From a6cf8c4f71a3ec335fe956495a653355d6c226d0 Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Mon, 17 Jun 2013 18:08:36 -0500 Subject: [PATCH 27/32] Made some improvements to llpython.af_to_api, specifically attempting to normalize variable access so reference counting becomes the target API's responsibility. --- llpython/af_to_api.py | 130 +++++++++++++++++++++++++++++----------- llpython/opcode_util.py | 12 +++- 2 files changed, 105 insertions(+), 37 deletions(-) diff --git a/llpython/af_to_api.py b/llpython/af_to_api.py index 61aeb74..e04a921 100644 --- a/llpython/af_to_api.py +++ b/llpython/af_to_api.py @@ -13,8 +13,9 @@ import llvm.core as lc from . import byte_control from . import addr_flow from . import opcode_util -from .bytetype import l_pyobj_p, lc_int +from .bytetype import lvoid, li1, lc_int, lc_long, l_pyobj_p from .bytecode_visitor import GenericFlowVisitor +from .nobitey import get_string_constant # ______________________________________________________________________ # Class definitions @@ -37,7 +38,7 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): arguments. The OPCODE_NAME is the opcode name as determined by the map in opcode.opname. 
''' - def __init__(self, _prefix=None, _postfix=None): + def __init__(self, _prefix=None, _postfix=None, **kwds): if _prefix is not None: if inspect.isfunction(_prefix): __prefix = _prefix @@ -67,8 +68,17 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): return self.llvm_module.get_or_insert_function(target_fn_ty, target_fn_name) + def call_op_function(self, index, opname, *opargs, **kwds): + target_fn = self.get_op_function(opname, *opargs, **kwds) + if target_fn.type.pointee.return_type != lvoid: + name = kwds.get('name', 'op_%d' % index) + result = self.builder.call(target_fn, opargs, name) + else: + result = self.builder.call(target_fn, opargs) + return result + def translate_cfg(self, code_obj, cfg, llvm_module=None, - monotype = l_pyobj_p, **kws): + monotype=l_pyobj_p, **kwds): ''' Generate LLVM code for the given code object and it's control flow graph. @@ -83,15 +93,12 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): self.null = lc.Constant.null(self.obj_type) self.code_obj = code_obj self.cfg = cfg - self.target_function_name = kws.get( + self.target_function_name = kwds.get( 'target_function_name', 'co_%s_%x' % (code_obj.co_name, id(code_obj))) if llvm_module is None: llvm_module = lc.Module.new('lmod_' + self.target_function_name) self.llvm_module = llvm_module - self.incref = self.get_op_function('INCREF', self.null) - self.decref = self.get_op_function('DECREF', self.null, - return_type = lc.Type.void()) self.visit(cfg.blocks) del self.llvm_module del self.cfg @@ -143,7 +150,15 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): local_index = arg_index - 1 name = self.code_obj.co_varnames[local_index] arg.name = name - self.builder.store(arg, self.symtab[name]) + self.call_op_function( + None, 'STORE_FAST', arg, self.symtab[name], + return_type=lvoid) + + def generate_co_deinit(self, index): + for value in self.symtab.values(): + self.call_op_function(index, 'DELETE_FAST', + lc.Constant.int(li1, 0), value, + return_type=lvoid) 
def enter_block(self, block): ''' @@ -178,18 +193,17 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): del self.builder del self.llvm_block - def _op(self, i, op, arg, *args, **kws): + def _op(self, i, op, arg, *args, **kwds): args = [self.values[stkarg] for stkarg in args] if arg is not None: args.insert(0, lc.Constant.int(lc_int, arg)) # XXX Modify the visitor! Should be passing Instr named # tuples here, and not looking up the operation name. - target_fn = self.get_op_function(self.opnames[op], *args, **kws) - result = self.builder.call(target_fn, args) + result = self.call_op_function(i, self.opnames[op], *args, **kwds) self.values[i] = result return [result] - def _not_implemented(self, i, op, arg, *args, **kws): + def _not_implemented(self, i, op, arg, *args, **kwds): raise NotImplementedError(self.opnames[op]) op_BINARY_ADD = _op @@ -221,14 +235,28 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): op_CONTINUE_LOOP = _op op_DELETE_ATTR = _op op_DELETE_DEREF = _op - op_DELETE_FAST = _op - op_DELETE_GLOBAL = _op + + def op_DELETE_FAST(self, i, op, arg, *args, **kwds): + varname = self.code_obj.co_varnames[arg] + result = self.call_op_function(i, 'DELETE_FAST', + lc.Constant.int(li1, 1), + self.symtab[varname], return_type=lvoid) + return [result] + + def op_DELETE_GLOBAL(self, i, op, arg, *args, **kwds): + varname = get_string_constant(self.llvm_module, + self.code_obj.co_names[arg]) + result = self.call_op_function(i, 'DELETE_GLOBAL', self.globals, + varname, return_type=lvoid) + self.values[i] = result + return [result] + op_DELETE_NAME = _op op_DELETE_SLICE = _op op_DELETE_SUBSCR = _op - op_DUP_TOP = _op - op_DUP_TOPX = _op - op_DUP_TOP_TWO = _op + op_DUP_TOP = _not_implemented + op_DUP_TOPX = _not_implemented + op_DUP_TOP_TWO = _not_implemented op_END_FINALLY = _op op_EXEC_STMT = _op op_EXTENDED_ARG = _op @@ -251,10 +279,10 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): op_INPLACE_TRUE_DIVIDE = _op op_INPLACE_XOR = _op - def 
op_JUMP_ABSOLUTE(self, i, op, arg, *args, **kws): + def op_JUMP_ABSOLUTE(self, i, op, arg, *args, **kwds): return [self.builder.branch(self.llvm_blocks[arg])] - def op_JUMP_FORWARD(self, i, op, arg, *args, **kws): + def op_JUMP_FORWARD(self, i, op, arg, *args, **kwds): return [self.builder.branch(self.llvm_blocks[i + arg + 3])] op_JUMP_IF_FALSE = _not_implemented @@ -265,16 +293,40 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): op_LOAD_ATTR = _op op_LOAD_BUILD_CLASS = _op op_LOAD_CLOSURE = _op - op_LOAD_CONST = _op + + def op_LOAD_CONST(self, i, op, arg, *args, **kwds): + py_val = self.code_obj.co_consts[arg] + if isinstance(py_val, int): + # XXX Add bounds check on integer values; use big int + # (from string?) constructor if necessary. + result = self.call_op_function(i, 'LOAD_CONST_INT', + lc.Constant.int(lc_long, py_val)) + elif isinstance(py_val, float): + result = self.call_op_function(i, 'LOAD_CONST_FLOAT', + lc.Constant.double(py_val)) + elif py_val is None: + result = self.call_op_function(i, 'LOAD_CONST_NONE') + else: + raise NotImplementedError('Constant conversion for %r' % (py_val,)) + self.values[i] = result + return [result] + op_LOAD_DEREF = _op - def op_LOAD_FAST(self, i, op, arg, *args, **kws): + def op_LOAD_FAST(self, i, op, arg, *args, **kwds): varname = self.code_obj.co_varnames[arg] - result = self.builder.load(self.symtab[varname]) + args = self.symtab[varname], + result = self.call_op_function(i, 'LOAD_FAST', *args) self.values[i] = result - return [result, self.builder.call(self.incref, [result])] + return [result] + + def op_LOAD_GLOBAL(self, i, op, arg, *args, **kwds): + varname = get_string_constant(self.llvm_module, + self.code_obj.co_names[arg]) + result = self.call_op_function(i, 'LOAD_GLOBAL', self.globals, varname) + self.values[i] = result + return [result] - op_LOAD_GLOBAL = _op op_LOAD_LOCALS = _op op_LOAD_NAME = _op op_MAKE_CLOSURE = _op @@ -284,12 +336,12 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): 
op_POP_BLOCK = _op op_POP_EXCEPT = _op - def _op_cbranch(self, i, op, arg, *args, **kws): + def _op_cbranch(self, i, op, arg, *args, **kwds): branch_taken = self.llvm_blocks[arg] branch_not_taken = self.llvm_blocks[i + 3] - _kws = kws.copy() - _kws.update(return_type=lc.Type.int(1)) - test = self._op(i, op, None, *args, **_kws)[0] + _kwds = kwds.copy() + _kwds.update(return_type=li1) + test = self._op(i, op, None, *args, **_kwds)[0] return [test, self.builder.cbranch(test, branch_taken, branch_not_taken)] @@ -305,6 +357,7 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): op_RAISE_VARARGS = _op def op_RETURN_VALUE(self, i, op, arg, *args): + self.generate_co_deinit(i) return [self.builder.ret(self.values[args[0]])] op_ROT_FOUR = _op @@ -324,12 +377,20 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): src_index, = args src = self.values[src_index] varname = self.code_obj.co_varnames[arg] - result = self.builder.store(src, self.symtab[varname]) - self.values[i] = result + dest = self.symtab[varname] + result = self.call_op_function(i, 'STORE_FAST', src, dest, + return_type=lvoid) return [result] - op_STORE_GLOBAL = _op - op_STORE_LOCALS = _op + def op_STORE_GLOBAL(self, i, op, arg, *args): + varname = get_string_constant(self.llvm_module, + self.code_obj.co_names[arg]) + result = self.call_op_function( + i, 'STORE_GLOBAL', self.values[args[0]], self.globals, varname, + return_type=lvoid) + return [result] + + op_STORE_LOCALS = _not_implemented op_STORE_MAP = _op op_STORE_NAME = _op op_STORE_SLICE = _op @@ -347,14 +408,15 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): # ______________________________________________________________________ # Function definition(s) -def demo_translator(*args, **kws): +def demo_translator(*args, **kwds): def _visit(obj): if inspect.isfunction(obj): obj = opcode_util.get_code_object(obj) print('\n; %s\n; %r' % ('_' * 70, obj)) cfg = byte_control.ControlFlowBuilder.build_cfg_from_co(obj) cfg.blocks = 
addr_flow.AddressFlowBuilder().visit_cfg(cfg) - print(AddressFlowToLLVMPyAPICalls(**kws).translate_cfg(obj, cfg)) + print(AddressFlowToLLVMPyAPICalls(**kwds).translate_cfg(obj, cfg, + **kwds)) return opcode_util.visit_code_args(_visit, *args) # ______________________________________________________________________ diff --git a/llpython/opcode_util.py b/llpython/opcode_util.py index ce92043..d47d39f 100644 --- a/llpython/opcode_util.py +++ b/llpython/opcode_util.py @@ -221,6 +221,9 @@ def visit_code_args(visitor, *args, **kws): Takes a visitor function and a sequence of command line arguments. The visitor function should be able to handle either function objects or code objects.""" + def _visit_code_objs(root_obj): + for code_obj in itercodeobjs(root_obj): + visitor(code_obj) try: from .tests import llfuncs except ImportError: @@ -231,12 +234,15 @@ def visit_code_args(visitor, *args, **kws): else: args = ('pymod',) for arg in args: - if arg.endswith('.py'): + if inspect.iscode(arg): + _visit_code_objs(arg) + elif inspect.isfunction(arg): + _visit_code_objs(get_code_object(arg)) + elif arg.endswith('.py'): with open(arg) as in_file: in_source = in_file.read() in_codeobj = compile(in_source, arg, 'exec') - for codeobj in itercodeobjs(in_codeobj): - visitor(codeobj) + _visit_code_objs(in_codeobj) else: visitor(getattr(llfuncs, arg)) From e4f4d56fe63bf259d43d89a190e00fc9e8dda66b Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Tue, 18 Jun 2013 11:52:30 -0500 Subject: [PATCH 28/32] Added support for POP_TOP and SETUP_LOOP to llpython.af_to_api. 
--- llpython/af_to_api.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/llpython/af_to_api.py b/llpython/af_to_api.py index e04a921..d3027f3 100644 --- a/llpython/af_to_api.py +++ b/llpython/af_to_api.py @@ -43,14 +43,14 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): if inspect.isfunction(_prefix): __prefix = _prefix else: - def __prefix(self, opname, *opargs): + def __prefix(opname, *opargs): return _prefix self.prefix = __prefix if _postfix is not None: if inspect.isfunction(_postfix): __postfix = _postfix else: - def __postfix(self, opname, *opargs): + def __postfix(opname, *opargs): return _postfix self.postfix = __postfix @@ -61,6 +61,7 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): return '' def get_op_function(self, opname, *opargs, **kwds): + print(opname, opargs, kwds) target_fn_ty = lc.Type.function(kwds.get('return_type', self.obj_type), [arg.type for arg in opargs]) target_fn_name = ''.join((self.prefix(opname, *opargs), opname, @@ -337,7 +338,10 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): op_POP_EXCEPT = _op def _op_cbranch(self, i, op, arg, *args, **kwds): - branch_taken = self.llvm_blocks[arg] + if op in opcode.hasjabs: + branch_taken = self.llvm_blocks[arg] + else: + branch_taken = self.llvm_blocks[i + arg + 3] branch_not_taken = self.llvm_blocks[i + 3] _kwds = kwds.copy() _kwds.update(return_type=li1) @@ -348,7 +352,10 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): op_POP_JUMP_IF_FALSE = _op_cbranch op_POP_JUMP_IF_TRUE = _op_cbranch - op_POP_TOP = _not_implemented + def op_POP_TOP(self, i, op, arg, *args): + return [self.call_op_function(i, 'POP_TOP', self.values[args[0]], + return_type=lvoid)] + op_PRINT_EXPR = _op op_PRINT_ITEM = _op op_PRINT_ITEM_TO = _op @@ -365,7 +372,7 @@ class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): op_ROT_TWO = _op op_SETUP_EXCEPT = _op op_SETUP_FINALLY = _op - op_SETUP_LOOP = _op + op_SETUP_LOOP = _op_cbranch op_SETUP_WITH = _op 
op_SET_ADD = _op op_SLICE = _op From 9fe9e8eb3949166582cc4a441cbcd38f3fbafaa9 Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Fri, 12 Jul 2013 19:38:50 -0500 Subject: [PATCH 29/32] Incremental extensions to llpython to better support using the Python C-API. --- llpython/byte_translator.py | 67 ++++++++++++++++++++++++++++++------- llpython/bytetype.py | 24 +++++++++++-- 2 files changed, 75 insertions(+), 16 deletions(-) diff --git a/llpython/byte_translator.py b/llpython/byte_translator.py index bcb2748..a4dc980 100644 --- a/llpython/byte_translator.py +++ b/llpython/byte_translator.py @@ -10,6 +10,7 @@ import opcode import types import logging +from llvm import LLVMException import llvm.core as lc from . import opcode_util @@ -40,6 +41,13 @@ _compare_mapping_sint = {'>':lc.ICMP_SGT, '<=':lc.ICMP_SLE, '!=':lc.ICMP_NE} +_compare_mapping_ptr = {'>':lc.ICMP_UGT, + '<':lc.ICMP_ULT, + '==':lc.ICMP_EQ, + '>=':lc.ICMP_UGE, + '<=':lc.ICMP_ULE, + '!=':lc.ICMP_NE} + # XXX Stolen from numba.llvm_types: class LLVMCaster (object): @@ -120,6 +128,37 @@ class LLVMCaster (object): raise NotImplementedError(lkind1, lkind2) return ret_val +# ______________________________________________________________________ + +def _convert_const(py_val): + '''Convert a constant Python value into a comparable LLVM + constant. Preserves Python values and data structures such as + lists, tuples, and None. 
+ ''' + if isinstance(py_val, list): + ret_val = [_convert_const(child) for child in py_val] + elif isinstance(py_val, tuple): + ret_val = tuple(_convert_const(child) for child in py_val) + elif isinstance(py_val, int): + ret_val = lc.Constant.int(bytetype.lc_int, py_val) + elif isinstance(py_val, float): + ret_val = lc.Constant.double(py_val) + elif py_val == None: + ret_val = py_val + else: + raise NotImplementedError('Constant conversion for %r' % (py_val,)) + return ret_val + +# ______________________________________________________________________ + +def get_or_insert_global_variable(llvm_module, variable_ty, variable_name): + try: + ret_val = llvm_module.get_global_variable_named(variable_name) + # XXX Check LLVM value is of correct type?! + except LLVMException: + ret_val = llvm_module.add_global_variable(variable_ty, variable_name) + return ret_val + # ______________________________________________________________________ # Class definitions @@ -377,11 +416,12 @@ class LLVMTranslator (BytecodeFlowVisitor): raise NotImplementedError("LLVMTranslator.op_BUILD_SLICE") def op_BUILD_TUPLE (self, i, op, arg, *args, **kws): - return args + return [args] def op_CALL_FUNCTION (self, i, op, arg, *args, **kws): fn = args[0] args = args[1:] + argcount = len(args) fn_name = getattr(fn, '__name__', None) if isinstance(fn, (types.FunctionType, types.MethodType)): ret_val = [fn(self.builder, *args)] @@ -421,6 +461,9 @@ class LLVMTranslator (BytecodeFlowVisitor): elif arg1.type.kind in (lc.TYPE_FLOAT, lc.TYPE_DOUBLE): ret_val = [self.builder.fcmp(_compare_mapping_float[cmp_kind], arg1, arg2)] + elif isinstance(arg1.type, lc.PointerType): + ret_val = [self.builder.icmp(_compare_mapping_ptr[cmp_kind], + arg1, arg2)] else: raise NotImplementedError('Comparison of type %r' % (arg1.type,)) return ret_val @@ -480,17 +523,7 @@ class LLVMTranslator (BytecodeFlowVisitor): raise NotImplementedError("LLVMTranslator.op_LOAD_ATTR") def op_LOAD_CONST (self, i, op, arg, *args, **kws): - 
py_val = self.code_obj.co_consts[arg] - if isinstance(py_val, int): - ret_val = [lc.Constant.int(bytetype.lc_int, py_val)] - elif isinstance(py_val, float): - ret_val = [lc.Constant.double(py_val)] - elif py_val == None: - ret_val = [None] - else: - raise NotImplementedError('Constant converstion for %r' % - (py_val,)) - return ret_val + return [_convert_const(self.code_obj.co_consts[arg])] def op_LOAD_DEREF (self, i, op, arg, *args, **kws): name = self.code_obj.co_freevars[arg] @@ -546,7 +579,15 @@ class LLVMTranslator (BytecodeFlowVisitor): return [self.builder.store(store_val, dest_addr)] def op_UNARY_CONVERT (self, i, op, arg, *args, **kws): - raise NotImplementedError("LLVMTranslator.op_UNARY_CONVERT") + var_ty = args[0] + if isinstance(var_ty, lc.Type): + var_name = var_ty.__name__ + ret_val = [get_or_insert_global_variable(self.llvm_module, var_ty, + var_name)] + else: + raise NotImplementedError("LLVMTranslator.op_UNARY_CONVERT: %r" % + (var_ty,)) + return ret_val def op_UNARY_INVERT (self, i, op, arg, *args, **kws): raise NotImplementedError("LLVMTranslator.op_UNARY_INVERT") diff --git a/llpython/bytetype.py b/llpython/bytetype.py index 211f3f8..6549133 100644 --- a/llpython/bytetype.py +++ b/llpython/bytetype.py @@ -1,4 +1,5 @@ #! 
/usr/bin/env python +# -*- coding: utf-8 -*- # ______________________________________________________________________ import ctypes @@ -23,9 +24,10 @@ li8_ptr = lc.Type.pointer(li8) lc_int = lc.Type.int(ctypes.sizeof(ctypes.c_int) * 8) lc_long = lc.Type.int(ctypes.sizeof(ctypes.c_long) * 8) -l_pyobject_head = [lc_size_t, lc.Type.pointer(li32)] +l_pyobject_head = [lc_size_t, li8_ptr] l_pyobject_head_struct = lc.Type.struct(l_pyobject_head) l_pyobj_p = l_pyobject_head_struct_p = lc.Type.pointer(l_pyobject_head_struct) +l_pyobj_pp = lc.Type.pointer(l_pyobj_p) l_pyfunc = lc.Type.function(l_pyobj_p, (l_pyobj_p, l_pyobj_p)) strlen = lc.Type.function(lc_size_t, (li8_ptr,)) @@ -34,10 +36,26 @@ strndup = lc.Type.function(li8_ptr, (li8_ptr, lc_size_t)) malloc = lc.Type.function(li8_ptr, (lc_size_t,)) free = lc.Type.function(lvoid, (li8_ptr,)) -Py_BuildValue = lc.Type.function(l_pyobj_p, [li8_ptr], True) PyArg_ParseTuple = lc.Type.function(lc_int, [l_pyobj_p, li8_ptr], True) +PyBool_FromLong = lc.Type.function(l_pyobj_p, [lc_long]) PyEval_SaveThread = lc.Type.function(li8_ptr, []) -PyEval_RestoreThread = lc.Type.function(lc.Type.void(), [li8_ptr]) +PyEval_RestoreThread = lc.Type.function(lvoid, [li8_ptr]) +PyInt_AsLong = lc.Type.function(lc_long, [l_pyobj_p]) +PyInt_FromLong = lc.Type.function(l_pyobj_p, [lc_long]) +PyNumber_Add = lc.Type.function(l_pyobj_p, (l_pyobj_p, l_pyobj_p)) +PyNumber_Divide = lc.Type.function(l_pyobj_p, (l_pyobj_p, l_pyobj_p)) +PyNumber_Multiply = lc.Type.function(l_pyobj_p, (l_pyobj_p, l_pyobj_p)) +PyNumber_Remainder = lc.Type.function(l_pyobj_p, (l_pyobj_p, l_pyobj_p)) +PyNumber_Subtract = lc.Type.function(l_pyobj_p, (l_pyobj_p, l_pyobj_p)) +PyNumber_TrueDivide = lc.Type.function(l_pyobj_p, (l_pyobj_p, l_pyobj_p)) +PyString_Check = lc.Type.function(lc_int, [l_pyobj_p]) +PyString_CheckExact = lc.Type.function(lc_int, [l_pyobj_p]) +PyString_Format = lc.Type.function(l_pyobj_p, (l_pyobj_p, l_pyobj_p)) +Py_BuildValue = lc.Type.function(l_pyobj_p, 
[li8_ptr], True) +Py_DecRef = lc.Type.function(lvoid, [l_pyobj_p]) +Py_IncRef = lc.Type.function(lvoid, [l_pyobj_p]) + +PyInt_Type = li8 # ______________________________________________________________________ # End of bytetype.py From 1adf3d871a1423adcab4382e601c6b156553e4a1 Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Tue, 23 Jul 2013 16:42:46 -0500 Subject: [PATCH 30/32] Added support for UNARY_INVERT bytecode. --- llpython/byte_translator.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/llpython/byte_translator.py b/llpython/byte_translator.py index a4dc980..278edce 100644 --- a/llpython/byte_translator.py +++ b/llpython/byte_translator.py @@ -590,7 +590,12 @@ class LLVMTranslator (BytecodeFlowVisitor): return ret_val def op_UNARY_INVERT (self, i, op, arg, *args, **kws): - raise NotImplementedError("LLVMTranslator.op_UNARY_INVERT") + arg1, = args + if isinstance(arg1.type, lc.IntegerType): + ret_val = [self.builder.xor(arg1, lc.Constant.int(arg1.type, -1))] + else: + raise NotImplementedError('Invert for type %r' % (arg1.type,)) + return ret_val def op_UNARY_NEGATIVE (self, i, op, arg, *args, **kws): raise NotImplementedError("LLVMTranslator.op_UNARY_NEGATIVE") From f332fbf9de10a75f224aa20e3c7458c32f837915 Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Tue, 23 Jul 2013 16:42:59 -0500 Subject: [PATCH 31/32] Added a few additional Python C API function declarations, used for comparing objects. 
--- llpython/bytetype.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llpython/bytetype.py b/llpython/bytetype.py index 6549133..26c1734 100644 --- a/llpython/bytetype.py +++ b/llpython/bytetype.py @@ -38,6 +38,7 @@ free = lc.Type.function(lvoid, (li8_ptr,)) PyArg_ParseTuple = lc.Type.function(lc_int, [l_pyobj_p, li8_ptr], True) PyBool_FromLong = lc.Type.function(l_pyobj_p, [lc_long]) +PyErr_GivenExceptionMatches = lc.Type.function(lc_int, (l_pyobj_p, l_pyobj_p)) PyEval_SaveThread = lc.Type.function(li8_ptr, []) PyEval_RestoreThread = lc.Type.function(lvoid, [li8_ptr]) PyInt_AsLong = lc.Type.function(lc_long, [l_pyobj_p]) @@ -48,6 +49,9 @@ PyNumber_Multiply = lc.Type.function(l_pyobj_p, (l_pyobj_p, l_pyobj_p)) PyNumber_Remainder = lc.Type.function(l_pyobj_p, (l_pyobj_p, l_pyobj_p)) PyNumber_Subtract = lc.Type.function(l_pyobj_p, (l_pyobj_p, l_pyobj_p)) PyNumber_TrueDivide = lc.Type.function(l_pyobj_p, (l_pyobj_p, l_pyobj_p)) +PyObject_RichCompare = lc.Type.function(l_pyobj_p, (l_pyobj_p, l_pyobj_p, + lc_int)) +PySequence_Contains = lc.Type.function(lc_int, (l_pyobj_p, l_pyobj_p)) PyString_Check = lc.Type.function(lc_int, [l_pyobj_p]) PyString_CheckExact = lc.Type.function(lc_int, [l_pyobj_p]) PyString_Format = lc.Type.function(l_pyobj_p, (l_pyobj_p, l_pyobj_p)) From 4f815224f9134670ca50984abf23ef2a83a3f983 Mon Sep 17 00:00:00 2001 From: Jon Riehl Date: Thu, 25 Jul 2013 15:56:01 -0500 Subject: [PATCH 32/32] Added types for more of the Python C API. 
--- llpython/bytetype.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/llpython/bytetype.py b/llpython/bytetype.py index 26c1734..3c85f93 100644 --- a/llpython/bytetype.py +++ b/llpython/bytetype.py @@ -49,6 +49,14 @@ PyNumber_Multiply = lc.Type.function(l_pyobj_p, (l_pyobj_p, l_pyobj_p)) PyNumber_Remainder = lc.Type.function(l_pyobj_p, (l_pyobj_p, l_pyobj_p)) PyNumber_Subtract = lc.Type.function(l_pyobj_p, (l_pyobj_p, l_pyobj_p)) PyNumber_TrueDivide = lc.Type.function(l_pyobj_p, (l_pyobj_p, l_pyobj_p)) +PyNumber_InPlaceAdd = lc.Type.function(l_pyobj_p, (l_pyobj_p, l_pyobj_p)) +PyNumber_InPlaceDivide = lc.Type.function(l_pyobj_p, (l_pyobj_p, l_pyobj_p)) +PyNumber_InPlaceMultiply = lc.Type.function(l_pyobj_p, (l_pyobj_p, l_pyobj_p)) +PyNumber_InPlaceRemainder = lc.Type.function(l_pyobj_p, (l_pyobj_p, l_pyobj_p)) +PyNumber_InPlaceSubtract = lc.Type.function(l_pyobj_p, (l_pyobj_p, l_pyobj_p)) +PyNumber_InPlaceTrueDivide = lc.Type.function(l_pyobj_p, (l_pyobj_p, + l_pyobj_p)) +PyObject_IsTrue = lc.Type.function(lc_int, [l_pyobj_p]) PyObject_RichCompare = lc.Type.function(l_pyobj_p, (l_pyobj_p, l_pyobj_p, lc_int)) PySequence_Contains = lc.Type.function(lc_int, (l_pyobj_p, l_pyobj_p))