From abbf97276801d5985110c82cab87a53d84ea0d78 Mon Sep 17 00:00:00 2001
From: Jon Riehl
Date: Tue, 21 May 2013 11:58:26 -0500
Subject: [PATCH] Added address flow builder, and type constraint generator.

---
 llpython/addr_flow.py        | 119 +++++++++++++++++
 llpython/byte_flow.py        |   3 +-
 llpython/bytecode_visitor.py |   4 +-
 llpython/type_flow.py        | 250 +++++++++++++++++++++++++++++++++++
 4 files changed, 372 insertions(+), 4 deletions(-)
 create mode 100644 llpython/addr_flow.py
 create mode 100644 llpython/type_flow.py

diff --git a/llpython/addr_flow.py b/llpython/addr_flow.py
new file mode 100644
index 0000000..7447789
--- /dev/null
+++ b/llpython/addr_flow.py
@@ -0,0 +1,119 @@
+#! /usr/bin/env python
+# ______________________________________________________________________
+# Module imports
+
+from __future__ import absolute_import
+
+from .byte_flow import BytecodeFlowBuilder
+
+# ______________________________________________________________________
+# Class definition(s)
+
+class AddressFlowBuilder(BytecodeFlowBuilder):
+    '''
+    Builds on top of the BytecodeFlowBuilder with two important differences:
+
+    * Child nodes are represented by bytecode indices.
+
+    * All operations (other than purely stack manipulation operations)
+      are retained in the block list (as opposed to being nested).
+
+    The resulting data structure describes a directed acyclic graph
+    (DAG) in a similar fashion to BytecodeFlowBuilder:
+
+    * `flow_dag` ``:=`` ``{`` `blocks` ``*`` ``}``
+    * `blocks` ``:=`` `block_index` ``:`` ``[`` `bytecode_tuple` ``*`` ``]``
+    * `bytecode_tuple` ``:=`` ``(`` `opcode_index` ``,`` `opcode` ``,``
+        `opname` ``,`` `arg` ``,`` ``[`` `opcode_index` ``*`` ``]`` ``)``
+    '''
+    def _visit_op(self, i, op, arg, opname, pops, pushes, appends):
+        '''Record the operation at bytecode index `i` in the current block.
+
+        Pops `pops` operands off the simulated stack, where a negative
+        `pops` encodes an operand count that depends on `arg`.  Operands
+        are referenced by bytecode index in the resulting bytecode tuple,
+        which is pushed back on the stack when `pushes` is true.  The
+        tuple is always appended to the current block (`appends` is
+        ignored), and is also returned.
+        '''
+        assert pops is not None, ('%s not well defined in opcode_util.'
+                                  'OPCODE_MAP' % opname)
+        # Resolve a variable operand count before testing it: when it
+        # works out to zero we must not slice, since stack[-0:] is the
+        # whole stack, not an empty list.
+        if pops < 0:
+            pops = arg - pops - 1
+        if pops:
+            assert pops <= len(self.stack), ("Stack underflow at instruction "
+                                             "%d (%s)!" % (i, opname))
+            stk_args = [stk_arg[0] for stk_arg in self.stack[-pops:]]
+            del self.stack[-pops:]
+        else:
+            stk_args = []
+        ret_val = (i, op, opname, arg, stk_args)
+        if pushes:
+            self.stack.append(ret_val)
+        self.block.append(ret_val)
+        return ret_val
+
+    def op_IMPORT_FROM (self, i, op, arg):
+        # References top of stack without popping, so we can't use the
+        # generic machinery.
+        opname = self.opmap[op][0]
+        ret_val = i, op, opname, arg, [self.stack[-1][0]]
+        self.stack.append(ret_val)
+        self.block.append(ret_val)
+        return ret_val
+
+    def op_JUMP_IF_FALSE (self, i, op, arg):
+        '''This method is used for both JUMP_IF_FALSE, and JUMP_IF_TRUE
+        opcodes.  The tested value is referenced, but left on the stack.'''
+        ret_val = i, op, self.opnames[op], arg, [self.stack[-1][0]]
+        self.block.append(ret_val)
+        return ret_val
+
+    op_JUMP_IF_TRUE = op_JUMP_IF_FALSE
+
+    def op_LIST_APPEND (self, i, op, arg):
+        '''This method is used for both LIST_APPEND, and SET_ADD
+        opcodes.  The element is popped; the container, found `arg`
+        entries down the remaining stack, is left in place.'''
+        elem = self.stack.pop()
+        container = self.stack[-arg]
+        ret_val = i, op, self.opnames[op], arg, [container[0], elem[0]]
+        self.block.append(ret_val)
+        return ret_val
+
+    op_SET_ADD = op_LIST_APPEND
+
+# ______________________________________________________________________
+# Function definition(s)
+
+def build_addr_flow(func):
+    '''Given a Python function, return an address flow DAG for that
+    function (see AddressFlowBuilder).'''
+    from . import byte_control
+    cfg = byte_control.build_cfg(func)
+    return AddressFlowBuilder().visit_cfg(cfg)
+
+# ______________________________________________________________________
+# Main (self-test) routine
+
+def main(*args):
+    '''Pretty-print the address flow of each named function in the
+    tests.llfuncs module, defaulting to `pymod`.'''
+    import pprint
+    from .tests import llfuncs
+    if not args:
+        args = ('pymod',)
+    for arg in args:
+        pprint.pprint(build_addr_flow(getattr(llfuncs, arg)))
+
+# ______________________________________________________________________
+
+if __name__ == "__main__":
+    import sys
+    main(*sys.argv[1:])
+
+# ______________________________________________________________________
+# End of addr_flow.py
diff --git a/llpython/byte_flow.py b/llpython/byte_flow.py
index a5abd9f..4c354cd 100644
--- a/llpython/byte_flow.py
+++ b/llpython/byte_flow.py
@@ -272,11 +272,10 @@ class BytecodeFlowBuilder (BasicBlockVisitor):
 def build_flow (func):
     '''Given a Python function, return a bytecode flow tree for that
     function.'''
-    import byte_control
+    from . import byte_control
     cfg = byte_control.build_cfg(func)
     return BytecodeFlowBuilder().visit_cfg(cfg)
 
-
 # ______________________________________________________________________
 # Main (self-test) routine
 
diff --git a/llpython/bytecode_visitor.py b/llpython/bytecode_visitor.py
index 009d1a0..bbd2f01 100644
--- a/llpython/bytecode_visitor.py
+++ b/llpython/bytecode_visitor.py
@@ -171,8 +171,8 @@ class BasicBlockVisitor (BytecodeVisitor):
         block_indices.sort()
         for block_index in block_indices:
             self.enter_block(block_index)
-            for i, op, arg in blocks[block_index]:
-                self.visit_op(i, op, arg)
+            for op_tuple in blocks[block_index]:
+                self.visit_op(*op_tuple)
             self.exit_block(block_index)
         return self.exit_blocks(blocks)
 
diff --git a/llpython/type_flow.py b/llpython/type_flow.py
new file mode 100644
index 0000000..cd67e69
--- /dev/null
+++ b/llpython/type_flow.py
@@ -0,0 +1,250 @@
+#! /usr/bin/env python
+# ______________________________________________________________________
+# Module imports
+
+from __future__ import absolute_import
+
+from .bytecode_visitor import BasicBlockVisitor, BenignBytecodeVisitorMixin
+
+# ______________________________________________________________________
+# Class definition(s)
+
+class TypeFlowBuilder(BenignBytecodeVisitorMixin, BasicBlockVisitor):
+    '''
+    Generates type constraints from the bytecode tuples of an address
+    flow (see addr_flow.AddressFlowBuilder).
+
+    Results are kept in two maps, both keyed by bytecode index:
+
+    * `type_flow` holds either a concrete Python type, or a set of
+      bytecode indices and type variable names that the type of the
+      result flows from.
+
+    * `requirements` holds the set of operand indices related by an
+      operation (currently only populated for COMPARE_OP).
+
+    Type variables are named ``in<n>`` for argument `n`, ``inref<n>``
+    for LOAD_DEREF slot `n`, and ``inglobal<n>`` for LOAD_GLOBAL name
+    index `n`.
+    '''
+    def __init__(self, co_obj, *args, **kws):
+        '''Build type constraints for the code object `co_obj`.  Any
+        other arguments are passed on to the base classes.'''
+        super(TypeFlowBuilder, self).__init__(*args, **kws)
+        self.co_obj = co_obj
+        self.locals = {}
+        self.globals = {}
+        self.refs = {}
+        self.type_flow = {}
+        self.requirements = {}
+
+    def get_type_eqns(self):
+        '''Return the tuple (`type_flow`, `requirements`, `locals`,
+        `globals`) of constraints gathered so far.'''
+        # NOTE(review): `refs` is not returned -- confirm that is intended.
+        return self.type_flow, self.requirements, self.locals, self.globals
+
+    @staticmethod
+    def _type_set(table, key, initial):
+        '''Return the set stored under `key` in `table`, creating it
+        from `initial` on first use.  The same set object is handed
+        out on every lookup, so later additions are seen by all users.'''
+        if key not in table:
+            table[key] = set(initial)
+        return table[key]
+
+    def _local_types(self, arg):
+        '''Return the set for local variable slot `arg`.  Slots below
+        `co_argcount` start out holding the type variable ``in<arg>``;
+        all other locals start out empty.'''
+        if arg < self.co_obj.co_argcount:
+            initial = ('in%d' % arg,)
+        else:
+            initial = ()
+        return self._type_set(self.locals, arg, initial)
+
+    def _op(self, i, op, opname, arg, args, *extras, **kws):
+        '''Generic handler: the result flows from all of the operands.'''
+        self.type_flow[i] = set(args)
+
+    op_BINARY_ADD = _op
+    op_BINARY_AND = _op
+    op_BINARY_DIVIDE = _op
+    op_BINARY_FLOOR_DIVIDE = _op
+    op_BINARY_LSHIFT = _op
+    op_BINARY_MODULO = _op
+    op_BINARY_MULTIPLY = _op
+    op_BINARY_OR = _op
+    op_BINARY_POWER = _op
+    op_BINARY_RSHIFT = _op
+    op_BINARY_SUBSCR = _op
+    op_BINARY_SUBTRACT = _op
+    op_BINARY_TRUE_DIVIDE = _op
+    op_BINARY_XOR = _op
+
+    #op_BUILD_CLASS = _do_nothing
+    #op_BUILD_LIST = _do_nothing
+    #op_BUILD_MAP = _do_nothing
+    #op_BUILD_SET = _do_nothing
+    #op_BUILD_SLICE = _do_nothing
+    #op_BUILD_TUPLE = _do_nothing
+
+    #op_CALL_FUNCTION = _do_nothing
+    #op_CALL_FUNCTION_KW = _do_nothing
+    #op_CALL_FUNCTION_VAR = _do_nothing
+    #op_CALL_FUNCTION_VAR_KW = _do_nothing
+
+    def op_COMPARE_OP(self, i, op, opname, arg, args, *extras, **kws):
+        '''Comparisons relate their operands, and result in a `bool`.'''
+        self.requirements[i] = set(args)
+        self.type_flow[i] = bool
+
+    #op_CONTINUE_LOOP = _do_nothing
+    #op_DELETE_ATTR = _do_nothing
+    #op_DELETE_DEREF = _do_nothing
+    #op_DELETE_FAST = _do_nothing
+    #op_DELETE_GLOBAL = _do_nothing
+    #op_DELETE_NAME = _do_nothing
+    #op_DELETE_SLICE = _do_nothing
+    #op_DELETE_SUBSCR = _do_nothing
+    #op_END_FINALLY = _do_nothing
+    #op_EXEC_STMT = _do_nothing
+    #op_EXTENDED_ARG = _do_nothing
+    #op_FOR_ITER = _do_nothing
+    #op_GET_ITER = _do_nothing
+    #op_IMPORT_FROM = _do_nothing
+    #op_IMPORT_NAME = _do_nothing
+    #op_IMPORT_STAR = _do_nothing
+
+    op_INPLACE_ADD = _op
+    op_INPLACE_AND = _op
+    op_INPLACE_DIVIDE = _op
+    op_INPLACE_FLOOR_DIVIDE = _op
+    op_INPLACE_LSHIFT = _op
+    op_INPLACE_MODULO = _op
+    op_INPLACE_MULTIPLY = _op
+    op_INPLACE_OR = _op
+    op_INPLACE_POWER = _op
+    op_INPLACE_RSHIFT = _op
+    op_INPLACE_SUBTRACT = _op
+    op_INPLACE_TRUE_DIVIDE = _op
+    op_INPLACE_XOR = _op
+
+    #op_JUMP_ABSOLUTE = _do_nothing
+    #op_JUMP_FORWARD = _do_nothing
+    #op_JUMP_IF_FALSE = _do_nothing
+    #op_JUMP_IF_FALSE_OR_POP = _do_nothing
+    #op_JUMP_IF_TRUE = _do_nothing
+    #op_JUMP_IF_TRUE_OR_POP = _do_nothing
+    #op_LIST_APPEND = _do_nothing
+    #op_LOAD_ATTR = _do_nothing
+    #op_LOAD_BUILD_CLASS = _do_nothing
+    #op_LOAD_CLOSURE = _do_nothing
+
+    def op_LOAD_CONST(self, i, op, opname, arg, args, *extras, **kws):
+        '''Constants have the type of the constant object itself.'''
+        self.type_flow[i] = type(self.co_obj.co_consts[arg])
+
+    def op_LOAD_DEREF(self, i, op, opname, arg, args, *extras, **kws):
+        self.type_flow[i] = self._type_set(self.refs, arg,
+                                           ('inref%d' % arg,))
+
+    def op_LOAD_FAST(self, i, op, opname, arg, args, *extras, **kws):
+        self.type_flow[i] = self._local_types(arg)
+
+    def op_LOAD_GLOBAL(self, i, op, opname, arg, args, *extras, **kws):
+        # Globals get a type variable prefix of their own: naming them
+        # 'in%d' would conflate global name N with argument N.
+        self.type_flow[i] = self._type_set(self.globals, arg,
+                                           ('inglobal%d' % arg,))
+
+    #op_LOAD_LOCALS = _do_nothing
+    #op_LOAD_NAME = _do_nothing
+    #op_MAKE_CLOSURE = _do_nothing
+    #op_MAKE_FUNCTION = _do_nothing
+    #op_MAP_ADD = _do_nothing
+    #op_NOP = _do_nothing
+    #op_POP_BLOCK = _do_nothing
+    #op_POP_EXCEPT = _do_nothing
+    #op_POP_JUMP_IF_FALSE = _do_nothing
+    #op_POP_JUMP_IF_TRUE = _do_nothing
+    #op_POP_TOP = _do_nothing
+    #op_PRINT_EXPR = _do_nothing
+    #op_PRINT_ITEM = _do_nothing
+    #op_PRINT_ITEM_TO = _do_nothing
+    #op_PRINT_NEWLINE = _do_nothing
+    #op_PRINT_NEWLINE_TO = _do_nothing
+    #op_RAISE_VARARGS = _do_nothing
+
+    op_RETURN_VALUE = _op
+
+    #op_SETUP_EXCEPT = _do_nothing
+    #op_SETUP_FINALLY = _do_nothing
+    #op_SETUP_LOOP = _do_nothing
+    #op_SETUP_WITH = _do_nothing
+    #op_SET_ADD = _do_nothing
+
+    #op_SLICE = _do_nothing
+
+    #op_STOP_CODE = _do_nothing
+    #op_STORE_ATTR = _do_nothing
+    #op_STORE_DEREF = _do_nothing
+
+    def op_STORE_FAST(self, i, op, opname, arg, args, *extras, **kws):
+        '''The stored value flows into the type set of the local.'''
+        result = self._local_types(arg)
+        assert len(args) == 1
+        result.add(args[0])
+
+    #op_STORE_GLOBAL = _do_nothing
+    #op_STORE_LOCALS = _do_nothing
+    #op_STORE_MAP = _do_nothing
+    #op_STORE_NAME = _do_nothing
+    #op_STORE_SLICE = _do_nothing
+    #op_STORE_SUBSCR = _do_nothing
+
+    op_UNARY_CONVERT = _op
+    op_UNARY_INVERT = _op
+    op_UNARY_NEGATIVE = _op
+    op_UNARY_NOT = _op
+    op_UNARY_POSITIVE = _op
+
+    #op_UNPACK_EX = _do_nothing
+    #op_UNPACK_SEQUENCE = _do_nothing
+    #op_WITH_CLEANUP = _do_nothing
+    #op_YIELD_VALUE = _do_nothing
+
+# ______________________________________________________________________
+# Function definition(s)
+
+def build_type_flow(func):
+    '''Given a Python function, return the type constraints generated
+    for that function (see TypeFlowBuilder.get_type_eqns).'''
+    from .opcode_util import get_code_object
+    from .addr_flow import build_addr_flow
+    blocks = build_addr_flow(func)
+    ty_builder = TypeFlowBuilder(get_code_object(func))
+    ty_builder.visit(blocks)
+    return ty_builder.get_type_eqns()
+
+# ______________________________________________________________________
+# Main (self-test) routine
+
+def main(*args):
+    '''Pretty-print the type constraints of each named function in
+    the tests.llfuncs module, defaulting to `pymod`.'''
+    import pprint
+    from .tests import llfuncs
+    if not args:
+        args = ('pymod',)
+    for arg in args:
+        pprint.pprint(build_type_flow(getattr(llfuncs, arg)))
+
+# ______________________________________________________________________
+
+if __name__ == "__main__":
+    import sys
+    main(*sys.argv[1:])
+
+# ______________________________________________________________________
+# End of type_flow.py