# commit 6d8fbafb54f802c6fcba2b3ea539ac482c4b98b8
# Author: Jon Riehl
# Date:   Thu Oct 11 15:46:56 2012 -0500
#
#     Moved llnumba out of sandbox.
#
# diff --git a/__init__.py b/__init__.py (new file mode 100644, index 0000000..e69de29)
# diff --git a/byte_control.py b/byte_control.py (new file mode 100644, index 0000000..8180cf2)

#! /usr/bin/env python
# ______________________________________________________________________

import opcode
import opcode_util
import pprint

from bytecode_visitor import BytecodeFlowVisitor, BenignBytecodeVisitorMixin
from control_flow import ControlFlowGraph

# ______________________________________________________________________

class ControlFlowBuilder (BenignBytecodeVisitorMixin, BytecodeFlowVisitor):
    """Build a ControlFlowGraph from a bytecode flow mapping.

    The flow maps each block index to a list of
    (i, op, opname, arg, args) tuples, as unpacked in exit_block().
    """

    def visit (self, flow, nargs = 0, *args, **kws):
        """Visit flow with nargs leading locals treated as arguments.

        Returns whatever the base class visit() returns (presumably the
        graph handed back by exit_flow_object() -- confirm in
        bytecode_visitor).
        """
        self.nargs = nargs
        try:
            return super(ControlFlowBuilder, self).visit(flow, *args, **kws)
        finally:
            # Drop the per-visit state even when a visitor method raises,
            # so a failed visit does not leave a stale nargs behind.
            del self.nargs

    def enter_flow_object (self, flow):
        """Create a fresh graph with one block per entry in flow."""
        super(ControlFlowBuilder, self).enter_flow_object(flow)
        self.flow = flow
        self.cfg = ControlFlowGraph()
        for block, block_ops in flow.items():
            self.cfg.add_block(block, block_ops)

    def exit_flow_object (self, flow):
        """Compute dataflow, update the graph for SSA, and return it."""
        super(ControlFlowBuilder, self).exit_flow_object(flow)
        assert self.flow == flow
        self.cfg.compute_dataflow()
        self.cfg.update_for_ssa()
        ret_val = self.cfg
        del self.cfg
        del self.flow
        return ret_val

    def enter_block (self, block):
        """Make block current; in block 0, record a write per argument."""
        self.block = block
        assert block in self.cfg.blocks
        if block == 0:
            # Arguments are recorded as STORE_FASTs at bytecode index 0.
            for local_index in range(self.nargs):
                self.op_STORE_FAST(0, opcode.opmap['STORE_FAST'], local_index)
        return True

    def _get_next_block (self, block):
        """Return the entry that follows block in self.block_list."""
        return self.block_list[self.block_list.index(block) + 1]

    def exit_block (self, block):
        """Add the out-edges implied by the last operation in block."""
        assert block == self.block
        del self.block
        i, op, opname, arg, args = self.flow[block][-1]
        if op in opcode.hasjabs:
            self.cfg.add_edge(block, arg)
        elif op in opcode.hasjrel:
            # NOTE(review): the + 3 presumably assumes a three byte jump
            # instruction (opcode plus 16-bit argument) -- confirm.
            self.cfg.add_edge(block, i + arg + 3)
        elif opname == 'BREAK_LOOP':
            self.cfg.add_edge(block, arg)
        elif opname != 'RETURN_VALUE':
            self.cfg.add_edge(block, self._get_next_block(block))
        if op in opcode_util.hascbranch:
            # Conditional branches get an additional edge to the next block.
            self.cfg.add_edge(block, self._get_next_block(block))

    def op_LOAD_FAST (self, i, op, arg, *args, **kws):
        """Record that the current block reads local arg."""
        self.cfg.blocks_reads[self.block].add(arg)
        return super(ControlFlowBuilder, self).op_LOAD_FAST(i, op, arg, *args,
                                                            **kws)

    def op_STORE_FAST (self, i, op, arg, *args, **kws):
        """Record that the current block writes local arg at index i."""
        self.cfg.writes_local(self.block, i, arg)
        return super(ControlFlowBuilder, self).op_STORE_FAST(i, op, arg, *args,
                                                             **kws)

# ______________________________________________________________________

def build_cfg (func):
    """Run ControlFlowBuilder over the bytecode flow built from func."""
    import byte_flow
    return ControlFlowBuilder().visit(
        byte_flow.build_flow(func),
        opcode_util.get_code_object(func).co_argcount)

# ______________________________________________________________________
# Main (self-test) routine

def main (*args, **kws):
    """Build and pprint() the graph of each named tests.llfuncs function.

    With no names given, 'doslice' is used.
    """
    from tests import llfuncs
    if not args:
        args = ('doslice',)
    for arg in args:
        build_cfg(getattr(llfuncs, arg)).pprint()

# ______________________________________________________________________

if __name__ == "__main__":
    import sys
    main(*sys.argv[1:])

# ______________________________________________________________________
# End of byte_control.py

# diff --git a/byte_flow.py b/byte_flow.py (new file mode 100644, index 0000000..6951b26)
#! /usr/bin/env python
# ______________________________________________________________________

import dis
import opcode

from bytecode_visitor import BytecodeIterVisitor
import opcode_util

# ______________________________________________________________________

class BytecodeFlowBuilder (BytecodeIterVisitor):
    """Group a code object's bytecode into blocks of nested operations.

    Each handled operation becomes an (i, op, opname, arg, stk_args) tuple,
    where stk_args holds the tuples it took off the simulated stack.  The
    result of a visit is a dict mapping block index to a list of tuples.
    """

    def __init__ (self, *args, **kws):
        super(BytecodeFlowBuilder, self).__init__(*args, **kws)
        om_items = opcode_util.OPCODE_MAP.items()
        # Opcode number -> (opname, pops, pushes, stmt), restricted to the
        # names this interpreter's opcode module knows about.
        self.opmap = dict((opcode.opmap[opname], (opname, pops, pushes, stmt))
                          for opname, (pops, pushes, stmt) in om_items
                          if opname in opcode.opmap)

    def _visit_op (self, i, op, arg, opname, pops, pushes, appends):
        """Build the tuple for one operation and update stack and block.

        pops is the number of stack entries consumed; a negative value
        means the count depends on arg (arg - pops - 1).  The tuple is
        pushed on the stack when pushes is true and appended to the
        current block when appends is true.  Returns the tuple.
        """
        assert pops is not None
        if pops < 0:
            pops = arg - pops - 1
        # Test for zero only after resolving arg-dependent counts: a
        # resolved count of 0 (e.g. pops == -1 with arg == 0) must not
        # reach stack[-0:], which would take and delete the whole stack.
        if pops:
            stk_args = self.stack[-pops:]
            del self.stack[-pops:]
        else:
            stk_args = []
        ret_val = (i, op, opname, arg, stk_args)
        if pushes:
            self.stack.append(ret_val)
        if appends:
            self.block.append(ret_val)
        return ret_val

    def _op (self, i, op, arg):
        """Handle op generically using its entry in self.opmap."""
        opname, pops, pushes, appends = self.opmap[op]
        return self._visit_op(i, op, arg, opname, pops, pushes, appends)

    def enter_code_object (self, co_obj):
        """Create one empty block per label, plus block 0, and reset state."""
        labels = dis.findlabels(co_obj.co_code)
        labels = opcode_util.extendlabels(co_obj.co_code, labels)
        self.blocks = dict((index, [])
                           for index in labels)
        self.stack = []
        self.loop_stack = []
        self.blocks[0] = self.block = []

    def exit_code_object (self, co_obj):
        """Return the blocks dict and drop the per-visit state."""
        ret_val = self.blocks
        del self.stack
        del self.loop_stack
        del self.block
        del self.blocks
        return ret_val

    def visit_op (self, i, op, arg):
        """Switch to the block starting at i, if any, then dispatch op."""
        if i in self.blocks:
            self.block = self.blocks[i]
        return super(BytecodeFlowBuilder, self).visit_op(i, op, arg)

    op_BINARY_ADD = _op
    op_BINARY_AND = _op
    op_BINARY_DIVIDE = _op
    op_BINARY_FLOOR_DIVIDE = _op
    op_BINARY_LSHIFT = _op
    op_BINARY_MODULO = _op
    op_BINARY_MULTIPLY = _op
    op_BINARY_OR = _op
    op_BINARY_POWER = _op
    op_BINARY_RSHIFT = _op
    op_BINARY_SUBSCR = _op
    op_BINARY_SUBTRACT = _op
    op_BINARY_TRUE_DIVIDE = _op
    op_BINARY_XOR = _op

    def op_BREAK_LOOP (self, i, op, arg):
        """Record BREAK_LOOP with a target computed from the open loop.

        The target is taken from the innermost SETUP_LOOP entry on
        loop_stack as loop_i + loop_arg + 3.
        """
        loop_i, _, loop_arg = self.loop_stack[-1]
        assert arg is None
        return self._op(i, op, loop_i + loop_arg + 3)

    #op_BUILD_CLASS = _op
    op_BUILD_LIST = _op
    op_BUILD_MAP = _op
    op_BUILD_SLICE = _op
    op_BUILD_TUPLE = _op
    op_CALL_FUNCTION = _op
    op_CALL_FUNCTION_KW = _op
    op_CALL_FUNCTION_VAR = _op
    op_CALL_FUNCTION_VAR_KW = _op
    op_COMPARE_OP = _op
    #op_CONTINUE_LOOP = _op
    op_DELETE_ATTR = _op
    op_DELETE_FAST = _op
    op_DELETE_GLOBAL = _op
    op_DELETE_NAME = _op
    op_DELETE_SLICE = _op
    op_DELETE_SUBSCR = _op

    def op_DUP_TOP (self, i, op, arg):
        """Duplicate the top stack entry; nothing is added to the block."""
        self.stack.append(self.stack[-1])

    def op_DUP_TOPX (self, i, op, arg):
        """Duplicate the top arg stack entries, keeping their order."""
        self.stack += self.stack[-arg:]

    #op_END_FINALLY = _op
    op_EXEC_STMT = _op
    #op_EXTENDED_ARG = _op
    op_FOR_ITER = _op
    op_GET_ITER = _op
    op_IMPORT_FROM = _op
    op_IMPORT_NAME = _op
    op_IMPORT_STAR = _op
    op_INPLACE_ADD = _op
    op_INPLACE_AND = _op
    op_INPLACE_DIVIDE = _op
    op_INPLACE_FLOOR_DIVIDE = _op
    op_INPLACE_LSHIFT = _op
    op_INPLACE_MODULO = _op
    op_INPLACE_MULTIPLY = _op
    op_INPLACE_OR = _op
    op_INPLACE_POWER = _op
    op_INPLACE_RSHIFT = _op
    op_INPLACE_SUBTRACT = _op
    op_INPLACE_TRUE_DIVIDE = _op
    op_INPLACE_XOR = _op
    op_JUMP_ABSOLUTE = _op
    op_JUMP_FORWARD = _op
    op_JUMP_IF_FALSE = _op
    op_JUMP_IF_TRUE = _op
    op_LIST_APPEND = _op
    op_LOAD_ATTR = _op
    op_LOAD_CLOSURE = _op
    op_LOAD_CONST = _op
    op_LOAD_DEREF = _op
    op_LOAD_FAST = _op
    op_LOAD_GLOBAL = _op
    op_LOAD_LOCALS = _op
    op_LOAD_NAME = _op
    op_MAKE_CLOSURE = _op
    op_MAKE_FUNCTION = _op
    op_NOP = _op

    def op_POP_BLOCK (self, i, op, arg):
        """Close the innermost loop entry, then handle the op generically."""
        self.loop_stack.pop()
        return self._op(i, op, arg)

    op_POP_JUMP_IF_FALSE = _op
    op_POP_JUMP_IF_TRUE = _op
    op_POP_TOP = _op
    op_PRINT_EXPR = _op
    op_PRINT_ITEM = _op
    op_PRINT_ITEM_TO = _op
    op_PRINT_NEWLINE = _op
    op_PRINT_NEWLINE_TO = _op
    op_RAISE_VARARGS = _op
    op_RETURN_VALUE = _op

    def op_ROT_FOUR (self, i, op, arg):
        """Move the top stack entry down three places."""
        self.stack[-4:] = (self.stack[-1], self.stack[-4], self.stack[-3],
                           self.stack[-2])

    def op_ROT_THREE (self, i, op, arg):
        """Move the top stack entry down two places."""
        self.stack[-3:] = (self.stack[-1], self.stack[-3], self.stack[-2])

    def op_ROT_TWO (self, i, op, arg):
        """Swap the two topmost stack entries."""
        self.stack[-2:] = (self.stack[-1], self.stack[-2])

    #op_SETUP_EXCEPT = _op
    #op_SETUP_FINALLY = _op

    def op_SETUP_LOOP (self, i, op, arg):
        """Open a loop entry and append SETUP_LOOP to the current block."""
        self.loop_stack.append((i, op, arg))
        self.block.append((i, op, self.opnames[op], arg, []))

    op_SLICE = _op
    #op_STOP_CODE = _op
    op_STORE_ATTR = _op
    op_STORE_DEREF = _op
    op_STORE_FAST = _op
    op_STORE_GLOBAL = _op
    op_STORE_MAP = _op
    op_STORE_NAME = _op
    op_STORE_SLICE = _op
    op_STORE_SUBSCR = _op
    op_UNARY_CONVERT = _op
    op_UNARY_INVERT = _op
    op_UNARY_NEGATIVE = _op
    op_UNARY_NOT = _op
    op_UNARY_POSITIVE = _op
    op_UNPACK_SEQUENCE = _op
    #op_WITH_CLEANUP = _op
    op_YIELD_VALUE = _op

# ______________________________________________________________________

def build_flow (func):
    """Run BytecodeFlowBuilder over the code object of func."""
    return BytecodeFlowBuilder().visit(opcode_util.get_code_object(func))

# ______________________________________________________________________
# Main (self-test) routine

def main (*args):
    """Pretty-print the flow of each named tests.llfuncs function.

    With no names given, 'doslice' is used.
    """
    import pprint
    from tests import llfuncs
    if not args:
        args = ('doslice',)
    for arg in args:
        pprint.pprint(build_flow(getattr(llfuncs, arg)))

# ______________________________________________________________________

if __name__ == '__main__':
    import sys
    main(*sys.argv[1:])

# ______________________________________________________________________
# End of byte_flow.py

# diff --git a/byte_translator.py b/byte_translator.py (new file mode 100644, index 0000000..7954970)
#! /usr/bin/env python
# ______________________________________________________________________
# Module imports

import opcode

import llvm.core as lc

import opcode_util

import bytetype
from bytecode_visitor import BytecodeFlowVisitor
from byte_flow import BytecodeFlowBuilder
from byte_control import ControlFlowBuilder
from phi_injector import PhiInjector, synthetic_opname

# ______________________________________________________________________
# Module data

# XXX Stolen from numba.translate:

_compare_mapping_float = {'>':lc.FCMP_OGT,
                          '<':lc.FCMP_OLT,
                          '==':lc.FCMP_OEQ,
                          '>=':lc.FCMP_OGE,
                          '<=':lc.FCMP_OLE,
                          '!=':lc.FCMP_ONE}

_compare_mapping_sint = {'>':lc.ICMP_SGT,
                         '<':lc.ICMP_SLT,
                         '==':lc.ICMP_EQ,
                         '>=':lc.ICMP_SGE,
                         '<=':lc.ICMP_SLE,
                         '!=':lc.ICMP_NE}

# XXX Stolen from numba.llvm_types:

class LLVMCaster (object):
    """Emit LLVM cast instructions, dispatching on source and target kind.

    The build_* functions take the class in their first slot because
    build_cast() calls them through CAST_MAP as plain functions.
    """

    def build_pointer_cast(_, builder, lval1, lty2):
        return builder.bitcast(lval1, lty2)

    def build_int_cast(_, builder, lval1, lty2, unsigned = False):
        """Widen (zext/sext) or truncate lval1; equal widths pass through."""
        width1 = lval1.type.width
        width2 = lty2.width
        ret_val = lval1
        if width2 > width1:
            if unsigned:
                ret_val = builder.zext(lval1, lty2)
            else:
                ret_val = builder.sext(lval1, lty2)
        elif width2 < width1:
            ret_val = builder.trunc(lval1, lty2)
        return ret_val

    def build_float_ext(_, builder, lval1, lty2):
        return builder.fpext(lval1, lty2)

    def build_float_trunc(_, builder, lval1, lty2):
        return builder.fptrunc(lval1, lty2)

    def build_int_to_float_cast(_, builder, lval1, lty2, unsigned = False):
        if unsigned:
            return builder.uitofp(lval1, lty2)
        return builder.sitofp(lval1, lty2)

    def build_float_to_int_cast(_, builder, lval1, lty2, unsigned = False):
        if unsigned:
            return builder.fptoui(lval1, lty2)
        return builder.fptosi(lval1, lty2)

    # Same-kind casts are keyed by the bare kind, cross-kind casts by a
    # (source kind, target kind) pair.
    CAST_MAP = {
        lc.TYPE_POINTER : build_pointer_cast,
        lc.TYPE_INTEGER: build_int_cast,
        (lc.TYPE_FLOAT, lc.TYPE_DOUBLE) : build_float_ext,
        (lc.TYPE_DOUBLE, lc.TYPE_FLOAT) : build_float_trunc,
        (lc.TYPE_INTEGER, lc.TYPE_FLOAT) : build_int_to_float_cast,
        (lc.TYPE_INTEGER, lc.TYPE_DOUBLE) : build_int_to_float_cast,
        (lc.TYPE_FLOAT, lc.TYPE_INTEGER) : build_float_to_int_cast,
        (lc.TYPE_DOUBLE, lc.TYPE_INTEGER) : build_float_to_int_cast,
    }

    @classmethod
    def build_cast(cls, builder, lval1, lty2, *args, **kws):
        """Cast lval1 to lty2 using the matching CAST_MAP entry.

        Extra arguments are forwarded to the selected build_* function.
        Raises NotImplementedError when CAST_MAP has no entry for the
        kind (same-kind case) or kind pair (cross-kind case).
        """
        lkind1 = lval1.type.kind
        lkind2 = lty2.kind
        if lkind1 == lkind2:
            if lkind1 not in cls.CAST_MAP:
                raise NotImplementedError(lkind1)
            caster = cls.CAST_MAP[lkind1]
        else:
            map_index = (lkind1, lkind2)
            if map_index not in cls.CAST_MAP:
                raise NotImplementedError(lkind1, lkind2)
            caster = cls.CAST_MAP[map_index]
        return caster(cls, builder, lval1, lty2, *args, **kws)

# ______________________________________________________________________
# Class definitions

class LLVMTranslator (BytecodeFlowVisitor):
    """Translate a Python function's bytecode into an LLVM function.

    The op_* handlers receive already-translated operands in *args and
    return a list of results.
    """

    def __init__ (self, llvm_module = None, *args, **kws):
        """Use llvm_module as the target, or create a fresh module."""
        super(LLVMTranslator, self).__init__(*args, **kws)
        if llvm_module is None:
            llvm_module = lc.Module.new('Translated_Module_%d' % (id(self),))
        self.llvm_module = llvm_module
        self.bytecode_flow_builder = BytecodeFlowBuilder()
        self.control_flow_builder = ControlFlowBuilder()
        self.phi_injector = PhiInjector()

    def translate (self, function, llvm_type = None, env = None):
        """Translate function and return the result of visiting its flow.

        llvm_type is the LLVM function type; when omitted, a function
        taking no arguments and returning void is used.  env supplies
        extra global names; it is copied, and the public names of
        lc.Builder and bytetype are added on top of it.
        """
        if llvm_type is None:
            # The original referenced an undefined name `lvoid` here.
            llvm_type = lc.Type.function(lc.Type.void(), ())
        if env is None:
            env = {}
        else:
            env = env.copy()
        env.update((name, method)
                   for name, method in lc.Builder.__dict__.items()
                   if not name.startswith('_'))
        env.update((name, value)
                   for name, value in bytetype.__dict__.items()
                   if not name.startswith('_'))
        code_obj = opcode_util.get_code_object(function)
        func_globals = getattr(function, 'func_globals',
                               getattr(function, '__globals__', {})).copy()
        func_globals.update(env)
        self.loop_stack = []
        self.llvm_type = llvm_type
        self.function = function
        self.code_obj = code_obj
        self.globals = func_globals
        # Bound before the try so the cleanup below can always delete it.
        self.cfg = None
        try:
            nargs = code_obj.co_argcount
            self.cfg = self.control_flow_builder.visit(
                self.bytecode_flow_builder.visit(code_obj), nargs)
            flow = self.phi_injector.visit_cfg(self.cfg, nargs)
            return self.visit(flow)
        finally:
            # Drop the per-translation state even when translation fails,
            # so the translator can be reused for the next function.
            del self.cfg
            del self.globals
            del self.code_obj
            del self.function
            del self.llvm_type
            del self.loop_stack

    def enter_flow_object (self, flow):
        """Add the LLVM function and a basic block per reachable block.

        A block gets a basic block only when block 0 is listed in
        self.cfg.blocks_reaching for it.
        """
        super(LLVMTranslator, self).enter_flow_object(flow)
        self.llvm_function = self.llvm_module.add_function(
            self.llvm_type, self.function.__name__)
        self.llvm_blocks = {}
        self.llvm_definitions = {}
        self.pending_phis = {}
        for block in self.block_list:
            if 0 in self.cfg.blocks_reaching[block]:
                bb = self.llvm_function.append_basic_block(
                    'BLOCK_%d' % (block,))
                self.llvm_blocks[block] = bb

    def exit_flow_object (self, flow):
        """Return the LLVM function and drop the per-flow state."""
        super(LLVMTranslator, self).exit_flow_object(flow)
        ret_val = self.llvm_function
        del self.pending_phis
        del self.llvm_definitions
        del self.llvm_blocks
        del self.llvm_function
        return ret_val

    def enter_block (self, block):
        """Position a new builder in block; False if it has no LLVM block."""
        ret_val = False
        if block in self.llvm_blocks:
            self.llvm_block = self.llvm_blocks[block]
            self.builder = lc.Builder.new(self.llvm_block)
            ret_val = True
        return ret_val

    def exit_block (self, block):
        del self.llvm_block
        del self.builder

    def visit_synthetic_op (self, i, op, arg, *args, **kws):
        """Dispatch a synthetic opcode by its synthetic_opname entry."""
        method = getattr(self, 'op_%s' % (synthetic_opname[op],))
        return method(i, op, arg, *args, **kws)

    def op_REF_ARG (self, i, op, arg, *args, **kws):
        return [self.llvm_function.args[arg]]

    def op_BUILD_PHI (self, i, op, arg, *args, **kws):
        """Create a phi node from the REF_DEF entries in arg.

        Definitions already in llvm_definitions become incoming values
        right away; the rest are queued in pending_phis and added by
        op_DEFINITION once they are translated.  The phi takes the type
        of the first available definition.
        """
        phi_type = None
        incoming = []
        pending = []
        for child in arg:
            child_block, _, child_opname, def_index, _ = child
            assert child_opname == 'REF_DEF'
            if def_index in self.llvm_definitions:
                child_def = self.llvm_definitions[def_index]
                if phi_type is None:
                    phi_type = child_def.type
                incoming.append((child_block, child_def))
            else:
                pending.append((def_index, child_block))
        phi = self.builder.phi(phi_type)
        for block_index, defn in incoming:
            phi.add_incoming(defn, self.llvm_blocks[block_index])
        for defn_index, block_index in pending:
            if defn_index not in self.pending_phis:
                self.pending_phis[defn_index] = []
            self.pending_phis[defn_index].append((phi, block_index))
        return [phi]

    def op_DEFINITION (self, i, op, def_index, *args, **kws):
        """Record definition def_index and feed any phis waiting on it."""
        assert len(args) == 1
        arg = args[0]
        if def_index in self.pending_phis:
            for phi, block_index in self.pending_phis[def_index]:
                phi.add_incoming(arg, self.llvm_blocks[block_index])
        self.llvm_definitions[def_index] = arg
        return args

    def op_REF_DEF (self, i, op, arg, *args, **kws):
        return [self.llvm_definitions[arg]]

    def op_BINARY_ADD (self, i, op, arg, *args, **kws):
        """Add integers or floats; for a pointer, index it with gep."""
        arg1, arg2 = args
        if arg1.type.kind == lc.TYPE_INTEGER:
            ret_val = [self.builder.add(arg1, arg2)]
        elif arg1.type.kind in (lc.TYPE_FLOAT, lc.TYPE_DOUBLE):
            ret_val = [self.builder.fadd(arg1, arg2)]
        elif arg1.type.kind == lc.TYPE_POINTER:
            ret_val = [self.builder.gep(arg1, [arg2])]
        else:
            raise NotImplementedError("LLVMTranslator.op_BINARY_ADD for %r" %
                                      (args,))
        return ret_val

    def op_BINARY_AND (self, i, op, arg, *args, **kws):
        return [self.builder.and_(args[0], args[1])]

    def op_BINARY_DIVIDE (self, i, op, arg, *args, **kws):
        arg1, arg2 = args
        if arg1.type.kind == lc.TYPE_INTEGER:
            ret_val = [self.builder.sdiv(arg1, arg2)]
        elif arg1.type.kind in (lc.TYPE_FLOAT, lc.TYPE_DOUBLE):
            ret_val = [self.builder.fdiv(arg1, arg2)]
        else:
            raise NotImplementedError("LLVMTranslator.op_BINARY_DIVIDE for %r"
                                      % (args,))
        return ret_val

    def op_BINARY_FLOOR_DIVIDE (self, i, op, arg, *args, **kws):
        raise NotImplementedError("LLVMTranslator.op_BINARY_FLOOR_DIVIDE")

    def op_BINARY_LSHIFT (self, i, op, arg, *args, **kws):
        return [self.builder.shl(args[0], args[1])]

    def op_BINARY_MODULO (self, i, op, arg, *args, **kws):
        arg1, arg2 = args
        if arg1.type.kind == lc.TYPE_INTEGER:
            ret_val = [self.builder.srem(arg1, arg2)]
        elif arg1.type.kind in (lc.TYPE_FLOAT, lc.TYPE_DOUBLE):
            ret_val = [self.builder.frem(arg1, arg2)]
        else:
            raise NotImplementedError("LLVMTranslator.op_BINARY_MODULO for %r"
                                      % (args,))
        return ret_val

    def op_BINARY_MULTIPLY (self, i, op, arg, *args, **kws):
        arg1, arg2 = args
        if arg1.type.kind == lc.TYPE_INTEGER:
            ret_val = [self.builder.mul(arg1, arg2)]
        elif arg1.type.kind in (lc.TYPE_FLOAT, lc.TYPE_DOUBLE):
            ret_val = [self.builder.fmul(arg1, arg2)]
        else:
            raise NotImplementedError("LLVMTranslator.op_BINARY_MULTIPLY for "
                                      "%r" % (args,))
        return ret_val

    def op_BINARY_OR (self, i, op, arg, *args, **kws):
        return [self.builder.or_(args[0], args[1])]

    def op_BINARY_POWER (self, i, op, arg, *args, **kws):
        raise NotImplementedError("LLVMTranslator.op_BINARY_POWER")

    def op_BINARY_RSHIFT (self, i, op, arg, *args, **kws):
        # NOTE(review): lshr is a logical shift, while the other integer
        # handlers here use signed operations -- confirm this is intended.
        return [self.builder.lshr(args[0], args[1])]

    def op_BINARY_SUBSCR (self, i, op, arg, *args, **kws):
        """Index args[0] with gep; load unless the element is a pointer."""
        arr_val = args[0]
        index_vals = args[1:]
        ret_val = gep_result = self.builder.gep(arr_val, index_vals)
        if (gep_result.type.kind == lc.TYPE_POINTER and
            gep_result.type.pointee.kind != lc.TYPE_POINTER):
            ret_val = self.builder.load(gep_result)
        return [ret_val]

    def op_BINARY_SUBTRACT (self, i, op, arg, *args, **kws):
        arg1, arg2 = args
        if arg1.type.kind == lc.TYPE_INTEGER:
            ret_val = [self.builder.sub(arg1, arg2)]
        elif arg1.type.kind in (lc.TYPE_FLOAT, lc.TYPE_DOUBLE):
            ret_val = [self.builder.fsub(arg1, arg2)]
        else:
            raise NotImplementedError("LLVMTranslator.op_BINARY_SUBTRACT for "
                                      "%r" % (args,))
        return ret_val

    op_BINARY_TRUE_DIVIDE = op_BINARY_DIVIDE

    def op_BINARY_XOR (self, i, op, arg, *args, **kws):
        return [self.builder.xor(args[0], args[1])]

    def op_BREAK_LOOP (self, i, op, arg, *args, **kws):
        return [self.builder.branch(self.llvm_blocks[arg])]

    def op_BUILD_SLICE (self, i, op, arg, *args, **kws):
        raise NotImplementedError("LLVMTranslator.op_BUILD_SLICE")

    def op_BUILD_TUPLE (self, i, op, arg, *args, **kws):
        return args

    def op_CALL_FUNCTION (self, i, op, arg, *args, **kws):
        """Translate a call whose callee is args[0].

        An LLVM function type becomes a call to a module function named
        after the type's __name__; any other LLVM type is a cast of the
        single argument; a callee whose __name__ is an lc.Builder
        attribute is invoked with the current builder.
        """
        fn = args[0]
        args = args[1:]
        if isinstance(fn, lc.Type):
            if isinstance(fn, lc.FunctionType):
                ret_val = [self.builder.call(
                        self.llvm_module.get_or_insert_function(fn, fn.__name__),
                        args)]
            else:
                assert len(args) == 1
                ret_val = [LLVMCaster.build_cast(self.builder, args[0], fn)]
        elif fn.__name__ in lc.Builder.__dict__:
            ret_val = [fn(self.builder, *args)]
        else:
            raise NotImplementedError("Don't know how to call %r!" % (fn,))
        return ret_val

    def op_CALL_FUNCTION_KW (self, i, op, arg, *args, **kws):
        raise NotImplementedError("LLVMTranslator.op_CALL_FUNCTION_KW")

    def op_CALL_FUNCTION_VAR (self, i, op, arg, *args, **kws):
        raise NotImplementedError("LLVMTranslator.op_CALL_FUNCTION_VAR")

    def op_CALL_FUNCTION_VAR_KW (self, i, op, arg, *args, **kws):
        raise NotImplementedError("LLVMTranslator.op_CALL_FUNCTION_VAR_KW")

    def op_COMPARE_OP (self, i, op, arg, *args, **kws):
        """Emit a signed integer or ordered float comparison."""
        arg1, arg2 = args
        cmp_kind = opcode.cmp_op[arg]
        if isinstance(arg1.type, lc.IntegerType):
            ret_val = [self.builder.icmp(_compare_mapping_sint[cmp_kind],
                                         arg1, arg2)]
        elif arg1.type.kind in (lc.TYPE_FLOAT, lc.TYPE_DOUBLE):
            ret_val = [self.builder.fcmp(_compare_mapping_float[cmp_kind],
                                         arg1, arg2)]
        else:
            raise NotImplementedError('Comparison of type %r' % (arg1.type,))
        return ret_val

    def op_CONTINUE_LOOP (self, i, op, arg, *args, **kws):
        raise NotImplementedError("LLVMTranslator.op_CONTINUE_LOOP")

    def op_DELETE_ATTR (self, i, op, arg, *args, **kws):
        raise NotImplementedError("LLVMTranslator.op_DELETE_ATTR")

    def op_DELETE_SLICE (self, i, op, arg, *args, **kws):
        raise NotImplementedError("LLVMTranslator.op_DELETE_SLICE")

    def op_FOR_ITER (self, i, op, arg, *args, **kws):
        raise NotImplementedError("LLVMTranslator.op_FOR_ITER")

    def op_GET_ITER (self, i, op, arg, *args, **kws):
        raise NotImplementedError("LLVMTranslator.op_GET_ITER")

    op_INPLACE_ADD = op_BINARY_ADD
    op_INPLACE_AND = op_BINARY_AND
    op_INPLACE_DIVIDE = op_BINARY_DIVIDE
    op_INPLACE_FLOOR_DIVIDE = op_BINARY_FLOOR_DIVIDE
    op_INPLACE_LSHIFT = op_BINARY_LSHIFT
    op_INPLACE_MODULO = op_BINARY_MODULO
    op_INPLACE_MULTIPLY = op_BINARY_MULTIPLY
    op_INPLACE_OR = op_BINARY_OR
    op_INPLACE_POWER = op_BINARY_POWER
    op_INPLACE_RSHIFT = op_BINARY_RSHIFT
    op_INPLACE_SUBTRACT = op_BINARY_SUBTRACT
    op_INPLACE_TRUE_DIVIDE = op_BINARY_TRUE_DIVIDE
    op_INPLACE_XOR = op_BINARY_XOR

    def op_JUMP_ABSOLUTE (self, i, op, arg, *args, **kws):
        return [self.builder.branch(self.llvm_blocks[arg])]

    def op_JUMP_FORWARD (self, i, op, arg, *args, **kws):
        return [self.builder.branch(self.llvm_blocks[i + arg + 3])]

    def op_JUMP_IF_FALSE (self, i, op, arg, *args, **kws):
        raise NotImplementedError("LLVMTranslator.op_JUMP_IF_FALSE")

    def op_JUMP_IF_FALSE_OR_POP (self, i, op, arg, *args, **kws):
        raise NotImplementedError("LLVMTranslator.op_JUMP_IF_FALSE_OR_POP")

    def op_JUMP_IF_TRUE (self, i, op, arg, *args, **kws):
        raise NotImplementedError("LLVMTranslator.op_JUMP_IF_TRUE")

    def op_JUMP_IF_TRUE_OR_POP (self, i, op, arg, *args, **kws):
        raise NotImplementedError("LLVMTranslator.op_JUMP_IF_TRUE_OR_POP")

    def op_LOAD_ATTR (self, i, op, arg, *args, **kws):
        raise NotImplementedError("LLVMTranslator.op_LOAD_ATTR")

    def op_LOAD_CONST (self, i, op, arg, *args, **kws):
        """Translate an int, float or None constant from co_consts.

        None is passed through as [None]; op_RETURN_VALUE turns it into
        a void return.
        """
        py_val = self.code_obj.co_consts[arg]
        if isinstance(py_val, int):
            ret_val = [lc.Constant.int(bytetype.lc_int, py_val)]
        elif isinstance(py_val, float):
            ret_val = [lc.Constant.double(py_val)]
        elif py_val is None:
            ret_val = [None]
        else:
            raise NotImplementedError('Constant converstion for %r' %
                                      (py_val,))
        return ret_val

    def op_LOAD_GLOBAL (self, i, op, arg, *args, **kws):
        """Look the name up in self.globals.

        A value without a __name__ is given the global's name, which
        op_CALL_FUNCTION relies on.
        """
        ret_val = self.globals[self.code_obj.co_names[arg]]
        if not hasattr(ret_val, '__name__'):
            ret_val.__name__ = self.code_obj.co_names[arg]
        return [ret_val]

    def op_POP_BLOCK (self, i, op, arg, *args, **kws):
        self.loop_stack.pop()
        return [self.builder.branch(self.llvm_blocks[i + 1])]

    def op_POP_JUMP_IF_FALSE (self, i, op, arg, *args, **kws):
        # True continues at the next instruction (i + 3), false jumps to arg.
        return [self.builder.cbranch(args[0], self.llvm_blocks[i + 3],
                                     self.llvm_blocks[arg])]

    def op_POP_JUMP_IF_TRUE (self, i, op, arg, *args, **kws):
        raise NotImplementedError("LLVMTranslator.op_POP_JUMP_IF_TRUE")

    def op_POP_TOP (self, i, op, arg, *args, **kws):
        return args

    def op_RETURN_VALUE (self, i, op, arg, *args, **kws):
        """Emit ret_void for a None operand, ret otherwise."""
        if args[0] is None:
            ret_val = [self.builder.ret_void()]
        else:
            ret_val = [self.builder.ret(args[0])]
        return ret_val

    def op_SETUP_LOOP (self, i, op, arg, *args, **kws):
        self.loop_stack.append((i, arg))
        return [self.builder.branch(self.llvm_blocks[i + 3])]

    def op_SLICE (self, i, op, arg, *args, **kws):
        raise NotImplementedError("LLVMTranslator.op_SLICE")

    def op_STORE_ATTR (self, i, op, arg, *args, **kws):
        raise NotImplementedError("LLVMTranslator.op_STORE_ATTR")

    def op_STORE_SLICE (self, i, op, arg, *args, **kws):
        raise NotImplementedError("LLVMTranslator.op_STORE_SLICE")

    def op_STORE_SUBSCR (self, i, op, arg, *args, **kws):
        """Store args[0] at the gep address of args[1] indexed by args[2]."""
        store_val, arr_val, index_val = args
        dest_addr = self.builder.gep(arr_val, [index_val])
        return [self.builder.store(store_val, dest_addr)]

    def op_UNARY_CONVERT (self, i, op, arg, *args, **kws):
        raise NotImplementedError("LLVMTranslator.op_UNARY_CONVERT")

    def op_UNARY_INVERT (self, i, op, arg, *args, **kws):
        raise NotImplementedError("LLVMTranslator.op_UNARY_INVERT")

    def op_UNARY_NEGATIVE (self, i, op, arg, *args, **kws):
        raise NotImplementedError("LLVMTranslator.op_UNARY_NEGATIVE")

    def op_UNARY_NOT (self, i, op, arg, *args, **kws):
        raise NotImplementedError("LLVMTranslator.op_UNARY_NOT")

    def op_UNARY_POSITIVE (self, i, op, arg, *args, **kws):
        raise NotImplementedError("LLVMTranslator.op_UNARY_POSITIVE")

# ______________________________________________________________________

def translate_function (func, lltype, llvm_module = None, **kws):
    """Translate func as lltype and return the LLVMTranslator used.

    Keyword arguments are passed to translate() as its env mapping.
    """
    translator = LLVMTranslator(llvm_module)
    translator.translate(func, lltype, kws)
    return translator

# ______________________________________________________________________
# Main (self-test) routine

def main (*args):
    """Translate the named tests.llfuncs functions and print the module.

    With no names 'doslice' is used; 'all' selects every public name in
    tests.llfuncs.  Function types are read from tests.llfunctys.
    """
    from tests import llfuncs, llfunctys
    if not args:
        args = ('doslice',)
    elif 'all' in args:
        args = [llfunc
                for llfunc in dir(llfuncs) if not llfunc.startswith('_')]
    llvm_module = lc.Module.new('test_module')
    for arg in args:
        translate_function(getattr(llfuncs, arg), getattr(llfunctys, arg),
                           llvm_module)
    print(llvm_module)

# ______________________________________________________________________

if __name__ == '__main__':
    import sys
    main(*sys.argv[1:])

# ______________________________________________________________________
# End of byte_translator.py

# diff --git a/bytecode_visitor.py b/bytecode_visitor.py (new file mode 100644, index 0000000..6048ba8)
#!
/usr/bin/env python +# ______________________________________________________________________ + +import itertools + +import opcode +from opcode_util import itercode + +# ______________________________________________________________________ + +class BytecodeVisitor (object): + opnames = [name.split('+')[0] for name in opcode.opname] + + def visit_op (self, i, op, arg, *args, **kws): + if op < 0: + ret_val = self.visit_synthetic_op(i, op, arg, *args, **kws) + else: + method = getattr(self, 'op_' + self.opnames[op]) + ret_val = method(i, op, arg, *args, **kws) + return ret_val + + def visit_synthetic_op (self, i, op, arg, *args, **kws): + raise NotImplementedError( + 'BytecodeVisitor.visit_synthetic_op() must be overloaded if using ' + 'synthetic opcodes.') + + def op_BINARY_ADD (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_BINARY_ADD") + + def op_BINARY_AND (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_BINARY_AND") + + def op_BINARY_DIVIDE (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_BINARY_DIVIDE") + + def op_BINARY_FLOOR_DIVIDE (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_BINARY_FLOOR_DIVIDE") + + def op_BINARY_LSHIFT (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_BINARY_LSHIFT") + + def op_BINARY_MODULO (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_BINARY_MODULO") + + def op_BINARY_MULTIPLY (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_BINARY_MULTIPLY") + + def op_BINARY_OR (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_BINARY_OR") + + def op_BINARY_POWER (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_BINARY_POWER") + + def op_BINARY_RSHIFT (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_BINARY_RSHIFT") + + def 
op_BINARY_SUBSCR (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_BINARY_SUBSCR") + + def op_BINARY_SUBTRACT (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_BINARY_SUBTRACT") + + def op_BINARY_TRUE_DIVIDE (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_BINARY_TRUE_DIVIDE") + + def op_BINARY_XOR (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_BINARY_XOR") + + def op_BREAK_LOOP (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_BREAK_LOOP") + + def op_BUILD_CLASS (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_BUILD_CLASS") + + def op_BUILD_LIST (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_BUILD_LIST") + + def op_BUILD_MAP (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_BUILD_MAP") + + def op_BUILD_SET (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_BUILD_SET") + + def op_BUILD_SLICE (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_BUILD_SLICE") + + def op_BUILD_TUPLE (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_BUILD_TUPLE") + + def op_CALL_FUNCTION (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_CALL_FUNCTION") + + def op_CALL_FUNCTION_KW (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_CALL_FUNCTION_KW") + + def op_CALL_FUNCTION_VAR (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_CALL_FUNCTION_VAR") + + def op_CALL_FUNCTION_VAR_KW (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_CALL_FUNCTION_VAR_KW") + + def op_COMPARE_OP (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_COMPARE_OP") + + def op_CONTINUE_LOOP (self, i, op, arg, 
*args, **kws): + raise NotImplementedError("BytecodeVisitor.op_CONTINUE_LOOP") + + def op_DELETE_ATTR (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_DELETE_ATTR") + + def op_DELETE_DEREF (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_DELETE_DEREF") + + def op_DELETE_FAST (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_DELETE_FAST") + + def op_DELETE_GLOBAL (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_DELETE_GLOBAL") + + def op_DELETE_NAME (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_DELETE_NAME") + + def op_DELETE_SLICE (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_DELETE_SLICE") + + def op_DELETE_SUBSCR (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_DELETE_SUBSCR") + + def op_DUP_TOP (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_DUP_TOP") + + def op_DUP_TOPX (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_DUP_TOPX") + + def op_DUP_TOP_TWO (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_DUP_TOP_TWO") + + def op_END_FINALLY (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_END_FINALLY") + + def op_EXEC_STMT (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_EXEC_STMT") + + def op_EXTENDED_ARG (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_EXTENDED_ARG") + + def op_FOR_ITER (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_FOR_ITER") + + def op_GET_ITER (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_GET_ITER") + + def op_IMPORT_FROM (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_IMPORT_FROM") + + def op_IMPORT_NAME (self, i, 
op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_IMPORT_NAME") + + def op_IMPORT_STAR (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_IMPORT_STAR") + + def op_INPLACE_ADD (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_INPLACE_ADD") + + def op_INPLACE_AND (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_INPLACE_AND") + + def op_INPLACE_DIVIDE (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_INPLACE_DIVIDE") + + def op_INPLACE_FLOOR_DIVIDE (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_INPLACE_FLOOR_DIVIDE") + + def op_INPLACE_LSHIFT (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_INPLACE_LSHIFT") + + def op_INPLACE_MODULO (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_INPLACE_MODULO") + + def op_INPLACE_MULTIPLY (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_INPLACE_MULTIPLY") + + def op_INPLACE_OR (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_INPLACE_OR") + + def op_INPLACE_POWER (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_INPLACE_POWER") + + def op_INPLACE_RSHIFT (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_INPLACE_RSHIFT") + + def op_INPLACE_SUBTRACT (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_INPLACE_SUBTRACT") + + def op_INPLACE_TRUE_DIVIDE (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_INPLACE_TRUE_DIVIDE") + + def op_INPLACE_XOR (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_INPLACE_XOR") + + def op_JUMP_ABSOLUTE (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_JUMP_ABSOLUTE") + + def op_JUMP_FORWARD (self, i, op, arg, 
*args, **kws): + raise NotImplementedError("BytecodeVisitor.op_JUMP_FORWARD") + + def op_JUMP_IF_FALSE (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_JUMP_IF_FALSE") + + def op_JUMP_IF_FALSE_OR_POP (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_JUMP_IF_FALSE_OR_POP") + + def op_JUMP_IF_TRUE (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_JUMP_IF_TRUE") + + def op_JUMP_IF_TRUE_OR_POP (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_JUMP_IF_TRUE_OR_POP") + + def op_LIST_APPEND (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_LIST_APPEND") + + def op_LOAD_ATTR (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_LOAD_ATTR") + + def op_LOAD_BUILD_CLASS (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_LOAD_BUILD_CLASS") + + def op_LOAD_CLOSURE (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_LOAD_CLOSURE") + + def op_LOAD_CONST (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_LOAD_CONST") + + def op_LOAD_DEREF (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_LOAD_DEREF") + + def op_LOAD_FAST (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_LOAD_FAST") + + def op_LOAD_GLOBAL (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_LOAD_GLOBAL") + + def op_LOAD_LOCALS (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_LOAD_LOCALS") + + def op_LOAD_NAME (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_LOAD_NAME") + + def op_MAKE_CLOSURE (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_MAKE_CLOSURE") + + def op_MAKE_FUNCTION (self, i, op, arg, *args, **kws): + raise 
NotImplementedError("BytecodeVisitor.op_MAKE_FUNCTION") + + def op_MAP_ADD (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_MAP_ADD") + + def op_NOP (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_NOP") + + def op_POP_BLOCK (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_POP_BLOCK") + + def op_POP_EXCEPT (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_POP_EXCEPT") + + def op_POP_JUMP_IF_FALSE (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_POP_JUMP_IF_FALSE") + + def op_POP_JUMP_IF_TRUE (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_POP_JUMP_IF_TRUE") + + def op_POP_TOP (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_POP_TOP") + + def op_PRINT_EXPR (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_PRINT_EXPR") + + def op_PRINT_ITEM (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_PRINT_ITEM") + + def op_PRINT_ITEM_TO (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_PRINT_ITEM_TO") + + def op_PRINT_NEWLINE (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_PRINT_NEWLINE") + + def op_PRINT_NEWLINE_TO (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_PRINT_NEWLINE_TO") + + def op_RAISE_VARARGS (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_RAISE_VARARGS") + + def op_RETURN_VALUE (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_RETURN_VALUE") + + def op_ROT_FOUR (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_ROT_FOUR") + + def op_ROT_THREE (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_ROT_THREE") + + def op_ROT_TWO (self, i, op, arg, *args, 
**kws): + raise NotImplementedError("BytecodeVisitor.op_ROT_TWO") + + def op_SETUP_EXCEPT (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_SETUP_EXCEPT") + + def op_SETUP_FINALLY (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_SETUP_FINALLY") + + def op_SETUP_LOOP (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_SETUP_LOOP") + + def op_SETUP_WITH (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_SETUP_WITH") + + def op_SET_ADD (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_SET_ADD") + + def op_SLICE (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_SLICE") + + def op_STOP_CODE (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_STOP_CODE") + + def op_STORE_ATTR (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_STORE_ATTR") + + def op_STORE_DEREF (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_STORE_DEREF") + + def op_STORE_FAST (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_STORE_FAST") + + def op_STORE_GLOBAL (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_STORE_GLOBAL") + + def op_STORE_LOCALS (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_STORE_LOCALS") + + def op_STORE_MAP (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_STORE_MAP") + + def op_STORE_NAME (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_STORE_NAME") + + def op_STORE_SLICE (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_STORE_SLICE") + + def op_STORE_SUBSCR (self, i, op, arg, *args, **kws): + raise NotImplementedError("BytecodeVisitor.op_STORE_SUBSCR") + + def op_UNARY_CONVERT (self, i, op, arg, *args, 
class BytecodeIterVisitor (BytecodeVisitor):
    '''Visitor that walks the raw bytecode string of a code object,
    dispatching every decoded instruction through visit_op().'''

    def visit (self, co_obj):
        '''Visit each instruction of co_obj.co_code in order.

        Calls enter_code_object() first, and returns whatever
        exit_code_object() returns.'''
        self.enter_code_object(co_obj)
        instructions = itercode(co_obj.co_code)
        for index, opcode_num, oparg in instructions:
            self.visit_op(index, opcode_num, oparg)
        result = self.exit_code_object(co_obj)
        return result

    def enter_code_object (self, co_obj):
        '''Hook called before the first instruction; no-op by default.'''
        return None

    def exit_code_object (self, co_obj):
        '''Hook called after the last instruction; its return value is
        the result of visit().  Returns None by default.'''
        return None
class BenignBytecodeVisitorMixin (object):
    '''Mixin that supplies a pass-through handler for every opcode.

    Each op_* attribute is the same function, which re-emits the
    instruction it was given as a one-element list holding an
    (index, opcode, opname, arg, child_args) tuple.  The opname is
    looked up in self.opnames, which the class mixed with must
    provide.'''

    def _do_nothing (self, i, op, arg, *args, **kws):
        '''Return [(i, op, self.opnames[op], arg, args)]; keyword
        arguments are accepted and ignored.'''
        entry = (i, op, self.opnames[op], arg, args)
        return [entry]

    # Binary operators.
    op_BINARY_ADD = op_BINARY_AND = op_BINARY_DIVIDE = _do_nothing
    op_BINARY_FLOOR_DIVIDE = op_BINARY_LSHIFT = _do_nothing
    op_BINARY_MODULO = op_BINARY_MULTIPLY = op_BINARY_OR = _do_nothing
    op_BINARY_POWER = op_BINARY_RSHIFT = op_BINARY_SUBSCR = _do_nothing
    op_BINARY_SUBTRACT = op_BINARY_TRUE_DIVIDE = op_BINARY_XOR = _do_nothing

    op_BREAK_LOOP = _do_nothing

    # Container / object construction.
    op_BUILD_CLASS = op_BUILD_LIST = op_BUILD_MAP = _do_nothing
    op_BUILD_SET = op_BUILD_SLICE = op_BUILD_TUPLE = _do_nothing

    # Calls.
    op_CALL_FUNCTION = op_CALL_FUNCTION_KW = _do_nothing
    op_CALL_FUNCTION_VAR = op_CALL_FUNCTION_VAR_KW = _do_nothing

    op_COMPARE_OP = op_CONTINUE_LOOP = _do_nothing

    # Deletions.
    op_DELETE_ATTR = op_DELETE_DEREF = op_DELETE_FAST = _do_nothing
    op_DELETE_GLOBAL = op_DELETE_NAME = _do_nothing
    op_DELETE_SLICE = op_DELETE_SUBSCR = _do_nothing

    op_DUP_TOP = op_DUP_TOPX = op_DUP_TOP_TWO = _do_nothing

    op_END_FINALLY = op_EXEC_STMT = op_EXTENDED_ARG = _do_nothing
    op_FOR_ITER = op_GET_ITER = _do_nothing

    # Imports.
    op_IMPORT_FROM = op_IMPORT_NAME = op_IMPORT_STAR = _do_nothing

    # In-place operators.
    op_INPLACE_ADD = op_INPLACE_AND = op_INPLACE_DIVIDE = _do_nothing
    op_INPLACE_FLOOR_DIVIDE = op_INPLACE_LSHIFT = _do_nothing
    op_INPLACE_MODULO = op_INPLACE_MULTIPLY = op_INPLACE_OR = _do_nothing
    op_INPLACE_POWER = op_INPLACE_RSHIFT = _do_nothing
    op_INPLACE_SUBTRACT = op_INPLACE_TRUE_DIVIDE = op_INPLACE_XOR = _do_nothing

    # Jumps.
    op_JUMP_ABSOLUTE = op_JUMP_FORWARD = _do_nothing
    op_JUMP_IF_FALSE = op_JUMP_IF_FALSE_OR_POP = _do_nothing
    op_JUMP_IF_TRUE = op_JUMP_IF_TRUE_OR_POP = _do_nothing

    op_LIST_APPEND = _do_nothing

    # Loads.
    op_LOAD_ATTR = op_LOAD_BUILD_CLASS = op_LOAD_CLOSURE = _do_nothing
    op_LOAD_CONST = op_LOAD_DEREF = op_LOAD_FAST = _do_nothing
    op_LOAD_GLOBAL = op_LOAD_LOCALS = op_LOAD_NAME = _do_nothing

    op_MAKE_CLOSURE = op_MAKE_FUNCTION = _do_nothing
    op_MAP_ADD = op_NOP = _do_nothing

    # Stack pops.
    op_POP_BLOCK = op_POP_EXCEPT = _do_nothing
    op_POP_JUMP_IF_FALSE = op_POP_JUMP_IF_TRUE = op_POP_TOP = _do_nothing

    # Print statements.
    op_PRINT_EXPR = op_PRINT_ITEM = op_PRINT_ITEM_TO = _do_nothing
    op_PRINT_NEWLINE = op_PRINT_NEWLINE_TO = _do_nothing

    op_RAISE_VARARGS = op_RETURN_VALUE = _do_nothing

    op_ROT_FOUR = op_ROT_THREE = op_ROT_TWO = _do_nothing

    # Block setup.
    op_SETUP_EXCEPT = op_SETUP_FINALLY = _do_nothing
    op_SETUP_LOOP = op_SETUP_WITH = _do_nothing

    op_SET_ADD = op_SLICE = op_STOP_CODE = _do_nothing

    # Stores.
    op_STORE_ATTR = op_STORE_DEREF = op_STORE_FAST = _do_nothing
    op_STORE_GLOBAL = op_STORE_LOCALS = op_STORE_MAP = _do_nothing
    op_STORE_NAME = op_STORE_SLICE = op_STORE_SUBSCR = _do_nothing

    # Unary operators.
    op_UNARY_CONVERT = op_UNARY_INVERT = _do_nothing
    op_UNARY_NEGATIVE = op_UNARY_NOT = op_UNARY_POSITIVE = _do_nothing

    op_UNPACK_EX = op_UNPACK_SEQUENCE = _do_nothing

    op_WITH_CLEANUP = op_YIELD_VALUE = _do_nothing
class ControlFlowGraph (object):
    '''Control flow graph over basic blocks, annotated with the local
    variables each block reads and writes.

    All maps are keyed by block key:

    * blocks          - the value passed to add_block().
    * blocks_in/out   - sets of predecessor / successor block keys.
    * blocks_reads    - set of local indices read in the block.
    * blocks_writes   - set of local indices written in the block.
    * blocks_writer   - map from local index to the largest instruction
                        index recorded for a write of that local.
    * blocks_dom      - dominator sets, filled by compute_dataflow().
    * blocks_reaching - reaching-block sets, filled by compute_dataflow().

    The attribute reaching_definitions only exists while memoized
    results of get_reaching_definitions() are available.'''

    def __init__ (self):
        self.blocks = {}
        self.blocks_in = {}
        self.blocks_out = {}
        self.blocks_reads = {}
        self.blocks_writes = {}
        self.blocks_writer = {}
        self.blocks_dom = {}
        self.blocks_reaching = {}

    def add_block (self, key, value = None):
        '''Register (or re-register) a block.  Edge and read/write
        bookkeeping is only initialized the first time a key is seen,
        so re-adding a block replaces its value but keeps its edges.'''
        self.blocks[key] = value
        if key not in self.blocks_in:
            self.blocks_in[key] = set()
            self.blocks_out[key] = set()
            self.blocks_reads[key] = set()
            self.blocks_writes[key] = set()
            self.blocks_writer[key] = {}

    def add_edge (self, from_block, to_block):
        '''Add a directed edge.  Both blocks must already have been
        added (KeyError otherwise).'''
        self.blocks_out[from_block].add(to_block)
        self.blocks_in[to_block].add(from_block)

    def unlink_unreachables (self):
        '''Remove the out edges of every block, other than block 0,
        that has no in edges, repeating until nothing changes.

        The blocks themselves stay registered; only their edges are
        dropped.'''
        # Build a real list: dict.keys() is a view on Python 3 and
        # supports neither remove() nor slicing.
        candidates = [block for block in self.blocks if block != 0]
        changed = True
        while changed:
            changed = False
            # Iterate over a snapshot since candidates shrinks below.
            for block in list(candidates):
                if not self.blocks_in[block]:
                    successors = self.blocks_out[block]
                    for out_edge in successors:
                        self.blocks_in[out_edge].discard(block)
                    successors.clear()
                    candidates.remove(block)
                    changed = True

    def compute_dataflow (self):
        '''Compute the dominator and reaching dataflow relationships
        in the CFG.

        Every block without in edges is treated as an entry.  Returns
        the (blocks_dom, blocks_reaching) pair of maps.'''
        blocks = set(self.blocks.keys())
        nonentry_blocks = blocks.copy()
        for block in blocks:
            # Each block gets its own set so that later mutation of
            # one dominator set cannot leak into another.
            self.blocks_dom[block] = set(blocks)
            self.blocks_reaching[block] = set((block,))
            if not self.blocks_in[block]:
                self.blocks_dom[block] = set((block,))
                nonentry_blocks.remove(block)
        changed = True
        while changed:
            changed = False
            for block in nonentry_blocks:
                preds = self.blocks_in[block]
                newdom = set.intersection(
                    *[self.blocks_dom[pred] for pred in preds])
                newdom.add(block)
                if newdom != self.blocks_dom[block]:
                    changed = True
                    self.blocks_dom[block] = newdom
                newreaching = set.union(
                    *[self.blocks_reaching[pred] for pred in preds])
                newreaching.add(block)
                if newreaching != self.blocks_reaching[block]:
                    changed = True
                    self.blocks_reaching[block] = newreaching
        return self.blocks_dom, self.blocks_reaching

    def update_for_ssa (self):
        '''Modify the blocks_writes map to reflect phi nodes inserted
        for static single assignment representations.'''
        joins = [block for block in self.blocks.keys()
                 if len(self.blocks_in[block]) > 1]
        changed = True
        while changed:
            changed = False
            for block in joins:
                for affected_local in self.phi_needed(block):
                    if affected_local not in self.blocks_writes[block]:
                        changed = True
                        # NOTE: For this to work, we assume that basic
                        # blocks are indexed by their instruction
                        # index in the VM bytecode.
                        self.writes_local(block, block, affected_local)
            if changed:
                # Any modifications have invalidated the reaching
                # definitions, so delete any memoized results.
                if hasattr(self, 'reaching_definitions'):
                    del self.reaching_definitions

    def idom (self, block):
        '''Compute the immediate dominator (idom) of the given block
        key.  Returns None if the block has no in edges.

        Note that in the case where there are multiple immediate
        dominators (a join after a non-loop branch), this returns one
        of the predecessors, but is not guaranteed to reliably select
        one over the others (depends on the ordering of the set type
        iterator).

        Raises IndexError when the block has several predecessors and
        every one of them is dominated by the block itself.  Requires
        compute_dataflow() to have run for blocks with more than one
        predecessor.'''
        preds = self.blocks_in[block]
        npreds = len(preds)
        if npreds == 0:
            ret_val = None
        elif npreds == 1:
            ret_val = tuple(preds)[0]
        else:
            # Skip back edges: predecessors that the block dominates.
            ret_val = [pred for pred in preds
                       if block not in self.blocks_dom[pred]][0]
        return ret_val

    def block_writes_to_writer_map (self, block):
        '''Return a fresh {local: block} map for every local written
        in the given block.'''
        return dict((local, block) for local in self.blocks_writes[block])

    def get_reaching_definitions (self, block):
        '''Return a nested map for the given block
        s.t. ret_val[pred][local] equals the block key for the
        definition of local that reaches the argument block via that
        predecessor.

        Useful for actually populating phi nodes, once you know you
        need them.  Results are memoized per block in
        self.reaching_definitions.'''
        has_memoized = hasattr(self, 'reaching_definitions')
        if has_memoized and block in self.reaching_definitions:
            return self.reaching_definitions[block]
        ret_val = {}
        for pred in self.blocks_in[block]:
            ret_val[pred] = self.block_writes_to_writer_map(pred)
            # Track visited blocks: when predecessor chains form a
            # cycle (e.g. the entry block is a loop target) the idom()
            # walk would otherwise never reach None.
            visited = set((pred,))
            crnt = self.idom(pred)
            while crnt is not None and crnt not in visited:
                visited.add(crnt)
                crnt_writer_map = self.block_writes_to_writer_map(crnt)
                # This order of update favors the first definitions
                # encountered in the traversal since the traversal
                # visits blocks in reverse execution order.
                crnt_writer_map.update(ret_val[pred])
                ret_val[pred] = crnt_writer_map
                crnt = self.idom(crnt)
        if not has_memoized:
            self.reaching_definitions = {}
        self.reaching_definitions[block] = ret_val
        return ret_val

    def nreaches (self, block):
        '''For each local, find the number of unique reaching
        definitions the current block has.'''
        reaching_definitions = self.get_reaching_definitions(block)
        definition_map = {}
        for pred in self.blocks_in[block]:
            for local, writer in reaching_definitions[pred].items():
                definition_map.setdefault(local, set()).add(writer)
        return dict((local, len(writers))
                    for local, writers in definition_map.items())

    def writes_local (self, block, write_instr_index, local_index):
        '''Record that the instruction at write_instr_index in block
        writes the local at local_index.'''
        self.blocks_writes[block].add(local_index)
        block_writers = self.blocks_writer[block]
        old_index = block_writers.get(local_index, -1)
        # This checks for a corner case that would impact
        # numba.translate.Translate.build_phi_nodes().
        assert old_index != write_instr_index, (
            "Found corner case for STORE_FAST at a CFG join!")
        block_writers[local_index] = max(write_instr_index, old_index)

    def phi_needed (self, join):
        '''Return the set of locals that will require a phi node to be
        generated at the given join.'''
        nreaches = self.nreaches(join)
        return set([local for local in nreaches.keys()
                    if nreaches[local] > 1])

    def pprint (self, *args, **kws):
        '''Pretty-print the instance dictionary; extra arguments are
        passed on to pprint.pprint().'''
        pprint.pprint(self.__dict__, *args, **kws)

    def pformat (self, *args, **kws):
        '''Return the pretty-printed instance dictionary as a string.'''
        return pprint.pformat(self.__dict__, *args, **kws)

    def to_dot (self, graph_name = None):
        '''Return a dot (digraph visualizer in Graphviz) graph
        description as a string.'''
        if graph_name is None:
            graph_name = 'CFG_%d' % id(self)
        lines_out = []
        for block_index in self.blocks:
            lines_out.append(
                'BLOCK_%r [shape=box, label="BLOCK_%r\\nr: %r, w: %r"];' %
                (block_index, block_index,
                 tuple(self.blocks_reads[block_index]),
                 tuple(self.blocks_writes[block_index])))
        for block_index in self.blocks:
            for out_edge in self.blocks_out[block_index]:
                lines_out.append('BLOCK_%r -> BLOCK_%r;' %
                                 (block_index, out_edge))
        return 'digraph %s {\n%s\n}\n' % (graph_name, '\n'.join(lines_out))
def generate_bytecode_visitor (classname = 'BytecodeVisitor',
                               baseclass = 'object', opnames = None):
    '''Return Python source for a visitor class with one op_<NAME>
    method per opcode, each raising NotImplementedError.

    classname -- name of the generated class (also used in the
                 NotImplementedError messages).
    baseclass -- base class expression placed in the class statement.
    opnames   -- optional iterable of opcode names; defaults to the
                 keys of opcode_util.OPCODE_MAP.  Anything after a '+'
                 in a name is dropped (e.g. 'SLICE+1' -> 'SLICE') and
                 duplicates are merged.

    The generated handlers take (self, i, op, arg, *args, **kws), the
    signature the visitor framework calls them with.'''
    if opnames is None:
        opnames = opcode_util.OPCODE_MAP.keys()
    unique_names = sorted(set(opname.split('+')[0] for opname in opnames))
    method_template = (
        '    def op_%s (self, i, op, arg, *args, **kws):\n'
        '        raise NotImplementedError("%s.op_%s")')
    methods = '\n\n'.join(method_template % (opname, classname, opname)
                          for opname in unique_names)
    return 'class %s (%s):\n%s\n' % (classname, baseclass, methods)
def get_string_constant (module, const_str):
    '''Return the global variable in the LLVM module that holds
    const_str as a NUL-terminated string, creating it on first use.

    The global is named after hash(const_str), so repeated requests
    for the same string reuse one internal-linkage global.'''
    const_name = "__STR_%x" % (hash(const_str),)
    try:
        ret_val = module.get_global_variable_named(const_name)
    except Exception:
        # The lookup failed, so define the constant.  Only ordinary
        # errors are handled here: KeyboardInterrupt and SystemExit
        # must not be mistaken for "no such global".
        lconst_str = lc.Constant.stringz(const_str)
        ret_val = module.add_global_variable(lconst_str.type, const_name)
        ret_val.initializer = lconst_str
        ret_val.linkage = lc.LINKAGE_INTERNAL
    return ret_val
def handle_abi_casts (self, builder, result):
    '''Apply the C default argument promotions to a value that is
    about to be passed to a variable argument function.

    builder -- LLVM builder positioned where the cast should go.
    result  -- LLVM value to promote.

    Returns the promoted value, or result itself when no promotion
    applies.'''
    kind = result.type.kind
    if kind == lc.TYPE_FLOAT:
        # NOTE: In C, a float passed through "..." is promoted to
        # double, so the callee reads a double.
        result = builder.fpext(result, bytetype.ldouble)
    elif (kind == lc.TYPE_INTEGER and
          result.type.width < bytetype.lc_int.width):
        # Integers narrower than int are likewise promoted to int.
        # Sign extension is used because the format characters this
        # module emits for 8 and 16 bit integers ('b', 'h') are signed.
        result = builder.sext(result, bytetype.lc_int)
    return result
lc.Type.function(lc.Type.void(), [_void_p]), + 'PyEval_RestoreThread') + # __________________________________________________ + # entry: + builder = lc.Builder.new(entry_block) + arg_types = llvm_function.type.pointee.args + parse_str = builder.gep( + get_string_constant( + self.target_module, + self.build_parse_string(arg_types)), + [_int32_zero, _int32_zero]) + parse_args = [builder.alloca(arg_ty) for arg_ty in arg_types] + parse_args.insert(0, parse_str) + parse_args.insert(0, self.crnt_function.args[1]) + parse_result = builder.call(_PyArg_ParseTuple, parse_args) + builder.cbranch(builder.icmp(lc.ICMP_NE, parse_result, _int32_zero), + args_ok_block, exit_block) + # __________________________________________________ + # args_ok: + builder = lc.Builder.new(args_ok_block) + thread_state = builder.call(_PyEval_SaveThread, ()) + target_args = [builder.load(parse_arg) for parse_arg in parse_args[2:]] + result = builder.call(llvm_function, target_args) + result_cast = self.handle_abi_casts(builder, result) + builder.call(_PyEval_RestoreThread, (thread_state,)) + build_str = builder.gep( + get_string_constant( + self.target_module, + self._build_parse_string(result.type)), + [_int32_zero, _int32_zero]) + py_result = builder.call(_Py_BuildValue, [build_str, result_cast]) + builder.branch(exit_block) + # __________________________________________________ + # exit: + builder = lc.Builder.new(exit_block) + rval = builder.phi(bytetype.l_pyobject_head_struct_p) + rval.add_incoming(lc.Constant.null(bytetype.l_pyobject_head_struct_p), + entry_block) + rval.add_incoming(py_result, args_ok_block) + builder.ret(rval) + return self.crnt_function + + def wrap_llvm_module (self, llvm_module, engine = None, py_module = None): + ''' + Shamefully adapted from bitey.bind.wrap_llvm_module(). 
class NoBiteyLoader(object):
    """
    Import hook that loads LLVM bitcode or assembly files as Python
    modules, wrapping their functions with NoBitey.

    Initially copied and adapted from bitey.loader module.

    The class itself acts as the finder (find_module is a classmethod
    and install() puts the class on sys.meta_path); instances act as
    the loader for one located source file.
    """
    def __init__(self, pkg, name, source, preload, postload):
        self.package = pkg
        self.name = name
        # Top-level modules have an empty package; avoid a leading dot.
        self.fullname = '.'.join((pkg, name)) if pkg else name
        self.source = source
        self.preload = preload
        self.postload = postload

    @classmethod
    def _check_magic(cls, filename):
        """Return True if filename is a readable file whose first
        bytes are an LLVM bitcode magic number (wrapper header
        0x0B17C0DE, or raw 'BC')."""
        try:
            with open(filename, "rb") as source_file:
                magic = source_file.read(4)
        except (IOError, OSError):
            # Missing, unreadable, or not a regular file.
            return False
        return magic == b'\xde\xc0\x17\x0b' or magic[:2] == b'\x42\x43'

    @classmethod
    def build_module(cls, fullname, source_path, source_data, preload=None,
                     postload=None):
        """Build and return a module object for the given LLVM source.

        fullname    -- dotted module name; only the last component is
                       used as the module's name.
        source_path -- path of the source; its suffix selects bitcode
                       ('.o', '.bc') or assembly ('.s') handling.  Any
                       other suffix yields a module with no wrappers.
        source_data -- contents of the source file.
        preload     -- optional Python source executed in the module
                       namespace before wrapping; it may define
                       _type_annotations.
        postload    -- optional Python source executed afterwards.
        """
        name = fullname.split(".")[-1]
        mod = types.ModuleType(name)
        # NOTE: preload/postload are arbitrary Python code run with
        # exec(); they must come from a trusted location, exactly like
        # any other imported module.
        if preload:
            exec(preload, mod.__dict__, mod.__dict__)
        type_annotations = getattr(mod, '_type_annotations', None)
        nb = NoBitey(type_annotations = type_annotations)
        if source_path.endswith(('.o', '.bc')):
            nb.wrap_llvm_bitcode(source_data, mod)
        elif source_path.endswith('.s'):
            nb.wrap_llvm_assembly(source_data, mod)
        if postload:
            exec(postload, mod.__dict__, mod.__dict__)
        return mod

    @classmethod
    def find_module(cls, fullname, paths = None):
        """Search paths (sys.path by default) for '<modname>.o',
        '<modname>.bc' or '<modname>.s', in that order per directory.

        A '.o' file only counts if it starts with a bitcode magic
        number.  Returns a loader instance for the first match, or
        None when nothing is found.
        """
        if paths is None:
            paths = sys.path
        names = fullname.split('.')
        modname = names[-1]
        probes = (('.o', cls._check_magic),
                  ('.bc', os.path.exists),
                  ('.s', os.path.exists))
        for entry in paths:
            path = os.path.join(os.path.realpath(entry), modname)
            for suffix, is_candidate in probes:
                source = path + suffix
                if is_candidate(source):
                    return cls('.'.join(names[:-1]), modname, source,
                               path + ".pre.py", path + ".post.py")
        return None

    def get_code(self, module):
        """Not supported; always returns None."""
        return None

    def get_data(self, module):
        """Not supported; always returns None."""
        return None

    def get_filename(self, name):
        """Return the path of the located source file."""
        return self.source

    def get_source(self, name):
        """Return the raw bytes of the located source file."""
        with open(self.source, 'rb') as f:
            return f.read()

    def is_package(self, *args, **kw):
        """Modules produced by this loader are never packages."""
        return False

    def load_module(self, fullname):
        """Return sys.modules[fullname], building and registering the
        module first if it is not there yet."""
        if fullname in sys.modules:
            return sys.modules[fullname]

        preload = None
        postload = None

        # Get the preload file (if any)
        if os.path.exists(self.preload):
            with open(self.preload) as f:
                preload = f.read()

        # Get the source
        with open(self.source, 'rb') as f:
            source_data = f.read()

        # Get the postload file (if any)
        if os.path.exists(self.postload):
            with open(self.postload) as f:
                postload = f.read()

        mod = self.build_module(fullname, self.get_filename(None), source_data,
                                preload, postload)
        sys.modules[fullname] = mod
        mod.__loader__ = self
        mod.__file__ = self.source
        return mod

    @classmethod
    def install(cls):
        """Append this class to sys.meta_path unless already there."""
        if cls not in sys.meta_path:
            sys.meta_path.append(cls)

    @classmethod
    def remove(cls):
        """Take this class off sys.meta_path; a no-op when it is not
        installed."""
        if cls in sys.meta_path:
            sys.meta_path.remove(cls)
+ m = build_test_module() + print(m) + wrap_module = NoBitey().wrap_llvm_module_in_python(m) + # Now try running the generated wrappers. + for py_wf_name in ('add_42_i32', 'add_42_i64', 'add_42_float', + 'add_42_double'): + py_wf = getattr(wrap_module, py_wf_name) + for i in range(42): + result = py_wf(i) + expected = i + 42 + assert result == expected, "%r != %r in %r" % ( + result, expected, py_wf) + return wrap_module + +if __name__ == "__main__": + main(*sys.argv[1:]) + +# ______________________________________________________________________ +# End of nobitey.py diff --git a/opcode_util.py b/opcode_util.py new file mode 100644 index 0000000..9bc8f25 --- /dev/null +++ b/opcode_util.py @@ -0,0 +1,215 @@ +#! /usr/bin/env python +# ______________________________________________________________________ + +import dis +import opcode + +# ______________________________________________________________________ +# Module data + +hasjump = opcode.hasjrel + opcode.hasjabs +hascbranch = [op for op in hasjump + if 'IF' in opcode.opname[op] + or opcode.opname[op] in ('FOR_ITER', 'SETUP_LOOP')] + +# Since the actual opcode value may change, manage opcode abstraction +# data by opcode name. 

# Maps an opcode name to a 3-tuple; None marks an entry not filled in.
# NOTE(review): the tuple looks like (values popped, values pushed,
# statement flag), with negative pop counts meaning "depends on the
# argument" -- confirm against the consumers of this table.
OPCODE_MAP = {
    'BINARY_ADD': (2, 1, None),
    'BINARY_AND': (2, 1, None),
    'BINARY_DIVIDE': (2, 1, None),
    'BINARY_FLOOR_DIVIDE': (2, 1, None),
    'BINARY_LSHIFT': (2, 1, None),
    'BINARY_MODULO': (2, 1, None),
    'BINARY_MULTIPLY': (2, 1, None),
    'BINARY_OR': (2, 1, None),
    'BINARY_POWER': (2, 1, None),
    'BINARY_RSHIFT': (2, 1, None),
    'BINARY_SUBSCR': (2, 1, None),
    'BINARY_SUBTRACT': (2, 1, None),
    'BINARY_TRUE_DIVIDE': (2, 1, None),
    'BINARY_XOR': (2, 1, None),
    'BREAK_LOOP': (0, None, 1),
    'BUILD_CLASS': (None, None, None),
    'BUILD_LIST': (-1, 1, None),
    'BUILD_MAP': (None, None, None),
    'BUILD_SET': (None, None, None),
    'BUILD_SLICE': (None, None, None),
    'BUILD_TUPLE': (-1, 1, None),
    'CALL_FUNCTION': (-2, 1, None),
    'CALL_FUNCTION_KW': (-3, 1, None),
    'CALL_FUNCTION_VAR': (-3, 1, None),
    'CALL_FUNCTION_VAR_KW': (-4, 1, None),
    'COMPARE_OP': (2, 1, None),
    'CONTINUE_LOOP': (None, None, None),
    'DELETE_ATTR': (1, None, 1),
    'DELETE_DEREF': (None, None, None),
    'DELETE_FAST': (0, None, 1),
    'DELETE_GLOBAL': (0, None, 1),
    'DELETE_NAME': (0, None, 1),
    'DELETE_SLICE+0': (1, None, 1),
    'DELETE_SLICE+1': (2, None, 1),
    'DELETE_SLICE+2': (2, None, 1),
    'DELETE_SLICE+3': (3, None, 1),
    'DELETE_SUBSCR': (2, None, 1),
    'DUP_TOP': (None, None, None),
    'DUP_TOPX': (None, None, None),
    'DUP_TOP_TWO': (None, None, None),
    'END_FINALLY': (None, None, None),
    'EXEC_STMT': (None, None, None),
    'EXTENDED_ARG': (None, None, None),
    'FOR_ITER': (1, 1, 1),
    'GET_ITER': (1, 1, None),
    'IMPORT_FROM': (None, None, None),
    'IMPORT_NAME': (None, None, None),
    'IMPORT_STAR': (1, None, 1),
    'INPLACE_ADD': (2, 1, None),
    'INPLACE_AND': (2, 1, None),
    'INPLACE_DIVIDE': (2, 1, None),
    'INPLACE_FLOOR_DIVIDE': (2, 1, None),
    'INPLACE_LSHIFT': (2, 1, None),
    'INPLACE_MODULO': (2, 1, None),
    'INPLACE_MULTIPLY': (2, 1, None),
    'INPLACE_OR': (2, 1, None),
    'INPLACE_POWER': (2, 1, None),
    'INPLACE_RSHIFT': (2, 1, None),
    'INPLACE_SUBTRACT': (2, 1, None),
    'INPLACE_TRUE_DIVIDE': (2, 1, None),
    'INPLACE_XOR': (2, 1, None),
    'JUMP_ABSOLUTE': (0, None, 1),
    'JUMP_FORWARD': (0, None, 1),
    'JUMP_IF_FALSE': (1, None, 1),
    'JUMP_IF_FALSE_OR_POP': (None, None, None),
    'JUMP_IF_TRUE': (1, None, 1),
    'JUMP_IF_TRUE_OR_POP': (None, None, None),
    'LIST_APPEND': (2, 0, 1),
    'LOAD_ATTR': (1, 1, None),
    'LOAD_BUILD_CLASS': (None, None, None),
    'LOAD_CLOSURE': (None, None, None),
    'LOAD_CONST': (0, 1, None),
    'LOAD_DEREF': (0, 1, None),
    'LOAD_FAST': (0, 1, None),
    'LOAD_GLOBAL': (0, 1, None),
    'LOAD_LOCALS': (None, None, None),
    'LOAD_NAME': (0, 1, None),
    'MAKE_CLOSURE': (None, None, None),
    'MAKE_FUNCTION': (None, None, None),
    'MAP_ADD': (None, None, None),
    'NOP': (0, None, None),
    'POP_BLOCK': (0, None, 1),
    'POP_EXCEPT': (None, None, None),
    'POP_JUMP_IF_FALSE': (1, None, 1),
    'POP_JUMP_IF_TRUE': (1, None, 1),
    'POP_TOP': (1, None, 1),
    'PRINT_EXPR': (1, None, 1),
    'PRINT_ITEM': (1, None, 1),
    'PRINT_ITEM_TO': (2, None, 1),
    'PRINT_NEWLINE': (0, None, 1),
    'PRINT_NEWLINE_TO': (1, None, 1),
    'RAISE_VARARGS': (None, None, None),
    'RETURN_VALUE': (1, None, 1),
    'ROT_FOUR': (None, None, None),
    'ROT_THREE': (None, None, None),
    'ROT_TWO': (None, None, None),
    'SETUP_EXCEPT': (None, None, None),
    'SETUP_FINALLY': (None, None, None),
    'SETUP_LOOP': (None, None, None),
    'SETUP_WITH': (None, None, None),
    'SET_ADD': (None, None, None),
    'SLICE+0': (1, 1, None),
    'SLICE+1': (2, 1, None),
    'SLICE+2': (2, 1, None),
    'SLICE+3': (3, 1, None),
    'STOP_CODE': (None, None, None),
    'STORE_ATTR': (2, None, 1),
    'STORE_DEREF': (1, 0, 1),
    'STORE_FAST': (1, None, 1),
    'STORE_GLOBAL': (1, None, 1),
    'STORE_LOCALS': (None, None, None),
    'STORE_MAP': (1, None, 1),
    'STORE_NAME': (1, None, 1),
    'STORE_SLICE+0': (1, None, 1),
    'STORE_SLICE+1': (2, None, 1),
    'STORE_SLICE+2': (2, None, 1),
    'STORE_SLICE+3': (3, None, 1),
    'STORE_SUBSCR': (3, None, 1),
    'UNARY_CONVERT': (1, 1, None),
    'UNARY_INVERT': (1, 1, None),
    'UNARY_NEGATIVE': (1, 1, None),
    'UNARY_NOT': (1, 1, None),
    'UNARY_POSITIVE': (1, 1, None),
    'UNPACK_EX': (None, None, None),
    'UNPACK_SEQUENCE': (None, None, None),
    'WITH_CLEANUP': (None, None, None),
    'YIELD_VALUE': (1, None, 1),
}

# ______________________________________________________________________
# Module functions

def itercode(code):
    """Return a generator of byte-offset, opcode, and argument
    from a byte-code-string.

    The decoder assumes the layout with a one-byte opcode followed,
    for opcodes >= HAVE_ARGUMENT, by a two-byte little-endian
    argument.  The argument is None for opcodes without one, and an
    EXTENDED_ARG prefix is folded into the next argument.

    The consumer may send() a pair ('abs', offset) or ('rel', delta)
    to move the read position before the next instruction is decoded.
    An empty code string yields nothing.
    """
    i = 0
    extended_arg = 0
    # Guard the probe: an empty code string has no code[0].
    if code and isinstance(code[0], str):
        code = [ord(c) for c in code]
    n = len(code)
    while i < n:
        op = code[i]
        num = i
        i = i + 1
        oparg = None
        if op >= opcode.HAVE_ARGUMENT:
            oparg = code[i] + (code[i + 1] * 256) + extended_arg
            extended_arg = 0
            i = i + 2
            if op == opcode.EXTENDED_ARG:
                # Becomes the high bits of the following argument.
                extended_arg = oparg * 65536

        delta = yield num, op, oparg
        if delta is not None:
            abs_rel, dst = delta
            assert abs_rel == 'abs' or abs_rel == 'rel'
            i = dst if abs_rel == 'abs' else i + dst

# ______________________________________________________________________

def extendlabels(code, labels = None):
    """Extend the set of jump target labels to account for the
    passthrough targets of conditional branches.

    This allows us to create a control flow graph where there is at
    most one branch per basic block.

    For every opcode in hasjump, and for BREAK_LOOP, the offset of the
    following instruction is appended to labels unless it is already
    there.  labels is modified in place and returned; a new list is
    created when it is omitted.  Empty code leaves labels untouched.
    """
    if labels is None:
        labels = []
    # Guard the probe: an empty code string has no code[0].
    if code and isinstance(code[0], str):
        code = [ord(c) for c in code]
    # None on interpreters without this opcode, so it never matches.
    break_loop = opcode.opmap.get('BREAK_LOOP')
    seen = set(labels)  # O(1) membership; the list keeps the order
    n = len(code)
    i = 0
    while i < n:
        op = code[i]
        i += 1
        if op >= dis.HAVE_ARGUMENT:
            i += 2
        # i is now the offset of the fall-through instruction.
        if (op in hasjump or op == break_loop) and i not in seen:
            seen.add(i)
            labels.append(i)
    return labels

# ______________________________________________________________________

def get_code_object (func):
    """Return func.__code__, else func.func_code, else None."""
    return getattr(func, '__code__', getattr(func, 'func_code', None))

# ______________________________________________________________________
# End of opcode_util.py

# diff --git a/phi_injector.py b/phi_injector.py
# new file mode 100644
# index 0000000..c40abe3
# --- /dev/null
# +++ b/phi_injector.py
# @@ -0,0 +1,131 @@

#! /usr/bin/env python
# ______________________________________________________________________

from bytecode_visitor import BytecodeFlowVisitor, BenignBytecodeVisitorMixin

# ______________________________________________________________________

# Synthetic opcodes are negative so they cannot collide with real ones.
# synthetic_opname is kept in reverse definition order, which makes
# synthetic_opname[op] return the name of synthetic opcode op.
synthetic_opname = []
synthetic_opmap = {}

def def_synth_op (opname):
    """Register opname as a synthetic opcode and return its number.

    Numbers are handed out as -1, -2, -3, ... in definition order.
    """
    ret_val = -(len(synthetic_opname) + 1)
    synthetic_opname.insert(0, opname)
    synthetic_opmap[opname] = ret_val
    return ret_val

REF_ARG = def_synth_op('REF_ARG')
BUILD_PHI = def_synth_op('BUILD_PHI')
DEFINITION = def_synth_op('DEFINITION')
REF_DEF = def_synth_op('REF_DEF')

# ______________________________________________________________________

class PhiInjector (BenignBytecodeVisitorMixin, BytecodeFlowVisitor):
    """Rewrite local loads and stores as definitions, references and phis.

    Instructions are tuples (index, opcode, opname, arg, args).
    STORE_FAST becomes a DEFINITION, LOAD_FAST becomes a REF_DEF of
    the definition currently bound to that local, and BUILD_PHI nodes
    are added at the start of blocks for which the control flow graph
    reports that phis are needed.
    """

    def visit_cfg (self, cfg, nargs = 0, *args, **kws):
        """Visit cfg.blocks with cfg available as self.cfg.

        nargs is the number of function arguments; extra positional
        and keyword arguments are accepted but not forwarded.
        """
        self.cfg = cfg
        ret_val = self.visit(cfg.blocks, nargs)
        del self.cfg
        return ret_val

    def visit (self, flow, nargs = 0, *args, **kws):
        """Run the visitor over flow, then fill in the phi operands.

        self.cfg must already be set (see visit_cfg).  Returns whatever
        the base class visit() returns.
        """
        self.nargs = nargs
        self.definitions = []
        self.phis = []
        self.prev_blocks = []
        self.blocks_locals = dict((block, {})
                                  for block in self.cfg.blocks.keys())
        ret_val = super(PhiInjector, self).visit(flow, *args, **kws)
        # Each phi was created holding only its local index.  Swap that
        # for one REF_DEF per reaching block that has 0 in its
        # blocks_reaching entry.
        for block, _, _, args, _ in self.phis:
            local = args.pop()
            reaching_definitions = self.cfg.reaching_definitions[block]
            for prev in reaching_definitions.keys():
                if 0 in self.cfg.blocks_reaching[prev]:
                    args.append((prev, REF_DEF, 'REF_DEF',
                                 self.blocks_locals[prev][local], ()))
            args.sort()
        del self.blocks_locals
        del self.prev_blocks
        del self.phis
        del self.definitions
        del self.nargs
        return ret_val

    def add_definition (self, index, local, arg):
        """Create a DEFINITION of arg and bind it to local in this block.

        The definition's number is its position in self.definitions.
        Returns the new instruction tuple.
        """
        definition_index = len(self.definitions)
        definition = (index, DEFINITION, 'DEFINITION', definition_index,
                      (arg,))
        self.definitions.append(definition)
        self.blocks_locals[self.block][local] = definition_index
        return definition

    def add_phi (self, index, local):
        """Create a BUILD_PHI placeholder for local and remember it.

        Its arg list holds only the local index until visit() replaces
        it with the real operands.
        """
        ret_val = (index, BUILD_PHI, 'BUILD_PHI', [local], ())
        self.phis.append(ret_val)
        return ret_val

    def enter_block (self, block):
        """Set up the local-to-definition map for block.

        Block 0 gets one definition per function argument, each
        wrapping a REF_ARG.  Other blocks with 0 in their
        blocks_reaching entry start from a copy of the previously
        visited such block's map and gain a phi definition for every
        local in cfg.phi_needed(block).  All remaining blocks return
        False.

        NOTE(review): the meaning of the False / True / list return
        values is defined by the base visitor -- confirm there.
        """
        ret_val = False
        if self.prev_blocks:
            prev_block = self.prev_blocks[-1]
        self.block = block
        if block == 0:
            if self.nargs > 0:
                ret_val = [self.add_definition(-1, arg,
                                               (-1, REF_ARG, 'REF_ARG', arg,
                                                ()))
                           for arg in range(self.nargs)]
            else:
                ret_val = True
        elif 0 in self.cfg.blocks_reaching[block]:
            ret_val = True
            # NOTE(review): prev_block is unbound here if no earlier
            # block was recorded by exit_block -- confirm that cannot
            # happen.
            prev_block_locals = self.blocks_locals[prev_block]
            self.blocks_locals[block] = prev_block_locals.copy()
            phis_needed = self.cfg.phi_needed(block)
            if phis_needed:
                ret_val = [self.add_definition(block, local,
                                               self.add_phi(block, local))
                           for local in phis_needed]
        return ret_val

    def exit_block (self, block):
        """Record block as visited when 0 is in its blocks_reaching entry."""
        if 0 in self.cfg.blocks_reaching[block]:
            self.prev_blocks.append(block)
        del self.block

    def op_STORE_FAST (self, i, op, arg, *args, **kws):
        """Turn a store to local arg into a DEFINITION of the stored value."""
        assert len(args) == 1
        return [self.add_definition(i, arg, args[0])]

    def op_LOAD_FAST (self, i, op, arg, *args, **kws):
        """Turn a load of local arg into a REF_DEF of its current definition."""
        return [(i, REF_DEF, 'REF_DEF', self.blocks_locals[self.block][arg],
                 args)]

#
# ______________________________________________________________________

def inject_phis (func):
    """Build the control flow graph of func and run PhiInjector over it."""
    import byte_control
    argcount = byte_control.opcode_util.get_code_object(func).co_argcount
    cfg = byte_control.build_cfg(func)
    return PhiInjector().visit_cfg(cfg, argcount)

# ______________________________________________________________________
# Main (self-test) routine

def main (*args):
    """Pretty-print the result for each named function in tests.llfuncs.

    Defaults to 'doslice' when no names are given.
    """
    import pprint
    from tests import llfuncs
    if not args:
        args = ('doslice',)
    for arg in args:
        pprint.pprint(inject_phis(getattr(llfuncs, arg)))

# ______________________________________________________________________

if __name__ == "__main__":
    import sys
    main(*sys.argv[1:])

# ______________________________________________________________________
# End of phi_injector.py

# diff --git a/pyaddfunc.py b/pyaddfunc.py
# new file mode 100644
# index 0000000..8e09839
# --- /dev/null
# +++ b/pyaddfunc.py
# @@ -0,0 +1,41 @@

#! /usr/bin/env python
# ______________________________________________________________________

import ctypes

# ______________________________________________________________________
class PyMethodDef (ctypes.Structure):
    """ctypes declaration of the four-field method definition record."""
    _fields_ = [
        ('ml_name', ctypes.c_char_p),
        ('ml_meth', ctypes.c_void_p),
        ('ml_flags', ctypes.c_int),
        ('ml_doc', ctypes.c_char_p),
        ]

PyCFunction_NewEx = ctypes.pythonapi.PyCFunction_NewEx
PyCFunction_NewEx.argtypes = (ctypes.POINTER(PyMethodDef),
                              ctypes.c_void_p,
                              ctypes.c_void_p)
PyCFunction_NewEx.restype = ctypes.py_object

cache = {} # Unsure if this is necessary to keep the PyMethodDef
           # structures from being garbage collected.  Assuming so...

def pyaddfunc (func_name, func_ptr, func_doc = None):
    """Wrap the native function at func_ptr as a Python callable.

    func_name -- name of the callable, text or bytes.  Text must be
                 encodable as Latin-1.
    func_ptr  -- address of the native function, as an integer.
    func_doc  -- optional docstring, text or bytes.

    Results are cached by (name, pointer), so a repeated request
    returns the same object and ignores func_doc.
    """
    # Text is converted one character per byte.  Bytes are taken as-is;
    # they used to crash on Python 3.
    if not isinstance(func_name, bytes):
        func_name = func_name.encode('latin-1')
    # c_char_p fields reject text on Python 3.
    if func_doc is not None and not isinstance(func_doc, bytes):
        func_doc = func_doc.encode('utf-8')
    key = (func_name, func_ptr)
    if key in cache:
        _, ret_val = cache[key]
    else:
        mdef = PyMethodDef(func_name,
                           func_ptr,
                           1, # == METH_VARARGS (hopefully remains so...)
                           func_doc)
        ret_val = PyCFunction_NewEx(ctypes.byref(mdef), 0, 0)
        # Keep mdef alive alongside the function built from it.
        cache[key] = (mdef, ret_val)
    return ret_val

# ______________________________________________________________________
# End of pyaddfunc.py

# diff --git a/tests/__init__.py b/tests/__init__.py
# new file mode 100644
# index 0000000..e69de29
# diff --git a/tests/llfuncs.py b/tests/llfuncs.py
# new file mode 100644
# index 0000000..55e9000
# --- /dev/null
# +++ b/tests/llfuncs.py
# @@ -0,0 +1,42 @@

#! /usr/bin/env python
# ______________________________________________________________________

def doslice (in_string, lower, upper):
    """Copy in_string[lower:upper] into a new NUL-terminated buffer.

    Negative bounds count from the end; an empty or inverted range
    yields an empty string.

    NOTE(review): strlen, alloca_array, li8 and strncpy are not
    defined in this module -- presumably supplied by whatever
    translates this function; confirm.
    """
    l = strlen(in_string)
    if lower < 0:
        lower += l
    if upper < 0:
        upper += l
    temp_len = upper - lower
    if temp_len < 0:
        temp_len = 0
    ret_val = alloca_array(li8, temp_len + 1)
    strncpy(ret_val, in_string + lower, temp_len)
    ret_val[temp_len] = li8(0)
    return ret_val

def ipow (val, exp):
    """Return val raised to exp by repeated squaring.

    An exp of zero or less returns 1.
    """
    ret_val = 1
    temp = val
    w = exp
    while w > 0:
        if (w & 1) != 0:
            ret_val *= temp
            # TODO: Overflow check on ret_val
        w >>= 1
        if w == 0: break
        temp *= temp
        # TODO: Overflow check on temp
    return ret_val

def pymod (arg1, arg2):
    """Return arg1 modulo arg2 with the sign of the divisor.

    Only a non-zero remainder whose sign differs from arg2 is
    adjusted, so the result is right whether '%' itself rounds toward
    zero or toward negative infinity.  A zero remainder stays zero; it
    used to come back as arg2 for negative divisors.
    """
    ret_val = arg1 % arg2
    if ret_val < 0 and arg2 > 0:
        ret_val += arg2
    elif ret_val > 0 and arg2 < 0:
        ret_val += arg2
    return ret_val

# ______________________________________________________________________
# End of llfuncs.py

# diff --git a/tests/llfunctys.py b/tests/llfunctys.py
# new file mode 100644
# index 0000000..107b5a3
# --- /dev/null
# +++ b/tests/llfunctys.py
# @@ -0,0 +1,23 @@

#!
#! /usr/bin/env python
# ______________________________________________________________________
# LLVM function types for the functions of the same name in llfuncs.py.

import llvm.core as lc

# Use the standalone llnumba package if there is one, otherwise the
# copy inside numba.
try:
    from llnumba import bytetype
except ImportError:
    from numba.llnumba import bytetype

# ______________________________________________________________________

# doslice: (i8 pointer, size_t, size_t) -> i8 pointer
doslice = lc.Type.function(
    bytetype.li8_ptr,
    [bytetype.li8_ptr, bytetype.lc_size_t, bytetype.lc_size_t])

# ipow: (i32, i32) -> i32
ipow = lc.Type.function(
    bytetype.li32,
    [bytetype.li32, bytetype.li32])

# pymod: (i32, i32) -> i32
pymod = lc.Type.function(
    bytetype.li32,
    [bytetype.li32, bytetype.li32])

# ______________________________________________________________________
# End of llfunctys.py