#! /usr/bin/env python # -*- coding: utf-8 -*- # ______________________________________________________________________ # Module imports from __future__ import print_function, division, absolute_import import inspect import opcode import llvm.core as lc from . import byte_control from . import addr_flow from . import opcode_util from .bytetype import lvoid, li1, lc_int, lc_long, l_pyobj_p from .bytecode_visitor import GenericFlowVisitor from .nobitey import get_string_constant # ______________________________________________________________________ # Class definitions class AddressFlowToLLVMPyAPICalls(GenericFlowVisitor): ''' Code generator for translating from a Python code object and its address flow (output by addr_flow.AddressFlowBuilder) into an LLVM function. The resulting LLVM function calls into a user-provided (or undefined) API function for each interpreter byte code. Target API function names are based on a programmable name mangling scheme: OPCODE_NAME The and strings are determined by the prefix() and postfix() methods. These methods may either be overloaded, or specialized at construction time using _prefix and _postfix arguments. The OPCODE_NAME is the opcode name as determined by the map in opcode.opname. ''' def __init__(self, _prefix=None, _postfix=None, **kwds): if _prefix is not None: if inspect.isfunction(_prefix): __prefix = _prefix else: def __prefix(opname, *opargs): return _prefix self.prefix = __prefix if _postfix is not None: if inspect.isfunction(_postfix): __postfix = _postfix else: def __postfix(opname, *opargs): return _postfix self.postfix = __postfix def prefix(self, opname, *opargs): return '' def postfix(self, opname, *opargs): return '' def get_op_function(self, opname, *opargs, **kwds): print(opname, opargs, kwds) target_fn_ty = lc.Type.function(kwds.get('return_type', self.obj_type), [arg.type for arg in opargs]) target_fn_name = ''.join((self.prefix(opname, *opargs), opname, self.postfix(opname, *opargs))) return self.llvm_module.get_or_insert_function(target_fn_ty, target_fn_name) def call_op_function(self, index, opname, *opargs, **kwds): target_fn = self.get_op_function(opname, *opargs, **kwds) if target_fn.type.pointee.return_type != lvoid: name = kwds.get('name', 'op_%d' % index) result = self.builder.call(target_fn, opargs, name) else: result = self.builder.call(target_fn, opargs) return result def translate_cfg(self, code_obj, cfg, llvm_module=None, monotype=l_pyobj_p, **kwds): ''' Generate LLVM code for the given code object and it's control flow graph. If no LLVM module is given as an argument, translate_cfg() creates a new module. Returns the resulting LLVM module. ''' assert inspect.iscode(code_obj) self.obj_type = monotype self.null = lc.Constant.null(self.obj_type) self.code_obj = code_obj self.cfg = cfg self.target_function_name = kwds.get( 'target_function_name', 'co_%s_%x' % (code_obj.co_name, id(code_obj))) if llvm_module is None: llvm_module = lc.Module.new('lmod_' + self.target_function_name) self.llvm_module = llvm_module self.visit(cfg.blocks) del self.llvm_module del self.cfg del self.code_obj return llvm_module def enter_flow_object(self, flow): ''' Set up any state for dealing a new dictionary of basic blocks. ''' super(AddressFlowToLLVMPyAPICalls, self).enter_flow_object(flow) self.nargs = opcode_util.get_nargs(self.code_obj) lltype = lc.Type.function( self.obj_type, tuple(self.obj_type for _ in range(self.nargs))) self.llvm_function = self.llvm_module.add_function( lltype, self.target_function_name) self.llvm_blocks = {} for block in self.block_list: if 0 in self.cfg.blocks_reaching[block]: bb = self.llvm_function.append_basic_block( 'BLOCK_%d' % (block,)) self.llvm_blocks[block] = bb self.symtab = {} self.values = {} def exit_flow_object(self, flow): ''' Clean up any state created while visiting the given dictionary of basic blocks. ''' super(AddressFlowToLLVMPyAPICalls, self).exit_flow_object(flow) del self.symtab del self.llvm_blocks del self.llvm_function def generate_co_init(self): ''' Initialize the code object's local variables on the stack. ''' for name in self.code_obj.co_varnames: ptr = self.builder.alloca(self.obj_type, name + '_p') self.symtab[name] = ptr self.builder.store(self.null, ptr) for arg_index, arg in zip(range(self.nargs), self.llvm_function.args): if arg_index == 0: arg.name = '_globals_%x' % id(self.code_obj) self.globals = arg else: local_index = arg_index - 1 name = self.code_obj.co_varnames[local_index] arg.name = name self.call_op_function( None, 'STORE_FAST', arg, self.symtab[name], return_type=lvoid) def generate_co_deinit(self, index): for value in self.symtab.values(): self.call_op_function(index, 'DELETE_FAST', lc.Constant.int(li1, 0), value, return_type=lvoid) def enter_block(self, block): ''' Set up state for generating code in a new basic block. If this is the first basic block, initialize the local variables using generate_co_init(). ''' ret_val = False if block in self.llvm_blocks: self.llvm_block = self.llvm_blocks[block] self.builder = lc.Builder.new(self.llvm_block) if block == 0: self.generate_co_init() ret_val = True return ret_val def exit_block(self, block): ''' Tear down any state created for code generation in the current basic block. If the basic block isn't already terminated by a control flow statement, assume it branches to the next basic block. ''' # XXX Isn't this really a bug in GenericFlowVisitor.visit()? if block in self.llvm_blocks: bb_instrs = self.llvm_block.instructions if ((len(bb_instrs) == 0) or (not bb_instrs[-1].is_terminator)): out_blocks = list(self.cfg.blocks_out[block]) assert len(out_blocks) == 1, [str(i) for i in bb_instrs] self.builder.branch(self.llvm_blocks[out_blocks[0]]) del self.builder del self.llvm_block def _op(self, i, op, arg, *args, **kwds): args = [self.values[stkarg] for stkarg in args] if arg is not None: args.insert(0, lc.Constant.int(lc_int, arg)) # XXX Modify the visitor! Should be passing Instr named # tuples here, and not looking up the operation name. result = self.call_op_function(i, self.opnames[op], *args, **kwds) self.values[i] = result return [result] def _not_implemented(self, i, op, arg, *args, **kwds): raise NotImplementedError(self.opnames[op]) op_BINARY_ADD = _op op_BINARY_AND = _op op_BINARY_DIVIDE = _op op_BINARY_FLOOR_DIVIDE = _op op_BINARY_LSHIFT = _op op_BINARY_MODULO = _op op_BINARY_MULTIPLY = _op op_BINARY_OR = _op op_BINARY_POWER = _op op_BINARY_RSHIFT = _op op_BINARY_SUBSCR = _op op_BINARY_SUBTRACT = _op op_BINARY_TRUE_DIVIDE = _op op_BINARY_XOR = _op op_BREAK_LOOP = _op op_BUILD_CLASS = _op op_BUILD_LIST = _op op_BUILD_MAP = _op op_BUILD_SET = _op op_BUILD_SLICE = _op op_BUILD_TUPLE = _op op_CALL_FUNCTION = _op op_CALL_FUNCTION_KW = _op op_CALL_FUNCTION_VAR = _op op_CALL_FUNCTION_VAR_KW = _op op_COMPARE_OP = _op op_CONTINUE_LOOP = _op op_DELETE_ATTR = _op op_DELETE_DEREF = _op def op_DELETE_FAST(self, i, op, arg, *args, **kwds): varname = self.code_obj.co_varnames[arg] result = self.call_op_function(i, 'DELETE_FAST', lc.Constant.int(li1, 1), self.symtab[varname], return_type=lvoid) return [result] def op_DELETE_GLOBAL(self, i, op, arg, *args, **kwds): varname = get_string_constant(self.llvm_module, self.code_obj.co_names[arg]) result = self.call_op_function(i, 'DELETE_GLOBAL', self.globals, varname, return_type=lvoid) self.values[i] = result return [result] op_DELETE_NAME = _op op_DELETE_SLICE = _op op_DELETE_SUBSCR = _op op_DUP_TOP = _not_implemented op_DUP_TOPX = _not_implemented op_DUP_TOP_TWO = _not_implemented op_END_FINALLY = _op op_EXEC_STMT = _op op_EXTENDED_ARG = _op op_FOR_ITER = _op op_GET_ITER = _op op_IMPORT_FROM = _op op_IMPORT_NAME = _op op_IMPORT_STAR = _op op_INPLACE_ADD = _op op_INPLACE_AND = _op op_INPLACE_DIVIDE = _op op_INPLACE_FLOOR_DIVIDE = _op op_INPLACE_LSHIFT = _op op_INPLACE_MODULO = _op op_INPLACE_MULTIPLY = _op op_INPLACE_OR = _op op_INPLACE_POWER = _op op_INPLACE_RSHIFT = _op op_INPLACE_SUBTRACT = _op op_INPLACE_TRUE_DIVIDE = _op op_INPLACE_XOR = _op def op_JUMP_ABSOLUTE(self, i, op, arg, *args, **kwds): return [self.builder.branch(self.llvm_blocks[arg])] def op_JUMP_FORWARD(self, i, op, arg, *args, **kwds): return [self.builder.branch(self.llvm_blocks[i + arg + 3])] op_JUMP_IF_FALSE = _not_implemented op_JUMP_IF_FALSE_OR_POP = _not_implemented op_JUMP_IF_TRUE = _not_implemented op_JUMP_IF_TRUE_OR_POP = _not_implemented op_LIST_APPEND = _op op_LOAD_ATTR = _op op_LOAD_BUILD_CLASS = _op op_LOAD_CLOSURE = _op def op_LOAD_CONST(self, i, op, arg, *args, **kwds): py_val = self.code_obj.co_consts[arg] if isinstance(py_val, int): # XXX Add bounds check on integer values; use big int # (from string?) constructor if necessary. result = self.call_op_function(i, 'LOAD_CONST_INT', lc.Constant.int(lc_long, py_val)) elif isinstance(py_val, float): result = self.call_op_function(i, 'LOAD_CONST_FLOAT', lc.Constant.double(py_val)) elif py_val is None: result = self.call_op_function(i, 'LOAD_CONST_NONE') else: raise NotImplementedError('Constant conversion for %r' % (py_val,)) self.values[i] = result return [result] op_LOAD_DEREF = _op def op_LOAD_FAST(self, i, op, arg, *args, **kwds): varname = self.code_obj.co_varnames[arg] args = self.symtab[varname], result = self.call_op_function(i, 'LOAD_FAST', *args) self.values[i] = result return [result] def op_LOAD_GLOBAL(self, i, op, arg, *args, **kwds): varname = get_string_constant(self.llvm_module, self.code_obj.co_names[arg]) result = self.call_op_function(i, 'LOAD_GLOBAL', self.globals, varname) self.values[i] = result return [result] op_LOAD_LOCALS = _op op_LOAD_NAME = _op op_MAKE_CLOSURE = _op op_MAKE_FUNCTION = _op op_MAP_ADD = _op op_NOP = _op op_POP_BLOCK = _op op_POP_EXCEPT = _op def _op_cbranch(self, i, op, arg, *args, **kwds): if op in opcode.hasjabs: branch_taken = self.llvm_blocks[arg] else: branch_taken = self.llvm_blocks[i + arg + 3] branch_not_taken = self.llvm_blocks[i + 3] _kwds = kwds.copy() _kwds.update(return_type=li1) test = self._op(i, op, None, *args, **_kwds)[0] return [test, self.builder.cbranch(test, branch_taken, branch_not_taken)] op_POP_JUMP_IF_FALSE = _op_cbranch op_POP_JUMP_IF_TRUE = _op_cbranch def op_POP_TOP(self, i, op, arg, *args): return [self.call_op_function(i, 'POP_TOP', self.values[args[0]], return_type=lvoid)] op_PRINT_EXPR = _op op_PRINT_ITEM = _op op_PRINT_ITEM_TO = _op op_PRINT_NEWLINE = _op op_PRINT_NEWLINE_TO = _op op_RAISE_VARARGS = _op def op_RETURN_VALUE(self, i, op, arg, *args): self.generate_co_deinit(i) return [self.builder.ret(self.values[args[0]])] op_ROT_FOUR = _op op_ROT_THREE = _op op_ROT_TWO = _op op_SETUP_EXCEPT = _op op_SETUP_FINALLY = _op op_SETUP_LOOP = _op_cbranch op_SETUP_WITH = _op op_SET_ADD = _op op_SLICE = _op op_STOP_CODE = _op op_STORE_ATTR = _op op_STORE_DEREF = _op def op_STORE_FAST(self, i, op, arg, *args): src_index, = args src = self.values[src_index] varname = self.code_obj.co_varnames[arg] dest = self.symtab[varname] result = self.call_op_function(i, 'STORE_FAST', src, dest, return_type=lvoid) return [result] def op_STORE_GLOBAL(self, i, op, arg, *args): varname = get_string_constant(self.llvm_module, self.code_obj.co_names[arg]) result = self.call_op_function( i, 'STORE_GLOBAL', self.values[args[0]], self.globals, varname, return_type=lvoid) return [result] op_STORE_LOCALS = _not_implemented op_STORE_MAP = _op op_STORE_NAME = _op op_STORE_SLICE = _op op_STORE_SUBSCR = _op op_UNARY_CONVERT = _op op_UNARY_INVERT = _op op_UNARY_NEGATIVE = _op op_UNARY_NOT = _op op_UNARY_POSITIVE = _op op_UNPACK_EX = _op op_UNPACK_SEQUENCE = _op op_WITH_CLEANUP = _op op_YIELD_VALUE = _op # ______________________________________________________________________ # Function definition(s) def demo_translator(*args, **kwds): def _visit(obj): if inspect.isfunction(obj): obj = opcode_util.get_code_object(obj) print('\n; %s\n; %r' % ('_' * 70, obj)) cfg = byte_control.ControlFlowBuilder.build_cfg_from_co(obj) cfg.blocks = addr_flow.AddressFlowBuilder().visit_cfg(cfg) print(AddressFlowToLLVMPyAPICalls(**kwds).translate_cfg(obj, cfg, **kwds)) return opcode_util.visit_code_args(_visit, *args) # ______________________________________________________________________ # Main (self-test) routine if __name__ == '__main__': import sys demo_translator(*sys.argv[1:], _prefix = '_') # ______________________________________________________________________ # End of af_to_api.py