llvmpy/llpython/opcode_util.py

296 lines
11 KiB
Python

# ______________________________________________________________________
from collections import namedtuple
import dis
import opcode
import inspect
# ______________________________________________________________________
# Module data
# Note that opcode.hasjrel and opcode.hasjabs applies only to opcodes
# that calculate a jump point based on the argument. This ignores
# jumps that use the frame stack to calculate their targets, and
# exceptions.
NON_ARG_JUMP_NAMES = ('BREAK_LOOP', 'RETURN_VALUE', 'END_FINALLY',
'RAISE_VARARGS')
NON_ARG_JUMPS = [opcode.opmap[opname]
for opname in NON_ARG_JUMP_NAMES
if opname in opcode.opmap]
HAS_CBRANCH_NAMES = 'FOR_ITER',
hasjump = opcode.hasjrel + opcode.hasjabs + NON_ARG_JUMPS
hascbranch = [op for op, opname in ((op, opcode.opname[op])
for op in hasjump)
if 'IF' in opname
or 'SETUP' in opname
or opname in HAS_CBRANCH_NAMES]
OpcodeData = namedtuple('OpcodeData', ('pops', 'pushes', 'is_stmt'))
NO_OPCODE_DATA = OpcodeData(None, None, None)
# Since the actual opcode value may change, manage opcode abstraction
# data by opcode name.
OPCODE_MAP = {
'BINARY_ADD': OpcodeData(2, 1, None),
'BINARY_AND': OpcodeData(2, 1, None),
'BINARY_DIVIDE': OpcodeData(2, 1, None),
'BINARY_FLOOR_DIVIDE': OpcodeData(2, 1, None),
'BINARY_LSHIFT': OpcodeData(2, 1, None),
'BINARY_MODULO': OpcodeData(2, 1, None),
'BINARY_MULTIPLY': OpcodeData(2, 1, None),
'BINARY_OR': OpcodeData(2, 1, None),
'BINARY_POWER': OpcodeData(2, 1, None),
'BINARY_RSHIFT': OpcodeData(2, 1, None),
'BINARY_SUBSCR': OpcodeData(2, 1, None),
'BINARY_SUBTRACT': OpcodeData(2, 1, None),
'BINARY_TRUE_DIVIDE': OpcodeData(2, 1, None),
'BINARY_XOR': OpcodeData(2, 1, None),
'BREAK_LOOP': OpcodeData(0, None, 1),
'BUILD_CLASS': OpcodeData(3, 1, None),
'BUILD_LIST': OpcodeData(-1, 1, None),
'BUILD_MAP': OpcodeData(-1, 1, None),
'BUILD_SET': OpcodeData(-1, 1, None),
'BUILD_SLICE': OpcodeData(-1, 1, None), # oparg should only be 2 or 3
'BUILD_TUPLE': OpcodeData(-1, 1, None),
'CALL_FUNCTION': OpcodeData(-2, 1, None),
'CALL_FUNCTION_KW': OpcodeData(-3, 1, None),
'CALL_FUNCTION_VAR': OpcodeData(-3, 1, None),
'CALL_FUNCTION_VAR_KW': OpcodeData(-4, 1, None),
'COMPARE_OP': OpcodeData(2, 1, None),
'CONTINUE_LOOP': OpcodeData(0, None, 1),
'DELETE_ATTR': OpcodeData(1, None, 1),
'DELETE_DEREF': NO_OPCODE_DATA,
'DELETE_FAST': OpcodeData(0, None, 1),
'DELETE_GLOBAL': OpcodeData(0, None, 1),
'DELETE_NAME': OpcodeData(0, None, 1),
'DELETE_SLICE+0': OpcodeData(1, None, 1),
'DELETE_SLICE+1': OpcodeData(2, None, 1),
'DELETE_SLICE+2': OpcodeData(2, None, 1),
'DELETE_SLICE+3': OpcodeData(3, None, 1),
'DELETE_SUBSCR': OpcodeData(2, None, 1),
'DUP_TOP': NO_OPCODE_DATA,
'DUP_TOPX': NO_OPCODE_DATA,
'DUP_TOP_TWO': NO_OPCODE_DATA,
# The data for END_FINALLY is a total fabrication; END_FINALLY may
# pop 1 or 3 values off the value stack, based on the type of the
# top of the value stack. If, however, a value stack simulator
# ignores the part of the CPython evaluator loop that pushes the
# why code on the value stack for WHY_RETURN and WHY_CONTINUE (as
# this table does), this should work out fine.
'END_FINALLY': OpcodeData(0, 0, 1),
'EXEC_STMT': OpcodeData(3, 0, 1),
'EXTENDED_ARG': NO_OPCODE_DATA,
'FOR_ITER': OpcodeData(1, 1, 1),
'GET_ITER': OpcodeData(1, 1, None),
'IMPORT_FROM': NO_OPCODE_DATA,
'IMPORT_NAME': OpcodeData(2, 1, None),
'IMPORT_STAR': OpcodeData(1, None, 1),
'INPLACE_ADD': OpcodeData(2, 1, None),
'INPLACE_AND': OpcodeData(2, 1, None),
'INPLACE_DIVIDE': OpcodeData(2, 1, None),
'INPLACE_FLOOR_DIVIDE': OpcodeData(2, 1, None),
'INPLACE_LSHIFT': OpcodeData(2, 1, None),
'INPLACE_MODULO': OpcodeData(2, 1, None),
'INPLACE_MULTIPLY': OpcodeData(2, 1, None),
'INPLACE_OR': OpcodeData(2, 1, None),
'INPLACE_POWER': OpcodeData(2, 1, None),
'INPLACE_RSHIFT': OpcodeData(2, 1, None),
'INPLACE_SUBTRACT': OpcodeData(2, 1, None),
'INPLACE_TRUE_DIVIDE': OpcodeData(2, 1, None),
'INPLACE_XOR': OpcodeData(2, 1, None),
'JUMP_ABSOLUTE': OpcodeData(0, None, 1),
'JUMP_FORWARD': OpcodeData(0, None, 1),
'JUMP_IF_FALSE': OpcodeData(1, 1, 1),
'JUMP_IF_FALSE_OR_POP': NO_OPCODE_DATA,
'JUMP_IF_TRUE': OpcodeData(1, 1, 1),
'JUMP_IF_TRUE_OR_POP': NO_OPCODE_DATA,
'LIST_APPEND': NO_OPCODE_DATA,
'LOAD_ATTR': OpcodeData(1, 1, None),
'LOAD_BUILD_CLASS': NO_OPCODE_DATA,
'LOAD_CLOSURE': NO_OPCODE_DATA,
'LOAD_CONST': OpcodeData(0, 1, None),
'LOAD_DEREF': OpcodeData(0, 1, None),
'LOAD_FAST': OpcodeData(0, 1, None),
'LOAD_GLOBAL': OpcodeData(0, 1, None),
'LOAD_LOCALS': OpcodeData(0, 1, None),
'LOAD_NAME': OpcodeData(0, 1, None),
'MAKE_CLOSURE': NO_OPCODE_DATA,
'MAKE_FUNCTION': OpcodeData(-2, 1, None),
'MAP_ADD': NO_OPCODE_DATA,
'NOP': OpcodeData(0, None, None),
'POP_BLOCK': OpcodeData(0, None, 1),
'POP_EXCEPT': NO_OPCODE_DATA,
'POP_JUMP_IF_FALSE': OpcodeData(1, None, 1),
'POP_JUMP_IF_TRUE': OpcodeData(1, None, 1),
'POP_TOP': OpcodeData(1, None, 1),
'PRINT_EXPR': OpcodeData(1, None, 1),
'PRINT_ITEM': OpcodeData(1, None, 1),
'PRINT_ITEM_TO': OpcodeData(2, None, 1),
'PRINT_NEWLINE': OpcodeData(0, None, 1),
'PRINT_NEWLINE_TO': OpcodeData(1, None, 1),
'RAISE_VARARGS': OpcodeData(-1, None, 1),
'RETURN_VALUE': OpcodeData(1, None, 1),
'ROT_FOUR': NO_OPCODE_DATA,
'ROT_THREE': NO_OPCODE_DATA,
'ROT_TWO': NO_OPCODE_DATA,
'SETUP_EXCEPT': NO_OPCODE_DATA,
'SETUP_FINALLY': NO_OPCODE_DATA,
'SETUP_LOOP': NO_OPCODE_DATA,
'SETUP_WITH': NO_OPCODE_DATA,
'SET_ADD': NO_OPCODE_DATA,
'SLICE+0': OpcodeData(1, 1, None),
'SLICE+1': OpcodeData(2, 1, None),
'SLICE+2': OpcodeData(2, 1, None),
'SLICE+3': OpcodeData(3, 1, None),
'STOP_CODE': NO_OPCODE_DATA,
'STORE_ATTR': OpcodeData(2, None, 1),
'STORE_DEREF': OpcodeData(1, 0, 1),
'STORE_FAST': OpcodeData(1, None, 1),
'STORE_GLOBAL': OpcodeData(1, None, 1),
'STORE_LOCALS': NO_OPCODE_DATA,
'STORE_MAP': OpcodeData(1, None, 1),
'STORE_NAME': OpcodeData(1, None, 1),
'STORE_SLICE+0': OpcodeData(1, None, 1),
'STORE_SLICE+1': OpcodeData(2, None, 1),
'STORE_SLICE+2': OpcodeData(2, None, 1),
'STORE_SLICE+3': OpcodeData(3, None, 1),
'STORE_SUBSCR': OpcodeData(3, None, 1),
'UNARY_CONVERT': OpcodeData(1, 1, None),
'UNARY_INVERT': OpcodeData(1, 1, None),
'UNARY_NEGATIVE': OpcodeData(1, 1, None),
'UNARY_NOT': OpcodeData(1, 1, None),
'UNARY_POSITIVE': OpcodeData(1, 1, None),
'UNPACK_EX': NO_OPCODE_DATA,
'UNPACK_SEQUENCE': NO_OPCODE_DATA,
'WITH_CLEANUP': NO_OPCODE_DATA,
'YIELD_VALUE': OpcodeData(1, None, 1),
}
# ______________________________________________________________________
# Module functions
def itercode(code, start = 0):
"""Return a generator of byte-offset, opcode, and argument
from a byte-code-string
"""
i = 0
extended_arg = 0
if isinstance(code[0], str):
code = [ord(c) for c in code]
n = len(code)
while i < n:
op = code[i]
num = i + start
i = i + 1
oparg = None
if op >= opcode.HAVE_ARGUMENT:
oparg = code[i] + (code[i + 1] * 256) + extended_arg
extended_arg = 0
i = i + 2
if op == opcode.EXTENDED_ARG:
extended_arg = oparg * 65536
continue
delta = yield num, op, oparg
if delta is not None:
abs_rel, dst = delta
assert abs_rel == 'abs' or abs_rel == 'rel'
i = dst if abs_rel == 'abs' else i + dst
# ______________________________________________________________________
def itercodeobjs(codeobj):
"Iterator that traverses code objects via the co_consts member."
yield codeobj
for const in codeobj.co_consts:
if inspect.iscode(const):
for childobj in itercodeobjs(const):
yield childobj
# ______________________________________________________________________
def visit_code_args(visitor, *args, **kws):
"""Utility function for testing or demonstrating various code
analysis passes in llpython.
Takes a visitor function and a sequence of command line arguments.
The visitor function should be able to handle either function
objects or code objects."""
def _visit_code_objs(root_obj):
for code_obj in itercodeobjs(root_obj):
visitor(code_obj)
try:
from .tests import llfuncs
except ImportError:
llfuncs = object()
if not args:
if 'default_args' in kws:
args = kws['default_args']
else:
args = ('pymod',)
for arg in args:
if inspect.iscode(arg):
_visit_code_objs(arg)
elif inspect.isfunction(arg):
_visit_code_objs(get_code_object(arg))
elif arg.endswith('.py'):
with open(arg) as in_file:
in_source = in_file.read()
in_codeobj = compile(in_source, arg, 'exec')
_visit_code_objs(in_codeobj)
else:
visitor(getattr(llfuncs, arg))
# ______________________________________________________________________
def extendlabels(code, labels = None):
"""Extend the set of jump target labels to account for the
passthrough targets of conditional branches.
This allows us to create a control flow graph where there is at
most one branch per basic block.
"""
if labels is None:
labels = []
if isinstance(code[0], str):
code = [ord(c) for c in code]
n = len(code)
i = 0
while i < n:
op = code[i]
i += 1
if op >= dis.HAVE_ARGUMENT:
i += 2
if op in hasjump and i < n and i not in labels:
labels.append(i)
return labels
# ______________________________________________________________________
def get_code_object (func):
return getattr(func, '__code__', getattr(func, 'func_code', None))
# ______________________________________________________________________
def build_basic_blocks (co_obj):
co_code = co_obj.co_code
labels = extendlabels(co_code, dis.findlabels(co_code))
labels.sort()
blocks = dict((index, list(itercode(co_code[index:next_index], index)))
for index, next_index in zip([0] + labels,
labels + [len(co_code)]))
return blocks
# ______________________________________________________________________
def get_nargs(co_obj):
flags = co_obj.co_flags
return (1 + co_obj.co_argcount + (1 if flags & 4 else 0) +
(1 if flags & 8 else 0))
# ______________________________________________________________________
# End of opcode_util.py